diff --git a/repopack/__init__.py b/repopack/__init__.py index c73059c..c25f8ab 100644 --- a/repopack/__init__.py +++ b/repopack/__init__.py @@ -2,4 +2,10 @@ from .cli import run_cli from .version import __version__ -__all__ = ["pack", "run_cli", "__version__"] +# Define the public API of the package +__all__: list[str] = ["pack", "run_cli", "__version__"] + +# Type hints for imported objects +pack: callable +run_cli: callable +__version__: str diff --git a/repopack/__main__.py b/repopack/__main__.py index 6c29da0..2869b60 100644 --- a/repopack/__main__.py +++ b/repopack/__main__.py @@ -1,4 +1,10 @@ from .cli import run_cli +# This is the main entry point for the repopack command-line application. +# It checks if the script is being run directly (not imported as a module) +# and if so, it calls the run_cli function to start the CLI. + if __name__ == "__main__": - run_cli() + run_cli() # type: ignore + # Note: The type: ignore comment is added because run_cli is imported + # from a local module and mypy might not be able to infer its type. diff --git a/repopack/cli.py b/repopack/cli.py index 35e76e6..79c1f89 100644 --- a/repopack/cli.py +++ b/repopack/cli.py @@ -2,6 +2,7 @@ import logging import os import sys +from typing import Dict, Any from .packager import pack from .config import load_config, merge_configs from .exceptions import RepopackError, ConfigurationError @@ -11,7 +12,12 @@ from .version import __version__ -def run_cli(): +def run_cli() -> None: + """ + Main entry point for the Repopack CLI. + Parses command-line arguments, loads and merges configurations, and executes the packing process. 
+ """ + # Set up argument parser parser = argparse.ArgumentParser( description="Repopack - Pack your repository into a single AI-friendly file" ) @@ -37,10 +43,12 @@ def run_cli(): ) args = parser.parse_args() + # Set verbosity level logger.set_verbose(args.verbose) + # Load configuration try: - config = load_config(args.config) + config: Dict[str, Any] = load_config(args.config) except ConfigurationError as e: logger.error(f"Configuration file error: {str(e)}") logger.debug("Stack trace:", exc_info=True) @@ -50,7 +58,8 @@ def run_cli(): logger.debug("Stack trace:", exc_info=True) sys.exit(1) - cli_config = {} + # Create CLI configuration + cli_config: Dict[str, Any] = {} if args.output: cli_config["output"] = {"file_path": args.output} if args.ignore: @@ -65,8 +74,9 @@ def run_cli(): cli_config["output"] = cli_config.get("output", {}) cli_config["output"]["style"] = args.output_style + # Merge configurations try: - merged_config = merge_configs(config, cli_config) + merged_config: Dict[str, Any] = merge_configs(config, cli_config) except ConfigurationError as e: logger.error(f"Error merging configurations: {str(e)}") logger.debug("Stack trace:", exc_info=True) @@ -74,12 +84,15 @@ def run_cli(): logger.debug(f"Merged configuration: {merged_config}") + # Initialize spinner for visual feedback spinner = Spinner("Packing files...") try: spinner.start() - pack_result = pack(os.path.abspath(args.directory), merged_config) + # Execute packing process + pack_result: Dict[str, Any] = pack(os.path.abspath(args.directory), merged_config) spinner.succeed("Packing completed successfully!") + # Print summary and completion message print_summary( pack_result["total_files"], pack_result["total_characters"], diff --git a/repopack/config.py b/repopack/config.py index 11169c0..2a1cf01 100644 --- a/repopack/config.py +++ b/repopack/config.py @@ -1,9 +1,10 @@ import json -from typing import Dict, Any +from typing import Dict, Any, Optional from .exceptions import ConfigurationError 
-DEFAULT_CONFIG = { +# Default configuration for Repopack +DEFAULT_CONFIG: Dict[str, Dict[str, Any]] = { "output": { "file_path": "repopack-output.txt", "style": "plain", @@ -20,7 +21,19 @@ } -def load_config(config_path: str = None) -> Dict[str, Any]: +def load_config(config_path: Optional[str] = None) -> Dict[str, Any]: + """ + Load configuration from a JSON file. + + Args: + config_path (Optional[str]): Path to the configuration file. + + Returns: + Dict[str, Any]: Loaded configuration or an empty dictionary if no file is provided. + + Raises: + ConfigurationError: If there's an error reading or parsing the configuration file. + """ if config_path: try: with open(config_path, "r") as f: @@ -33,6 +46,19 @@ def load_config(config_path: str = None) -> Dict[str, Any]: def merge_configs(file_config: Dict[str, Any], cli_config: Dict[str, Any]) -> Dict[str, Any]: + """ + Merge configurations from different sources. + + Args: + file_config (Dict[str, Any]): Configuration loaded from a file. + cli_config (Dict[str, Any]): Configuration provided via command-line interface. + + Returns: + Dict[str, Any]: Merged configuration. + + Raises: + ConfigurationError: If there's an error during the merging process. + """ try: merged = DEFAULT_CONFIG.copy() merged = deep_merge(merged, file_config) @@ -42,7 +68,17 @@ def merge_configs(file_config: Dict[str, Any], cli_config: Dict[str, Any]) -> Di raise ConfigurationError(f"Error merging configurations: {str(e)}") -def deep_merge(dict1, dict2): +def deep_merge(dict1: Dict[str, Any], dict2: Dict[str, Any]) -> Dict[str, Any]: + """ + Recursively merge two dictionaries. + + Args: + dict1 (Dict[str, Any]): First dictionary to merge. + dict2 (Dict[str, Any]): Second dictionary to merge. + + Returns: + Dict[str, Any]: Merged dictionary. 
+ """ for key, value in dict2.items(): if key in dict1: if isinstance(dict1[key], dict) and isinstance(value, dict): diff --git a/repopack/exceptions.py b/repopack/exceptions.py index a07a560..f5f5576 100644 --- a/repopack/exceptions.py +++ b/repopack/exceptions.py @@ -1,25 +1,56 @@ # file: exceptions.py +from typing import Optional + class RepopackError(Exception): """Base exception class for Repopack errors.""" - pass + def __init__(self, message: Optional[str] = None) -> None: + """ + Initialize the RepopackError. + + Args: + message (Optional[str]): The error message. Defaults to None. + """ + super().__init__(message) class ConfigurationError(RepopackError): """Raised when there's an error in the configuration.""" - pass + def __init__(self, message: str) -> None: + """ + Initialize the ConfigurationError. + + Args: + message (str): The specific configuration error message. + """ + super().__init__(f"Configuration error: {message}") class FileProcessingError(RepopackError): """Raised when there's an error processing a file.""" - pass + def __init__(self, file_path: str, error_message: str) -> None: + """ + Initialize the FileProcessingError. + + Args: + file_path (str): The path of the file that caused the error. + error_message (str): The specific error message. + """ + super().__init__(f"Error processing file '{file_path}': {error_message}") class OutputGenerationError(RepopackError): """Raised when there's an error generating the output.""" - pass + def __init__(self, error_message: str) -> None: + """ + Initialize the OutputGenerationError. + + Args: + error_message (str): The specific error message related to output generation. 
+ """ + super().__init__(f"Error generating output: {error_message}") diff --git a/repopack/output_generator.py b/repopack/output_generator.py index 9de3e95..10cc4a3 100644 --- a/repopack/output_generator.py +++ b/repopack/output_generator.py @@ -13,9 +13,22 @@ def generate_output( sanitized_files: List[Dict[str, str]], all_file_paths: List[str], file_char_counts: Dict[str, int], -): - output_path = os.path.join(root_dir, config["output"]["file_path"]) - tree_string = generate_tree_string(all_file_paths) +) -> None: + """ + Generate the output file based on the specified configuration. + + Args: + root_dir (str): The root directory of the repository. + config (Dict[str, Any]): The configuration dictionary. + sanitized_files (List[Dict[str, str]]): List of sanitized file contents. + all_file_paths (List[str]): List of all file paths in the repository. + file_char_counts (Dict[str, int]): Dictionary of file paths and their character counts. + + Raises: + OutputGenerationError: If there's an error during output generation. + """ + output_path: str = os.path.join(root_dir, config["output"]["file_path"]) + tree_string: str = generate_tree_string(all_file_paths) try: if config["output"]["style"] == "xml": @@ -37,20 +50,40 @@ def generate_plain_output( all_file_paths: List[str], file_char_counts: Dict[str, int], tree_string: str, -): +) -> None: + """ + Generate plain text output file. + + Args: + output_path (str): Path to the output file. + config (Dict[str, Any]): The configuration dictionary. + sanitized_files (List[Dict[str, str]]): List of sanitized file contents. + all_file_paths (List[str]): List of all file paths in the repository. + file_char_counts (Dict[str, int]): Dictionary of file paths and their character counts. + tree_string (str): String representation of the repository structure. 
+ """ with open(output_path, "w", encoding="utf-8") as f: + # Write header f.write("=" * 64 + "\n") f.write("Repopack Output File\n") f.write("=" * 64 + "\n\n") f.write(f"This file was generated by Repopack on: {datetime.now().isoformat()}\n\n") + + # Write purpose f.write("Purpose:\n--------\n") f.write("This file contains a packed representation of the entire repository's contents.\n") f.write("It is designed to be easily consumable by AI systems for analysis, code review,\n") f.write("or other automated processes.\n\n") + + # Write notes if config["output"]["show_line_numbers"]: f.write("- Line numbers have been added to the beginning of each line.\n") + + # Write repository structure f.write("Repository Structure:\n---------------------\n") f.write(tree_string + "\n\n") + + # Write repository files f.write("=" * 64 + "\n") f.write("Repository Files\n") f.write("=" * 64 + "\n\n") @@ -61,13 +94,16 @@ def generate_plain_output( f.write("=" * 16 + "\n") f.write(file["content"] + "\n\n") - top_files_length = config["output"]["top_files_length"] + # Write top files by character count + top_files_length: int = config["output"]["top_files_length"] if top_files_length > 0: f.write("\n" + "=" * 64 + "\n") f.write(f"Top {top_files_length} Files by Character Count\n") f.write("=" * 64 + "\n") - sorted_files = sorted(file_char_counts.items(), key=lambda x: x[1], reverse=True) + sorted_files: List[tuple[str, int]] = sorted( + file_char_counts.items(), key=lambda x: x[1], reverse=True + ) for i, (file_path, char_count) in enumerate(sorted_files[:top_files_length], 1): f.write(f"{i}. {file_path} ({char_count} chars)\n") @@ -81,10 +117,22 @@ def generate_xml_output( all_file_paths: List[str], file_char_counts: Dict[str, int], tree_string: str, -): - root = ET.Element("repopack_output") - - summary = ET.SubElement(root, "summary") +) -> None: + """ + Generate XML output file. + + Args: + output_path (str): Path to the output file. 
+ config (Dict[str, Any]): The configuration dictionary. + sanitized_files (List[Dict[str, str]]): List of sanitized file contents. + all_file_paths (List[str]): List of all file paths in the repository. + file_char_counts (Dict[str, int]): Dictionary of file paths and their character counts. + tree_string (str): String representation of the repository structure. + """ + root: ET.Element = ET.Element("repopack_output") + + # Add summary section + summary: ET.Element = ET.SubElement(root, "summary") ET.SubElement(summary, "header").text = ( f"Repopack Output File" "\nThis file was generated by Repopack on: {datetime.now().isoformat()}" @@ -95,7 +143,8 @@ def generate_xml_output( "or other automated processes." ) - notes = ET.SubElement(summary, "notes") + # Add notes + notes: ET.Element = ET.SubElement(summary, "notes") if config["output"]["remove_comments"]: ET.SubElement(notes, "note").text = "Code comments have been removed." if config["output"]["show_line_numbers"]: @@ -103,28 +152,34 @@ def generate_xml_output( notes, "note" ).text = "Line numbers have been added to the beginning of each line." 
+ # Add repository structure ET.SubElement(root, "repository_structure").text = tree_string - files = ET.SubElement(root, "repository_files") + # Add repository files + files: ET.Element = ET.SubElement(root, "repository_files") for file in sanitized_files: - file_elem = ET.SubElement(files, "file") + file_elem: ET.Element = ET.SubElement(files, "file") file_elem.set("path", file["path"]) file_elem.text = file["content"] - top_files_length = config["output"]["top_files_length"] + # Add top files by character count + top_files_length: int = config["output"]["top_files_length"] if top_files_length > 0: - top_files = ET.SubElement(root, "top_files") + top_files: ET.Element = ET.SubElement(root, "top_files") top_files.set("count", str(top_files_length)) - sorted_files = sorted(file_char_counts.items(), key=lambda x: x[1], reverse=True) + sorted_files: List[tuple[str, int]] = sorted( + file_char_counts.items(), key=lambda x: x[1], reverse=True + ) for i, (file_path, char_count) in enumerate(sorted_files[:top_files_length], 1): - file_elem = ET.SubElement(top_files, "file") + file_elem: ET.Element = ET.SubElement(top_files, "file") file_elem.set("rank", str(i)) file_elem.set("path", file_path) file_elem.set("char_count", str(char_count)) # Pretty print the XML - xml_string = ET.tostring(root, encoding="unicode") - pretty_xml = minidom.parseString(xml_string).toprettyxml(indent=" ") + xml_string: str = ET.tostring(root, encoding="unicode") + pretty_xml: str = minidom.parseString(xml_string).toprettyxml(indent=" ") + # Write to file with open(output_path, "w", encoding="utf-8") as f: f.write(pretty_xml) diff --git a/repopack/packager.py b/repopack/packager.py index bcb6750..da68b14 100644 --- a/repopack/packager.py +++ b/repopack/packager.py @@ -1,5 +1,5 @@ import os -from typing import Dict, Any +from typing import Dict, Any, List, Callable from .exceptions import RepopackError, FileProcessingError, OutputGenerationError from .utils.file_handler import sanitize_files from 
.utils.ignore_utils import get_all_ignore_patterns, create_ignore_filter @@ -8,19 +8,33 @@ def pack(root_dir: str, config: Dict[str, Any]) -> Dict[str, Any]: + """ + Pack the contents of a directory according to the given configuration. + + Args: + root_dir (str): The root directory to pack. + config (Dict[str, Any]): The configuration dictionary. + + Returns: + Dict[str, Any]: A dictionary containing statistics about the packed files. + + Raises: + RepopackError: If there's an error during the packing process. + """ logger.debug(f"Starting packing process for directory: {root_dir}") logger.debug(f"Configuration: {config}") try: - ignore_patterns = get_all_ignore_patterns(root_dir, config) + # Get ignore patterns and create ignore filter + ignore_patterns: List[str] = get_all_ignore_patterns(root_dir, config) logger.debug(f"Ignore patterns: {ignore_patterns}") + ignore_filter: Callable[[str], bool] = create_ignore_filter(ignore_patterns) - ignore_filter = create_ignore_filter(ignore_patterns) - - all_file_paths = [] + # Collect all file paths + all_file_paths: List[str] = [] for root, _, files in os.walk(root_dir): for file in files: - file_path = os.path.relpath(os.path.join(root, file), root_dir) + file_path: str = os.path.relpath(os.path.join(root, file), root_dir) if ignore_filter(file_path): all_file_paths.append(file_path) logger.trace(f"Including file: {file_path}") @@ -29,21 +43,28 @@ def pack(root_dir: str, config: Dict[str, Any]) -> Dict[str, Any]: logger.info(f"Total files to process: {len(all_file_paths)}") - sanitized_files = sanitize_files(all_file_paths, root_dir, config) + # Sanitize files + sanitized_files: List[Dict[str, str]] = sanitize_files(all_file_paths, root_dir, config) logger.debug(f"Sanitized {len(sanitized_files)} files") - file_char_counts = {file["path"]: len(file["content"]) for file in sanitized_files} + # Count characters in each file + file_char_counts: Dict[str, int] = { + file["path"]: len(file["content"]) for file in 
sanitized_files + } + # Generate output logger.debug("Generating output") generate_output(root_dir, config, sanitized_files, all_file_paths, file_char_counts) - total_files = len(sanitized_files) - total_characters = sum(len(file["content"]) for file in sanitized_files) + # Calculate statistics + total_files: int = len(sanitized_files) + total_characters: int = sum(len(file["content"]) for file in sanitized_files) logger.info( f"Packing complete. Total files: {total_files}, Total characters: {total_characters}" ) + # Return statistics return { "total_files": total_files, "total_characters": total_characters, @@ -52,15 +73,12 @@ def pack(root_dir: str, config: Dict[str, Any]) -> Dict[str, Any]: except FileProcessingError as e: logger.error(f"Error processing files: {str(e)}") raise RepopackError(f"File processing error: {str(e)}") from e - except OutputGenerationError as e: logger.error(f"Error generating output: {str(e)}") raise RepopackError(f"Output generation error: {str(e)}") from e - except OSError as e: logger.error(f"OS error: {str(e)}") raise RepopackError(f"OS error: {str(e)}") from e - except Exception as e: logger.error(f"Unexpected error during packing: {str(e)}") raise RepopackError(f"Unexpected error: {str(e)}") from e diff --git a/repopack/utils/cli_output.py b/repopack/utils/cli_output.py index 36c639e..fbc4f4c 100644 --- a/repopack/utils/cli_output.py +++ b/repopack/utils/cli_output.py @@ -1,20 +1,46 @@ -from typing import Dict +from typing import Dict, List, Tuple import colorama from colorama import Fore, Style +# Initialize colorama for cross-platform colored terminal output colorama.init(autoreset=True) -def print_top_files(file_char_counts: Dict[str, int], top_files_length: int): +def print_top_files(file_char_counts: Dict[str, int], top_files_length: int) -> None: + """ + Print the top files by character count. + + Args: + file_char_counts (Dict[str, int]): A dictionary of file paths and their character counts. 
+ top_files_length (int): The number of top files to display. + """ print(f"\n{Fore.CYAN}📈 Top {top_files_length} Files by Character Count:") print(f"{Fore.CYAN}──────────────────────────────────") - sorted_files = sorted(file_char_counts.items(), key=lambda x: x[1], reverse=True) + sorted_files: List[Tuple[str, int]] = sorted( + file_char_counts.items(), key=lambda x: x[1], reverse=True + ) for i, (file_path, char_count) in enumerate(sorted_files[:top_files_length], 1): print(f"{Fore.WHITE}{i}. {file_path} {Style.DIM}({char_count} chars)") -def print_summary(total_files, total_characters, output_path, file_char_counts, top_files_length): +def print_summary( + total_files: int, + total_characters: int, + output_path: str, + file_char_counts: Dict[str, int], + top_files_length: int, +) -> None: + """ + Print a summary of the repository packing process. + + Args: + total_files (int): The total number of files processed. + total_characters (int): The total number of characters in all files. + output_path (str): The path where the output file is saved. + file_char_counts (Dict[str, int]): A dictionary of file paths and their character counts. + top_files_length (int): The number of top files to display. + """ print(f"\n{Fore.CYAN}📊 Pack Summary:") print(f"{Fore.CYAN}────────────────") print(f"{Fore.WHITE}Total Files: {total_files}") @@ -25,6 +51,9 @@ def print_summary(total_files, total_characters, output_path, file_char_counts, print_top_files(file_char_counts, top_files_length) -def print_completion(): +def print_completion() -> None: + """ + Print a completion message indicating that the repository has been successfully packed. 
+ """ print(f"\n{Fore.GREEN}🎉 All Done!") print(f"{Fore.WHITE}Your repository has been successfully packed.") diff --git a/repopack/utils/file_handler.py b/repopack/utils/file_handler.py index dcbd266..5bdaa04 100644 --- a/repopack/utils/file_handler.py +++ b/repopack/utils/file_handler.py @@ -1,25 +1,46 @@ import os import chardet -from typing import List, Dict, Any +from typing import List, Dict, Any, Optional from ..exceptions import FileProcessingError from .file_manipulator import FileManipulator from .logger import logger def is_binary(file_path: str) -> bool: - """Check if a file is binary.""" + """ + Check if a file is binary. + + Args: + file_path (str): The path to the file to check. + + Returns: + bool: True if the file is binary, False otherwise. + """ try: - with open(file_path, "tr") as check_file: - check_file.read() - return False - except: - return True + with open(file_path, "rb") as file: + chunk = file.read(1024) + return b"\0" in chunk # Check for null bytes + except IOError: + return False def sanitize_files( file_paths: List[str], root_dir: str, config: Dict[str, Any] ) -> List[Dict[str, str]]: - """Sanitize files based on the given configuration.""" + """ + Sanitize files based on the given configuration. + + Args: + file_paths (List[str]): List of file paths to sanitize. + root_dir (str): The root directory of the project. + config (Dict[str, Any]): Configuration dictionary. + + Returns: + List[Dict[str, str]]: List of dictionaries containing sanitized file paths and contents. + + Raises: + FileProcessingError: If there's an error processing a file. + """ sanitized_files = [] for file_path in file_paths: full_path = os.path.join(root_dir, file_path) @@ -37,28 +58,44 @@ def sanitize_files( return sanitized_files -def sanitize_file(file_path: str, config: Dict[str, Any]) -> str: - """Sanitize a single file.""" +def sanitize_file(file_path: str, config: Dict[str, Any]) -> Optional[str]: + """ + Sanitize a single file. 
+ + Args: + file_path (str): The path to the file to sanitize. + config (Dict[str, Any]): Configuration dictionary. + + Returns: + Optional[str]: The sanitized content of the file, or None if the file is empty. + + Raises: + FileProcessingError: If there's an error sanitizing the file. + """ try: with open(file_path, "rb") as f: raw_content = f.read() + # Detect file encoding encoding = chardet.detect(raw_content)["encoding"] or "utf-8" content = raw_content.decode(encoding) logger.trace(f"File encoding detected: {encoding}") + # Remove comments (not implemented yet) if config["output"]["remove_comments"]: raise NotImplementedError("Comment removal is not implemented yet.") # file_extension = os.path.splitext(file_path)[1] # content = FileManipulator.remove_comments(content, file_extension) # logger.trace(f"Comments removed from file: {file_path}") + # Remove empty lines if configured if config["output"]["remove_empty_lines"]: content = remove_empty_lines(content) logger.trace(f"Empty lines removed from file: {file_path}") content = content.strip() + # Add line numbers if configured if config["output"]["show_line_numbers"]: content = add_line_numbers(content) logger.trace(f"Line numbers added to file: {file_path}") @@ -69,12 +106,28 @@ def sanitize_file(file_path: str, config: Dict[str, Any]) -> str: def remove_empty_lines(content: str) -> str: - """Remove empty lines from the content.""" + """ + Remove empty lines from the content. + + Args: + content (str): The content to process. + + Returns: + str: The content with empty lines removed. + """ return "\n".join(line for line in content.splitlines() if line.strip()) def add_line_numbers(content: str) -> str: - """Add line numbers to the content.""" + """ + Add line numbers to the content. + + Args: + content (str): The content to process. + + Returns: + str: The content with line numbers added. 
+ """ lines = content.split("\n") max_line_num = len(lines) line_num_width = len(str(max_line_num)) diff --git a/repopack/utils/file_manipulator.py b/repopack/utils/file_manipulator.py index 2ae0909..1d10cab 100644 --- a/repopack/utils/file_manipulator.py +++ b/repopack/utils/file_manipulator.py @@ -1,34 +1,74 @@ # file: utils/file_manipulator.py import re +from typing import Dict, List class FileManipulator: + """A utility class for manipulating file contents, primarily for removing comments.""" + + # Mapping of file extensions to their respective comment removal methods + EXTENSION_METHODS: Dict[str, str] = { + ".py": "remove_python_comments", + ".pyw": "remove_python_comments", + ".js": "remove_js_comments", + ".ts": "remove_js_comments", + ".jsx": "remove_js_comments", + ".tsx": "remove_js_comments", + ".html": "remove_html_comments", + ".htm": "remove_html_comments", + ".css": "remove_css_comments", + } + @staticmethod def remove_comments(content: str, file_extension: str) -> str: - if file_extension in [".py", ".pyw"]: - return FileManipulator.remove_python_comments(content) - elif file_extension in [".js", ".ts", ".jsx", ".tsx"]: - return FileManipulator.remove_js_comments(content) - elif file_extension in [".html", ".htm"]: - return FileManipulator.remove_html_comments(content) - elif file_extension in [".css"]: - return FileManipulator.remove_css_comments(content) + """ + Remove comments from the given content based on the file extension. + + Args: + content (str): The content to remove comments from. + file_extension (str): The file extension to determine the comment style. + + Returns: + str: The content with comments removed. 
+ """ + method_name = FileManipulator.EXTENSION_METHODS.get(file_extension) + if method_name: + method = getattr(FileManipulator, method_name) + return method(content) else: print(f"Skipping comment removal for unknown file type: {file_extension}") return content # No comment removal for unknown file types @staticmethod def remove_python_comments(content: str) -> str: + """ + Remove Python-style comments from the given content. + + Args: + content (str): The Python content to remove comments from. + + Returns: + str: The content with Python comments removed. + """ # Remove single-line comments content = re.sub(r"#.*$", "", content, flags=re.MULTILINE) - # Remove multi-line comments + # Remove multi-line comments (triple quotes) content = re.sub(r'"""[\s\S]*?"""', "", content) content = re.sub(r"'''[\s\S]*?'''", "", content) return content @staticmethod def remove_js_comments(content: str) -> str: + """ + Remove JavaScript-style comments from the given content. + + Args: + content (str): The JavaScript content to remove comments from. + + Returns: + str: The content with JavaScript comments removed. + """ # Remove single-line comments content = re.sub(r"//.*$", "", content, flags=re.MULTILINE) # Remove multi-line comments @@ -37,8 +77,26 @@ def remove_js_comments(content: str) -> str: @staticmethod def remove_html_comments(content: str) -> str: + """ + Remove HTML-style comments from the given content. + + Args: + content (str): The HTML content to remove comments from. + + Returns: + str: The content with HTML comments removed. + """ return re.sub(r"", "", content) @staticmethod def remove_css_comments(content: str) -> str: + """ + Remove CSS-style comments from the given content. + + Args: + content (str): The CSS content to remove comments from. + + Returns: + str: The content with CSS comments removed. 
+ """ return re.sub(r"/\*[\s\S]*?\*/", "", content) diff --git a/repopack/utils/ignore_utils.py b/repopack/utils/ignore_utils.py index 19bf30e..1a6c088 100644 --- a/repopack/utils/ignore_utils.py +++ b/repopack/utils/ignore_utils.py @@ -1,10 +1,11 @@ import os -from typing import List, Dict, Any +from typing import List, Dict, Any, Callable from pathspec import PathSpec from pathspec.patterns import GitWildMatchPattern from .logger import logger -DEFAULT_IGNORE_LIST = [ +# Default list of patterns to ignore in repository packing +DEFAULT_IGNORE_LIST: List[str] = [ # Version control ".git", ".gitignore", @@ -98,9 +99,18 @@ def get_ignore_patterns(filename: str, root_dir: str) -> List[str]: - """Get ignore patterns from a file.""" - ignore_path = os.path.join(root_dir, filename) - patterns = [] + """ + Get ignore patterns from a file. + + Args: + filename (str): The name of the ignore file (e.g., '.gitignore'). + root_dir (str): The root directory of the project. + + Returns: + List[str]: A list of ignore patterns read from the file. + """ + ignore_path: str = os.path.join(root_dir, filename) + patterns: List[str] = [] if os.path.exists(ignore_path): try: with open(ignore_path, "r") as f: @@ -115,8 +125,17 @@ def get_ignore_patterns(filename: str, root_dir: str) -> List[str]: def get_all_ignore_patterns(root_dir: str, config: Dict[str, Any]) -> List[str]: - """Get all ignore patterns based on the configuration.""" - patterns = [] + """ + Get all ignore patterns based on the configuration. + + Args: + root_dir (str): The root directory of the project. + config (Dict[str, Any]): The configuration dictionary. + + Returns: + List[str]: A list of all ignore patterns to be used. 
+ """ + patterns: List[str] = [] if config["ignore"]["use_default_patterns"]: patterns.extend(DEFAULT_IGNORE_LIST) if config["ignore"]["use_gitignore"]: @@ -126,7 +145,16 @@ def get_all_ignore_patterns(root_dir: str, config: Dict[str, Any]) -> List[str]: return patterns -def create_ignore_filter(patterns: List[str]): - """Create an ignore filter function based on the given patterns.""" - spec = PathSpec.from_lines(GitWildMatchPattern, patterns) +def create_ignore_filter(patterns: List[str]) -> Callable[[str], bool]: + """ + Create an ignore filter function based on the given patterns. + + Args: + patterns (List[str]): A list of ignore patterns. + + Returns: + Callable[[str], bool]: A function that takes a file path and returns True if the file should be included, + False if it should be ignored. + """ + spec: PathSpec = PathSpec.from_lines(GitWildMatchPattern, patterns) return lambda path: not spec.match_file(path) diff --git a/repopack/utils/logger.py b/repopack/utils/logger.py index 1baffb4..12b110a 100644 --- a/repopack/utils/logger.py +++ b/repopack/utils/logger.py @@ -1,11 +1,15 @@ import logging +from typing import Dict, Any from colorama import Fore, Style, init +# Initialize colorama for cross-platform colored output init(autoreset=True) class ColoredFormatter(logging.Formatter): - COLORS = { + """Custom formatter to add colors to log messages based on their level.""" + + COLORS: Dict[str, str] = { "DEBUG": Fore.BLUE, "INFO": Fore.CYAN, "WARNING": Fore.YELLOW, @@ -13,43 +17,73 @@ class ColoredFormatter(logging.Formatter): "CRITICAL": Fore.RED + Style.BRIGHT, } - def format(self, record): - levelname = record.levelname + def format(self, record: logging.LogRecord) -> str: + """ + Format the log record with appropriate colors. + + Args: + record (logging.LogRecord): The log record to format. + + Returns: + str: The formatted log message with color. 
+ """ + levelname: str = record.levelname if levelname in self.COLORS: record.levelname = f"{self.COLORS[levelname]}{levelname}{Style.RESET_ALL}" return super().format(record) class Logger: - def __init__(self): - self.logger = logging.getLogger("repopack") + """Custom logger class for Repopack.""" + + def __init__(self) -> None: + """Initialize the logger with a console handler and colored formatter.""" + self.logger: logging.Logger = logging.getLogger("repopack") self.logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler() + console_handler: logging.StreamHandler = logging.StreamHandler() console_handler.setFormatter(ColoredFormatter("%(levelname)s: %(message)s")) self.logger.addHandler(console_handler) - def set_verbose(self, verbose: bool): + def set_verbose(self, verbose: bool) -> None: + """ + Set the verbosity level of the logger. + + Args: + verbose (bool): If True, set to DEBUG level; otherwise, set to INFO level. + """ self.logger.setLevel(logging.DEBUG if verbose else logging.INFO) - def debug(self, message): + def debug(self, message: str) -> None: + """Log a debug message.""" self.logger.debug(message) - def info(self, message): + def info(self, message: str) -> None: + """Log an info message.""" self.logger.info(message) - def warning(self, message): + def warning(self, message: str) -> None: + """Log a warning message.""" self.logger.warning(message) - def error(self, message): + def error(self, message: str) -> None: + """Log an error message.""" self.logger.error(message) - def critical(self, message): + def critical(self, message: str) -> None: + """Log a critical message.""" self.logger.critical(message) - def trace(self, message): + def trace(self, message: str) -> None: + """ + Log a trace message if the logger level is set to DEBUG or lower. + + Args: + message (str): The trace message to log. 
+ """ if self.logger.level <= logging.DEBUG: self.logger.debug(f"{Fore.MAGENTA}TRACE: {message}{Style.RESET_ALL}") -logger = Logger() +# Create a global logger instance +logger: Logger = Logger() diff --git a/repopack/utils/spinner.py b/repopack/utils/spinner.py index a5fc721..bd64d7e 100644 --- a/repopack/utils/spinner.py +++ b/repopack/utils/spinner.py @@ -1,18 +1,41 @@ from halo import Halo +from typing import Any class Spinner: - def __init__(self, message): - self.spinner = Halo(text=message, spinner="dots") + """A wrapper class for the Halo spinner to provide a simple interface for displaying progress.""" - def start(self): + def __init__(self, message: str) -> None: + """ + Initialize the Spinner with a message. + + Args: + message (str): The initial message to display with the spinner. + """ + self.spinner: Halo = Halo(text=message, spinner="dots") + + def start(self) -> None: + """Start the spinner animation.""" self.spinner.start() - def stop(self): + def stop(self) -> None: + """Stop the spinner animation.""" self.spinner.stop() - def succeed(self, message): + def succeed(self, message: str) -> None: + """ + Display a success message and stop the spinner. + + Args: + message (str): The success message to display. + """ self.spinner.succeed(message) - def fail(self, message): + def fail(self, message: str) -> None: + """ + Display a failure message and stop the spinner. + + Args: + message (str): The failure message to display. 
+ """ self.spinner.fail(message) diff --git a/repopack/utils/tree_generator.py b/repopack/utils/tree_generator.py index 5e9acf0..70e792f 100644 --- a/repopack/utils/tree_generator.py +++ b/repopack/utils/tree_generator.py @@ -1,14 +1,32 @@ -from typing import List, Dict +from typing import List, Dict, Optional class TreeNode: + """Represents a node in the file tree structure.""" + def __init__(self, name: str, is_directory: bool = False): - self.name = name - self.children = [] - self.is_directory = is_directory + """ + Initialize a TreeNode. + + Args: + name (str): The name of the file or directory. + is_directory (bool, optional): Whether this node represents a directory. Defaults to False. + """ + self.name: str = name + self.children: List["TreeNode"] = [] + self.is_directory: bool = is_directory def generate_file_tree(files: List[str]) -> TreeNode: + """ + Generate a file tree structure from a list of file paths. + + Args: + files (List[str]): List of file paths. + + Returns: + TreeNode: The root node of the generated file tree. + """ root = TreeNode("root", True) for file in files: parts = file.split("/") @@ -23,13 +41,31 @@ def generate_file_tree(files: List[str]) -> TreeNode: return root -def sort_tree_nodes(node: TreeNode): +def sort_tree_nodes(node: TreeNode) -> None: + """ + Sort the children of a TreeNode recursively. + + Directories are sorted before files, and then alphabetically. + + Args: + node (TreeNode): The node whose children should be sorted. + """ node.children.sort(key=lambda x: (not x.is_directory, x.name)) for child in node.children: sort_tree_nodes(child) def tree_to_string(node: TreeNode, prefix: str = "") -> str: + """ + Convert a TreeNode structure to a string representation. + + Args: + node (TreeNode): The root node of the tree to convert. + prefix (str, optional): The prefix to use for indentation. Defaults to "". + + Returns: + str: A string representation of the file tree. 
+ """ sort_tree_nodes(node) result = "" for child in node.children: @@ -40,5 +76,14 @@ def tree_to_string(node: TreeNode, prefix: str = "") -> str: def generate_tree_string(files: List[str]) -> str: + """ + Generate a string representation of the file tree from a list of file paths. + + Args: + files (List[str]): List of file paths. + + Returns: + str: A string representation of the file tree. + """ tree = generate_file_tree(files) return tree_to_string(tree).strip() diff --git a/repopack/version.py b/repopack/version.py index 13aeaa8..cc49ff0 100644 --- a/repopack/version.py +++ b/repopack/version.py @@ -1,2 +1,12 @@ # repopack/version.py -__version__ = "0.1.0" + +from typing import Final + +# The current version of the Repopack package +__version__: Final[str] = "0.1.0" + +# Note: This version follows Semantic Versioning (SemVer) principles. +# Format: MAJOR.MINOR.PATCH +# - MAJOR version for incompatible API changes +# - MINOR version for backwards-compatible functionality additions +# - PATCH version for backwards-compatible bug fixes diff --git a/tests/test_config.py b/tests/test_config.py index 433a25c..ac2f17e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,29 +1,46 @@ import pytest +from pathlib import Path +from typing import Dict, Any from repopack.config import load_config, merge_configs, DEFAULT_CONFIG from repopack.exceptions import ConfigurationError -def test_load_config(tmp_path): - config_file = tmp_path / "config.json" +def test_load_config(tmp_path: Path) -> None: + """ + Test loading a valid configuration file. + + Args: + tmp_path (Path): Pytest fixture providing a temporary directory path. 
+ """ + config_file: Path = tmp_path / "config.json" config_file.write_text('{"output": {"file_path": "custom_output.txt"}}') - config = load_config(str(config_file)) + config: Dict[str, Any] = load_config(str(config_file)) assert config["output"]["file_path"] == "custom_output.txt" -def test_load_config_invalid_json(tmp_path): - config_file = tmp_path / "invalid_config.json" +def test_load_config_invalid_json(tmp_path: Path) -> None: + """ + Test loading an invalid JSON configuration file. + + Args: + tmp_path (Path): Pytest fixture providing a temporary directory path. + """ + config_file: Path = tmp_path / "invalid_config.json" config_file.write_text('{"output": {') with pytest.raises(ConfigurationError): load_config(str(config_file)) -def test_merge_configs(): - file_config = {"output": {"file_path": "file_output.txt"}} - cli_config = {"output": {"show_line_numbers": True}} - merged = merge_configs(file_config, cli_config) +def test_merge_configs() -> None: + """ + Test merging configurations from different sources. + """ + file_config: Dict[str, Any] = {"output": {"file_path": "file_output.txt"}} + cli_config: Dict[str, Any] = {"output": {"show_line_numbers": True}} + merged: Dict[str, Any] = merge_configs(file_config, cli_config) assert merged["output"]["file_path"] == "file_output.txt" - assert merged["output"]["show_line_numbers"] == True + assert merged["output"]["show_line_numbers"] is True assert merged["output"]["style"] == DEFAULT_CONFIG["output"]["style"] diff --git a/tests/test_file_handler.py b/tests/test_file_handler.py index cd47cc8..f07f74b 100644 --- a/tests/test_file_handler.py +++ b/tests/test_file_handler.py @@ -1,25 +1,37 @@ from unittest.mock import patch, mock_open +from typing import Dict, Any from repopack.utils.file_handler import is_binary, sanitize_file -def test_is_binary(): - with patch("builtins.open", mock_open(read_data=b"\x00\x01\x02\x03")): +def test_is_binary() -> None: + """ + Test if a binary file is correctly identified. 
+ """ + with patch("builtins.open", mock_open(read_data=b"\x00\x01\x02\x03")) as mock_file: + mock_file.return_value.__enter__.return_value.read.return_value = b"\x00\x01\x02\x03" assert is_binary("fake_binary_file") -def test_is_not_binary(): - with patch("builtins.open", mock_open(read_data="Hello, World!")): +def test_is_not_binary() -> None: + """ + Test if a text file is correctly identified as non-binary. + """ + with patch("builtins.open", mock_open(read_data=b"Hello, World!")) as mock_file: + mock_file.return_value.__enter__.return_value.read.return_value = b"Hello, World!" assert not is_binary("fake_text_file") -def test_sanitize_file(): - config = { +def test_sanitize_file() -> None: + """ + Test if a file is correctly sanitized according to the given configuration. + """ + config: Dict[str, Any] = { "output": {"remove_comments": False, "remove_empty_lines": True, "show_line_numbers": True} } - content = "Line 1\n\nLine 3\n" - expected = "1 | Line 1\n2 | Line 3" + content: str = "Line 1\n\nLine 3\n" + expected: str = "1 | Line 1\n2 | Line 3" with patch("builtins.open", mock_open(read_data=content.encode())): - result = sanitize_file("fake_file.txt", config) + result: str = sanitize_file("fake_file.txt", config) assert result == expected diff --git a/tests/test_ignore_utils.py b/tests/test_ignore_utils.py index 6f6e39d..0fefe2e 100644 --- a/tests/test_ignore_utils.py +++ b/tests/test_ignore_utils.py @@ -1,4 +1,6 @@ from unittest.mock import patch +from pathlib import Path +from typing import List, Dict, Any, Callable from repopack.utils.ignore_utils import ( get_ignore_patterns, get_all_ignore_patterns, @@ -6,16 +8,25 @@ ) -def test_get_ignore_patterns(tmp_path): - ignore_file = tmp_path / ".gitignore" +def test_get_ignore_patterns(tmp_path: Path) -> None: + """ + Test the get_ignore_patterns function with a mock .gitignore file. + + Args: + tmp_path (Path): Pytest fixture providing a temporary directory path. 
+ """ + ignore_file: Path = tmp_path / ".gitignore" ignore_file.write_text("*.log\n#comment\nnode_modules/") - patterns = get_ignore_patterns(".gitignore", str(tmp_path)) + patterns: List[str] = get_ignore_patterns(".gitignore", str(tmp_path)) assert patterns == ["*.log", "node_modules/"] -def test_get_all_ignore_patterns(): - config = { +def test_get_all_ignore_patterns() -> None: + """ + Test the get_all_ignore_patterns function with a mock configuration. + """ + config: Dict[str, Any] = { "ignore": { "use_default_patterns": True, "use_gitignore": True, @@ -23,17 +34,20 @@ def test_get_all_ignore_patterns(): } } with patch("repopack.utils.ignore_utils.get_ignore_patterns", return_value=["*.gitignore"]): - patterns = get_all_ignore_patterns("/fake/path", config) + patterns: List[str] = get_all_ignore_patterns("/fake/path", config) assert "*.log" in patterns # from DEFAULT_IGNORE_LIST assert "*.gitignore" in patterns # from mocked .gitignore assert "*.custom" in patterns # from custom patterns -def test_create_ignore_filter(): - patterns = ["*.log", "node_modules/"] - ignore_filter = create_ignore_filter(patterns) +def test_create_ignore_filter() -> None: + """ + Test the create_ignore_filter function with sample patterns. 
+ """ + patterns: List[str] = ["*.log", "node_modules/"] + ignore_filter: Callable[[str], bool] = create_ignore_filter(patterns) - assert not ignore_filter("test.log") - assert not ignore_filter("node_modules/package.json") - assert ignore_filter("src/main.py") + assert not ignore_filter("test.log") # Should be ignored + assert not ignore_filter("node_modules/package.json") # Should be ignored + assert ignore_filter("src/main.py") # Should not be ignored diff --git a/tests/test_tree_generator.py b/tests/test_tree_generator.py index 691e7b9..a63b970 100644 --- a/tests/test_tree_generator.py +++ b/tests/test_tree_generator.py @@ -1,16 +1,28 @@ +from typing import List from repopack.utils.tree_generator import generate_tree_string -def test_generate_tree_string(): - files = ["src/main.py", "src/utils/helper.py", "tests/test_main.py", "README.md"] - expected = ( - "README.md\n" +def test_generate_tree_string() -> None: + """ + Test the generate_tree_string function to ensure it correctly generates + a tree-like string representation of the given file structure. + """ + # Input: List of file paths + files: List[str] = ["src/main.py", "src/utils/helper.py", "tests/test_main.py", "README.md"] + + # Expected output: Tree-like string representation + expected: str = ( "src/\n" - " main.py\n" " utils/\n" " helper.py\n" + " main.py\n" "tests/\n" - " test_main.py" + " test_main.py\n" + "README.md" ) - result = generate_tree_string(files) - assert result == expected + + # Generate the tree string + result: str = generate_tree_string(files) + + # Assert that the generated string matches the expected output + assert result == expected, "The generated tree string does not match the expected output"