From 9a0cf6e9e0914293fa6410075f5c2242b3099414 Mon Sep 17 00:00:00 2001 From: Evan Doyle Date: Wed, 22 Jan 2025 18:51:50 -0800 Subject: [PATCH] Send stdlib modules to Rust to filter dependencies there, detect unused ignore directives on declared and excluded imports --- python/tach/check_external.py | 41 ++-------- python/tach/cli.py | 26 +++--- python/tach/extension.pyi | 1 + python/tach/utils/external.py | 14 ++++ src/commands/check/check_external.rs | 116 ++++++++++++++++++++------- src/lib.rs | 10 ++- 6 files changed, 132 insertions(+), 76 deletions(-) diff --git a/python/tach/check_external.py b/python/tach/check_external.py index 37246843..dedaed0a 100644 --- a/python/tach/check_external.py +++ b/python/tach/check_external.py @@ -8,7 +8,10 @@ check_external_dependencies, set_excluded_paths, ) -from tach.utils.external import get_module_mappings, is_stdlib_module +from tach.utils.external import ( + get_module_mappings, + get_stdlib_modules, +) if TYPE_CHECKING: from pathlib import Path @@ -43,43 +46,11 @@ def check_external( metadata_module_mappings.update( extract_module_mappings(project_config.external.rename) ) - diagnostics = check_external_dependencies( + return check_external_dependencies( project_root=str(project_root), project_config=project_config, module_mappings=metadata_module_mappings, - ) - undeclared_dependencies_by_file = diagnostics.undeclared_dependencies - unused_dependencies_by_project = diagnostics.unused_dependencies - - excluded_external_modules = set(project_config.external.exclude) - filtered_undeclared_dependencies: dict[str, list[str]] = {} - for filepath, undeclared_dependencies in undeclared_dependencies_by_file.items(): - dependencies = set( - filter( - lambda dependency: not is_stdlib_module(dependency) - and dependency not in excluded_external_modules, - undeclared_dependencies, - ) - ) - if dependencies: - filtered_undeclared_dependencies[filepath] = list(dependencies) - filtered_unused_dependencies: dict[str, list[str]] = {} - for filepath, unused_dependencies in unused_dependencies_by_project.items(): - dependencies = set( - filter( - lambda dependency: not is_stdlib_module(dependency) - and dependency not in excluded_external_modules, - unused_dependencies, - ) - ) - if dependencies: - filtered_unused_dependencies[filepath] = list(dependencies) - - return ExternalCheckDiagnostics( - undeclared_dependencies=filtered_undeclared_dependencies, - unused_dependencies=filtered_unused_dependencies, - errors=diagnostics.errors, - warnings=diagnostics.warnings, + stdlib_modules=get_stdlib_modules(), ) diff --git a/python/tach/cli.py b/python/tach/cli.py index a7c429a1..279dc784 100644 --- a/python/tach/cli.py +++ b/python/tach/cli.py @@ -249,35 +249,41 @@ def print_visibility_errors( def print_undeclared_dependencies( undeclared_dependencies: dict[str, list[str]], ) -> None: + any_undeclared = False for file_path, dependencies in undeclared_dependencies.items(): if dependencies: + any_undeclared = True print( f"{icons.FAIL}: {BCOLORS.FAIL}Undeclared dependencies in {BCOLORS.ENDC}{BCOLORS.WARNING}'{file_path}'{BCOLORS.ENDC}:" ) for dependency in dependencies: print(f"\t{BCOLORS.FAIL}{dependency}{BCOLORS.ENDC}") - print( - f"{BCOLORS.WARNING}\nAdd the undeclared dependencies to the corresponding pyproject.toml file, " - f"or consider ignoring the dependencies by adding them to the 'external.exclude' list in {CONFIG_FILE_NAME}.toml.\n{BCOLORS.ENDC}", - file=sys.stderr, - ) + if any_undeclared: + print( + f"{BCOLORS.WARNING}\nAdd the undeclared dependencies to the corresponding pyproject.toml file, " + f"or consider ignoring the dependencies by adding them to the 'external.exclude' list in {CONFIG_FILE_NAME}.toml.\n{BCOLORS.ENDC}", + file=sys.stderr, + ) def print_unused_external_dependencies( unused_dependencies: dict[str, list[str]], ) -> None: + any_unused = False for pyproject_path, dependencies in unused_dependencies.items(): if dependencies: + any_unused = True print( f"{icons.WARNING} {BCOLORS.WARNING}Unused dependencies from project at {BCOLORS.OKCYAN}'{pyproject_path}'{BCOLORS.ENDC}{BCOLORS.ENDC}:" ) for dependency in dependencies: print(f"\t{BCOLORS.WARNING}{dependency}{BCOLORS.ENDC}") - print( - f"{BCOLORS.OKCYAN}\nRemove the unused dependencies from the corresponding pyproject.toml file, " - f"or consider ignoring the dependencies by adding them to the 'external.exclude' list in {CONFIG_FILE_NAME}.toml.\n{BCOLORS.ENDC}", - file=sys.stderr, - ) + if any_unused: + print( + f"{BCOLORS.OKCYAN}\nRemove the unused dependencies from the corresponding pyproject.toml file, " + f"or consider ignoring the dependencies by adding them to the 'external.exclude' list in {CONFIG_FILE_NAME}.toml.\n{BCOLORS.ENDC}", + file=sys.stderr, + ) def add_base_arguments(parser: argparse.ArgumentParser) -> None: diff --git a/python/tach/extension.pyi b/python/tach/extension.pyi index f7287efe..973e9846 100644 --- a/python/tach/extension.pyi +++ b/python/tach/extension.pyi @@ -25,6 +25,7 @@ def check_external_dependencies( project_root: str, project_config: ProjectConfig, module_mappings: dict[str, list[str]], + stdlib_modules: list[str], ) -> ExternalCheckDiagnostics: ... def create_dependency_report( project_root: str, diff --git a/python/tach/utils/external.py b/python/tach/utils/external.py index 09d16566..f7eeadac 100644 --- a/python/tach/utils/external.py +++ b/python/tach/utils/external.py @@ -27,6 +27,20 @@ def is_stdlib_module(module: str) -> bool: return in_stdlib(module) # type: ignore +def get_stdlib_modules() -> list[str]: + if sys.version_info >= (3, 10): + modules = set(sys.builtin_module_names) + modules.update(sys.stdlib_module_names) + modules.update(KNOWN_MODULE_SPECIAL_CASES) + return sorted(modules) + else: + from stdlib_list import stdlib_list # type: ignore + + modules: set[str] = set(stdlib_list()) # type: ignore + modules.update(KNOWN_MODULE_SPECIAL_CASES) + return list(sorted(modules)) + + def _get_installed_modules(dist: Any) -> list[str]: # This method is best-effort, and is only used for Python < 3.10 module_names: set[str] = set() diff --git a/src/commands/check/check_external.rs b/src/commands/check/check_external.rs index 86e93349..7851a8df 100644 --- a/src/commands/check/check_external.rs +++ b/src/commands/check/check_external.rs @@ -13,22 +13,32 @@ use super::diagnostics::ExternalCheckDiagnostics; use super::error::ExternalCheckError; pub type Result = std::result::Result; +#[derive(Debug)] +enum ImportProcessResult { + UndeclaredDependency(String), + UsedDependencies(Vec), + Excluded(Vec), +} + pub fn check( project_root: &Path, project_config: &ProjectConfig, module_mappings: &HashMap>, + stdlib_modules: &[String], ) -> Result { + let stdlib_modules: HashSet = stdlib_modules.iter().cloned().collect(); + let excluded_external_modules: HashSet = + project_config.external.exclude.iter().cloned().collect(); let source_roots: Vec = project_config.prepend_roots(project_root); let mut diagnostics = ExternalCheckDiagnostics::default(); for pyproject in filesystem::walk_pyprojects(project_root.to_str().unwrap()) { let project_info = parse_pyproject_toml(&pyproject)?; let mut all_dependencies = project_info.dependencies.clone(); for source_root in &project_info.source_paths { - let source_files = filesystem::walk_pyfiles(source_root.to_str().unwrap()); - for file_path in source_files { + for file_path in filesystem::walk_pyfiles(source_root.to_str().unwrap()) { let absolute_file_path = source_root.join(&file_path); let display_file_path = relative_to(&absolute_file_path, project_root)? - .to_string_lossy() + .display() .to_string(); if let Ok(project_imports) = imports::get_external_imports( @@ -37,14 +47,27 @@ pub fn check( project_config.ignore_type_checking_imports, ) { for import in project_imports.active_imports() { - process_import( - &mut diagnostics, - &mut all_dependencies, + match process_import( import, &project_info, module_mappings, - &display_file_path, - ); + &excluded_external_modules, + &stdlib_modules, + ) { + ImportProcessResult::UndeclaredDependency(module_name) => { + let undeclared_dep_entry: &mut Vec = diagnostics + .undeclared_dependencies + .entry(display_file_path.to_string()) + .or_default(); + undeclared_dep_entry.push(module_name); + } + ImportProcessResult::UsedDependencies(deps) + | ImportProcessResult::Excluded(deps) => { + for dep in deps { + all_dependencies.remove(&dep); + } + } + } } for directive_ignored_import in project_imports.directive_ignored_imports() { @@ -74,6 +97,37 @@ pub fn check( } } } + + if project_config.rules.unused_ignore_directives != RuleSetting::Off { + if let ImportProcessResult::UsedDependencies(_) + | ImportProcessResult::Excluded(_) = process_import( + directive_ignored_import.import, + &project_info, + module_mappings, + &excluded_external_modules, + &stdlib_modules, + ) { + match project_config.rules.unused_ignore_directives { + RuleSetting::Error => { + diagnostics.errors.push(format!( + "{}:{}: Unused ignore directive: '{}'", + display_file_path, + directive_ignored_import.import.line_no, + directive_ignored_import.import.top_level_module_name() + )); + } + RuleSetting::Warn => { + diagnostics.warnings.push(format!( + "{}:{}: Unused ignore directive: '{}'", + display_file_path, + directive_ignored_import.import.line_no, + directive_ignored_import.import.top_level_module_name() + )); + } + RuleSetting::Off => {} + } + } + } } for unused_directive in project_imports.unused_ignore_directives() { @@ -105,7 +159,10 @@ pub fn check( relative_to(&pyproject, project_root)? .to_string_lossy() .to_string(), - all_dependencies.into_iter().collect(), + all_dependencies + .into_iter() + .filter(|dep| !excluded_external_modules.contains(dep)) // 'exclude' should hide unused errors unconditionally + .collect(), ); } @@ -113,36 +170,37 @@ pub fn check( } fn process_import( - diagnostics: &mut ExternalCheckDiagnostics, - all_dependencies: &mut HashSet, import: &NormalizedImport, project_info: &ProjectInfo, module_mappings: &HashMap>, - display_file_path: &str, -) { - let top_level_module_name = import.top_level_module_name(); - let default_distribution_names = vec![top_level_module_name.to_string()]; + excluded_external_modules: &HashSet, + stdlib_modules: &HashSet, +) -> ImportProcessResult { + let top_level_module_name = import.top_level_module_name().to_string(); + let default_distribution_names = vec![top_level_module_name.clone()]; let distribution_names: Vec = module_mappings - .get(top_level_module_name) + .get(&top_level_module_name) .unwrap_or(&default_distribution_names) .iter() .map(|dist_name| normalize_package_name(dist_name)) .collect(); - for dist_name in distribution_names.iter() { - all_dependencies.remove(dist_name); - } - if distribution_names .iter() - .all(|dist_name| !project_info.dependencies.contains(dist_name)) + .any(|dist_name| excluded_external_modules.contains(dist_name)) + || stdlib_modules.contains(&top_level_module_name) { - // Found a possible undeclared dependency in this file - let undeclared_dep_entry: &mut Vec = diagnostics - .undeclared_dependencies - .entry(display_file_path.to_string()) - .or_default(); - undeclared_dep_entry.push(top_level_module_name.to_string()); + return ImportProcessResult::Excluded(distribution_names); + } + + let is_declared = distribution_names + .iter() + .any(|dist_name| project_info.dependencies.contains(dist_name)); + + if !is_declared { + ImportProcessResult::UndeclaredDependency(top_level_module_name.to_string()) + } else { + ImportProcessResult::UsedDependencies(distribution_names) } } @@ -186,7 +244,7 @@ mod tests { module_mapping: HashMap>, ) { let project_root = example_dir.join("multi_package"); - let result = check(&project_root, &project_config, &module_mapping); + let result = check(&project_root, &project_config, &module_mapping, &[]); assert!(result.as_ref().unwrap().undeclared_dependencies.is_empty()); let unused_dependency_root = "src/pack-a/pyproject.toml"; assert!(result @@ -201,7 +259,7 @@ mod tests { project_config: ProjectConfig, ) { let project_root = example_dir.join("multi_package"); - let result = check(&project_root, &project_config, &HashMap::new()); + let result = check(&project_root, &project_config, &HashMap::new(), &[]); let expected_failure_path = "src/pack-a/src/myorg/pack_a/__init__.py"; let r = result.unwrap(); assert_eq!( diff --git a/src/lib.rs b/src/lib.rs index cb9659b9..59055ddb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -242,14 +242,20 @@ fn set_excluded_paths( /// Validate external dependency imports against pyproject.toml dependencies #[pyfunction] -#[pyo3(signature = (project_root, project_config, module_mappings))] +#[pyo3(signature = (project_root, project_config, module_mappings, stdlib_modules))] fn check_external_dependencies( project_root: String, project_config: config::ProjectConfig, module_mappings: HashMap>, + stdlib_modules: Vec, ) -> check::check_external::Result { let project_root = PathBuf::from(project_root); - check::check_external::check(&project_root, &project_config, &module_mappings) + check::check_external::check( + &project_root, + &project_config, + &module_mappings, + &stdlib_modules, + ) } /// Create a report of dependencies and usages of a given path