Skip to content

Commit

Permalink
Send stdlib modules to Rust to filter dependencies there, detect unus…
Browse files Browse the repository at this point in the history
…ed ignore directives on declared and excluded imports
  • Loading branch information
emdoyle committed Jan 23, 2025
1 parent e59fcbc commit 9a0cf6e
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 76 deletions.
41 changes: 6 additions & 35 deletions python/tach/check_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
check_external_dependencies,
set_excluded_paths,
)
from tach.utils.external import get_module_mappings, is_stdlib_module
from tach.utils.external import (
get_module_mappings,
get_stdlib_modules,
)

if TYPE_CHECKING:
from pathlib import Path
Expand Down Expand Up @@ -43,43 +46,11 @@ def check_external(
metadata_module_mappings.update(
extract_module_mappings(project_config.external.rename)
)
diagnostics = check_external_dependencies(
return check_external_dependencies(
project_root=str(project_root),
project_config=project_config,
module_mappings=metadata_module_mappings,
)
undeclared_dependencies_by_file = diagnostics.undeclared_dependencies
unused_dependencies_by_project = diagnostics.unused_dependencies

excluded_external_modules = set(project_config.external.exclude)
filtered_undeclared_dependencies: dict[str, list[str]] = {}
for filepath, undeclared_dependencies in undeclared_dependencies_by_file.items():
dependencies = set(
filter(
lambda dependency: not is_stdlib_module(dependency)
and dependency not in excluded_external_modules,
undeclared_dependencies,
)
)
if dependencies:
filtered_undeclared_dependencies[filepath] = list(dependencies)
filtered_unused_dependencies: dict[str, list[str]] = {}
for filepath, unused_dependencies in unused_dependencies_by_project.items():
dependencies = set(
filter(
lambda dependency: not is_stdlib_module(dependency)
and dependency not in excluded_external_modules,
unused_dependencies,
)
)
if dependencies:
filtered_unused_dependencies[filepath] = list(dependencies)

return ExternalCheckDiagnostics(
undeclared_dependencies=filtered_undeclared_dependencies,
unused_dependencies=filtered_unused_dependencies,
errors=diagnostics.errors,
warnings=diagnostics.warnings,
stdlib_modules=get_stdlib_modules(),
)


Expand Down
26 changes: 16 additions & 10 deletions python/tach/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,35 +249,41 @@ def print_visibility_errors(
def print_undeclared_dependencies(
undeclared_dependencies: dict[str, list[str]],
) -> None:
any_undeclared = False
for file_path, dependencies in undeclared_dependencies.items():
if dependencies:
any_undeclared = True
print(
f"{icons.FAIL}: {BCOLORS.FAIL}Undeclared dependencies in {BCOLORS.ENDC}{BCOLORS.WARNING}'{file_path}'{BCOLORS.ENDC}:"
)
for dependency in dependencies:
print(f"\t{BCOLORS.FAIL}{dependency}{BCOLORS.ENDC}")
print(
f"{BCOLORS.WARNING}\nAdd the undeclared dependencies to the corresponding pyproject.toml file, "
f"or consider ignoring the dependencies by adding them to the 'external.exclude' list in {CONFIG_FILE_NAME}.toml.\n{BCOLORS.ENDC}",
file=sys.stderr,
)
if any_undeclared:
print(
f"{BCOLORS.WARNING}\nAdd the undeclared dependencies to the corresponding pyproject.toml file, "
f"or consider ignoring the dependencies by adding them to the 'external.exclude' list in {CONFIG_FILE_NAME}.toml.\n{BCOLORS.ENDC}",
file=sys.stderr,
)


def print_unused_external_dependencies(
unused_dependencies: dict[str, list[str]],
) -> None:
any_unused = False
for pyproject_path, dependencies in unused_dependencies.items():
if dependencies:
any_unused = True
print(
f"{icons.WARNING} {BCOLORS.WARNING}Unused dependencies from project at {BCOLORS.OKCYAN}'{pyproject_path}'{BCOLORS.ENDC}{BCOLORS.ENDC}:"
)
for dependency in dependencies:
print(f"\t{BCOLORS.WARNING}{dependency}{BCOLORS.ENDC}")
print(
f"{BCOLORS.OKCYAN}\nRemove the unused dependencies from the corresponding pyproject.toml file, "
f"or consider ignoring the dependencies by adding them to the 'external.exclude' list in {CONFIG_FILE_NAME}.toml.\n{BCOLORS.ENDC}",
file=sys.stderr,
)
if any_unused:
print(
f"{BCOLORS.OKCYAN}\nRemove the unused dependencies from the corresponding pyproject.toml file, "
f"or consider ignoring the dependencies by adding them to the 'external.exclude' list in {CONFIG_FILE_NAME}.toml.\n{BCOLORS.ENDC}",
file=sys.stderr,
)


def add_base_arguments(parser: argparse.ArgumentParser) -> None:
Expand Down
1 change: 1 addition & 0 deletions python/tach/extension.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def check_external_dependencies(
project_root: str,
project_config: ProjectConfig,
module_mappings: dict[str, list[str]],
stdlib_modules: list[str],
) -> ExternalCheckDiagnostics: ...
def create_dependency_report(
project_root: str,
Expand Down
14 changes: 14 additions & 0 deletions python/tach/utils/external.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,20 @@ def is_stdlib_module(module: str) -> bool:
return in_stdlib(module) # type: ignore


def get_stdlib_modules() -> list[str]:
if sys.version_info >= (3, 10):
modules = set(sys.builtin_module_names)
modules.update(sys.stdlib_module_names)
modules.update(KNOWN_MODULE_SPECIAL_CASES)
return sorted(modules)
else:
from stdlib_list import stdlib_list # type: ignore

modules: set[str] = set(stdlib_list()) # type: ignore
modules.update(KNOWN_MODULE_SPECIAL_CASES)
return list(sorted(modules))


def _get_installed_modules(dist: Any) -> list[str]:
# This method is best-effort, and is only used for Python < 3.10
module_names: set[str] = set()
Expand Down
116 changes: 87 additions & 29 deletions src/commands/check/check_external.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,32 @@ use super::diagnostics::ExternalCheckDiagnostics;
use super::error::ExternalCheckError;
pub type Result<T> = std::result::Result<T, ExternalCheckError>;

#[derive(Debug)]
enum ImportProcessResult {
UndeclaredDependency(String),
UsedDependencies(Vec<String>),
Excluded(Vec<String>),
}

pub fn check(
project_root: &Path,
project_config: &ProjectConfig,
module_mappings: &HashMap<String, Vec<String>>,
stdlib_modules: &[String],
) -> Result<ExternalCheckDiagnostics> {
let stdlib_modules: HashSet<String> = stdlib_modules.iter().cloned().collect();
let excluded_external_modules: HashSet<String> =
project_config.external.exclude.iter().cloned().collect();
let source_roots: Vec<PathBuf> = project_config.prepend_roots(project_root);
let mut diagnostics = ExternalCheckDiagnostics::default();
for pyproject in filesystem::walk_pyprojects(project_root.to_str().unwrap()) {
let project_info = parse_pyproject_toml(&pyproject)?;
let mut all_dependencies = project_info.dependencies.clone();
for source_root in &project_info.source_paths {
let source_files = filesystem::walk_pyfiles(source_root.to_str().unwrap());
for file_path in source_files {
for file_path in filesystem::walk_pyfiles(source_root.to_str().unwrap()) {
let absolute_file_path = source_root.join(&file_path);
let display_file_path = relative_to(&absolute_file_path, project_root)?
.to_string_lossy()
.display()
.to_string();

if let Ok(project_imports) = imports::get_external_imports(
Expand All @@ -37,14 +47,27 @@ pub fn check(
project_config.ignore_type_checking_imports,
) {
for import in project_imports.active_imports() {
process_import(
&mut diagnostics,
&mut all_dependencies,
match process_import(
import,
&project_info,
module_mappings,
&display_file_path,
);
&excluded_external_modules,
&stdlib_modules,
) {
ImportProcessResult::UndeclaredDependency(module_name) => {
let undeclared_dep_entry: &mut Vec<String> = diagnostics
.undeclared_dependencies
.entry(display_file_path.to_string())
.or_default();
undeclared_dep_entry.push(module_name);
}
ImportProcessResult::UsedDependencies(deps)
| ImportProcessResult::Excluded(deps) => {
for dep in deps {
all_dependencies.remove(&dep);
}
}
}
}

for directive_ignored_import in project_imports.directive_ignored_imports() {
Expand Down Expand Up @@ -74,6 +97,37 @@ pub fn check(
}
}
}

if project_config.rules.unused_ignore_directives != RuleSetting::Off {
if let ImportProcessResult::UsedDependencies(_)
| ImportProcessResult::Excluded(_) = process_import(
directive_ignored_import.import,
&project_info,
module_mappings,
&excluded_external_modules,
&stdlib_modules,
) {
match project_config.rules.unused_ignore_directives {
RuleSetting::Error => {
diagnostics.errors.push(format!(
"{}:{}: Unused ignore directive: '{}'",
display_file_path,
directive_ignored_import.import.line_no,
directive_ignored_import.import.top_level_module_name()
));
}
RuleSetting::Warn => {
diagnostics.warnings.push(format!(
"{}:{}: Unused ignore directive: '{}'",
display_file_path,
directive_ignored_import.import.line_no,
directive_ignored_import.import.top_level_module_name()
));
}
RuleSetting::Off => {}
}
}
}
}

for unused_directive in project_imports.unused_ignore_directives() {
Expand Down Expand Up @@ -105,44 +159,48 @@ pub fn check(
relative_to(&pyproject, project_root)?
.to_string_lossy()
.to_string(),
all_dependencies.into_iter().collect(),
all_dependencies
.into_iter()
.filter(|dep| !excluded_external_modules.contains(dep)) // 'exclude' should hide unused errors unconditionally
.collect(),
);
}

Ok(diagnostics)
}

fn process_import(
diagnostics: &mut ExternalCheckDiagnostics,
all_dependencies: &mut HashSet<String>,
import: &NormalizedImport,
project_info: &ProjectInfo,
module_mappings: &HashMap<String, Vec<String>>,
display_file_path: &str,
) {
let top_level_module_name = import.top_level_module_name();
let default_distribution_names = vec![top_level_module_name.to_string()];
excluded_external_modules: &HashSet<String>,
stdlib_modules: &HashSet<String>,
) -> ImportProcessResult {
let top_level_module_name = import.top_level_module_name().to_string();
let default_distribution_names = vec![top_level_module_name.clone()];
let distribution_names: Vec<String> = module_mappings
.get(top_level_module_name)
.get(&top_level_module_name)
.unwrap_or(&default_distribution_names)
.iter()
.map(|dist_name| normalize_package_name(dist_name))
.collect();

for dist_name in distribution_names.iter() {
all_dependencies.remove(dist_name);
}

if distribution_names
.iter()
.all(|dist_name| !project_info.dependencies.contains(dist_name))
.any(|dist_name| excluded_external_modules.contains(dist_name))
|| stdlib_modules.contains(&top_level_module_name)
{
// Found a possible undeclared dependency in this file
let undeclared_dep_entry: &mut Vec<String> = diagnostics
.undeclared_dependencies
.entry(display_file_path.to_string())
.or_default();
undeclared_dep_entry.push(top_level_module_name.to_string());
return ImportProcessResult::Excluded(distribution_names);
}

let is_declared = distribution_names
.iter()
.any(|dist_name| project_info.dependencies.contains(dist_name));

if !is_declared {
ImportProcessResult::UndeclaredDependency(top_level_module_name.to_string())
} else {
ImportProcessResult::UsedDependencies(distribution_names)
}
}

Expand Down Expand Up @@ -186,7 +244,7 @@ mod tests {
module_mapping: HashMap<String, Vec<String>>,
) {
let project_root = example_dir.join("multi_package");
let result = check(&project_root, &project_config, &module_mapping);
let result = check(&project_root, &project_config, &module_mapping, &[]);
assert!(result.as_ref().unwrap().undeclared_dependencies.is_empty());
let unused_dependency_root = "src/pack-a/pyproject.toml";
assert!(result
Expand All @@ -201,7 +259,7 @@ mod tests {
project_config: ProjectConfig,
) {
let project_root = example_dir.join("multi_package");
let result = check(&project_root, &project_config, &HashMap::new());
let result = check(&project_root, &project_config, &HashMap::new(), &[]);
let expected_failure_path = "src/pack-a/src/myorg/pack_a/__init__.py";
let r = result.unwrap();
assert_eq!(
Expand Down
10 changes: 8 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,14 +242,20 @@ fn set_excluded_paths(

/// Validate external dependency imports against pyproject.toml dependencies
#[pyfunction]
#[pyo3(signature = (project_root, project_config, module_mappings))]
#[pyo3(signature = (project_root, project_config, module_mappings, stdlib_modules))]
fn check_external_dependencies(
project_root: String,
project_config: config::ProjectConfig,
module_mappings: HashMap<String, Vec<String>>,
stdlib_modules: Vec<String>,
) -> check::check_external::Result<check::ExternalCheckDiagnostics> {
let project_root = PathBuf::from(project_root);
check::check_external::check(&project_root, &project_config, &module_mappings)
check::check_external::check(
&project_root,
&project_config,
&module_mappings,
&stdlib_modules,
)
}

/// Create a report of dependencies and usages of a given path
Expand Down

0 comments on commit 9a0cf6e

Please sign in to comment.