From a8b06ffeca0157c25bcc4e3503c3717a16566b31 Mon Sep 17 00:00:00 2001 From: purajit Date: Wed, 3 Jul 2024 13:17:51 -0700 Subject: [PATCH] Allow customizing semgrep configurations; correct rule matching glob --- .../pants/backend/tools/semgrep/rules.py | 33 +++++++++---------- .../pants/backend/tools/semgrep/subsystem.py | 12 ++++++- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/python/pants/backend/tools/semgrep/rules.py b/src/python/pants/backend/tools/semgrep/rules.py index e2e68cbe8bf0..5d3d6b210ec4 100644 --- a/src/python/pants/backend/tools/semgrep/rules.py +++ b/src/python/pants/backend/tools/semgrep/rules.py @@ -51,17 +51,6 @@ def description(self) -> str: return ", ".join(sorted(str(path) for path in self.config_files)) -_IGNORE_FILE_NAME = ".semgrepignore" - -_RULES_DIR_NAME = ".semgrep" -_RULES_FILES_GLOBS = ( - ".semgrep.yml", - ".semgrep.yaml", - f"{_RULES_DIR_NAME}/*.yml", - f"{_RULES_DIR_NAME}/*.yaml", -) - - @dataclass class SemgrepIgnoreFiles: snapshot: Snapshot @@ -85,14 +74,14 @@ def ancestor_configs(self, address: Address) -> Iterable[PurePath]: yield from self.configs_by_dir.get(ancestor, []) -def _group_by_semgrep_dir(all_paths: Paths) -> AllSemgrepConfigs: +def _group_by_semgrep_dir(semgrep: SemgrepSubsystem, all_paths: Paths) -> AllSemgrepConfigs: configs_by_dir = defaultdict(set) for path_ in all_paths.files: path = PurePath(path_) # A rule like foo/bar/.semgrep/baz.yaml should behave like it's in in foo/bar, not # foo/bar/.semgrep parent = path.parent - config_directory = parent.parent if parent.name == _RULES_DIR_NAME else parent + config_directory = parent.parent if parent.name == semgrep.rules_dir else parent configs_by_dir[config_directory].add(path) @@ -100,9 +89,17 @@ def _group_by_semgrep_dir(all_paths: Paths) -> AllSemgrepConfigs: @rule -async def find_all_semgrep_configs() -> AllSemgrepConfigs: - all_paths = await Get(Paths, PathGlobs([f"**/{file_glob}" for file_glob in _RULES_FILES_GLOBS])) - return _group_by_semgrep_dir(all_paths) +async def find_all_semgrep_configs(semgrep: SemgrepSubsystem) -> AllSemgrepConfigs: + rules_files_globs = ( + f"{semgrep.rules_dir}/*.yml", + f"{semgrep.rules_dir}/*.yaml", + # TODO: these don't seem to be mentioned in semgrep docs; should they be removed? + ".semgrep.yml", + ".semgrep.yaml", + ) + + all_paths = await Get(Paths, PathGlobs([f"**/{file_glob}" for file_glob in rules_files_globs])) + return _group_by_semgrep_dir(semgrep, all_paths) @dataclass(frozen=True) @@ -122,8 +119,8 @@ async def infer_relevant_semgrep_configs( @rule -async def all_semgrep_ignore_files() -> SemgrepIgnoreFiles: - snapshot = await Get(Snapshot, PathGlobs([f"**/{_IGNORE_FILE_NAME}"])) +async def all_semgrep_ignore_files(semgrep: SemgrepSubsystem) -> SemgrepIgnoreFiles: + snapshot = await Get(Snapshot, PathGlobs([f"**/{semgrep.ignore_file}"])) return SemgrepIgnoreFiles(snapshot) diff --git a/src/python/pants/backend/tools/semgrep/subsystem.py b/src/python/pants/backend/tools/semgrep/subsystem.py index af73b460304e..e779a8560309 100644 --- a/src/python/pants/backend/tools/semgrep/subsystem.py +++ b/src/python/pants/backend/tools/semgrep/subsystem.py @@ -12,7 +12,7 @@ from pants.engine.rules import Rule, collect_rules from pants.engine.target import Dependencies, FieldSet, SingleSourceField, Target from pants.engine.unions import UnionRule -from pants.option.option_types import ArgsListOption, BoolOption, SkipOption +from pants.option.option_types import ArgsListOption, BoolOption, SkipOption, StrOption from pants.util.strutil import softwrap @@ -51,6 +51,16 @@ class SemgrepSubsystem(PythonToolBase): register_lockfile = True default_lockfile_resource = ("pants.backend.tools.semgrep", "semgrep.lock") + rules_dir = StrOption( + default=".semgrep", + help="The directory with semgrep rules, which is searched recursively for YAML files.", + ) + + ignore_file = StrOption( + default=".semgrepignore", + help="The configuration file with semgrep excludes.", + ) + args = ArgsListOption( example="--verbose", default=["--quiet"],