Skip to content

Commit

Permalink
Allow customizing semgrep configurations; correct rule matching glob
Browse files Browse the repository at this point in the history
  • Loading branch information
purajit committed Jul 3, 2024
1 parent 4036cce commit a8b06ff
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 19 deletions.
33 changes: 15 additions & 18 deletions src/python/pants/backend/tools/semgrep/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,6 @@ def description(self) -> str:
return ", ".join(sorted(str(path) for path in self.config_files))


_IGNORE_FILE_NAME = ".semgrepignore"

_RULES_DIR_NAME = ".semgrep"
_RULES_FILES_GLOBS = (
".semgrep.yml",
".semgrep.yaml",
f"{_RULES_DIR_NAME}/*.yml",
f"{_RULES_DIR_NAME}/*.yaml",
)


@dataclass
class SemgrepIgnoreFiles:
snapshot: Snapshot
Expand All @@ -85,24 +74,32 @@ def ancestor_configs(self, address: Address) -> Iterable[PurePath]:
yield from self.configs_by_dir.get(ancestor, [])


def _group_by_semgrep_dir(all_paths: Paths) -> AllSemgrepConfigs:
def _group_by_semgrep_dir(semgrep: SemgrepSubsystem, all_paths: Paths) -> AllSemgrepConfigs:
configs_by_dir = defaultdict(set)
for path_ in all_paths.files:
path = PurePath(path_)
# A rule like foo/bar/.semgrep/baz.yaml should behave like it's in foo/bar, not
# foo/bar/.semgrep
parent = path.parent
config_directory = parent.parent if parent.name == _RULES_DIR_NAME else parent
config_directory = parent.parent if parent.name == semgrep.rules_dir else parent

configs_by_dir[config_directory].add(path)

return AllSemgrepConfigs(configs_by_dir)


@rule
async def find_all_semgrep_configs() -> AllSemgrepConfigs:
all_paths = await Get(Paths, PathGlobs([f"**/{file_glob}" for file_glob in _RULES_FILES_GLOBS]))
return _group_by_semgrep_dir(all_paths)
async def find_all_semgrep_configs(semgrep: SemgrepSubsystem) -> AllSemgrepConfigs:
rules_files_globs = (
f"{semgrep.rules_dir}/*.yml",
f"{semgrep.rules_dir}/*.yaml",
# TODO: these don't seem to be mentioned in semgrep docs; should they be removed?
".semgrep.yml",
".semgrep.yaml",
)

all_paths = await Get(Paths, PathGlobs([f"**/{file_glob}" for file_glob in rules_files_globs]))
return _group_by_semgrep_dir(semgrep, all_paths)


@dataclass(frozen=True)
Expand All @@ -122,8 +119,8 @@ async def infer_relevant_semgrep_configs(


@rule
async def all_semgrep_ignore_files() -> SemgrepIgnoreFiles:
snapshot = await Get(Snapshot, PathGlobs([f"**/{_IGNORE_FILE_NAME}"]))
async def all_semgrep_ignore_files(semgrep: SemgrepSubsystem) -> SemgrepIgnoreFiles:
snapshot = await Get(Snapshot, PathGlobs([f"**/{semgrep.ignore_file}"]))
return SemgrepIgnoreFiles(snapshot)


Expand Down
12 changes: 11 additions & 1 deletion src/python/pants/backend/tools/semgrep/subsystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from pants.engine.rules import Rule, collect_rules
from pants.engine.target import Dependencies, FieldSet, SingleSourceField, Target
from pants.engine.unions import UnionRule
from pants.option.option_types import ArgsListOption, BoolOption, SkipOption
from pants.option.option_types import ArgsListOption, BoolOption, SkipOption, StrOption
from pants.util.strutil import softwrap


Expand Down Expand Up @@ -51,6 +51,16 @@ class SemgrepSubsystem(PythonToolBase):
register_lockfile = True
default_lockfile_resource = ("pants.backend.tools.semgrep", "semgrep.lock")

rules_dir = StrOption(
default=".semgrep",
help="The directory with semgrep rules, which is searched recursively for YAML files.",
)

ignore_file = StrOption(
default=".semgrepignore",
help="The configuration file with semgrep excludes.",
)

args = ArgsListOption(
example="--verbose",
default=["--quiet"],
Expand Down

0 comments on commit a8b06ff

Please sign in to comment.