diff --git a/src/codemodder/registry.py b/src/codemodder/registry.py index c9347f28..2f5fc669 100644 --- a/src/codemodder/registry.py +++ b/src/codemodder/registry.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from dataclasses import dataclass from importlib.metadata import entry_points from typing import TYPE_CHECKING, Optional @@ -64,36 +65,44 @@ def match_codemods( if codemod_exclude and not codemod_include: base_codemods = {} + patterns = [ + re.compile(exclude.replace("*", ".*")) + for exclude in codemod_exclude + if "*" in exclude + ] + names = set(name for name in codemod_exclude if "*" not in name) for codemod in self.codemods: - if (sast_only and codemod.origin != "pixee") or ( - not sast_only and codemod.origin == "pixee" + if ( + codemod.id in names + or (codemod.origin == "pixee" and codemod.name in names) + or any(pat.match(codemod.id) for pat in patterns) ): - base_codemods[codemod.id] = codemod - base_codemods[codemod.name] = codemod - - for name_or_id in codemod_exclude: - try: - codemod = base_codemods[name_or_id] - except KeyError: - logger.warning( - f"Requested codemod to exclude'{name_or_id}' does not exist." - ) continue - # remove both by name and id since we don't know which `name_or_id` represented - base_codemods.pop(codemod.name, None) - base_codemods.pop(codemod.id, None) + if bool(sast_only) != bool(codemod.origin == "pixee"): + base_codemods[codemod.id] = codemod + # Remove duplicates and preserve order - return list(dict.fromkeys(base_codemods.values())) + return list(base_codemods.values()) matched_codemods = [] for name in codemod_include: + if "*" in name: + pat = re.compile(name.replace("*", ".*")) + pattern_matches = [code for code in self.codemods if pat.match(code.id)] + matched_codemods.extend(pattern_matches) + if not pattern_matches: + logger.warning( + "Given codemod pattern '%s' does not match any codemods.", name + ) + continue + try: matched_codemods.append( self._codemods_by_name.get(name) or self._codemods_by_id[name] ) except KeyError: - logger.warning(f"Requested codemod to include'{name}' does not exist.") + logger.warning(f"Requested codemod to include '{name}' does not exist.") return matched_codemods def describe_codemods( diff --git a/tests/codemods/test_include_exclude.py b/tests/codemods/test_include_exclude.py index 2ab3ffc7..5f4699ae 100644 --- a/tests/codemods/test_include_exclude.py +++ b/tests/codemods/test_include_exclude.py @@ -89,3 +89,50 @@ def test_exclude_some_match(self): for c in self.registry.codemods if c.name not in "secure-random" and c.id in self.all_ids ] + + def test_include_with_pattern(self): + assert self.registry.match_codemods(["*django*"], None) == [ + c for c in self.registry.codemods if "django" in c.id + ] + + def test_include_with_pattern_and_another(self): + assert self.registry.match_codemods(["*django*", "use-defusedxml"], None) == [ + c for c in self.registry.codemods if "django" in c.id + ] + [self.codemod_map["use-defusedxml"]] + + def test_include_sast_with_prefix(self): + assert self.registry.match_codemods(["sonar*"], None, sast_only=False) == [ + c for c in self.registry.codemods if c.origin == "sonar" + ] + + def test_warn_pattern_no_match(self, caplog): + assert self.registry.match_codemods(["*doesntexist*"], None) == [] + assert ( + "Given codemod pattern '*doesntexist*' does not match any codemods" + in caplog.text + ) + + def test_exclude_with_pattern(self): + assert self.registry.match_codemods(None, ["*django*"], sast_only=False) == [ + c + for c in self.registry.codemods + if "django" not in c.id and c.id in self.all_ids + ] + + def test_exclude_with_pattern_and_another(self): + assert self.registry.match_codemods( + None, ["*django*", "use-defusedxml"], sast_only=False + ) == [ + c + for c in self.registry.codemods + if "django" not in c.id + and c.id in self.all_ids + and c.name != "use-defusedxml" + ] + + def test_exclude_pixee_with_prefix(self): + assert self.registry.match_codemods(None, ["pixee*"], sast_only=False) == [ + c + for c in self.registry.codemods + if not c.origin == "pixee" and c.id in self.all_ids + ] diff --git a/tests/test_codemodder.py b/tests/test_codemodder.py index 2cef0bbb..f87d7c20 100644 --- a/tests/test_codemodder.py +++ b/tests/test_codemodder.py @@ -210,7 +210,7 @@ def test_codemod_include_no_match( assert any(x[0] == ("scanned: %s files", 0) for x in info_logger.call_args_list) assert any( - f"Requested codemod to include'{bad_codemod}' does not exist." in x[0][0] + f"Requested codemod to include '{bad_codemod}' does not exist." in x[0][0] for x in warning_logger.call_args_list ) @@ -233,7 +233,7 @@ def test_codemod_include_some_match( write_report.assert_called_once() assert any("running codemod %s" in x[0][0] for x in info_logger.call_args_list) assert any( - f"Requested codemod to include'{bad_codemod}' does not exist." in x[0][0] + f"Requested codemod to include '{bad_codemod}' does not exist." in x[0][0] for x in warning_logger.call_args_list ) @@ -262,10 +262,6 @@ def test_codemod_exclude_some_match( assert f"pixee:python/{good_codemod}" not in codemods_that_ran assert any("running codemod %s" in x[0][0] for x in info_logger.call_args_list) - assert any( - f"Requested codemod to exclude'{bad_codemod}' does not exist." in x[0][0] - for x in warning_logger.call_args_list - ) @mock.patch("codemodder.registry.logger.warning") @mock.patch("codemodder.codemodder.logger.info") @@ -286,10 +282,6 @@ def test_codemod_exclude_no_match( run(args) write_report.assert_called_once() assert any("running codemod %s" in x[0][0] for x in info_logger.call_args_list) - assert any( - f"Requested codemod to exclude'{bad_codemod}' does not exist." in x[0][0] - for x in warning_logger.call_args_list - ) @mock.patch("codemodder.codemods.semgrep.semgrep_run") def test_exclude_all_registered_codemods(self, mock_semgrep_run, dir_structure):