From 090ab97057198de27c520e38d77c2b306b3afab3 Mon Sep 17 00:00:00 2001 From: Artyom Semidolin <43622365+Artanias@users.noreply.github.com> Date: Thu, 2 Jan 2025 16:39:40 +0300 Subject: [PATCH 1/5] feat: adds possibility to generate reports only from provided paths. --- Makefile | 2 +- src/codeplag/codeplagcli.py | 20 +++++ src/codeplag/handlers/report.py | 135 +++++++++++++++++++++++++++----- src/codeplag/utils.py | 6 +- 4 files changed, 140 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 7ed5432..1a38291 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -UTIL_VERSION := 0.5.10 +UTIL_VERSION := 0.5.11 UTIL_NAME := codeplag PWD := $(shell pwd) diff --git a/src/codeplag/codeplagcli.py b/src/codeplag/codeplagcli.py index ef7c8db..356e120 100644 --- a/src/codeplag/codeplagcli.py +++ b/src/codeplag/codeplagcli.py @@ -330,6 +330,26 @@ def __add_report_path(self: Self, subparsers: argparse._SubParsersAction) -> Non choices=REPORT_TYPE_CHOICE, default=DEFAULT_REPORT_TYPE, ) + report_create.add_argument( + "-frp", + "--first-root-path", + help=_( + "Path to first compared works. " + "Can be path to directory or URL to the project folder." + ), + type=str, + required=False, + ) + report_create.add_argument( + "-srp", + "--second-root-path", + help=_( + "Path to second compared works. " + "Can be path to directory or URL to the project folder." + ), + type=str, + required=False, + ) def __init__(self: Self) -> None: super(CodeplagCLI, self).__init__( diff --git a/src/codeplag/handlers/report.py b/src/codeplag/handlers/report.py index ac90639..020f7d7 100644 --- a/src/codeplag/handlers/report.py +++ b/src/codeplag/handlers/report.py @@ -32,18 +32,40 @@ ) -def html_report_create(report_path: Path, report_type: ReportType) -> Literal[0, 1]: +class Elements(TypedDict): + cnt_elements: int + same_parts: SameFuncs + max_funcs_same_percentages: dict[str, float] + + +SamePartsOfAll = dict[str, dict[str, Elements]] +CntHeadNodes = dict[str, int] +ResultingSamePercentages = dict[str, float] + + +def html_report_create( + report_path: Path, + report_type: ReportType, + first_root_path: Path | str | None = None, + second_root_path: Path | str | None = None, +) -> Literal[0, 1]: """Creates an HTML report based on the configuration settings. Args: ---- - report_path: The path where the report should be created. - report_type: Type of the created report file. + report_path (Path): The path where the report should be created. + report_type (ReportType): Type of the created report file. + first_root_path (Path | str | None): Path to first compared works. + second_root_path (Path | str | None): Path to second compared works. Returns: ------- Literal[0, 1]: 0 if the report was successfully created, 1 otherwise. + Raises: + ------- + ValueError: When provided invalid report type or only one path. + Example usage: >>> from pathlib import Path >>> html_report_create(Path('/path/to/report'), 'general') @@ -61,19 +83,75 @@ def html_report_create(report_path: Path, report_type: ReportType) -> Literal[0, if not (reports_path / CSV_REPORT_FILENAME).exists(): logger.error(f"There is nothing in '{reports_path}' to create a basic html report from.") return 1 - create_report_function = _create_report if report_type == "general" else _create_sources_report + if report_type == "general": + create_report_function = _create_general_report + elif report_type == "sources": + create_report_function = _create_sources_report + else: + raise ValueError("Invalid report type.") + all_paths_provided = all([first_root_path, second_root_path]) + if not all_paths_provided and any([first_root_path, second_root_path]): + raise ValueError("All paths must be provided.") + + df = read_df(reports_path / CSV_REPORT_FILENAME) + if all_paths_provided: + paths = tuple(sorted([str(first_root_path), str(second_root_path)])) + df = df[df["first_path"].str.startswith(paths[0])] # type: ignore + df = df[df["second_path"].str.startswith(paths[1])] # type: ignore + else: + paths = None environment = jinja2.Environment(extensions=["jinja2.ext.i18n"]) environment.install_gettext_translations(get_translations()) # type: ignore create_report_function( - reports_path / CSV_REPORT_FILENAME, + df, # type:ignore report_path, environment, settings_config["threshold"], settings_config["language"], + paths, # type: ignore ) return 0 +def calculate_general_total_similarity( + df: pd.DataFrame, unique_first_paths: NDArray, unique_second_paths: NDArray +) -> float: + total_similarity = 0.0 + if unique_first_paths.size == 0: + return total_similarity + for first_path in unique_first_paths: + max_similarity = 0.0 + for second_path in unique_second_paths: + sorted_paths = sorted([first_path, second_path]) + selected = df[ + (df["first_path"].str.startswith(sorted_paths[0])) # type: ignore + & (df["second_path"].str.startswith(sorted_paths[1])) # type: ignore + ] + if selected is None or selected.size == 0: + continue + module_similarity = float(selected.iloc[0]["weighted_average"]) + if module_similarity > max_similarity: + max_similarity = module_similarity + total_similarity += max_similarity + return total_similarity / unique_first_paths.size + + +def calculate_sources_total_similarity( + same_percentages: ResultingSamePercentages, + pattern: str, +) -> float: + item_cnt = 0 + total_similarity = 0.0 + for path, percentage in same_percentages.items(): + if not path.startswith(pattern): + continue + total_similarity += percentage + item_cnt += 1 + if item_cnt == 0: + return 0.0 + return total_similarity / item_cnt + + def _convert_similarity_matrix_to_percent_matrix(matrix: NDArray) -> NDArray: """Convert compliance matrix of size N x M x 2 to percent 2 dimensional matrix.""" percent_matrix = np.empty((matrix.shape[0], matrix.shape[1]), dtype=np.float64) @@ -169,17 +247,6 @@ def _get_parsed_line( yield line, cmp_res, same_parts_of_second, same_parts_of_first -class Elements(TypedDict): - cnt_elements: int - same_parts: SameFuncs - max_funcs_same_percentages: dict[str, float] - - -SamePartsOfAll = dict[str, dict[str, Elements]] -CntHeadNodes = dict[str, int] -ResultingSamePercentages = dict[str, float] - - def _get_resulting_same_percentages( same_parts_of_all: SamePartsOfAll, cnt_head_nodes: CntHeadNodes ) -> ResultingSamePercentages: @@ -238,37 +305,61 @@ def _search_sources( return {k: v for k, v in same_parts_of_all.items() if v}, cnt_head_nodes -def _create_report( - df_path: Path, +def _create_general_report( + df: pd.DataFrame, save_path: Path, environment: jinja2.Environment, threshold: int = DEFAULT_THRESHOLD, language: Language = DEFAULT_LANGUAGE, + paths: tuple[str, str] | None = None, ) -> None: + if paths is not None: + unique_first_paths = pd.unique(df["first_path"]) + unique_second_paths = pd.unique(df["second_path"]) + assert isinstance(unique_first_paths, np.ndarray) + assert isinstance(unique_second_paths, np.ndarray) + first_root_path_sim = calculate_general_total_similarity( + df, unique_first_paths, unique_second_paths + ) + second_root_path_sim = calculate_general_total_similarity( + df, unique_second_paths, unique_first_paths + ) + else: + first_root_path_sim = None + second_root_path_sim = None template = environment.from_string(GENERAL_TEMPLATE_PATH.read_text()) if save_path.is_dir(): save_path = save_path / DEFAULT_GENERAL_REPORT_NAME save_path.write_text( template.render( - data=_get_parsed_line(read_df(df_path)), + data=_get_parsed_line(df), list=list, len=len, round=round, threshold=threshold, language=language, + first_root_path_sim=first_root_path_sim, + second_root_path_sim=second_root_path_sim, ) ) def _create_sources_report( - df_path: Path, + df: pd.DataFrame, save_path: Path, environment: jinja2.Environment, threshold: int = DEFAULT_THRESHOLD, language: Language = DEFAULT_LANGUAGE, + paths: tuple[str, str] | None = None, ) -> None: - data, cnt_head_nodes = _search_sources(read_df(df_path), threshold) + data, cnt_head_nodes = _search_sources(df, threshold) same_percentages = _get_resulting_same_percentages(data, cnt_head_nodes) + if paths is not None: + first_root_path_sim = calculate_sources_total_similarity(same_percentages, paths[0]) + second_root_path_sim = calculate_sources_total_similarity(same_percentages, paths[1]) + else: + first_root_path_sim = None + second_root_path_sim = None template = environment.from_string(SOURCES_TEMPLATE_PATH.read_text()) if save_path.is_dir(): save_path = save_path / DEFAULT_SOURCES_REPORT_NAME @@ -283,5 +374,7 @@ def _create_sources_report( list=list, len=len, round=round, + first_root_path_sim=first_root_path_sim, + second_root_path_sim=second_root_path_sim, ) ) diff --git a/src/codeplag/utils.py b/src/codeplag/utils.py index 160be7f..064d929 100644 --- a/src/codeplag/utils.py +++ b/src/codeplag/utils.py @@ -29,6 +29,8 @@ def __init__(self: Self, parsed_args: dict[str, Any]) -> None: elif self.root == "report": self.path: Path = parsed_args.pop("path") self.report_type: ReportType = parsed_args.pop("type") + self.first_root_path = parsed_args.pop("first_root_path", None) + self.second_root_path = parsed_args.pop("second_root_path", None) else: self.github_files: list[str] = parsed_args.pop("github_files", []) self.github_project_folders: list[str] = parsed_args.pop("github_project_folders", []) @@ -62,7 +64,9 @@ def run(self: Self) -> Literal[0, 1]: settings_modify(self.parsed_args) settings_show() elif self.root == "report": - return html_report_create(self.path, self.report_type) + return html_report_create( + self.path, self.report_type, self.first_root_path, self.second_root_path + ) else: self.comparator.check( self.files, From a56bb7233c4aa086891850f16908c0ba9ede5946 Mon Sep 17 00:00:00 2001 From: Artyom Semidolin <43622365+Artanias@users.noreply.github.com> Date: Thu, 2 Jan 2025 17:09:04 +0300 Subject: [PATCH 2/5] test: updates CLI unit tests. --- test/unit/codeplag/test_codeplagcli.py | 72 ++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/test/unit/codeplag/test_codeplagcli.py b/test/unit/codeplag/test_codeplagcli.py index 34a47e1..d76142e 100644 --- a/test/unit/codeplag/test_codeplagcli.py +++ b/test/unit/codeplag/test_codeplagcli.py @@ -59,8 +59,9 @@ def test_file_path_bad(path: str): @pytest.mark.parametrize( "args", [ - ["--extension", "py", "--directories", "src/", "src/"], + ["check", "--extension", "py", "--directories", "src/", "src/"], [ + "check", "--extension", "py", "--github-project-folders", @@ -68,16 +69,81 @@ def test_file_path_bad(path: str): "https://github.com/OSLL/code-plagiarism/tree/main/src", ], [ + "check", "--extension", "py", "--github-files", "https://github.com/OSLL/code-plagiarism/blob/main/setup.py", "https://github.com/OSLL/code-plagiarism/blob/main/setup.py", ], - ["--extension", "py", "--files", "setup.py", "setup.py"], + ["check", "--extension", "py", "--files", "setup.py", "setup.py"], + ["check", "--extension", "pypy"], + ], + ids=[ + "Twice repeated directory.", + "Twice repeated GitHub project folder.", + "Twice repeated GitHub file.", + "Twice repeated file.", + "Invalid extension.", ], ) -def test_get_parsed_args(args: list[str]): +def test_get_parsed_args_failed(args: list[str]): codeplagcli = CodeplagCLI() with pytest.raises(SystemExit): codeplagcli.parse_args(args=args) + + +@pytest.mark.parametrize( + "args,expected", + [ + (["check", "--extension", "cpp"], {"extension": "cpp", "root": "check"}), + ( + ["check", "--extension", "py", "--files", "setup.py"], + {"extension": "py", "root": "check", "files": [Path("setup.py").absolute()]}, + ), + ( + ["report", "create", "--path", "./", "--type", "general"], + { + "root": "report", + "report": "create", + "type": "general", + "path": Path("./"), + "first_root_path": None, + "second_root_path": None, + }, + ), + ( + [ + "report", + "create", + "--path", + "./", + "--type", + "general", + "--first-root-path", + "codeplag", + "--second-root-path", + "webparsers", + ], + { + "root": "report", + "report": "create", + "type": "general", + "path": Path("./"), + "first_root_path": "codeplag", + "second_root_path": "webparsers", + }, + ), + ], + ids=[ + "Only extension provided.", + "Extension and one file provided.", + "Create general report from all records.", + "Create general report from selected records.", + ], +) +def test_get_parsed_args(args: list[str], expected: argparse.Namespace): + codeplagcli = CodeplagCLI() + namespace = codeplagcli.parse_args(args=args) + for key, value in expected.items(): + assert getattr(namespace, key) == value From 27379a5aa799c66f520565b26d9b520d2d6ddd75 Mon Sep 17 00:00:00 2001 From: Artyom Semidolin <43622365+Artanias@users.noreply.github.com> Date: Thu, 2 Jan 2025 20:34:41 +0300 Subject: [PATCH 3/5] test: adds unit tests for calculate total similarity. --- locales/codeplag.pot | 90 +++++++++++------- .../translations/en/LC_MESSAGES/codeplag.po | 94 ++++++++++++------- .../translations/ru/LC_MESSAGES/codeplag.po | 94 ++++++++++++------- src/codeplag/handlers/report.py | 6 +- src/templates/general.templ | 19 ++++ src/templates/sources.templ | 19 ++++ test/unit/codeplag/handlers/test_report.py | 65 +++++++++++++ 7 files changed, 280 insertions(+), 107 deletions(-) diff --git a/locales/codeplag.pot b/locales/codeplag.pot index f300220..a3eeaec 100644 --- a/locales/codeplag.pot +++ b/locales/codeplag.pot @@ -5,9 +5,8 @@ #, fuzzy msgid "" msgstr "" -"Project-Id-Version: codeplag 0.5.10\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2025-01-01 18:21+0300\n" +"Project-Id-Version: codeplag 0.5.11\n" +"POT-Creation-Date: 2025-01-02 18:42+0300\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: Artyom Semidolin\n" "Language-Team: LANGUAGE \n" @@ -173,33 +172,45 @@ msgstr "" msgid "Type of the created report file." msgstr "" -#: src/codeplag/codeplagcli.py:338 +#: src/codeplag/codeplagcli.py:336 +msgid "" +"Path to first compared works. Can be path to directory or URL to the " +"project folder." +msgstr "" + +#: src/codeplag/codeplagcli.py:346 +msgid "" +"Path to second compared works. Can be path to directory or URL to the " +"project folder." +msgstr "" + +#: src/codeplag/codeplagcli.py:358 msgid "" "Program help to find similar parts of source codes for the different " "languages." msgstr "" -#: src/codeplag/codeplagcli.py:346 +#: src/codeplag/codeplagcli.py:366 msgid "Print current version number and exit." msgstr "" -#: src/codeplag/codeplagcli.py:352 +#: src/codeplag/codeplagcli.py:372 msgid "Commands help." msgstr "" -#: src/codeplag/codeplagcli.py:367 +#: src/codeplag/codeplagcli.py:387 msgid "No command is provided; please choose one from the available (--help)." msgstr "" -#: src/codeplag/codeplagcli.py:378 +#: src/codeplag/codeplagcli.py:398 msgid "There is nothing to modify; please provide at least one argument." msgstr "" -#: src/codeplag/codeplagcli.py:382 +#: src/codeplag/codeplagcli.py:402 msgid "The'repo-regexp' option requires the provided 'github-user' option." msgstr "" -#: src/codeplag/codeplagcli.py:390 +#: src/codeplag/codeplagcli.py:410 msgid "" "The'path-regexp' option requires the provided 'directories', 'github-" "user', or 'github-project-folder' options." @@ -214,75 +225,84 @@ msgid "Check results" msgstr "" #: src/templates/general.templ:44 src/templates/sources.templ:46 -msgid "General information" +msgid "Summary information" msgstr "" -#: src/templates/general.templ:46 +#: src/templates/general.templ:46 src/templates/general.templ:65 +#: src/templates/sources.templ:48 msgid "Path" msgstr "" -#: src/templates/general.templ:47 +#: src/templates/general.templ:47 src/templates/sources.templ:49 +msgid "Total similarity" +msgstr "" + +#: src/templates/general.templ:63 src/templates/sources.templ:65 +msgid "General information" +msgstr "" + +#: src/templates/general.templ:66 msgid "Last modification date" msgstr "" -#: src/templates/general.templ:48 +#: src/templates/general.templ:67 msgid "Check date" msgstr "" -#: src/templates/general.templ:61 +#: src/templates/general.templ:80 msgid "Metrics information" msgstr "" -#: src/templates/general.templ:63 +#: src/templates/general.templ:82 msgid "Metric" msgstr "" -#: src/templates/general.templ:64 +#: src/templates/general.templ:83 msgid "Value" msgstr "" -#: src/templates/general.templ:67 +#: src/templates/general.templ:86 msgid "Jaccard Coefficient" msgstr "" -#: src/templates/general.templ:71 +#: src/templates/general.templ:90 msgid "Similarity of operators" msgstr "" -#: src/templates/general.templ:75 +#: src/templates/general.templ:94 msgid "Similarity of keywords" msgstr "" -#: src/templates/general.templ:79 +#: src/templates/general.templ:98 msgid "Similarity of literals" msgstr "" -#: src/templates/general.templ:82 +#: src/templates/general.templ:101 msgid "Weighted average by fast metrics" msgstr "" -#: src/templates/general.templ:85 +#: src/templates/general.templ:104 msgid "Similarity of structures" msgstr "" -#: src/templates/general.templ:91 +#: src/templates/general.templ:110 msgid "Similar parts of the second program relative to the first" msgstr "" -#: src/templates/general.templ:93 src/templates/general.templ:125 +#: src/templates/general.templ:112 src/templates/general.templ:144 msgid "Part of the first program" msgstr "" -#: src/templates/general.templ:94 src/templates/general.templ:124 +#: src/templates/general.templ:113 src/templates/general.templ:143 msgid "Part of the second program" msgstr "" -#: src/templates/general.templ:95 src/templates/general.templ:126 -#: src/templates/sources.templ:69 +#: src/templates/general.templ:114 src/templates/general.templ:145 +#: src/templates/sources.templ:88 msgid "Similarity" msgstr "" -#: src/templates/general.templ:122 +#: src/templates/general.templ:141 msgid "Similar parts of the first program relative to the second" msgstr "" @@ -290,26 +310,26 @@ msgstr "" msgid "Verification results" msgstr "" -#: src/templates/sources.templ:48 +#: src/templates/sources.templ:67 msgid "The path to the file being checked" msgstr "" -#: src/templates/sources.templ:49 +#: src/templates/sources.templ:68 msgid "Total maximum compliance, %" msgstr "" -#: src/templates/sources.templ:64 +#: src/templates/sources.templ:83 msgid "Similar parts of the programs" msgstr "" -#: src/templates/sources.templ:66 +#: src/templates/sources.templ:85 msgid "The name of a similar file" msgstr "" -#: src/templates/sources.templ:67 +#: src/templates/sources.templ:86 msgid "Part of the file that is being checked" msgstr "" -#: src/templates/sources.templ:68 +#: src/templates/sources.templ:87 msgid "Part of a similar file" msgstr "" diff --git a/locales/translations/en/LC_MESSAGES/codeplag.po b/locales/translations/en/LC_MESSAGES/codeplag.po index 68f0e66..ce96c6b 100644 --- a/locales/translations/en/LC_MESSAGES/codeplag.po +++ b/locales/translations/en/LC_MESSAGES/codeplag.po @@ -4,8 +4,7 @@ # msgid "" msgstr "" -"Project-Id-Version: codeplag 0.5.10\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"Project-Id-Version: codeplag 0.5.11\n" "POT-Creation-Date: 2024-05-21 09:28+0300\n" "PO-Revision-Date: 2024-05-16 19:15+0300\n" "Last-Translator: Artyom Semidolin\n" @@ -191,7 +190,23 @@ msgstr "" msgid "Type of the created report file." msgstr "Type of the created report file." -#: src/codeplag/codeplagcli.py:338 +#: src/codeplag/codeplagcli.py:336 +msgid "" +"Path to first compared works. Can be path to directory or URL to the " +"project folder." +msgstr "" +"Path to first compared works. Can be path to directory or URL to the " +"project folder." + +#: src/codeplag/codeplagcli.py:346 +msgid "" +"Path to second compared works. Can be path to directory or URL to the " +"project folder." +msgstr "" +"Path to second compared works. Can be path to directory or URL to the " +"project folder." + +#: src/codeplag/codeplagcli.py:358 msgid "" "Program help to find similar parts of source codes for the different " "languages." @@ -199,27 +214,27 @@ msgstr "" "Program help to find similar parts of source codes for the different " "languages." -#: src/codeplag/codeplagcli.py:346 +#: src/codeplag/codeplagcli.py:366 msgid "Print current version number and exit." msgstr "Print current version number and exit." -#: src/codeplag/codeplagcli.py:352 +#: src/codeplag/codeplagcli.py:372 msgid "Commands help." msgstr "Commands help." -#: src/codeplag/codeplagcli.py:367 +#: src/codeplag/codeplagcli.py:387 msgid "No command is provided; please choose one from the available (--help)." msgstr "No command is provided; please choose one from the available (--help)." -#: src/codeplag/codeplagcli.py:378 +#: src/codeplag/codeplagcli.py:398 msgid "There is nothing to modify; please provide at least one argument." msgstr "There is nothing to modify; please provide at least one argument." -#: src/codeplag/codeplagcli.py:382 +#: src/codeplag/codeplagcli.py:402 msgid "The'repo-regexp' option requires the provided 'github-user' option." msgstr "The'repo-regexp' option requires the provided 'github-user' option." -#: src/codeplag/codeplagcli.py:390 +#: src/codeplag/codeplagcli.py:410 msgid "" "The'path-regexp' option requires the provided 'directories', 'github-" "user', or 'github-project-folder' options." @@ -236,75 +251,84 @@ msgid "Check results" msgstr "Check results" #: src/templates/general.templ:44 src/templates/sources.templ:46 -msgid "General information" -msgstr "General information" +msgid "Summary information" +msgstr "Summary information" -#: src/templates/general.templ:46 +#: src/templates/general.templ:46 src/templates/general.templ:65 +#: src/templates/sources.templ:48 msgid "Path" msgstr "Path" -#: src/templates/general.templ:47 +#: src/templates/general.templ:47 src/templates/sources.templ:49 +msgid "Total similarity" +msgstr "Total similarity, %" + +#: src/templates/general.templ:63 src/templates/sources.templ:65 +msgid "General information" +msgstr "General information" + +#: src/templates/general.templ:66 msgid "Last modification date" msgstr "Last modification date" -#: src/templates/general.templ:48 +#: src/templates/general.templ:67 msgid "Check date" msgstr "Check date" -#: src/templates/general.templ:61 +#: src/templates/general.templ:80 msgid "Metrics information" msgstr "Metrics information" -#: src/templates/general.templ:63 +#: src/templates/general.templ:82 msgid "Metric" msgstr "Metric" -#: src/templates/general.templ:64 +#: src/templates/general.templ:83 msgid "Value" msgstr "Value" -#: src/templates/general.templ:67 +#: src/templates/general.templ:86 msgid "Jaccard Coefficient" msgstr "Jaccard Coefficient" -#: src/templates/general.templ:71 +#: src/templates/general.templ:90 msgid "Similarity of operators" msgstr "Similarity of operators" -#: src/templates/general.templ:75 +#: src/templates/general.templ:94 msgid "Similarity of keywords" msgstr "Similarity of keywords" -#: src/templates/general.templ:79 +#: src/templates/general.templ:98 msgid "Similarity of literals" msgstr "Similarity of literals" -#: src/templates/general.templ:82 +#: src/templates/general.templ:101 msgid "Weighted average by fast metrics" msgstr "Weighted average by fast metrics" -#: src/templates/general.templ:85 +#: src/templates/general.templ:104 msgid "Similarity of structures" msgstr "Similarity of structures" -#: src/templates/general.templ:91 +#: src/templates/general.templ:110 msgid "Similar parts of the second program relative to the first" msgstr "Similar parts of the second program relative to the first" -#: src/templates/general.templ:93 src/templates/general.templ:125 +#: src/templates/general.templ:112 src/templates/general.templ:144 msgid "Part of the first program" msgstr "Part of the first program" -#: src/templates/general.templ:94 src/templates/general.templ:124 +#: src/templates/general.templ:113 src/templates/general.templ:143 msgid "Part of the second program" msgstr "Part of the second program" -#: src/templates/general.templ:95 src/templates/general.templ:126 -#: src/templates/sources.templ:69 +#: src/templates/general.templ:114 src/templates/general.templ:145 +#: src/templates/sources.templ:88 msgid "Similarity" msgstr "Similarity" -#: src/templates/general.templ:122 +#: src/templates/general.templ:141 msgid "Similar parts of the first program relative to the second" msgstr "Similar parts of the first program relative to the second" @@ -312,26 +336,26 @@ msgstr "Similar parts of the first program relative to the second" msgid "Verification results" msgstr "Verification results" -#: src/templates/sources.templ:48 +#: src/templates/sources.templ:67 msgid "The path to the file being checked" msgstr "The path to the file being checked" -#: src/templates/sources.templ:49 +#: src/templates/sources.templ:68 msgid "Total maximum compliance, %" msgstr "Total maximum compliance, %" -#: src/templates/sources.templ:64 +#: src/templates/sources.templ:83 msgid "Similar parts of the programs" msgstr "Similar parts of the programs" -#: src/templates/sources.templ:66 +#: src/templates/sources.templ:85 msgid "The name of a similar file" msgstr "The name of a similar file" -#: src/templates/sources.templ:67 +#: src/templates/sources.templ:86 msgid "Part of the file that is being checked" msgstr "Part of the file that is being checked" -#: src/templates/sources.templ:68 +#: src/templates/sources.templ:87 msgid "Part of a similar file" msgstr "Part of a similar file" diff --git a/locales/translations/ru/LC_MESSAGES/codeplag.po b/locales/translations/ru/LC_MESSAGES/codeplag.po index 951278c..ec19e7f 100644 --- a/locales/translations/ru/LC_MESSAGES/codeplag.po +++ b/locales/translations/ru/LC_MESSAGES/codeplag.po @@ -4,8 +4,7 @@ # msgid "" msgstr "" -"Project-Id-Version: codeplag 0.5.10\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"Project-Id-Version: codeplag 0.5.11\n" "POT-Creation-Date: 2024-05-21 09:28+0300\n" "PO-Revision-Date: 2024-05-11 12:05+0300\n" "Last-Translator: Artyom Semidolin\n" @@ -201,7 +200,23 @@ msgstr "" msgid "Type of the created report file." msgstr "Тип создаваемого файла отчёта." -#: src/codeplag/codeplagcli.py:338 +#: src/codeplag/codeplagcli.py:336 +msgid "" +"Path to first compared works. Can be path to directory or URL to the " +"project folder." +msgstr "" +"Путь к первым сравниваемым файлам. Это может быть путь к директории или " +"URL к папке проекта." + +#: src/codeplag/codeplagcli.py:346 +msgid "" +"Path to second compared works. Can be path to directory or URL to the " +"project folder." +msgstr "" +"Путь ко вторым сравниваемым файлам. Это может быть путь к директории или " +"URL к папке проекта." + +#: src/codeplag/codeplagcli.py:358 msgid "" "Program help to find similar parts of source codes for the different " "languages." @@ -209,31 +224,31 @@ msgstr "" "Программа помогает находить схожие части исходных кодов для разных языков" " программирования." -#: src/codeplag/codeplagcli.py:346 +#: src/codeplag/codeplagcli.py:366 msgid "Print current version number and exit." msgstr "Выводит текущую версию программы." -#: src/codeplag/codeplagcli.py:352 +#: src/codeplag/codeplagcli.py:372 msgid "Commands help." msgstr "Справка по командам." -#: src/codeplag/codeplagcli.py:367 +#: src/codeplag/codeplagcli.py:387 msgid "No command is provided; please choose one from the available (--help)." msgstr "" "Ни одна из команд не выбрана, пожалуйста, выбери одну из доступных команд" " (--help)." -#: src/codeplag/codeplagcli.py:378 +#: src/codeplag/codeplagcli.py:398 msgid "There is nothing to modify; please provide at least one argument." msgstr "" "Нечего модифицировать, пожалуйста, выберите один из параметров для " "модификации." -#: src/codeplag/codeplagcli.py:382 +#: src/codeplag/codeplagcli.py:402 msgid "The'repo-regexp' option requires the provided 'github-user' option." msgstr "Аргумент 'repo-regexp' требует заданного параметра 'github-user'." -#: src/codeplag/codeplagcli.py:390 +#: src/codeplag/codeplagcli.py:410 msgid "" "The'path-regexp' option requires the provided 'directories', 'github-" "user', or 'github-project-folder' options." @@ -250,75 +265,84 @@ msgid "Check results" msgstr "Результаты проверки" #: src/templates/general.templ:44 src/templates/sources.templ:46 -msgid "General information" -msgstr "Общая информация" +msgid "Summary information" +msgstr "Сводка по работам" -#: src/templates/general.templ:46 +#: src/templates/general.templ:46 src/templates/general.templ:65 +#: src/templates/sources.templ:48 msgid "Path" msgstr "Путь" -#: src/templates/general.templ:47 +#: src/templates/general.templ:47 src/templates/sources.templ:49 +msgid "Total similarity" +msgstr "Общая схожесть, %" + +#: src/templates/general.templ:63 src/templates/sources.templ:65 +msgid "General information" +msgstr "Общая информация" + +#: src/templates/general.templ:66 msgid "Last modification date" msgstr "Дата последней модификации" -#: src/templates/general.templ:48 +#: src/templates/general.templ:67 msgid "Check date" msgstr "Дата проверки" -#: src/templates/general.templ:61 +#: src/templates/general.templ:80 msgid "Metrics information" msgstr "Информация по метрикам" -#: src/templates/general.templ:63 +#: src/templates/general.templ:82 msgid "Metric" msgstr "Метрика" -#: src/templates/general.templ:64 +#: src/templates/general.templ:83 msgid "Value" msgstr "Значение" -#: src/templates/general.templ:67 +#: src/templates/general.templ:86 msgid "Jaccard Coefficient" msgstr "Коэффициент Жаккара" -#: src/templates/general.templ:71 +#: src/templates/general.templ:90 msgid "Similarity of operators" msgstr "Схожесть операторов" -#: src/templates/general.templ:75 +#: src/templates/general.templ:94 msgid "Similarity of keywords" msgstr "Схожесть ключевых слов" -#: src/templates/general.templ:79 +#: src/templates/general.templ:98 msgid "Similarity of literals" msgstr "Схожеcть литералов" -#: src/templates/general.templ:82 +#: src/templates/general.templ:101 msgid "Weighted average by fast metrics" msgstr "Средневзвешенное по быстрым метрикам" -#: src/templates/general.templ:85 +#: src/templates/general.templ:104 msgid "Similarity of structures" msgstr "Схожесть структур" -#: src/templates/general.templ:91 +#: src/templates/general.templ:110 msgid "Similar parts of the second program relative to the first" msgstr "Схожие части второй программы относительно первой" -#: src/templates/general.templ:93 src/templates/general.templ:125 +#: src/templates/general.templ:112 src/templates/general.templ:144 msgid "Part of the first program" msgstr "Часть первой программ" -#: src/templates/general.templ:94 src/templates/general.templ:124 +#: src/templates/general.templ:113 src/templates/general.templ:143 msgid "Part of the second program" msgstr "Часть второй программы" -#: src/templates/general.templ:95 src/templates/general.templ:126 -#: src/templates/sources.templ:69 +#: src/templates/general.templ:114 src/templates/general.templ:145 +#: src/templates/sources.templ:88 msgid "Similarity" msgstr "Схожесть" -#: src/templates/general.templ:122 +#: src/templates/general.templ:141 msgid "Similar parts of the first program relative to the second" msgstr "Схожие части первой программы относительно второй" @@ -326,26 +350,26 @@ msgstr "Схожие части первой программы относите msgid "Verification results" msgstr "Результаты проверки" -#: src/templates/sources.templ:48 +#: src/templates/sources.templ:67 msgid "The path to the file being checked" msgstr "Путь до проверяемого файла" -#: src/templates/sources.templ:49 +#: src/templates/sources.templ:68 msgid "Total maximum compliance, %" msgstr "Суммарное максимальное соответствие, %" -#: src/templates/sources.templ:64 +#: src/templates/sources.templ:83 msgid "Similar parts of the programs" msgstr "Схожие части программ" -#: src/templates/sources.templ:66 +#: src/templates/sources.templ:85 msgid "The name of a similar file" msgstr "Имя схожего файла" -#: src/templates/sources.templ:67 +#: src/templates/sources.templ:86 msgid "Part of the file that is being checked" msgstr "Часть проверяемого файла" -#: src/templates/sources.templ:68 +#: src/templates/sources.templ:87 msgid "Part of a similar file" msgstr "Часть схожего файла" diff --git a/src/codeplag/handlers/report.py b/src/codeplag/handlers/report.py index 020f7d7..b5e4097 100644 --- a/src/codeplag/handlers/report.py +++ b/src/codeplag/handlers/report.py @@ -133,7 +133,7 @@ def calculate_general_total_similarity( if module_similarity > max_similarity: max_similarity = module_similarity total_similarity += max_similarity - return total_similarity / unique_first_paths.size + return round(total_similarity / unique_first_paths.size * 100, 2) def calculate_sources_total_similarity( @@ -149,7 +149,7 @@ def calculate_sources_total_similarity( item_cnt += 1 if item_cnt == 0: return 0.0 - return total_similarity / item_cnt + return round(total_similarity / item_cnt, 2) def _convert_similarity_matrix_to_percent_matrix(matrix: NDArray) -> NDArray: @@ -340,6 +340,7 @@ def _create_general_report( language=language, first_root_path_sim=first_root_path_sim, second_root_path_sim=second_root_path_sim, + paths=paths, ) ) @@ -376,5 +377,6 @@ def _create_sources_report( round=round, first_root_path_sim=first_root_path_sim, second_root_path_sim=second_root_path_sim, + paths=paths, ) ) diff --git a/src/templates/general.templ b/src/templates/general.templ index c4e1e57..42d5550 100644 --- a/src/templates/general.templ +++ b/src/templates/general.templ @@ -38,6 +38,25 @@

{{ _("Check results") }}

+ {% if first_root_path_sim is not none %} +
+ + + + + + + + + + + + + + +
{{ _("Summary information") }}
{{ _("Path") }}{{ _("Total similarity") }}
{{ paths[0] }}{{ first_root_path_sim }}
{{ paths[1] }}{{ second_root_path_sim }}
+
+ {% endif %} {% for line, cmp_res, same_parts_of_second, same_parts_of_first in data %}
diff --git a/src/templates/sources.templ b/src/templates/sources.templ index 8800099..077b872 100644 --- a/src/templates/sources.templ +++ b/src/templates/sources.templ @@ -40,6 +40,25 @@

{{ _("Verification results") }}

+ {% if first_root_path_sim is not none %} +
+
+ + + + + + + + + + + + + +
{{ _("Summary information") }}
{{ _("Path") }}{{ _("Total similarity") }}
{{ paths[0] }}{{ first_root_path_sim }}
{{ paths[1] }}{{ second_root_path_sim }}
+
+ {% endif %} {% for work_path, same_parts_of_other in data.items() %}
diff --git a/test/unit/codeplag/handlers/test_report.py b/test/unit/codeplag/handlers/test_report.py index 4fe68ec..b6466f2 100644 --- a/test/unit/codeplag/handlers/test_report.py +++ b/test/unit/codeplag/handlers/test_report.py @@ -1,6 +1,7 @@ from __future__ import annotations import numpy as np +import pandas as pd import pytest from codeplag.handlers.report import ( @@ -13,11 +14,75 @@ _get_resulting_same_percentages, _get_same_funcs, _replace_minimal_value, + calculate_general_total_similarity, + calculate_sources_total_similarity, ) from codeplag.reporters import serialize_compare_result from codeplag.types import ASTFeatures, CompareInfo, SameHead +@pytest.mark.parametrize( + "same_percentages,pattern,expected", + [ + ({}, "pattern", 0.0), + ( + { + "/usr/codeplag/marshal.py": 80.0, + "/usr/codeplag/featurebased.py": 81.25, + "/home/band/setup.py": 50.0, + }, + "/usr/codeplag", + 80.62, + ), + ], +) +def test_calculate_sources_total_similarity( + same_percentages: ResultingSamePercentages, pattern: str, expected: float +): + assert calculate_sources_total_similarity(same_percentages, pattern) == expected + + +@pytest.mark.parametrize( + "df,unique_first_paths,unique_second_paths,expected", + [ + (pd.DataFrame({}), np.array([]), np.array([]), 0.0), + ( + pd.DataFrame( + { + "first_path": ["a.py", "b.py", "a.py"], + "second_path": ["c.py", "d.py", "e.py"], + "weighted_average": [0.15, 0.1, 0.3], + } + ), + np.array(["a.py", "b.py"]), + np.array(["c.py", "d.py", "e.py"]), + 20.0, + ), + ( + pd.DataFrame( + { + "first_path": ["a.py", "b.py", "a.py"], + "second_path": ["c.py", "d.py", "e.py"], + "weighted_average": [0.15, 0.1, 0.3], + } + ), + np.array(["c.py", "d.py", "e.py"]), + np.array(["a.py", "b.py"]), + 18.33, + ), + ], +) +def test_calculate_general_total_similarity( + df: pd.DataFrame, + unique_first_paths: np.ndarray, + unique_second_paths: np.ndarray, + expected: float, +): + assert ( + calculate_general_total_similarity(df, unique_first_paths, unique_second_paths) == expected + ) + + @pytest.mark.parametrize( "same_parts,new_key,new_value,expected", [ From ba9d950641c8dc35b18a2319038eba95cc521532 Mon Sep 17 00:00:00 2001 From: Artyom Semidolin <43622365+Artanias@users.noreply.github.com> Date: Thu, 2 Jan 2025 22:15:04 +0300 Subject: [PATCH 4/5] test: adds auto tests for creating reports with incorrect arguments. --- test/auto/functional/test_report.py | 18 ++++++++++++++++++ test/auto/utils.py | 21 ++++++++++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/test/auto/functional/test_report.py b/test/auto/functional/test_report.py index dedc6ed..d49136c 100644 --- a/test/auto/functional/test_report.py +++ b/test/auto/functional/test_report.py @@ -53,3 +53,21 @@ def test_content_different_between_calls(self: Self, report_type: ReportType) -> create_report(second_report_path, report_type).assert_success() assert first_report_path.read_text() != second_report_path.read_text() + + @pytest.mark.parametrize( + "report_type", + ["general", "sources"], + ) + def test_provided_only_first_path(self: Self, report_type: ReportType) -> None: + create_report( + REPORTS_FOLDER / "report.html", report_type, first_root_path="/usr/src" + ).assert_failed() + + @pytest.mark.parametrize( + "report_type", + ["general", "sources"], + ) + def test_provided_only_second_path(self: Self, report_type: ReportType) -> None: + create_report( + REPORTS_FOLDER / "report.html", report_type, second_root_path="/usr/src" + ).assert_failed() diff --git a/test/auto/utils.py b/test/auto/utils.py index 94730d9..2e130ff 100644 --- a/test/auto/utils.py +++ b/test/auto/utils.py @@ -45,8 +45,22 @@ def run_check(cmd: list[str], extension: str = "py") -> CmdResult: return run_util(["--extension", extension] + cmd, root="check") -def create_report(path: Path, report_type: ReportType) -> CmdResult: - return run_util(["create", "--path", str(path), "--type", report_type], root="report") +def create_opt(key: str, value: Any | None) -> list[str]: + return [f"--{key}", str(value)] if value is not None else [] + + +def create_report( + path: Path, + report_type: ReportType, + first_root_path: str | None = None, + second_root_path: str | None = None, +) -> CmdResult: + return run_util( + ["create", "--path", str(path), "--type", report_type] + + create_opt("first-root-path", first_root_path) + + create_opt("second-root-path", second_root_path), + root="report", + ) def modify_settings( @@ -62,9 +76,6 @@ def modify_settings( log_level: LogLevel | None = None, workers: int | None = None, ) -> CmdResult: - def create_opt(key: str, value: Any | None) -> list[str]: - return [f"--{key}", str(value)] if value is not None else [] - return run_util( ["modify"] + create_opt("reports", reports) From 53f6ce4d6d167047f32632528b5c7453317fdbde Mon Sep 17 00:00:00 2001 From: Artyom Semidolin <43622365+Artanias@users.noreply.github.com> Date: Thu, 2 Jan 2025 22:46:00 +0300 Subject: [PATCH 5/5] test: adds auto tests for creating reports with all arguments. --- test/auto/functional/test_report.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/test/auto/functional/test_report.py b/test/auto/functional/test_report.py index d49136c..2170fd5 100644 --- a/test/auto/functional/test_report.py +++ b/test/auto/functional/test_report.py @@ -11,7 +11,9 @@ @pytest.fixture(scope="function", autouse=True) def setup(create_reports_folder: None): - modify_settings(reports=REPORTS_FOLDER, reports_extension="csv").assert_success() + modify_settings( + reports=REPORTS_FOLDER, reports_extension="csv", short_output=1 + ).assert_success() run_check(["--directories", "test/unit/codeplag/cplag", "src/"]).assert_success() yield @@ -54,6 +56,23 @@ def test_content_different_between_calls(self: Self, report_type: ReportType) -> assert first_report_path.read_text() != second_report_path.read_text() + @pytest.mark.parametrize( + "report_type", + ["general", "sources"], + ) + def test_default_report_diff_with_provided_paths(self: Self, report_type: ReportType) -> None: + first_report_path = REPORTS_FOLDER / "report1.html" + second_report_path = REPORTS_FOLDER / "report2.html" + + create_report(first_report_path, report_type).assert_success() + create_report( + second_report_path, + report_type, + first_root_path="/usr/src/codeplag", + second_root_path="/usr/src/webparsers", + ).assert_success() + assert first_report_path.read_text() != second_report_path.read_text() + @pytest.mark.parametrize( "report_type", ["general", "sources"],