diff --git a/.gitignore b/.gitignore index 49aa60d..09c14a0 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ __pycache__/ .idea/ +src/slate3k/ + # Distribution / packaging .Python build/ diff --git a/README.md b/README.md index 4526e7c..71a6bfe 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ $ pip install pytest $ pytest tests/ # Run package locally -$ python -m scripts.main.py [-s] [-o] [-h] input_directory +$ python -m scripts.main [-s] [-o] [-h] input_directory ``` **Recommandations** diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index e69de29..0000000 diff --git a/requirements.txt b/requirements.txt index dca98ed..c959c18 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ +-e git+https://github.com/Wazzabeee/slate3k#egg=slate3k beautifulsoup4==4.10.0 nltk==3.6.6 odfpy==1.4.1 pdfplumber==0.5.28 -slate3k==0.5.3 tabulate==0.8.9 diff --git a/scripts/html_utils.py b/scripts/html_utils.py index 058aa92..99593dd 100644 --- a/scripts/html_utils.py +++ b/scripts/html_utils.py @@ -74,11 +74,11 @@ def writing_results(dir_name: str) -> str: def get_color_from_similarity(similarity_score: float) -> str: """Return css style according to similarity score""" - if float(similarity_score) > 15: + if similarity_score > 15: return "#990033; font-weight: bold" - if float(similarity_score) > 10: + if similarity_score > 10: return "#ff6600" - if float(similarity_score) > 5: + if similarity_score > 5: return "#ffcc00" return "green" diff --git a/scripts/html_writing.py b/scripts/html_writing.py index 5d84f5a..46a9381 100644 --- a/scripts/html_writing.py +++ b/scripts/html_writing.py @@ -7,7 +7,7 @@ """ -from os import fsync, rename, path +from os import fsync, path from random import randint from shutil import copy from typing import Any, List @@ -43,7 +43,7 @@ def add_links_to_html_table(html_path: str) -> None: "a", href="file:///" + html_path.replace("_results", str(file_ind)), target="_blank", - style="color:" + get_color_from_similarity(td_tag.text), + style="color:" + get_color_from_similarity(float(td_tag.text)), ) td_tag.string.wrap(tmp) # We wrap the td string between the hyperlink @@ -114,15 +114,16 @@ def get_span_blocks(bs_obj: Bs, text1: list, text2: list, block_size: int) -> li def papers_comparison(save_dir: str, ind: int, text1: list, text2: list, filenames: tuple, block_size: int) -> None: """Write to HTML file texts that have been compared with highlighted similar blocks""" - - copy(path.join("templates", "template.html"), save_dir) # Copy comparison template to curr dir + template_path = path.join("templates", "template.html") comp_path = path.join(save_dir, str(ind) + ".html") - rename(path.join(save_dir, "template.html"), comp_path) + + # Copy the template to the save directory under a new name + copy(template_path, comp_path) with open(comp_path, encoding="utf-8") as html: soup = Bs(html, "html.parser") res = get_span_blocks(soup, text1, text2, block_size) - blocks = soup.findAll(attrs={"class": "block"}) + blocks = [soup.find(id="leftContent"), soup.find(id="rightContent")] # Append filename tags and span tags to html for i, filename in enumerate(filenames): @@ -132,6 +133,7 @@ def papers_comparison(save_dir: str, ind: int, text1: list, text2: list, filenam for tag in res[i]: blocks[i].append(tag) + # Write the modified content back to the file with open(comp_path, "wb") as f_output: f_output.write(soup.prettify("utf-8")) diff --git a/setup.cfg b/setup.cfg index c0bbbb1..e9b4ad4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,9 +4,9 @@ disable=C0103,C0114,R0913,R0914,C0200,C0301 [pep8] max-line-length=120 ignore=E121,E123,E126,E226,E24,E704,E203,W503 -exclude=venv,test_env,test_venv +exclude=venv,test_env,test_venv,slate3k [flake8] max-line-length=120 ignore=E121,E123,E126,E226,E24,E704,E203,W503 -exclude=venv,test_env,test_venv +exclude=venv,test_env,test_venv,slate3k diff --git a/templates/template.html b/templates/template.html index 7369dcc..7f44d9c 100644 --- a/templates/template.html +++ b/templates/template.html @@ -1,15 +1,54 @@ - - - - - - -
- -
- -
- -
- + + + + + + Plagiarism Detection Results + + + +
+
+
+
+