diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..7de6173 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,178 @@ +name: CI/CD Workflow +on: push +jobs: + ruff-format: + name: Check formatting + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: chartboost/ruff-action@v1 + with: + args: format --check + + ruff-check: + name: Run linter + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: chartboost/ruff-action@v1 + + test-and-build: + name: Test & build Python package + runs-on: ubuntu-latest + needs: [ruff-check, ruff-format] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + # Установка Python c кэшированим зависимостей для pip + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install poetry + uses: abatilo/actions-poetry@v2 + + - name: Configure poetry + working-directory: ./pwdkek-python + run: | + poetry config virtualenvs.create true --local + poetry config virtualenvs.in-project true --local + + - uses: actions/cache@v3 + name: Enable dependencies cache + with: + path: ./pwdkek-python/.venv + key: venv-${{ hashFiles('pwdkek-python/poetry.lock') }} + + - name: Install dependencies + working-directory: ./pwdkek-python + run: poetry install + + - uses: actions/cache@v3 + name: Enable datasets cache + with: + path: ./datasets + key: datasets-${{ hashFiles('pwdkek-python/pwdkek_python/builtin_datasets.py') }} + + - name: Download datasets + run: | + cd pwdkek-python + export PYTHONPATH=$(pwd):$PYTHONPATH + python pwdkek_python/builtin_datasets.py + + - name: Run tests (small dataset) + run: | + cd pwdkek-python + export PYTHONPATH=$(pwd):$PYTHONPATH + python tests/test.py --dataset small + + - name: Run tests (big dataset) + run: | + cd pwdkek-python + export PYTHONPATH=$(pwd):$PYTHONPATH + python tests/test.py --dataset big + + - name: Build package + working-directory: ./pwdkek-python + run: poetry build + + - name: Store the distribution packages + uses: actions/upload-artifact@v3 + with: + name: python-package-distributions + path: ./pwdkek-python/dist/ + + publish-to-testpypi: + name: Publish to TestPyPI + needs: + - test-and-build + runs-on: ubuntu-latest + + environment: + name: testpypi + url: https://test.pypi.org/p/pwdkek-python + + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: ./pwdkek-python/dist/ + - name: Publish distribution 📦 to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + packages-dir: ./pwdkek-python/dist/ + + publish-to-pypi: + name: >- + Publish to PyPI + if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes + needs: + - test-and-build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/pwdkek-python + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: ./pwdkek-python/dist/ + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: ./pwdkek-python/dist/ + + github-release: + name: >- + Sign and upload package to GitHub Release + needs: + - publish-to-pypi + runs-on: ubuntu-latest + + permissions: + contents: write # IMPORTANT: mandatory for making GitHub Releases + id-token: write # IMPORTANT: mandatory for sigstore + + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: ./pwdkek-python/dist/ + - name: Sign the dists with Sigstore + uses: sigstore/gh-action-sigstore-python@v2.1.1 + with: + inputs: >- + ./pwdkek-python/dist/*.tar.gz + ./pwdkek-python/dist/*.whl + - name: Create GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh release create + '${{ github.ref_name }}' + --repo '${{ github.repository }}' + --notes "" + - name: Upload artifact signatures to GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + # Upload to GitHub Release using the `gh` CLI. + # `dist/` contains the built packages, and the + # sigstore-produced signatures and certificates. + run: >- + gh release upload + '${{ github.ref_name }}' ./pwdkek-python/dist/** + --repo '${{ github.repository }}' \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index b6808eb..0000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: CI/CD workflow -on: push -jobs: - test: - runs-on: ubuntu-latest - - steps: - # Извлечение репозитория - - name: Checkout - uses: actions/checkout@v4 - - # Установка Python c кэшированим зависимостей для pip - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: 'pip' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Run tests (small dataset) - run: | - python test.py --dataset_path datasets/rockyou-utf8-filtered-sorted.txt.gz - - - name: Run tests (big dataset) - run: | - python test.py --dataset_path datasets/crackstation-human-only-utf8-filtered-sorted.txt.gz \ No newline at end of file diff --git a/datasets/.gitkeep b/datasets/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/datasets/crackstation-human-only-utf8-filtered-sorted.txt.gz b/datasets/crackstation-human-only-utf8-filtered-sorted.txt.gz deleted file mode 100644 index ca86a53..0000000 --- a/datasets/crackstation-human-only-utf8-filtered-sorted.txt.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dca7474b4b369ff547a1f78a45724c268729a922ac32b2954fdc43f3f48e3566 -size 256404350 diff --git a/datasets/rockyou-utf8-filtered-sorted.txt.gz b/datasets/rockyou-utf8-filtered-sorted.txt.gz deleted file mode 100644 index e23b6f6..0000000 --- a/datasets/rockyou-utf8-filtered-sorted.txt.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7037738e9dfb308ceefc17ca960ca6fe1ecffd75a2d4db8b945b10f26c03432a -size 50027492 diff --git a/prepare_dataset.py b/prepare_dataset.py deleted file mode 100644 index 73c80ba..0000000 --- a/prepare_dataset.py +++ /dev/null @@ -1,25 +0,0 @@ -import gzip -import string -import sys - - -PASSWORD_ALLOWED_CHARS = str( - string.ascii_lowercase - + string.ascii_uppercase - + string.digits - + string.punctuation, -) - -# NOTE: convert input to utf-8 beforehand -with open(sys.argv[1], "r") as file: - passwords = [ - "".join([ch for ch in line if ch in PASSWORD_ALLOWED_CHARS]) - for line in file.readlines() - ] - -passwords = list(filter(lambda pwd: len(pwd) != 0, passwords)) -passwords.sort() - -with gzip.open(sys.argv[2], "wt") as file: - for password in passwords: - file.write(password + "\n") diff --git a/pwdkek-python/README.md b/pwdkek-python/README.md new file mode 100644 index 0000000..02069c0 --- /dev/null +++ b/pwdkek-python/README.md @@ -0,0 +1,3 @@ +## Pwd Kek + +Ckek how strong is your password \ No newline at end of file diff --git a/pwdkek-python/poetry.lock b/pwdkek-python/poetry.lock new file mode 100644 index 0000000..e19090a --- /dev/null +++ b/pwdkek-python/poetry.lock @@ -0,0 +1,101 @@ +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "packaging" +version = "24.1" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, + {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pytest" +version = "8.2.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"}, + {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2.0" + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "ruff" +version = "0.5.0" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.5.0-py3-none-linux_armv6l.whl", hash = "sha256:ee770ea8ab38918f34e7560a597cc0a8c9a193aaa01bfbd879ef43cb06bd9c4c"}, + {file = "ruff-0.5.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:38f3b8327b3cb43474559d435f5fa65dacf723351c159ed0dc567f7ab735d1b6"}, + {file = "ruff-0.5.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7594f8df5404a5c5c8f64b8311169879f6cf42142da644c7e0ba3c3f14130370"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:adc7012d6ec85032bc4e9065110df205752d64010bed5f958d25dbee9ce35de3"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d505fb93b0fabef974b168d9b27c3960714d2ecda24b6ffa6a87ac432905ea38"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dc5cfd3558f14513ed0d5b70ce531e28ea81a8a3b1b07f0f48421a3d9e7d80a"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:db3ca35265de239a1176d56a464b51557fce41095c37d6c406e658cf80bbb362"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b1a321c4f68809fddd9b282fab6a8d8db796b270fff44722589a8b946925a2a8"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c4dfcd8d34b143916994b3876b63d53f56724c03f8c1a33a253b7b1e6bf2a7d"}, + {file = "ruff-0.5.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81e5facfc9f4a674c6a78c64d38becfbd5e4f739c31fcd9ce44c849f1fad9e4c"}, + {file = "ruff-0.5.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e589e27971c2a3efff3fadafb16e5aef7ff93250f0134ec4b52052b673cf988d"}, + {file = "ruff-0.5.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d2ffbc3715a52b037bcb0f6ff524a9367f642cdc5817944f6af5479bbb2eb50e"}, + {file = "ruff-0.5.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cd096e23c6a4f9c819525a437fa0a99d1c67a1b6bb30948d46f33afbc53596cf"}, + {file = "ruff-0.5.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:46e193b36f2255729ad34a49c9a997d506e58f08555366b2108783b3064a0e1e"}, + {file = "ruff-0.5.0-py3-none-win32.whl", hash = "sha256:49141d267100f5ceff541b4e06552e98527870eafa1acc9dec9139c9ec5af64c"}, + {file = "ruff-0.5.0-py3-none-win_amd64.whl", hash = "sha256:e9118f60091047444c1b90952736ee7b1792910cab56e9b9a9ac20af94cd0440"}, + {file = "ruff-0.5.0-py3-none-win_arm64.whl", hash = "sha256:ed5c4df5c1fb4518abcb57725b576659542bdbe93366f4f329e8f398c4b71178"}, + {file = "ruff-0.5.0.tar.gz", hash = "sha256:eb641b5873492cf9bd45bc9c5ae5320648218e04386a5f0c264ad6ccce8226a1"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.11" +content-hash = "d6ec288f0c21334d83a7b7c8e41d8733df8a788770312a0309791df10cd46504" diff --git a/pwdkek-python/pwdkek_python/__init__.py b/pwdkek-python/pwdkek_python/__init__.py new file mode 100644 index 0000000..a4d1eaf --- /dev/null +++ b/pwdkek-python/pwdkek_python/__init__.py @@ -0,0 +1,11 @@ +from pwdkek_python.complexity_estimator import ( + PasswordComplexityEstimator, + PasswordComplexityTiers, + PasswordComplexityEstimate, +) + +__all__ = [ + "PasswordComplexityEstimator", + "PasswordComplexityTiers", + "PasswordComplexityEstimate", +] diff --git a/pwdkek-python/pwdkek_python/__main__.py b/pwdkek-python/pwdkek_python/__main__.py new file mode 100644 index 0000000..1fa7452 --- /dev/null +++ b/pwdkek-python/pwdkek_python/__main__.py @@ -0,0 +1,60 @@ +import argparse +from datetime import timedelta + +from pwdkek_python.complexity_estimator import PasswordComplexityEstimator +from pwdkek_python.builtin_datasets import BuiltInDataset + + +def main(): + parser = argparse.ArgumentParser(description="Password Complexity Estimator") + parser.add_argument( + "--dataset", + type=str, + help="Built-in dataset name or path to the dataset file", + default="small", + ) + args = parser.parse_args() + + print("Loading...") + try: + if args.dataset in BuiltInDataset.names(): + dataset = BuiltInDataset[args.dataset.upper()] + else: + dataset = args.dataset + estimator = PasswordComplexityEstimator(dataset) + except ValueError as e: + print("Error:", e) + return + + try: + while True: + print() + try: + estimate = estimator.estimate(input("Enter a password: ")) + except ValueError as e: + print(e) + continue + + print("Password entropy:", estimate.entropy) + + print("Time to decode with 1Gh/s: ", end="") + if estimate.ttd == timedelta.max: + print("Uncountable number of years") + else: + ttd = estimate.ttd + years = ttd.days // 365 + days = ttd.days % 365 + hours, remainder = divmod(ttd.seconds, 3600) + minutes, seconds = divmod(remainder, 60) + print( + f"{years} years {days} days {hours} hours {minutes} minutes {seconds} seconds", + ) + + print("Tier:", estimate.tier.value) + + except KeyboardInterrupt: + pass + + +if __name__ == "__main__": + main() diff --git a/pwdkek-python/pwdkek_python/builtin_datasets.py b/pwdkek-python/pwdkek_python/builtin_datasets.py new file mode 100644 index 0000000..835e81d --- /dev/null +++ b/pwdkek-python/pwdkek_python/builtin_datasets.py @@ -0,0 +1,74 @@ +import gzip +from enum import Enum +from io import BytesIO +from tempfile import NamedTemporaryFile +from typing import NamedTuple +from pathlib import Path +from urllib.request import urlopen, Request + + +DATASET_ROOT = Path(__file__).parent.parent.parent / "datasets" + + +class BuiltinDatasetInfo(NamedTuple): + file_name: str + source_url: str + + @property + def path(self) -> Path: + file_path = DATASET_ROOT / self.file_name + if not file_path.exists(): + self.download() + return file_path + + @staticmethod + def _remove_non_utf8(file_path: str) -> None: + with open(file_path, "rb") as file: + data = file.read() + with open(file_path, "wb") as file: + file.write(data.decode("utf-8", errors="ignore").encode("utf-8")) + + def download(self) -> None: + from pwdkek_python.prepare_dataset import prepare_dataset + + print("Downloading", self.source_url) + file_data = urlopen( + Request(self.source_url, headers={"User-Agent": "curl/8.3.0"}) + ) + decompressed_file = NamedTemporaryFile(delete=False) + with decompressed_file as tmp_file: + with gzip.open(BytesIO(file_data.read()), "rb") as file: + tmp_file.write(file.read()) + + print("Fixing encoding...") + self._remove_non_utf8(decompressed_file.name) + + print("Preparing dataset...") + prepare_dataset(decompressed_file.name, DATASET_ROOT / self.file_name) + decompressed_file.close() + print("Done!") + + +class BuiltInDataset(Enum): + SMALL = BuiltinDatasetInfo( + "rockyou-utf8-filtered-sorted.txt.gz", + "https://raw.githubusercontent.com/zacheller/rockyou/master/rockyou.txt.tar.gz", + ) + BIG = BuiltinDatasetInfo( + "crackstation-human-only-utf8-filtered-sorted.txt.gz", + "http://download.g0tmi1k.com/wordlists/large/crackstation-human-only.txt.gz", + ) + + @classmethod + def names(cls): + return [ + name.lower() + for name, value in vars(cls).items() + if isinstance(value, BuiltInDataset) + ] + + +if __name__ == "__main__": + print("Available datasets:", BuiltInDataset.names()) + assert BuiltInDataset.SMALL.value.path.exists() + assert BuiltInDataset.BIG.value.path.exists() diff --git a/estimator.py b/pwdkek-python/pwdkek_python/complexity_estimator.py similarity index 66% rename from estimator.py rename to pwdkek-python/pwdkek_python/complexity_estimator.py index e1f8c3e..db8f94f 100644 --- a/estimator.py +++ b/pwdkek-python/pwdkek_python/complexity_estimator.py @@ -1,4 +1,3 @@ -import argparse import bisect from dataclasses import dataclass from datetime import timedelta @@ -6,6 +5,9 @@ import gzip from math import log2 import string +from pathlib import Path + +from pwdkek_python.builtin_datasets import BuiltInDataset PASSWORD_ALLOWED_CHARS = str( string.ascii_lowercase @@ -33,8 +35,11 @@ class PasswordComplexityEstimate: class PasswordComplexityEstimator: def __init__( self, - dataset_path: str, + dataset_path: str | Path | BuiltInDataset, ): + if isinstance(dataset_path, BuiltInDataset): + dataset_path = dataset_path.value.path + with gzip.open(dataset_path) as file: self._passwords = [line.decode() for line in file.readlines()] @@ -111,54 +116,3 @@ def estimate(self, password: str): ttd, tier, ) - - -def main(): - parser = argparse.ArgumentParser(description="Password Complexity Estimator") - parser.add_argument( - "--dataset_path", - type=str, - help="Path to the dataset file", - default="datasets/rockyou-utf8-filtered-sorted.txt.gz", - ) - args = parser.parse_args() - - print("Loading...") - try: - estimator = PasswordComplexityEstimator(args.dataset_path) - except ValueError as e: - print("Error:", e) - return - - try: - while True: - print() - try: - estimate = estimator.estimate(input("Enter a password: ")) - except ValueError as e: - print(e) - continue - - print("Password entropy:", estimate.entropy) - - print("Time to decode with 1Gh/s: ", end="") - if estimate.ttd == timedelta.max: - print("Uncountable number of years") - else: - ttd = estimate.ttd - years = ttd.days // 365 - days = ttd.days % 365 - hours, remainder = divmod(ttd.seconds, 3600) - minutes, seconds = divmod(remainder, 60) - print( - f"{years} years {days} days {hours} hours {minutes} minutes {seconds} seconds", - ) - - print("Tier:", estimate.tier.value) - - except KeyboardInterrupt: - pass - - -if __name__ == "__main__": - main() diff --git a/pwdkek-python/pwdkek_python/prepare_dataset.py b/pwdkek-python/pwdkek_python/prepare_dataset.py new file mode 100644 index 0000000..02bc304 --- /dev/null +++ b/pwdkek-python/pwdkek_python/prepare_dataset.py @@ -0,0 +1,25 @@ +import gzip +import sys +from pathlib import Path + +from pwdkek_python.complexity_estimator import PASSWORD_ALLOWED_CHARS + + +def prepare_dataset(input_path: str | Path, output_path: str | Path) -> None: + with open(input_path, "r") as file: + passwords = [ + "".join([ch for ch in line if ch in PASSWORD_ALLOWED_CHARS]) + for line in file.readlines() + ] + + passwords = list(filter(lambda pwd: len(pwd) != 0, passwords)) + passwords.sort() + + with gzip.open(output_path, "wt") as file: + for password in passwords: + file.write(password + "\n") + + +if __name__ == "__main__": + # NOTE: convert input to utf-8 beforehand + prepare_dataset(sys.argv[1], sys.argv[2]) diff --git a/pwdkek-python/pyproject.toml b/pwdkek-python/pyproject.toml new file mode 100644 index 0000000..b45f8f6 --- /dev/null +++ b/pwdkek-python/pyproject.toml @@ -0,0 +1,23 @@ +[tool.poetry] +name = "pwdkek-python" +version = "0.1.0" +description = "" +authors = [ + "Vitaly Mahonin", + "Evgenij Ivankin", + "Vladislav Kolpachev", + "Nikolay Kurichev" +] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.11" + + +[tool.poetry.group.dev.dependencies] +pytest = "^8.2.2" +ruff = "^0.5.0" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/pwdkek-python/tests/__init__.py b/pwdkek-python/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test.py b/pwdkek-python/tests/test.py similarity index 57% rename from test.py rename to pwdkek-python/tests/test.py index e37ffed..355c68a 100644 --- a/test.py +++ b/pwdkek-python/tests/test.py @@ -1,43 +1,59 @@ from datetime import timedelta -from estimator import * + +from pwdkek_python.builtin_datasets import BuiltInDataset +from pwdkek_python.complexity_estimator import ( + PasswordComplexityTiers, + PasswordComplexityEstimator, +) import json import argparse + # Функция для загрузки паролей и их тиров из JSON файла def load_passwords(filename): - with open(filename, 'r', encoding='utf-8') as file: + with open(filename, "r", encoding="utf-8") as file: data = json.load(file) return data + # Функция для тестирования паролей def test_passwords(filename): - parser = argparse.ArgumentParser(description="Password Complexity Estimator Testing") + parser = argparse.ArgumentParser( + description="Password Complexity Estimator Testing" + ) parser.add_argument( - "--dataset_path", + "--dataset", type=str, - help="Path to the dataset file", - default="datasets/rockyou-utf8-filtered-sorted.txt.gz", + help="Built-in dataset name or path to the dataset file", + default="small", ) args = parser.parse_args() password_data = load_passwords(filename) - estimator = PasswordComplexityEstimator(args.dataset_path) + + if args.dataset in BuiltInDataset.names(): + dataset = BuiltInDataset[args.dataset.upper()] + else: + dataset = args.dataset + + estimator = PasswordComplexityEstimator(dataset) # Количество совпадений по тирам - correct_predictions = {tier_name.value : 0 for tier_name in PasswordComplexityTiers} - + correct_predictions = {tier_name.value: 0 for tier_name in PasswordComplexityTiers} + # cумма предсказаний для самых плохих паролей sum_pathetic_pred = 0 - tier_to_num = {tier_name.value : i for i, tier_name in enumerate(PasswordComplexityTiers)} + tier_to_num = { + tier_name.value: i for i, tier_name in enumerate(PasswordComplexityTiers) + } pathetic_tier = [tier_name.value for tier_name in PasswordComplexityTiers][0] - + # число правильно определeнных самых плохих паролей cnt_correct_pathetic_pred = 0 for expected_tier, passwords in password_data.items(): print(f"Testing {expected_tier} passwords:") for password in passwords: - try: estimate = estimator.estimate(password) except ValueError as e: @@ -49,7 +65,9 @@ def test_passwords(filename): if ttd == timedelta.max: ttd = "Uncountable number of years" - print(f"Password: {password}, Predicted Tier: {predicted_tier}, Entropy: {round(entropy, 3)}, TTD: {ttd}") + print( + f"Password: {password}, Predicted Tier: {predicted_tier}, Entropy: {round(entropy, 3)}, TTD: {ttd}" + ) if expected_tier == predicted_tier: correct_predictions[predicted_tier] += 1 @@ -57,16 +75,20 @@ def test_passwords(filename): sum_pathetic_pred += tier_to_num[predicted_tier] if expected_tier == predicted_tier: cnt_correct_pathetic_pred += 1 - - print('-' * 100) + + print("-" * 100) print("\nResults:") for tier_name, correct_count in correct_predictions.items(): if tier_name in password_data: - print(f"Correct predictions for {tier_name} tier: {correct_count / len(password_data[tier_name])}") - - print(f"\nAverage predictions for {pathetic_tier} passwords: {sum_pathetic_pred / len(password_data[pathetic_tier])}. must be {tier_to_num[pathetic_tier]}") + print( + f"Correct predictions for {tier_name} tier: {correct_count / len(password_data[tier_name])}" + ) + + print( + f"\nAverage predictions for {pathetic_tier} passwords: {sum_pathetic_pred / len(password_data[pathetic_tier])}. must be {tier_to_num[pathetic_tier]}" + ) print("Recall:", cnt_correct_pathetic_pred / len(password_data[pathetic_tier])) - + + if __name__ == "__main__": - test_passwords('test_passwords.json') - \ No newline at end of file + test_passwords("tests/test_files/test_passwords.json") diff --git a/test_passwords.json b/pwdkek-python/tests/test_files/test_passwords.json similarity index 100% rename from test_passwords.json rename to pwdkek-python/tests/test_files/test_passwords.json