diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 55ac8aa..b12ba6e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,5 +55,8 @@ jobs: - name: Run linting run: poetry run python devtools/lint.py - - name: Run tests + - name: Run unit tests + run: poetry run pytest + + - name: Run integration tests run: ./tests/run.sh diff --git a/README.md b/README.md index a9e1aa8..0b41bf5 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,11 @@ Repren is a simple but flexible command-line tool for rewriting file contents according to a set of regular expression patterns, and to rename or move files according to patterns. Essentially, it is a general-purpose, brute-force text file refactoring tool. + For example, repren could rename all occurrences of certain class and variable names in a set of Java source files, while simultaneously renaming the Java files according to the same pattern. + It's more powerful than usual options like `perl -pie`, `rpl`, or `sed`: - It can also rename files, including moving files and creating directories. @@ -33,6 +35,7 @@ It's more powerful than usual options like `perl -pie`, `rpl`, or `sed`: If file paths are provided, repren replaces those files in place, leaving a backup with extension ".orig". + If directory paths are provided, it applies replacements recursively to all files in the supplied paths that are not in the exclude pattern. If no arguments are supplied, it reads from stdin and writes to stdout. @@ -222,7 +225,6 @@ repren -p patfile --word-breaks --preserve-case --full mydir1 e.g. if the pattern file has foo_bar -> xxx_yyy, the replacements fooBar -> xxxYyy, FooBar -> XxxYyy, FOO_BAR -> XXX_YYY are also made. Assumes each pattern has one casing convention. - (Plain ASCII names only.) - The same logic applies to filenames, with patterns applied to the full file path with slashes replaced and then and parent directories created as needed, e.g. diff --git a/pyproject.toml b/pyproject.toml index 9fbc32b..fbc2daa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,8 +59,6 @@ format-jinja = """ [tool.poetry.scripts] repren = "repren:main" -lint = "devtools.lint:main" -test = "pytest:main" [tool.black] line-length = 100 diff --git a/repren/repren.py b/repren/repren.py index eb1499d..c520123 100755 --- a/repren/repren.py +++ b/repren/repren.py @@ -5,9 +5,11 @@ Repren is a simple but flexible command-line tool for rewriting file contents according to a set of regular expression patterns, and to rename or move files according to patterns. Essentially, it is a general-purpose, brute-force text file refactoring tool. + For example, repren could rename all occurrences of certain class and variable names in a set of Java source files, while simultaneously renaming the Java files according to the same pattern. + It's more powerful than usual options like `perl -pie`, `rpl`, or `sed`: - It can also rename files, including moving files and creating directories. @@ -30,6 +32,7 @@ If file paths are provided, repren replaces those files in place, leaving a backup with extension ".orig". + If directory paths are provided, it applies replacements recursively to all files in the supplied paths that are not in the exclude pattern. If no arguments are supplied, it reads from stdin and writes to stdout. @@ -219,7 +222,6 @@ e.g. if the pattern file has foo_bar -> xxx_yyy, the replacements fooBar -> xxxYyy, FooBar -> XxxYyy, FOO_BAR -> XXX_YYY are also made. Assumes each pattern has one casing convention. - (Plain ASCII names only.) - The same logic applies to filenames, with patterns applied to the full file path with slashes replaced and then and parent directories created as needed, e.g. @@ -249,6 +251,7 @@ import argparse import bisect +from dataclasses import dataclass import importlib.metadata import os import re @@ -298,15 +301,15 @@ def safe_decode(b: bytes) -> str: return b.decode("utf-8", errors="backslashreplace") +@dataclass class _Tally: - def __init__(self) -> None: - self.files: int = 0 - self.chars: int = 0 - self.matches: int = 0 - self.valid_matches: int = 0 - self.files_changed: int = 0 - self.files_rewritten: int = 0 - self.renames: int = 0 + files: int = 0 + chars: int = 0 + matches: int = 0 + valid_matches: int = 0 + files_changed: int = 0 + files_rewritten: int = 0 + renames: int = 0 _tally: _Tally = _Tally() @@ -370,10 +373,10 @@ def _apply_replacements(input_bytes: bytes, matches: List[PatternPair]) -> bytes return b"".join(out) +@dataclass class _MatchCounts: - def __init__(self, found: int = 0, valid: int = 0) -> None: - self.found: int = found - self.valid: int = valid + found: int = 0 + valid: int = 0 def add(self, o: "_MatchCounts") -> None: self.found += o.found @@ -417,37 +420,57 @@ def multi_replace( def _split_name(name: str) -> Tuple[str, List[str]]: - """Split a camel-case or underscore-formatted name into words. Return separator and words.""" - if name.find("_") >= 0: + """ + Split a CamelCase or underscore-formatted name into words. + Return separator and list of words. + """ + if "_" in name: + # Underscore-separated name return "_", name.split("_") else: - temp = _camel_split_pat1.sub("\\1\t\\2", name) - temp = _camel_split_pat2.sub("\\1\t\\2", temp) - return "", temp.split("\t") + # CamelCase or mixed case name + words = [] + current_word = "" + i = 0 + while i < len(name): + char = name[i] + if i > 0 and char.isupper(): + if name[i - 1].islower() or (i + 1 < len(name) and name[i + 1].islower()): + # Start a new word + words.append(current_word) + current_word = char + else: + current_word += char + else: + current_word += char + i += 1 + if current_word: + words.append(current_word) + return "", words def _capitalize(word: str) -> str: - return word[0].upper() + word[1:].lower() + return word[0].upper() + word[1:].lower() if word else "" # Handle empty strings safely def to_lower_camel(name: str) -> str: - words = _split_name(name)[1] - return words[0].lower() + "".join([_capitalize(word) for word in words[1:]]) + separator, words = _split_name(name) + return words[0].lower() + "".join(_capitalize(word) for word in words[1:]) def to_upper_camel(name: str) -> str: - words = _split_name(name)[1] - return "".join([_capitalize(word) for word in words]) + separator, words = _split_name(name) + return "".join(_capitalize(word) for word in words) def to_lower_underscore(name: str) -> str: - words = _split_name(name)[1] - return "_".join([word.lower() for word in words]) + separator, words = _split_name(name) + return "_".join(word.lower() for word in words) def to_upper_underscore(name: str) -> str: - words = _split_name(name)[1] - return "_".join([word.upper() for word in words]) + separator, words = _split_name(name) + return "_".join(word.upper() for word in words) def _transform_expr(expr: str, transform: Callable[[str], str]) -> str: @@ -456,8 +479,10 @@ def _transform_expr(expr: str, transform: Callable[[str], str]) -> str: def all_case_variants(expr: str) -> List[str]: - """Return all casing variations of an expression. - Note: This operates on strings and is called before pattern compilation.""" + """ + Return all casing variations of an expression. + Note: This operates on strings and is called before pattern compilation. + """ return [ _transform_expr(expr, transform) for transform in [to_lower_camel, to_upper_camel, to_lower_underscore, to_upper_underscore] @@ -879,4 +904,3 @@ def format_flags(flags: int) -> str: # Log collisions # Separate patterns file for renames and replacements # Quiet and verbose modes (the latter logging each substitution) -# Support --preserve-case for Unicode (non-ASCII) characters (messy) diff --git a/tests/pytests.py b/tests/pytests.py new file mode 100644 index 0000000..54e71a2 --- /dev/null +++ b/tests/pytests.py @@ -0,0 +1,90 @@ +import pytest +from repren.repren import ( + to_lower_camel, + to_upper_camel, + to_lower_underscore, + to_upper_underscore, + _split_name, +) + + +@pytest.mark.parametrize( + "input_str, expected", + [ + ("ÜnicodeString", ("", ["Ünicode", "String"])), + ("sträßleTest", ("", ["sträßle", "Test"])), + ("ГДеловойКод", ("", ["Г", "Деловой", "Код"])), + ("ΚαλημέραWorld", ("", ["Καλημέρα", "World"])), + ("normalTest", ("", ["normal", "Test"])), + ("HTTPResponse", ("", ["HTTP", "Response"])), + ("ThisIsATest", ("", ["This", "Is", "A", "Test"])), + ("テストCase", ("", ["テスト", "Case"])), + ("测试案例", ("", ["测试案例"])), # Chinese characters + ], +) +def test_split_name(input_str, expected): + assert _split_name(input_str) == expected + + +@pytest.mark.parametrize( + "input_str, expected", + [ + ("ÜnicodeString", "ünicodeString"), + ("HTTPResponse", "httpResponse"), + ("ΚαλημέραWorld", "καλημέραWorld"), + ("sträßleTest", "sträßleTest"), + ("ThisIsATest", "thisIsATest"), + ("テストCase", "テストCase"), + ("测试案例", "测试案例"), + ], +) +def test_to_lower_camel(input_str, expected): + assert to_lower_camel(input_str) == expected + + +@pytest.mark.parametrize( + "input_str, expected", + [ + ("ünicode_string", "ÜnicodeString"), + ("sträßle_test", "SträßleTest"), + ("http_response", "HttpResponse"), + ("καλημέρα_world", "ΚαλημέραWorld"), + ("this_is_a_test", "ThisIsATest"), + ("テスト_case", "テストCase"), + ("测试_案例", "测试案例"), + ], +) +def test_to_upper_camel(input_str, expected): + assert to_upper_camel(input_str) == expected + + +@pytest.mark.parametrize( + "input_str, expected", + [ + ("ÜnicodeString", "ünicode_string"), + ("HTTPResponse", "http_response"), + ("ΚαλημέραWorld", "καλημέρα_world"), + ("sträßleTest", "sträßle_test"), + ("ThisIsATest", "this_is_a_test"), + ("テストCase", "テスト_case"), + ("测试案例", "测试案例"), + ], +) +def test_to_lower_underscore(input_str, expected): + assert to_lower_underscore(input_str) == expected + + +@pytest.mark.parametrize( + "input_str, expected", + [ + ("ünicode_string", "ÜNICODE_STRING"), + ("http_response", "HTTP_RESPONSE"), + ("καλημέρα_world", "ΚΑΛΗΜΈΡΑ_WORLD"), + ("sträßle_test", "STRÄSSLE_TEST"), + ("this_is_a_test", "THIS_IS_A_TEST"), + ("テスト_case", "テスト_CASE"), + ("测试_案例", "测试_案例"), + ], +) +def test_to_upper_underscore(input_str, expected): + assert to_upper_underscore(input_str) == expected diff --git a/workflows/ci.yml b/workflows/ci.yml deleted file mode 100644 index 07de19a..0000000 --- a/workflows/ci.yml +++ /dev/null @@ -1,56 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python - -name: CI - -on: - push: - branches: ["main"] - pull_request: - branches: ["main"] - -permissions: - contents: read - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - with: - # Important for versioning plugins: - fetch-depth: 0 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - - name: Install Poetry - uses: snok/install-poetry@v1 - with: - version: latest - - - name: Cache Poetry dependencies - uses: actions/cache@v4 - with: - path: | - ~/.cache/pypoetry - ~/.cache/pip - key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }} - restore-keys: | - ${{ runner.os }}-poetry- - - - name: Install Poetry plugins - run: | - poetry self add "poetry-dynamic-versioning[plugin]" - - - name: Install dependencies - run: poetry install - - - name: Run linting - run: poetry run lint - - - name: Run tests - run: poetry run test diff --git a/workflows/publish.yml b/workflows/publish.yml deleted file mode 100644 index 0aa11cc..0000000 --- a/workflows/publish.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: Publish to PyPI - -on: - release: - types: [published] - workflow_dispatch: # Enable manual trigger. - -jobs: - build-and-publish: - runs-on: ubuntu-latest - permissions: - id-token: write # Mandatory for OIDC. - contents: read - steps: - - name: Check out the code - uses: actions/checkout@v4 - with: - # Important for versioning plugins: - fetch-depth: 0 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - - name: Install Poetry - uses: snok/install-poetry@v1 - with: - version: latest - - - name: Install Poetry plugins - run: | - poetry self add "poetry-dynamic-versioning[plugin]" - - - name: Install dependencies - run: poetry install --no-root - - - name: Build package - run: poetry build - - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1