Skip to content

Commit

Permalink
Better Unicode casing variations.
Browse files Browse the repository at this point in the history
Pytests. Other cleanups.
  • Loading branch information
jlevy committed Nov 28, 2024
1 parent 9d0ce06 commit 39dfe3c
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 131 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,8 @@ jobs:
- name: Run linting
run: poetry run python devtools/lint.py

- name: Run tests
- name: Run unit tests
run: poetry run pytest

- name: Run integration tests
run: ./tests/run.sh
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
Repren is a simple but flexible command-line tool for rewriting file contents according to a
set of regular expression patterns, and to rename or move files according to patterns.
Essentially, it is a general-purpose, brute-force text file refactoring tool.

For example, repren could rename all occurrences of certain class and variable names in a
set of Java source files, while simultaneously renaming the Java files according to the same
pattern.

It's more powerful than usual options like `perl -pie`, `rpl`, or `sed`:

- It can also rename files, including moving files and creating directories.
Expand All @@ -33,6 +35,7 @@ It's more powerful than usual options like `perl -pie`, `rpl`, or `sed`:

If file paths are provided, repren replaces those files in place, leaving a backup with
extension ".orig".

If directory paths are provided, it applies replacements recursively to all files in the
supplied paths that are not in the exclude pattern.
If no arguments are supplied, it reads from stdin and writes to stdout.
Expand Down Expand Up @@ -222,7 +225,6 @@ repren -p patfile --word-breaks --preserve-case --full mydir1
e.g. if the pattern file has foo_bar -> xxx_yyy, the replacements fooBar -> xxxYyy, FooBar
-> XxxYyy, FOO_BAR -> XXX_YYY are also made.
Assumes each pattern has one casing convention.
(Plain ASCII names only.)

- The same logic applies to filenames, with patterns applied to the full file path with
slashes replaced and then parent directories created as needed, e.g.
Expand Down
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ format-jinja = """

[tool.poetry.scripts]
repren = "repren:main"
lint = "devtools.lint:main"
test = "pytest:main"

[tool.black]
line-length = 100
Expand Down
82 changes: 53 additions & 29 deletions repren/repren.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
Repren is a simple but flexible command-line tool for rewriting file contents according to a
set of regular expression patterns, and to rename or move files according to patterns.
Essentially, it is a general-purpose, brute-force text file refactoring tool.
For example, repren could rename all occurrences of certain class and variable names in a
set of Java source files, while simultaneously renaming the Java files according to the same
pattern.
It's more powerful than usual options like `perl -pie`, `rpl`, or `sed`:
- It can also rename files, including moving files and creating directories.
Expand All @@ -30,6 +32,7 @@
If file paths are provided, repren replaces those files in place, leaving a backup with
extension ".orig".
If directory paths are provided, it applies replacements recursively to all files in the
supplied paths that are not in the exclude pattern.
If no arguments are supplied, it reads from stdin and writes to stdout.
Expand Down Expand Up @@ -219,7 +222,6 @@
e.g. if the pattern file has foo_bar -> xxx_yyy, the replacements fooBar -> xxxYyy, FooBar
-> XxxYyy, FOO_BAR -> XXX_YYY are also made.
Assumes each pattern has one casing convention.
(Plain ASCII names only.)
- The same logic applies to filenames, with patterns applied to the full file path with
slashes replaced and then parent directories created as needed, e.g.
Expand Down Expand Up @@ -249,6 +251,7 @@

import argparse
import bisect
from dataclasses import dataclass
import importlib.metadata
import os
import re
Expand Down Expand Up @@ -298,15 +301,15 @@ def safe_decode(b: bytes) -> str:
return b.decode("utf-8", errors="backslashreplace")


@dataclass
class _Tally:
def __init__(self) -> None:
self.files: int = 0
self.chars: int = 0
self.matches: int = 0
self.valid_matches: int = 0
self.files_changed: int = 0
self.files_rewritten: int = 0
self.renames: int = 0
files: int = 0
chars: int = 0
matches: int = 0
valid_matches: int = 0
files_changed: int = 0
files_rewritten: int = 0
renames: int = 0


_tally: _Tally = _Tally()
Expand Down Expand Up @@ -370,10 +373,10 @@ def _apply_replacements(input_bytes: bytes, matches: List[PatternPair]) -> bytes
return b"".join(out)


@dataclass
class _MatchCounts:
def __init__(self, found: int = 0, valid: int = 0) -> None:
self.found: int = found
self.valid: int = valid
found: int = 0
valid: int = 0

def add(self, o: "_MatchCounts") -> None:
self.found += o.found
Expand Down Expand Up @@ -417,37 +420,57 @@ def multi_replace(


def _split_name(name: str) -> Tuple[str, List[str]]:
"""Split a camel-case or underscore-formatted name into words. Return separator and words."""
if name.find("_") >= 0:
"""
Split a CamelCase or underscore-formatted name into words.
Return separator and list of words.
"""
if "_" in name:
# Underscore-separated name
return "_", name.split("_")
else:
temp = _camel_split_pat1.sub("\\1\t\\2", name)
temp = _camel_split_pat2.sub("\\1\t\\2", temp)
return "", temp.split("\t")
# CamelCase or mixed case name
words = []
current_word = ""
i = 0
while i < len(name):
char = name[i]
if i > 0 and char.isupper():
if name[i - 1].islower() or (i + 1 < len(name) and name[i + 1].islower()):
# Start a new word
words.append(current_word)
current_word = char
else:
current_word += char
else:
current_word += char
i += 1
if current_word:
words.append(current_word)
return "", words


def _capitalize(word: str) -> str:
return word[0].upper() + word[1:].lower()
return word[0].upper() + word[1:].lower() if word else "" # Handle empty strings safely


def to_lower_camel(name: str) -> str:
    """
    Convert a name to lowerCamelCase, e.g. `foo_bar` or `FooBar` -> `fooBar`.
    Returns an empty string if the name splits into no words.
    """
    _, words = _split_name(name)
    if not words:
        # Guard against indexing an empty word list (e.g. for an empty name).
        return ""
    first, *rest = words
    return first.lower() + "".join(_capitalize(word) for word in rest)


def to_upper_camel(name: str) -> str:
    """
    Convert a name to UpperCamelCase, e.g. `foo_bar` or `fooBar` -> `FooBar`.
    """
    _, words = _split_name(name)
    return "".join(_capitalize(word) for word in words)


def to_lower_underscore(name: str) -> str:
    """
    Convert a name to lower_underscore form, e.g. `FooBar` or `FOO_BAR` -> `foo_bar`.
    """
    _, words = _split_name(name)
    return "_".join(word.lower() for word in words)


def to_upper_underscore(name: str) -> str:
    """
    Convert a name to UPPER_UNDERSCORE form, e.g. `fooBar` or `foo_bar` -> `FOO_BAR`.
    """
    _, words = _split_name(name)
    return "_".join(word.upper() for word in words)


def _transform_expr(expr: str, transform: Callable[[str], str]) -> str:
Expand All @@ -456,8 +479,10 @@ def _transform_expr(expr: str, transform: Callable[[str], str]) -> str:


def all_case_variants(expr: str) -> List[str]:
"""Return all casing variations of an expression.
Note: This operates on strings and is called before pattern compilation."""
"""
Return all casing variations of an expression.
Note: This operates on strings and is called before pattern compilation.
"""
return [
_transform_expr(expr, transform)
for transform in [to_lower_camel, to_upper_camel, to_lower_underscore, to_upper_underscore]
Expand Down Expand Up @@ -879,4 +904,3 @@ def format_flags(flags: int) -> str:
# Log collisions
# Separate patterns file for renames and replacements
# Quiet and verbose modes (the latter logging each substitution)
# Support --preserve-case for Unicode (non-ASCII) characters (messy)
90 changes: 90 additions & 0 deletions tests/pytests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import pytest
from repren.repren import (
to_lower_camel,
to_upper_camel,
to_lower_underscore,
to_upper_underscore,
_split_name,
)


@pytest.mark.parametrize(
    "name, expected",
    [
        ("ÜnicodeString", ("", ["Ünicode", "String"])),
        ("sträßleTest", ("", ["sträßle", "Test"])),
        ("ГДеловойКод", ("", ["Г", "Деловой", "Код"])),
        ("ΚαλημέραWorld", ("", ["Καλημέρα", "World"])),
        ("normalTest", ("", ["normal", "Test"])),
        ("HTTPResponse", ("", ["HTTP", "Response"])),
        ("ThisIsATest", ("", ["This", "Is", "A", "Test"])),
        ("テストCase", ("", ["テスト", "Case"])),
        ("测试案例", ("", ["测试案例"])),  # Chinese characters
    ],
)
def test_split_name(name, expected):
    """CamelCase names split into (separator, words), including non-ASCII scripts."""
    result = _split_name(name)
    assert result == expected


@pytest.mark.parametrize(
    "name, expected",
    [
        ("ÜnicodeString", "ünicodeString"),
        ("HTTPResponse", "httpResponse"),
        ("ΚαλημέραWorld", "καλημέραWorld"),
        ("sträßleTest", "sträßleTest"),
        ("ThisIsATest", "thisIsATest"),
        ("テストCase", "テストCase"),
        ("测试案例", "测试案例"),
    ],
)
def test_to_lower_camel(name, expected):
    """CamelCase inputs convert to lowerCamelCase."""
    result = to_lower_camel(name)
    assert result == expected


@pytest.mark.parametrize(
    "name, expected",
    [
        ("ünicode_string", "ÜnicodeString"),
        ("sträßle_test", "SträßleTest"),
        ("http_response", "HttpResponse"),
        ("καλημέρα_world", "ΚαλημέραWorld"),
        ("this_is_a_test", "ThisIsATest"),
        ("テスト_case", "テストCase"),
        ("测试_案例", "测试案例"),
    ],
)
def test_to_upper_camel(name, expected):
    """Underscore-separated inputs convert to UpperCamelCase."""
    result = to_upper_camel(name)
    assert result == expected


@pytest.mark.parametrize(
    "name, expected",
    [
        ("ÜnicodeString", "ünicode_string"),
        ("HTTPResponse", "http_response"),
        ("ΚαλημέραWorld", "καλημέρα_world"),
        ("sträßleTest", "sträßle_test"),
        ("ThisIsATest", "this_is_a_test"),
        ("テストCase", "テスト_case"),
        ("测试案例", "测试案例"),
    ],
)
def test_to_lower_underscore(name, expected):
    """CamelCase inputs convert to lower_underscore form."""
    result = to_lower_underscore(name)
    assert result == expected


@pytest.mark.parametrize(
    "name, expected",
    [
        ("ünicode_string", "ÜNICODE_STRING"),
        ("http_response", "HTTP_RESPONSE"),
        ("καλημέρα_world", "ΚΑΛΗΜΈΡΑ_WORLD"),
        ("sträßle_test", "STRÄSSLE_TEST"),
        ("this_is_a_test", "THIS_IS_A_TEST"),
        ("テスト_case", "テスト_CASE"),
        ("测试_案例", "测试_案例"),
    ],
)
def test_to_upper_underscore(name, expected):
    """Underscore-separated inputs convert to UPPER_UNDERSCORE form."""
    result = to_upper_underscore(name)
    assert result == expected
56 changes: 0 additions & 56 deletions workflows/ci.yml

This file was deleted.

Loading

0 comments on commit 39dfe3c

Please sign in to comment.