bump to v2.3.0

### v2.3.0 - Added classes "pregex.meta.essential {Text, NonWhitespace, Whitespace}". - Class "pregex.core.groups.Group" and method "pregex.core.pre.Pregex.group" now have an "is_case_insensitive" parameter, which can be used in order to apply the "case insensitive" modifier to the pattern that is wrapped within the group. - A "CannotBeNegatedException" is now thrown whenever one attempts to invert an instance of class "pregex.core.classes.Any". - Fixed bug where subtracting a character from a two-character character class range wouldn't successfully remove said character from the range. - Slightly updated documentation and README. - Modified some existing tests and added some more in order to achieve 100% coverage. - Updates on CI and PyPI package upload automation
manoss96 · Oct 8, 2022 · bfba67e · bfba67e
1 parent ef1b212
commit bfba67e
Show file tree

Hide file tree

Showing 16 changed files with 473 additions and 167 deletions.
diff --git a/.github/workflows/.pypi_upload.yml b/.github/workflows/.pypi_upload.yml
@@ -0,0 +1,32 @@
+name: Publish to PyPI
+
+on:
+  push:
+    tags:
+     - '*'
+
+jobs:
+  publish:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+
+      - name: Install dependencies
+        run: |
+          pip install build
+
+      - name: Build dist
+        run: |
+          python -m build --outdir dist/
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,9 +1,15 @@
 name: Main CI
 
-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - '**'
+  pull_request:
+    branches:
+      - '**'
 
 jobs:
-  build:
+  test-and-coverage:
 
     runs-on: ubuntu-latest
 
@@ -19,7 +25,7 @@ jobs:
         run: |
           echo "PYTHONPATH=${GITHUB_WORKSPACE}/src" >> $GITHUB_ENV
 
-      - name: Run Tests
+      - name: Run tests
         run: |
           python -m pip install coverage
           cd tests

diff --git a/README.md b/README.md
@@ -208,8 +208,8 @@ Having initialized ``wordle`` as a ``Pregex`` instance, we can simply simply do
 ``wordle.followed_by(some_pattern)`` so as to indicate that any potential match
 with ``wordle`` must be followed by ``some_pattern``. Recall that ``wordle`` merely
 represents the empty string, so we are not really matching anything at this point.
-Applying an assertion to the empty string pattern is just a neat little trick that
-one can use in order to validate something about their pattern before they even begin
+Applying an assertion to the empty string pattern is just a neat little trick one
+can use in order to validate something about their pattern before they even begin
 to build it.
 
 Now it's just a matter of figuring out what the value of ``some_pattern`` is.
@@ -246,16 +246,18 @@ represents the following RegEx pattern:
 
 After we have made sure that our pattern will reject any words that do not contain
 all the yellow letters, we can finally start building the part of the pattern that
-will handle the actual matching. This can easily be achived by performing five iterations,
-one for each letter of the word, where at each iteration ``i`` we construct a new character
-class, that is then appended to our pattern based on the following logic:
+will handle the actual matching. This can easily be achieved by performing five
+iterations, one for each letter of the word, where at each iteration ``i`` we
+construct a new character class, which is then appended to our pattern based
+on the following logic:
 
 * If the letter that corresponds to the word's i-th position is known, then
   make it so that the pattern only matches that letter at that position.
 
 * If the letter that corresponds to the word's i-th position is not known,
   then make it so that the pattern matches any letter except for gray letters,
-  as well as any yellow letters that may have been ruled out for that position.
+  green letters, as well as any yellow letters that may have been ruled out for
+  that exact position.
 
 The following code snippet does just that:
 
@@ -266,7 +268,7 @@ for i in range(1, 6):
     if i in green:
         wordle += green[i]
     else:
-        invalid_chars_at_pos_i = list(gray)
+        invalid_chars_at_pos_i = gray + list(green.values())
         if i in yellow:
             invalid_chars_at_pos_i += yellow[i]
         wordle += AnyUppercaseLetter() - AnyFrom(*invalid_chars_at_pos_i)
@@ -276,7 +278,7 @@ After executing the above code, ``wordle`` will contain the following
 RegEx pattern:
 
 ```
-(?=[A-Z]{,4}A)(?=[A-Z]{,4}R)(?=[A-Z]{,4}P)[BE-KOPSTV-Z][ABE-KOPR-TV-Z][ABE-KOR-TV-Z]T[ABE-KOPR-TV-Z]
+(?=[A-Z]{,4}A)(?=[A-Z]{,4}R)(?=[A-Z]{,4}P)[BE-KOPSV-Z][ABE-KOPRSV-Z][ABE-KORSV-Z]T[ABE-KOPRSV-Z]
 ```
 
 ### Matching from a dictionary
@@ -318,7 +320,7 @@ def wordle_solver(green: dict[int, str], yellow: dict[int, list[str]], gray: lis
         if i in green:
             wordle += green[i]
         else:
-            invalid_chars_at_pos_i = list(gray)
+            invalid_chars_at_pos_i = gray + list(green.values())
             if i in yellow:
                 invalid_chars_at_pos_i += yellow[i]
             wordle += AnyUppercaseLetter() - AnyFrom(*invalid_chars_at_pos_i)

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -22,7 +22,7 @@
 author = 'Manos Stoumpos'
 
 # The full version, including alpha/beta/rc tags
-release = '2.2.1'
+release = '2.3.0'
 
 
 # -- General configuration ---------------------------------------------------

diff --git a/docs/source/documentation/covering-the-basics.rst b/docs/source/documentation/covering-the-basics.rst
@@ -2,9 +2,9 @@
 Covering the Basics
 ###################
 
-In this section you will learn about the :class:`~pregex.core.pre.Pregex` class,
-and how instances of this class can be effectively combined together in order
-to construct complex RegEx patterns.
+In this section you will be learning about the :class:`~pregex.core.pre.Pregex`
+class, and how instances of this class can be effectively combined together in
+order to construct complex RegEx patterns.
 
 The Pregex class
 ============================================
@@ -36,10 +36,10 @@ all other classes inherit.
 
    # These are both Pregex instances.
    digit: Pregex = AnyDigit()
-   a_or_b: Pregex = Either('a', 'b')
+   either_a_or_b: Pregex = Either('a', 'b')
 
    # This is a Pregex instance as well!
-   digit_followed_by_either_a_or_b: Pregex = FollowedBy(digit, a_or_b)
+   digit_followed_by_either_a_or_b: Pregex = FollowedBy(digit, either_a_or_b)
 
 Being wrapped within instances of the same type allows for these Pregex
 patterns to be easily combined together into even more complex patterns.

diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst
@@ -229,8 +229,8 @@ Having initialized ``wordle`` as a ``Pregex`` instance, we can simply simply do
 ``wordle.followed_by(some_pattern)`` so as to indicate that any potential match
 with ``wordle`` must be followed by ``some_pattern``. Recall that ``wordle`` merely
 represents the empty string, so we are not really matching anything at this point.
-Applying an assertion to the empty string pattern is just a neat little trick that
-one can use in order to validate something about their pattern before they even begin
+Applying an assertion to the empty string pattern is just a neat little trick one
+can use in order to validate something about their pattern before they even begin
 to build it.
 
 Now it's just a matter of figuring out what the value of ``some_pattern`` is.
@@ -268,16 +268,18 @@ Building valid character classes
 
 After we have made sure that our pattern will reject any words that do not contain
 all the yellow letters, we can finally start building the part of the pattern that
-will handle the actual matching. This can easily be achived by performing five iterations,
-one for each letter of the word, where at each iteration ``i`` we construct a new character
-class, that is then appended to our pattern based on the following logic:
+will handle the actual matching. This can easily be achieved by performing five
+iterations, one for each letter of the word, where at each iteration ``i`` we
+construct a new character class, which is then appended to our pattern based
+on the following logic:
 
 * If the letter that corresponds to the word's i-th position is known, then
   make it so that the pattern only matches that letter at that position.
 
 * If the letter that corresponds to the word's i-th position is not known,
   then make it so that the pattern matches any letter except for gray letters,
-  as well as any yellow letters that may have been ruled out for that position.
+  green letters, as well as any yellow letters that may have been ruled out for
+  that exact position.
 
 The following code snippet does just that:
 
@@ -289,7 +291,7 @@ The following code snippet does just that:
       if i in green:
           wordle += green[i]
       else:
-          invalid_chars_at_pos_i = list(gray)
+          invalid_chars_at_pos_i = gray + list(green.values())
           if i in yellow:
               invalid_chars_at_pos_i += yellow[i]
           wordle += AnyUppercaseLetter() - AnyFrom(*invalid_chars_at_pos_i)
@@ -299,7 +301,7 @@ RegEx pattern:
 
 .. code-block::
 
-  (?=[A-Z]{,4}A)(?=[A-Z]{,4}R)(?=[A-Z]{,4}P)[BE-KOPSTV-Z][ABE-KOPR-TV-Z][ABE-KOR-TV-Z]T[ABE-KOPR-TV-Z]
+  (?=[A-Z]{,4}A)(?=[A-Z]{,4}R)(?=[A-Z]{,4}P)[BE-KOPSV-Z][ABE-KOPRSV-Z][ABE-KORSV-Z]T[ABE-KOPRSV-Z]
 
 Matching from a dictionary
 ---------------------------
@@ -343,7 +345,7 @@ constitute possible solutions to the problem.
           if i in green:
               wordle += green[i]
           else:
-              invalid_chars_at_pos_i = list(gray)
+              invalid_chars_at_pos_i = gray + list(green.values())
               if i in yellow:
                   invalid_chars_at_pos_i += yellow[i]
               wordle += AnyUppercaseLetter() - AnyFrom(*invalid_chars_at_pos_i)

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pregex"
-version = "2.2.1"
+version = "2.3.0"
 authors = [
   {email = "[email protected]"},
   {name = "Manos Stoumpos"}

diff --git a/src/pregex/core/classes.py b/src/pregex/core/classes.py
@@ -282,7 +282,7 @@ def __chars_to_ranges(ranges: set[str], chars: set[str]) -> tuple[set[str], set[
         ranges: list[list[str]] = list(__class__.__split_range(rng) for rng in 
             __class__.__modify_classes(ranges, escape=False))
 
-        # 2. Check whether ranges can be constructed from classes.
+        # 2. Check whether ranges can be constructed from chars.
         i = 0
         while i < len(chars):
             for j in range(len(chars)):
@@ -307,60 +307,17 @@ def __chars_to_ranges(ranges: set[str], chars: set[str]) -> tuple[set[str], set[
         # Check whether these character-ranges can be incorporated into
         # any existing ranges. If two characters are next to each other
         # then keep them as characters.
-        if len(ranges) > 0:
-            i = 0
-            while i < len(chars):
-                j = 0
-                while j < len(ranges):
-                    c_i, r_j = chars[i], ranges[j]
-                    if len(c_i) == 1:
-                        if ord(c_i) == ord(r_j[0]) - 1:
-                            ranges[j][0] = c_i
-                            chars.pop(i)
-                            i = -1
-                            break
-                        elif ord(c_i) == ord(r_j[1]) + 1:
-                            ranges[j][1] = c_i
-                            chars.pop(i)
-                            i = -1
-                    else:
-                        if ord(c_i[1]) == ord(r_j[0]) - 1:
-                            ranges[j][0] = c_i[0]
-                            chars.pop(i)
-                            i = -1
-                            break
-                        elif ord(c_i[0]) == ord(r_j[1]) + 1:
-                            ranges[j][1] = c_i[1]
-                            chars.pop(i)
-                            i = -1
-                            break
-                        elif ord(c_i[1]) == ord(c_i[0]) + 1:
-                            chars.pop(i)
-                            chars.append(c_i[0])
-                            chars.append(c_i[1])
-                            i = -1
-                            break
-                        else:
-                            ranges.append([c_i[0], c_i[1]])
-                            chars.pop(i)
-                            i = -1
-                            break
-                    j += 1
-                i += 1
-            ranges_set = set(f"{rng[0]}-{rng[1]}" for rng in ranges)
-            chars_set = set(chars)
-        else:
-            ranges_set = set()
-            chars_set = set()
-            for c in chars:
-                if len(c) == 1:
-                    chars_set.add(c)
+        ranges_set = set(f"{rng[0]}-{rng[1]}" for rng in ranges)
+        chars_set = set()
+        for c in chars:
+            if len(c) == 1:
+                chars_set.add(c)
+            else:
+                if ord(c[1]) == ord(c[0]) + 1:
+                    chars_set.add(c[0])
+                    chars_set.add(c[1])
                 else:
-                    if ord(c[1]) == ord(c[0]) + 1:
-                        chars_set.add(c[0])
-                        chars_set.add(c[1])
-                    else:
-                        ranges_set.add(f"{c[0]}-{c[1]}")
+                    ranges_set.add(f"{c[0]}-{c[1]}")
 
         ranges = __class__.__modify_classes(ranges_set, escape=True)
         chars = __class__.__modify_classes(chars_set, escape=True)
@@ -624,7 +581,6 @@ def subtract_ranges(ranges1: set[str], ranges2: set[str]) -> tuple[set[str], set
                             break
                 i += 1
 
-
             ranges, chars = set(), set()
             for start, end in ranges1:
                 if start == end:
@@ -659,12 +615,12 @@ def subtract_ranges(ranges1: set[str], ranges2: set[str]) -> tuple[set[str], set
         # 2.b Subtract chars2 from chars1.
         chars1 = chars1.difference(chars2)
 
-        # 2.c. Subtract ranges2 from ranges1.
-        ranges1, reduced_chars = subtract_ranges(ranges1, ranges2)
+        # 2.c. Subtract any characters in chars2 from ranges1.
+        ranges1, reduced_chars = subtract_ranges(ranges1, set(f"{c}-{c}" for c in chars2))
         chars1 = chars1.union(reduced_chars)
 
-        # 2.d. Subtract any characters in chars2 from ranges1.
-        ranges1, reduced_chars = subtract_ranges(ranges1, set(f"{c}-{c}" for c in chars2))
+        # 2.d. Subtract ranges2 from ranges1.
+        ranges1, reduced_chars = subtract_ranges(ranges1, ranges2)
         chars1 = chars1.union(reduced_chars)
 
         # 3. Union ranges and chars together while escaping them.
@@ -693,9 +649,8 @@ def __extract_classes(pattern: str, unescape: bool = False) -> tuple[set[str], s
         def get_start_index(pattern: str):
             if pattern.startswith('[^'):
                 return 2
-            elif pattern.startswith('['):
+            else:
                 return 1
-            return 0
 
         # Remove brackets etc from string.
         start_index = get_start_index(pattern)
@@ -771,7 +726,7 @@ def __split_range(pattern: str) -> tuple[str, str]:
 
         if count == 1:
             return pattern.split("-")
-        elif count== 2:
+        elif count == 2:
             split_fun = pattern.split if pattern[-1] == "-" else pattern.rsplit
             return split_fun("-", 1)
         else:
@@ -789,6 +744,12 @@ def __init__(self) -> 'Any':
         '''
         super().__init__('.', is_negated=False)
 
+    def __invert__(self) -> None:
+        '''
+        Raises a "CannotBeNegatedException".
+        '''
+        raise _ex.CannotBeNegatedException()
+
 
 class AnyLetter(__Class):
     '''