Skip to content

Commit

Permalink
bump to v1.5.1
Browse files Browse the repository at this point in the history
- Updated docs and README.
- "pregex.quantifiers.{AtMost/AtLeastAtMost}" can now receive a zero-valued parameter.
- Fixed another bug in "Pregex.__infer_type".
- Added a separate test for checking the inferred type of various patterns.
- Modified existing tests and added some more.
  • Loading branch information
manoss96 committed Aug 14, 2022
1 parent 6538486 commit d311836
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 40 deletions.
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
myst-parser==0.18.0
pregex==1.5.0
pregex==1.5.1
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
author = 'Manos Stoumpos'

# The full version, including alpha/beta/rc tags
release = '1.5.0'
release = '1.5.1'


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion docs/source/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ regex = pre.get_pattern()

This is the pattern that we just built. Yikes!
```
(?:https?:\/\/)?(?:(?:www\.)?([A-za-z\d][A-Za-z\d\-.]{1,61}[A-Za-z\d])\.(?:com|org)|(?:\d{1,3}\.){3}\d{1,3}:\d{4})
(?:https?:\/\/)?(?:(?:www\.)?([A-Za-z\d][A-Za-z\d\-.]{1,61}[A-Za-z\d])\.(?:com|org)|(?:\d{1,3}\.){3}\d{1,3}:\d{4})
```

Besides having access to its underlying pattern, we can use a Pregex instance to find matches within a piece of text. Consider for example the following string:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "pregex"
version = "1.5.0"
version = "1.5.1"
authors = [
{email = "[email protected]"},
{name = "Manos Stoumpos"}
Expand Down
35 changes: 22 additions & 13 deletions src/pregex/pre.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,16 +488,15 @@ def __escape(pattern: str) -> str:
pattern = pattern.replace("\\", "\\\\")
for c in {'^', '$', '(', ')', '[', ']', '{', '}', '?', '+', '*', '.', '|', '/'}:
pattern = pattern.replace(c, f"\\{c}")
return pattern
return pattern



def __infer_type(pattern: str) -> _Type:
'''
Examines the provided RegEx pattern and returns its type.
:param str pattern: The RegEx pattern that is to be examined.
'''

def remove_groups(pattern: str, repl: str = ""):
'''
Removes all groups from the provided pattern, and replaces them with 'repl'.
Expand Down Expand Up @@ -540,8 +539,6 @@ def __is_group(pattern: str) -> bool:

if pattern == "":
return _Type.Empty
elif __is_group(pattern):
return _Type.Group
elif _re.fullmatch(r"\\?.", pattern, flags=__class__.__flags) is not None:
if _re.fullmatch(r"\.|\\(?:w|d|s)", pattern,
flags=__class__.__flags | _re.IGNORECASE) is not None:
Expand All @@ -551,11 +548,17 @@ def __is_group(pattern: str) -> bool:
return _Type.Assertion
else:
return _Type.Token
elif _re.fullmatch(r"\[.+\]", pattern) is not None:

# Simplify classes by removing extra characters.
pattern = _re.sub(r"\[.+?(?<!\\)\]", "[a]", pattern)

if pattern == "[a]":
return _Type.Class
elif __is_group(pattern):
return _Type.Group

# Remove any groups.
temp = remove_groups(pattern)
# Replace every group with a simple character.
temp = remove_groups(pattern, repl="G")

if len(_re.split(pattern=r"(?<!\\)\|", string=temp)) > 1:
return _Type.Alternation
Expand Down Expand Up @@ -645,12 +648,16 @@ def _at_most(self, n: int, is_greedy: bool = True) -> str:
Applies quantifier "{,n}" on this instance's underlying pattern and \
returns the resulting pattern as a string.
:param int n: The maximum number of times that the provided pattern is to be matched.
:param int | None n: The maximum number of times that the provided pattern is to be matched.
:param bool is_greedy: Determines whether to declare this quantifier as greedy. \
When declared as such, the regex engine will try to match \
the expression as many times as possible. Defaults to 'True'.
'''
if n == 1:
if n == None:
return self._indefinite(is_greedy)
elif n == 0:
return self._exactly(n)
elif n == 1:
return self._optional(is_greedy)
else:
return f"{self._quantify_conditional_group()}{{,{n}}}{'' if is_greedy else '?'}"
Expand All @@ -662,15 +669,17 @@ def _at_least_at_most(self, min: int, max: int, is_greedy: bool = True) -> str:
returns the resulting pattern as a string.
:param int min: The minimum number of times that the provided pattern is to be matched.
:param int max: The maximum number of times that the provided pattern is to be matched.
:param int | None max: The maximum number of times that the provided pattern is to be matched.
:param bool is_greedy: Determines whether to declare this quantifier as greedy. \
When declared as such, the regex engine will try to match \
the expression as many times as possible. Defaults to 'True'.
'''
if min == max:
return self._exactly(min)
elif min == 0 and max == 1:
return self._optional(is_greedy)
elif min == 0:
return self._at_most(max, is_greedy)
elif max is None:
return self._at_least(min, is_greedy)
else:
return f"{self._quantify_conditional_group()}{{{min},{max}}}{'' if is_greedy else '?'}"

Expand Down
38 changes: 22 additions & 16 deletions src/pregex/quantifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,16 +179,17 @@ class AtMost(__Quantifier):
:param Pregex | str pre: The pattern that is to be matched, provided either as a string \
or wrapped within a "Pregex" subtype instance.
:param int n: The maximum number of times that the provided pattern is to be matched.
:param int | None n: The maximum number of times that the provided pattern is to be matched.
:param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
When declared as such, the regex engine will try to match \
the expression as many times as possible. Defaults to 'True'.
:raises NonIntegerArgumentException: Parameter "n" is not an integer.
:raises NonPositiveArgumentException: Parameter "n" is less than one.
:raises NegativeArgumentException: Parameter "n" is less than zero.
:raises CannotBeQuantifiedException: This class is applied to an instance that represents an "assertion" pattern.
:note: Providing the value zero as parameter "n" results in the "Empty" pattern.
:note: Setting "n" equal to "None" indicates that there is no upper limit to the number of \
times the pattern is to be repeated.
'''

def __init__(self, pre: _pre.Pregex or str, n: int, is_greedy: bool = True) -> _pre.Pregex:
Expand All @@ -197,21 +198,23 @@ def __init__(self, pre: _pre.Pregex or str, n: int, is_greedy: bool = True) -> _
:param Pregex | str pre: The pattern that is to be matched, provided either as a string \
or wrapped within a "Pregex" subtype instance.
:param int n: The maximum number of times that the provided pattern is to be matched.
:param int | None n: The maximum number of times that the provided pattern is to be matched.
:param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
When declared as such, the regex engine will try to match \
the expression as many times as possible. Defaults to 'True'.
:raises NonIntegerArgumentException: Parameter "n" is not an integer.
:raises NonPositiveArgumentException: Parameter "n" is less than one.
:raises NegativeArgumentException: Parameter "n" is less than zero.
:raises CannotBeQuantifiedException: This class is applied to an instance that represents an "assertion" pattern.
:note: Providing the value zero as parameter "n" results in the "Empty" pattern.
:note: Setting "n" equal to "None" indicates that there is no upper limit to the number of \
times the pattern is to be repeated.
'''
if not isinstance(n, int) or isinstance(n, bool):
raise _ex.NonIntegerArgumentException(n)
if n < 1:
raise _ex.NonPositiveArgumentException("n", n)
if n is not None:
raise _ex.NonIntegerArgumentException(n)
elif n < 0:
raise _ex.NegativeArgumentException("n", n)
super().__init__(pre, is_greedy, lambda pre, is_greedy: pre._at_most(n, is_greedy))


Expand All @@ -222,7 +225,7 @@ class AtLeastAtMost(__Quantifier):
:param Pregex | str pre: The pattern that is to be matched, provided either as a string \
or wrapped within a "Pregex" subtype instance.
:param int min: The minimum number of times that the provided pattern is to be matched.
:param int max: The maximum number of times that the provided pattern is to be matched.
:param int | None max: The maximum number of times that the provided pattern is to be matched.
:param bool is_greedy: Indicates whether to declare this quantifier as greedy. \
When declared as such, the regex engine will try to match \
the expression as many times as possible. Defaults to 'True'.
Expand All @@ -233,7 +236,8 @@ class AtLeastAtMost(__Quantifier):
:raises CannotBeQuantifiedException: This class is applied to an instance that represents an "assertion" pattern.
:note: Parameter "is_greedy" has no effect in the case that "min" equals "max".
:note: Providing the value zero as both parameter "min" and "max" results in the "Empty" pattern.
:note: Setting "max" equal to "None" indicates that there is no upper limit to the number of \
times the pattern is to be repeated.
'''

def __init__(self, pre: _pre.Pregex or str, min: int, max: int, is_greedy: bool = True) -> _pre.Pregex:
Expand All @@ -254,15 +258,17 @@ def __init__(self, pre: _pre.Pregex or str, min: int, max: int, is_greedy: bool
:raises CannotBeQuantifiedException: This class is applied to an instance that represents an "assertion" pattern.
:note: Parameter "is_greedy" has no effect in the case that "min" equals "max".
:note: Providing the value zero as both parameter "min" and "max" results in the "Empty" pattern.
:note: Setting "max" equal to "None" indicates that there is no upper limit to the number of \
times the pattern is to be repeated.
'''
if not isinstance(min, int) or isinstance(min, bool):
raise _ex.NonIntegerArgumentException(min)
if not isinstance(max, int) or isinstance(max, bool):
raise _ex.NonIntegerArgumentException(max)
if min < 0:
elif min < 0:
raise _ex.NegativeArgumentException("min", min)
if max < 0:
elif not isinstance(max, int) or isinstance(max, bool):
if max is not None:
raise _ex.NonIntegerArgumentException(max)
elif max < 0:
raise _ex.NegativeArgumentException("max", max)
elif max < min:
raise _ex.MinGreaterThanMaxException(min, max)
Expand Down
25 changes: 25 additions & 0 deletions tests/test_pre.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,31 @@ def test_pregex_on_right_side_multiplication(self):
self.assertRaises(NonIntegerArgumentException, self.pre1.__rmul__, val)
self.assertRaises(CannotBeQuantifiedException, MatchAtStart("x").__rmul__, 2)

'''
Test Pregex's "__infer_type".
'''
def test_pregex_infer_type(self):
self.assertEqual(Pregex("abc|acd", escape=False)._get_type(), _Type.Alternation)
self.assertEqual(Pregex("(abc|acd)|(ab)?", escape=False)._get_type(), _Type.Alternation)
self.assertEqual(Pregex("(?<!a)b|c", escape=False)._get_type(), _Type.Alternation)
self.assertEqual(Pregex("(?<!a)b", escape=False)._get_type(), _Type.Assertion)
self.assertEqual(Pregex("(?<=[(\s])a", escape=False)._get_type(), _Type.Assertion)
self.assertEqual(Pregex("(?<!a)(?:b|c)", escape=False)._get_type(), _Type.Assertion)
self.assertEqual(Pregex("(?<![)])(?:b|c)", escape=False)._get_type(), _Type.Assertion)
self.assertEqual(Pregex("(?<!\))(?:b|c)", escape=False)._get_type(), _Type.Assertion)
self.assertEqual(Pregex("[(.z;!\]]", escape=False)._get_type(), _Type.Class)
self.assertEqual(Pregex("[\[a\]]", escape=False)._get_type(), _Type.Class)
self.assertEqual(Pregex("(abc|acd)", escape=False)._get_type(), _Type.Group)
self.assertEqual(Pregex("(a\\\\\))", escape=False)._get_type(), _Type.Group)
self.assertEqual(Pregex("(?abc)", escape=False)._get_type(), _Type.Group)
self.assertEqual(Pregex("\w\s", escape=False)._get_type(), _Type.Other)
self.assertEqual(Pregex("([A-Za-z_])[0-9]+([a-z]?)", escape=False)._get_type(), _Type.Other)
self.assertEqual(Pregex("(?abc)(abc)", escape=False)._get_type(), _Type.Other)
self.assertEqual(Pregex("(abc|acd)\|(ab)?", escape=False)._get_type(), _Type.Other)
self.assertEqual(Pregex("((abc|acd)|(ab))\\{1234,1245\\}", escape=False)._get_type(), _Type.Other)
self.assertEqual(Pregex("((abc|acd)|(ab))?", escape=False)._get_type(), _Type.Quantifier)
self.assertEqual(Pregex("((abc|acd)|(ab)){1234,1245}", escape=False)._get_type(), _Type.Quantifier)


class TestEmpty(unittest.TestCase):

Expand Down
89 changes: 82 additions & 7 deletions tests/test_quantifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,14 @@ def test_at_least_on_laziness(self):
val = 3
self.assertEqual(str(AtLeast(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N}){{{val},}}?")

def test_at_least_on_value_0(self):
val = 0
self.assertEqual(str(AtLeast(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N})*?")

def test_at_least_on_lazy_value_1(self):
val = 1
self.assertEqual(str(AtLeast(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N})+?")

def test_at_least_on_type(self):
self.assertEqual(AtLeast("a", n=2)._get_type(), _Type.Quantifier)
self.assertEqual(AtLeast("abc", n=2)._get_type(), _Type.Quantifier)
Expand Down Expand Up @@ -230,16 +238,32 @@ def test_at_most_on_len_1_literal(self):

def test_at_most_on_len_n_literal(self):
for val in self.VALID_VALUES:
self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val)), f"(?:{TEST_LITERAL_LEN_N}){{,{val}}}")
self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val)), f"(?:{TEST_LITERAL_LEN_N}){{,{val}}}")

def test_at_most_on_value_0(self):
val = 0
self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val)), "")

def test_at_most_on_value_1(self):
val = 1
self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val)), f"(?:{TEST_LITERAL_LEN_N})?")

def test_at_most_on_value_None(self):
val = None
self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val)), f"(?:{TEST_LITERAL_LEN_N})*")

def test_at_most_on_laziness(self):
val = 3
self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N}){{,{val}}}?")

def test_at_most_on_lazy_value_1(self):
val = 1
self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N})??")

def test_at_most_on_lazy_value_None(self):
val = None
self.assertEqual(str(AtMost(TEST_LITERAL_LEN_N, val, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N})*?")

def test_at_most_on_type(self):
self.assertEqual(AtMost("a", n=2)._get_type(), _Type.Quantifier)
self.assertEqual(AtMost("abc", n=2)._get_type(), _Type.Quantifier)
Expand All @@ -250,8 +274,8 @@ def test_at_most_on_invalid_type_values(self):
self.assertRaises(NonIntegerArgumentException, AtMost, TEST_STR_LEN_1, val)

def test_at_most_on_invalid_values(self):
for val in [-10, -1, 0]:
self.assertRaises(NonPositiveArgumentException, AtMost, TEST_STR_LEN_1, val)
for val in [-10, -1]:
self.assertRaises(NegativeArgumentException, AtMost, TEST_STR_LEN_1, val)

class TestAtLeastAtMost(unittest.TestCase):

Expand All @@ -273,19 +297,70 @@ def test_at_least_at_most_on_len_n_literal(self):
for min, max in self.VALID_VALUES:
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N}){{{min},{max}}}")

def test_at_least_at_most_on_min_equal_to_max(self):
min, max = 4, 4
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N}){{{min}}}")

def test_at_least_at_most_on_min_equal_to_max_equal_to_zero(self):
min, max = 0, 0
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), "")

def test_at_least_at_most_on_min_equal_to_zero_max_equal_to_1(self):
min, max = 0, 1
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N})?")

def test_at_least_at_most_on_min_equal_to_zero_max_greater_than_1(self):
min, max = 0, 2
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N}){{,{max}}}")

def test_at_least_at_most_on_min_equal_to_zero_max_equal_to_None(self):
min, max = 0, None
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N})*")

def test_at_least_at_most_on_min_equal_to_max_equal_to_one(self):
min, max = 1, 1
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), str(TEST_LITERAL_LEN_N))

def test_at_least_at_most_on_min_equal_to_one_max_equal_to_None(self):
min, max = 1, None
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N})+")

def test_at_least_at_most_on_min_equal_to_max(self):
min, max = 2, 2
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N}){{{min}}}")

def test_at_least_at_most_on_min_equal_to_two_max_equal_to_None(self):
min, max = 2, None
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max)), f"(?:{TEST_LITERAL_LEN_N}){{{min},}}")

def test_at_least_at_most_on_laziness(self):
min, max = 3, 5
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)),
f"(?:{TEST_LITERAL_LEN_N}){{{min},{max}}}?")

def test_at_least_at_most_on_lazy_min_equal_to_zero_max_equal_to_1(self):
min, max = 0, 1
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N})??")

def test_at_least_at_most_on_lazy_min_equal_to_zero_max_greater_than_1(self):
min, max = 0, 2
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N}){{,{max}}}?")

def test_at_least_at_most_on_lazy_min_equal_to_zero_max_equal_to_None(self):
min, max = 0, None
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)),
f"(?:{TEST_LITERAL_LEN_N})*?")

def test_at_least_at_most_on_lazy_min_equal_to_one_max_equal_to_None(self):
min, max = 1, None
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)),
f"(?:{TEST_LITERAL_LEN_N})+?")

def test_at_least_at_most_on_lazy_min_equal_to_two_max_equal_to_None(self):
min, max = 2, None
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)),
f"(?:{TEST_LITERAL_LEN_N}){{{min},}}?")

def test_at_least_at_most_on_lazy_min_equal_to_max(self):
min, max = 2, 2
self.assertEqual(str(AtLeastAtMost(TEST_LITERAL_LEN_N, min, max, is_greedy=False)), f"(?:{TEST_LITERAL_LEN_N}){{{min}}}")

def test_at_least_at_most_on_type(self):
self.assertEqual(AtLeastAtMost("a", min=1, max=2)._get_type(), _Type.Quantifier)
self.assertEqual(AtLeastAtMost("abc", min=1, max=2)._get_type(), _Type.Quantifier)
Expand Down

0 comments on commit d311836

Please sign in to comment.