From d31183604f0bc81ae1fc7bed220a7c9e184c8d83 Mon Sep 17 00:00:00 2001 From: manoss96 Date: Sun, 14 Aug 2022 15:40:24 +0300 Subject: [PATCH] bump to v1.5.1 - Updated docs and README. - "pregex.quantifiers.{AtMost/AtLeastAtMost}" can now receive zero value parameter. - Fixed another bug in "Pregex.__infer_type". - Added separate test for testing various patterns for their inferred type. - Modified existing tests and added some more. --- docs/requirements.txt | 2 +- docs/source/conf.py | 2 +- docs/source/readme.md | 2 +- pyproject.toml | 2 +- src/pregex/pre.py | 35 +++++++++------ src/pregex/quantifiers.py | 38 ++++++++++------- tests/test_pre.py | 25 +++++++++++ tests/test_quantifiers.py | 89 ++++++++++++++++++++++++++++++++++++--- 8 files changed, 155 insertions(+), 40 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 2c80ceb..ac15dde 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,2 @@ myst-parser==0.18.0 -pregex==1.5.0 \ No newline at end of file +pregex==1.5.1 \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 906152e..6aae3be 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,7 @@ author = 'Manos Stoumpos' # The full version, including alpha/beta/rc tags -release = '1.5.0' +release = '1.5.1' # -- General configuration --------------------------------------------------- diff --git a/docs/source/readme.md b/docs/source/readme.md index d37f852..d038279 100644 --- a/docs/source/readme.md +++ b/docs/source/readme.md @@ -75,7 +75,7 @@ regex = pre.get_pattern() This is the pattern that we just built. Yikes! 
``` -(?:https?:\/\/)?(?:(?:www\.)?([A-za-z\d][A-Za-z\d\-.]{1,61}[A-Za-z\d])\.(?:com|org)|(?:\d{1,3}\.){3}\d{1,3}:\d{4}) +(?:https?:\/\/)?(?:(?:www\.)?([A-Za-z\d][A-Za-z\d\-.]{1,61}[A-Za-z\d])\.(?:com|org)|(?:\d{1,3}\.){3}\d{1,3}:\d{4}) ``` Besides from having access to its underlying pattern, we can use a Pregex instance to find matches within a piece of text. Consider for example the following string: diff --git a/pyproject.toml b/pyproject.toml index 19a403e..1b23f98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pregex" -version = "1.5.0" +version = "1.5.1" authors = [ {email = "manosstoumpos@gmail.com"}, {name = "Manos Stoumpos"} diff --git a/src/pregex/pre.py b/src/pregex/pre.py index 84d412b..286cce7 100644 --- a/src/pregex/pre.py +++ b/src/pregex/pre.py @@ -488,16 +488,15 @@ def __escape(pattern: str) -> str: pattern = pattern.replace("\\", "\\\\") for c in {'^', '$', '(', ')', '[', ']', '{', '}', '?', '+', '*', '.', '|', '/'}: pattern = pattern.replace(c, f"\\{c}") - return pattern + return pattern + - def __infer_type(pattern: str) -> _Type: ''' Examines the provided RegEx pattern and returns its type. :param str pattern: The RegEx pattern that is to be examined. ''' - def remove_groups(pattern: str, repl: str = ""): ''' Removes all groups from the provided pattern, and replaces them with 'repl'. @@ -540,8 +539,6 @@ def __is_group(pattern: str) -> bool: if pattern == "": return _Type.Empty - elif __is_group(pattern): - return _Type.Group elif _re.fullmatch(r"\\?.", pattern, flags=__class__.__flags) is not None: if _re.fullmatch(r"\.|\\(?:w|d|s)", pattern, flags=__class__.__flags | _re.IGNORECASE) is not None: @@ -551,11 +548,17 @@ def __is_group(pattern: str) -> bool: return _Type.Assertion else: return _Type.Token - elif _re.fullmatch(r"\[.+\]", pattern) is not None: + + # Simplify classes by removing extra characters. + pattern = _re.sub(r"\[.+?(? 
1: return _Type.Alternation @@ -645,12 +648,16 @@ def _at_most(self, n: int, is_greedy: bool = True) -> str: Applies quantifier "{,n}" on this instance's underlying pattern and \ returns the resulting pattern as a string. - :param int n: The maximum number of times that the provided pattern is to be matched. + :param int | None n: The maximum number of times that the provided pattern is to be matched. :param bool is_greedy: Determines whether to declare this quantifier as greedy. \ When declared as such, the regex engine will try to match \ the expression as many times as possible. Defaults to 'True'. ''' - if n == 1: + if n == None: + return self._indefinite(is_greedy) + elif n == 0: + return self._exactly(n) + elif n == 1: return self._optional(is_greedy) else: return f"{self._quantify_conditional_group()}{{,{n}}}{'' if is_greedy else '?'}" @@ -662,15 +669,17 @@ def _at_least_at_most(self, min: int, max: int, is_greedy: bool = True) -> str: returns the resulting pattern as a string. :param int min: The minimum number of times that the provided pattern is to be matched. - :param int max: The maximum number of times that the provided pattern is to be matched. + :param int | None max: The maximum number of times that the provided pattern is to be matched. :param bool is_greedy: Determines whether to declare this quantifier as greedy. \ When declared as such, the regex engine will try to match \ the expression as many times as possible. Defaults to 'True'. 
''' if min == max: return self._exactly(min) - elif min == 0 and max == 1: - return self._optional(is_greedy) + elif min == 0: + return self._at_most(max, is_greedy) + elif max is None: + return self._at_least(min, is_greedy) else: return f"{self._quantify_conditional_group()}{{{min},{max}}}{'' if is_greedy else '?'}" diff --git a/src/pregex/quantifiers.py b/src/pregex/quantifiers.py index 7e83fb3..43d2ef3 100644 --- a/src/pregex/quantifiers.py +++ b/src/pregex/quantifiers.py @@ -179,16 +179,17 @@ class AtMost(__Quantifier): :param Pregex | str pre: The pattern that is to be matched, provided either as a string \ or wrapped within a "Pregex" subtype instance. - :param int n: The maximum number of times that the provided pattern is to be matched. + :param int n | None: The maximum number of times that the provided pattern is to be matched. :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \ When declared as such, the regex engine will try to match \ the expression as many times as possible. Defaults to 'True'. :raises NonIntegerArgumentException: Parameter "n" is not an integer. - :raises NonPositiveArgumentException: Parameter "n" is less than one. + :raises NegativeArgumentException: Parameter "n" is less than zero. :raises CannotBeQuantifiedException: This class is applied to an instance that represents an "assertion" pattern. - :note: Providing the value zero as parameter "n" results in the "Empty" pattern. + :note: Setting "n" equal to "None" indicates that there is no upper limit to the number of \ + times the pattern is to be repeated. ''' def __init__(self, pre: _pre.Pregex or str, n: int, is_greedy: bool = True) -> _pre.Pregex: @@ -197,21 +198,23 @@ def __init__(self, pre: _pre.Pregex or str, n: int, is_greedy: bool = True) -> _ :param Pregex | str pre: The pattern that is to be matched, provided either as a string \ or wrapped within a "Pregex" subtype instance. 
- :param int n: The maximum number of times that the provided pattern is to be matched. + :param int n | None: The maximum number of times that the provided pattern is to be matched. :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \ When declared as such, the regex engine will try to match \ the expression as many times as possible. Defaults to 'True'. :raises NonIntegerArgumentException: Parameter "n" is not an integer. - :raises NonPositiveArgumentException: Parameter "n" is less than one. + :raises NegativeArgumentException: Parameter "n" is less than zero. :raises CannotBeQuantifiedException: This class is applied to an instance that represents an "assertion" pattern. - :note: Providing the value zero as parameter "n" results in the "Empty" pattern. + :note: Setting "n" equal to "None" indicates that there is no upper limit to the number of \ + times the pattern is to be repeated. ''' if not isinstance(n, int) or isinstance(n, bool): - raise _ex.NonIntegerArgumentException(n) - if n < 1: - raise _ex.NonPositiveArgumentException("n", n) + if n is not None: + raise _ex.NonIntegerArgumentException(n) + elif n < 0: + raise _ex.NegativeArgumentException("n", n) super().__init__(pre, is_greedy, lambda pre, is_greedy: pre._at_most(n, is_greedy)) @@ -222,7 +225,7 @@ class AtLeastAtMost(__Quantifier): :param Pregex | str pre: The pattern that is to be matched, provided either as a string \ or wrapped within a "Pregex" subtype instance. :param int min: The minimum number of times that the provided pattern is to be matched. - :param int max: The maximum number of times that the provided pattern is to be matched. + :param int | None max: The maximum number of times that the provided pattern is to be matched. :param bool is_greedy: Indicates whether to declare this quantifier as greedy. \ When declared as such, the regex engine will try to match \ the expression as many times as possible. Defaults to 'True'. 
@@ -233,7 +236,8 @@ class AtLeastAtMost(__Quantifier): :raises CannotBeQuantifiedException: This class is applied to an instance that represents an "assertion" pattern. :note: Parameter "is_greedy" has no effect in the case that "min" equals "max". - :note: Providing the value zero as both parameter "min" and "max" results in the "Empty" pattern. + :note: Setting "max" equal to "None" indicates that there is no upper limit to the number of \ + times the pattern is to be repeated. ''' def __init__(self, pre: _pre.Pregex or str, min: int, max: int, is_greedy: bool = True) -> _pre.Pregex: @@ -254,15 +258,17 @@ def __init__(self, pre: _pre.Pregex or str, min: int, max: int, is_greedy: bool :raises CannotBeQuantifiedException: This class is applied to an instance that represents an "assertion" pattern. :note: Parameter "is_greedy" has no effect in the case that "min" equals "max". - :note: Providing the value zero as both parameter "min" and "max" results in the "Empty" pattern. + :note: Setting "max" equal to "None" indicates that there is no upper limit to the number of \ + times the pattern is to be repeated. 
''' if not isinstance(min, int) or isinstance(min, bool): raise _ex.NonIntegerArgumentException(min) - if not isinstance(max, int) or isinstance(max, bool): - raise _ex.NonIntegerArgumentException(max) - if min < 0: + elif min < 0: raise _ex.NegativeArgumentException("min", min) - if max < 0: + elif not isinstance(max, int) or isinstance(max, bool): + if max is not None: + raise _ex.NonIntegerArgumentException(max) + elif max < 0: raise _ex.NegativeArgumentException("max", max) elif max < min: raise _ex.MinGreaterThanMaxException(min, max) diff --git a/tests/test_pre.py b/tests/test_pre.py index 737d889..d0beeae 100644 --- a/tests/test_pre.py +++ b/tests/test_pre.py @@ -216,6 +216,31 @@ def test_pregex_on_right_side_multiplication(self): self.assertRaises(NonIntegerArgumentException, self.pre1.__rmul__, val) self.assertRaises(CannotBeQuantifiedException, MatchAtStart("x").__rmul__, 2) + ''' + Test Pregex's "__infer_type". + ''' + def test_pregex_infer_type(self): + self.assertEqual(Pregex("abc|acd", escape=False)._get_type(), _Type.Alternation) + self.assertEqual(Pregex("(abc|acd)|(ab)?", escape=False)._get_type(), _Type.Alternation) + self.assertEqual(Pregex("(?