perf: Cache compiled regex

This replaces direct calls to the regex module with calls to a custom module, regex_utils, which is intended to cache the compiled pattern for every regex+flag combination. This speeds up the code because fewer calls to enum.__and__ are needed. Proposed by @oliverhaas based on a previous, similar patch of mine. Co-Authored-By: oliverhaas <[email protected]>
djlint · Nov 2, 2024 · 9fe82d3 · 9fe82d3
1 parent 02008ee
commit 9fe82d3
Show file tree

Hide file tree

Showing 13 changed files with 178 additions and 87 deletions.
diff --git a/djlint/formatter/attributes.py b/djlint/formatter/attributes.py
@@ -7,6 +7,7 @@
 
 import regex as re
 
+from .. import regex_utils
 from ..helpers import RE_FLAGS_IMX, RE_FLAGS_IX, child_of_ignored_block
 
 if TYPE_CHECKING:
@@ -38,7 +39,7 @@ def add_indentation(config: Config, attributes: str, spacing: int) -> str:
 
         for line_number, line in enumerate(attributes.splitlines()):
             # when checking for template tag, use "match" to force start of line check.
-            if re.match(
+            if regex_utils.match(
                 config.template_unindent, line.strip(), flags=RE_FLAGS_IX
             ):
                 indent -= 1
@@ -48,7 +49,7 @@ def add_indentation(config: Config, attributes: str, spacing: int) -> str:
                     + line.strip()
                 )
 
-            elif re.match(
+            elif regex_utils.match(
                 config.tag_unindent_line, line.strip(), flags=RE_FLAGS_IX
             ):
                 # if we are leaving an indented group, then remove the indent_adder
@@ -58,9 +59,9 @@ def add_indentation(config: Config, attributes: str, spacing: int) -> str:
                     + line.strip()
                 )
 
-            elif re.search(
+            elif regex_utils.search(
                 config.template_indent, line.strip(), flags=RE_FLAGS_IX
-            ) and not re.search(
+            ) and not regex_utils.search(
                 config.template_unindent, line.strip(), flags=RE_FLAGS_IX
             ):
                 # for open tags, search, but then check that they are not closed.
@@ -97,7 +98,7 @@ def add_break(pattern: str, match: re.Match[str]) -> str:
 
     func = partial(add_break, "before")
 
-    attributes = re.sub(
+    attributes = regex_utils.sub(
         break_char
         + r".\K((?:{%|{{\#)[ ]*?(?:"
         + config.break_template_tags
@@ -109,7 +110,7 @@ def add_break(pattern: str, match: re.Match[str]) -> str:
 
     func = partial(add_break, "after")
     # break after
-    attributes = re.sub(
+    attributes = regex_utils.sub(
         r"((?:{%|{{\#)[ ]*?(?:"
         + config.break_template_tags
         + ")[^}]+?[%|}]})([^\n]+)$",
@@ -139,7 +140,7 @@ def format_attributes(config: Config, html: str, match: re.Match[str]) -> str:
     attributes = []
 
     # format attributes as groups
-    for attr_grp in re.finditer(
+    for attr_grp in regex_utils.finditer(
         config.attribute_pattern, match.group(3).strip(), flags=re.X
     ):
         attrib_name = attr_grp.group(1)

diff --git a/djlint/formatter/compress.py b/djlint/formatter/compress.py
@@ -7,12 +7,13 @@
 
 from typing import TYPE_CHECKING
 
-import regex as re
-
+from .. import regex_utils
 from ..const import HTML_TAG_NAMES, HTML_VOID_ELEMENTS
 from ..helpers import RE_FLAGS_IMX, child_of_unformatted_block
 
 if TYPE_CHECKING:
+    import regex as re
+
     from ..settings import Config
 
 
@@ -64,4 +65,6 @@ def _clean_tag(match: re.Match[str]) -> str:
 
         return f"{open_bracket}{tag}{attributes}{close_bracket}"
 
-    return re.sub(config.html_tag_regex, _clean_tag, html, flags=RE_FLAGS_IMX)
+    return regex_utils.sub(
+        config.html_tag_regex, _clean_tag, html, flags=RE_FLAGS_IMX
+    )
diff --git a/djlint/formatter/condense.py b/djlint/formatter/condense.py
@@ -11,6 +11,7 @@
 
 import regex as re
 
+from .. import regex_utils
 from ..helpers import (
     RE_FLAGS_IMS,
     RE_FLAGS_IMSX,
@@ -45,7 +46,7 @@ def strip_space(config: Config, html: str, match: re.Match[str]) -> str:
         if inside_protected_trans_block(config, html[: match.end()], match):
             return match.group().rstrip()
 
-        lines = sum(1 for _ in re.finditer(r"\n", match.group(2)))
+        lines = sum(1 for _ in regex_utils.finditer(r"\n", match.group(2)))
         blank_lines = "\n" * lines
         if lines > config.max_blank_lines:
             blank_lines = "\n" * max(config.max_blank_lines, 0)
@@ -62,7 +63,7 @@ def strip_space(config: Config, html: str, match: re.Match[str]) -> str:
 
     if not config.preserve_leading_space:
         # remove any leading/trailing space
-        html = re.sub(
+        html = regex_utils.sub(
             rf"^[ \t]*{line_contents}([{trailing_contents}]*)$",
             func,
             html,
@@ -72,13 +73,13 @@ def strip_space(config: Config, html: str, match: re.Match[str]) -> str:
     else:
         # only remove leading space in front of tags
         # <, {%
-        html = re.sub(
+        html = regex_utils.sub(
             rf"^[ \t]*((?:<|{{%).*?)([{trailing_contents}]*)$",
             func,
             html,
             flags=re.M,
         )
-        html = re.sub(
+        html = regex_utils.sub(
             rf"^{line_contents}([{trailing_contents}]*)$",
             func,
             html,
@@ -103,7 +104,7 @@ def add_blank_line_after(
     # should we add blank lines after load tags?
     if config.blank_line_after_tag:
         for tag in config.blank_line_after_tag.split(","):
-            html = re.sub(
+            html = regex_utils.sub(
                 rf"((?:{{%\s*?{tag.strip()}\b[^}}]+?%}}\n?)+)",
                 func,
                 html,
@@ -124,7 +125,7 @@ def add_blank_line_before(
     # should we add blank lines before load tags?
     if config.blank_line_before_tag:
         for tag in config.blank_line_before_tag.split(","):
-            html = re.sub(
+            html = regex_utils.sub(
                 rf"(?<!^\n)((?:{{%\s*?{tag.strip()}\b[^}}]+?%}}\n?)+)",
                 func,
                 html,
@@ -143,7 +144,7 @@ def yaml_add_blank_line_after(html: str, match: re.Match[str]) -> str:
 
     if not config.no_line_after_yaml:
         func = partial(yaml_add_blank_line_after, html)
-        html = re.sub(r"(^---.+?---)$", func, html, flags=RE_FLAGS_MS)
+        html = regex_utils.sub(r"(^---.+?---)$", func, html, flags=RE_FLAGS_MS)
 
     return html
 
@@ -181,7 +182,7 @@ def if_blank_line_after_match(config: Config, html: str) -> bool:
         """Check if there should be a blank line after."""
         if config.blank_line_after_tag:
             for tag in config.blank_line_after_tag.split(","):
-                if re.search(
+                if regex_utils.search(
                     rf"((?:{{%\s*?{tag.strip()}[^}}]+?%}}\n?)+)",
                     html,
                     flags=RE_FLAGS_IMS,
@@ -193,7 +194,7 @@ def if_blank_line_before_match(config: Config, html: str) -> bool:
         """Check if there should be a blank line before."""
         if config.blank_line_before_tag:
             for tag in config.blank_line_before_tag.split(","):
-                if re.search(
+                if regex_utils.search(
                     rf"((?:{{%\s*?{tag.strip()}[^}}]+?%}}\n?)+)",
                     html,
                     flags=RE_FLAGS_IMS,
@@ -205,7 +206,7 @@ def if_blank_line_before_match(config: Config, html: str) -> bool:
     func = partial(condense_line, config, html)
 
     # put short single line tags on one line
-    html = re.sub(
+    html = regex_utils.sub(
         rf"(<({config.optional_single_line_html_tags})\b(?:\"[^\"]*\"|'[^']*'|{{[^}}]*}}|[^'\">{{}}])*>)\s*([^<\n]*?)\s*?(</(\2)>)",
         func,
         html,
@@ -214,7 +215,7 @@ def if_blank_line_before_match(config: Config, html: str) -> bool:
 
     # put short template tags back on one line. must have leading space
     # jinja +%} and {%+ intentionally omitted.
-    return re.sub(
+    return regex_utils.sub(
         rf"((?:\s|^){{%-?[ ]*?({config.optional_single_line_template_tags})\b(?:(?!\n|%}}).)*?%}})\s*([^%\n]*?)\s*?({{%-?[ ]+?end(\2)[ ]*?%}})",
         func,
         html,

diff --git a/djlint/formatter/css.py b/djlint/formatter/css.py
@@ -7,12 +7,14 @@
 from typing import TYPE_CHECKING
 
 import cssbeautifier
-import regex as re
 from cssbeautifier.css.options import BeautifierOptions
 
+from .. import regex_utils
 from ..helpers import RE_FLAGS_IS, child_of_unformatted_block
 
 if TYPE_CHECKING:
+    import regex as re
+
     from ..settings import Config
 
 
@@ -63,7 +65,7 @@ def launch_formatter(
 
     func = partial(launch_formatter, config, html)
 
-    return re.sub(
+    return regex_utils.sub(
         r"([ ]*?)(<(?:style)\b(?:\"[^\"]*\"|'[^']*'|{[^}]*}|[^'\">{}])*>)(.*?)(?=</style>)",
         func,
         html,

diff --git a/djlint/formatter/expand.py b/djlint/formatter/expand.py
@@ -11,6 +11,7 @@
 
 import regex as re
 
+from .. import regex_utils
 from ..helpers import (
     RE_FLAGS_IMX,
     RE_FLAGS_IX,
@@ -58,15 +59,15 @@ def add_html_line(out_format: str, match: re.Match[str]) -> str:
     break_char = config.break_before
 
     # html tags - break before
-    html = re.sub(
+    html = regex_utils.sub(
         rf"{break_char}\K(</?(?:{html_tags})\b(\"[^\"]*\"|'[^']*'|{{[^}}]*}}|[^'\">{{}}])*>)",
         add_left,
         html,
         flags=RE_FLAGS_IX,
     )
 
     # html tags - break after
-    html = re.sub(
+    html = regex_utils.sub(
         rf"(</?(?:{html_tags})\b(\"[^\"]*\"|'[^']*'|{{[^}}]*}}|[^'\">{{}}])*>)(?!\s*?\n)(?=[^\n])",
         add_right,
         html,
@@ -81,7 +82,7 @@ def should_i_move_template_tag(
         if inside_ignored_block(config, html, match):
             return match.group(1)
 
-        if not re.search(
+        if not regex_utils.search(
             r"\<(?:"
             + str(config.indent_html_tags)
             # added > as not allowed inside a "" or '' to prevent invalid wild html matches
@@ -100,7 +101,7 @@ def should_i_move_template_tag(
 
     # template tags
     # break before
-    html = re.sub(
+    html = regex_utils.sub(
         break_char
         + r"\K((?:{%|{{\#)[ ]*?(?:"
         + config.break_template_tags
@@ -111,7 +112,7 @@ def should_i_move_template_tag(
     )
 
     # break after
-    return re.sub(
+    return regex_utils.sub(
         r"((?:{%|{{\#)[ ]*?(?:"
         + config.break_template_tags
         + ")[^}]+?[%}]})(?=[^\n])",