Skip to content

Commit

Permalink
perf: Cache compiled regex
Browse files Browse the repository at this point in the history
This replaces direct calls to the regex module's functions with a custom module, regex_utils.
It is meant to cache the compiled pattern for every regex+flag combination. This speeds up
the code, as fewer calls to enum.__and__ are needed.

Proposed by @oliverhaas based on a previous, similar patch of mine.

Co-Authored-By: oliverhaas <[email protected]>
  • Loading branch information
JCWasmx86 and oliverhaas committed Nov 2, 2024
1 parent 02008ee commit 9fe82d3
Show file tree
Hide file tree
Showing 13 changed files with 178 additions and 87 deletions.
15 changes: 8 additions & 7 deletions djlint/formatter/attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import regex as re

from .. import regex_utils
from ..helpers import RE_FLAGS_IMX, RE_FLAGS_IX, child_of_ignored_block

if TYPE_CHECKING:
Expand Down Expand Up @@ -38,7 +39,7 @@ def add_indentation(config: Config, attributes: str, spacing: int) -> str:

for line_number, line in enumerate(attributes.splitlines()):
# when checking for template tag, use "match" to force start of line check.
if re.match(
if regex_utils.match(
config.template_unindent, line.strip(), flags=RE_FLAGS_IX
):
indent -= 1
Expand All @@ -48,7 +49,7 @@ def add_indentation(config: Config, attributes: str, spacing: int) -> str:
+ line.strip()
)

elif re.match(
elif regex_utils.match(
config.tag_unindent_line, line.strip(), flags=RE_FLAGS_IX
):
# if we are leaving an indented group, then remove the indent_adder
Expand All @@ -58,9 +59,9 @@ def add_indentation(config: Config, attributes: str, spacing: int) -> str:
+ line.strip()
)

elif re.search(
elif regex_utils.search(
config.template_indent, line.strip(), flags=RE_FLAGS_IX
) and not re.search(
) and not regex_utils.search(
config.template_unindent, line.strip(), flags=RE_FLAGS_IX
):
# for open tags, search, but then check that they are not closed.
Expand Down Expand Up @@ -97,7 +98,7 @@ def add_break(pattern: str, match: re.Match[str]) -> str:

func = partial(add_break, "before")

attributes = re.sub(
attributes = regex_utils.sub(
break_char
+ r".\K((?:{%|{{\#)[ ]*?(?:"
+ config.break_template_tags
Expand All @@ -109,7 +110,7 @@ def add_break(pattern: str, match: re.Match[str]) -> str:

func = partial(add_break, "after")
# break after
attributes = re.sub(
attributes = regex_utils.sub(
r"((?:{%|{{\#)[ ]*?(?:"
+ config.break_template_tags
+ ")[^}]+?[%|}]})([^\n]+)$",
Expand Down Expand Up @@ -139,7 +140,7 @@ def format_attributes(config: Config, html: str, match: re.Match[str]) -> str:
attributes = []

# format attributes as groups
for attr_grp in re.finditer(
for attr_grp in regex_utils.finditer(
config.attribute_pattern, match.group(3).strip(), flags=re.X
):
attrib_name = attr_grp.group(1)
Expand Down
9 changes: 6 additions & 3 deletions djlint/formatter/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@

from typing import TYPE_CHECKING

import regex as re

from .. import regex_utils
from ..const import HTML_TAG_NAMES, HTML_VOID_ELEMENTS
from ..helpers import RE_FLAGS_IMX, child_of_unformatted_block

if TYPE_CHECKING:
import regex as re

from ..settings import Config


Expand Down Expand Up @@ -64,4 +65,6 @@ def _clean_tag(match: re.Match[str]) -> str:

return f"{open_bracket}{tag}{attributes}{close_bracket}"

return re.sub(config.html_tag_regex, _clean_tag, html, flags=RE_FLAGS_IMX)
return regex_utils.sub(
config.html_tag_regex, _clean_tag, html, flags=RE_FLAGS_IMX
)
23 changes: 12 additions & 11 deletions djlint/formatter/condense.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import regex as re

from .. import regex_utils
from ..helpers import (
RE_FLAGS_IMS,
RE_FLAGS_IMSX,
Expand Down Expand Up @@ -45,7 +46,7 @@ def strip_space(config: Config, html: str, match: re.Match[str]) -> str:
if inside_protected_trans_block(config, html[: match.end()], match):
return match.group().rstrip()

lines = sum(1 for _ in re.finditer(r"\n", match.group(2)))
lines = sum(1 for _ in regex_utils.finditer(r"\n", match.group(2)))
blank_lines = "\n" * lines
if lines > config.max_blank_lines:
blank_lines = "\n" * max(config.max_blank_lines, 0)
Expand All @@ -62,7 +63,7 @@ def strip_space(config: Config, html: str, match: re.Match[str]) -> str:

if not config.preserve_leading_space:
# remove any leading/trailing space
html = re.sub(
html = regex_utils.sub(
rf"^[ \t]*{line_contents}([{trailing_contents}]*)$",
func,
html,
Expand All @@ -72,13 +73,13 @@ def strip_space(config: Config, html: str, match: re.Match[str]) -> str:
else:
# only remove leading space in front of tags
# <, {%
html = re.sub(
html = regex_utils.sub(
rf"^[ \t]*((?:<|{{%).*?)([{trailing_contents}]*)$",
func,
html,
flags=re.M,
)
html = re.sub(
html = regex_utils.sub(
rf"^{line_contents}([{trailing_contents}]*)$",
func,
html,
Expand All @@ -103,7 +104,7 @@ def add_blank_line_after(
# should we add blank lines after load tags?
if config.blank_line_after_tag:
for tag in config.blank_line_after_tag.split(","):
html = re.sub(
html = regex_utils.sub(
rf"((?:{{%\s*?{tag.strip()}\b[^}}]+?%}}\n?)+)",
func,
html,
Expand All @@ -124,7 +125,7 @@ def add_blank_line_before(
# should we add blank lines before load tags?
if config.blank_line_before_tag:
for tag in config.blank_line_before_tag.split(","):
html = re.sub(
html = regex_utils.sub(
rf"(?<!^\n)((?:{{%\s*?{tag.strip()}\b[^}}]+?%}}\n?)+)",
func,
html,
Expand All @@ -143,7 +144,7 @@ def yaml_add_blank_line_after(html: str, match: re.Match[str]) -> str:

if not config.no_line_after_yaml:
func = partial(yaml_add_blank_line_after, html)
html = re.sub(r"(^---.+?---)$", func, html, flags=RE_FLAGS_MS)
html = regex_utils.sub(r"(^---.+?---)$", func, html, flags=RE_FLAGS_MS)

return html

Expand Down Expand Up @@ -181,7 +182,7 @@ def if_blank_line_after_match(config: Config, html: str) -> bool:
"""Check if there should be a blank line after."""
if config.blank_line_after_tag:
for tag in config.blank_line_after_tag.split(","):
if re.search(
if regex_utils.search(
rf"((?:{{%\s*?{tag.strip()}[^}}]+?%}}\n?)+)",
html,
flags=RE_FLAGS_IMS,
Expand All @@ -193,7 +194,7 @@ def if_blank_line_before_match(config: Config, html: str) -> bool:
"""Check if there should be a blank line before."""
if config.blank_line_before_tag:
for tag in config.blank_line_before_tag.split(","):
if re.search(
if regex_utils.search(
rf"((?:{{%\s*?{tag.strip()}[^}}]+?%}}\n?)+)",
html,
flags=RE_FLAGS_IMS,
Expand All @@ -205,7 +206,7 @@ def if_blank_line_before_match(config: Config, html: str) -> bool:
func = partial(condense_line, config, html)

# put short single line tags on one line
html = re.sub(
html = regex_utils.sub(
rf"(<({config.optional_single_line_html_tags})\b(?:\"[^\"]*\"|'[^']*'|{{[^}}]*}}|[^'\">{{}}])*>)\s*([^<\n]*?)\s*?(</(\2)>)",
func,
html,
Expand All @@ -214,7 +215,7 @@ def if_blank_line_before_match(config: Config, html: str) -> bool:

# put short template tags back on one line. must have leading space
# jinja +%} and {%+ intentionally omitted.
return re.sub(
return regex_utils.sub(
rf"((?:\s|^){{%-?[ ]*?({config.optional_single_line_template_tags})\b(?:(?!\n|%}}).)*?%}})\s*([^%\n]*?)\s*?({{%-?[ ]+?end(\2)[ ]*?%}})",
func,
html,
Expand Down
6 changes: 4 additions & 2 deletions djlint/formatter/css.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
from typing import TYPE_CHECKING

import cssbeautifier
import regex as re
from cssbeautifier.css.options import BeautifierOptions

from .. import regex_utils
from ..helpers import RE_FLAGS_IS, child_of_unformatted_block

if TYPE_CHECKING:
import regex as re

from ..settings import Config


Expand Down Expand Up @@ -63,7 +65,7 @@ def launch_formatter(

func = partial(launch_formatter, config, html)

return re.sub(
return regex_utils.sub(
r"([ ]*?)(<(?:style)\b(?:\"[^\"]*\"|'[^']*'|{[^}]*}|[^'\">{}])*>)(.*?)(?=</style>)",
func,
html,
Expand Down
11 changes: 6 additions & 5 deletions djlint/formatter/expand.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import regex as re

from .. import regex_utils
from ..helpers import (
RE_FLAGS_IMX,
RE_FLAGS_IX,
Expand Down Expand Up @@ -58,15 +59,15 @@ def add_html_line(out_format: str, match: re.Match[str]) -> str:
break_char = config.break_before

# html tags - break before
html = re.sub(
html = regex_utils.sub(
rf"{break_char}\K(</?(?:{html_tags})\b(\"[^\"]*\"|'[^']*'|{{[^}}]*}}|[^'\">{{}}])*>)",
add_left,
html,
flags=RE_FLAGS_IX,
)

# html tags - break after
html = re.sub(
html = regex_utils.sub(
rf"(</?(?:{html_tags})\b(\"[^\"]*\"|'[^']*'|{{[^}}]*}}|[^'\">{{}}])*>)(?!\s*?\n)(?=[^\n])",
add_right,
html,
Expand All @@ -81,7 +82,7 @@ def should_i_move_template_tag(
if inside_ignored_block(config, html, match):
return match.group(1)

if not re.search(
if not regex_utils.search(
r"\<(?:"
+ str(config.indent_html_tags)
# added > as not allowed inside a "" or '' to prevent invalid wild html matches
Expand All @@ -100,7 +101,7 @@ def should_i_move_template_tag(

# template tags
# break before
html = re.sub(
html = regex_utils.sub(
break_char
+ r"\K((?:{%|{{\#)[ ]*?(?:"
+ config.break_template_tags
Expand All @@ -111,7 +112,7 @@ def should_i_move_template_tag(
)

# break after
return re.sub(
return regex_utils.sub(
r"((?:{%|{{\#)[ ]*?(?:"
+ config.break_template_tags
+ ")[^}]+?[%}]})(?=[^\n])",
Expand Down
Loading

0 comments on commit 9fe82d3

Please sign in to comment.