Skip to content

Commit

Permalink
feat: Identify inefficient use of Python string replace in `map_elements` (#19668)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored Nov 6, 2024
1 parent d34a3e1 commit 8f9ba23
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 1 deletion.
44 changes: 43 additions & 1 deletion py-polars/polars/_utils/udfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ class OpNames:
"endswith": "str.ends_with",
"lower": "str.to_lowercase",
"lstrip": "str.strip_chars_start",
"replace": "str.replace",
"rstrip": "str.strip_chars_end",
"startswith": "str.starts_with",
"strip": "str.strip_chars",
Expand Down Expand Up @@ -983,7 +984,7 @@ def _rewrite_methods(
"""Replace python method calls with synthetic POLARS_EXPRESSION op."""
LOAD_METHOD = OpNames.LOAD_ATTR if _MIN_PY312 else {"LOAD_METHOD"}
if matching_instructions := (
# method call with one basic arg, eg: "s.endswith('!')"
# method call with one arg, eg: "s.endswith('!')"
self._matches(
idx,
opnames=[LOAD_METHOD, {"LOAD_CONST"}, OpNames.CALL],
Expand Down Expand Up @@ -1016,6 +1017,47 @@ def _rewrite_methods(
px = inst._replace(opname="POLARS_EXPRESSION", argval=expr, argrepr=expr)
updated_instructions.append(px)

elif matching_instructions := (
# method call with three positional args, eg: "s.replace('!','?',2)"
self._matches(
idx,
opnames=[
LOAD_METHOD,
{"LOAD_CONST"},
{"LOAD_CONST"},
{"LOAD_CONST"},
OpNames.CALL,
],
argvals=[_PYTHON_METHODS_MAP],
)
or
# method call with two args, eg: "s.replace('!','?')"
self._matches(
idx,
opnames=[LOAD_METHOD, {"LOAD_CONST"}, {"LOAD_CONST"}, OpNames.CALL],
argvals=[_PYTHON_METHODS_MAP],
)
):
inst = matching_instructions[0]
expr = _PYTHON_METHODS_MAP[inst.argval]

param_values = [
i.argval
for i in matching_instructions[1 : len(matching_instructions) - 1]
]
if expr == "str.replace":
if len(param_values) == 3:
old, new, count = param_values
expr += f"({old!r},{new!r},n={count},literal=True)"
else:
old, new = param_values
expr = f"str.replace_all({old!r},{new!r},literal=True)"
else:
expr += f"({','.join(repr(v) for v in param_values)})"

px = inst._replace(opname="POLARS_EXPRESSION", argval=expr, argrepr=expr)
updated_instructions.append(px)

return len(matching_instructions)

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,16 @@
"""lambda x: x.lstrip().startswith(('!','#','?',"'"))""",
"""pl.col("b").str.strip_chars_start().str.contains(r"^(!|\\#|\\?|')")""",
),
(
"b",
"lambda x: x.replace(':','')",
"""pl.col("b").str.replace_all(':','',literal=True)""",
),
(
"b",
"lambda x: x.replace(':','',2)",
"""pl.col("b").str.replace(':','',n=2,literal=True)""",
),
# ---------------------------------------------
# json expr: load/extract
# ---------------------------------------------
Expand Down

0 comments on commit 8f9ba23

Please sign in to comment.