From 37d49aabcf653014105ac8de0a6caaf384cc7c12 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Thu, 7 Nov 2024 01:15:00 +0400 Subject: [PATCH] feat: Identify inefficient use of Python string `replace` in `map_elements` (#19668) --- py-polars/polars/_utils/udfs.py | 44 ++++++++++++++++++- .../map/test_inefficient_map_warning.py | 10 +++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/py-polars/polars/_utils/udfs.py b/py-polars/polars/_utils/udfs.py index 0ff968ed59ec..cfc9d06492b1 100644 --- a/py-polars/polars/_utils/udfs.py +++ b/py-polars/polars/_utils/udfs.py @@ -183,6 +183,7 @@ class OpNames: "endswith": "str.ends_with", "lower": "str.to_lowercase", "lstrip": "str.strip_chars_start", + "replace": "str.replace", "rstrip": "str.strip_chars_end", "startswith": "str.starts_with", "strip": "str.strip_chars", @@ -983,7 +984,7 @@ def _rewrite_methods( """Replace python method calls with synthetic POLARS_EXPRESSION op.""" LOAD_METHOD = OpNames.LOAD_ATTR if _MIN_PY312 else {"LOAD_METHOD"} if matching_instructions := ( - # method call with one basic arg, eg: "s.endswith('!')" + # method call with one arg, eg: "s.endswith('!')" self._matches( idx, opnames=[LOAD_METHOD, {"LOAD_CONST"}, OpNames.CALL], @@ -1016,6 +1017,47 @@ def _rewrite_methods( px = inst._replace(opname="POLARS_EXPRESSION", argval=expr, argrepr=expr) updated_instructions.append(px) + elif matching_instructions := ( + # method call with three args, eg: "s.replace('!','?',count=2)" + self._matches( + idx, + opnames=[ + LOAD_METHOD, + {"LOAD_CONST"}, + {"LOAD_CONST"}, + {"LOAD_CONST"}, + OpNames.CALL, + ], + argvals=[_PYTHON_METHODS_MAP], + ) + or + # method call with two args, eg: "s.replace('!','?')" + self._matches( + idx, + opnames=[LOAD_METHOD, {"LOAD_CONST"}, {"LOAD_CONST"}, OpNames.CALL], + argvals=[_PYTHON_METHODS_MAP], + ) + ): + inst = matching_instructions[0] + expr = _PYTHON_METHODS_MAP[inst.argval] + + param_values = [ + i.argval + for i in matching_instructions[1 : len(matching_instructions) - 1] + ] + if expr == "str.replace": + if len(param_values) == 3: + old, new, count = param_values + expr += f"({old!r},{new!r},n={count},literal=True)" + else: + old, new = param_values + expr = f"str.replace_all({old!r},{new!r},literal=True)" + else: + expr += f"({','.join(repr(v) for v in param_values)})" + + px = inst._replace(opname="POLARS_EXPRESSION", argval=expr, argrepr=expr) + updated_instructions.append(px) + return len(matching_instructions) @staticmethod diff --git a/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py b/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py index 74946f084d51..13eee2c731e3 100644 --- a/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py +++ b/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py @@ -176,6 +176,16 @@ """lambda x: x.lstrip().startswith(('!','#','?',"'"))""", """pl.col("b").str.strip_chars_start().str.contains(r"^(!|\\#|\\?|')")""", ), + ( + "b", + "lambda x: x.replace(':','')", + """pl.col("b").str.replace_all(':','',literal=True)""", + ), + ( + "b", + "lambda x: x.replace(':','',2)", + """pl.col("b").str.replace(':','',n=2,literal=True)""", + ), # --------------------------------------------- # json expr: load/extract # ---------------------------------------------