diff --git a/py-polars/docs/source/reference/expressions/functions.rst b/py-polars/docs/source/reference/expressions/functions.rst index 09e9afabce4f..f8ba9f8a3fb6 100644 --- a/py-polars/docs/source/reference/expressions/functions.rst +++ b/py-polars/docs/source/reference/expressions/functions.rst @@ -97,6 +97,7 @@ These functions are available from the polars module root and can be used as exp Expr.head Expr.implode Expr.map + Expr.map_elements Expr.max Expr.mean Expr.median diff --git a/py-polars/docs/source/reference/series/miscellaneous.rst b/py-polars/docs/source/reference/series/miscellaneous.rst index 6d5cfaf7ea5b..949cfc320a9c 100644 --- a/py-polars/docs/source/reference/series/miscellaneous.rst +++ b/py-polars/docs/source/reference/series/miscellaneous.rst @@ -7,6 +7,7 @@ Miscellaneous :toctree: api/ Series.apply + Series.map_elements Series.reinterpret Series.series_equal Series.set_sorted diff --git a/py-polars/polars/utils/udfs.py b/py-polars/polars/utils/udfs.py index 66233bee4f99..43115b8cafbd 100644 --- a/py-polars/polars/utils/udfs.py +++ b/py-polars/polars/utils/udfs.py @@ -41,7 +41,7 @@ class StackValue(NamedTuple): right_operand: str -ApplyTarget: TypeAlias = Literal["expr", "frame", "series"] +MapTarget: TypeAlias = Literal["expr", "frame", "series"] StackEntry: TypeAlias = Union[str, StackValue] _MIN_PY311 = sys.version_info >= (3, 11) @@ -204,9 +204,9 @@ class BytecodeParser: """Introspect UDF bytecode and determine if we can rewrite as native expression.""" _can_attempt_rewrite: dict[str, bool] - _apply_target_name: str | None = None + _map_target_name: str | None = None - def __init__(self, function: Callable[[Any], Any], apply_target: ApplyTarget): + def __init__(self, function: Callable[[Any], Any], map_target: MapTarget): try: original_instructions = get_instructions(function) except TypeError: @@ -216,7 +216,7 @@ def __init__(self, function: Callable[[Any], Any], apply_target: ApplyTarget): self._can_attempt_rewrite = {} self._function = function - self._apply_target = apply_target + self._map_target = map_target self._param_name = self._get_param_name(function) self._rewritten_instructions = RewrittenInstructions( instructions=original_instructions, @@ -284,34 +284,34 @@ def _inject_nesting( return sorted(expression_blocks.items()) def _get_target_name(self, col: str, expression: str) -> str: - """The name of the object against which the 'apply' is being invoked.""" - if self._apply_target_name is not None: - return self._apply_target_name + """The name of the object against which the 'map' is being invoked.""" + if self._map_target_name is not None: + return self._map_target_name else: col_expr = f'pl.col("{col}")' - if self._apply_target == "expr": + if self._map_target == "expr": return col_expr - elif self._apply_target == "series": + elif self._map_target == "series": # note: handle overlapping name from global variables; fallback # through "s", "srs", "series" and (finally) srs0 -> srsN... search_expr = expression.replace(col_expr, "") for name in ("s", "srs", "series"): if not re.search(rf"\b{name}\b", search_expr): - self._apply_target_name = name + self._map_target_name = name return name n = count() while True: name = f"srs{next(n)}" if not re.search(rf"\b{name}\b", search_expr): - self._apply_target_name = name + self._map_target_name = name return name - raise NotImplementedError(f"TODO: apply_target = {self._apply_target!r}") + raise NotImplementedError(f"TODO: map_target = {self._map_target!r}") @property - def apply_target(self) -> ApplyTarget: - """The apply target, eg: one of 'expr', 'frame', or 'series'.""" - return self._apply_target + def map_target(self) -> MapTarget: + """The map target, eg: one of 'expr', 'frame', or 'series'.""" + return self._map_target def can_attempt_rewrite(self) -> bool: """ @@ -322,15 +322,13 @@ def can_attempt_rewrite(self) -> bool: same output. (Hopefully nobody is writing lambdas like that anyway...) """ if ( - can_attempt_rewrite := self._can_attempt_rewrite.get( - self._apply_target, None - ) + can_attempt_rewrite := self._can_attempt_rewrite.get(self._map_target, None) ) is not None: return can_attempt_rewrite else: - self._can_attempt_rewrite[self._apply_target] = False + self._can_attempt_rewrite[self._map_target] = False if self._rewritten_instructions and self._param_name is not None: - self._can_attempt_rewrite[self._apply_target] = ( + self._can_attempt_rewrite[self._map_target] = ( # check minimum number of ops, ensuring all are parseable len(self._rewritten_instructions) >= 2 and all( @@ -346,7 +344,7 @@ def can_attempt_rewrite(self) -> bool: == 1 ) - return self._can_attempt_rewrite[self._apply_target] + return self._can_attempt_rewrite[self._map_target] def dis(self) -> None: """Print disassembled function bytecode.""" @@ -374,7 +372,7 @@ def rewritten_instructions(self) -> list[Instruction]: def to_expression(self, col: str) -> str | None: """Translate postfix bytecode instructions to polars expression/string.""" - self._apply_target_name = None + self._map_target_name = None if not self.can_attempt_rewrite() or self._param_name is None: return None @@ -397,7 +395,7 @@ def to_expression(self, col: str) -> str | None: offset: InstructionTranslator( instructions=ops, caller_variables=caller_variables, - apply_target=self._apply_target, + map_target=self._map_target, ).to_expression( col=col, param_name=self._param_name, @@ -409,14 +407,14 @@ def to_expression(self, col: str) -> str | None: ) polars_expr = " ".join(expr for _offset, expr in expression_strings) except NotImplementedError: - self._can_attempt_rewrite[self._apply_target] = False + self._can_attempt_rewrite[self._map_target] = False return None # note: if no 'pl.col' in the expression, it likely represents a compound # constant value (e.g. `lambda x: CONST + 123`), so we don't want to warn if "pl.col(" not in polars_expr: return None - elif self._apply_target == "series": + elif self._map_target == "series": return polars_expr.replace( f'pl.col("{col}")', self._get_target_name(col, polars_expr), @@ -452,7 +450,7 @@ def warn( if 'pl.col("")' in suggested_expression else "" ) - if self._apply_target == "expr": + if self._map_target == "expr": apitype = "expressions" clsname = "Expr" else: @@ -487,10 +485,10 @@ def __init__( self, instructions: list[Instruction], caller_variables: dict[str, Any], - apply_target: ApplyTarget, + map_target: MapTarget, ) -> None: self._caller_variables: dict[str, Any] = caller_variables - self._stack = self._to_intermediate_stack(instructions, apply_target) + self._stack = self._to_intermediate_stack(instructions, map_target) def to_expression(self, col: str, param_name: str, depth: int) -> str: """Convert intermediate stack to polars expression string.""" @@ -562,10 +560,10 @@ def _expr(self, value: StackEntry, col: str, param_name: str, depth: int) -> str return value def _to_intermediate_stack( - self, instructions: list[Instruction], apply_target: ApplyTarget + self, instructions: list[Instruction], map_target: MapTarget ) -> StackEntry: """Take postfix bytecode and convert to an intermediate natural-order stack.""" - if apply_target in ("expr", "series"): + if map_target in ("expr", "series"): stack: list[StackEntry] = [] for inst in instructions: stack.append( @@ -593,7 +591,7 @@ def _to_intermediate_stack( return stack[0] # TODO: dataframe.apply(...) - raise NotImplementedError(f"TODO: {apply_target!r} apply") + raise NotImplementedError(f"TODO: {map_target!r} apply") class RewrittenInstructions: @@ -673,8 +671,8 @@ def _rewrite(self, instructions: Iterator[Instruction]) -> list[Instruction]: while idx < len(self._instructions): inst, increment = self._instructions[idx], 1 if inst.opname not in OpNames.LOAD or not any( - (increment := apply_rewrite(idx, updated_instructions)) - for apply_rewrite in ( + (increment := map_rewrite(idx, updated_instructions)) + for map_rewrite in ( # add any other rewrite methods here self._rewrite_functions, self._rewrite_methods, @@ -830,7 +828,7 @@ def _is_raw_function(function: Callable[[Any], Any]) -> tuple[str, str]: def warn_on_inefficient_map( - function: Callable[[Any], Any], columns: list[str], map_target: ApplyTarget + function: Callable[[Any], Any], columns: list[str], map_target: MapTarget ) -> None: """ Generate ``PolarsInefficientMapWarning`` on poor usage of a ``map`` function.