diff --git a/polars/polars-lazy/polars-plan/src/dsl/arity.rs b/polars/polars-lazy/polars-plan/src/dsl/arity.rs index 7a871ee13d91..05ff22df52b0 100644 --- a/polars/polars-lazy/polars-plan/src/dsl/arity.rs +++ b/polars/polars-lazy/polars-plan/src/dsl/arity.rs @@ -1,78 +1,91 @@ use super::*; -/// Intermediate state of `when(..).then(..).otherwise(..)` expression. +/// Utility struct for the `when-then-otherwise` expression. +/// +/// Represents the state of the expression after [when] is called. +/// +/// In this state, `then` must be called to continue to finish the expression. #[derive(Clone)] pub struct When { - predicate: Expr, + condition: Expr, } -/// Intermediate state of `when(..).then(..).otherwise(..)` expression. +/// Utility struct for the `when-then-otherwise` expression. +/// +/// Represents the state of the expression after `when(...).then(...)` is called. #[derive(Clone)] pub struct Then { - predicate: Expr, - then: Expr, + condition: Expr, + statement: Expr, } -/// Intermediate state of a chained `when(..).then(..).otherwise(..)` expression. +/// Utility struct for the `when-then-otherwise` expression. +/// +/// Represents the state of the expression after an additional `when` is called. +/// +/// In this state, `then` must be called to continue to finish the expression. #[derive(Clone)] pub struct ChainedWhen { - predicates: Vec, - thens: Vec, + conditions: Vec, + statements: Vec, } -/// Intermediate state of a chained `when(..).then(..).otherwise(..)` expression. +/// Utility struct for the `when-then-otherwise` expression. +/// +/// Represents the state of the expression after an additional `then` is called. #[derive(Clone)] pub struct ChainedThen { - predicates: Vec, - thens: Vec, + conditions: Vec, + statements: Vec, } impl When { + /// Attach a statement to the corresponding condition. 
pub fn then>(self, expr: E) -> Then { Then { - predicate: self.predicate, - then: expr.into(), + condition: self.condition, + statement: expr.into(), } } } impl Then { - pub fn when>(self, predicate: E) -> ChainedWhen { + /// Add a condition to the `when-then-otherwise` expression. + pub fn when>(self, condition: E) -> ChainedWhen { ChainedWhen { - predicates: vec![self.predicate, predicate.into()], - thens: vec![self.then], + conditions: vec![self.condition, condition.into()], + statements: vec![self.statement], } } - pub fn otherwise>(self, expr: E) -> Expr { - Expr::Ternary { - predicate: Box::new(self.predicate), - truthy: Box::new(self.then), - falsy: Box::new(expr.into()), - } + /// Define a default for the `when-then-otherwise` expression. + pub fn otherwise>(self, statement: E) -> Expr { + ternary_expr(self.condition, self.statement, statement.into()) } } impl ChainedWhen { - pub fn then>(mut self, expr: E) -> ChainedThen { - self.thens.push(expr.into()); + pub fn then>(mut self, statement: E) -> ChainedThen { + self.statements.push(statement.into()); ChainedThen { - predicates: self.predicates, - thens: self.thens, + conditions: self.conditions, + statements: self.statements, } } } impl ChainedThen { - pub fn when>(mut self, predicate: E) -> ChainedWhen { - self.predicates.push(predicate.into()); + /// Add another condition to the `when-then-otherwise` expression. + pub fn when>(mut self, condition: E) -> ChainedWhen { + self.conditions.push(condition.into()); ChainedWhen { - predicates: self.predicates, - thens: self.thens, + conditions: self.conditions, + statements: self.statements, } } + /// Define a default for the `when-then-otherwise` expression. pub fn otherwise>(self, expr: E) -> Expr { // we iterate the preds/ exprs last in first out // and nest them. 
@@ -98,31 +111,29 @@ impl ChainedThen { // which will be used in the next layer `outer` // - let pred_iter = self.predicates.into_iter().rev(); - let mut then_iter = self.thens.into_iter().rev(); + let conditions_iter = self.conditions.into_iter().rev(); + let mut statements_iter = self.statements.into_iter().rev(); let mut otherwise = expr.into(); - for e in pred_iter { - otherwise = Expr::Ternary { - predicate: Box::new(e), - truthy: Box::new( - then_iter - .next() - .expect("expr expected, did you call when().then().otherwise?"), - ), - falsy: Box::new(otherwise), - } + for e in conditions_iter { + otherwise = ternary_expr( + e, + statements_iter + .next() + .expect("expr expected, did you call when().then().otherwise?"), + otherwise, + ); } otherwise } } -/// Start a `when(..).then(..).otherwise(..)` expression -pub fn when>(predicate: E) -> When { +/// Start a `when-then-otherwise` expression. +pub fn when>(condition: E) -> When { When { - predicate: predicate.into(), + condition: condition.into(), } } diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 0065b05e7a53..18714d8fbae5 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -8598,6 +8598,7 @@ def map_dict( Dictionary containing the before/after values to map. default Value to use when the remapping dict does not contain the lookup value. + Accepts expression input. Non-expression inputs are parsed as literals. Use ``pl.first()``, to keep the original value. return_dtype Set return dtype to override automatic return dtype determination. 
@@ -8923,6 +8924,9 @@ def inner_with_default(s: Series) -> Series: is_keys=False, ) + default_parsed = self._from_pyexpr( + parse_as_expression(default, str_as_lit=True) + ) return ( ( df.lazy() @@ -8942,7 +8946,7 @@ def inner_with_default(s: Series) -> Series: .select( F.when(F.col(is_remapped_column).is_not_null()) .then(F.col(remap_value_column)) - .otherwise(default) + .otherwise(default_parsed) .alias(column) ) ) diff --git a/py-polars/polars/expr/whenthen.py b/py-polars/polars/expr/whenthen.py index e9113ba78a7a..897b6987f905 100644 --- a/py-polars/polars/expr/whenthen.py +++ b/py-polars/polars/expr/whenthen.py @@ -1,11 +1,14 @@ from __future__ import annotations +import warnings from typing import TYPE_CHECKING, Any import polars.functions as F from polars.expr.expr import Expr from polars.utils._parse_expr_input import parse_as_expression from polars.utils._wrap import wrap_expr +from polars.utils.decorators import deprecated_alias +from polars.utils.various import find_stacklevel if TYPE_CHECKING: from polars.polars import PyExpr @@ -13,26 +16,43 @@ class When: - """Utility class. See the `when` function.""" + """ + Utility class for the `when-then-otherwise` expression. + + Represents the initial state of the expression after ``pl.when(...)`` is called. + + In this state, ``then`` must be called to continue to finish the expression. + + """ def __init__(self, when: Any): self._when = when - def then(self, expr: IntoExpr) -> Then: + @deprecated_alias(expr="statement") + def then(self, statement: IntoExpr) -> Then: """ - Values to return in case of the predicate being `True`. + Attach a statement to the corresponding condition. - See Also - -------- - pl.when : Documentation for `when, then, otherwise` + Parameters + ---------- + statement + The statement to apply if the corresponding condition is true. + Accepts expression input. Non-expression inputs are parsed as literals. 
""" - expr = parse_as_expression(expr, str_as_lit=True) - return Then(self._when.then(expr)) + if isinstance(statement, str): + _warn_for_deprecated_string_input_behavior(statement) + statement_pyexpr = parse_as_expression(statement, str_as_lit=True) + return Then(self._when.then(statement_pyexpr)) class Then(Expr): - """Utility class. See the `when` function.""" + """ + Utility class for the `when-then-otherwise` expression. + + Represents the state of the expression after ``pl.when(...).then(...)`` is called. + + """ def __init__(self, then: Any): self._then = then @@ -45,38 +65,77 @@ def _from_pyexpr(cls, pyexpr: PyExpr) -> Expr: # type: ignore[override] def _pyexpr(self) -> PyExpr: return self._then.otherwise(F.lit(None)._pyexpr) - def when(self, predicate: IntoExpr) -> ChainedWhen: - """Start another "when, then, otherwise" layer.""" - predicate = parse_as_expression(predicate) - return ChainedWhen(self._then.when(predicate)) + @deprecated_alias(predicate="condition") + def when(self, condition: IntoExpr) -> ChainedWhen: + """ + Add a condition to the `when-then-otherwise` expression. - def otherwise(self, expr: IntoExpr) -> Expr: + Parameters + ---------- + condition + The condition for applying the subsequent statement. + Accepts a boolean expression. String input is parsed as a column name. + + """ + condition_pyexpr = parse_as_expression(condition) + return ChainedWhen(self._then.when(condition_pyexpr)) + + @deprecated_alias(expr="statement") + def otherwise(self, statement: IntoExpr) -> Expr: """ - Values to return in case of the predicate being `False`. + Define a default for the `when-then-otherwise` expression. - See Also - -------- - pl.when : Documentation for `when, then, otherwise` + Parameters + ---------- + statement + The statement to apply if all conditions are false. + Accepts expression input. Non-expression inputs are parsed as literals. 
""" - expr = parse_as_expression(expr, str_as_lit=True) - return wrap_expr(self._then.otherwise(expr)) + if isinstance(statement, str): + _warn_for_deprecated_string_input_behavior(statement) + statement_pyexpr = parse_as_expression(statement, str_as_lit=True) + return wrap_expr(self._then.otherwise(statement_pyexpr)) class ChainedWhen(Expr): - """Utility class. See the `when` function.""" + """ + Utility class for the `when-then-otherwise` expression. + + Represents the state of the expression after an additional ``when`` is called. + + In this state, ``then`` must be called to continue to finish the expression. + + """ def __init__(self, chained_when: Any): self._chained_when = chained_when - def then(self, predicate: IntoExpr) -> ChainedThen: - """Start another "when, then, otherwise" layer.""" - predicate = parse_as_expression(predicate, str_as_lit=True) - return ChainedThen(self._chained_when.then(predicate)) + @deprecated_alias(predicate="statement") + def then(self, statement: IntoExpr) -> ChainedThen: + """ + Attach a statement to the corresponding condition. + + Parameters + ---------- + statement + The statement to apply if the corresponding condition is true. + Accepts expression input. Non-expression inputs are parsed as literals. + + """ + if isinstance(statement, str): + _warn_for_deprecated_string_input_behavior(statement) + statement_pyexpr = parse_as_expression(statement, str_as_lit=True) + return ChainedThen(self._chained_when.then(statement_pyexpr)) class ChainedThen(Expr): - """Utility class. See the `when` function.""" + """ + Utility class for the `when-then-otherwise` expression. + + Represents the state of the expression after an additional ``then`` is called. 
+ + """ def __init__(self, chained_then: Any): self._chained_then = chained_then @@ -89,19 +148,43 @@ def _from_pyexpr(cls, pyexpr: PyExpr) -> Expr: # type: ignore[override] def _pyexpr(self) -> PyExpr: return self._chained_then.otherwise(F.lit(None)._pyexpr) - def when(self, predicate: IntoExpr) -> ChainedWhen: - """Start another "when, then, otherwise" layer.""" - predicate = parse_as_expression(predicate) - return ChainedWhen(self._chained_then.when(predicate)) + @deprecated_alias(predicate="condition") + def when(self, condition: IntoExpr) -> ChainedWhen: + """ + Add another condition to the `when-then-otherwise` expression. + + Parameters + ---------- + condition + The condition for applying the subsequent statement. + Accepts a boolean expression. String input is parsed as a column name. + + """ + condition_pyexpr = parse_as_expression(condition) + return ChainedWhen(self._chained_then.when(condition_pyexpr)) - def otherwise(self, expr: IntoExpr) -> Expr: + @deprecated_alias(expr="statement") + def otherwise(self, statement: IntoExpr) -> Expr: """ - Values to return in case of the predicate being `False`. + Define a default for the `when-then-otherwise` expression. - See Also - -------- - pl.when : Documentation for `when, then, otherwise` + Parameters + ---------- + statement + The statement to apply if all conditions are false. + Accepts expression input. Non-expression inputs are parsed as literals. """ - expr = parse_as_expression(expr, str_as_lit=True) - return wrap_expr(self._chained_then.otherwise(expr)) + if isinstance(statement, str): + _warn_for_deprecated_string_input_behavior(statement) + statement_pyexpr = parse_as_expression(statement, str_as_lit=True) + return wrap_expr(self._chained_then.otherwise(statement_pyexpr)) + + +def _warn_for_deprecated_string_input_behavior(input: str) -> None: + warnings.warn( + "in a future version, string input will be parsed as a column name rather than a string literal." 
+ f" To silence this warning, pass the input as an expression instead: `pl.lit({input!r})`", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) diff --git a/py-polars/polars/functions/whenthen.py b/py-polars/polars/functions/whenthen.py index 077ce7f51f52..620ae6b5e526 100644 --- a/py-polars/polars/functions/whenthen.py +++ b/py-polars/polars/functions/whenthen.py @@ -5,6 +5,7 @@ import polars._reexport as pl from polars.utils._parse_expr_input import parse_as_expression +from polars.utils.decorators import deprecated_alias with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr @@ -13,9 +14,10 @@ from polars.type_aliases import IntoExpr -def when(expr: IntoExpr) -> pl.When: +@deprecated_alias(expr="condition") +def when(condition: IntoExpr) -> pl.When: """ - Start a "when, then, otherwise" expression. + Start a `when-then-otherwise` expression. Expression similar to an `if-else` statement in Python. Always initiated by a `pl.when().then()`. Optionally followed by chaining @@ -24,6 +26,12 @@ def when(expr: IntoExpr) -> pl.When: appended at the end. If not appended, and none of the conditions are `True`, `None` will be returned. + Parameters + ---------- + condition + The condition for applying the subsequent statement. + Accepts a boolean expression. String input is parsed as a column name. 
+ Examples -------- Below we add a column with the value 1, where column "foo" > 2 and the value -1 @@ -89,7 +97,6 @@ def when(expr: IntoExpr) -> pl.When: │ 4 ┆ 0 ┆ 1 │ └─────┴─────┴──────┘ - """ - pyexpr = parse_as_expression(expr) - return pl.When(plr.when(pyexpr)) + condition_pyexpr = parse_as_expression(condition) + return pl.When(plr.when(condition_pyexpr)) diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 44b0d34227cb..07f861f7b473 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4562,7 +4562,7 @@ def apply( Examples -------- >>> s = pl.Series("a", [1, 2, 3]) - >>> s.apply(lambda x: x + 10) + >>> s.apply(lambda x: x + 10) # doctest: +SKIP shape: (3,) Series: 'a' [i64] [ diff --git a/py-polars/src/functions/whenthen.rs b/py-polars/src/functions/whenthen.rs index 60a78863c217..f5d1d231dce1 100644 --- a/py-polars/src/functions/whenthen.rs +++ b/py-polars/src/functions/whenthen.rs @@ -4,9 +4,9 @@ use pyo3::prelude::*; use crate::PyExpr; #[pyfunction] -pub fn when(predicate: PyExpr) -> PyWhen { +pub fn when(condition: PyExpr) -> PyWhen { PyWhen { - inner: dsl::when(predicate.inner), + inner: dsl::when(condition.inner), } } @@ -36,44 +36,44 @@ pub struct PyChainedThen { #[pymethods] impl PyWhen { - fn then(&self, expr: PyExpr) -> PyThen { + fn then(&self, statement: PyExpr) -> PyThen { PyThen { - inner: self.inner.clone().then(expr.inner), + inner: self.inner.clone().then(statement.inner), } } } #[pymethods] impl PyThen { - fn when(&self, predicate: PyExpr) -> PyChainedWhen { + fn when(&self, condition: PyExpr) -> PyChainedWhen { PyChainedWhen { - inner: self.inner.clone().when(predicate.inner), + inner: self.inner.clone().when(condition.inner), } } - fn otherwise(&self, expr: PyExpr) -> PyExpr { - self.inner.clone().otherwise(expr.inner).into() + fn otherwise(&self, statement: PyExpr) -> PyExpr { + self.inner.clone().otherwise(statement.inner).into() } } #[pymethods] impl 
PyChainedWhen { - fn then(&self, expr: PyExpr) -> PyChainedThen { + fn then(&self, statement: PyExpr) -> PyChainedThen { PyChainedThen { - inner: self.inner.clone().then(expr.inner), + inner: self.inner.clone().then(statement.inner), } } } #[pymethods] impl PyChainedThen { - fn when(&self, predicate: PyExpr) -> PyChainedWhen { + fn when(&self, condition: PyExpr) -> PyChainedWhen { PyChainedWhen { - inner: self.inner.clone().when(predicate.inner), + inner: self.inner.clone().when(condition.inner), } } - fn otherwise(&self, expr: PyExpr) -> PyExpr { - self.inner.clone().otherwise(expr.inner).into() + fn otherwise(&self, statement: PyExpr) -> PyExpr { + self.inner.clone().otherwise(statement.inner).into() } } diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py index 24a892e3d325..9db461143d58 100644 --- a/py-polars/tests/unit/datatypes/test_categorical.py +++ b/py-polars/tests/unit/datatypes/test_categorical.py @@ -77,20 +77,6 @@ def test_cat_to_dummies() -> None: } -def test_comp_categorical_lit_dtype() -> None: - df = pl.DataFrame( - data={"column": ["a", "b", "e"], "values": [1, 5, 9]}, - schema=[("column", pl.Categorical), ("more", pl.Int32)], - ) - - assert df.with_columns( - pl.when(pl.col("column") == "e") - .then("d") - .otherwise(pl.col("column")) - .alias("column") - ).dtypes == [pl.Categorical, pl.Int32] - - def test_categorical_describe_3487() -> None: # test if we don't err df = pl.DataFrame({"cats": ["a", "b"]}) diff --git a/py-polars/tests/unit/functions/test_whenthen.py b/py-polars/tests/unit/functions/test_whenthen.py index 197be4ffb25c..b55192975f7e 100644 --- a/py-polars/tests/unit/functions/test_whenthen.py +++ b/py-polars/tests/unit/functions/test_whenthen.py @@ -3,7 +3,7 @@ import pytest import polars as pl -from polars.testing import assert_frame_equal +from polars.testing import assert_frame_equal, assert_series_equal def test_when_then() -> None: @@ -28,7 +28,12 @@ def 
test_when_then() -> None: def test_when_then_chained() -> None: df = pl.DataFrame({"a": [1, 2, 3, 4, 5]}) - expr = pl.when(pl.col("a") < 3).then(pl.lit("x")).when(pl.col("a") > 4).then("z") + expr = ( + pl.when(pl.col("a") < 3) + .then(pl.lit("x")) + .when(pl.col("a") > 4) + .then(pl.lit("z")) + ) result = df.select( expr.otherwise(pl.lit("y")).alias("a"), @@ -48,19 +53,17 @@ def test_when_then_invalid_chains() -> None: with pytest.raises(AttributeError): pl.when("a").when("b") # type: ignore[attr-defined] with pytest.raises(AttributeError): - pl.when("a").otherwise("b") # type: ignore[attr-defined] - with pytest.raises(AttributeError): - pl.when("a").then("b").then("c") # type: ignore[attr-defined] + pl.when("a").otherwise(2) # type: ignore[attr-defined] with pytest.raises(AttributeError): - pl.when("a").then("b").otherwise("c").otherwise("d") # type: ignore[attr-defined] + pl.when("a").then(1).then(2) # type: ignore[attr-defined] with pytest.raises(AttributeError): - pl.when("a").then("b").otherwise("c").otherwise("d") # type: ignore[attr-defined] + pl.when("a").then(1).otherwise(2).otherwise(3) # type: ignore[attr-defined] with pytest.raises(AttributeError): - pl.when("a").then("b").when("c").when("d") # type: ignore[attr-defined] + pl.when("a").then(1).when("b").when("c") # type: ignore[attr-defined] with pytest.raises(AttributeError): - pl.when("a").then("b").when("c").otherwise("d") # type: ignore[attr-defined] + pl.when("a").then(1).when("b").otherwise("2") # type: ignore[attr-defined] with pytest.raises(AttributeError): - pl.when("a").then("b").when("c").then("d").when("e").when("f") # type: ignore[attr-defined] + pl.when("a").then(1).when("b").then(2).when("c").when("d") # type: ignore[attr-defined] def test_when_then_implicit_none() -> None: @@ -72,8 +75,8 @@ def test_when_then_implicit_none() -> None: ) result = df.select( - pl.when(pl.col("points") > 7).then("Foo"), - pl.when(pl.col("points") > 7).then("Foo").alias("bar"), + pl.when(pl.col("points") > 
7).then(pl.lit("Foo")), + pl.when(pl.col("points") > 7).then(pl.lit("Foo")).alias("bar"), ) expected = pl.DataFrame( @@ -101,18 +104,20 @@ def test_nested_when_then_and_wildcard_expansion_6284() -> None: out0 = df.with_columns( pl.when(pl.any_horizontal(pl.all() == "a")) - .then("a") + .then(pl.lit("a")) .otherwise( - pl.when(pl.any_horizontal(pl.all() == "d")).then("d").otherwise(None) + pl.when(pl.any_horizontal(pl.all() == "d")) + .then(pl.lit("d")) + .otherwise(None) ) .alias("result") ) out1 = df.with_columns( pl.when(pl.any_horizontal(pl.all() == "a")) - .then("a") + .then(pl.lit("a")) .when(pl.any_horizontal(pl.all() == "d")) - .then("d") + .then(pl.lit("d")) .otherwise(None) .alias("result") ) @@ -221,3 +226,33 @@ def test_object_when_then_4702() -> None: "Type": [pl.Date, pl.UInt8], "New_Type": [pl.UInt16, pl.UInt8], } + + +def test_comp_categorical_lit_dtype() -> None: + df = pl.DataFrame( + data={"column": ["a", "b", "e"], "values": [1, 5, 9]}, + schema=[("column", pl.Categorical), ("more", pl.Int32)], + ) + + assert df.with_columns( + pl.when(pl.col("column") == "e") + .then(pl.lit("d")) + .otherwise(pl.col("column")) + .alias("column") + ).dtypes == [pl.Categorical, pl.Int32] + + +def test_when_then_deprecated_string_input() -> None: + df = pl.DataFrame( + { + "a": [True, False], + "b": [1, 2], + "c": [3, 4], + } + ) + + with pytest.deprecated_call(): + result = df.select(pl.when("a").then("b").otherwise("c").alias("when")) + + expected = pl.Series("when", ["b", "c"]) + assert_series_equal(result.to_series(), expected) diff --git a/py-polars/tests/unit/operations/test_sort.py b/py-polars/tests/unit/operations/test_sort.py index 35d77ce2e5cb..7333e453744e 100644 --- a/py-polars/tests/unit/operations/test_sort.py +++ b/py-polars/tests/unit/operations/test_sort.py @@ -513,7 +513,7 @@ def get_str_ints_df(n: int) -> pl.DataFrame: strs = pl.Series("strs", random.choices(string.ascii_lowercase, k=n)) strs = pl.select( pl.when(strs == "a") - .then("") + 
.then(pl.lit("")) .when(strs == "b") .then(None) .otherwise(strs) @@ -534,7 +534,7 @@ def test_sort_row_fmt() -> None: df_pd = df.to_pandas() for descending in [True, False]: - pl.testing.assert_frame_equal( + assert_frame_equal( df.sort(["strs", "vals"], nulls_last=True, descending=descending), pl.from_pandas( df_pd.sort_values(["strs", "vals"], ascending=not descending)