diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 988fd08de9e4..458c535ddc18 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -7645,116 +7645,69 @@ def kurtosis(self, *, fisher: bool = True, bias: bool = True) -> Self: def clip( self, - lower_bound: NumericLiteral | TemporalLiteral | IntoExprColumn, - upper_bound: NumericLiteral | TemporalLiteral | IntoExprColumn, + lower_bound: NumericLiteral | TemporalLiteral | IntoExprColumn | None = None, + upper_bound: NumericLiteral | TemporalLiteral | IntoExprColumn | None = None, ) -> Self: """ - Clip (limit) the values in an array to a `min` and `max` boundary. - - Only works for physical numerical types. - - If you want to clip other dtypes, consider writing a "when, then, otherwise" - expression. See :func:`when` for more information. + Set values outside the given boundaries to the boundary value. Parameters ---------- lower_bound - Lower bound. + Lower bound. Accepts expression input. + Non-expression inputs are parsed as literals. upper_bound - Upper bound. + Upper bound. Accepts expression input. + Non-expression inputs are parsed as literals. - Examples + See Also -------- - >>> df = pl.DataFrame({"foo": [-50, 5, None, 50]}) - >>> df.with_columns(pl.col("foo").clip(1, 10).alias("foo_clipped")) - shape: (4, 2) - ┌──────┬─────────────┐ - │ foo ┆ foo_clipped │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞══════╪═════════════╡ - │ -50 ┆ 1 │ - │ 5 ┆ 5 │ - │ null ┆ null │ - │ 50 ┆ 10 │ - └──────┴─────────────┘ - - """ - lower_bound = parse_as_expression(lower_bound, str_as_lit=True) - upper_bound = parse_as_expression(upper_bound, str_as_lit=True) - return self._from_pyexpr(self._pyexpr.clip(lower_bound, upper_bound)) + when - def clip_min( - self, lower_bound: NumericLiteral | TemporalLiteral | IntoExprColumn - ) -> Self: - """ - Clip (limit) the values in an array to a `min` boundary. - - Only works for physical numerical types. 
- - If you want to clip other dtypes, consider writing a "when, then, otherwise" - expression. See :func:`when` for more information. - - Parameters - ---------- - lower_bound - Lower bound. + Notes + ----- + This method only works for numeric and temporal columns. To clip other data + types, consider writing a `when-then-otherwise` expression. See :func:`when`. Examples -------- - >>> df = pl.DataFrame({"foo": [-50, 5, None, 50]}) - >>> df.with_columns(pl.col("foo").clip_min(0).alias("foo_clipped")) - shape: (4, 2) - ┌──────┬─────────────┐ - │ foo ┆ foo_clipped │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞══════╪═════════════╡ - │ -50 ┆ 0 │ - │ 5 ┆ 5 │ - │ null ┆ null │ - │ 50 ┆ 50 │ - └──────┴─────────────┘ - - """ - lower_bound = parse_as_expression(lower_bound, str_as_lit=True) - return self._from_pyexpr(self._pyexpr.clip_min(lower_bound)) + Specifying both a lower and upper bound: - def clip_max( - self, upper_bound: NumericLiteral | TemporalLiteral | IntoExprColumn - ) -> Self: - """ - Clip (limit) the values in an array to a `max` boundary. - - Only works for physical numerical types. - - If you want to clip other dtypes, consider writing a "when, then, otherwise" - expression. See :func:`when` for more information. + >>> df = pl.DataFrame({"a": [-50, 5, 50, None]}) + >>> df.with_columns(clip=pl.col("a").clip(1, 10)) + shape: (4, 2) + ┌──────┬──────┐ + │ a ┆ clip │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞══════╪══════╡ + │ -50 ┆ 1 │ + │ 5 ┆ 5 │ + │ 50 ┆ 10 │ + │ null ┆ null │ + └──────┴──────┘ - Parameters - ---------- - upper_bound - Upper bound. 
+ Specifying only a single bound: - Examples - -------- - >>> df = pl.DataFrame({"foo": [-50, 5, None, 50]}) - >>> df.with_columns(pl.col("foo").clip_max(0).alias("foo_clipped")) + >>> df.with_columns(clip=pl.col("a").clip(upper_bound=10)) shape: (4, 2) - ┌──────┬─────────────┐ - │ foo ┆ foo_clipped │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞══════╪═════════════╡ - │ -50 ┆ -50 │ - │ 5 ┆ 0 │ - │ null ┆ null │ - │ 50 ┆ 0 │ - └──────┴─────────────┘ + ┌──────┬──────┐ + │ a ┆ clip │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞══════╪══════╡ + │ -50 ┆ -50 │ + │ 5 ┆ 5 │ + │ 50 ┆ 10 │ + │ null ┆ null │ + └──────┴──────┘ """ - upper_bound = parse_as_expression(upper_bound, str_as_lit=True) - return self._from_pyexpr(self._pyexpr.clip_max(upper_bound)) + if lower_bound is not None: + lower_bound = parse_as_expression(lower_bound, str_as_lit=True) + if upper_bound is not None: + upper_bound = parse_as_expression(upper_bound, str_as_lit=True) + return self._from_pyexpr(self._pyexpr.clip(lower_bound, upper_bound)) def lower_bound(self) -> Self: """ @@ -9560,6 +9513,42 @@ def is_last(self) -> Self: """ return self.is_last_distinct() + @deprecate_function("Use `clip` instead.", version="0.19.12") + def clip_min( + self, lower_bound: NumericLiteral | TemporalLiteral | IntoExprColumn + ) -> Self: + """ + Clip (limit) the values in an array to a `min` boundary. + + .. deprecated:: 0.19.12 + Use :func:`clip` instead. + + Parameters + ---------- + lower_bound + Lower bound. + + """ + return self.clip(lower_bound=lower_bound) + + @deprecate_function("Use `clip` instead.", version="0.19.12") + def clip_max( + self, upper_bound: NumericLiteral | TemporalLiteral | IntoExprColumn + ) -> Self: + """ + Clip (limit) the values in an array to a `max` boundary. + + .. deprecated:: 0.19.12 + Use :func:`clip` instead. + + Parameters + ---------- + upper_bound + Upper bound. 
+ + """ + return self.clip(upper_bound=upper_bound) + def register_plugin( self, *, diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 388dcc81da97..a2c54e861278 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -90,6 +90,7 @@ _time_to_pl_time, ) from polars.utils.deprecation import ( + deprecate_function, deprecate_nonkeyword_arguments, deprecate_renamed_function, deprecate_renamed_parameter, @@ -6166,72 +6167,58 @@ def kurtosis(self, *, fisher: bool = True, bias: bool = True) -> float | None: def clip( self, - lower_bound: NumericLiteral | TemporalLiteral | IntoExprColumn, - upper_bound: NumericLiteral | TemporalLiteral | IntoExprColumn, + lower_bound: NumericLiteral | TemporalLiteral | IntoExprColumn | None = None, + upper_bound: NumericLiteral | TemporalLiteral | IntoExprColumn | None = None, ) -> Series: """ - Clip (limit) the values in an array to a `min` and `max` boundary. - - Only works for physical numerical types. - - If you want to clip other dtypes, consider writing a "when, then, otherwise" - expression. See :func:`when` for more information. + Set values outside the given boundaries to the boundary value. Parameters ---------- lower_bound - Minimum value. + Lower bound. Accepts expression input. + Non-expression inputs are parsed as literals. + If set to ``None`` (default), no lower bound is applied. upper_bound - Maximum value. + Upper bound. Accepts expression input. + Non-expression inputs are parsed as literals. + If set to ``None`` (default), no upper bound is applied. + + See Also + -------- + when + + Notes + ----- + This method only works for numeric and temporal columns. To clip other data + types, consider writing a `when-then-otherwise` expression. See :func:`when`. 
Examples -------- - >>> s = pl.Series("foo", [-50, 5, None, 50]) + Specifying both a lower and upper bound: + + >>> s = pl.Series([-50, 5, 50, None]) >>> s.clip(1, 10) shape: (4,) - Series: 'foo' [i64] + Series: '' [i64] [ - 1 - 5 - null - 10 + 1 + 5 + 10 + null ] - """ - - def clip_min( - self, lower_bound: NumericLiteral | TemporalLiteral | IntoExprColumn - ) -> Series: - """ - Clip (limit) the values in an array to a `min` boundary. - - Only works for physical numerical types. - - If you want to clip other dtypes, consider writing a "when, then, otherwise" - expression. See :func:`when` for more information. - - Parameters - ---------- - lower_bound - Lower bound. - - """ - - def clip_max( - self, upper_bound: NumericLiteral | TemporalLiteral | IntoExprColumn - ) -> Series: - """ - Clip (limit) the values in an array to a `max` boundary. - - Only works for physical numerical types. + Specifying only a single bound: - If you want to clip other dtypes, consider writing a "when, then, otherwise" - expression. See :func:`when` for more information. - - Parameters - ---------- - upper_bound - Upper bound. + >>> s.clip(upper_bound=10) + shape: (4,) + Series: '' [i64] + [ + -50 + 5 + 10 + null + ] """ @@ -6848,6 +6835,40 @@ def is_last(self) -> Series: """ + @deprecate_function("Use `clip` instead.", version="0.19.12") + def clip_min( + self, lower_bound: NumericLiteral | TemporalLiteral | IntoExprColumn + ) -> Series: + """ + Clip (limit) the values in an array to a `min` boundary. + + .. deprecated:: 0.19.12 + Use :func:`clip` instead. + + Parameters + ---------- + lower_bound + Lower bound. + + """ + + @deprecate_function("Use `clip` instead.", version="0.19.12") + def clip_max( + self, upper_bound: NumericLiteral | TemporalLiteral | IntoExprColumn + ) -> Series: + """ + Clip (limit) the values in an array to a `max` boundary. + + .. deprecated:: 0.19.12 + Use :func:`clip` instead. + + Parameters + ---------- + upper_bound + Upper bound. 
+ + """ + # Keep the `list` and `str` properties below at the end of the definition of Series, # as to not confuse mypy with the type annotation `str` and `list` diff --git a/py-polars/src/expr/general.rs b/py-polars/src/expr/general.rs index e5af0136637e..de2ce8c1ffcc 100644 --- a/py-polars/src/expr/general.rs +++ b/py-polars/src/expr/general.rs @@ -462,16 +462,15 @@ impl PyExpr { self.inner.clone().ceil().into() } - fn clip(&self, min: Self, max: Self) -> Self { - self.inner.clone().clip(min.inner, max.inner).into() - } - - fn clip_min(&self, min: Self) -> Self { - self.inner.clone().clip_min(min.inner).into() - } - - fn clip_max(&self, max: Self) -> Self { - self.inner.clone().clip_max(max.inner).into() + fn clip(&self, min: Option<Self>, max: Option<Self>) -> Self { + let expr = self.inner.clone(); + let out = match (min, max) { + (Some(min), Some(max)) => expr.clip(min.inner, max.inner), + (Some(min), None) => expr.clip_min(min.inner), + (None, Some(max)) => expr.clip_max(max.inner), + (None, None) => expr, + }; + out.into() } fn abs(&self) -> Self { diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 705f6bce069b..ec16ae3edf0a 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -3328,98 +3328,6 @@ def test_deadlocks_3409() -> None: ) == {"col1": [0, 0, 0]} -def test_clip() -> None: - clip_exprs = [ - pl.col("a").clip(pl.col("min"), pl.col("max")).alias("clip"), - pl.col("a").clip_min(pl.col("min")).alias("clip_min"), - pl.col("a").clip_max(pl.col("max")).alias("clip_max"), - ] - - df = pl.DataFrame( - { - "a": [1, 2, 3, 4, 5], - "min": [0, -1, 4, None, 4], - "max": [2, 1, 8, 5, None], - } - ) - - assert df.select(clip_exprs).to_dict(False) == { - "clip": [1, 1, 4, None, None], - "clip_min": [1, 2, 4, None, 5], - "clip_max": [1, 1, 3, 4, None], - } - - df = pl.DataFrame( - { - "a": [1.0, 2.0, 3.0, 4.0, 5.0], - "min": [0, -1.0, 4.0, None, 4.0], - "max": [2.0, 1.0, 
8.0, 5.0, None], - } - ) - - assert df.select(clip_exprs).to_dict(False) == { - "clip": [1.0, 1.0, 4.0, None, None], - "clip_min": [1.0, 2.0, 4.0, None, 5.0], - "clip_max": [1.0, 1.0, 3.0, 4.0, None], - } - - df = pl.DataFrame( - { - "a": [ - datetime(1995, 6, 5, 10, 30), - datetime(1995, 6, 5), - datetime(2023, 10, 20, 18, 30, 6), - None, - datetime(2023, 9, 24), - datetime(2000, 1, 10), - ], - "min": [ - datetime(1995, 6, 5, 10, 29), - datetime(1996, 6, 5), - datetime(2020, 9, 24), - datetime(2020, 1, 1), - None, - datetime(2000, 1, 1), - ], - "max": [ - datetime(1995, 7, 21, 10, 30), - datetime(2000, 1, 1), - datetime(2023, 9, 20, 18, 30, 6), - datetime(2000, 1, 1), - datetime(1993, 3, 13), - None, - ], - } - ) - - assert df.select(clip_exprs).to_dict(False) == { - "clip": [ - datetime(1995, 6, 5, 10, 30), - datetime(1996, 6, 5), - datetime(2023, 9, 20, 18, 30, 6), - None, - None, - None, - ], - "clip_min": [ - datetime(1995, 6, 5, 10, 30), - datetime(1996, 6, 5), - datetime(2023, 10, 20, 18, 30, 6), - None, - None, - datetime(2000, 1, 10), - ], - "clip_max": [ - datetime(1995, 6, 5, 10, 30), - datetime(1995, 6, 5), - datetime(2023, 9, 20, 18, 30, 6), - None, - datetime(1993, 3, 13), - None, - ], - } - - def test_cum_agg() -> None: df = pl.DataFrame({"a": [1, 2, 3, 2]}) assert_series_equal( diff --git a/py-polars/tests/unit/operations/test_clip.py b/py-polars/tests/unit/operations/test_clip.py new file mode 100644 index 000000000000..f178780b7a6a --- /dev/null +++ b/py-polars/tests/unit/operations/test_clip.py @@ -0,0 +1,114 @@ +from __future__ import annotations + +from datetime import datetime + +import pytest + +import polars as pl +from polars.testing.asserts.series import assert_series_equal + + +def test_clip() -> None: + clip_exprs = [ + pl.col("a").clip(pl.col("min"), pl.col("max")).alias("clip"), + pl.col("a").clip(lower_bound=pl.col("min")).alias("clip_min"), + pl.col("a").clip(upper_bound=pl.col("max")).alias("clip_max"), + ] + + df = pl.DataFrame( + 
{ + "a": [1, 2, 3, 4, 5], + "min": [0, -1, 4, None, 4], + "max": [2, 1, 8, 5, None], + } + ) + + assert df.select(clip_exprs).to_dict(False) == { + "clip": [1, 1, 4, None, None], + "clip_min": [1, 2, 4, None, 5], + "clip_max": [1, 1, 3, 4, None], + } + + df = pl.DataFrame( + { + "a": [1.0, 2.0, 3.0, 4.0, 5.0], + "min": [0, -1.0, 4.0, None, 4.0], + "max": [2.0, 1.0, 8.0, 5.0, None], + } + ) + + assert df.select(clip_exprs).to_dict(False) == { + "clip": [1.0, 1.0, 4.0, None, None], + "clip_min": [1.0, 2.0, 4.0, None, 5.0], + "clip_max": [1.0, 1.0, 3.0, 4.0, None], + } + + df = pl.DataFrame( + { + "a": [ + datetime(1995, 6, 5, 10, 30), + datetime(1995, 6, 5), + datetime(2023, 10, 20, 18, 30, 6), + None, + datetime(2023, 9, 24), + datetime(2000, 1, 10), + ], + "min": [ + datetime(1995, 6, 5, 10, 29), + datetime(1996, 6, 5), + datetime(2020, 9, 24), + datetime(2020, 1, 1), + None, + datetime(2000, 1, 1), + ], + "max": [ + datetime(1995, 7, 21, 10, 30), + datetime(2000, 1, 1), + datetime(2023, 9, 20, 18, 30, 6), + datetime(2000, 1, 1), + datetime(1993, 3, 13), + None, + ], + } + ) + + assert df.select(clip_exprs).to_dict(False) == { + "clip": [ + datetime(1995, 6, 5, 10, 30), + datetime(1996, 6, 5), + datetime(2023, 9, 20, 18, 30, 6), + None, + None, + None, + ], + "clip_min": [ + datetime(1995, 6, 5, 10, 30), + datetime(1996, 6, 5), + datetime(2023, 10, 20, 18, 30, 6), + None, + None, + datetime(2000, 1, 10), + ], + "clip_max": [ + datetime(1995, 6, 5, 10, 30), + datetime(1995, 6, 5), + datetime(2023, 9, 20, 18, 30, 6), + None, + datetime(1993, 3, 13), + None, + ], + } + + +def test_clip_min_max_deprecated() -> None: + s = pl.Series([-1, 0, 1]) + + with pytest.deprecated_call(): + result = s.clip_min(0) + expected = pl.Series([0, 0, 1]) + assert_series_equal(result, expected) + + with pytest.deprecated_call(): + result = s.clip_max(0) + expected = pl.Series([-1, 0, 0]) + assert_series_equal(result, expected)