diff --git a/py-polars/Cargo.lock b/py-polars/Cargo.lock index 3ac0d1bafc1b..cdaf505ea0ae 100644 --- a/py-polars/Cargo.lock +++ b/py-polars/Cargo.lock @@ -3179,8 +3179,3 @@ dependencies = [ "libc", "pkg-config", ] - -[[patch.unused]] -name = "ahash" -version = "0.8.3" -source = "git+https://github.com/orlp/aHash?branch=fix-arm-intrinsics#80685f88d3c120ef39fb3fde1c7786b044af5e8b" diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index e5528722db9e..cfc254d5ad96 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7461,96 +7461,79 @@ def partition_by( return partitions @deprecate_renamed_parameter("periods", "n", version="0.19.11") - def shift(self, n: int = 1) -> DataFrame: + def shift(self, n: int = 1, *, fill_value: IntoExpr | None = None) -> DataFrame: """ - Shift values by the given number of places. + Shift values by the given number of indices. Parameters ---------- n - Number of places to shift (may be negative). + Number of indices to shift forward. If a negative value is passed, values + are shifted in the opposite direction instead. + fill_value + Fill the resulting null values with this value. Accepts expression input. + Non-expression inputs are parsed as literals. - See Also - -------- - shift_and_fill + Notes + ----- + This method is similar to the ``LAG`` operation in SQL when the value for ``n`` + is positive. With a negative value for ``n``, it is similar to ``LEAD``. Examples -------- + By default, values are shifted forward by one index. + >>> df = pl.DataFrame( ... { - ... "foo": [1, 2, 3], - ... "bar": [6, 7, 8], - ... "ham": ["a", "b", "c"], + ... "a": [1, 2, 3, 4], + ... "b": [5, 6, 7, 8], ... } ... ) - >>> df.shift(1) - shape: (3, 3) - ┌──────┬──────┬──────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞══════╪══════╪══════╡ - │ null ┆ null ┆ null │ - │ 1 ┆ 6 ┆ a │ - │ 2 ┆ 7 ┆ b │ - └──────┴──────┴──────┘ - >>> df.shift(-1) - shape: (3, 3) - ┌──────┬──────┬──────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞══════╪══════╪══════╡ - │ 2 ┆ 7 ┆ b │ - │ 3 ┆ 8 ┆ c │ - │ null ┆ null ┆ null │ - └──────┴──────┴──────┘ + >>> df.shift() + shape: (4, 2) + ┌──────┬──────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞══════╪══════╡ + │ null ┆ null │ + │ 1 ┆ 5 │ + │ 2 ┆ 6 │ + │ 3 ┆ 7 │ + └──────┴──────┘ - """ - return self.lazy().shift(n=n).collect(_eager=True) + Pass a negative value to shift in the opposite direction instead. - @deprecate_renamed_parameter("periods", "n", version="0.19.11") - def shift_and_fill( - self, - fill_value: int | str | float, - *, - n: int = 1, - ) -> DataFrame: - """ - Shift values by the given number of places and fill the resulting null values. + >>> df.shift(-2) + shape: (4, 2) + ┌──────┬──────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞══════╪══════╡ + │ 3 ┆ 7 │ + │ 4 ┆ 8 │ + │ null ┆ null │ + │ null ┆ null │ + └──────┴──────┘ - Parameters - ---------- - fill_value - fill None values with this value. - n - Number of places to shift (may be negative). + Specify ``fill_value`` to fill the resulting null values. - Examples - -------- - >>> df = pl.DataFrame( - ... { - ... "foo": [1, 2, 3], - ... "bar": [6, 7, 8], - ... "ham": ["a", "b", "c"], - ... } - ... ) - >>> df.shift_and_fill(n=1, fill_value=0) - shape: (3, 3) - ┌─────┬─────┬─────┐ - │ foo ┆ bar ┆ ham │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str │ - ╞═════╪═════╪═════╡ - │ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 6 ┆ a │ - │ 2 ┆ 7 ┆ b │ - └─────┴─────┴─────┘ + >>> df.shift(-2, fill_value=100) + shape: (4, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 3 ┆ 7 │ + │ 4 ┆ 8 │ + │ 100 ┆ 100 │ + │ 100 ┆ 100 │ + └─────┴─────┘ """ - return ( - self.lazy().shift_and_fill(fill_value=fill_value, n=n).collect(_eager=True) - ) + return self.lazy().shift(n, fill_value=fill_value).collect(_eager=True) def is_duplicated(self) -> Series: """ @@ -10162,6 +10145,30 @@ def apply( """ return self.map_rows(function, return_dtype, inference_size=inference_size) + @deprecate_function("Use `shift` instead.", version="0.19.12") + @deprecate_renamed_parameter("periods", "n", version="0.19.11") + def shift_and_fill( + self, + fill_value: int | str | float, + *, + n: int = 1, + ) -> DataFrame: + """ + Shift values by the given number of places and fill the resulting null values. + + .. deprecated:: 0.19.12 + Use :func:`shift` instead. + + Parameters + ---------- + fill_value + fill None values with this value. + n + Number of places to shift (may be negative). + + """ + return self.shift(n, fill_value=fill_value) + def _prepare_other_arg(other: Any, length: int | None = None) -> Series: # if not a series create singleton series such that it will broadcast diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 458c535ddc18..fa3c80aa6652 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -2400,70 +2400,75 @@ def get(self, index: int | Expr) -> Self: return self._from_pyexpr(self._pyexpr.get(index_lit)) @deprecate_renamed_parameter("periods", "n", version="0.19.11") - def shift(self, n: int = 1) -> Self: + def shift(self, n: int = 1, *, fill_value: IntoExpr | None = None) -> Self: """ - Shift values by the given number of places. + Shift values by the given number of indices. Parameters ---------- n - Number of places to shift (may be negative). + Number of indices to shift forward. If a negative value is passed, values + are shifted in the opposite direction instead. + fill_value + Fill the resulting null values with this value. + + Notes + ----- + This method is similar to the ``LAG`` operation in SQL when the value for ``n`` + is positive. With a negative value for ``n``, it is similar to ``LEAD``. Examples -------- - >>> df = pl.DataFrame({"foo": [1, 2, 3, 4]}) - >>> df.with_columns(foo_shifted=pl.col("foo").shift(1)) + By default, values are shifted forward by one index. + + >>> df = pl.DataFrame({"a": [1, 2, 3, 4]}) + >>> df.with_columns(shift=pl.col("a").shift()) shape: (4, 2) - ┌─────┬─────────────┐ - │ foo ┆ foo_shifted │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════════════╡ - │ 1 ┆ null │ - │ 2 ┆ 1 │ - │ 3 ┆ 2 │ - │ 4 ┆ 3 │ - └─────┴─────────────┘ + ┌─────┬───────┐ + │ a ┆ shift │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═══════╡ + │ 1 ┆ null │ + │ 2 ┆ 1 │ + │ 3 ┆ 2 │ + │ 4 ┆ 3 │ + └─────┴───────┘ - """ - return self._from_pyexpr(self._pyexpr.shift(n)) + Pass a negative value to shift in the opposite direction instead. - @deprecate_renamed_parameter("periods", "n", version="0.19.11") - def shift_and_fill( - self, - fill_value: IntoExpr, - *, - n: int = 1, - ) -> Self: - """ - Shift values by the given number of places and fill the resulting null values. + >>> df.with_columns(shift=pl.col("a").shift(-2)) + shape: (4, 2) + ┌─────┬───────┐ + │ a ┆ shift │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═══════╡ + │ 1 ┆ 3 │ + │ 2 ┆ 4 │ + │ 3 ┆ null │ + │ 4 ┆ null │ + └─────┴───────┘ - Parameters - ---------- - fill_value - Fill None values with the result of this expression. - n - Number of places to shift (may be negative). + Specify ``fill_value`` to fill the resulting null values. - Examples - -------- - >>> df = pl.DataFrame({"foo": [1, 2, 3, 4]}) - >>> df.with_columns(foo_shifted=pl.col("foo").shift_and_fill("a", n=1)) + >>> df.with_columns(shift=pl.col("a").shift(-2, fill_value=100)) shape: (4, 2) - ┌─────┬─────────────┐ - │ foo ┆ foo_shifted │ - │ --- ┆ --- │ - │ i64 ┆ str │ - ╞═════╪═════════════╡ - │ 1 ┆ a │ - │ 2 ┆ 1 │ - │ 3 ┆ 2 │ - │ 4 ┆ 3 │ - └─────┴─────────────┘ + ┌─────┬───────┐ + │ a ┆ shift │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═══════╡ + │ 1 ┆ 3 │ + │ 2 ┆ 4 │ + │ 3 ┆ 100 │ + │ 4 ┆ 100 │ + └─────┴───────┘ """ - fill_value = parse_as_expression(fill_value, str_as_lit=True) - return self._from_pyexpr(self._pyexpr.shift_and_fill(n, fill_value)) + if fill_value is not None: + fill_value = parse_as_expression(fill_value, str_as_lit=True) + return self._from_pyexpr(self._pyexpr.shift(n, fill_value)) def fill_null( self, @@ -9549,6 +9554,30 @@ def clip_max( """ return self.clip(upper_bound=upper_bound) + @deprecate_function("Use `shift` instead.", version="0.19.12") + @deprecate_renamed_parameter("periods", "n", version="0.19.11") + def shift_and_fill( + self, + fill_value: IntoExpr, + *, + n: int = 1, + ) -> Self: + """ + Shift values by the given number of places and fill the resulting null values. + + .. deprecated:: 0.19.12 + Use :func:`shift` instead. + + Parameters + ---------- + fill_value + Fill None values with the result of this expression. + n + Number of places to shift (may be negative). + + """ + return self.shift(n, fill_value=fill_value) + def register_plugin( self, *, diff --git a/py-polars/polars/expr/list.py b/py-polars/polars/expr/list.py index 1aca2964aaf4..39cbfcdca3a3 100644 --- a/py-polars/polars/expr/list.py +++ b/py-polars/polars/expr/list.py @@ -718,23 +718,47 @@ def diff(self, n: int = 1, null_behavior: NullBehavior = "ignore") -> Expr: @deprecate_renamed_parameter("periods", "n", version="0.19.11") def shift(self, n: int | IntoExprColumn = 1) -> Expr: """ - Shift values by the given number of places. + Shift list values by the given number of indices. Parameters ---------- n - Number of places to shift (may be negative). + Number of indices to shift forward. If a negative value is passed, values + are shifted in the opposite direction instead. + + Notes + ----- + This method is similar to the ``LAG`` operation in SQL when the value for ``n`` + is positive. With a negative value for ``n``, it is similar to ``LEAD``. Examples -------- - >>> s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]]) - >>> s.list.shift() - shape: (2,) - Series: 'a' [list[i64]] - [ - [null, 1, … 3] - [null, 10, 2] - ] + By default, list values are shifted forward by one index. + + >>> df = pl.DataFrame({"a": [[1, 2, 3], [4, 5]]}) + >>> df.with_columns(shift=pl.col("a").list.shift()) + shape: (2, 2) + ┌───────────┬──────────────┐ + │ a ┆ shift │ + │ --- ┆ --- │ + │ list[i64] ┆ list[i64] │ + ╞═══════════╪══════════════╡ + │ [1, 2, 3] ┆ [null, 1, 2] │ + │ [4, 5] ┆ [null, 4] │ + └───────────┴──────────────┘ + + Pass a negative value to shift in the opposite direction instead. + + >>> df.with_columns(shift=pl.col("a").list.shift(-2)) + shape: (2, 2) + ┌───────────┬─────────────────┐ + │ a ┆ shift │ + │ --- ┆ --- │ + │ list[i64] ┆ list[i64] │ + ╞═══════════╪═════════════════╡ + │ [1, 2, 3] ┆ [3, null, null] │ + │ [4, 5] ┆ [null, null] │ + └───────────┴─────────────────┘ """ n = parse_as_expression(n) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 90742011ce9c..8f7ba7f949b8 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -4230,101 +4230,81 @@ def reverse(self) -> Self: return self._from_pyldf(self._ldf.reverse()) @deprecate_renamed_parameter("periods", "n", version="0.19.11") - def shift(self, n: int = 1) -> Self: + def shift(self, n: int = 1, *, fill_value: IntoExpr | None = None) -> Self: """ - Shift values by the given number of places. + Shift values by the given number of indices. Parameters ---------- n - Number of places to shift (may be negative). + Number of indices to shift forward. If a negative value is passed, values + are shifted in the opposite direction instead. + fill_value + Fill the resulting null values with this value. Accepts expression input. + Non-expression inputs are parsed as literals. + + Notes + ----- + This method is similar to the ``LAG`` operation in SQL when the value for ``n`` + is positive. With a negative value for ``n``, it is similar to ``LEAD``. Examples -------- + By default, values are shifted forward by one index. + >>> lf = pl.LazyFrame( ... { - ... "a": [1, 3, 5], - ... "b": [2, 4, 6], + ... "a": [1, 2, 3, 4], + ... "b": [5, 6, 7, 8], ... } ... ) - >>> lf.shift(1).collect() - shape: (3, 2) + >>> lf.shift().collect() + shape: (4, 2) ┌──────┬──────┐ │ a ┆ b │ │ --- ┆ --- │ │ i64 ┆ i64 │ ╞══════╪══════╡ │ null ┆ null │ - │ 1 ┆ 2 │ - │ 3 ┆ 4 │ + │ 1 ┆ 5 │ + │ 2 ┆ 6 │ + │ 3 ┆ 7 │ └──────┴──────┘ - >>> lf.shift(-1).collect() - shape: (3, 2) + + Pass a negative value to shift in the opposite direction instead. + + >>> lf.shift(-2).collect() + shape: (4, 2) ┌──────┬──────┐ │ a ┆ b │ │ --- ┆ --- │ │ i64 ┆ i64 │ ╞══════╪══════╡ - │ 3 ┆ 4 │ - │ 5 ┆ 6 │ + │ 3 ┆ 7 │ + │ 4 ┆ 8 │ + │ null ┆ null │ │ null ┆ null │ └──────┴──────┘ - """ - return self._from_pyldf(self._ldf.shift(n)) - - @deprecate_renamed_parameter("periods", "n", version="0.19.11") - def shift_and_fill( - self, - fill_value: Expr | int | str | float, - *, - n: int = 1, - ) -> Self: - """ - Shift values by the given number of places and fill the resulting null values. + Specify ``fill_value`` to fill the resulting null values. - Parameters - ---------- - fill_value - fill None values with the result of this expression. - n - Number of places to shift (may be negative). - - Examples - -------- - >>> lf = pl.LazyFrame( - ... { - ... "a": [1, 3, 5], - ... "b": [2, 4, 6], - ... } - ... ) - >>> lf.shift_and_fill(fill_value=0, n=1).collect() - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 0 ┆ 0 │ - │ 1 ┆ 2 │ - │ 3 ┆ 4 │ - └─────┴─────┘ - >>> lf.shift_and_fill(fill_value=0, n=-1).collect() - shape: (3, 2) + >>> lf.shift(-2, fill_value=100).collect() + shape: (4, 2) ┌─────┬─────┐ │ a ┆ b │ │ --- ┆ --- │ │ i64 ┆ i64 │ ╞═════╪═════╡ - │ 3 ┆ 4 │ - │ 5 ┆ 6 │ - │ 0 ┆ 0 │ + │ 3 ┆ 7 │ + │ 4 ┆ 8 │ + │ 100 ┆ 100 │ + │ 100 ┆ 100 │ └─────┴─────┘ """ - if not isinstance(fill_value, pl.Expr): - fill_value = F.lit(fill_value) - return self._from_pyldf(self._ldf.shift_and_fill(n, fill_value._pyexpr)) + if fill_value is not None: + fill_value = parse_as_expression(fill_value, str_as_lit=True) + return self._from_pyldf(self._ldf.shift(n, fill_value)) def slice(self, offset: int, length: int | None = None) -> Self: """ @@ -6186,3 +6166,27 @@ def map( validate_output_schema=validate_output_schema, streamable=streamable, ) + + @deprecate_function("Use `shift` instead.", version="0.19.12") + @deprecate_renamed_parameter("periods", "n", version="0.19.11") + def shift_and_fill( + self, + fill_value: Expr | int | str | float, + *, + n: int = 1, + ) -> Self: + """ + Shift values by the given number of places and fill the resulting null values. + + .. deprecated:: 0.19.12 + Use :func:`shift` instead. + + Parameters + ---------- + fill_value + fill None values with the result of this expression. + n + Number of places to shift (may be negative). + + """ + return self.shift(n, fill_value=fill_value) diff --git a/py-polars/polars/series/list.py b/py-polars/polars/series/list.py index a0546fa25e7d..bfe167681356 100644 --- a/py-polars/polars/series/list.py +++ b/py-polars/polars/series/list.py @@ -391,22 +391,40 @@ def diff(self, n: int = 1, null_behavior: NullBehavior = "ignore") -> Series: @deprecate_renamed_parameter("periods", "n", version="0.19.11") def shift(self, n: int | IntoExprColumn = 1) -> Series: """ - Shift values by the given number of places. + Shift list values by the given number of indices. Parameters ---------- n - Number of places to shift (may be negative). + Number of indices to shift forward. If a negative value is passed, values + are shifted in the opposite direction instead. + + Notes + ----- + This method is similar to the ``LAG`` operation in SQL when the value for ``n`` + is positive. With a negative value for ``n``, it is similar to ``LEAD``. Examples -------- - >>> s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]]) + By default, list values are shifted forward by one index. + + >>> s = pl.Series([[1, 2, 3], [4, 5]]) >>> s.list.shift() shape: (2,) - Series: 'a' [list[i64]] + Series: '' [list[i64]] + [ + [null, 1, 2] + [null, 4] + ] + + Pass a negative value to shift in the opposite direction instead. + + >>> s.list.shift(-2) + shape: (2,) + Series: '' [list[i64]] [ - [null, 1, … 3] - [null, 10, 2] + [3, null, null] + [null, null] ] """ diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index a2c54e861278..32db495ab0d1 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -5084,53 +5084,62 @@ def map_elements( ) @deprecate_renamed_parameter("periods", "n", version="0.19.11") - def shift(self, n: int = 1) -> Series: + def shift(self, n: int = 1, *, fill_value: IntoExpr | None = None) -> Series: """ - Shift values by the given number of places. + Shift values by the given number of indices. Parameters ---------- n - Number of places to shift (may be negative). + Number of indices to shift forward. If a negative value is passed, values + are shifted in the opposite direction instead. + fill_value + Fill the resulting null values with this value. Accepts expression input. + Non-expression inputs are parsed as literals. + + Notes + ----- + This method is similar to the ``LAG`` operation in SQL when the value for ``n`` + is positive. With a negative value for ``n``, it is similar to ``LEAD``. Examples -------- - >>> s = pl.Series("a", [1, 2, 3]) - >>> s.shift(1) - shape: (3,) - Series: 'a' [i64] + By default, values are shifted forward by one index. + + >>> s = pl.Series([1, 2, 3, 4]) + >>> s.shift() + shape: (4,) + Series: '' [i64] [ null 1 2 + 3 ] - >>> s.shift(-1) - shape: (3,) - Series: 'a' [i64] + + Pass a negative value to shift in the opposite direction instead. + + >>> s.shift(-2) + shape: (4,) + Series: '' [i64] [ - 2 3 + 4 + null null ] - """ - - @deprecate_renamed_parameter("periods", "n", version="0.19.11") - def shift_and_fill( - self, - fill_value: int | Expr, - *, - n: int = 1, - ) -> Series: - """ - Shift values by the given number of places and fill the resulting null values. + Specify ``fill_value`` to fill the resulting null values. - Parameters - ---------- - fill_value - Fill None values with the result of this expression. - n - Number of places to shift (may be negative). + >>> s.shift(-2, fill_value=100) + shape: (4,) + Series: '' [i64] + [ + 3 + 4 + 100 + 100 + ] """ @@ -6869,6 +6878,29 @@ def clip_max( """ + @deprecate_function("Use `shift` instead.", version="0.19.12") + @deprecate_renamed_parameter("periods", "n", version="0.19.11") + def shift_and_fill( + self, + fill_value: int | Expr, + *, + n: int = 1, + ) -> Series: + """ + Shift values by the given number of places and fill the resulting null values. + + .. deprecated:: 0.19.12 + Use :func:`shift` instead. + + Parameters + ---------- + fill_value + Fill None values with the result of this expression. + n + Number of places to shift (may be negative). + + """ + # Keep the `list` and `str` properties below at the end of the definition of Series, # as to not confuse mypy with the type annotation `str` and `list` diff --git a/py-polars/src/expr/general.rs b/py-polars/src/expr/general.rs index de2ce8c1ffcc..6fcc6af3b316 100644 --- a/py-polars/src/expr/general.rs +++ b/py-polars/src/expr/general.rs @@ -339,14 +339,13 @@ impl PyExpr { self.inner.clone().forward_fill(limit).into() } - fn shift(&self, n: i64) -> Self { - self.inner.clone().shift(n).into() - } - fn shift_and_fill(&self, n: i64, fill_value: Self) -> Self { - self.inner - .clone() - .shift_and_fill(n, fill_value.inner) - .into() + fn shift(&self, n: i64, fill_value: Option) -> Self { + let expr = self.inner.clone(); + let out = match fill_value { + Some(v) => expr.shift_and_fill(n, v.inner), + None => expr.shift(n), + }; + out.into() } fn fill_null(&self, expr: Self) -> Self { diff --git a/py-polars/src/lazyframe.rs b/py-polars/src/lazyframe.rs index 4d13f3d49ca7..dae8419ed466 100644 --- a/py-polars/src/lazyframe.rs +++ b/py-polars/src/lazyframe.rs @@ -834,14 +834,13 @@ impl PyLazyFrame { ldf.reverse().into() } - fn shift(&self, n: i64) -> Self { - let ldf = self.ldf.clone(); - ldf.shift(n).into() - } - - fn shift_and_fill(&self, n: i64, fill_value: PyExpr) -> Self { - let ldf = self.ldf.clone(); - ldf.shift_and_fill(n, fill_value.inner).into() + fn shift(&self, n: i64, fill_value: Option) -> Self { + let lf = self.ldf.clone(); + let out = match fill_value { + Some(v) => lf.shift_and_fill(n, v.inner), + None => lf.shift(n), + }; + out.into() } fn fill_nan(&self, fill_value: PyExpr) -> Self { diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index ec16ae3edf0a..853361504da7 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -2039,25 +2039,6 @@ def test_backward_fill() -> None: assert_series_equal(col_a_backward_fill, pl.Series("a", [1, 3, 3]).cast(pl.Float64)) -def test_shift_and_fill() -> None: - df = pl.DataFrame( - { - "foo": [1, 2, 3], - "bar": [6, 7, 8], - "ham": ["a", "b", "c"], - } - ) - result = df.shift_and_fill(fill_value=0, n=1) - expected = pl.DataFrame( - { - "foo": [0, 1, 2], - "bar": [0, 6, 7], - "ham": ["0", "a", "b"], - } - ) - assert_frame_equal(result, expected) - - def test_is_duplicated() -> None: df = pl.DataFrame({"foo": [1, 2, 2], "bar": [6, 7, 7]}) assert_series_equal(df.is_duplicated(), pl.Series("", [False, True, True])) diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py index cd906a46c6a5..4ab83bc6b160 100644 --- a/py-polars/tests/unit/datatypes/test_categorical.py +++ b/py-polars/tests/unit/datatypes/test_categorical.py @@ -141,16 +141,6 @@ def test_cast_null_to_categorical() -> None: ).dtypes == [pl.Categorical] -def test_shift_and_fill() -> None: - df = pl.DataFrame({"a": ["a", "b"]}).with_columns( - [pl.col("a").cast(pl.Categorical)] - ) - - s = df.with_columns(pl.col("a").shift_and_fill("c", n=1))["a"] - assert s.dtype == pl.Categorical - assert s.to_list() == ["c", "a"] - - @StringCache() def test_merge_lit_under_global_cache_4491() -> None: df = pl.DataFrame( diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py index 4b4a50e9b3cd..fa76cf7f1744 100644 --- a/py-polars/tests/unit/datatypes/test_temporal.py +++ b/py-polars/tests/unit/datatypes/test_temporal.py @@ -1421,7 +1421,7 @@ def test_supertype_timezones_4174() -> None: # test if this runs without error date_to_fill = df["dt_London"][0] - df.with_columns(df["dt_London"].shift_and_fill(date_to_fill, n=1)) + df.with_columns(df["dt_London"].shift(fill_value=date_to_fill)) @pytest.mark.skip(reason="from_dicts cannot yet infer timezones") @@ -1434,22 +1434,6 @@ def test_from_dict_tu_consistency() -> None: assert from_dict.dtypes == from_dicts.dtypes -def test_shift_and_fill_group_logicals() -> None: - df = pl.from_records( - [ - (date(2001, 1, 2), "A"), - (date(2001, 1, 3), "A"), - (date(2001, 1, 4), "A"), - (date(2001, 1, 3), "B"), - (date(2001, 1, 4), "B"), - ], - schema=["d", "s"], - ) - assert df.select( - pl.col("d").shift_and_fill(pl.col("d").max(), n=-1).over("s") - ).dtypes == [pl.Date] - - def test_date_arr_concat() -> None: expected = {"d": [[date(2000, 1, 1), date(2000, 1, 1)]]} diff --git a/py-polars/tests/unit/operations/map/test_map_batches.py b/py-polars/tests/unit/operations/map/test_map_batches.py index 4f824dfc9951..615948a4b6a5 100644 --- a/py-polars/tests/unit/operations/map/test_map_batches.py +++ b/py-polars/tests/unit/operations/map/test_map_batches.py @@ -22,7 +22,7 @@ def test_map_no_dtype_set_8531() -> None: df = pl.DataFrame({"a": [1]}) result = df.with_columns( - pl.col("a").map_batches(lambda x: x * 2).shift_and_fill(fill_value=0, n=0) + pl.col("a").map_batches(lambda x: x * 2).shift(n=0, fill_value=0) ) expected = pl.DataFrame({"a": [2]}) diff --git a/py-polars/tests/unit/operations/rolling/test_rolling.py b/py-polars/tests/unit/operations/rolling/test_rolling.py index 1cfd275a0c6a..c7353a7414d4 100644 --- a/py-polars/tests/unit/operations/rolling/test_rolling.py +++ b/py-polars/tests/unit/operations/rolling/test_rolling.py @@ -384,13 +384,7 @@ def test_rolling_slice_pushdown() -> None: by="b", period="2i", ) - .agg( - [ - (pl.col("c") - pl.col("c").shift_and_fill(fill_value=0, n=1)) - .sum() - .alias("c") - ] - ) + .agg([(pl.col("c") - pl.col("c").shift(fill_value=0)).sum().alias("c")]) ) assert df.head(2).collect().to_dict(False) == { "b": ["a", "a"], diff --git a/py-polars/tests/unit/operations/test_group_by_dynamic.py b/py-polars/tests/unit/operations/test_group_by_dynamic.py index a1c2cc3b06f2..fa5dd77aac8b 100644 --- a/py-polars/tests/unit/operations/test_group_by_dynamic.py +++ b/py-polars/tests/unit/operations/test_group_by_dynamic.py @@ -319,11 +319,7 @@ def test_group_by_dynamic_slice_pushdown() -> None: df = ( df.sort("a") .group_by_dynamic("a", by="b", every="2i") - .agg( - (pl.col("c") - pl.col("c").shift_and_fill(fill_value=0, n=1)) - .sum() - .alias("c") - ) + .agg((pl.col("c") - pl.col("c").shift(fill_value=0)).sum().alias("c")) ) assert df.head(2).collect().to_dict(False) == { "b": ["a", "a"], diff --git a/py-polars/tests/unit/operations/test_shift.py b/py-polars/tests/unit/operations/test_shift.py new file mode 100644 index 000000000000..7ad8b2d9200c --- /dev/null +++ b/py-polars/tests/unit/operations/test_shift.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +from datetime import date + +import pytest + +import polars as pl +from polars.testing import assert_frame_equal, assert_series_equal + + +def test_shift() -> None: + a = pl.Series("a", [1, 2, 3]) + assert_series_equal(a.shift(1), pl.Series("a", [None, 1, 2])) + assert_series_equal(a.shift(-1), pl.Series("a", [2, 3, None])) + assert_series_equal(a.shift(-2), pl.Series("a", [3, None, None])) + assert_series_equal(a.shift(-1, fill_value=10), pl.Series("a", [2, 3, 10])) + + +def test_shift_frame(fruits_cars: pl.DataFrame) -> None: + df = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5]}) + out = df.select(pl.col("a").shift(1)) + assert_series_equal(out["a"], pl.Series("a", [None, 1, 2, 3, 4])) + + res = fruits_cars.lazy().shift(2).collect() + + expected = pl.DataFrame( + { + "A": [None, None, 1, 2, 3], + "fruits": [None, None, "banana", "banana", "apple"], + "B": [None, None, 5, 4, 3], + "cars": [None, None, "beetle", "audi", "beetle"], + } + ) + assert_frame_equal(res, expected) + + # negative value + res = fruits_cars.lazy().shift(-2).collect() + for rows in [3, 4]: + for cols in range(4): + assert res[rows, cols] is None + + +def test_shift_and_fill() -> None: + ldf = pl.LazyFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5]}) + + # use exprs + out = ldf.with_columns( + pl.col("a").shift(n=-2, fill_value=pl.col("b").mean()) + ).collect() + assert out["a"].null_count() == 0 + + # use df method + out = ldf.shift(n=2, fill_value=pl.col("b").std()).collect() + assert out["a"].null_count() == 0 + + +def test_shift_categorical() -> None: + df = pl.Series("a", ["a", "b"], dtype=pl.Categorical).to_frame() + + s = df.with_columns(pl.col("a").shift(fill_value="c"))["a"] + assert s.dtype == pl.Categorical + assert s.to_list() == ["c", "a"] + + +def test_shift_frame_with_fill() -> None: + df = pl.DataFrame( + { + "foo": [1, 2, 3], + "bar": [6, 7, 8], + "ham": ["a", "b", "c"], + } + ) + result = df.shift(fill_value=0) + expected = pl.DataFrame( + { + "foo": [0, 1, 2], + "bar": [0, 6, 7], + "ham": ["0", "a", "b"], + } + ) + assert_frame_equal(result, expected) + + +def test_shift_and_fill_group_logicals() -> None: + df = pl.DataFrame( + [ + (date(2001, 1, 2), "A"), + (date(2001, 1, 3), "A"), + (date(2001, 1, 4), "A"), + (date(2001, 1, 3), "B"), + (date(2001, 1, 4), "B"), + ], + schema=["d", "s"], + ) + result = df.select(pl.col("d").shift(fill_value=pl.col("d").max(), n=-1).over("s")) + + assert result.dtypes == [pl.Date] + + +def test_shift_and_fill_deprecated() -> None: + a = pl.Series("a", [1, 2, 3]) + + with pytest.deprecated_call(): + result = a.shift_and_fill(100, n=-1) + + expected = pl.Series("a", [2, 3, 100]) + assert_series_equal(result, expected) + + +def test_shift_and_fill_frame_deprecated() -> None: + lf = pl.LazyFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + with pytest.deprecated_call(): + result = lf.shift_and_fill(100, n=1) + + expected = pl.LazyFrame({"a": [100, 1, 2], "b": [100, 4, 5]}) + assert_frame_equal(result, expected) diff --git a/py-polars/tests/unit/operations/test_window.py b/py-polars/tests/unit/operations/test_window.py index f06d87a3b528..d01b433731b7 100644 --- a/py-polars/tests/unit/operations/test_window.py +++ b/py-polars/tests/unit/operations/test_window.py @@ -239,16 +239,13 @@ def test_window_functions_list_types() -> None: "list_shifted" ].to_list() == [None, [1], [1], [2]] - # filling with None is allowed, but does not make any sense - # as it is the same as shift. - # that's why we don't add it to the allowed types. - assert ( - df.select(pl.col("col_list").shift_and_fill(None, n=1).alias("list_shifted")) - )["list_shifted"].to_list() == [None, [1], [1], [2]] + assert (df.select(pl.col("col_list").shift().alias("list_shifted")))[ + "list_shifted" + ].to_list() == [None, [1], [1], [2]] - assert ( - df.select(pl.col("col_list").shift_and_fill([], n=1).alias("list_shifted")) - )["list_shifted"].to_list() == [[], [1], [1], [2]] + assert (df.select(pl.col("col_list").shift(fill_value=[]).alias("list_shifted")))[ + "list_shifted" + ].to_list() == [[], [1], [1], [2]] def test_sorted_window_expression() -> None: diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index 0eb1bca930a9..5efb1600cab4 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -1080,14 +1080,6 @@ def test_map_elements() -> None: a.map_elements(lambda x: x) -def test_shift() -> None: - a = pl.Series("a", [1, 2, 3]) - assert_series_equal(a.shift(1), pl.Series("a", [None, 1, 2])) - assert_series_equal(a.shift(-1), pl.Series("a", [2, 3, None])) - assert_series_equal(a.shift(-2), pl.Series("a", [3, None, None])) - assert_series_equal(a.shift_and_fill(10, n=-1), pl.Series("a", [2, 3, 10])) - - def test_object() -> None: vals = [[12], "foo", 9] a = pl.Series("a", vals) diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py index 39ed3a3faed9..49cd7fb33d4f 100644 --- a/py-polars/tests/unit/test_lazy.py +++ b/py-polars/tests/unit/test_lazy.py @@ -260,44 +260,6 @@ def test_group_by() -> None: assert_frame_equal(out.sort(by="groups"), expected) -def test_shift(fruits_cars: pl.DataFrame) -> None: - df = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5]}) - out = df.select(pl.col("a").shift(1)) - assert_series_equal(out["a"], pl.Series("a", [None, 1, 2, 3, 4])) - - res = fruits_cars.lazy().shift(2).collect() - - expected = pl.DataFrame( - { - "A": [None, None, 1, 2, 3], - "fruits": [None, None, "banana", "banana", "apple"], - "B": [None, None, 5, 4, 3], - "cars": [None, None, "beetle", "audi", "beetle"], - } - ) - assert_frame_equal(res, expected) - - # negative value - res = fruits_cars.lazy().shift(-2).collect() - for rows in [3, 4]: - for cols in range(4): - assert res[rows, cols] is None - - -def test_shift_and_fill() -> None: - ldf = pl.LazyFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5]}) - - # use exprs - out = ldf.with_columns( - pl.col("a").shift_and_fill(pl.col("b").mean(), n=-2) - ).collect() - assert out["a"].null_count() == 0 - - # use df method - out = ldf.shift_and_fill(pl.col("b").std(), n=2).collect() - assert out["a"].null_count() == 0 - - def test_arg_unique() -> None: ldf = pl.LazyFrame({"a": [4, 1, 4]}) col_a_unique = ldf.select(pl.col("a").arg_unique()).collect()["a"]