diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 6b8feb695f6bd..f0d41df28433b 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -6807,46 +6807,48 @@ def unstack( ... "x": list(ascii_uppercase[0:9]), ... "y": pl.int_range(0, 9, eager=True), ... } - ... ).with_columns(pl.int_ranges(pl.col("y"), pl.col("y") + 3)) + ... ).with_columns( + ... z=pl.int_ranges(pl.col("y"), pl.col("y") + 2, dtype=pl.UInt8), + ... ) >>> df shape: (9, 3) - ┌─────┬─────┬────────────┐ - │ x ┆ y ┆ int_range │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ list[i64] │ - ╞═════╪═════╪════════════╡ - │ A ┆ 0 ┆ [0, 1, 2] │ - │ B ┆ 1 ┆ [1, 2, 3] │ - │ C ┆ 2 ┆ [2, 3, 4] │ - │ D ┆ 3 ┆ [3, 4, 5] │ - │ E ┆ 4 ┆ [4, 5, 6] │ - │ F ┆ 5 ┆ [5, 6, 7] │ - │ G ┆ 6 ┆ [6, 7, 8] │ - │ H ┆ 7 ┆ [7, 8, 9] │ - │ I ┆ 8 ┆ [8, 9, 10] │ - └─────┴─────┴────────────┘ + ┌─────┬─────┬──────────┐ + │ x ┆ y ┆ z │ + │ --- ┆ --- ┆ --- │ + │ str ┆ i64 ┆ list[u8] │ + ╞═════╪═════╪══════════╡ + │ A ┆ 0 ┆ [0, 1] │ + │ B ┆ 1 ┆ [1, 2] │ + │ C ┆ 2 ┆ [2, 3] │ + │ D ┆ 3 ┆ [3, 4] │ + │ E ┆ 4 ┆ [4, 5] │ + │ F ┆ 5 ┆ [5, 6] │ + │ G ┆ 6 ┆ [6, 7] │ + │ H ┆ 7 ┆ [7, 8] │ + │ I ┆ 8 ┆ [8, 9] │ + └─────┴─────┴──────────┘ >>> df.unstack(step=3, how="vertical") shape: (3, 9) - ┌─────┬─────┬─────┬─────┬─────┬─────┬─────────────┬─────────────┬─────────────┐ - │ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ int_range_0 ┆ int_range_1 ┆ int_range_2 │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[i64] ┆ list[i64] ┆ list[i64] │ - ╞═════╪═════╪═════╪═════╪═════╪═════╪═════════════╪═════════════╪═════════════╡ - │ A ┆ D ┆ G ┆ 0 ┆ 3 ┆ 6 ┆ [0, 1, 2] ┆ [3, 4, 5] ┆ [6, 7, 8] │ - │ B ┆ E ┆ H ┆ 1 ┆ 4 ┆ 7 ┆ [1, 2, 3] ┆ [4, 5, 6] ┆ [7, 8, 9] │ - │ C ┆ F ┆ I ┆ 2 ┆ 5 ┆ 8 ┆ [2, 3, 4] ┆ [5, 6, 7] ┆ [8, 9, 10] │ - └─────┴─────┴─────┴─────┴─────┴─────┴─────────────┴─────────────┴─────────────┘ + ┌─────┬─────┬─────┬─────┬─────┬─────┬──────────┬──────────┬──────────┐ + │ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ z_0 ┆ z_1 ┆ z_2 │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[u8] ┆ list[u8] ┆ list[u8] │ + ╞═════╪═════╪═════╪═════╪═════╪═════╪══════════╪══════════╪══════════╡ + │ A ┆ D ┆ G ┆ 0 ┆ 3 ┆ 6 ┆ [0, 1] ┆ [3, 4] ┆ [6, 7] │ + │ B ┆ E ┆ H ┆ 1 ┆ 4 ┆ 7 ┆ [1, 2] ┆ [4, 5] ┆ [7, 8] │ + │ C ┆ F ┆ I ┆ 2 ┆ 5 ┆ 8 ┆ [2, 3] ┆ [5, 6] ┆ [8, 9] │ + └─────┴─────┴─────┴─────┴─────┴─────┴──────────┴──────────┴──────────┘ >>> df.unstack(step=3, how="horizontal") shape: (3, 9) - ┌─────┬─────┬─────┬─────┬─────┬─────┬─────────────┬─────────────┬─────────────┐ - │ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ int_range_0 ┆ int_range_1 ┆ int_range_2 │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[i64] ┆ list[i64] ┆ list[i64] │ - ╞═════╪═════╪═════╪═════╪═════╪═════╪═════════════╪═════════════╪═════════════╡ - │ A ┆ B ┆ C ┆ 0 ┆ 1 ┆ 2 ┆ [0, 1, 2] ┆ [1, 2, 3] ┆ [2, 3, 4] │ - │ D ┆ E ┆ F ┆ 3 ┆ 4 ┆ 5 ┆ [3, 4, 5] ┆ [4, 5, 6] ┆ [5, 6, 7] │ - │ G ┆ H ┆ I ┆ 6 ┆ 7 ┆ 8 ┆ [6, 7, 8] ┆ [7, 8, 9] ┆ [8, 9, 10] │ - └─────┴─────┴─────┴─────┴─────┴─────┴─────────────┴─────────────┴─────────────┘ + ┌─────┬─────┬─────┬─────┬─────┬─────┬──────────┬──────────┬──────────┐ + │ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ z_0 ┆ z_1 ┆ z_2 │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[u8] ┆ list[u8] ┆ list[u8] │ + ╞═════╪═════╪═════╪═════╪═════╪═════╪══════════╪══════════╪══════════╡ + │ A ┆ B ┆ C ┆ 0 ┆ 1 ┆ 2 ┆ [0, 1] ┆ [1, 2] ┆ [2, 3] │ + │ D ┆ E ┆ F ┆ 3 ┆ 4 ┆ 5 ┆ [3, 4] ┆ [4, 5] ┆ [5, 6] │ + │ G ┆ H ┆ I ┆ 6 ┆ 7 ┆ 8 ┆ [6, 7] ┆ [7, 8] ┆ [8, 9] │ + └─────┴─────┴─────┴─────┴─────┴─────┴──────────┴──────────┴──────────┘ >>> import polars.selectors as cs >>> df.unstack(step=4, columns=cs.numeric(), fill_values=0) shape: (4, 3) @@ -6912,7 +6914,7 @@ def unstack( @overload def partition_by( self, - by: str | Iterable[str], + by: str | SelectorType | Iterable[str] | Iterable[SelectorType], *more_by: str, maintain_order: bool = ..., include_key: bool = ..., @@ -6923,7 +6925,7 @@ def partition_by( @overload def partition_by( self, - by: str | Iterable[str], + by: str | SelectorType | Iterable[str] | Iterable[SelectorType], *more_by: str, maintain_order: bool = ..., include_key: bool = ..., @@ -6933,8 +6935,8 @@ def partition_by( def partition_by( self, - by: str | Iterable[str], - *more_by: str, + by: str | SelectorType | Iterable[str] | Iterable[SelectorType], + *more_by: str | SelectorType, maintain_order: bool = True, include_key: bool = True, as_dict: bool = False, @@ -6945,7 +6947,7 @@ def partition_by( Parameters ---------- by - Name of the column(s) to group by. + Column name(s) or selector(s) to group by. *more_by Additional names of columns to group by, specified as positional arguments. maintain_order @@ -7036,7 +7038,8 @@ def partition_by( Return the partitions as a dictionary by specifying ``as_dict=True``. - >>> df.partition_by("a", as_dict=True) # doctest: +IGNORE_RESULT + >>> import polars.selectors as cs + >>> df.partition_by(cs.string(), as_dict=True) # doctest: +IGNORE_RESULT {'a': shape: (2, 3) ┌─────┬─────┬─────┐ │ a ┆ b ┆ c │ @@ -7065,13 +7068,7 @@ def partition_by( └─────┴─────┴─────┘} """ - if isinstance(by, str): - by = [by] - elif not isinstance(by, list): - by = list(by) - if more_by: - by.extend(more_by) - + by = _expand_selectors(self, by, more_by) partitions = [ self._from_pydf(_df) for _df in self._df.partition_by(by, maintain_order, include_key) diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index ca99fee6adb8e..b39720d46cbc3 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -2762,25 +2762,17 @@ def test_partition_by() -> None: {"foo": ["C"], "N": [2], "bar": ["l"]}, ] assert [ - a.to_dict(False) for a in df.partition_by(["foo", "bar"], maintain_order=True) + a.to_dict(False) for a in df.partition_by("foo", "bar", maintain_order=True) ] == expected assert [ - a.to_dict(False) for a in df.partition_by("foo", "bar", maintain_order=True) + a.to_dict(False) for a in df.partition_by(cs.string(), maintain_order=True) ] == expected expected = [ - { - "N": [1], - }, - { - "N": [2], - }, - { - "N": [2, 4], - }, - { - "N": [2], - }, + {"N": [1]}, + {"N": [2]}, + {"N": [2, 4]}, + {"N": [2]}, ] assert [ a.to_dict(False) @@ -2798,7 +2790,7 @@ def test_partition_by() -> None: ] df = pl.DataFrame({"a": ["one", "two", "one", "two"], "b": [1, 2, 3, 4]}) - assert df.partition_by(["a", "b"], as_dict=True)["one", 1].to_dict(False) == { + assert df.partition_by(cs.all(), as_dict=True)["one", 1].to_dict(False) == { "a": ["one"], "b": [1], }