add selector support to "partition_by"

pola-rs · Aug 4, 2023 · 1303247 · 1303247
1 parent 1831e74
commit 1303247
Show file tree

Hide file tree

Showing 2 changed files with 51 additions and 62 deletions.
diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
@@ -6807,46 +6807,48 @@ def unstack(
         ...         "x": list(ascii_uppercase[0:9]),
         ...         "y": pl.int_range(0, 9, eager=True),
         ...     }
-        ... ).with_columns(pl.int_ranges(pl.col("y"), pl.col("y") + 3))
+        ... ).with_columns(
+        ...     z=pl.int_ranges(pl.col("y"), pl.col("y") + 2, dtype=pl.UInt8),
+        ... )
         >>> df
         shape: (9, 3)
-        ┌─────┬─────┬────────────┐
-        │ x   ┆ y   ┆ int_range  │
-        │ --- ┆ --- ┆ ---        │
-        │ str ┆ i64 ┆ list[i64]  │
-        ╞═════╪═════╪════════════╡
-        │ A   ┆ 0   ┆ [0, 1, 2]  │
-        │ B   ┆ 1   ┆ [1, 2, 3]  │
-        │ C   ┆ 2   ┆ [2, 3, 4]  │
-        │ D   ┆ 3   ┆ [3, 4, 5]  │
-        │ E   ┆ 4   ┆ [4, 5, 6]  │
-        │ F   ┆ 5   ┆ [5, 6, 7]  │
-        │ G   ┆ 6   ┆ [6, 7, 8]  │
-        │ H   ┆ 7   ┆ [7, 8, 9]  │
-        │ I   ┆ 8   ┆ [8, 9, 10] │
-        └─────┴─────┴────────────┘
+        ┌─────┬─────┬──────────┐
+        │ x   ┆ y   ┆ z        │
+        │ --- ┆ --- ┆ ---      │
+        │ str ┆ i64 ┆ list[u8] │
+        ╞═════╪═════╪══════════╡
+        │ A   ┆ 0   ┆ [0, 1]   │
+        │ B   ┆ 1   ┆ [1, 2]   │
+        │ C   ┆ 2   ┆ [2, 3]   │
+        │ D   ┆ 3   ┆ [3, 4]   │
+        │ E   ┆ 4   ┆ [4, 5]   │
+        │ F   ┆ 5   ┆ [5, 6]   │
+        │ G   ┆ 6   ┆ [6, 7]   │
+        │ H   ┆ 7   ┆ [7, 8]   │
+        │ I   ┆ 8   ┆ [8, 9]   │
+        └─────┴─────┴──────────┘
         >>> df.unstack(step=3, how="vertical")
         shape: (3, 9)
-        ┌─────┬─────┬─────┬─────┬─────┬─────┬─────────────┬─────────────┬─────────────┐
-        │ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ int_range_0 ┆ int_range_1 ┆ int_range_2 │
-        │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---         ┆ ---         ┆ ---         │
-        │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[i64]   ┆ list[i64]   ┆ list[i64]   │
-        ╞═════╪═════╪═════╪═════╪═════╪═════╪═════════════╪═════════════╪═════════════╡
-        │ A   ┆ D   ┆ G   ┆ 0   ┆ 3   ┆ 6   ┆ [0, 1, 2]   ┆ [3, 4, 5]   ┆ [6, 7, 8]   │
-        │ B   ┆ E   ┆ H   ┆ 1   ┆ 4   ┆ 7   ┆ [1, 2, 3]   ┆ [4, 5, 6]   ┆ [7, 8, 9]   │
-        │ C   ┆ F   ┆ I   ┆ 2   ┆ 5   ┆ 8   ┆ [2, 3, 4]   ┆ [5, 6, 7]   ┆ [8, 9, 10]  │
-        └─────┴─────┴─────┴─────┴─────┴─────┴─────────────┴─────────────┴─────────────┘
+        ┌─────┬─────┬─────┬─────┬─────┬─────┬──────────┬──────────┬──────────┐
+        │ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ z_0      ┆ z_1      ┆ z_2      │
+        │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---      ┆ ---      ┆ ---      │
+        │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[u8] ┆ list[u8] ┆ list[u8] │
+        ╞═════╪═════╪═════╪═════╪═════╪═════╪══════════╪══════════╪══════════╡
+        │ A   ┆ D   ┆ G   ┆ 0   ┆ 3   ┆ 6   ┆ [0, 1]   ┆ [3, 4]   ┆ [6, 7]   │
+        │ B   ┆ E   ┆ H   ┆ 1   ┆ 4   ┆ 7   ┆ [1, 2]   ┆ [4, 5]   ┆ [7, 8]   │
+        │ C   ┆ F   ┆ I   ┆ 2   ┆ 5   ┆ 8   ┆ [2, 3]   ┆ [5, 6]   ┆ [8, 9]   │
+        └─────┴─────┴─────┴─────┴─────┴─────┴──────────┴──────────┴──────────┘
         >>> df.unstack(step=3, how="horizontal")
         shape: (3, 9)
-        ┌─────┬─────┬─────┬─────┬─────┬─────┬─────────────┬─────────────┬─────────────┐
-        │ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ int_range_0 ┆ int_range_1 ┆ int_range_2 │
-        │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---         ┆ ---         ┆ ---         │
-        │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[i64]   ┆ list[i64]   ┆ list[i64]   │
-        ╞═════╪═════╪═════╪═════╪═════╪═════╪═════════════╪═════════════╪═════════════╡
-        │ A   ┆ B   ┆ C   ┆ 0   ┆ 1   ┆ 2   ┆ [0, 1, 2]   ┆ [1, 2, 3]   ┆ [2, 3, 4]   │
-        │ D   ┆ E   ┆ F   ┆ 3   ┆ 4   ┆ 5   ┆ [3, 4, 5]   ┆ [4, 5, 6]   ┆ [5, 6, 7]   │
-        │ G   ┆ H   ┆ I   ┆ 6   ┆ 7   ┆ 8   ┆ [6, 7, 8]   ┆ [7, 8, 9]   ┆ [8, 9, 10]  │
-        └─────┴─────┴─────┴─────┴─────┴─────┴─────────────┴─────────────┴─────────────┘
+        ┌─────┬─────┬─────┬─────┬─────┬─────┬──────────┬──────────┬──────────┐
+        │ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ z_0      ┆ z_1      ┆ z_2      │
+        │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---      ┆ ---      ┆ ---      │
+        │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[u8] ┆ list[u8] ┆ list[u8] │
+        ╞═════╪═════╪═════╪═════╪═════╪═════╪══════════╪══════════╪══════════╡
+        │ A   ┆ B   ┆ C   ┆ 0   ┆ 1   ┆ 2   ┆ [0, 1]   ┆ [1, 2]   ┆ [2, 3]   │
+        │ D   ┆ E   ┆ F   ┆ 3   ┆ 4   ┆ 5   ┆ [3, 4]   ┆ [4, 5]   ┆ [5, 6]   │
+        │ G   ┆ H   ┆ I   ┆ 6   ┆ 7   ┆ 8   ┆ [6, 7]   ┆ [7, 8]   ┆ [8, 9]   │
+        └─────┴─────┴─────┴─────┴─────┴─────┴──────────┴──────────┴──────────┘
         >>> import polars.selectors as cs
         >>> df.unstack(step=4, columns=cs.numeric(), fill_values=0)
         shape: (4, 3)
@@ -6912,7 +6914,7 @@ def unstack(
     @overload
     def partition_by(
         self,
-        by: str | Iterable[str],
+        by: str | SelectorType | Iterable[str] | Iterable[SelectorType],
         *more_by: str,
         maintain_order: bool = ...,
         include_key: bool = ...,
@@ -6923,7 +6925,7 @@ def partition_by(
     @overload
     def partition_by(
         self,
-        by: str | Iterable[str],
+        by: str | SelectorType | Iterable[str] | Iterable[SelectorType],
         *more_by: str,
         maintain_order: bool = ...,
         include_key: bool = ...,
@@ -6933,8 +6935,8 @@ def partition_by(
 
     def partition_by(
         self,
-        by: str | Iterable[str],
-        *more_by: str,
+        by: str | SelectorType | Iterable[str] | Iterable[SelectorType],
+        *more_by: str | SelectorType,
         maintain_order: bool = True,
         include_key: bool = True,
         as_dict: bool = False,
@@ -6945,7 +6947,7 @@ def partition_by(
         Parameters
         ----------
         by
-            Name of the column(s) to group by.
+            Column name(s) or selector(s) to group by.
         *more_by
-            Additional names of columns to group by, specified as positional arguments.
+            Additional column names or selectors to group by, as positional arguments.
         maintain_order
@@ -7036,7 +7038,8 @@ def partition_by(
 
         Return the partitions as a dictionary by specifying ``as_dict=True``.
 
-        >>> df.partition_by("a", as_dict=True)  # doctest: +IGNORE_RESULT
+        >>> import polars.selectors as cs
+        >>> df.partition_by(cs.string(), as_dict=True)  # doctest: +IGNORE_RESULT
         {'a': shape: (2, 3)
         ┌─────┬─────┬─────┐
         │ a   ┆ b   ┆ c   │
@@ -7065,13 +7068,7 @@ def partition_by(
         └─────┴─────┴─────┘}
 
         """
-        if isinstance(by, str):
-            by = [by]
-        elif not isinstance(by, list):
-            by = list(by)
-        if more_by:
-            by.extend(more_by)
-
+        by = _expand_selectors(self, by, more_by)
         partitions = [
             self._from_pydf(_df)
             for _df in self._df.partition_by(by, maintain_order, include_key)

diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py
@@ -2762,25 +2762,17 @@ def test_partition_by() -> None:
         {"foo": ["C"], "N": [2], "bar": ["l"]},
     ]
     assert [
-        a.to_dict(False) for a in df.partition_by(["foo", "bar"], maintain_order=True)
+        a.to_dict(False) for a in df.partition_by("foo", "bar", maintain_order=True)
     ] == expected
     assert [
-        a.to_dict(False) for a in df.partition_by("foo", "bar", maintain_order=True)
+        a.to_dict(False) for a in df.partition_by(cs.string(), maintain_order=True)
     ] == expected
 
     expected = [
-        {
-            "N": [1],
-        },
-        {
-            "N": [2],
-        },
-        {
-            "N": [2, 4],
-        },
-        {
-            "N": [2],
-        },
+        {"N": [1]},
+        {"N": [2]},
+        {"N": [2, 4]},
+        {"N": [2]},
     ]
     assert [
         a.to_dict(False)
@@ -2798,7 +2790,7 @@ def test_partition_by() -> None:
     ]
 
     df = pl.DataFrame({"a": ["one", "two", "one", "two"], "b": [1, 2, 3, 4]})
-    assert df.partition_by(["a", "b"], as_dict=True)["one", 1].to_dict(False) == {
+    assert df.partition_by(cs.all(), as_dict=True)["one", 1].to_dict(False) == {
         "a": ["one"],
         "b": [1],
     }