Skip to content

Commit

Permalink
add selector support to "partition_by"
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Aug 4, 2023
1 parent 1831e74 commit 1303247
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 62 deletions.
91 changes: 44 additions & 47 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6807,46 +6807,48 @@ def unstack(
... "x": list(ascii_uppercase[0:9]),
... "y": pl.int_range(0, 9, eager=True),
... }
... ).with_columns(pl.int_ranges(pl.col("y"), pl.col("y") + 3))
... ).with_columns(
... z=pl.int_ranges(pl.col("y"), pl.col("y") + 2, dtype=pl.UInt8),
... )
>>> df
shape: (9, 3)
┌─────┬─────┬────────────
│ x ┆ y ┆ int_range
│ --- ┆ --- ┆ ---
│ str ┆ i64 ┆ list[i64]
╞═════╪═════╪════════════
│ A ┆ 0 ┆ [0, 1, 2]
│ B ┆ 1 ┆ [1, 2, 3]
│ C ┆ 2 ┆ [2, 3, 4]
│ D ┆ 3 ┆ [3, 4, 5]
│ E ┆ 4 ┆ [4, 5, 6]
│ F ┆ 5 ┆ [5, 6, 7]
│ G ┆ 6 ┆ [6, 7, 8]
│ H ┆ 7 ┆ [7, 8, 9]
│ I ┆ 8 ┆ [8, 9, 10]
└─────┴─────┴────────────
┌─────┬─────┬──────────┐
│ x   ┆ y   ┆ z        │
│ --- ┆ --- ┆ ---      │
│ str ┆ i64 ┆ list[u8] │
╞═════╪═════╪══════════╡
│ A   ┆ 0   ┆ [0, 1]   │
│ B   ┆ 1   ┆ [1, 2]   │
│ C   ┆ 2   ┆ [2, 3]   │
│ D   ┆ 3   ┆ [3, 4]   │
│ E   ┆ 4   ┆ [4, 5]   │
│ F   ┆ 5   ┆ [5, 6]   │
│ G   ┆ 6   ┆ [6, 7]   │
│ H   ┆ 7   ┆ [7, 8]   │
│ I   ┆ 8   ┆ [8, 9]   │
└─────┴─────┴──────────┘
>>> df.unstack(step=3, how="vertical")
shape: (3, 9)
┌─────┬─────┬─────┬─────┬─────┬─────┬─────────────┬─────────────┬─────────────┐
│ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ int_range_0 ┆ int_range_1 ┆ int_range_2
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---
│ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[i64] ┆ list[i64] ┆ list[i64]
╞═════╪═════╪═════╪═════╪═════╪═════╪═════════════╪═════════════╪═════════════╡
│ A ┆ D ┆ G ┆ 0 ┆ 3 ┆ 6 ┆ [0, 1, 2] ┆ [3, 4, 5] ┆ [6, 7, 8] │
│ B ┆ E ┆ H ┆ 1 ┆ 4 ┆ 7 ┆ [1, 2, 3] ┆ [4, 5, 6] ┆ [7, 8, 9] │
│ C ┆ F ┆ I ┆ 2 ┆ 5 ┆ 8 ┆ [2, 3, 4] ┆ [5, 6, 7] ┆ [8, 9, 10]
└─────┴─────┴─────┴─────┴─────┴─────┴─────────────┴─────────────┴─────────────┘
┌─────┬─────┬─────┬─────┬─────┬─────┬──────────┬──────────┬──────────┐
│ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ z_0      ┆ z_1      ┆ z_2      │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---      ┆ ---      ┆ ---      │
│ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[u8] ┆ list[u8] ┆ list[u8] │
╞═════╪═════╪═════╪═════╪═════╪═════╪══════════╪══════════╪══════════╡
│ A   ┆ D   ┆ G   ┆ 0   ┆ 3   ┆ 6   ┆ [0, 1]   ┆ [3, 4]   ┆ [6, 7]   │
│ B   ┆ E   ┆ H   ┆ 1   ┆ 4   ┆ 7   ┆ [1, 2]   ┆ [4, 5]   ┆ [7, 8]   │
│ C   ┆ F   ┆ I   ┆ 2   ┆ 5   ┆ 8   ┆ [2, 3]   ┆ [5, 6]   ┆ [8, 9]   │
└─────┴─────┴─────┴─────┴─────┴─────┴──────────┴──────────┴──────────┘
>>> df.unstack(step=3, how="horizontal")
shape: (3, 9)
┌─────┬─────┬─────┬─────┬─────┬─────┬─────────────┬─────────────┬─────────────┐
│ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ int_range_0 ┆ int_range_1 ┆ int_range_2
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---
│ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[i64] ┆ list[i64] ┆ list[i64]
╞═════╪═════╪═════╪═════╪═════╪═════╪═════════════╪═════════════╪═════════════╡
│ A ┆ B ┆ C ┆ 0 ┆ 1 ┆ 2 ┆ [0, 1, 2] ┆ [1, 2, 3] ┆ [2, 3, 4] │
│ D ┆ E ┆ F ┆ 3 ┆ 4 ┆ 5 ┆ [3, 4, 5] ┆ [4, 5, 6] ┆ [5, 6, 7] │
│ G ┆ H ┆ I ┆ 6 ┆ 7 ┆ 8 ┆ [6, 7, 8] ┆ [7, 8, 9] ┆ [8, 9, 10]
└─────┴─────┴─────┴─────┴─────┴─────┴─────────────┴─────────────┴─────────────┘
┌─────┬─────┬─────┬─────┬─────┬─────┬──────────┬──────────┬──────────┐
│ x_0 ┆ x_1 ┆ x_2 ┆ y_0 ┆ y_1 ┆ y_2 ┆ z_0      ┆ z_1      ┆ z_2      │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---      ┆ ---      ┆ ---      │
│ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ list[u8] ┆ list[u8] ┆ list[u8] │
╞═════╪═════╪═════╪═════╪═════╪═════╪══════════╪══════════╪══════════╡
│ A   ┆ B   ┆ C   ┆ 0   ┆ 1   ┆ 2   ┆ [0, 1]   ┆ [1, 2]   ┆ [2, 3]   │
│ D   ┆ E   ┆ F   ┆ 3   ┆ 4   ┆ 5   ┆ [3, 4]   ┆ [4, 5]   ┆ [5, 6]   │
│ G   ┆ H   ┆ I   ┆ 6   ┆ 7   ┆ 8   ┆ [6, 7]   ┆ [7, 8]   ┆ [8, 9]   │
└─────┴─────┴─────┴─────┴─────┴─────┴──────────┴──────────┴──────────┘
>>> import polars.selectors as cs
>>> df.unstack(step=4, columns=cs.numeric(), fill_values=0)
shape: (4, 3)
Expand Down Expand Up @@ -6912,7 +6914,7 @@ def unstack(
@overload
def partition_by(
self,
by: str | Iterable[str],
by: str | SelectorType | Iterable[str] | Iterable[SelectorType],
*more_by: str,
maintain_order: bool = ...,
include_key: bool = ...,
Expand All @@ -6923,7 +6925,7 @@ def partition_by(
@overload
def partition_by(
self,
by: str | Iterable[str],
by: str | SelectorType | Iterable[str] | Iterable[SelectorType],
*more_by: str,
maintain_order: bool = ...,
include_key: bool = ...,
Expand All @@ -6933,8 +6935,8 @@ def partition_by(

def partition_by(
self,
by: str | Iterable[str],
*more_by: str,
by: str | SelectorType | Iterable[str] | Iterable[SelectorType],
*more_by: str | SelectorType,
maintain_order: bool = True,
include_key: bool = True,
as_dict: bool = False,
Expand All @@ -6945,7 +6947,7 @@ def partition_by(
Parameters
----------
by
Name of the column(s) to group by.
Column name(s) or selector(s) to group by.
*more_by
Additional column names or selectors to group by, specified as positional arguments.
maintain_order
Expand Down Expand Up @@ -7036,7 +7038,8 @@ def partition_by(
Return the partitions as a dictionary by specifying ``as_dict=True``.
>>> df.partition_by("a", as_dict=True) # doctest: +IGNORE_RESULT
>>> import polars.selectors as cs
>>> df.partition_by(cs.string(), as_dict=True) # doctest: +IGNORE_RESULT
{'a': shape: (2, 3)
┌─────┬─────┬─────┐
│ a ┆ b ┆ c │
Expand Down Expand Up @@ -7065,13 +7068,7 @@ def partition_by(
└─────┴─────┴─────┘}
"""
if isinstance(by, str):
by = [by]
elif not isinstance(by, list):
by = list(by)
if more_by:
by.extend(more_by)

by = _expand_selectors(self, by, more_by)
partitions = [
self._from_pydf(_df)
for _df in self._df.partition_by(by, maintain_order, include_key)
Expand Down
22 changes: 7 additions & 15 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -2762,25 +2762,17 @@ def test_partition_by() -> None:
{"foo": ["C"], "N": [2], "bar": ["l"]},
]
assert [
a.to_dict(False) for a in df.partition_by(["foo", "bar"], maintain_order=True)
a.to_dict(False) for a in df.partition_by("foo", "bar", maintain_order=True)
] == expected
assert [
a.to_dict(False) for a in df.partition_by("foo", "bar", maintain_order=True)
a.to_dict(False) for a in df.partition_by(cs.string(), maintain_order=True)
] == expected

expected = [
{
"N": [1],
},
{
"N": [2],
},
{
"N": [2, 4],
},
{
"N": [2],
},
{"N": [1]},
{"N": [2]},
{"N": [2, 4]},
{"N": [2]},
]
assert [
a.to_dict(False)
Expand All @@ -2798,7 +2790,7 @@ def test_partition_by() -> None:
]

df = pl.DataFrame({"a": ["one", "two", "one", "two"], "b": [1, 2, 3, 4]})
assert df.partition_by(["a", "b"], as_dict=True)["one", 1].to_dict(False) == {
assert df.partition_by(cs.all(), as_dict=True)["one", 1].to_dict(False) == {
"a": ["one"],
"b": [1],
}
Expand Down

0 comments on commit 1303247

Please sign in to comment.