Skip to content

Commit

Permalink
add selector support to "unnest" frame method
Browse files (browse the repository at this point in the history)
  • Loading branch information
alexander-beedie committed Aug 3, 2023
1 parent 086c7dd commit a7864ae
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 16 deletions.
16 changes: 8 additions & 8 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6455,10 +6455,10 @@ def explode(
self,
columns: (
str
| Sequence[str]
| Expr
| Sequence[Expr]
| SelectorType
| Sequence[str]
| Sequence[Expr]
| Sequence[SelectorType]
),
*more_columns: str | Expr | SelectorType,
Expand Down Expand Up @@ -9009,7 +9009,11 @@ def to_struct(self, name: str) -> Series:
"""
return wrap_s(self._df.to_struct(name))

def unnest(self, columns: str | Sequence[str], *more_columns: str) -> Self:
def unnest(
self,
columns: str | SelectorType | Sequence[str] | Sequence[SelectorType],
*more_columns: str | SelectorType,
) -> Self:
"""
Decompose struct columns into separate columns for each of their fields.
Expand Down Expand Up @@ -9057,11 +9061,7 @@ def unnest(self, columns: str | Sequence[str], *more_columns: str) -> Self:
└────────┴─────┴─────┴──────┴───────────┴───────┘
"""
if isinstance(columns, str):
columns = [columns]
if more_columns:
columns = list(columns)
columns.extend(more_columns)
columns = expand_selectors(self, columns, *more_columns)
return self._from_pydf(self._df.unnest(columns))

def corr(self, **kwargs: Any) -> DataFrame:
Expand Down
24 changes: 23 additions & 1 deletion py-polars/polars/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,29 @@
def expand_selectors(
frame: DataFrame | LazyFrame, items: Any, *more_items: Any
) -> list[Any]:
"""Expand any selectors in the given input."""
"""
Expand any selectors to column names in the given input.
Non-selector values are left as-is.
Examples
--------
>>> from polars.selectors import expand_selectors
>>> import polars.selectors as cs
>>> df = pl.DataFrame(
... {
... "colw": ["a", "b"],
... "colx": ["x", "y"],
... "coly": [123, 456],
... "colz": [2.0, 5.5],
... }
... )
>>> expand_selectors(df, ["colx", cs.numeric()])
['colx', 'coly', 'colz']
>>> expand_selectors(df, cs.string(), cs.float())
['colw', 'colx', 'colz']
"""
expanded: list[Any] = []
for item in (
*(
Expand Down
15 changes: 8 additions & 7 deletions py-polars/tests/unit/datatypes/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pyarrow as pa

import polars as pl
import polars.selectors as cs
from polars.testing import assert_frame_equal


Expand Down Expand Up @@ -95,17 +96,16 @@ def test_struct_hashes() -> None:


def test_struct_unnesting() -> None:
df = pl.DataFrame({"a": [1, 2]})
out = df.select(
df_base = pl.DataFrame({"a": [1, 2]})
df = df_base.select(
[
pl.all().alias("a_original"),
pl.col("a")
.apply(lambda x: {"a": x, "b": x * 2, "c": x % 2 == 0})
.struct.rename_fields(["a", "a_squared", "mod2eq0"])
.alias("foo"),
]
).unnest("foo")

)
expected = pl.DataFrame(
{
"a_original": [1, 2],
Expand All @@ -114,11 +114,12 @@ def test_struct_unnesting() -> None:
"mod2eq0": [False, True],
}
)

assert_frame_equal(out, expected)
for cols in ("foo", cs.ends_with("oo")):
out = df.unnest(cols)
assert_frame_equal(out, expected)

out = (
df.lazy()
df_base.lazy()
.select(
[
pl.all().alias("a_original"),
Expand Down

0 comments on commit a7864ae

Please sign in to comment.