Skip to content

Commit

Permalink
docs: update documentation for group_by (narwhals-dev#332)
Browse files Browse the repository at this point in the history
  • Loading branch information
marenwestermann authored Jun 23, 2024
1 parent 0d08b5b commit 1767254
Showing 1 changed file with 93 additions and 53 deletions.
146 changes: 93 additions & 53 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1212,31 +1212,32 @@ def group_by(self, *keys: str | Iterable[str]) -> GroupBy:
GroupBy: Object which can be used to perform aggregations.
Examples:
Group by one column and call `agg` to compute the grouped sum of another
column.
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pl = pl.DataFrame(
... {
... "a": ["a", "b", "a", "b", "c"],
... "b": [1, 2, 1, 3, 3],
... "c": [5, 4, 3, 2, 1],
... }
... )
>>> df = nw.DataFrame(df_pl)
>>> df
┌───────────────────────────────────────────────┐
| Narwhals DataFrame |
| Use `narwhals.to_native` to see native output |
└───────────────────────────────────────────────┘
>>> dframe = df.group_by("a").agg(nw.col("b").sum()).sort("a")
>>> dframe
┌───────────────────────────────────────────────┐
| Narwhals DataFrame |
| Use `narwhals.to_native` to see native output |
└───────────────────────────────────────────────┘
>>> nw.to_native(dframe)
>>> df = {
... "a": ["a", "b", "a", "b", "c"],
... "b": [1, 2, 1, 3, 3],
... "c": [5, 4, 3, 2, 1],
... }
>>> df_pd = pd.DataFrame(df)
>>> df_pl = pl.DataFrame(df)
Let's define a dataframe-agnostic function in which we group by one column
and call `agg` to compute the grouped sum of another column.
>>> @nw.narwhalify
... def func(df):
... return df.group_by("a").agg(nw.col("b").sum()).sort("a")
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
   a  b
0  a  2
1  b  5
2  c  3
>>> func(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a ┆ b │
Expand All @@ -1250,23 +1251,26 @@ def group_by(self, *keys: str | Iterable[str]) -> GroupBy:
Group by multiple columns by passing a list of column names.
>>> dframe = df.group_by(["a", "b"]).agg(nw.max("c")).sort("a", "b")
>>> dframe
┌───────────────────────────────────────────────┐
| Narwhals DataFrame |
| Use `narwhals.to_native` to see native output |
└───────────────────────────────────────────────┘
>>> nw.to_native(dframe) # doctest: +SKIP
>>> @nw.narwhalify
... def func(df):
... return df.group_by(["a", "b"]).agg(nw.max("c")).sort("a", "b")
>>> func(df_pd)
   a  b  c
0  a  1  5
1  b  2  4
2  b  3  2
3  c  3  1
>>> func(df_pl)
shape: (4, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ a   ┆ 1   ┆ 5   │
│ b   ┆ 2   ┆ 4   │
│ b   ┆ 3   ┆ 2   │
│ c   ┆ 3   ┆ 1   │
│ a   ┆ 1   ┆ 5   │
└─────┴─────┴─────┘
"""
from narwhals.group_by import GroupBy
Expand Down Expand Up @@ -2599,23 +2603,44 @@ def group_by(self, *keys: str | Iterable[str]) -> LazyGroupBy:
Group by one column and call `agg` to compute the grouped sum of
another column.
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> lf_pl = pl.LazyFrame(
... {
... "a": ["a", "b", "a", "b", "c"],
... "b": [1, 2, 1, 3, 3],
... "c": [5, 4, 3, 2, 1],
... }
... )
>>> lf = nw.LazyFrame(lf_pl)
>>> lframe = lf.group_by("a").agg(nw.col("b").sum()).collect().sort("a")
>>> lframe
┌───────────────────────────────────────────────┐
| Narwhals DataFrame |
| Use `narwhals.to_native` to see native output |
└───────────────────────────────────────────────┘
>>> nw.to_native(lframe)
>>> df = {
... "a": ["a", "b", "a", "b", "c"],
... "b": [1, 2, 1, 3, 3],
... "c": [5, 4, 3, 2, 1],
... }
>>> df_pd = pd.DataFrame(df)
>>> df_pl = pl.DataFrame(df)
>>> lf_pl = pl.LazyFrame(df)
Let's define a dataframe-agnostic function in which we group by one column
and call `agg` to compute the grouped sum of another column.
>>> @nw.narwhalify
... def func(df):
... return df.group_by("a").agg(nw.col("b").sum()).sort("a")
We can then pass either pandas or Polars (eager or lazy) to `func`:
>>> func(df_pd)
   a  b
0  a  2
1  b  5
2  c  3
>>> func(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════╪═════╡
│ a   ┆ 2   │
│ b   ┆ 5   │
│ c   ┆ 3   │
└─────┴─────┘
>>> func(lf_pl).collect()
shape: (3, 2)
┌─────┬─────┐
│ a ┆ b │
Expand All @@ -2629,13 +2654,28 @@ def group_by(self, *keys: str | Iterable[str]) -> LazyGroupBy:
Group by multiple columns by passing a list of column names.
>>> lframe = lf.group_by(["a", "b"]).agg(nw.max("c")).collect().sort(["a", "b"])
>>> lframe
┌───────────────────────────────────────────────┐
| Narwhals DataFrame |
| Use `narwhals.to_native` to see native output |
└───────────────────────────────────────────────┘
>>> nw.to_native(lframe)
>>> @nw.narwhalify
... def func(df):
... return df.group_by(["a", "b"]).agg(nw.max("c")).sort(["a", "b"])
>>> func(df_pd)
   a  b  c
0  a  1  5
1  b  2  4
2  b  3  2
3  c  3  1
>>> func(df_pl)
shape: (4, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ a   ┆ 1   ┆ 5   │
│ b   ┆ 2   ┆ 4   │
│ b   ┆ 3   ┆ 2   │
│ c   ┆ 3   ┆ 1   │
└─────┴─────┴─────┘
>>> func(lf_pl).collect()
shape: (4, 3)
┌─────┬─────┬─────┐
│ a ┆ b ┆ c │
Expand Down

0 comments on commit 1767254

Please sign in to comment.