Skip to content

Commit

Permalink
enh: .head() and .tail() (#297)
Browse files Browse the repository at this point in the history
* feat: head and tail

* Update narwhals/dataframe.py

---------

Co-authored-by: Marco Edward Gorelli <[email protected]>
  • Loading branch information
FBruzzesi and MarcoGorelli authored Jun 13, 2024
1 parent 831fb9e commit d77f0b9
Show file tree
Hide file tree
Showing 12 changed files with 371 additions and 33 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- select
- shape
- sort
- tail
- to_dict
- to_numpy
- to_pandas
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
- drop_nulls
- fill_null
- filter
- head
- is_between
- is_duplicated
- is_first_distinct
Expand All @@ -32,6 +33,7 @@
- sort
- std
- sum
- tail
- unique
show_source: false
show_bases: false
1 change: 1 addition & 0 deletions docs/api-reference/lazyframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
- schema
- select
- sort
- tail
- unique
- with_columns
- with_row_index
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
- dtype
- fill_null
- filter
- head
- is_between
- is_duplicated
- is_empty
Expand All @@ -37,6 +38,7 @@
- sort
- std
- sum
- tail
- to_frame
- to_numpy
- to_pandas
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@ def join(
def head(self, n: int) -> Self:
    """Take the first `n` rows of the wrapped native dataframe.

    Arguments:
        n: Row count, forwarded unchanged to the native `head` call.

    Returns:
        Whatever `_from_dataframe` builds from the native result.
    """
    first_rows = self._dataframe.head(n)
    return self._from_dataframe(first_rows)

def tail(self, n: int) -> Self:
    """Take the last `n` rows of the wrapped native dataframe.

    Arguments:
        n: Row count, forwarded unchanged to the native `tail` call.

    Returns:
        Whatever `_from_dataframe` builds from the native result.
    """
    last_rows = self._dataframe.tail(n)
    return self._from_dataframe(last_rows)

def unique(self, subset: str | list[str]) -> Self:
    """Drop duplicate rows, comparing only the columns named in `subset`.

    Arguments:
        subset: Column name or list of column names; passed through `flatten`
            before being handed to the native `drop_duplicates`.

    Returns:
        Whatever `_from_dataframe` builds from the de-duplicated native frame.
    """
    flat_subset = flatten(subset)
    deduplicated = self._dataframe.drop_duplicates(subset=flat_subset)
    return self._from_dataframe(deduplicated)
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,12 @@ def quantile(
self, "quantile", quantile, interpolation, returns_scalar=True
)

def head(self, n: int) -> Self:
    """Build the `head` expression.

    Hands this expression, the method name "head" and `n` to
    `reuse_series_implementation` and returns its result.
    """
    method_name = "head"
    return reuse_series_implementation(self, method_name, n)

def tail(self, n: int) -> Self:
    """Build the `tail` expression.

    Hands this expression, the method name "tail" and `n` to
    `reuse_series_implementation` and returns its result.
    """
    method_name = "tail"
    return reuse_series_implementation(self, method_name, n)

@property
def str(self) -> PandasExprStringNamespace:
    """Accessor returning a `PandasExprStringNamespace` built from this expression."""
    namespace = PandasExprStringNamespace(self)
    return namespace
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,12 @@ def zip_with(self: Self, mask: Any, other: Any) -> PandasSeries:
res = ser.where(mask._series, other._series)
return self._from_series(res)

def head(self: Self, n: int) -> Self:
    """Take the first `n` elements of the wrapped native series.

    Arguments:
        n: Element count, forwarded unchanged to the native `head` call.

    Returns:
        Whatever `_from_series` builds from the native result.
    """
    leading = self._series.head(n)
    return self._from_series(leading)

def tail(self: Self, n: int) -> Self:
    """Take the last `n` elements of the wrapped native series.

    Arguments:
        n: Element count, forwarded unchanged to the native `tail` call.

    Returns:
        Whatever `_from_series` builds from the native result.
    """
    trailing = self._series.tail(n)
    return self._from_series(trailing)

@property
def str(self) -> PandasSeriesStringNamespace:
    """Accessor returning a `PandasSeriesStringNamespace` built from this series."""
    namespace = PandasSeriesStringNamespace(self)
    return namespace
Expand Down
175 changes: 142 additions & 33 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ def rename(self, mapping: dict[str, str]) -> Self:
def head(self, n: int) -> Self:
    """Delegate `head(n)` to the wrapped dataframe and re-wrap the result.

    Arguments:
        n: Row count, forwarded unchanged to the wrapped object's `head`.
    """
    truncated = self._dataframe.head(n)
    return self._from_dataframe(truncated)

def tail(self, n: int) -> Self:
    """Delegate `tail(n)` to the wrapped dataframe and re-wrap the result.

    Arguments:
        n: Row count, forwarded unchanged to the wrapped object's `tail`.
    """
    truncated = self._dataframe.tail(n)
    return self._from_dataframe(truncated)

def drop(self, *columns: str | Iterable[str]) -> Self:
    """Delegate `drop(*columns)` to the wrapped dataframe and re-wrap the result.

    Arguments:
        *columns: Column names (or iterables of names), unpacked unchanged
            into the wrapped object's `drop` call.
    """
    remaining = self._dataframe.drop(*columns)
    return self._from_dataframe(remaining)

Expand Down Expand Up @@ -834,7 +837,7 @@ def rename(self, mapping: dict[str, str]) -> Self:
"""
return super().rename(mapping)

def head(self, n: int) -> Self:
def head(self, n: int = 5) -> Self:
"""
Get the first `n` rows.
Expand All @@ -854,12 +857,11 @@ def head(self, n: int) -> Self:
>>> df_pd = pd.DataFrame(df)
>>> df_pl = pl.DataFrame(df)
We define a library agnostic function:
Let's define a dataframe-agnostic function that gets the first 3 rows.
>>> def func(df_any):
... df = nw.from_native(df_any)
... df = df.head(3)
... return nw.to_native(df)
>>> @nw.narwhalify
... def func(df):
... return df.head(3)
We can then pass either pandas or Polars to `func`:
Expand All @@ -880,8 +882,56 @@ def head(self, n: int) -> Self:
│ 3 ┆ 8 ┆ c │
└─────┴─────┴─────┘
"""

return super().head(n)

def tail(self, n: int = 5) -> Self:
    """
    Get the last `n` rows.

    Arguments:
        n: Number of rows to return. If a negative value is passed, return all rows
            except the first `abs(n)`.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df = {
        ...     "foo": [1, 2, 3, 4, 5],
        ...     "bar": [6, 7, 8, 9, 10],
        ...     "ham": ["a", "b", "c", "d", "e"],
        ... }
        >>> df_pd = pd.DataFrame(df)
        >>> df_pl = pl.DataFrame(df)

        Let's define a dataframe-agnostic function that gets the last 3 rows.

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.tail(3)

        We can then pass either pandas or Polars to `func`:

        >>> func(df_pd)
           foo  bar ham
        2    3    8   c
        3    4    9   d
        4    5   10   e
        >>> func(df_pl)
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ foo ┆ bar ┆ ham │
        │ --- ┆ --- ┆ --- │
        │ i64 ┆ i64 ┆ str │
        ╞═════╪═════╪═════╡
        │ 3   ┆ 8   ┆ c   │
        │ 4   ┆ 9   ┆ d   │
        │ 5   ┆ 10  ┆ e   │
        └─────┴─────┴─────┘
    """
    return super().tail(n)

def drop(self, *columns: str | Iterable[str]) -> Self:
"""
Remove columns from the dataframe.
Expand Down Expand Up @@ -1999,31 +2049,40 @@ def rename(self, mapping: dict[str, str]) -> Self:
"""
return super().rename(mapping)

def head(self, n: int) -> Self:
def head(self, n: int = 5) -> Self:
r"""
Get the first `n` rows.
Arguments:
n: Number of rows to return.
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> lf_pl = pl.LazyFrame(
... {
... "a": [1, 2, 3, 4, 5, 6],
... "b": [7, 8, 9, 10, 11, 12],
... }
... )
>>> lf = nw.LazyFrame(lf_pl)
>>> lframe = lf.head(5).collect()
>>> lframe
┌───────────────────────────────────────────────┐
| Narwhals DataFrame |
| Use `narwhals.to_native` to see native output |
└───────────────────────────────────────────────┘
>>> nw.to_native(lframe)
shape: (5, 2)
>>> import pandas as pd
>>> import polars as pl
>>> data = {
... "a": [1, 2, 3, 4, 5, 6],
... "b": [7, 8, 9, 10, 11, 12],
... }
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> lf_pl = pl.LazyFrame(data)
Let's define a dataframe-agnostic function that gets the first 3 rows.
>>> @nw.narwhalify
... def func(df):
... return df.head(3)
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 1 7
1 2 8
2 3 9
>>> func(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
Expand All @@ -2032,28 +2091,78 @@ def head(self, n: int) -> Self:
│ 1 ┆ 7 │
│ 2 ┆ 8 │
│ 3 ┆ 9 │
│ 4 ┆ 10 │
│ 5 ┆ 11 │
└─────┴─────┘
>>> lframe = lf.head(2).collect()
>>> lframe
┌───────────────────────────────────────────────┐
| Narwhals DataFrame |
| Use `narwhals.to_native` to see native output |
└───────────────────────────────────────────────┘
>>> nw.to_native(lframe)
shape: (2, 2)
>>> func(lf_pl).collect()
shape: (3, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 7 │
│ 2 ┆ 8 │
│ 3 ┆ 9 │
└─────┴─────┘
"""
return super().head(n)

def tail(self, n: int = 5) -> Self:
    r"""
    Get the last `n` rows.

    Arguments:
        n: Number of rows to return.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> data = {
        ...     "a": [1, 2, 3, 4, 5, 6],
        ...     "b": [7, 8, 9, 10, 11, 12],
        ... }
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)
        >>> lf_pl = pl.LazyFrame(data)

        Let's define a dataframe-agnostic function that gets the last 3 rows.

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.tail(3)

        We can then pass either pandas or Polars to `func`:

        >>> func(df_pd)
           a   b
        3  4  10
        4  5  11
        5  6  12
        >>> func(df_pl)
        shape: (3, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ i64 ┆ i64 │
        ╞═════╪═════╡
        │ 4   ┆ 10  │
        │ 5   ┆ 11  │
        │ 6   ┆ 12  │
        └─────┴─────┘
        >>> func(lf_pl).collect()
        shape: (3, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ i64 ┆ i64 │
        ╞═════╪═════╡
        │ 4   ┆ 10  │
        │ 5   ┆ 11  │
        │ 6   ┆ 12  │
        └─────┴─────┘
    """
    return super().tail(n)

def drop(self, *columns: str | Iterable[str]) -> Self:
r"""
Remove columns from the LazyFrame.
Expand Down
Loading

0 comments on commit d77f0b9

Please sign in to comment.