diff --git a/docs/api-reference/dataframe.md b/docs/api-reference/dataframe.md index c0084c9e9..41ccf0d29 100644 --- a/docs/api-reference/dataframe.md +++ b/docs/api-reference/dataframe.md @@ -23,6 +23,7 @@ - select - shape - sort + - tail - to_dict - to_numpy - to_pandas diff --git a/docs/api-reference/expressions.md b/docs/api-reference/expressions.md index bf2aee981..0118a05c8 100644 --- a/docs/api-reference/expressions.md +++ b/docs/api-reference/expressions.md @@ -13,6 +13,7 @@ - drop_nulls - fill_null - filter + - head - is_between - is_duplicated - is_first_distinct @@ -32,6 +33,7 @@ - sort - std - sum + - tail - unique show_source: false show_bases: false diff --git a/docs/api-reference/lazyframe.md b/docs/api-reference/lazyframe.md index 341da4268..96e1a3ffe 100644 --- a/docs/api-reference/lazyframe.md +++ b/docs/api-reference/lazyframe.md @@ -18,6 +18,7 @@ - schema - select - sort + - tail - unique - with_columns - with_row_index diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md index 2a92e870e..1832b7131 100644 --- a/docs/api-reference/series.md +++ b/docs/api-reference/series.md @@ -14,6 +14,7 @@ - dtype - fill_null - filter + - head - is_between - is_duplicated - is_empty @@ -37,6 +38,7 @@ - sort - std - sum + - tail - to_frame - to_numpy - to_pandas diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index c7580e035..90816faba 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -244,6 +244,9 @@ def join( def head(self, n: int) -> Self: return self._from_dataframe(self._dataframe.head(n)) + def tail(self, n: int) -> Self: + return self._from_dataframe(self._dataframe.tail(n)) + def unique(self, subset: str | list[str]) -> Self: subset = flatten(subset) return self._from_dataframe(self._dataframe.drop_duplicates(subset=subset)) diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 0be8b6907..a7e167f08 100644 --- 
a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -288,6 +288,12 @@ def quantile( self, "quantile", quantile, interpolation, returns_scalar=True ) + def head(self, n: int) -> Self: + return reuse_series_implementation(self, "head", n) + + def tail(self, n: int) -> Self: + return reuse_series_implementation(self, "tail", n) + @property def str(self) -> PandasExprStringNamespace: return PandasExprStringNamespace(self) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 3cd5de3eb..e1e6cd309 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -494,6 +494,12 @@ def zip_with(self: Self, mask: Any, other: Any) -> PandasSeries: res = ser.where(mask._series, other._series) return self._from_series(res) + def head(self: Self, n: int) -> Self: + return self._from_series(self._series.head(n)) + + def tail(self: Self, n: int) -> Self: + return self._from_series(self._series.tail(n)) + @property def str(self) -> PandasSeriesStringNamespace: return PandasSeriesStringNamespace(self) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index 59293c911..6b5c26f7f 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -142,6 +142,9 @@ def rename(self, mapping: dict[str, str]) -> Self: def head(self, n: int) -> Self: return self._from_dataframe(self._dataframe.head(n)) + def tail(self, n: int) -> Self: + return self._from_dataframe(self._dataframe.tail(n)) + def drop(self, *columns: str | Iterable[str]) -> Self: return self._from_dataframe(self._dataframe.drop(*columns)) @@ -834,7 +837,7 @@ def rename(self, mapping: dict[str, str]) -> Self: """ return super().rename(mapping) - def head(self, n: int) -> Self: + def head(self, n: int = 5) -> Self: """ Get the first `n` rows. 
@@ -854,12 +857,11 @@ def head(self, n: int) -> Self: >>> df_pd = pd.DataFrame(df) >>> df_pl = pl.DataFrame(df) - We define a library agnostic function: + Let's define a dataframe-agnostic function that gets the first 3 rows. - >>> def func(df_any): - ... df = nw.from_native(df_any) - ... df = df.head(3) - ... return nw.to_native(df) + >>> @nw.narwhalify + ... def func(df): + ... return df.head(3) We can then pass either pandas or Polars to `func`: @@ -880,8 +882,56 @@ def head(self, n: int) -> Self: │ 3 ┆ 8 ┆ c │ └─────┴─────┴─────┘ """ + return super().head(n) + def tail(self, n: int = 5) -> Self: + """ + Get the last `n` rows. + + Arguments: + n: Number of rows to return. If a negative value is passed, return all rows + except the first `abs(n)`. + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> df = { + ... "foo": [1, 2, 3, 4, 5], + ... "bar": [6, 7, 8, 9, 10], + ... "ham": ["a", "b", "c", "d", "e"], + ... } + >>> df_pd = pd.DataFrame(df) + >>> df_pl = pl.DataFrame(df) + + Let's define a dataframe-agnostic function that gets the last 3 rows. + + >>> @nw.narwhalify + ... def func(df): + ... return df.tail(3) + + We can then pass either pandas or Polars to `func`: + + >>> func(df_pd) + foo bar ham + 2 3 8 c + 3 4 9 d + 4 5 10 e + >>> func(df_pl) + shape: (3, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ str │ + ╞═════╪═════╪═════╡ + │ 3 ┆ 8 ┆ c │ + │ 4 ┆ 9 ┆ d │ + │ 5 ┆ 10 ┆ e │ + └─────┴─────┴─────┘ + """ + return super().tail(n) + def drop(self, *columns: str | Iterable[str]) -> Self: """ Remove columns from the dataframe. @@ -1999,7 +2049,7 @@ def rename(self, mapping: dict[str, str]) -> Self: """ return super().rename(mapping) - def head(self, n: int) -> Self: + def head(self, n: int = 5) -> Self: r""" Get the first `n` rows. @@ -2007,23 +2057,32 @@ def head(self, n: int) -> Self: n: Number of rows to return. 
Examples: - >>> import polars as pl >>> import narwhals as nw - >>> lf_pl = pl.LazyFrame( - ... { - ... "a": [1, 2, 3, 4, 5, 6], - ... "b": [7, 8, 9, 10, 11, 12], - ... } - ... ) - >>> lf = nw.LazyFrame(lf_pl) - >>> lframe = lf.head(5).collect() - >>> lframe - ┌───────────────────────────────────────────────┐ - | Narwhals DataFrame | - | Use `narwhals.to_native` to see native output | - └───────────────────────────────────────────────┘ - >>> nw.to_native(lframe) - shape: (5, 2) + >>> import pandas as pd + >>> import polars as pl + >>> data = { + ... "a": [1, 2, 3, 4, 5, 6], + ... "b": [7, 8, 9, 10, 11, 12], + ... } + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> lf_pl = pl.LazyFrame(data) + + Let's define a dataframe-agnostic function that gets the first 3 rows. + + >>> @nw.narwhalify + ... def func(df): + ... return df.head(3) + + We can then pass either pandas or Polars to `func`: + + >>> func(df_pd) + a b + 0 1 7 + 1 2 8 + 2 3 9 + >>> func(df_pl) + shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ │ --- ┆ --- │ @@ -2032,17 +2091,9 @@ def head(self, n: int) -> Self: │ 1 ┆ 7 │ │ 2 ┆ 8 │ │ 3 ┆ 9 │ - │ 4 ┆ 10 │ - │ 5 ┆ 11 │ └─────┴─────┘ - >>> lframe = lf.head(2).collect() - >>> lframe - ┌───────────────────────────────────────────────┐ - | Narwhals DataFrame | - | Use `narwhals.to_native` to see native output | - └───────────────────────────────────────────────┘ - >>> nw.to_native(lframe) - shape: (2, 2) + >>> func(lf_pl).collect() + shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ │ --- ┆ --- │ @@ -2050,10 +2101,68 @@ def head(self, n: int) -> Self: ╞═════╪═════╡ │ 1 ┆ 7 │ │ 2 ┆ 8 │ + │ 3 ┆ 9 │ └─────┴─────┘ """ return super().head(n) + def tail(self, n: int = 5) -> Self: + r""" + Get the last `n` rows. + + Arguments: + n: Number of rows to return. + + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> data = { + ... "a": [1, 2, 3, 4, 5, 6], + ... "b": [7, 8, 9, 10, 11, 12], + ... 
} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + >>> lf_pl = pl.LazyFrame(data) + + Let's define a dataframe-agnostic function that gets the last 3 rows. + + >>> @nw.narwhalify + ... def func(df): + ... return df.tail(3) + + We can then pass either pandas or Polars to `func`: + + >>> func(df_pd) + a b + 3 4 10 + 4 5 11 + 5 6 12 + >>> func(df_pl) + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 4 ┆ 10 │ + │ 5 ┆ 11 │ + │ 6 ┆ 12 │ + └─────┴─────┘ + >>> func(lf_pl).collect() + shape: (3, 2) + ┌─────┬─────┐ + │ a ┆ b │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 4 ┆ 10 │ + │ 5 ┆ 11 │ + │ 6 ┆ 12 │ + └─────┴─────┘ + """ + return super().tail(n) + def drop(self, *columns: str | Iterable[str]) -> Self: r""" Remove columns from the LazyFrame. diff --git a/narwhals/expression.py b/narwhals/expression.py index 0b8f7247e..3ee39781b 100644 --- a/narwhals/expression.py +++ b/narwhals/expression.py @@ -1431,6 +1431,94 @@ def quantile( lambda plx: self._call(plx).quantile(quantile, interpolation) ) + def head(self, n: int = 10) -> Expr: + r""" + Get the first `n` rows. + + Arguments: + n: Number of rows to return. + Defaults to 10. + + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> data = {"a": list(range(10))} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + + Let's define a dataframe-agnostic function that returns the first 3 rows: + + >>> @nw.narwhalify + ... def func(df): + ... return df.select(nw.col("a").head(3)) + + We can then pass either pandas or Polars to `func`: + + >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE + a + 0 0 + 1 1 + 2 2 + >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 0 │ + │ 1 │ + │ 2 │ + └─────┘ + """ + + return self.__class__(lambda plx: self._call(plx).head(n)) + + def tail(self, n: int = 10) -> Expr: + r""" + Get the last `n` rows. 
+ + Arguments: + n: Number of rows to return. + Defaults to 10. + + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> data = {"a": list(range(10))} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + + Let's define a dataframe-agnostic function that returns the last 3 rows: + + >>> @nw.narwhalify + ... def func(df): + ... return df.select(nw.col("a").tail(3)) + + We can then pass either pandas or Polars to `func`: + + >>> func(df_pd) # doctest: +NORMALIZE_WHITESPACE + a + 7 7 + 8 8 + 9 9 + >>> func(df_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3, 1) + ┌─────┐ + │ a │ + │ --- │ + │ i64 │ + ╞═════╡ + │ 7 │ + │ 8 │ + │ 9 │ + └─────┘ + """ + + return self.__class__(lambda plx: self._call(plx).tail(n)) + @property def str(self) -> ExprStringNamespace: return ExprStringNamespace(self) diff --git a/narwhals/series.py b/narwhals/series.py index d97cc44b4..814775652 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -1624,6 +1624,89 @@ def item(self: Self, index: int | None = None) -> Any: """ return self._series.item(index=index) + def head(self: Self, n: int = 10) -> Self: + r""" + Get the first `n` rows. + + Arguments: + n: Number of rows to return. + Defaults to 10. + + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> data = list(range(10)) + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + + Let's define a dataframe-agnostic function that returns the first 3 rows: + + >>> @nw.narwhalify(allow_series=True) + ... def func(s): + ... return s.head(3) + + We can then pass either pandas or Polars to `func`: + + >>> func(s_pd) # doctest: +NORMALIZE_WHITESPACE + 0 0 + 1 1 + 2 2 + dtype: int64 + + >>> func(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 0 + 1 + 2 + ] + """ + + return self._from_series(self._series.head(n)) + + def tail(self: Self, n: int = 10) -> Self: + r""" + Get the last `n` rows. 
+ + Arguments: + n: Number of rows to return. + Defaults to 10. + + Examples: + >>> import narwhals as nw + >>> import pandas as pd + >>> import polars as pl + >>> data = list(range(10)) + >>> s_pd = pd.Series(data) + >>> s_pl = pl.Series(data) + + Let's define a dataframe-agnostic function that returns the last 3 rows: + + >>> @nw.narwhalify(allow_series=True) + ... def func(s): + ... return s.tail(3) + + We can then pass either pandas or Polars to `func`: + + >>> func(s_pd) # doctest: +NORMALIZE_WHITESPACE + 7 7 + 8 8 + 9 9 + dtype: int64 + >>> func(s_pl) # doctest: +NORMALIZE_WHITESPACE + shape: (3,) + Series: '' [i64] + [ + 7 + 8 + 9 + ] + """ + + return self._from_series(self._series.tail(n)) + @property def str(self) -> SeriesStringNamespace: return SeriesStringNamespace(self) diff --git a/tests/test_common.py b/tests/test_common.py index 2d7a15adf..7740a2d5d 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -495,6 +495,25 @@ def test_head(df_raw: Any) -> None: result = nw.to_native(df.collect().head(2)) expected = {"a": [1, 3], "b": [4, 4], "z": [7.0, 8.0]} compare_dicts(result, expected) + result = nw.to_native(df.collect().select(nw.col("a").head(2))) + expected = {"a": [1, 3]} + compare_dicts(result, expected) + + +@pytest.mark.parametrize( + "df_raw", [df_pandas, df_lazy, df_pandas_nullable, df_pandas_pyarrow] +) +def test_tail(df_raw: Any) -> None: + df = nw.LazyFrame(df_raw) + result = nw.to_native(df.tail(2)) + expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9]} + compare_dicts(result, expected) + result = nw.to_native(df.collect().tail(2)) + expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9]} + compare_dicts(result, expected) + result = nw.to_native(df.collect().select(nw.col("a").tail(2))) + expected = {"a": [3, 2]} + compare_dicts(result, expected) @pytest.mark.parametrize( diff --git a/tests/test_series.py b/tests/test_series.py index 77aee9115..b5036551c 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -523,3 +523,21 @@ def 
test_item(df_raw: Any, index: int, expected: int) -> None: match=re.escape("can only call '.item()' if the Series is of length 1,"), ): s.item(None) + + +@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) +@pytest.mark.parametrize("n", [1, 2, 3, 10]) +def test_head(df_raw: Any, n: int) -> None: + s_raw = df_raw["z"] + s = nw.from_native(s_raw, allow_series=True) + # NOTE(review): narwhals Series `==` looks elementwise; compare values instead. + assert s.head(n).to_numpy().tolist() == s_raw.head(n).to_numpy().tolist() + + +@pytest.mark.parametrize("df_raw", [df_pandas, df_polars]) +@pytest.mark.parametrize("n", [1, 2, 3, 10]) +def test_tail(df_raw: Any, n: int) -> None: + s_raw = df_raw["z"] + s = nw.from_native(s_raw, allow_series=True) + # NOTE(review): narwhals Series `==` looks elementwise; compare values instead. + assert s.tail(n).to_numpy().tolist() == s_raw.tail(n).to_numpy().tolist()