diff --git a/docs/api-reference/expressions_str.md b/docs/api-reference/expressions_str.md index 8067dd975..5d2f3deb2 100644 --- a/docs/api-reference/expressions_str.md +++ b/docs/api-reference/expressions_str.md @@ -5,6 +5,7 @@ options: members: - ends_with + - head - to_datetime show_source: false show_bases: false diff --git a/docs/api-reference/series_str.md b/docs/api-reference/series_str.md index cb725023e..683075e61 100644 --- a/docs/api-reference/series_str.md +++ b/docs/api-reference/series_str.md @@ -5,5 +5,6 @@ options: members: - ends_with + - head show_source: false show_bases: false diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 8ff9df8d2..85a0220fe 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -272,6 +272,16 @@ def ends_with(self, suffix: str) -> PandasExpr: implementation=self._expr._implementation, ) + def head(self, n: int = 5) -> PandasExpr: + return PandasExpr( + lambda df: [series.str.head(n) for series in self._expr._call(df)], + depth=self._expr._depth + 1, + function_name=f"{self._expr._function_name}->str.head", + root_names=self._expr._root_names, + output_names=self._expr._output_names, + implementation=self._expr._implementation, + ) + def to_datetime(self, format: str | None = None) -> PandasExpr: # noqa: A002 # TODO make a register_expression_call for namespaces return PandasExpr( diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index e5c42ed1d..26fb050c2 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -414,12 +414,15 @@ def __init__(self, series: PandasSeries) -> None: self._series = series def ends_with(self, suffix: str) -> PandasSeries: - # TODO make a register_expression_call for namespaces - return self._series._from_series( self._series._series.str.endswith(suffix), ) + def head(self, n: int = 5) -> PandasSeries: + return self._series._from_series( + self._series._series.str[:n], + ) + class 
PandasSeriesDateTimeNamespace: def __init__(self, series: PandasSeries) -> None: diff --git a/narwhals/expression.py b/narwhals/expression.py index 2a7f52bdc..320beac23 100644 --- a/narwhals/expression.py +++ b/narwhals/expression.py @@ -685,6 +685,57 @@ def ends_with(self, suffix: str) -> Expr: lambda plx: self._expr._call(plx).str.ends_with(suffix) ) + def head(self, n: int = 5) -> Expr: + """ + Take the first n elements of each string. + + Arguments: + n: Number of elements to take. + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> data = {'lyrics': ['Atatata', 'taata', 'taatatata', 'zukkyun']} + >>> df_pd = pd.DataFrame(data) + >>> df_pl = pl.DataFrame(data) + + We define a data-frame agnostic function: + + >>> def func(df_any): + ... df = nw.from_native(df_any) + ... df = df.with_columns(lyrics_head = nw.col('lyrics').str.head()) + ... return nw.to_native(df) + + We can then pass either pandas or Polars to `func`: + + >>> func(df_pd) + lyrics lyrics_head + 0 Atatata Atata + 1 taata taata + 2 taatatata taata + 3 zukkyun zukky + >>> func(df_pl) + shape: (4, 2) + ┌───────────┬─────────────┐ + │ lyrics ┆ lyrics_head │ + │ --- ┆ --- │ + │ str ┆ str │ + ╞═══════════╪═════════════╡ + │ Atatata ┆ Atata │ + │ taata ┆ taata │ + │ taatatata ┆ taata │ + │ zukkyun ┆ zukky │ + └───────────┴─────────────┘ + """ + + def func(plx: Any) -> Any: + if plx is get_polars(): + return self._expr._call(plx).str.slice(0, n) + return self._expr._call(plx).str.head(n) + + return self._expr.__class__(func) + def to_datetime(self, format: str) -> Expr: # noqa: A002 """ Convert to Datetime dtype. 
diff --git a/narwhals/series.py b/narwhals/series.py index 03da0b8ff..dad1d94d4 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -205,6 +205,50 @@ def __init__(self, series: Series) -> None: def ends_with(self, suffix: str) -> Series: return self._series.__class__(self._series._series.str.ends_with(suffix)) + def head(self, n: int = 5) -> Series: + """ + Take the first n elements of each string. + + Arguments: + n: Number of elements to take. + + Examples: + >>> import pandas as pd + >>> import polars as pl + >>> import narwhals as nw + >>> lyrics = ['Atatata', 'taata', 'taatatata', 'zukkyun'] + >>> s_pd = pd.Series(lyrics) + >>> s_pl = pl.Series(lyrics) + + We define a data-frame agnostic function: + + >>> def func(s_any): + ... s = nw.from_native(s_any, series_only=True) + ... s = s.str.head() + ... return nw.to_native(s) + + We can then pass either pandas or Polars to `func`: + + >>> func(s_pd) + 0 Atata + 1 taata + 2 taata + 3 zukky + dtype: object + >>> func(s_pl) # doctest: +SKIP + shape: (4,) + Series: '' [str] + [ + "Atata" + "taata" + "taata" + "zukky" + ] + """ + if self._series._is_polars: + return self._series.__class__(self._series._series.str.slice(0, n)) + return self._series.__class__(self._series._series.str.head(n)) + class SeriesDateTimeNamespace: def __init__(self, series: Series) -> None: diff --git a/tests/expr/str/head_test.py b/tests/expr/str/head_test.py new file mode 100644 index 000000000..2e420673e --- /dev/null +++ b/tests/expr/str/head_test.py @@ -0,0 +1,24 @@ +from typing import Any + +import pandas as pd +import polars as pl +import pytest + +import narwhals as nw +from tests.utils import compare_dicts + +data = { + "a": ["foo", "bars"], +} + + +@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame]) +def test_str_head(constructor: Any) -> None: + df = nw.from_native(constructor(data), eager_only=True) + result = df.select(nw.col("a").str.head(3)) + expected = { + "a": ["foo", "bar"], + } + 
compare_dicts(result, expected) + result = df.select(df["a"].str.head(3)) + compare_dicts(result, expected)