Skip to content

Commit

Permalink
Merge pull request #110 from narwhals-dev/str-head
Browse files Browse the repository at this point in the history
add str.head
  • Loading branch information
MarcoGorelli authored May 8, 2024
2 parents 33f7253 + 268fe11 commit 469a077
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/expressions_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
options:
members:
- ends_with
- head
- to_datetime
show_source: false
show_bases: false
1 change: 1 addition & 0 deletions docs/api-reference/series_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
options:
members:
- ends_with
- head
show_source: false
show_bases: false
10 changes: 10 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,16 @@ def ends_with(self, suffix: str) -> PandasExpr:
implementation=self._expr._implementation,
)

def head(self, n: int = 5) -> PandasExpr:
    """
    Build an expression that applies ``str.head(n)`` to every series.

    Arguments:
        n: Number of leading elements of each string to keep.

    Returns:
        A new ``PandasExpr`` one level deeper than the wrapped
        expression, reusing its root names, output names and
        implementation.
    """
    parent = self._expr

    def take_head(df):  # noqa: ANN001, ANN202
        # Each evaluated series exposes its own ``str`` namespace.
        return [column.str.head(n) for column in parent._call(df)]

    return PandasExpr(
        take_head,
        depth=parent._depth + 1,
        function_name=f"{parent._function_name}->str.head",
        root_names=parent._root_names,
        output_names=parent._output_names,
        implementation=parent._implementation,
    )

def to_datetime(self, format: str | None = None) -> PandasExpr: # noqa: A002
# TODO make a register_expression_call for namespaces
return PandasExpr(
Expand Down
7 changes: 5 additions & 2 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,12 +414,15 @@ def __init__(self, series: PandasSeries) -> None:
self._series = series

def ends_with(self, suffix: str) -> PandasSeries:
    """
    Test whether each string ends with ``suffix``.

    Arguments:
        suffix: Suffix passed to the native ``.str.endswith`` call.

    Returns:
        The native result re-wrapped through ``_from_series``.
    """
    # TODO make a register_expression_call for namespaces
    wrapper = self._series
    mask = wrapper._series.str.endswith(suffix)
    return wrapper._from_series(mask)

def head(self, n: int = 5) -> PandasSeries:
    """
    Take the first ``n`` elements of each string.

    Arguments:
        n: Stop index of the ``[:n]`` slice applied to every string.

    Returns:
        The sliced native series re-wrapped through ``_from_series``.
    """
    wrapper = self._series
    # Slicing the native ``.str`` accessor truncates element-wise.
    truncated = wrapper._series.str[:n]
    return wrapper._from_series(truncated)


class PandasSeriesDateTimeNamespace:
def __init__(self, series: PandasSeries) -> None:
Expand Down
51 changes: 51 additions & 0 deletions narwhals/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,57 @@ def ends_with(self, suffix: str) -> Expr:
lambda plx: self._expr._call(plx).str.ends_with(suffix)
)

def head(self, n: int = 5) -> Expr:
    """
    Take the first n elements of each string.

    Arguments:
        n: Number of elements to take.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> data = {'lyrics': ['Atatata', 'taata', 'taatatata', 'zukkyun']}
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)

        We define a data-frame agnostic function:

        >>> def func(df_any):
        ...     df = nw.from_native(df_any)
        ...     df = df.with_columns(lyrics_head = nw.col('lyrics').str.head())
        ...     return nw.to_native(df)

        We can then pass either pandas or Polars to `func`:

        >>> func(df_pd)
              lyrics lyrics_head
        0    Atatata       Atata
        1      taata       taata
        2  taatatata       taata
        3    zukkyun       zukky
        >>> func(df_pl)
        shape: (4, 2)
        ┌───────────┬─────────────┐
        │ lyrics    ┆ lyrics_head │
        │ ---       ┆ ---         │
        │ str       ┆ str         │
        ╞═══════════╪═════════════╡
        │ Atatata   ┆ Atata       │
        │ taata     ┆ taata       │
        │ taatatata ┆ taata       │
        │ zukkyun   ┆ zukky       │
        └───────────┴─────────────┘
    """

    def func(plx: Any) -> Any:
        # Polars is addressed through ``str.slice(0, n)``; every other
        # namespace is expected to provide ``str.head`` itself.
        use_slice = plx is get_polars()
        string_namespace = self._expr._call(plx).str
        if use_slice:
            return string_namespace.slice(0, n)
        return string_namespace.head(n)

    return self._expr.__class__(func)

def to_datetime(self, format: str) -> Expr: # noqa: A002
"""
Convert to Datetime dtype.
Expand Down
44 changes: 44 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,50 @@ def __init__(self, series: Series) -> None:
def ends_with(self, suffix: str) -> Series:
    """
    Test whether each string ends with ``suffix``.

    Arguments:
        suffix: Suffix forwarded to the wrapped object's ``str.ends_with``.

    Returns:
        A new instance of the wrapping series class holding the result.
    """
    wrapper = self._series
    mask = wrapper._series.str.ends_with(suffix)
    return wrapper.__class__(mask)

def head(self, n: int = 5) -> Series:
    """
    Take the first n elements of each string.

    Arguments:
        n: Number of elements to take.

    Returns:
        A new instance of the wrapping series class holding the
        truncated strings.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> lyrics = ['Atatata', 'taata', 'taatatata', 'zukkyun']
        >>> s_pd = pd.Series(lyrics)
        >>> s_pl = pl.Series(lyrics)

        We define a data-frame agnostic function:

        >>> def func(s_any):
        ...     s = nw.from_native(s_any, series_only=True)
        ...     s = s.str.head()
        ...     return nw.to_native(s)

        We can then pass either pandas or Polars to `func`:

        >>> func(s_pd)
        0    Atata
        1    taata
        2    taata
        3    zukky
        dtype: object
        >>> func(s_pl)  # doctest: +SKIP
        shape: (4,)
        Series: '' [str]
        [
           "Atata"
           "taata"
           "taata"
           "zukky"
        ]
    """
    wrapper = self._series
    if wrapper._is_polars:
        # Polars path: take a slice of length n starting at offset 0.
        return wrapper.__class__(wrapper._series.str.slice(0, n))
    return wrapper.__class__(wrapper._series.str.head(n))


class SeriesDateTimeNamespace:
def __init__(self, series: Series) -> None:
Expand Down
24 changes: 24 additions & 0 deletions tests/expr/str/head_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import Any

import pandas as pd
import polars as pl
import pytest

import narwhals as nw
from tests.utils import compare_dicts

# Shared input for the test in this module: one string column "a" whose
# second value ("bars") is longer than the 3-element prefix requested there.
data = {
"a": ["foo", "bars"],
}


@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame])
def test_str_head(constructor: Any) -> None:
    """Check ``str.head(3)`` through both the expression and series APIs."""
    frame = nw.from_native(constructor(data), eager_only=True)
    expected = {"a": ["foo", "bar"]}

    # Expression API: nw.col(...).str.head
    via_expr = frame.select(nw.col("a").str.head(3))
    compare_dicts(via_expr, expected)

    # Series API: df[...].str.head
    via_series = frame.select(frame["a"].str.head(3))
    compare_dicts(via_series, expected)

0 comments on commit 469a077

Please sign in to comment.