Skip to content

Commit

Permalink
Merge pull request #213 from DeaMariaLeon/zip
Browse files Browse the repository at this point in the history
Adding Series.zip_with
  • Loading branch information
MarcoGorelli authored May 26, 2024
2 parents b66c9d0 + 18b90ca commit 071210d
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,6 @@
- to_pandas
- unique
- value_counts
- zip_with
show_source: false
show_bases: false
5 changes: 5 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,11 @@ def value_counts(self: Self, *, sort: bool = False, parallel: bool = False) -> A
implementation=self._implementation,
)

def zip_with(self: Self, mask: Any, other: Any) -> PandasSeries:
    """Pick values from this series or from `other`, driven by `mask`.

    Delegates to the native series' `where`: positions at which the mask is
    true keep this series' value, the remaining positions take the value
    from `other`.

    Arguments:
        mask: Wrapper whose `_series` attribute is the native boolean mask.
        other: Wrapper whose `_series` attribute supplies the replacement
            values.

    Returns:
        The result of `self._from_series` applied to the combined native
        series.
    """
    selected = self._series.where(mask._series, other._series)
    return self._from_series(selected)

@property
def str(self) -> PandasSeriesStringNamespace:
    """Return a `PandasSeriesStringNamespace` wrapping this series."""
    return PandasSeriesStringNamespace(self)
Expand Down
49 changes: 49 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1482,6 +1482,55 @@ def value_counts(

return DataFrame(self._series.value_counts(sort=sort, parallel=parallel))

def zip_with(self, mask: Any, other: Any) -> Self:
    """
    Take values from self or other based on the given mask.

    Where mask evaluates true, take values from self. Where mask evaluates
    false, take values from other.

    Arguments:
        mask: Boolean Series selecting, position by position, whether the
            value comes from self (true) or from other (false).
        other: Series supplying the values used where mask is false.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> s1_pl = pl.Series([1, 2, 3, 4, 5])
        >>> s2_pl = pl.Series([5, 4, 3, 2, 1])
        >>> mask_pl = pl.Series([True, False, True, False, True])
        >>> s1_pd = pd.Series([1, 2, 3, 4, 5])
        >>> s2_pd = pd.Series([5, 4, 3, 2, 1])
        >>> mask_pd = pd.Series([True, False, True, False, True])

        Let's define a dataframe-agnostic function:

        >>> def func(s1_any, mask_any, s2_any):
        ...     s1 = nw.from_native(s1_any, series_only=True)
        ...     mask = nw.from_native(mask_any, series_only=True)
        ...     s2 = nw.from_native(s2_any, series_only=True)
        ...     s = s1.zip_with(mask, s2)
        ...     return nw.to_native(s)

        We can then pass either pandas or Polars to `func`:

        >>> func(s1_pl, mask_pl, s2_pl)  # doctest: +NORMALIZE_WHITESPACE
        shape: (5,)
        Series: '' [i64]
        [
           1
           4
           3
           2
           5
        ]
        >>> func(s1_pd, mask_pd, s2_pd)
        0    1
        1    4
        2    3
        3    2
        4    5
        dtype: int64
    """
    # Unwrap both arguments before handing them to the backend series.
    combined = self._series.zip_with(
        self._extract_native(mask),
        self._extract_native(other),
    )
    return self._from_series(combined)

@property
def str(self) -> SeriesStringNamespace:
    """Return a `SeriesStringNamespace` wrapping this series."""
    return SeriesStringNamespace(self)
Expand Down
16 changes: 16 additions & 0 deletions tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,3 +416,19 @@ def test_is_sorted_invalid(df_raw: Any) -> None:

with pytest.raises(TypeError):
series.is_sorted(descending="invalid_type") # type: ignore[arg-type]


@pytest.mark.parametrize(
    ("df_raw", "mask", "expected"),
    [
        (df_pandas, pd.Series([True, False, True]), pd.Series([1, 4, 2])),
        (df_polars, pl.Series([True, False, True]), pl.Series([1, 4, 2])),
    ],
)
def test_zip_with(df_raw: Any, mask: Any, expected: Any) -> None:
    """Check that `Series.zip_with` picks from "a" where mask is true, else "b".

    Arguments:
        df_raw: Native dataframe providing the "a" and "b" columns.
        mask: Native boolean series used as the selection mask.
        expected: Native series holding the values the result must contain.
    """
    series1 = nw.Series(df_raw["a"])
    series2 = nw.Series(df_raw["b"])
    result = series1.zip_with(nw.Series(mask), series2)
    # NOTE(review): `==` between two wrapper Series is presumably element-wise
    # (as in pandas/Polars), so asserting on it directly would test the
    # truthiness of the returned object rather than the values. Compare the
    # native values one by one instead.
    assert list(nw.to_native(result)) == list(expected)
1 change: 1 addition & 0 deletions utils/check_api_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@
"is_empty",
"is_sorted",
"value_counts",
"zip_with",
}
)
):
Expand Down

0 comments on commit 071210d

Please sign in to comment.