Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add Series|Expr.is_finite method #1341

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/expr.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- clip
- is_between
- is_duplicated
- is_finite
- is_first_distinct
- is_in
- is_last_distinct
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
- is_between
- is_duplicated
- is_empty
- is_finite
- is_first_distinct
- is_in
- is_last_distinct
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,9 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
dtypes=self._dtypes,
)

def is_finite(self: Self) -> Self:
    """Return the `is_finite` expression for the PyArrow backend.

    Returns:
        Whatever `reuse_series_implementation(self, "is_finite")` produces.
    """
    # NOTE(review): presumably this dispatches to the series-level `is_finite`
    # for each evaluated series — confirm in `reuse_series_implementation`.
    return reuse_series_implementation(self, "is_finite")

@property
def dt(self: Self) -> ArrowExprDateTimeNamespace:
    """Return an `ArrowExprDateTimeNamespace` constructed from this expression."""
    return ArrowExprDateTimeNamespace(self)
Expand Down
5 changes: 5 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,11 @@ def mode(self: Self) -> ArrowSeries:
plx.col(col_token) == plx.col(col_token).max()
)[self.name]

def is_finite(self: Self) -> Self:
    """Apply `pyarrow.compute.is_finite` to the native series.

    Returns:
        The result of `pc.is_finite` on `self._native_series`, re-wrapped via
        `self._from_native_series`.
    """
    import pyarrow.compute as pc  # ignore-banned-import

    finite_mask = pc.is_finite(self._native_series)
    return self._from_native_series(finite_mask)

def __iter__(self: Self) -> Iterator[Any]:
    """Yield every item produced by the native series' own iterator."""
    yield from self._native_series.__iter__()

Expand Down
9 changes: 9 additions & 0 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,15 @@ def func(_input: Any, dtype: DType | type[DType]) -> Any:
returns_scalar=False,
)

def is_finite(self: Self) -> Self:
    """Build an expression that applies `dask.array.isfinite` to its input.

    Returns:
        The expression produced by `self._from_call` for the callable below,
        registered under the name "is_finite" with `returns_scalar=False`.
    """
    import dask.array as da  # ignore-banned-import

    def func(_input: Any) -> Any:
        # Element-wise finiteness check on whatever `_from_call` passes in.
        return da.isfinite(_input)

    return self._from_call(func, "is_finite", returns_scalar=False)


class DaskExprStringNamespace:
def __init__(self, expr: DaskExpr) -> None:
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,9 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
dtypes=self._dtypes,
)

def is_finite(self: Self) -> Self:
    """Return the `is_finite` expression for pandas-like backends.

    Returns:
        Whatever `reuse_series_implementation(self, "is_finite")` produces.
    """
    # NOTE(review): presumably this dispatches to the series-level `is_finite`
    # for each evaluated series — confirm in `reuse_series_implementation`.
    return reuse_series_implementation(self, "is_finite")

@property
def str(self: Self) -> PandasLikeExprStringNamespace:
    """Return a `PandasLikeExprStringNamespace` constructed from this expression."""
    return PandasLikeExprStringNamespace(self)
Expand Down
7 changes: 7 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,13 @@ def mode(self: Self) -> Self:
def __iter__(self: Self) -> Iterator[Any]:
    """Yield every item produced by the native series' own iterator."""
    yield from self._native_series.__iter__()

def is_finite(self: Self) -> Self:
    """Flag the entries of the native series that are finite and not missing.

    An entry is marked finite only when `numpy.isfinite` is true for it AND
    `isna()` is false for it; the combined mask is re-wrapped via
    `self._from_native_series`.

    Returns:
        The wrapped boolean mask.
    """
    import numpy as np  # ignore-banned-import

    native = self._native_series
    finite_mask = np.isfinite(native)
    # Explicitly exclude missing entries so they are never reported as finite.
    present_mask = ~native.isna()
    return self._from_native_series(finite_mask & present_mask)
Comment on lines +768 to +770
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is an opinionated choice that NA is not finite

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 not sure, wouldn't we want to preserve null values?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The behavior differs across pandas dtype backends. Let me come back with an example

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmm actually, for classical pandas types, we wouldn't have the option of returning a nullable boolean (if we want to preserve the dtype backend)

🤔 gonna think about this a little longer

Copy link
Member Author

@FBruzzesi FBruzzesi Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These would be the output:

data = [float("nan"), float("inf"), 2.0, None]

s = pd.Series(data)
np.isfinite(s)

0    False
1    False
2     True
3    False
dtype: bool
np.isfinite(s.convert_dtypes(dtype_backend="numpy_nullable"))

0     <NA>
1    False
2     True
3     <NA>
dtype: boolean
np.isfinite(s.convert_dtypes(dtype_backend="pyarrow"))

0    False
1    False
2     True
3    False
dtype: bool

While for polars:

pl.Series(data).is_finite()

shape: (4,)
Series: '' [bool]
[
	false
	false
	true
	null
]


@property
def str(self) -> PandasLikeSeriesStringNamespace:
    """Return a `PandasLikeSeriesStringNamespace` constructed from this series."""
    return PandasLikeSeriesStringNamespace(self)
Expand Down
58 changes: 56 additions & 2 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1433,8 +1433,8 @@ def is_null(self) -> Self:
Returns a boolean Series indicating which values are null.

Notes:
pandas and Polars handle null values differently. Polars distinguishes
between NaN and Null, whereas pandas doesn't.
pandas, Polars and PyArrow handle null values differently. Polars and PyArrow
distinguish between NaN and Null, whereas pandas doesn't.

Examples:
>>> import pandas as pd
Expand Down Expand Up @@ -2599,6 +2599,60 @@ def mode(self: Self) -> Self:
"""
return self.__class__(lambda plx: self._call(plx).mode())

def is_finite(self: Self) -> Self:
    """
    Returns boolean values indicating which original values are finite.

    Warning:
        Different backends handle null values differently. `is_finite` will return
        False for NaN and null values in the pandas and Dask backends, while for
        Polars and PyArrow null values are kept as such.

    Returns:
        Expression of `Boolean` data type.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = {"a": [float("nan"), float("inf"), 2.0, None]}

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.select(nw.col("a").is_finite())

        We can then pass any supported library such as pandas, Polars, or PyArrow to `func`:

        >>> func(pd.DataFrame(data))
               a
        0  False
        1  False
        2   True
        3  False
        >>> func(pl.DataFrame(data))
        shape: (4, 1)
        ┌───────┐
        │ a     │
        │ ---   │
        │ bool  │
        ╞═══════╡
        │ false │
        │ false │
        │ true  │
        │ null  │
        └───────┘

        >>> func(pa.table(data))
        pyarrow.Table
        a: bool
        ----
        a: [[false,false,true,null]]
    """
    # Wrap in a new expression whose callable evaluates this expression for the
    # given `plx` and then applies `is_finite` to the result.
    return self.__class__(lambda plx: self._call(plx).is_finite())

@property
def str(self: Self) -> ExprStringNamespace[Self]:
    """Return an `ExprStringNamespace` constructed from this expression."""
    return ExprStringNamespace(self)
Expand Down
57 changes: 57 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2689,6 +2689,63 @@ def mode(self: Self) -> Self:
"""
return self._from_compliant_series(self._compliant_series.mode())

def is_finite(self: Self) -> Self:
    """
    Returns a boolean Series indicating which values are finite.

    Warning:
        Different backends handle null values differently. `is_finite` will return
        False for NaN and null values in the pandas backend, while for Polars and
        PyArrow null values are kept as such.

    Returns:
        Series of `Boolean` data type.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> import pyarrow as pa
        >>> data = [float("nan"), float("inf"), 2.0, None]

        We define a library agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return s.is_finite()

        We can then pass any supported library such as pandas, Polars, or PyArrow to `func`:

        >>> func(pd.Series(data))
        0    False
        1    False
        2     True
        3    False
        dtype: bool

        >>> func(pl.Series(data))  # doctest: +NORMALIZE_WHITESPACE
        shape: (4,)
        Series: '' [bool]
        [
           false
           false
           true
           null
        ]

        >>> func(pa.chunked_array([data]))  # doctest: +ELLIPSIS
        <pyarrow.lib.ChunkedArray object at ...>
        [
          [
            false,
            false,
            true,
            null
          ]
        ]
    """
    # Delegate to the backend-specific series, then re-wrap the result.
    return self._from_compliant_series(self._compliant_series.is_finite())

def __iter__(self: Self) -> Iterator[Any]:
    """Yield every item produced by the compliant series' own iterator."""
    yield from self._compliant_series.__iter__()

Expand Down
44 changes: 44 additions & 0 deletions tests/expr_and_series/is_finite_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from __future__ import annotations

import pytest

import narwhals.stable.v1 as nw
from tests.utils import PANDAS_VERSION
from tests.utils import Constructor
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data

# Shared input column covering NaN, +infinity, a finite float and a missing value.
data = {"a": [float("nan"), float("inf"), 2.0, None]}


@pytest.mark.filterwarnings("ignore:invalid value encountered in cast")
def test_is_finite_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None:
    """Check `nw.col("a").is_finite()` against the per-backend expected output.

    Constructors whose name contains "polars" or "pyarrow_table" are expected
    to keep the missing value as `None`; every other constructor is expected
    to report it as `False`.
    """
    constructor_name = str(constructor)
    if PANDAS_VERSION < (2, 2) and "pandas_pyarrow" in constructor_name:
        request.applymarker(pytest.mark.xfail)

    keeps_nulls = "polars" in constructor_name or "pyarrow_table" in constructor_name
    expected = {"a": [False, False, True, None if keeps_nulls else False]}

    frame = nw.from_native(constructor(data))
    assert_equal_data(frame.select(nw.col("a").is_finite()), expected)


@pytest.mark.filterwarnings("ignore:invalid value encountered in cast")
def test_is_finite_series(
    request: pytest.FixtureRequest, constructor_eager: ConstructorEager
) -> None:
    """Check `Series.is_finite()` against the per-backend expected output.

    Constructors whose name contains "polars" or "pyarrow_table" are expected
    to keep the missing value as `None`; every other constructor is expected
    to report it as `False`.
    """
    constructor_name = str(constructor_eager)
    if PANDAS_VERSION < (2, 2) and "pandas_pyarrow" in constructor_name:
        request.applymarker(pytest.mark.xfail)

    keeps_nulls = "polars" in constructor_name or "pyarrow_table" in constructor_name
    expected = {"a": [False, False, True, None if keeps_nulls else False]}

    series = nw.from_native(constructor_eager(data), eager_only=True)["a"]
    assert_equal_data({"a": series.is_finite()}, expected)
Loading