Skip to content

Commit

Permalink
Merge pull request #112 from narwhals-dev/fill-null
Browse files (browse the repository at this point in the history)
fill_null
  • Loading branch information
MarcoGorelli authored May 8, 2024
2 parents bc83ded + d11e156 commit 6661aa1
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- cast
- diff
- drop_nulls
- fill_null
- filter
- is_between
- is_in
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- diff
- drop_nulls
- dtype
- fill_null
- filter
- is_between
- is_in
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ def is_between(
def is_null(self) -> Self:
    """Register an ``is_null`` call on this expression.

    Returns:
        Whatever ``register_expression_call`` produces for the method
        name ``"is_null"`` with no extra arguments.
    """
    method_name = "is_null"
    return register_expression_call(self, method_name)

def fill_null(self, value: Any) -> Self:
    """Register a ``fill_null`` call on this expression.

    Arguments:
        value: Forwarded unchanged as the single extra argument of the
            registered call.

    Returns:
        Whatever ``register_expression_call`` produces for the method
        name ``"fill_null"``.
    """
    call_args = (value,)
    return register_expression_call(self, "fill_null", *call_args)

def is_in(self, other: Any) -> Self:
    """Register an ``is_in`` call on this expression.

    Arguments:
        other: Forwarded unchanged as the single extra argument of the
            registered call.

    Returns:
        Whatever ``register_expression_call`` produces for the method
        name ``"is_in"``.
    """
    call_args = (other,)
    return register_expression_call(self, "is_in", *call_args)

Expand Down
4 changes: 4 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,10 @@ def is_null(self) -> PandasSeries:
ser = self._series
return self._from_series(ser.isna())

def fill_null(self, value: Any) -> PandasSeries:
    """Fill missing entries of the wrapped series with ``value``.

    Arguments:
        value: Passed straight to the wrapped series' ``fillna``.

    Returns:
        The ``fillna`` result, re-wrapped through ``self._from_series``.
    """
    filled = self._series.fillna(value)
    return self._from_series(filled)

def drop_nulls(self) -> PandasSeries:
    """Drop missing entries from the wrapped series.

    Returns:
        The wrapped series' ``dropna`` result, re-wrapped through
        ``self._from_series``.
    """
    remaining = self._series.dropna()
    return self._from_series(remaining)
Expand Down
62 changes: 62 additions & 0 deletions narwhals/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,10 @@ def is_null(self) -> Expr:
"""
Returns a boolean Series indicating which values are null.
Notes:
pandas and Polars handle null values differently. Polars distinguishes
between NaN and Null, whereas pandas doesn't.
Examples:
>>> import pandas as pd
>>> import polars as pl
Expand Down Expand Up @@ -657,6 +661,64 @@ def is_null(self) -> Expr:
"""
return self.__class__(lambda plx: self._call(plx).is_null())

def fill_null(self, value: Any) -> Expr:
    """
    Fill null values with given value.

    Arguments:
        value: Replacement forwarded to the backend's ``fill_null``.

    Notes:
        pandas and Polars handle null values differently. Polars distinguishes
        between NaN and Null, whereas pandas doesn't.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame(
        ...     {
        ...         'a': [2, 4, None, 3, 5],
        ...         'b': [2.0, 4.0, float("nan"), 3.0, 5.0]
        ...     }
        ... )
        >>> df_pl = pl.DataFrame(
        ...     {
        ...         'a': [2, 4, None, 3, 5],
        ...         'b': [2.0, 4.0, float("nan"), 3.0, 5.0]
        ...     }
        ... )

        Let's define a dataframe-agnostic function:

        >>> def func(df_any):
        ...     df = nw.from_native(df_any)
        ...     df = df.with_columns(nw.col('a', 'b').fill_null(0))
        ...     return nw.to_native(df)

        We can then pass either pandas or Polars to `func`:

        >>> func(df_pd)
             a    b
        0  2.0  2.0
        1  4.0  4.0
        2  0.0  0.0
        3  3.0  3.0
        4  5.0  5.0
        >>> func(df_pl)  # nan != null for polars
        shape: (5, 2)
        ┌─────┬─────┐
        │ a   ┆ b   │
        │ --- ┆ --- │
        │ i64 ┆ f64 │
        ╞═════╪═════╡
        │ 2   ┆ 2.0 │
        │ 4   ┆ 4.0 │
        │ 0   ┆ NaN │
        │ 3   ┆ 3.0 │
        │ 5   ┆ 5.0 │
        └─────┴─────┘
    """

    def fill(plx: Any) -> Any:
        # Evaluate this expression first, then fill on its result.
        return self._call(plx).fill_null(value)

    return self.__class__(fill)

# --- partial reduction ---
def drop_nulls(self) -> Expr:
    """Build a new expression that calls ``drop_nulls`` on this one's result.

    Returns:
        A new instance of this expression's class wrapping the deferred call.
    """

    def drop(plx: Any) -> Any:
        # Evaluate this expression first, then drop nulls on its result.
        return self._call(plx).drop_nulls()

    return self.__class__(drop)
Expand Down
3 changes: 3 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ def sort(self, *, descending: bool = False) -> Self:
def is_null(self) -> Self:
    """Call ``is_null`` on the wrapped series and re-wrap the result.

    Returns:
        The wrapped series' ``is_null`` result, passed through
        ``self._from_series``.
    """
    result = self._series.is_null()
    return self._from_series(result)

def fill_null(self, value: Any) -> Self:
    """Call ``fill_null`` on the wrapped series and re-wrap the result.

    Arguments:
        value: Forwarded unchanged to the wrapped series' ``fill_null``.

    Returns:
        The wrapped series' ``fill_null`` result, passed through
        ``self._from_series``.
    """
    result = self._series.fill_null(value)
    return self._from_series(result)

def is_between(
self, lower_bound: Any, upper_bound: Any, closed: str = "both"
) -> Self:
Expand Down
32 changes: 32 additions & 0 deletions tests/expr/fill_null_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import Any

import pandas as pd
import polars as pl
import pytest

import narwhals as nw
from tests.utils import compare_dicts

# Shared input for the test below: three columns, each containing at least
# one None so that fill_null has something to replace.
data = {
    "a": [0.0, None, 2, 3, 4],
    "b": [1.0, None, None, 5, 3],
    "c": [5.0, None, 3, 2, 1],
}


@pytest.mark.parametrize("constructor", [pd.DataFrame, pl.DataFrame])
def test_over_single(constructor: Any) -> None:
    """Check ``fill_null(99)`` through both the expression and Series APIs."""
    expected = {
        "a": [0.0, 99, 2, 3, 4],
        "b": [1.0, 99, 99, 5, 3],
        "c": [5.0, 99, 3, 2, 1],
    }
    df = nw.from_native(constructor(data), eager_only=True)

    # Expression API: fill every column in one call.
    via_expr = df.with_columns(nw.all().fill_null(99))
    compare_dicts(via_expr, expected)

    # Series API: fill each column separately and pass them as keyword columns.
    filled_columns = {name: df[name].fill_null(99) for name in expected}
    via_series = df.with_columns(**filled_columns)
    compare_dicts(via_series, expected)

0 comments on commit 6661aa1

Please sign in to comment.