Skip to content

Commit

Permalink
Merge branch 'main' into enh/slotted-classes
Browse files Browse the repository at this point in the history
  • Loading branch information
FBruzzesi committed Jul 13, 2024
2 parents 67ceab2 + 9663d6f commit 64b21be
Show file tree
Hide file tree
Showing 13 changed files with 158 additions and 57 deletions.
36 changes: 36 additions & 0 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterable
from typing import Literal
from typing import Sequence
from typing import overload

Expand Down Expand Up @@ -185,6 +186,41 @@ def with_columns(
df = self._native_dataframe.__class__.from_arrays(to_concat, names=output_names)
return self._from_native_dataframe(df)

def join(
    self,
    other: Self,
    *,
    how: Literal["inner"] = "inner",
    left_on: str | list[str] | None,
    right_on: str | list[str] | None,
) -> Self:
    """Join this frame with ``other`` through the native table's ``join``.

    Arguments:
        other: Frame whose ``_native_dataframe`` is the right-hand side.
        how: Join strategy. Only ``"inner"`` is forwarded to the native join.
        left_on: Key column name(s) on this frame; a single string is
            wrapped into a one-element list.
        right_on: Key column name(s) on ``other``; a single string is
            wrapped into a one-element list.

    Returns:
        The joined table wrapped via ``_from_native_dataframe``. Non-key
        columns from ``other`` that clash by name get the ``"_right"`` suffix.

    Raises:
        NotImplementedError: If ``how`` is ``"cross"``, ``"anti"``, ``"semi"``
            or ``"left"``.
    """
    # `how` is annotated as Literal["inner"], but nothing stops a caller from
    # passing another strategy at runtime, so reject the known-unsupported
    # ones with a message that says which strategy was requested.
    unsupported_strategies = ("cross", "anti", "semi", "left")
    if how in unsupported_strategies:
        msg = (
            f"Join strategy {how!r} is not implemented for this backend; "
            "only 'inner' is supported."
        )
        raise NotImplementedError(msg)

    if isinstance(left_on, str):
        left_on = [left_on]
    if isinstance(right_on, str):
        right_on = [right_on]

    return self._from_native_dataframe(
        self._native_dataframe.join(
            other._native_dataframe,
            keys=left_on,
            right_keys=right_on,
            join_type=how,
            right_suffix="_right",
        ),
    )

def drop(self, *columns: str | Iterable[str]) -> Self:
return self._from_native_dataframe(
self._native_dataframe.drop(list(flatten(columns)))
Expand Down
12 changes: 12 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,27 @@ def __or__(self, other: ArrowExpr | bool | Any) -> Self:
def __add__(self, other: ArrowExpr | Any) -> Self:
    """Build the ``self + other`` expression by forwarding ``"__add__"`` to ``reuse_series_implementation``."""
    return reuse_series_implementation(self, "__add__", other)

def __radd__(self, other: ArrowExpr | Any) -> Self:
    """Build the reflected ``other + self`` expression by forwarding ``"__radd__"`` to ``reuse_series_implementation``."""
    return reuse_series_implementation(self, "__radd__", other)

def __sub__(self, other: ArrowExpr | Any) -> Self:
    """Build the ``self - other`` expression by forwarding ``"__sub__"`` to ``reuse_series_implementation``."""
    return reuse_series_implementation(self, "__sub__", other)

def __rsub__(self, other: ArrowExpr | Any) -> Self:
    """Build the reflected ``other - self`` expression by forwarding ``"__rsub__"`` to ``reuse_series_implementation``."""
    return reuse_series_implementation(self, "__rsub__", other)

def __mul__(self, other: ArrowExpr | Any) -> Self:
    """Build the ``self * other`` expression by forwarding ``"__mul__"`` to ``reuse_series_implementation``."""
    return reuse_series_implementation(self, "__mul__", other)

def __rmul__(self, other: ArrowExpr | Any) -> Self:
    """Build the reflected ``other * self`` expression by forwarding ``"__rmul__"`` to ``reuse_series_implementation``."""
    return reuse_series_implementation(self, "__rmul__", other)

def __pow__(self, other: ArrowExpr | Any) -> Self:
    """Build the ``self ** other`` expression by forwarding ``"__pow__"`` to ``reuse_series_implementation``."""
    return reuse_series_implementation(self, "__pow__", other)

def __rpow__(self, other: ArrowExpr | Any) -> Self:
    """Build the reflected ``other ** self`` expression by forwarding ``"__rpow__"`` to ``reuse_series_implementation``."""
    return reuse_series_implementation(self, "__rpow__", other)

def filter(self, *predicates: Any) -> Self:
from narwhals._arrow.namespace import ArrowNamespace

Expand Down
10 changes: 8 additions & 2 deletions narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,15 @@ def _create_series_from_scalar(self, value: Any, series: ArrowSeries) -> ArrowSe
backend_version=self._backend_version,
)

def _create_native_series(self, value: Any) -> Any: # pragma: no cover (todo!)
def _create_compliant_series(self, value: Any) -> ArrowSeries:
from narwhals._arrow.series import ArrowSeries

pa = get_pyarrow()
return pa.chunked_array([value])
return ArrowSeries(
native_series=pa.chunked_array([value]),
name="",
backend_version=self._backend_version,
)

# --- not in spec ---
def __init__(self, *, backend_version: tuple[int, ...]) -> None:
Expand Down
15 changes: 15 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,22 +110,37 @@ def __add__(self, other: Any) -> Self:
other = validate_column_comparand(other)
return self._from_native_series(pc.add(self._native_series, other))

def __radd__(self, other: Any) -> Self:
    """Reflected addition: evaluate ``other + self`` as ``self + other``."""
    # Operands are swapped on purpose; this reuses the forward ``+`` operator.
    return self + other  # type: ignore[no-any-return]

def __sub__(self, other: Any) -> Self:
    """Return ``self - other``, computed with pyarrow compute's ``subtract``.

    ``other`` is first normalised by ``validate_column_comparand``; the
    result is wrapped via ``_from_native_series``.
    """
    subtract = get_pyarrow_compute().subtract
    rhs = validate_column_comparand(other)
    difference = subtract(self._native_series, rhs)
    return self._from_native_series(difference)

def __rsub__(self, other: Any) -> Self:
    """Return the reflected difference ``other - self``.

    ``other`` is normalised by ``validate_column_comparand`` and the result
    is wrapped via ``_from_native_series``.
    """
    pc = get_pyarrow_compute()
    other = validate_column_comparand(other)
    # Subtract with the operands swapped (the same shape as ``__rpow__``)
    # instead of computing ``(self - other) * -1``: one kernel call rather
    # than two, and no need for multiplication on the difference's type.
    return self._from_native_series(pc.subtract(other, self._native_series))

def __mul__(self, other: Any) -> Self:
    """Return ``self * other``, computed with pyarrow compute's ``multiply``.

    ``other`` is first normalised by ``validate_column_comparand``; the
    result is wrapped via ``_from_native_series``.
    """
    multiply = get_pyarrow_compute().multiply
    rhs = validate_column_comparand(other)
    product = multiply(self._native_series, rhs)
    return self._from_native_series(product)

def __rmul__(self, other: Any) -> Self:
    """Reflected multiplication: evaluate ``other * self`` as ``self * other``."""
    # Operands are swapped on purpose; this reuses the forward ``*`` operator.
    return self * other  # type: ignore[no-any-return]

def __pow__(self, other: Any) -> Self:
    """Return ``self ** other``, computed with pyarrow compute's ``power``.

    This series is the base and ``other`` (normalised by
    ``validate_column_comparand``) is the exponent.
    """
    power = get_pyarrow_compute().power
    base = self._native_series
    exponent = validate_column_comparand(other)
    return self._from_native_series(power(base, exponent))

def __rpow__(self, other: Any) -> Self:
    """Return the reflected power ``other ** self``.

    ``other`` (normalised by ``validate_column_comparand``) is the base and
    this series is the exponent.
    """
    power = get_pyarrow_compute().power
    exponent = self._native_series
    base = validate_column_comparand(other)
    return self._from_native_series(power(base, exponent))

def filter(self, other: Any) -> Self:
    """Filter the native series by ``other`` and wrap the result.

    ``other`` is normalised by ``validate_column_comparand`` before being
    handed to the native series' ``filter`` method.
    """
    mask = validate_column_comparand(other)
    kept = self._native_series.filter(mask)
    return self._from_native_series(kept)
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_expression_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ def parse_into_expr(
if isinstance(into_expr, str):
return namespace.col(into_expr)
if (np := get_numpy()) is not None and isinstance(into_expr, np.ndarray):
series = namespace._create_native_series(into_expr)
return namespace._create_expr_from_series(series)
series = namespace._create_compliant_series(into_expr)
return namespace._create_expr_from_series(series) # type: ignore[arg-type]
msg = f"Expected IntoExpr, got {type(into_expr)}" # pragma: no cover
raise AssertionError(msg)

Expand Down
2 changes: 1 addition & 1 deletion narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def _create_expr_from_series(self, series: PandasLikeSeries) -> PandasLikeExpr:
backend_version=self._backend_version,
)

def _create_native_series(self, value: Any) -> Any:
def _create_compliant_series(self, value: Any) -> PandasLikeSeries:
return create_native_series(
value,
implementation=self._implementation,
Expand Down
5 changes: 5 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,11 @@ def __truediv__(self, other: object) -> Self:
self._compliant_series.__truediv__(self._extract_native(other))
)

def __rtruediv__(self, other: object) -> Self:
    """Reflected true division: evaluate ``other / self``.

    ``other`` is unwrapped with ``_extract_native``, passed to the compliant
    series' ``__rtruediv__``, and the result is re-wrapped via
    ``_from_compliant_series``.
    """
    wrap = self._from_compliant_series
    reflected_div = self._compliant_series.__rtruediv__
    return wrap(reflected_div(self._extract_native(other)))

def __floordiv__(self, other: object) -> Self:
return self._from_compliant_series(
self._compliant_series.__floordiv__(self._extract_native(other))
Expand Down
34 changes: 34 additions & 0 deletions tests/expr/arithmetic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,37 @@ def test_arithmetic(
df = nw.from_native(constructor_with_pyarrow(data))
result = df.select(getattr(nw.col("a"), attr)(rhs))
compare_dicts(result, {"a": expected})


@pytest.mark.parametrize(
    ("attr", "rhs", "expected"),
    [
        ("__radd__", 1, [2, 3, 4]),
        ("__rsub__", 1, [0, -1, -2]),
        ("__rmul__", 2, [2, 4, 6]),
        ("__rtruediv__", 2.0, [2, 1, 2 / 3]),
        ("__rfloordiv__", 2, [2, 1, 0]),
        ("__rmod__", 2, [0, 0, 2]),
        ("__rpow__", 2, [2, 4, 8]),
    ],
)
def test_right_arithmetic(
    attr: str, rhs: Any, expected: list[Any], constructor_with_pyarrow: Any, request: Any
) -> None:
    """Check reflected operators on column ``a`` via both an expression and a series.

    Cases are marked xfail when the constructor's string form contains
    ``"pandas_pyarrow"`` (for ``__rmod__``) or ``"table"`` (for
    ``__rtruediv__``, ``__rfloordiv__`` and ``__rmod__``).
    """
    xfail = pytest.mark.xfail
    backend = str(constructor_with_pyarrow)

    if attr in {"__rmod__"} and "pandas_pyarrow" in backend:
        request.applymarker(xfail)

    # pyarrow case
    pyarrow_table_gaps = {"__rtruediv__", "__rfloordiv__", "__rmod__"}
    if attr in pyarrow_table_gaps and "table" in backend:
        request.applymarker(xfail)

    frame = nw.from_native(constructor_with_pyarrow({"a": [1, 2, 3]}))
    wanted = {"a": expected}

    # Expression path: the operator is called on ``nw.col("a")``.
    via_expr = frame.select(a=getattr(nw.col("a"), attr)(rhs))
    compare_dicts(via_expr, wanted)

    # Series path: the same operator is called on the extracted column.
    via_series = frame.select(a=getattr(frame["a"], attr)(rhs))
    compare_dicts(via_series, wanted)
52 changes: 34 additions & 18 deletions tests/frame/join_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,33 @@
from tests.utils import compare_dicts


def test_inner_join(constructor_with_lazy: Any) -> None:
def test_inner_join_two_keys(constructor_with_pyarrow: Any) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df = nw.from_native(constructor_with_lazy(data)).lazy()
df = nw.from_native(constructor_with_pyarrow(data), eager_only=True)
df_right = df
result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner")
result_native = nw.to_native(result)
expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "z_right": [7.0, 8, 9]}
compare_dicts(result_native, expected)
compare_dicts(result, expected)


result = df.collect().join(df_right.collect(), left_on="a", right_on="a", how="inner") # type: ignore[assignment]
result_native = nw.to_native(result)
def test_inner_join_single_key(constructor_with_pyarrow: Any) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df = nw.from_native(constructor_with_pyarrow(data), eager_only=True)
df_right = df
result = df.join(df_right, left_on="a", right_on="a", how="inner")
expected = {
"a": [1, 3, 2],
"b": [4, 4, 6],
"b_right": [4, 4, 6],
"z": [7.0, 8, 9],
"z_right": [7.0, 8, 9],
}
compare_dicts(result_native, expected)
compare_dicts(result, expected)


def test_cross_join(constructor_with_lazy: Any) -> None:
def test_cross_join(constructor: Any) -> None:
data = {"a": [1, 3, 2]}
df = nw.from_native(constructor_with_lazy(data))
df = nw.from_native(constructor(data))
result = df.join(df, how="cross") # type: ignore[arg-type]

expected = {"a": [1, 1, 1, 3, 3, 3, 2, 2, 2], "a_right": [1, 3, 2, 1, 3, 2, 1, 3, 2]}
Expand Down Expand Up @@ -63,13 +66,13 @@ def test_cross_join_non_pandas() -> None:
],
)
def test_anti_join(
constructor_with_lazy: Any,
constructor: Any,
join_key: list[str],
filter_expr: nw.Expr,
expected: dict[str, list[Any]],
) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df = nw.from_native(constructor_with_lazy(data))
df = nw.from_native(constructor(data))
other = df.filter(filter_expr)
result = df.join(other, how="anti", left_on=join_key, right_on=join_key) # type: ignore[arg-type]
compare_dicts(result, expected)
Expand Down Expand Up @@ -97,9 +100,9 @@ def test_semi_join(


@pytest.mark.parametrize("how", ["right", "full"])
def test_join_not_implemented(constructor_with_lazy: Any, how: str) -> None:
def test_join_not_implemented(constructor: Any, how: str) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df = nw.from_native(constructor_with_lazy(data))
df = nw.from_native(constructor(data))

with pytest.raises(
NotImplementedError,
Expand All @@ -112,12 +115,14 @@ def test_join_not_implemented(constructor_with_lazy: Any, how: str) -> None:

@pytest.mark.filterwarnings("ignore:the default coalesce behavior")
def test_left_join(constructor: Any) -> None:
data_left = {"a": [1, 2, 3], "b": [4, 5, 6]}
data_right = {"a": [1, 2, 3], "c": [4, 5, 6]}
data_left = {"a": [1.0, 2, 3], "b": [4.0, 5, 6]}
data_right = {"a": [1.0, 2, 3], "c": [4.0, 5, 7]}
df_left = nw.from_native(constructor(data_left), eager_only=True)
df_right = nw.from_native(constructor(data_right), eager_only=True)
result = df_left.join(df_right, left_on="b", right_on="c", how="left")
expected = {"a": [1, 2, 3], "b": [4, 5, 6], "a_right": [1, 2, 3]}
result = df_left.join(df_right, left_on="b", right_on="c", how="left").select(
nw.all().fill_null(float("nan"))
)
expected = {"a": [1, 2, 3], "b": [4, 5, 6], "a_right": [1, 2, float("nan")]}
compare_dicts(result, expected)


Expand All @@ -139,11 +144,22 @@ def test_left_join_overlapping_column(constructor: Any) -> None:
df_left = nw.from_native(constructor(data_left), eager_only=True)
df_right = nw.from_native(constructor(data_right), eager_only=True)
result = df_left.join(df_right, left_on="b", right_on="c", how="left")
expected = {
expected: dict[str, list[Any]] = {
"a": [1, 2, 3],
"b": [4, 5, 6],
"d": [1, 4, 2],
"a_right": [1, 2, 3],
"d_right": [1, 4, 2],
}
compare_dicts(result, expected)
result = df_left.join(df_right, left_on="a", right_on="d", how="left").select(
nw.all().fill_null(float("nan"))
)
expected = {
"a": [1, 2, 3],
"b": [4, 5, 6],
"d": [1, 4, 2],
"a_right": [1.0, 3.0, float("nan")],
"c": [4.0, 6.0, float("nan")],
}
compare_dicts(result, expected)
11 changes: 8 additions & 3 deletions tests/frame/with_columns_sequence_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@
}


def test_with_columns(constructor: Any) -> None:
result = nw.from_native(constructor(data)).with_columns(d=np.array([4, 5]))
expected = {"a": ["foo", "bars"], "ab": ["foo", "bars"], "d": [4, 5]}
def test_with_columns(constructor_with_pyarrow: Any) -> None:
result = (
nw.from_native(constructor_with_pyarrow(data))
.with_columns(d=np.array([4, 5]))
.with_columns(e=nw.col("d") + 1)
.select("d", "e")
)
expected = {"d": [4, 5], "e": [5, 6]}
compare_dicts(result, expected)
27 changes: 0 additions & 27 deletions tests/series/arithmetic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,30 +43,3 @@ def test_arithmetic(
s = nw.from_native(constructor_series_with_pyarrow(data), series_only=True)
result = getattr(s, attr)(rhs)
assert result.to_numpy().tolist() == expected


@pytest.mark.parametrize("data", [[1, 2, 3]])
@pytest.mark.parametrize(
    ("attr", "rhs", "expected"),
    [
        ("__radd__", 1, [2, 3, 4]),
        ("__rsub__", 1, [0, -1, -2]),
        ("__rmul__", 2, [2, 4, 6]),
        ("__rfloordiv__", 2, [2, 1, 0]),
        ("__rmod__", 2, [0, 0, 2]),
        ("__rpow__", 2, [2, 4, 8]),
    ],
)
def test_rarithmetic(
    data: list[int | float],
    attr: str,
    rhs: Any,
    expected: list[Any],
    constructor_series: Any,
    request: Any,
) -> None:
    """Check reflected operators on a series built from ``data``.

    ``__rmod__`` is marked xfail when the constructor's string form
    contains ``"pyarrow"``.
    """
    if attr == "__rmod__" and "pyarrow" in str(constructor_series):
        request.applymarker(pytest.mark.xfail)

    series = nw.from_native(constructor_series(data), series_only=True)
    reflected_op = getattr(series, attr)
    outcome = reflected_op(rhs)
    assert outcome.to_numpy().tolist() == expected
6 changes: 3 additions & 3 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@
import math
import sys
import warnings
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterator
from typing import Sequence

if TYPE_CHECKING:
import pandas as pd
import pandas as pd


def zip_strict(left: Sequence[Any], right: Sequence[Any]) -> Iterator[Any]:
Expand All @@ -35,6 +33,8 @@ def compare_dicts(result: Any, expected: dict[str, Any]) -> None:
assert math.isclose(lhs, rhs, rel_tol=0, abs_tol=1e-6), (lhs, rhs)
elif isinstance(lhs, float) and math.isnan(lhs):
assert math.isnan(rhs), (lhs, rhs) # pragma: no cover
elif pd.isna(lhs):
assert pd.isna(rhs), (lhs, rhs)
else:
assert lhs == rhs, (lhs, rhs)

Expand Down
1 change: 0 additions & 1 deletion utils/check_backend_completeness.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
"DataFrame.is_unique",
"DataFrame.item",
"DataFrame.iter_rows",
"DataFrame.join",
"DataFrame.pipe",
"DataFrame.rename",
"DataFrame.unique",
Expand Down

0 comments on commit 64b21be

Please sign in to comment.