Merge branch 'main' into enh/slotted-classes

narwhals-dev · Jul 13, 2024 · 64b21be · 64b21be
2 parents 67ceab2 + 9663d6f
commit 64b21be
Show file tree

Hide file tree

Showing 13 changed files with 158 additions and 57 deletions.
diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Iterable
+from typing import Literal
 from typing import Sequence
 from typing import overload
 
@@ -185,6 +186,41 @@ def with_columns(
         df = self._native_dataframe.__class__.from_arrays(to_concat, names=output_names)
         return self._from_native_dataframe(df)
 
+    def join(
+        self,
+        other: Self,
+        *,
+        how: Literal["inner"] = "inner",
+        left_on: str | list[str] | None,
+        right_on: str | list[str] | None,
+    ) -> Self:
+        """Join this frame with ``other`` on the given key columns.
+
+        Arguments:
+            other: Frame whose native dataframe is joined onto this one.
+            how: Join strategy, forwarded to the native ``join`` as ``join_type``.
+                ``"cross"``, ``"anti"``, ``"semi"`` and ``"left"`` are rejected.
+            left_on: Key column name(s) in this frame; a single name is wrapped
+                in a list.
+            right_on: Key column name(s) in ``other``; a single name is wrapped
+                in a list.
+
+        Returns:
+            A new frame wrapping the result of the native ``join``, which is
+            called with ``right_suffix="_right"``.
+
+        Raises:
+            NotImplementedError: If ``how`` is ``"cross"``, ``"anti"``, ``"semi"``
+                or ``"left"``.
+        """
+        # Reject the strategies this backend does not implement yet in one place,
+        # and say which value was refused rather than raising a bare exception.
+        if how in ("cross", "anti", "semi", "left"):
+            msg = f"`how={how!r}` is not yet implemented for this backend"
+            raise NotImplementedError(msg)
+
+        # The native join is always handed lists of keys.
+        if isinstance(left_on, str):
+            left_on = [left_on]
+        if isinstance(right_on, str):
+            right_on = [right_on]
+
+        return self._from_native_dataframe(
+            self._native_dataframe.join(
+                other._native_dataframe,
+                keys=left_on,
+                right_keys=right_on,
+                join_type=how,
+                right_suffix="_right",
+            ),
+        )
+
     def drop(self, *columns: str | Iterable[str]) -> Self:
         return self._from_native_dataframe(
             self._native_dataframe.drop(list(flatten(columns)))

diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
@@ -113,15 +113,27 @@ def __or__(self, other: ArrowExpr | bool | Any) -> Self:
     def __add__(self, other: ArrowExpr | Any) -> Self:
         return reuse_series_implementation(self, "__add__", other)
 
+    def __radd__(self, other: ArrowExpr | Any) -> Self:
+        """Reflected ``+``: dispatch ``"__radd__"`` via ``reuse_series_implementation``."""
+        return reuse_series_implementation(self, "__radd__", other)
+
     def __sub__(self, other: ArrowExpr | Any) -> Self:
         return reuse_series_implementation(self, "__sub__", other)
 
+    def __rsub__(self, other: ArrowExpr | Any) -> Self:
+        """Reflected ``-``: dispatch ``"__rsub__"`` via ``reuse_series_implementation``."""
+        return reuse_series_implementation(self, "__rsub__", other)
+
     def __mul__(self, other: ArrowExpr | Any) -> Self:
         return reuse_series_implementation(self, "__mul__", other)
 
+    def __rmul__(self, other: ArrowExpr | Any) -> Self:
+        """Reflected ``*``: dispatch ``"__rmul__"`` via ``reuse_series_implementation``."""
+        return reuse_series_implementation(self, "__rmul__", other)
+
     def __pow__(self, other: ArrowExpr | Any) -> Self:
         return reuse_series_implementation(self, "__pow__", other)
 
+    def __rpow__(self, other: ArrowExpr | Any) -> Self:
+        """Reflected ``**``: dispatch ``"__rpow__"`` via ``reuse_series_implementation``."""
+        return reuse_series_implementation(self, "__rpow__", other)
+
     def filter(self, *predicates: Any) -> Self:
         from narwhals._arrow.namespace import ArrowNamespace
 

diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py
@@ -85,9 +85,15 @@ def _create_series_from_scalar(self, value: Any, series: ArrowSeries) -> ArrowSe
             backend_version=self._backend_version,
         )
 
-    def _create_native_series(self, value: Any) -> Any:  # pragma: no cover (todo!)
+    def _create_compliant_series(self, value: Any) -> ArrowSeries:
+        """Build an ``ArrowSeries`` with an empty name from ``value``.
+
+        ``value`` is passed as the only element of the list given to
+        ``pa.chunked_array``; the resulting series carries this namespace's
+        backend version.
+        """
+        # NOTE(review): imported locally, presumably to avoid a circular import
+        # with the series module -- confirm.
+        from narwhals._arrow.series import ArrowSeries
+
         pa = get_pyarrow()
-        return pa.chunked_array([value])
+        return ArrowSeries(
+            native_series=pa.chunked_array([value]),
+            name="",
+            backend_version=self._backend_version,
+        )
 
     # --- not in spec ---
     def __init__(self, *, backend_version: tuple[int, ...]) -> None:

diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
@@ -110,22 +110,37 @@ def __add__(self, other: Any) -> Self:
         other = validate_column_comparand(other)
         return self._from_native_series(pc.add(self._native_series, other))
 
+    def __radd__(self, other: Any) -> Self:
+        """Return ``other + self``.
+
+        Evaluated as ``self + other``, i.e. it relies on addition being commutative.
+        """
+        return self + other  # type: ignore[no-any-return]
+
     def __sub__(self, other: Any) -> Self:
         pc = get_pyarrow_compute()
         other = validate_column_comparand(other)
         return self._from_native_series(pc.subtract(self._native_series, other))
 
+    def __rsub__(self, other: Any) -> Self:
+        """Return ``other - self`` (reflected subtraction).
+
+        The operands are handed to the subtract kernel in swapped order, the same
+        way ``__rpow__`` swaps them for ``power``. This replaces computing
+        ``self - other`` and multiplying by ``-1``, which needed a second kernel
+        call and an intermediate array.
+        """
+        pc = get_pyarrow_compute()
+        other = validate_column_comparand(other)
+        return self._from_native_series(pc.subtract(other, self._native_series))
+
     def __mul__(self, other: Any) -> Self:
         pc = get_pyarrow_compute()
         other = validate_column_comparand(other)
         return self._from_native_series(pc.multiply(self._native_series, other))
 
+    def __rmul__(self, other: Any) -> Self:
+        """Return ``other * self``.
+
+        Evaluated as ``self * other``, i.e. it relies on multiplication being
+        commutative.
+        """
+        return self * other  # type: ignore[no-any-return]
+
     def __pow__(self, other: Any) -> Self:
         pc = get_pyarrow_compute()
         ser = self._native_series
         other = validate_column_comparand(other)
         return self._from_native_series(pc.power(ser, other))
 
+    def __rpow__(self, other: Any) -> Self:
+        """Return ``other ** self``: ``other`` is the base, this series the exponent."""
+        power = get_pyarrow_compute().power
+        exponent = self._native_series
+        base = validate_column_comparand(other)
+        return self._from_native_series(power(base, exponent))
+
     def filter(self, other: Any) -> Self:
         other = validate_column_comparand(other)
         return self._from_native_series(self._native_series.filter(other))

diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py
@@ -150,8 +150,8 @@ def parse_into_expr(
     if isinstance(into_expr, str):
         return namespace.col(into_expr)
     if (np := get_numpy()) is not None and isinstance(into_expr, np.ndarray):
-        series = namespace._create_native_series(into_expr)
-        return namespace._create_expr_from_series(series)
+        series = namespace._create_compliant_series(into_expr)
+        return namespace._create_expr_from_series(series)  # type: ignore[arg-type]
     msg = f"Expected IntoExpr, got {type(into_expr)}"  # pragma: no cover
     raise AssertionError(msg)
 

diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py
@@ -99,7 +99,7 @@ def _create_expr_from_series(self, series: PandasLikeSeries) -> PandasLikeExpr:
             backend_version=self._backend_version,
         )
 
-    def _create_native_series(self, value: Any) -> Any:
+    def _create_compliant_series(self, value: Any) -> PandasLikeSeries:
         return create_native_series(
             value,
             implementation=self._implementation,

diff --git a/narwhals/series.py b/narwhals/series.py
@@ -1261,6 +1261,11 @@ def __truediv__(self, other: object) -> Self:
             self._compliant_series.__truediv__(self._extract_native(other))
         )
 
+    def __rtruediv__(self, other: object) -> Self:
+        """Return ``other / self`` by delegating to the compliant series."""
+        numerator = self._extract_native(other)
+        quotient = self._compliant_series.__rtruediv__(numerator)
+        return self._from_compliant_series(quotient)
+
     def __floordiv__(self, other: object) -> Self:
         return self._from_compliant_series(
             self._compliant_series.__floordiv__(self._extract_native(other))

diff --git a/tests/expr/arithmetic_test.py b/tests/expr/arithmetic_test.py
@@ -38,3 +38,37 @@ def test_arithmetic(
     df = nw.from_native(constructor_with_pyarrow(data))
     result = df.select(getattr(nw.col("a"), attr)(rhs))
     compare_dicts(result, {"a": expected})
+
+
+@pytest.mark.parametrize(
+    ("attr", "rhs", "expected"),
+    [
+        ("__radd__", 1, [2, 3, 4]),
+        ("__rsub__", 1, [0, -1, -2]),
+        ("__rmul__", 2, [2, 4, 6]),
+        ("__rtruediv__", 2.0, [2, 1, 2 / 3]),
+        ("__rfloordiv__", 2, [2, 1, 0]),
+        ("__rmod__", 2, [0, 0, 2]),
+        ("__rpow__", 2, [2, 4, 8]),
+    ],
+)
+def test_right_arithmetic(
+    attr: str, rhs: Any, expected: list[Any], constructor_with_pyarrow: Any, request: Any
+) -> None:
+    """Check each reflected operator through an expression and through a series."""
+    constructor_repr = str(constructor_with_pyarrow)
+
+    # constructors whose repr contains "pandas_pyarrow": `__rmod__` is marked xfail
+    if attr == "__rmod__" and "pandas_pyarrow" in constructor_repr:
+        request.applymarker(pytest.mark.xfail)
+
+    # pyarrow case
+    xfail_for_table = ("__rtruediv__", "__rfloordiv__", "__rmod__")
+    if attr in xfail_for_table and "table" in constructor_repr:
+        request.applymarker(pytest.mark.xfail)
+
+    df = nw.from_native(constructor_with_pyarrow({"a": [1, 2, 3]}))
+    want = {"a": expected}
+
+    via_expr = df.select(a=getattr(nw.col("a"), attr)(rhs))
+    compare_dicts(via_expr, want)
+
+    via_series = df.select(a=getattr(df["a"], attr)(rhs))
+    compare_dicts(via_series, want)
diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py
@@ -11,30 +11,33 @@
 from tests.utils import compare_dicts
 
 
-def test_inner_join(constructor_with_lazy: Any) -> None:
+def test_inner_join_two_keys(constructor_with_pyarrow: Any) -> None:
+    # Inner self-join on both `a` and `b`: the expected result repeats only the
+    # non-key column `z`, as `z_right`.
     data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
-    df = nw.from_native(constructor_with_lazy(data)).lazy()
+    df = nw.from_native(constructor_with_pyarrow(data), eager_only=True)
     df_right = df
     result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner")
-    result_native = nw.to_native(result)
     expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "z_right": [7.0, 8, 9]}
-    compare_dicts(result_native, expected)
+    compare_dicts(result, expected)
+
 
-    result = df.collect().join(df_right.collect(), left_on="a", right_on="a", how="inner")  # type: ignore[assignment]
-    result_native = nw.to_native(result)
+def test_inner_join_single_key(constructor_with_pyarrow: Any) -> None:
+    # Inner self-join on `a` only: the expected result repeats both non-key
+    # columns, as `b_right` and `z_right`.
+    data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
+    df = nw.from_native(constructor_with_pyarrow(data), eager_only=True)
+    df_right = df
+    result = df.join(df_right, left_on="a", right_on="a", how="inner")
     expected = {
         "a": [1, 3, 2],
         "b": [4, 4, 6],
         "b_right": [4, 4, 6],
         "z": [7.0, 8, 9],
         "z_right": [7.0, 8, 9],
     }
-    compare_dicts(result_native, expected)
+    compare_dicts(result, expected)
 
 
-def test_cross_join(constructor_with_lazy: Any) -> None:
+def test_cross_join(constructor: Any) -> None:
     data = {"a": [1, 3, 2]}
-    df = nw.from_native(constructor_with_lazy(data))
+    df = nw.from_native(constructor(data))
     result = df.join(df, how="cross")  # type: ignore[arg-type]
 
     expected = {"a": [1, 1, 1, 3, 3, 3, 2, 2, 2], "a_right": [1, 3, 2, 1, 3, 2, 1, 3, 2]}
@@ -63,13 +66,13 @@ def test_cross_join_non_pandas() -> None:
     ],
 )
 def test_anti_join(
-    constructor_with_lazy: Any,
+    constructor: Any,
     join_key: list[str],
     filter_expr: nw.Expr,
     expected: dict[str, list[Any]],
 ) -> None:
     data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
-    df = nw.from_native(constructor_with_lazy(data))
+    df = nw.from_native(constructor(data))
     other = df.filter(filter_expr)
     result = df.join(other, how="anti", left_on=join_key, right_on=join_key)  # type: ignore[arg-type]
     compare_dicts(result, expected)
@@ -97,9 +100,9 @@ def test_semi_join(
 
 
 @pytest.mark.parametrize("how", ["right", "full"])
-def test_join_not_implemented(constructor_with_lazy: Any, how: str) -> None:
+def test_join_not_implemented(constructor: Any, how: str) -> None:
     data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
-    df = nw.from_native(constructor_with_lazy(data))
+    df = nw.from_native(constructor(data))
 
     with pytest.raises(
         NotImplementedError,
@@ -112,12 +115,14 @@ def test_join_not_implemented(constructor_with_lazy: Any, how: str) -> None:
 
 @pytest.mark.filterwarnings("ignore:the default coalesce behavior")
 def test_left_join(constructor: Any) -> None:
+    # Left join on b == c. The last left row (b=6) has no partner in the right
+    # frame (whose last c is 7), so the expected `a_right` ends with a missing
+    # value.
-    data_left = {"a": [1, 2, 3], "b": [4, 5, 6]}
-    data_right = {"a": [1, 2, 3], "c": [4, 5, 6]}
+    data_left = {"a": [1.0, 2, 3], "b": [4.0, 5, 6]}
+    data_right = {"a": [1.0, 2, 3], "c": [4.0, 5, 7]}
     df_left = nw.from_native(constructor(data_left), eager_only=True)
     df_right = nw.from_native(constructor(data_right), eager_only=True)
-    result = df_left.join(df_right, left_on="b", right_on="c", how="left")
-    expected = {"a": [1, 2, 3], "b": [4, 5, 6], "a_right": [1, 2, 3]}
+    # Nulls are filled with NaN so the result can be compared against the plain
+    # float("nan") written in `expected`.
+    result = df_left.join(df_right, left_on="b", right_on="c", how="left").select(
+        nw.all().fill_null(float("nan"))
+    )
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6], "a_right": [1, 2, float("nan")]}
     compare_dicts(result, expected)
 
 
@@ -139,11 +144,22 @@ def test_left_join_overlapping_column(constructor: Any) -> None:
     df_left = nw.from_native(constructor(data_left), eager_only=True)
     df_right = nw.from_native(constructor(data_right), eager_only=True)
     result = df_left.join(df_right, left_on="b", right_on="c", how="left")
-    expected = {
+    expected: dict[str, list[Any]] = {
         "a": [1, 2, 3],
         "b": [4, 5, 6],
         "d": [1, 4, 2],
         "a_right": [1, 2, 3],
         "d_right": [1, 4, 2],
     }
     compare_dicts(result, expected)
+    result = df_left.join(df_right, left_on="a", right_on="d", how="left").select(
+        nw.all().fill_null(float("nan"))
+    )
+    expected = {
+        "a": [1, 2, 3],
+        "b": [4, 5, 6],
+        "d": [1, 4, 2],
+        "a_right": [1.0, 3.0, float("nan")],
+        "c": [4.0, 6.0, float("nan")],
+    }
+    compare_dicts(result, expected)
diff --git a/tests/frame/with_columns_sequence_test.py b/tests/frame/with_columns_sequence_test.py
@@ -11,7 +11,12 @@
 }
 
 
-def test_with_columns(constructor: Any) -> None:
-    result = nw.from_native(constructor(data)).with_columns(d=np.array([4, 5]))
-    expected = {"a": ["foo", "bars"], "ab": ["foo", "bars"], "d": [4, 5]}
+def test_with_columns(constructor_with_pyarrow: Any) -> None:
+    # Add column `d` from a raw numpy array, then derive `e = d + 1` from it and
+    # keep only those two columns for the comparison.
+    result = (
+        nw.from_native(constructor_with_pyarrow(data))
+        .with_columns(d=np.array([4, 5]))
+        .with_columns(e=nw.col("d") + 1)
+        .select("d", "e")
+    )
+    expected = {"d": [4, 5], "e": [5, 6]}
     compare_dicts(result, expected)
diff --git a/tests/series/arithmetic_test.py b/tests/series/arithmetic_test.py
@@ -43,30 +43,3 @@ def test_arithmetic(
     s = nw.from_native(constructor_series_with_pyarrow(data), series_only=True)
     result = getattr(s, attr)(rhs)
     assert result.to_numpy().tolist() == expected
-
-
-@pytest.mark.parametrize("data", [[1, 2, 3]])
-@pytest.mark.parametrize(
-    ("attr", "rhs", "expected"),
-    [
-        ("__radd__", 1, [2, 3, 4]),
-        ("__rsub__", 1, [0, -1, -2]),
-        ("__rmul__", 2, [2, 4, 6]),
-        ("__rfloordiv__", 2, [2, 1, 0]),
-        ("__rmod__", 2, [0, 0, 2]),
-        ("__rpow__", 2, [2, 4, 8]),
-    ],
-)
-def test_rarithmetic(
-    data: list[int | float],
-    attr: str,
-    rhs: Any,
-    expected: list[Any],
-    constructor_series: Any,
-    request: Any,
-) -> None:
-    if "pyarrow" in str(constructor_series) and attr == "__rmod__":
-        request.applymarker(pytest.mark.xfail)
-    s = nw.from_native(constructor_series(data), series_only=True)
-    result = getattr(s, attr)(rhs)
-    assert result.to_numpy().tolist() == expected
diff --git a/tests/utils.py b/tests/utils.py
@@ -3,13 +3,11 @@
 import math
 import sys
 import warnings
-from typing import TYPE_CHECKING
 from typing import Any
 from typing import Iterator
 from typing import Sequence
 
-if TYPE_CHECKING:
-    import pandas as pd
+import pandas as pd
 
 
 def zip_strict(left: Sequence[Any], right: Sequence[Any]) -> Iterator[Any]:
@@ -35,6 +33,8 @@ def compare_dicts(result: Any, expected: dict[str, Any]) -> None:
                 assert math.isclose(lhs, rhs, rel_tol=0, abs_tol=1e-6), (lhs, rhs)
             elif isinstance(lhs, float) and math.isnan(lhs):
                 assert math.isnan(rhs), (lhs, rhs)  # pragma: no cover
+            elif pd.isna(lhs):
+                assert pd.isna(rhs), (lhs, rhs)
             else:
                 assert lhs == rhs, (lhs, rhs)
 

diff --git a/utils/check_backend_completeness.py b/utils/check_backend_completeness.py
@@ -18,7 +18,6 @@
     "DataFrame.is_unique",
     "DataFrame.item",
     "DataFrame.iter_rows",
-    "DataFrame.join",
     "DataFrame.pipe",
     "DataFrame.rename",
     "DataFrame.unique",