chore: increase local coverage for join (#506)

narwhals-dev · Jul 13, 2024
commit 1f3350f · 1 parent 057c00c

Showing 2 changed files with 36 additions and 19 deletions.
diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py
@@ -11,16 +11,20 @@
 from tests.utils import compare_dicts
 
 
-def test_inner_join(constructor_with_lazy: Any) -> None:
+def test_inner_join_two_keys(constructor: Any) -> None:
     data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
-    df = nw.from_native(constructor_with_lazy(data)).lazy()
+    df = nw.from_native(constructor(data), eager_only=True)
     df_right = df
     result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner")
-    result_native = nw.to_native(result)
     expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "z_right": [7.0, 8, 9]}
-    compare_dicts(result_native, expected)
+    compare_dicts(result, expected)
 
-    result = df.collect().join(df_right.collect(), left_on="a", right_on="a", how="inner")  # type: ignore[assignment]
+
+def test_inner_join_single_key(constructor: Any) -> None:
+    data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
+    df = nw.from_native(constructor(data), eager_only=True)
+    df_right = df
+    result = df.join(df_right, left_on="a", right_on="a", how="inner")
     result_native = nw.to_native(result)
     expected = {
         "a": [1, 3, 2],
@@ -32,9 +36,9 @@ def test_inner_join(constructor_with_lazy: Any) -> None:
     compare_dicts(result_native, expected)
 
 
-def test_cross_join(constructor_with_lazy: Any) -> None:
+def test_cross_join(constructor: Any) -> None:
     data = {"a": [1, 3, 2]}
-    df = nw.from_native(constructor_with_lazy(data))
+    df = nw.from_native(constructor(data))
     result = df.join(df, how="cross")  # type: ignore[arg-type]
 
     expected = {"a": [1, 1, 1, 3, 3, 3, 2, 2, 2], "a_right": [1, 3, 2, 1, 3, 2, 1, 3, 2]}
@@ -63,13 +67,13 @@ def test_cross_join_non_pandas() -> None:
     ],
 )
 def test_anti_join(
-    constructor_with_lazy: Any,
+    constructor: Any,
     join_key: list[str],
     filter_expr: nw.Expr,
     expected: dict[str, list[Any]],
 ) -> None:
     data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
-    df = nw.from_native(constructor_with_lazy(data))
+    df = nw.from_native(constructor(data))
     other = df.filter(filter_expr)
     result = df.join(other, how="anti", left_on=join_key, right_on=join_key)  # type: ignore[arg-type]
     compare_dicts(result, expected)
@@ -97,9 +101,9 @@ def test_semi_join(
 
 
 @pytest.mark.parametrize("how", ["right", "full"])
-def test_join_not_implemented(constructor_with_lazy: Any, how: str) -> None:
+def test_join_not_implemented(constructor: Any, how: str) -> None:
     data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
-    df = nw.from_native(constructor_with_lazy(data))
+    df = nw.from_native(constructor(data))
 
     with pytest.raises(
         NotImplementedError,
@@ -112,12 +116,14 @@ def test_join_not_implemented(constructor_with_lazy: Any, how: str) -> None:
 
 @pytest.mark.filterwarnings("ignore:the default coalesce behavior")
 def test_left_join(constructor: Any) -> None:
-    data_left = {"a": [1, 2, 3], "b": [4, 5, 6]}
-    data_right = {"a": [1, 2, 3], "c": [4, 5, 6]}
+    data_left = {"a": [1.0, 2, 3], "b": [4.0, 5, 6]}
+    data_right = {"a": [1.0, 2, 3], "c": [4.0, 5, 7]}
     df_left = nw.from_native(constructor(data_left), eager_only=True)
     df_right = nw.from_native(constructor(data_right), eager_only=True)
-    result = df_left.join(df_right, left_on="b", right_on="c", how="left")
-    expected = {"a": [1, 2, 3], "b": [4, 5, 6], "a_right": [1, 2, 3]}
+    result = df_left.join(df_right, left_on="b", right_on="c", how="left").select(
+        nw.all().fill_null(float("nan"))
+    )
+    expected = {"a": [1, 2, 3], "b": [4, 5, 6], "a_right": [1, 2, float("nan")]}
     compare_dicts(result, expected)
 
 
@@ -139,11 +145,22 @@ def test_left_join_overlapping_column(constructor: Any) -> None:
     df_left = nw.from_native(constructor(data_left), eager_only=True)
     df_right = nw.from_native(constructor(data_right), eager_only=True)
     result = df_left.join(df_right, left_on="b", right_on="c", how="left")
-    expected = {
+    expected: dict[str, list[Any]] = {
         "a": [1, 2, 3],
         "b": [4, 5, 6],
         "d": [1, 4, 2],
         "a_right": [1, 2, 3],
         "d_right": [1, 4, 2],
     }
     compare_dicts(result, expected)
+    result = df_left.join(df_right, left_on="a", right_on="d", how="left").select(
+        nw.all().fill_null(float("nan"))
+    )
+    expected = {
+        "a": [1, 2, 3],
+        "b": [4, 5, 6],
+        "d": [1, 4, 2],
+        "a_right": [1.0, 3.0, float("nan")],
+        "c": [4.0, 6.0, float("nan")],
+    }
+    compare_dicts(result, expected)
diff --git a/tests/utils.py b/tests/utils.py
@@ -3,13 +3,11 @@
 import math
 import sys
 import warnings
-from typing import TYPE_CHECKING
 from typing import Any
 from typing import Iterator
 from typing import Sequence
 
-if TYPE_CHECKING:
-    import pandas as pd
+import pandas as pd
 
 
 def zip_strict(left: Sequence[Any], right: Sequence[Any]) -> Iterator[Any]:
@@ -35,6 +33,8 @@ def compare_dicts(result: Any, expected: dict[str, Any]) -> None:
                 assert math.isclose(lhs, rhs, rel_tol=0, abs_tol=1e-6), (lhs, rhs)
             elif isinstance(lhs, float) and math.isnan(lhs):
                 assert math.isnan(rhs), (lhs, rhs)  # pragma: no cover
+            elif pd.isna(lhs):
+                assert pd.isna(rhs), (lhs, rhs)
             else:
                 assert lhs == rhs, (lhs, rhs)