Skip to content

Commit

Permalink
chore: increase local coverage for join (#506)
Browse files (browse the repository at this point in the history)
  • Loading branch information
MarcoGorelli authored Jul 13, 2024
1 parent 057c00c commit 1f3350f
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 19 deletions.
49 changes: 33 additions & 16 deletions tests/frame/join_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,20 @@
from tests.utils import compare_dicts


def test_inner_join(constructor_with_lazy: Any) -> None:
def test_inner_join_two_keys(constructor: Any) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df = nw.from_native(constructor_with_lazy(data)).lazy()
df = nw.from_native(constructor(data), eager_only=True)
df_right = df
result = df.join(df_right, left_on=["a", "b"], right_on=["a", "b"], how="inner")
result_native = nw.to_native(result)
expected = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9], "z_right": [7.0, 8, 9]}
compare_dicts(result_native, expected)
compare_dicts(result, expected)

result = df.collect().join(df_right.collect(), left_on="a", right_on="a", how="inner") # type: ignore[assignment]

def test_inner_join_single_key(constructor: Any) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df = nw.from_native(constructor(data), eager_only=True)
df_right = df
result = df.join(df_right, left_on="a", right_on="a", how="inner")
result_native = nw.to_native(result)
expected = {
"a": [1, 3, 2],
Expand All @@ -32,9 +36,9 @@ def test_inner_join(constructor_with_lazy: Any) -> None:
compare_dicts(result_native, expected)


def test_cross_join(constructor_with_lazy: Any) -> None:
def test_cross_join(constructor: Any) -> None:
data = {"a": [1, 3, 2]}
df = nw.from_native(constructor_with_lazy(data))
df = nw.from_native(constructor(data))
result = df.join(df, how="cross") # type: ignore[arg-type]

expected = {"a": [1, 1, 1, 3, 3, 3, 2, 2, 2], "a_right": [1, 3, 2, 1, 3, 2, 1, 3, 2]}
Expand Down Expand Up @@ -63,13 +67,13 @@ def test_cross_join_non_pandas() -> None:
],
)
def test_anti_join(
constructor_with_lazy: Any,
constructor: Any,
join_key: list[str],
filter_expr: nw.Expr,
expected: dict[str, list[Any]],
) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df = nw.from_native(constructor_with_lazy(data))
df = nw.from_native(constructor(data))
other = df.filter(filter_expr)
result = df.join(other, how="anti", left_on=join_key, right_on=join_key) # type: ignore[arg-type]
compare_dicts(result, expected)
Expand Down Expand Up @@ -97,9 +101,9 @@ def test_semi_join(


@pytest.mark.parametrize("how", ["right", "full"])
def test_join_not_implemented(constructor_with_lazy: Any, how: str) -> None:
def test_join_not_implemented(constructor: Any, how: str) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df = nw.from_native(constructor_with_lazy(data))
df = nw.from_native(constructor(data))

with pytest.raises(
NotImplementedError,
Expand All @@ -112,12 +116,14 @@ def test_join_not_implemented(constructor_with_lazy: Any, how: str) -> None:

@pytest.mark.filterwarnings("ignore:the default coalesce behavior")
def test_left_join(constructor: Any) -> None:
data_left = {"a": [1, 2, 3], "b": [4, 5, 6]}
data_right = {"a": [1, 2, 3], "c": [4, 5, 6]}
data_left = {"a": [1.0, 2, 3], "b": [4.0, 5, 6]}
data_right = {"a": [1.0, 2, 3], "c": [4.0, 5, 7]}
df_left = nw.from_native(constructor(data_left), eager_only=True)
df_right = nw.from_native(constructor(data_right), eager_only=True)
result = df_left.join(df_right, left_on="b", right_on="c", how="left")
expected = {"a": [1, 2, 3], "b": [4, 5, 6], "a_right": [1, 2, 3]}
result = df_left.join(df_right, left_on="b", right_on="c", how="left").select(
nw.all().fill_null(float("nan"))
)
expected = {"a": [1, 2, 3], "b": [4, 5, 6], "a_right": [1, 2, float("nan")]}
compare_dicts(result, expected)


Expand All @@ -139,11 +145,22 @@ def test_left_join_overlapping_column(constructor: Any) -> None:
df_left = nw.from_native(constructor(data_left), eager_only=True)
df_right = nw.from_native(constructor(data_right), eager_only=True)
result = df_left.join(df_right, left_on="b", right_on="c", how="left")
expected = {
expected: dict[str, list[Any]] = {
"a": [1, 2, 3],
"b": [4, 5, 6],
"d": [1, 4, 2],
"a_right": [1, 2, 3],
"d_right": [1, 4, 2],
}
compare_dicts(result, expected)
result = df_left.join(df_right, left_on="a", right_on="d", how="left").select(
nw.all().fill_null(float("nan"))
)
expected = {
"a": [1, 2, 3],
"b": [4, 5, 6],
"d": [1, 4, 2],
"a_right": [1.0, 3.0, float("nan")],
"c": [4.0, 6.0, float("nan")],
}
compare_dicts(result, expected)
6 changes: 3 additions & 3 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@
import math
import sys
import warnings
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterator
from typing import Sequence

if TYPE_CHECKING:
import pandas as pd
import pandas as pd


def zip_strict(left: Sequence[Any], right: Sequence[Any]) -> Iterator[Any]:
Expand All @@ -35,6 +33,8 @@ def compare_dicts(result: Any, expected: dict[str, Any]) -> None:
assert math.isclose(lhs, rhs, rel_tol=0, abs_tol=1e-6), (lhs, rhs)
elif isinstance(lhs, float) and math.isnan(lhs):
assert math.isnan(rhs), (lhs, rhs) # pragma: no cover
elif pd.isna(lhs):
assert pd.isna(rhs), (lhs, rhs)
else:
assert lhs == rhs, (lhs, rhs)

Expand Down

0 comments on commit 1f3350f

Please sign in to comment.