Skip to content

Commit

Permalink
fix(rust, python): block predicate pushdown is_in and null producing … (
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Jul 31, 2023
1 parent 600c325 commit 47b91ab
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ fn should_block_join_specific(ae: &AExpr, how: &JoinType) -> bool {
| FunctionExpr::FillNull { .. },
..
} => join_produces_null(how),
#[cfg(feature = "is_in")]
Function {
function: FunctionExpr::Boolean(BooleanFunction::IsIn),
..
} => join_produces_null(how),
// joins can produce duplicates
#[cfg(feature = "is_unique")]
Function {
Expand Down
15 changes: 15 additions & 0 deletions py-polars/tests/unit/test_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,18 @@ def test_predicate_pushdown_join_fill_null_10058() -> None:
.collect()
.to_dict(False)["id"]
) == [0, 2]


def test_is_in_join_blocked() -> None:
    """Regression test: an ``is_in`` predicate must not be pushed below a left join.

    ``Groups`` comes from the right-hand frame of a left join, and the filter is
    a negated ``is_in`` on that column. The predicate has to be evaluated on the
    joined result; the expected output pins the rows that survive when the
    filter is applied after the join.
    """
    df1 = pl.DataFrame(
        {"Groups": ["A", "B", "C", "D", "E", "F"], "values0": [1, 2, 3, 4, 5, 6]}
    ).lazy()

    df2 = pl.DataFrame(
        {"values22": [1, 2, None, 4, 5, 6], "values20": [1, 2, 3, 4, 5, 6]}
    ).lazy()

    df_all = df2.join(df1, left_on="values20", right_on="values0", how="left")

    # Pass `as_series` by keyword: the positional form is not accepted by newer
    # polars releases, while the keyword form works on old and new versions.
    result = (
        df_all.filter(~pl.col("Groups").is_in(["A", "B", "F"]))
        .collect()
        .to_dict(as_series=False)
    )
    assert result == {
        "values22": [None, 4, 5],
        "values20": [3, 4, 5],
        "Groups": ["C", "D", "E"],
    }

0 comments on commit 47b91ab

Please sign in to comment.