From e181a3ec19194bfa912613cb57fb9d1ee798f52d Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Wed, 26 Jul 2023 07:15:26 +0200
Subject: [PATCH] fix(rust, python): fix Boolean::isin(null values) (#10074)

---
 polars/polars-core/src/chunked_array/ops/aggregate/mod.rs |  1 -
 polars/polars-core/src/chunked_array/ops/is_in.rs         | 10 +++++++++-
 py-polars/tests/unit/operations/test_is_in.py             |  6 +++---
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/polars/polars-core/src/chunked_array/ops/aggregate/mod.rs b/polars/polars-core/src/chunked_array/ops/aggregate/mod.rs
index 12584c3baa5f..3db7a22fee53 100644
--- a/polars/polars-core/src/chunked_array/ops/aggregate/mod.rs
+++ b/polars/polars-core/src/chunked_array/ops/aggregate/mod.rs
@@ -223,7 +223,6 @@ where
 
 /// Booleans are casted to 1 or 0.
 impl BooleanChunked {
-    /// Returns `None` if the array is empty or only contains null values.
     pub fn sum(&self) -> Option {
         Some(if self.is_empty() {
             0
diff --git a/polars/polars-core/src/chunked_array/ops/is_in.rs b/polars/polars-core/src/chunked_array/ops/is_in.rs
index 7c408f977ebf..d6979b3476db 100644
--- a/polars/polars-core/src/chunked_array/ops/is_in.rs
+++ b/polars/polars-core/src/chunked_array/ops/is_in.rs
@@ -269,7 +269,15 @@ impl IsIn for BooleanChunked {
             DataType::Boolean => {
                 let other = other.bool().unwrap();
                 let has_true = other.any();
-                let has_false = !other.all();
+                let nc = other.null_count();
+
+                let has_false = if nc == 0 {
+                    !other.all()
+                } else {
+                    // `sum` counts the `true` values, so a `false` is present
+                    // iff trues + nulls do not account for every element.
+                    (other.sum().unwrap() as usize + nc) != other.len()
+                };
                 Ok(self.apply(|v| if v { has_true } else { has_false }))
             }
             _ => polars_bail!(opq = is_in, self.dtype(), other.dtype()),
diff --git a/py-polars/tests/unit/operations/test_is_in.py b/py-polars/tests/unit/operations/test_is_in.py
index 8d283e4d8a6f..81a01b4245c4 100644
--- a/py-polars/tests/unit/operations/test_is_in.py
+++ b/py-polars/tests/unit/operations/test_is_in.py
@@ -28,10 +28,10 @@ def test_struct_logical_is_in() -> None:
 
 
 def test_is_in_bool() -> None:
-    bool_value_to_filter_on = {True, None}
+    vals = [True, None]
     df = pl.DataFrame({"A": [True, False, None]})
-    assert df.filter(pl.col("A").is_in(bool_value_to_filter_on)).to_dict(False) == {
-        "A": [True, False]
+    assert df.select(pl.col("A").is_in(vals)).to_dict(False) == {
+        "A": [True, False, None]
     }
 
 