From c241a6d9cba02949a698bcf3280394020ab4b5cd Mon Sep 17 00:00:00 2001
From: Weijie Guo
Date: Wed, 20 Sep 2023 14:40:59 +0800
Subject: [PATCH] fix: fix nullable filter mask in group_by

---
 .../src/physical_plan/expressions/filter.rs    |  2 +-
 py-polars/tests/unit/operations/test_filter.py | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/crates/polars-lazy/src/physical_plan/expressions/filter.rs b/crates/polars-lazy/src/physical_plan/expressions/filter.rs
index 9af814d405bb..6095354960ef 100644
--- a/crates/polars-lazy/src/physical_plan/expressions/filter.rs
+++ b/crates/polars-lazy/src/physical_plan/expressions/filter.rs
@@ -73,7 +73,7 @@ impl PhysicalExpr for FilterExpr {
             let predicate = predicate_s.bool()?;
 
             // All values true - don't do anything.
-            if predicate.all() {
+            if let Some(true) = predicate.all_kleene() {
                 return Ok(ac_s);
             }
             // All values false - create empty groups.
diff --git a/py-polars/tests/unit/operations/test_filter.py b/py-polars/tests/unit/operations/test_filter.py
index a1e044630001..4de22bdede9b 100644
--- a/py-polars/tests/unit/operations/test_filter.py
+++ b/py-polars/tests/unit/operations/test_filter.py
@@ -32,6 +32,22 @@ def test_melt_values_predicate_pushdown() -> None:
     ).to_dict(False) == {"id": [1], "variable": ["asset_key_1"], "value": ["123"]}
 
 
+def test_group_by_filter_all_true() -> None:
+    df = pl.DataFrame(
+        {
+            "name": ["a", "a", "b", "b"],
+            "type": [None, 1, 1, None],
+            "order": [1, 2, 3, 4],
+        }
+    )
+    out = (
+        df.group_by("name")
+        .agg([pl.col("order").filter(pl.col("type") == 1).n_unique().alias("n_unique")])
+        .select("n_unique")
+    )
+    assert out.to_dict(False) == {"n_unique": [1, 1]}
+
+
 def test_filter_is_in_4572() -> None:
     df = pl.DataFrame({"id": [1, 2, 1, 2], "k": ["a"] * 2 + ["b"] * 2})
     expected = (