From 6da020fbd0181282f45d9bc314a8ee51505fda8d Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Fri, 4 Aug 2023 08:54:43 +0200
Subject: [PATCH] fix(python): don't panic on cse if function hasn't implemented __eq__ (#10286)

---
 crates/polars-plan/src/dsl/python_udf.rs      |  3 ++-
 .../tests/unit/io/test_pyarrow_dataset.py     | 20 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/crates/polars-plan/src/dsl/python_udf.rs b/crates/polars-plan/src/dsl/python_udf.rs
index 46bf0d97795b..a234563093fd 100644
--- a/crates/polars-plan/src/dsl/python_udf.rs
+++ b/crates/polars-plan/src/dsl/python_udf.rs
@@ -44,7 +44,8 @@ impl PartialEq for PythonFunction {
             eq.call1(py, (other.0.clone(),))
                 .unwrap()
                 .extract::<bool>(py)
-                .unwrap()
+                // equality can be not implemented, so default to false
+                .unwrap_or(false)
         })
     }
 }
diff --git a/py-polars/tests/unit/io/test_pyarrow_dataset.py b/py-polars/tests/unit/io/test_pyarrow_dataset.py
index 0e5c2d67eb47..2331799248ee 100644
--- a/py-polars/tests/unit/io/test_pyarrow_dataset.py
+++ b/py-polars/tests/unit/io/test_pyarrow_dataset.py
@@ -137,3 +137,23 @@ def test_dataset_foo(df: pl.DataFrame, tmp_path: Path) -> None:
         .select(["bools", "floats", "date"])
         .collect(),
     )
+
+
+def test_pyarrow_dataset_comm_subplan_elim(tmp_path: Path) -> None:
+    df0 = pl.DataFrame({"a": [1, 2, 3]})
+
+    df1 = pl.DataFrame({"a": [1, 2]})
+
+    file_path_0 = tmp_path / "0.parquet"
+    file_path_1 = tmp_path / "1.parquet"
+
+    df0.write_parquet(file_path_0)
+    df1.write_parquet(file_path_1)
+
+    ds0 = ds.dataset(file_path_0, format="parquet")
+    ds1 = ds.dataset(file_path_1, format="parquet")
+
+    lf0 = pl.scan_pyarrow_dataset(ds0)
+    lf1 = pl.scan_pyarrow_dataset(ds1)
+
+    assert lf0.join(lf1, on="a", how="inner").collect().to_dict(False) == {"a": [1, 2]}