From 08154e545a3c22092131fac8af605468881d8788 Mon Sep 17 00:00:00 2001 From: Weijie Guo Date: Mon, 21 Aug 2023 18:01:02 +0800 Subject: [PATCH] fix(python): fix apply for empty series in threading mode (#10651) --- py-polars/polars/expr/expr.py | 13 ++++++----- py-polars/tests/unit/operations/test_apply.py | 22 +++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 68c013feaca8..258cbc802bf3 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -3845,8 +3845,16 @@ def wrap_f(x: Series) -> Series: # pragma: no cover elif strategy == "threading": def wrap_threading(x: Series) -> Series: + def get_lazy_promise(df: DataFrame) -> LazyFrame: + return df.lazy().select( + F.col("x").map(wrap_f, agg_list=True, return_dtype=return_dtype) + ) + df = x.to_frame("x") + if x.len() == 0: + return get_lazy_promise(df).collect().to_series() + n_threads = threadpool_size() chunk_size = x.len() // n_threads remainder = x.len() % n_threads @@ -3858,11 +3866,6 @@ def wrap_threading(x: Series) -> Series: for i in range(n_threads) ] - def get_lazy_promise(df: DataFrame) -> LazyFrame: - return df.lazy().select( - F.col("x").map(wrap_f, agg_list=True, return_dtype=return_dtype) - ) - # create partitions with LazyFrames # these are promises on a computation partitions = [] diff --git a/py-polars/tests/unit/operations/test_apply.py b/py-polars/tests/unit/operations/test_apply.py index d7948408cd2f..af6cb5946633 100644 --- a/py-polars/tests/unit/operations/test_apply.py +++ b/py-polars/tests/unit/operations/test_apply.py @@ -391,3 +391,25 @@ def test_apply_dict_order_10128() -> None: def test_apply_10237() -> None: df = pl.DataFrame({"a": [1, 2, 3]}) assert df.select(pl.all().apply(lambda x: x > 50))["a"].to_list() == [False] * 3 + + +def test_apply_on_empty_col_10639() -> None: + df = pl.DataFrame({"A": [], "B": []}) + res = df.groupby("B").agg( + pl.col("A") + .apply(lambda x: x, return_dtype=pl.Int32, strategy="threading") + .alias("Foo") + ) + assert res.to_dict(False) == { + "B": [], + "Foo": [], + } + res = df.groupby("B").agg( + pl.col("A") + .apply(lambda x: x, return_dtype=pl.Int32, strategy="thread_local") + .alias("Foo") + ) + assert res.to_dict(False) == { + "B": [], + "Foo": [], + }