From 3f7c1904b3b6a243684f899d9bf322fd5f2b09f2 Mon Sep 17 00:00:00 2001
From: Julian
Date: Sat, 28 Oct 2023 12:30:38 +0200
Subject: [PATCH] fix(python,rust): str.concat on empty list (#12066)

---
 .../src/chunked_array/strings/concat.rs   |  6 ++++-
 .../unit/namespaces/string/test_string.py | 22 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/crates/polars-ops/src/chunked_array/strings/concat.rs b/crates/polars-ops/src/chunked_array/strings/concat.rs
index 781e1da2175c..16269f06d5c1 100644
--- a/crates/polars-ops/src/chunked_array/strings/concat.rs
+++ b/crates/polars-ops/src/chunked_array/strings/concat.rs
@@ -4,7 +4,11 @@ use polars_core::prelude::*;
 
 // Vertically concatenate all strings in a Utf8Chunked.
 pub fn str_concat(ca: &Utf8Chunked, delimiter: &str) -> Utf8Chunked {
-    if ca.len() <= 1 {
+    if ca.is_empty() {
+        return Utf8Chunked::new(ca.name(), &[""]);
+    }
+
+    if ca.len() == 1 {
         return ca.clone();
     }
 
diff --git a/py-polars/tests/unit/namespaces/string/test_string.py b/py-polars/tests/unit/namespaces/string/test_string.py
index fb2039ab33c3..db2526857470 100644
--- a/py-polars/tests/unit/namespaces/string/test_string.py
+++ b/py-polars/tests/unit/namespaces/string/test_string.py
@@ -28,6 +28,28 @@ def test_str_concat2() -> None:
     assert cast(str, df.item()) == "1-null-2"
 
 
+def test_str_concat_empty_list() -> None:
+    s = pl.Series([], dtype=pl.Utf8)
+    result = s.str.concat()
+    expected = pl.Series([""])
+    assert_series_equal(result, expected)
+
+
+def test_str_concat_empty_list2() -> None:
+    s = pl.Series([], dtype=pl.Utf8)
+    df = pl.DataFrame({"foo": s})
+    result = df.select(pl.col("foo").str.concat()).item()
+    expected = ""
+    assert result == expected
+
+
+def test_str_concat_empty_list_agg_context() -> None:
+    df = pl.DataFrame(data={"i": [1], "v": [None]}, schema_overrides={"v": pl.Utf8})
+    result = df.group_by("i").agg(pl.col("v").drop_nulls().str.concat())["v"].item()
+    expected = ""
+    assert result == expected
+
+
 def test_str_concat_datetime() -> None:
     df = pl.DataFrame({"d": [datetime(2020, 1, 1), None, datetime(2022, 1, 1)]})
     df = df.select(pl.col("d").str.concat("|"))