From eea22949739ecc9b6288895fa4a25ac4ca1b52c5 Mon Sep 17 00:00:00 2001 From: cmdlineluser <99486669+cmdlineluser@users.noreply.github.com> Date: Sun, 30 Jul 2023 13:37:31 +0100 Subject: [PATCH] fix(rust, python): prevent re-ordering of dict keys inside `.apply` --- py-polars/src/apply/mod.rs | 10 ++++++++-- py-polars/tests/unit/operations/test_apply.py | 5 +++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/py-polars/src/apply/mod.rs b/py-polars/src/apply/mod.rs index f9d0a10b2d26..4d5fdc3d04ee 100644 --- a/py-polars/src/apply/mod.rs +++ b/py-polars/src/apply/mod.rs @@ -54,6 +54,10 @@ fn iterator_to_struct<'a>( // ] let mut struct_fields: BTreeMap<&str, Vec<AnyValue>> = BTreeMap::new(); + // as a BTreeMap sorts its keys, we also need to track the original + // order of the field names + let mut field_names_ordered = Vec::with_capacity(flds.len()); + // use the first value and the known null count to initialize the buffers // if we find a new key later on, we make a new entry in the BTree for (value, fld) in vals.into_iter().zip(flds) { @@ -62,6 +66,7 @@ fn iterator_to_struct<'a>( buf.push(AnyValue::Null); } buf.push(value); + field_names_ordered.push(fld.name() as &str); struct_fields.insert(fld.name(), buf); } @@ -86,6 +91,7 @@ fn iterator_to_struct<'a>( for (key, val) in dict.iter() { let key = key.str().unwrap().to_str().unwrap(); let buf = struct_fields.entry(key).or_insert_with(|| { + field_names_ordered.push(key); let mut buf = Vec::with_capacity(capacity); for _ in 0..(init_null_count + current_len) { buf.push(AnyValue::Null); @@ -110,9 +116,9 @@ fn iterator_to_struct<'a>( } let fields = POOL.install(|| { - struct_fields + field_names_ordered .par_iter() - .map(|(name, avs)| Series::new(name, avs)) + .map(|name| Series::new(name, struct_fields.get(name).unwrap())) .collect::<Vec<_>>() }); diff --git a/py-polars/tests/unit/operations/test_apply.py b/py-polars/tests/unit/operations/test_apply.py index 840a2754830e..b8bfe480b738 100644 --- 
a/py-polars/tests/unit/operations/test_apply.py +++ b/py-polars/tests/unit/operations/test_apply.py @@ -381,3 +381,8 @@ def test_apply_shifted_chunks() -> None: "column_0": ["test", "test123", "tests"], "column_1": [None, "test", "test123"], } + + +def test_apply_dict_order_10128() -> None: + df = pl.select(pl.lit("").apply(lambda x: {"c": 1, "b": 2, "a": 3})) + assert df.to_dict(False) == {"literal": [{"c": 1, "b": 2, "a": 3}]}