From 7617fd27f1370b0427349118c219b6e2b04c3993 Mon Sep 17 00:00:00 2001
From: alexander-beedie
Date: Mon, 25 Sep 2023 21:50:40 +0400
Subject: [PATCH] fix(python): DataFrame init from `collections.namedtuple` values

---
Reviewer notes (this area sits outside the commit message):

* sequence_to_pyseries: the struct-conversion branch no longer passes
  `annotated=True` to `is_namedtuple`, so the namedtuple check is made
  without that argument.
* _sequence_of_tuple_to_pydf: `__annotations__` is now read with
  `getattr(..., None)` and only used when present and the same length as
  the schema; the `orient = "row"` default changes from an `elif` on
  `schema is None` to an independent `if`, so it also applies when no
  schema was supplied.
* NOTE(review): this copy was rebuilt from a whitespace-collapsed version of
  the patch; line breaks and indentation of the hunks were reconstructed.
  Confirm the context lines against the base files before applying.

 py-polars/polars/utils/_construction.py   |  7 ++++---
 py-polars/tests/unit/test_constructors.py | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/py-polars/polars/utils/_construction.py b/py-polars/polars/utils/_construction.py
index 542da78b69a0..17bb0863968a 100644
--- a/py-polars/polars/utils/_construction.py
+++ b/py-polars/polars/utils/_construction.py
@@ -388,7 +388,7 @@ def sequence_to_pyseries(
         if (
             dataclasses.is_dataclass(value)
             or is_pydantic_model(value)
-            or is_namedtuple(value.__class__, annotated=True)
+            or is_namedtuple(value.__class__)
         ):
             return pl.DataFrame(values).to_struct(name)._s
         elif isinstance(value, range):
@@ -1080,12 +1080,13 @@ def _sequence_of_tuple_to_pydf(
     if is_namedtuple(first_element.__class__):
         if schema is None:
             schema = first_element._fields  # type: ignore[attr-defined]
-            if len(first_element.__annotations__) == len(schema):
+            annotations = getattr(first_element, "__annotations__", None)
+            if annotations and len(annotations) == len(schema):
                 schema = [
                     (name, py_type_to_dtype(tp, raise_unmatched=False))
                     for name, tp in first_element.__annotations__.items()
                 ]
-        elif orient is None:
+        if orient is None:
             orient = "row"
 
     # ...then defer to generic sequence processing
diff --git a/py-polars/tests/unit/test_constructors.py b/py-polars/tests/unit/test_constructors.py
index 512b2eb830e0..b66afe94b52f 100644
--- a/py-polars/tests/unit/test_constructors.py
+++ b/py-polars/tests/unit/test_constructors.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import sys
+from collections import namedtuple
 from datetime import date, datetime, timedelta, timezone
 from decimal import Decimal
 from random import shuffle
@@ -462,6 +463,24 @@ class ABC:
     assert dataclasses.asdict(abc) == df.rows(named=True)[0]
 
 
+def test_collections_namedtuple() -> None:
+    TestData = namedtuple("TestData", ["id", "info"])
+    nt_data = [TestData(1, "a"), TestData(2, "b"), TestData(3, "c")]
+
+    df1 = pl.DataFrame(nt_data)
+    assert df1.to_dict(False) == {"id": [1, 2, 3], "info": ["a", "b", "c"]}
+
+    df2 = pl.DataFrame({"data": nt_data, "misc": ["x", "y", "z"]})
+    assert df2.to_dict(False) == {
+        "data": [
+            {"id": 1, "info": "a"},
+            {"id": 2, "info": "b"},
+            {"id": 3, "info": "c"},
+        ],
+        "misc": ["x", "y", "z"],
+    }
+
+
 def test_init_ndarray(monkeypatch: Any) -> None:
     # Empty array
     df = pl.DataFrame(np.array([]))