From 477ed2391649cd63398e0e4e008fdfa38713cc7b Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 21 Jul 2024 01:04:28 -0400 Subject: [PATCH] add test cases for empty arrays/streams --- py-polars/src/series/import.rs | 8 +++++- .../unit/constructors/test_constructors.py | 27 ++++++++++++++++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/py-polars/src/series/import.rs b/py-polars/src/series/import.rs index 2b668d919fb7..9fe905bf83c4 100644 --- a/py-polars/src/series/import.rs +++ b/py-polars/src/series/import.rs @@ -101,7 +101,13 @@ pub(crate) fn import_stream_pycapsule(capsule: &Bound) -> PyResult None: # Array via C data interface pyarrow_array = pyarrow_table["bools"].chunk(0) round_trip_series = pl.Series(PyCapsuleArrayHolder(pyarrow_array)) - df["bools"].equals(round_trip_series, check_dtypes=True, check_names=False) + assert df["bools"].equals(round_trip_series, check_dtypes=True, check_names=False) + + # empty Array via C data interface + empty_pyarrow_array = pa.array([], type=pyarrow_array.type) + round_trip_series = pl.Series(PyCapsuleArrayHolder(empty_pyarrow_array)) + assert df["bools"].dtype == round_trip_series.dtype # RecordBatch via C array interface pyarrow_record_batch = pyarrow_table.to_batches()[0] @@ -1678,15 +1683,29 @@ def test_pycapsule_interface(df: pl.DataFrame) -> None: # ChunkedArray via C stream interface pyarrow_chunked_array = pyarrow_table["bools"] round_trip_series = pl.Series(PyCapsuleStreamHolder(pyarrow_chunked_array)) - df["bools"].equals(round_trip_series, check_dtypes=True, check_names=False) + assert df["bools"].equals(round_trip_series, check_dtypes=True, check_names=False) + + # empty ChunkedArray via C stream interface + empty_chunked_array = pa.chunked_array([], type=pyarrow_chunked_array.type) + round_trip_series = pl.Series(PyCapsuleStreamHolder(empty_chunked_array)) + assert df["bools"].dtype == round_trip_series.dtype # Table via C stream interface round_trip_df = pl.DataFrame(PyCapsuleStreamHolder(pyarrow_table)) assert df.equals(round_trip_df) # empty Table via C stream interface - # empty_df = df[:0].to_arrow() - # round_trip_df = pl.DataFrame(PyCapsuleStreamHolder(empty_df)) + empty_df = df[:0].to_arrow() + round_trip_df = pl.DataFrame(PyCapsuleStreamHolder(empty_df)) + orig_schema = df.schema + round_trip_schema = round_trip_df.schema + + # The "enum" schema is not preserved because categories are lost via C data + # interface + orig_schema.pop("enum") + round_trip_schema.pop("enum") + + assert orig_schema == round_trip_schema # RecordBatchReader via C stream interface pyarrow_reader = pa.RecordBatchReader.from_batches(