diff --git a/py-polars/tests/unit/io/test_delta.py b/py-polars/tests/unit/io/test_delta.py index ea84f94bb08c0..c6de3ab379672 100644 --- a/py-polars/tests/unit/io/test_delta.py +++ b/py-polars/tests/unit/io/test_delta.py @@ -1,5 +1,6 @@ from __future__ import annotations +from datetime import datetime from pathlib import Path import pyarrow.fs @@ -182,165 +183,166 @@ def test_write_delta(df: pl.DataFrame, tmp_path: Path) -> None: @pytest.mark.write_disk() -def test_write_delta_w_compatible_schema(tmp_path: Path) -> None: - import datetime - - from deltalake import DeltaTable - - from polars import Datetime, Field, List, Struct, UInt32, Utf8 - - df = pl.DataFrame( - [ - pl.Series("string", ["test"], dtype=pl.Utf8), - pl.Series("uint", [1], dtype=pl.UInt64), - pl.Series("int", [1], dtype=pl.Int64), - pl.Series( - "uint_list", - [[[[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]]]], - dtype=pl.List(pl.List(pl.List(pl.List(pl.UInt16)))), - ), - pl.Series( - "date_ns", - [datetime.datetime(2010, 1, 1, 0, 0)], - dtype=Datetime(time_unit="ns"), - ), - pl.Series( - "date_us", - [datetime.datetime(2010, 1, 1, 0, 0)], - dtype=Datetime(time_unit="us"), +@pytest.mark.parametrize( + "series", + [ + pl.Series("string", ["test"], dtype=pl.Utf8), + pl.Series("uint", [1], dtype=pl.UInt64), + pl.Series("int", [1], dtype=pl.Int64), + pl.Series( + "uint_list", + [[[[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]]]], + dtype=pl.List(pl.List(pl.List(pl.List(pl.UInt16)))), + ), + pl.Series( + "date_ns", + [datetime(2010, 1, 1, 0, 0)], + dtype=pl.Datetime(time_unit="ns"), + ), + pl.Series( + "date_us", + [datetime(2010, 1, 1, 0, 0)], + dtype=pl.Datetime(time_unit="us"), + ), + pl.Series( + "list_date", + [ + [ + datetime(2010, 1, 1, 0, 0), + datetime(2010, 1, 2, 0, 0), + ] + ], + dtype=pl.List( + pl.Datetime( + time_unit="ns", + ) ), - pl.Series( - "list_date", + ), + pl.Series( + "list_date_us", + [ [ - [ - datetime.datetime(2010, 1, 1, 0, 0), - datetime.datetime(2010, 1, 2, 0, 0), - ] - ], - dtype=pl.List( - Datetime( - time_unit="ns", - ) - ), + datetime(2010, 1, 1, 0, 0), + datetime(2010, 1, 2, 0, 0), + ] + ], + dtype=pl.List( + pl.Datetime( + time_unit="ms", + ) ), - pl.Series( - "list_date_us", + ), + pl.Series( + "nested_list_date", + [ [ [ - datetime.datetime(2010, 1, 1, 0, 0), - datetime.datetime(2010, 1, 2, 0, 0), + datetime(2010, 1, 1, 0, 0), + datetime(2010, 1, 2, 0, 0), ] - ], - dtype=pl.List( - Datetime( - time_unit="ms", + ] + ], + dtype=pl.List( + pl.List( + pl.Datetime( + time_unit="ns", ) - ), + ) ), - pl.Series( - "nested_list_date", - [ - [ + ), + pl.Series( + "struct_with_list", + [ + { + "date_range": [ + datetime(2010, 1, 1, 0, 0), + datetime(2010, 1, 2, 0, 0), + ], + "date_us": [ + datetime(2010, 1, 1, 0, 0), + datetime(2010, 1, 2, 0, 0), + ], + "date_range_nested": [ [ - datetime.datetime(2010, 1, 1, 0, 0), - datetime.datetime(2010, 1, 2, 0, 0), + datetime(2010, 1, 1, 0, 0), + datetime(2010, 1, 2, 0, 0), ] - ] - ], - dtype=pl.List( - List( - Datetime( - time_unit="ns", - ) - ) - ), + ], + "string": "test", + "int": 1, + } + ], + dtype=pl.Struct( + [ + pl.Field( + "date_range", + pl.List(pl.Datetime(time_unit="ms", time_zone=None)), + ), + pl.Field( + "date_us", pl.List(pl.Datetime(time_unit="ms", time_zone=None)) + ), + pl.Field( + "date_range_nested", + pl.List(pl.List(pl.Datetime(time_unit="ms", time_zone=None))), + ), + pl.Field("string", pl.Utf8), + pl.Field("int", pl.UInt32), + ] ), - pl.Series( - "struct_with_list", + ), + pl.Series( + "list_with_struct_with_list", + [ [ { "date_range": [ - datetime.datetime(2010, 1, 1, 0, 0), - datetime.datetime(2010, 1, 2, 0, 0), + datetime(2010, 1, 1, 0, 0), + datetime(2010, 1, 2, 0, 0), ], - "date_us": [ - datetime.datetime(2010, 1, 1, 0, 0), - datetime.datetime(2010, 1, 2, 0, 0), + "date_ns": [ + datetime(2010, 1, 1, 0, 0), + datetime(2010, 1, 2, 0, 0), ], "date_range_nested": [ [ - datetime.datetime(2010, 1, 1, 0, 0), - datetime.datetime(2010, 1, 2, 0, 0), + datetime(2010, 1, 1, 0, 0), + datetime(2010, 1, 2, 0, 0), ] ], "string": "test", "int": 1, } - ], - dtype=pl.Struct( + ] + ], + dtype=pl.List( + pl.Struct( [ - Field( - "date_range", List(Datetime(time_unit="ms", time_zone=None)) + pl.Field( + "date_range", + pl.List(pl.Datetime(time_unit="ns", time_zone=None)), ), - Field( - "date_us", List(Datetime(time_unit="ms", time_zone=None)) + pl.Field( + "date_ns", + pl.List(pl.Datetime(time_unit="ns", time_zone=None)), ), - Field( + pl.Field( "date_range_nested", - List(List(Datetime(time_unit="ms", time_zone=None))), + pl.List( + pl.List(pl.Datetime(time_unit="ns", time_zone=None)) + ), ), - Field("string", Utf8), - Field("int", UInt32), + pl.Field("string", pl.Utf8), + pl.Field("int", pl.UInt32), ] - ), + ) ), - pl.Series( - "list_with_struct_with_list", - [ - [ - { - "date_range": [ - datetime.datetime(2010, 1, 1, 0, 0), - datetime.datetime(2010, 1, 2, 0, 0), - ], - "date_ns": [ - datetime.datetime(2010, 1, 1, 0, 0), - datetime.datetime(2010, 1, 2, 0, 0), - ], - "date_range_nested": [ - [ - datetime.datetime(2010, 1, 1, 0, 0), - datetime.datetime(2010, 1, 2, 0, 0), - ] - ], - "string": "test", - "int": 1, - } - ] - ], - dtype=pl.List( - Struct( - [ - Field( - "date_range", - List(Datetime(time_unit="ns", time_zone=None)), - ), - Field( - "date_ns", - List(Datetime(time_unit="ns", time_zone=None)), - ), - Field( - "date_range_nested", - List(List(Datetime(time_unit="ns", time_zone=None))), - ), - Field("string", Utf8), - Field("int", UInt32), - ] - ) - ), - ), - ] - ) + ), + ], +) +def test_write_delta_w_compatible_schema(series: pl.Series, tmp_path: Path) -> None: + from deltalake import DeltaTable + + df = pl.DataFrame(series) # Create table df.write_delta(tmp_path, mode="append")