Skip to content

Commit

Permalink
Added parametrized tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
ion-elgreco committed Aug 5, 2023
1 parent d190b14 commit 43e33f3
Showing 1 changed file with 131 additions and 129 deletions.
260 changes: 131 additions & 129 deletions py-polars/tests/unit/io/test_delta.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from datetime import datetime
from pathlib import Path

import pyarrow.fs
Expand Down Expand Up @@ -182,165 +183,166 @@ def test_write_delta(df: pl.DataFrame, tmp_path: Path) -> None:


@pytest.mark.write_disk()
def test_write_delta_w_compatible_schema(tmp_path: Path) -> None:
import datetime

from deltalake import DeltaTable

from polars import Datetime, Field, List, Struct, UInt32, Utf8

df = pl.DataFrame(
[
pl.Series("string", ["test"], dtype=pl.Utf8),
pl.Series("uint", [1], dtype=pl.UInt64),
pl.Series("int", [1], dtype=pl.Int64),
pl.Series(
"uint_list",
[[[[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]]]],
dtype=pl.List(pl.List(pl.List(pl.List(pl.UInt16)))),
),
pl.Series(
"date_ns",
[datetime.datetime(2010, 1, 1, 0, 0)],
dtype=Datetime(time_unit="ns"),
),
pl.Series(
"date_us",
[datetime.datetime(2010, 1, 1, 0, 0)],
dtype=Datetime(time_unit="us"),
@pytest.mark.parametrize(
"series",
[
pl.Series("string", ["test"], dtype=pl.Utf8),
pl.Series("uint", [1], dtype=pl.UInt64),
pl.Series("int", [1], dtype=pl.Int64),
pl.Series(
"uint_list",
[[[[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]]]],
dtype=pl.List(pl.List(pl.List(pl.List(pl.UInt16)))),
),
pl.Series(
"date_ns",
[datetime(2010, 1, 1, 0, 0)],
dtype=pl.Datetime(time_unit="ns"),
),
pl.Series(
"date_us",
[datetime(2010, 1, 1, 0, 0)],
dtype=pl.Datetime(time_unit="us"),
),
pl.Series(
"list_date",
[
[
datetime(2010, 1, 1, 0, 0),
datetime(2010, 1, 2, 0, 0),
]
],
dtype=pl.List(
pl.Datetime(
time_unit="ns",
)
),
pl.Series(
"list_date",
),
pl.Series(
"list_date_us",
[
[
[
datetime.datetime(2010, 1, 1, 0, 0),
datetime.datetime(2010, 1, 2, 0, 0),
]
],
dtype=pl.List(
Datetime(
time_unit="ns",
)
),
datetime(2010, 1, 1, 0, 0),
datetime(2010, 1, 2, 0, 0),
]
],
dtype=pl.List(
pl.Datetime(
time_unit="ms",
)
),
pl.Series(
"list_date_us",
),
pl.Series(
"nested_list_date",
[
[
[
datetime.datetime(2010, 1, 1, 0, 0),
datetime.datetime(2010, 1, 2, 0, 0),
datetime(2010, 1, 1, 0, 0),
datetime(2010, 1, 2, 0, 0),
]
],
dtype=pl.List(
Datetime(
time_unit="ms",
]
],
dtype=pl.List(
pl.List(
pl.Datetime(
time_unit="ns",
)
),
)
),
pl.Series(
"nested_list_date",
[
[
),
pl.Series(
"struct_with_list",
[
{
"date_range": [
datetime(2010, 1, 1, 0, 0),
datetime(2010, 1, 2, 0, 0),
],
"date_us": [
datetime(2010, 1, 1, 0, 0),
datetime(2010, 1, 2, 0, 0),
],
"date_range_nested": [
[
datetime.datetime(2010, 1, 1, 0, 0),
datetime.datetime(2010, 1, 2, 0, 0),
datetime(2010, 1, 1, 0, 0),
datetime(2010, 1, 2, 0, 0),
]
]
],
dtype=pl.List(
List(
Datetime(
time_unit="ns",
)
)
),
],
"string": "test",
"int": 1,
}
],
dtype=pl.Struct(
[
pl.Field(
"date_range",
pl.List(pl.Datetime(time_unit="ms", time_zone=None)),
),
pl.Field(
"date_us", pl.List(pl.Datetime(time_unit="ms", time_zone=None))
),
pl.Field(
"date_range_nested",
pl.List(pl.List(pl.Datetime(time_unit="ms", time_zone=None))),
),
pl.Field("string", pl.Utf8),
pl.Field("int", pl.UInt32),
]
),
pl.Series(
"struct_with_list",
),
pl.Series(
"list_with_struct_with_list",
[
[
{
"date_range": [
datetime.datetime(2010, 1, 1, 0, 0),
datetime.datetime(2010, 1, 2, 0, 0),
datetime(2010, 1, 1, 0, 0),
datetime(2010, 1, 2, 0, 0),
],
"date_us": [
datetime.datetime(2010, 1, 1, 0, 0),
datetime.datetime(2010, 1, 2, 0, 0),
"date_ns": [
datetime(2010, 1, 1, 0, 0),
datetime(2010, 1, 2, 0, 0),
],
"date_range_nested": [
[
datetime.datetime(2010, 1, 1, 0, 0),
datetime.datetime(2010, 1, 2, 0, 0),
datetime(2010, 1, 1, 0, 0),
datetime(2010, 1, 2, 0, 0),
]
],
"string": "test",
"int": 1,
}
],
dtype=pl.Struct(
]
],
dtype=pl.List(
pl.Struct(
[
Field(
"date_range", List(Datetime(time_unit="ms", time_zone=None))
pl.Field(
"date_range",
pl.List(pl.Datetime(time_unit="ns", time_zone=None)),
),
Field(
"date_us", List(Datetime(time_unit="ms", time_zone=None))
pl.Field(
"date_ns",
pl.List(pl.Datetime(time_unit="ns", time_zone=None)),
),
Field(
pl.Field(
"date_range_nested",
List(List(Datetime(time_unit="ms", time_zone=None))),
pl.List(
pl.List(pl.Datetime(time_unit="ns", time_zone=None))
),
),
Field("string", Utf8),
Field("int", UInt32),
pl.Field("string", pl.Utf8),
pl.Field("int", pl.UInt32),
]
),
)
),
pl.Series(
"list_with_struct_with_list",
[
[
{
"date_range": [
datetime.datetime(2010, 1, 1, 0, 0),
datetime.datetime(2010, 1, 2, 0, 0),
],
"date_ns": [
datetime.datetime(2010, 1, 1, 0, 0),
datetime.datetime(2010, 1, 2, 0, 0),
],
"date_range_nested": [
[
datetime.datetime(2010, 1, 1, 0, 0),
datetime.datetime(2010, 1, 2, 0, 0),
]
],
"string": "test",
"int": 1,
}
]
],
dtype=pl.List(
Struct(
[
Field(
"date_range",
List(Datetime(time_unit="ns", time_zone=None)),
),
Field(
"date_ns",
List(Datetime(time_unit="ns", time_zone=None)),
),
Field(
"date_range_nested",
List(List(Datetime(time_unit="ns", time_zone=None))),
),
Field("string", Utf8),
Field("int", UInt32),
]
)
),
),
]
)
),
],
)
def test_write_delta_w_compatible_schema(series: pl.Series, tmp_path: Path) -> None:
from deltalake import DeltaTable

df = pl.DataFrame(series)

# Create table
df.write_delta(tmp_path, mode="append")
Expand Down

0 comments on commit 43e33f3

Please sign in to comment.