From 403f181dc072275f787e125024459ee1927bdc58 Mon Sep 17 00:00:00 2001
From: Aidos Kanapyanov
Date: Mon, 29 Jul 2024 14:26:49 +0500
Subject: [PATCH] feat: add year, month, day, hour, minute, second,
 millisecond, microsecond, nanosecond, ordinal_day for dask

Add a `dt` namespace to the Dask expression backend exposing year,
month, day, hour, minute, second, millisecond, microsecond, nanosecond
and ordinal_day.

millisecond and nanosecond are expressed as the sub-second fraction in
that unit (1000 microseconds -> 1 millisecond / 1_000_000 nanoseconds).
nanosecond also adds the underlying `dt.nanosecond` field so that
precision below one microsecond is preserved.

---
 narwhals/_dask/expr.py |  75 ++++++++++++++++++++++++++
 tests/dask_test.py     | 116 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 191 insertions(+)

diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
index c981538f8..1c00d2866 100644
--- a/narwhals/_dask/expr.py
+++ b/narwhals/_dask/expr.py
@@ -199,6 +199,10 @@ def sum(self) -> Self:
     def str(self: Self) -> DaskExprStringNamespace:
         return DaskExprStringNamespace(self)
 
+    @property
+    def dt(self: Self) -> DaskExprDateTimeNamespace:
+        return DaskExprDateTimeNamespace(self)
+
 
 class DaskExprStringNamespace:
     def __init__(self, expr: DaskExpr) -> None:
@@ -249,3 +253,74 @@ def to_lowercase(self) -> DaskExpr:
             lambda _input: _input.str.lower(),
             "to_lowercase",
         )
+
+
+class DaskExprDateTimeNamespace:
+    """Datetime component accessors returned by the ``dt`` property."""
+
+    def __init__(self, expr: DaskExpr) -> None:
+        self._expr = expr
+
+    def year(self) -> DaskExpr:
+        return self._expr._from_call(
+            lambda _input: _input.dt.year,
+            "year",
+        )
+
+    def month(self) -> DaskExpr:
+        return self._expr._from_call(
+            lambda _input: _input.dt.month,
+            "month",
+        )
+
+    def day(self) -> DaskExpr:
+        return self._expr._from_call(
+            lambda _input: _input.dt.day,
+            "day",
+        )
+
+    def hour(self) -> DaskExpr:
+        return self._expr._from_call(
+            lambda _input: _input.dt.hour,
+            "hour",
+        )
+
+    def minute(self) -> DaskExpr:
+        return self._expr._from_call(
+            lambda _input: _input.dt.minute,
+            "minute",
+        )
+
+    def second(self) -> DaskExpr:
+        return self._expr._from_call(
+            lambda _input: _input.dt.second,
+            "second",
+        )
+
+    def millisecond(self) -> DaskExpr:
+        return self._expr._from_call(
+            # Whole milliseconds of the second, derived from the microsecond field.
+            lambda _input: _input.dt.microsecond // 1000,
+            "millisecond",
+        )
+
+    def microsecond(self) -> DaskExpr:
+        return self._expr._from_call(
+            lambda _input: _input.dt.microsecond,
+            "microsecond",
+        )
+
+    def nanosecond(self) -> DaskExpr:
+        return self._expr._from_call(
+            # Sub-second fraction in nanoseconds: the microsecond field scaled
+            # up, plus the separate 0-999 nanosecond field so that precision
+            # below one microsecond is not dropped.
+            lambda _input: _input.dt.microsecond * 1000 + _input.dt.nanosecond,
+            "nanosecond",
+        )
+
+    def ordinal_day(self) -> DaskExpr:
+        return self._expr._from_call(
+            lambda _input: _input.dt.dayofyear,
+            "ordinal_day",
+        )
diff --git a/tests/dask_test.py b/tests/dask_test.py
index 83be3e9c2..77657f028 100644
--- a/tests/dask_test.py
+++ b/tests/dask_test.py
@@ -260,3 +260,119 @@ def test_str_to_lowercase(
     result_frame = df.with_columns(nw.col("a").str.to_lowercase())
 
     compare_dicts(result_frame, expected)
+
+
+def test_dt_year() -> None:
+    import dask.dataframe as dd
+
+    data = {"a": [datetime(2020, 1, 1), datetime(2021, 1, 1)]}
+    dfdd = dd.from_pandas(pd.DataFrame(data))
+    df = nw.from_native(dfdd)
+    result = df.with_columns(year=nw.col("a").dt.year())
+    expected = {"a": data["a"], "year": [2020, 2021]}
+    compare_dicts(result, expected)
+
+
+def test_dt_month() -> None:
+    import dask.dataframe as dd
+
+    data = {"a": [datetime(2020, 1, 1), datetime(2021, 1, 1)]}
+    dfdd = dd.from_pandas(pd.DataFrame(data))
+    df = nw.from_native(dfdd)
+    result = df.with_columns(month=nw.col("a").dt.month())
+    expected = {"a": data["a"], "month": [1, 1]}
+    compare_dicts(result, expected)
+
+
+def test_dt_day() -> None:
+    import dask.dataframe as dd
+
+    data = {"a": [datetime(2020, 1, 1), datetime(2021, 1, 1)]}
+    dfdd = dd.from_pandas(pd.DataFrame(data))
+    df = nw.from_native(dfdd)
+    result = df.with_columns(day=nw.col("a").dt.day())
+    expected = {"a": data["a"], "day": [1, 1]}
+    compare_dicts(result, expected)
+
+
+def test_dt_hour() -> None:
+    import dask.dataframe as dd
+
+    data = {"a": [datetime(2020, 1, 1, 1), datetime(2021, 1, 1, 2)]}
+    dfdd = dd.from_pandas(pd.DataFrame(data))
+    df = nw.from_native(dfdd)
+    result = df.with_columns(hour=nw.col("a").dt.hour())
+    expected = {"a": data["a"], "hour": [1, 2]}
+    compare_dicts(result, expected)
+
+
+def test_dt_minute() -> None:
+    import dask.dataframe as dd
+
+    data = {"a": [datetime(2020, 1, 1, 1, 1), datetime(2021, 1, 1, 2, 2)]}
+    dfdd = dd.from_pandas(pd.DataFrame(data))
+    df = nw.from_native(dfdd)
+    result = df.with_columns(minute=nw.col("a").dt.minute())
+    expected = {"a": data["a"], "minute": [1, 2]}
+    compare_dicts(result, expected)
+
+
+def test_dt_second() -> None:
+    import dask.dataframe as dd
+
+    data = {"a": [datetime(2020, 1, 1, 1, 1, 1), datetime(2021, 1, 1, 2, 2, 2)]}
+    dfdd = dd.from_pandas(pd.DataFrame(data))
+    df = nw.from_native(dfdd)
+    result = df.with_columns(second=nw.col("a").dt.second())
+    expected = {"a": data["a"], "second": [1, 2]}
+    compare_dicts(result, expected)
+
+
+def test_dt_millisecond() -> None:
+    import dask.dataframe as dd
+
+    data = {
+        "a": [datetime(2020, 1, 1, 1, 1, 1, 1000), datetime(2021, 1, 1, 2, 2, 2, 2000)]
+    }
+    dfdd = dd.from_pandas(pd.DataFrame(data))
+    df = nw.from_native(dfdd)
+    result = df.with_columns(millisecond=nw.col("a").dt.millisecond())
+    expected = {"a": data["a"], "millisecond": [1, 2]}
+    compare_dicts(result, expected)
+
+
+def test_dt_microsecond() -> None:
+    import dask.dataframe as dd
+
+    data = {
+        "a": [datetime(2020, 1, 1, 1, 1, 1, 1000), datetime(2021, 1, 1, 2, 2, 2, 2000)]
+    }
+    dfdd = dd.from_pandas(pd.DataFrame(data))
+    df = nw.from_native(dfdd)
+    result = df.with_columns(microsecond=nw.col("a").dt.microsecond())
+    expected = {"a": data["a"], "microsecond": [1000, 2000]}
+    compare_dicts(result, expected)
+
+
+def test_dt_nanosecond() -> None:
+    import dask.dataframe as dd
+
+    data = {
+        "a": [datetime(2020, 1, 1, 1, 1, 1, 1000), datetime(2021, 1, 1, 2, 2, 2, 2000)]
+    }
+    dfdd = dd.from_pandas(pd.DataFrame(data))
+    df = nw.from_native(dfdd)
+    result = df.with_columns(nanosecond=nw.col("a").dt.nanosecond())
+    expected = {"a": data["a"], "nanosecond": [1000000, 2000000]}
+    compare_dicts(result, expected)
+
+
+def test_dt_ordinal_day() -> None:
+    import dask.dataframe as dd
+
+    data = {"a": [datetime(2020, 1, 7), datetime(2021, 2, 1)]}
+    dfdd = dd.from_pandas(pd.DataFrame(data))
+    df = nw.from_native(dfdd)
+    result = df.with_columns(ordinal_day=nw.col("a").dt.ordinal_day())
+    expected = {"a": data["a"], "ordinal_day": [7, 32]}
+    compare_dicts(result, expected)