Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add dt.to_string to DaskExpr #796

Merged
merged 12 commits into from
Aug 19, 2024
12 changes: 11 additions & 1 deletion narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,9 @@ def fill_null(self, value: Any) -> DaskExpr:
)

def clip(
self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
self: Self,
lower_bound: Any | None = None,
upper_bound: Any | None = None,
) -> Self:
return self._from_call(
lambda _input, _lower, _upper: _input.clip(lower=_lower, upper=_upper),
Expand Down Expand Up @@ -798,6 +800,14 @@ def ordinal_day(self) -> DaskExpr:
returns_scalar=False,
)

def to_string(self, format: str) -> DaskExpr:  # noqa: A002
    """Render each datetime value as a string using ``dt.strftime``.

    Args:
        format: The format string. Every ``%.f`` in it is rewritten to
            ``.%f`` before it is handed to ``strftime``.

    Returns:
        The expression produced by ``self._expr._from_call`` for the
        ``"strftime"`` operation (non-scalar).
    """
    # NOTE(review): `%.f` looks like the Polars/chrono spelling of
    # "dot + fractional seconds"; it is translated to the `.%f` form here.
    # Confirm against the other backends' `to_string` implementations.
    strftime_format = format.replace("%.f", ".%f")
    return self._expr._from_call(
        lambda series, fmt: series.dt.strftime(fmt),
        "strftime",
        strftime_format,
        returns_scalar=False,
    )

def total_minutes(self) -> DaskExpr:
return self._expr._from_call(
lambda _input: _input.dt.total_seconds() // 60,
Expand Down
138 changes: 100 additions & 38 deletions tests/expr_and_series/dt/to_string_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytest

import narwhals.stable.v1 as nw
from tests.utils import compare_dicts
from tests.utils import is_windows

data = {
Expand All @@ -17,31 +18,71 @@


@pytest.mark.parametrize(
"fmt", ["%Y-%m-%d", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S", "%G-W%V-%u", "%G-W%V"]
"fmt",
[
"%Y-%m-%d",
"%Y-%m-%d %H:%M:%S",
"%Y/%m/%d %H:%M:%S",
"%G-W%V-%u",
"%G-W%V",
],
)
@pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows")
def test_dt_to_string(constructor_eager: Any, fmt: str) -> None:
def test_dt_to_string_series(constructor_eager: Any, fmt: str) -> None:
input_frame = nw.from_native(constructor_eager(data), eager_only=True)
input_series = input_frame["a"]

expected_col = [datetime.strftime(d, fmt) for d in data["a"]]

result = input_series.dt.to_string(fmt).to_list()
result = {"a": input_series.dt.to_string(fmt)}

if any(
x in str(constructor_eager) for x in ["pandas_pyarrow", "pyarrow_table", "modin"]
):
# PyArrow differs from other libraries, in that %S also shows
# the fraction of a second.
result = [x[: x.find(".")] if "." in x else x for x in result]
assert result == expected_col
result = input_frame.select(nw.col("a").dt.to_string(fmt))["a"].to_list()
if any(
x in str(constructor_eager) for x in ["pandas_pyarrow", "pyarrow_table", "modin"]
):
result = {"a": input_series.dt.to_string(fmt).str.replace(r"\.\d+$", "")}

compare_dicts(result, {"a": expected_col})


@pytest.mark.parametrize(
"fmt",
[
"%Y-%m-%d",
"%Y-%m-%d %H:%M:%S",
"%Y/%m/%d %H:%M:%S",
"%G-W%V-%u",
"%G-W%V",
],
)
@pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows")
def test_dt_to_string_expr(constructor: Any, fmt: str) -> None:
input_frame = nw.from_native(constructor(data))

expected_col = [datetime.strftime(d, fmt) for d in data["a"]]

result = input_frame.select(nw.col("a").dt.to_string(fmt).alias("b"))
if any(x in str(constructor) for x in ["pandas_pyarrow", "pyarrow_table", "modin"]):
# PyArrow differs from other libraries, in that %S also shows
# the fraction of a second.
result = [x[: x.find(".")] if "." in x else x for x in result]
assert result == expected_col
result = input_frame.select(
nw.col("a").dt.to_string(fmt).str.replace(r"\.\d+$", "").alias("b")
)
compare_dicts(result, {"b": expected_col})


def _clean_string(result: str) -> str:
    """Normalise a formatted timestamp so different backends compare equal.

    Libraries disagree on how trailing zeros in the fractional part are
    rendered, so for any string containing a ``.`` the trailing ``0``
    characters are dropped, followed by any ``.`` left at the end.
    Strings without a ``.`` are returned untouched.
    """
    if "." not in result:
        return result
    without_zeros = result.rstrip("0")
    return without_zeros.rstrip(".")


def _clean_string_expr(e: Any) -> Any:
    """Expression counterpart of `_clean_string`.

    Removes a run of trailing ``0`` characters, then a single trailing
    ``.``, via two chained ``str.replace_all`` calls.

    NOTE(review): unlike `_clean_string`, the zero-stripping pattern is
    applied unconditionally, with no check that the value contains a ``.``.
    """
    zeros_stripped = e.str.replace_all(r"0+$", "")
    return zeros_stripped.str.replace_all(r"\.$", "")


@pytest.mark.parametrize(
Expand All @@ -50,20 +91,16 @@ def test_dt_to_string(constructor_eager: Any, fmt: str) -> None:
(datetime(2020, 1, 9), "2020-01-09T00:00:00.000000"),
(datetime(2020, 1, 9, 12, 34, 56), "2020-01-09T12:34:56.000000"),
(datetime(2020, 1, 9, 12, 34, 56, 123), "2020-01-09T12:34:56.000123"),
(datetime(2020, 1, 9, 12, 34, 56, 123456), "2020-01-09T12:34:56.123456"),
(
datetime(2020, 1, 9, 12, 34, 56, 123456),
"2020-01-09T12:34:56.123456",
),
],
)
@pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows")
def test_dt_to_string_iso_local_datetime(
def test_dt_to_string_iso_local_datetime_series(
constructor_eager: Any, data: datetime, expected: str
) -> None:
def _clean_string(result: str) -> str:
# rstrip '0' to remove trailing zeros, as different libraries handle this differently
# if there's then a trailing `.`, remove that too.
if "." in result:
result = result.rstrip("0").rstrip(".")
return result

df = constructor_eager({"a": [data]})
result = (
nw.from_native(df, eager_only=True)["a"]
Expand All @@ -72,34 +109,48 @@ def _clean_string(result: str) -> str:
)
assert _clean_string(result) == _clean_string(expected)

result = (
nw.from_native(df, eager_only=True)
.select(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M:%S.%f"))["a"]
.to_list()[0]
)
assert _clean_string(result) == _clean_string(expected)

result = (
nw.from_native(df, eager_only=True)["a"]
.dt.to_string("%Y-%m-%dT%H:%M:%S%.f")
.to_list()[0]
)
assert _clean_string(result) == _clean_string(expected)

result = (
nw.from_native(df, eager_only=True)
.select(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M:%S%.f"))["a"]
.to_list()[0]

@pytest.mark.parametrize(
("data", "expected"),
[
(datetime(2020, 1, 9, 12, 34, 56), "2020-01-09T12:34:56.000000"),
(datetime(2020, 1, 9, 12, 34, 56, 123), "2020-01-09T12:34:56.000123"),
(
datetime(2020, 1, 9, 12, 34, 56, 123456),
"2020-01-09T12:34:56.123456",
),
],
)
@pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows")
def test_dt_to_string_iso_local_datetime_expr(
constructor: Any, data: datetime, expected: str
) -> None:
df = constructor({"a": [data]})

result = nw.from_native(df).with_columns(
_clean_string_expr(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M:%S.%f")).alias("b")
)
assert _clean_string(result) == _clean_string(expected)
compare_dicts(result, {"a": [data], "b": [_clean_string(expected)]})

result = nw.from_native(df).with_columns(
_clean_string_expr(nw.col("a").dt.to_string("%Y-%m-%dT%H:%M:%S%.f")).alias("b")
)
compare_dicts(result, {"a": [data], "b": [_clean_string(expected)]})


@pytest.mark.parametrize(
("data", "expected"),
[(datetime(2020, 1, 9), "2020-01-09")],
)
@pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows")
def test_dt_to_string_iso_local_date(
def test_dt_to_string_iso_local_date_series(
constructor_eager: Any, data: datetime, expected: str
) -> None:
df = constructor_eager({"a": [data]})
Expand All @@ -108,9 +159,20 @@ def test_dt_to_string_iso_local_date(
)
assert result == expected

result = (
nw.from_native(df, eager_only=True)
.select(b=nw.col("a").dt.to_string("%Y-%m-%d"))["b"]
.to_list()[0]

@pytest.mark.parametrize(
("data", "expected"),
[(datetime(2020, 1, 9), "2020-01-09")],
)
@pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows")
def test_dt_to_string_iso_local_date_expr(
request: Any, constructor: Any, data: datetime, expected: str
) -> None:
if "modin" in str(constructor):
request.applymarker(pytest.mark.xfail)

df = constructor({"a": [data]})
result = nw.from_native(df).with_columns(
nw.col("a").dt.to_string("%Y-%m-%d").alias("b")
)
assert result == expected
compare_dicts(result, {"a": [data], "b": [expected]})
Loading