Skip to content

Commit

Permalink
fix(rust, python): only preserve sortedness flag in replace_time_zone…
Browse files Browse the repository at this point in the history
… when safe
  • Loading branch information
MarcoGorelli committed Aug 26, 2023
1 parent b1d64b1 commit 9442354
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 2 deletions.
12 changes: 10 additions & 2 deletions crates/polars-ops/src/chunked_array/datetime/replace_time_zone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ pub fn replace_time_zone(
time_zone: Option<&str>,
ambiguous: &Utf8Chunked,
) -> PolarsResult<DatetimeChunked> {
let from_tz = parse_time_zone(datetime.time_zone().as_deref().unwrap_or("UTC"))?;
let from_time_zone = datetime.time_zone().as_deref().unwrap_or("UTC");
let from_tz = parse_time_zone(from_time_zone)?;
let to_tz = parse_time_zone(time_zone.unwrap_or("UTC"))?;
let timestamp_to_datetime: fn(i64) -> NaiveDateTime = match datetime.time_unit() {
TimeUnit::Milliseconds => timestamp_ms_to_datetime,
Expand Down Expand Up @@ -49,6 +50,13 @@ pub fn replace_time_zone(
},
};
let mut out = out?.into_datetime(datetime.time_unit(), time_zone.map(|x| x.to_string()));
out.set_sorted_flag(datetime.is_sorted_flag());
if from_time_zone == "UTC" && ambiguous.len() == 1 && ambiguous.get(0).unwrap() == "raise" {
// In general, the sortedness flag can't be preserved.
// To be safe, we only do so in the simplest case when we know for sure that there is no "daylight savings weirdness" going on, i.e.:
// - `from_tz` is guaranteed to not observe daylight savings time;
// - user is just passing 'raise' to 'ambiguous'.
// Both conditions above need to be satisfied.
out.set_sorted_flag(datetime.is_sorted_flag());
}
Ok(out)
}
52 changes: 52 additions & 0 deletions py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -2140,6 +2140,58 @@ def test_replace_time_zone_ambiguous_raises() -> None:
ts.dt.replace_time_zone("Europe/Brussels")


@pytest.mark.parametrize(
    ("from_tz", "expected_sortedness", "ambiguous"),
    [
        ("Europe/London", False, "earliest"),
        ("Europe/London", False, "raise"),
        ("UTC", False, "earliest"),
        ("UTC", True, "raise"),
        (None, False, "earliest"),
        (None, True, "raise"),
    ],
)
def test_replace_time_zone_sortedness_series(
    from_tz: str | None, expected_sortedness: bool, ambiguous: Ambiguous
) -> None:
    """Check when ``replace_time_zone`` keeps the ascending-sorted flag on a Series.

    Per the parametrization, the flag is expected to survive only when the
    source time zone is UTC (or unset) and ``ambiguous`` is ``"raise"``; every
    other combination must drop it.
    """
    # Two microsecond timestamps exactly one hour apart.
    raw_timestamps = pl.Series("ts", [1603584000000000, 1603587600000000])
    source_dtype = pl.Datetime("us", from_tz)
    ser = raw_timestamps.cast(source_dtype).sort()

    # Precondition: sorting must have set the flag, otherwise the check below
    # would pass vacuously for the `False` cases.
    assert ser.flags["SORTED_ASC"]

    result = ser.dt.replace_time_zone("UTC", ambiguous=ambiguous)
    is_still_sorted = result.flags["SORTED_ASC"]
    assert is_still_sorted == expected_sortedness


@pytest.mark.parametrize(
    ("from_tz", "ambiguous"),
    [
        ("Europe/London", "earliest"),
        ("Europe/London", "raise"),
        ("UTC", "earliest"),
        ("UTC", "raise"),
        (None, "earliest"),
        (None, "raise"),
    ],
)
def test_replace_time_zone_sortedness_expressions(
    from_tz: str | None, ambiguous: str
) -> None:
    """Check that the sorted flag is dropped when ``ambiguous`` is an expression.

    Here ``ambiguous`` is supplied as a column (one value per row) rather than
    a single literal, and for every parametrized combination the result must
    not be flagged as sorted ascending.
    """
    # Two microsecond timestamps exactly one hour apart, sorted so the
    # ascending flag is set before the column is placed into a frame.
    sorted_ts = (
        pl.Series("ts", [1603584000000000, 1603587600000000])
        .cast(pl.Datetime("us", from_tz))
        .sort()
    )
    ambiguous_column = pl.Series([ambiguous] * 2)
    df = sorted_ts.to_frame().with_columns(ambiguous=ambiguous_column)

    # Precondition: the flag must be present on the input column.
    assert df["ts"].flags["SORTED_ASC"]

    replaced = pl.col("ts").dt.replace_time_zone(
        "UTC", ambiguous=pl.col("ambiguous")
    )
    result = df.select(replaced)
    assert not result["ts"].flags["SORTED_ASC"]


def test_use_earliest_deprecation() -> None:
# strptime
with pytest.warns(
Expand Down

0 comments on commit 9442354

Please sign in to comment.