Skip to content

Commit

Permalink
feat: deprecate _saturating in duration string language, make it th…
Browse files Browse the repository at this point in the history
…e default (#12301)
  • Loading branch information
MarcoGorelli authored Nov 8, 2023
1 parent b392e83 commit 1d65aac
Show file tree
Hide file tree
Showing 15 changed files with 99 additions and 180 deletions.
8 changes: 0 additions & 8 deletions crates/polars-time/src/upsample.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ pub trait PolarsUpsample {
/// Or combine them:
/// "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
///
/// Suffix with `"_saturating"` to saturate dates with days too
/// large for their month to the last day of the month (e.g.
/// 2022-02-29 to 2022-02-28).
///
/// By "calendar day", we mean the corresponding time on the next
/// day (which may not be 24 hours, depending on daylight savings).
/// Similarly for "calendar week", "calendar month", "calendar quarter",
Expand Down Expand Up @@ -79,10 +75,6 @@ pub trait PolarsUpsample {
/// Or combine them:
/// "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
///
/// Suffix with `"_saturating"` to saturate dates with days too
/// large for their month to the last day of the month (e.g.
/// 2022-02-29 to 2022-02-28).
///
/// By "calendar day", we mean the corresponding time on the next
/// day (which may not be 24 hours, depending on daylight savings).
/// Similarly for "calendar week", "calendar month", "calendar quarter",
Expand Down
58 changes: 12 additions & 46 deletions crates/polars-time/src/windows/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,6 @@ pub struct Duration {
pub(crate) negative: bool,
// indicates if an integer string was passed. e.g. "2i"
pub parsed_int: bool,
// indicates if an offset to a non-existent date (e.g. 2022-02-29)
// should saturate (to 2022-02-28) as opposed to erroring
pub(crate) saturating: bool,
}

impl PartialOrd<Self> for Duration {
Expand All @@ -67,7 +64,6 @@ impl Duration {
nsecs: fixed_slots.abs(),
negative: fixed_slots < 0,
parsed_int: true,
saturating: false,
}
}

Expand Down Expand Up @@ -98,10 +94,6 @@ impl Duration {
/// * `y`: calendar year
/// * `i`: index value (only for {Int32, Int64} dtypes)
///
/// Suffix with `"_saturating"` to indicate that dates too large for
/// their month should saturate at the largest date (e.g. 2022-02-29 -> 2022-02-28)
/// instead of erroring.
///
/// By "calendar day", we mean the corresponding time on the next
/// day (which may not be 24 hours, depending on daylight savings).
/// Similarly for "calendar week", "calendar month", "calendar quarter",
Expand All @@ -123,13 +115,7 @@ impl Duration {
let mut days = 0;
let mut months = 0;
let negative = duration.starts_with('-');
let (saturating, mut iter) = match duration.ends_with("_saturating") {
true => (
true,
duration[..duration.len() - "_saturating".len()].char_indices(),
),
false => (false, duration.char_indices()),
};
let mut iter = duration.char_indices();
let mut start = 0;

// skip the '-' char
Expand Down Expand Up @@ -198,7 +184,6 @@ impl Duration {
months: months.abs(),
negative,
parsed_int,
saturating,
}
}

Expand Down Expand Up @@ -265,7 +250,6 @@ impl Duration {
nsecs,
negative,
parsed_int: false,
saturating: false,
}
}

Expand All @@ -279,7 +263,6 @@ impl Duration {
nsecs: 0,
negative,
parsed_int: false,
saturating: false,
}
}

Expand All @@ -293,7 +276,6 @@ impl Duration {
nsecs: 0,
negative,
parsed_int: false,
saturating: false,
}
}

Expand All @@ -307,7 +289,6 @@ impl Duration {
nsecs: 0,
negative,
parsed_int: false,
saturating: false,
}
}

Expand Down Expand Up @@ -379,12 +360,7 @@ impl Duration {
}

#[doc(hidden)]
fn add_month(
ts: NaiveDateTime,
n_months: i64,
negative: bool,
saturating: bool,
) -> PolarsResult<NaiveDateTime> {
fn add_month(ts: NaiveDateTime, n_months: i64, negative: bool) -> NaiveDateTime {
let mut months = n_months;
if negative {
months = -months;
Expand All @@ -409,16 +385,14 @@ impl Duration {
month += 12;
}

if saturating {
// Normalize the day if we are past the end of the month.
let mut last_day_of_month = last_day_of_month(month);
if month == (chrono::Month::February.number_from_month() as i32) && is_leap_year(year) {
last_day_of_month += 1;
}
// Normalize the day if we are past the end of the month.
let mut last_day_of_month = last_day_of_month(month);
if month == (chrono::Month::February.number_from_month() as i32) && is_leap_year(year) {
last_day_of_month += 1;
}

if day > last_day_of_month {
day = last_day_of_month
}
if day > last_day_of_month {
day = last_day_of_month
}

// Retrieve the original time and construct a data
Expand All @@ -427,16 +401,8 @@ impl Duration {
let minute = ts.minute();
let sec = ts.second();
let nsec = ts.nanosecond();
new_datetime(year, month as u32, day, hour, minute, sec, nsec).ok_or(
polars_err!(
ComputeError: format!(
"cannot advance '{}' by {} month(s). \
If you were trying to get the last day of each month, you may want to try `.dt.month_end` \
or append \"_saturating\" to your duration string.",
ts,
if negative {-n_months} else {n_months}
)
),
new_datetime(year, month as u32, day, hour, minute, sec, nsec).expect(
"Expected valid datetime, please open an issue at https://github.com/pola-rs/polars/issues"
)
}

Expand Down Expand Up @@ -714,7 +680,7 @@ impl Duration {
Some(tz) => unlocalize_datetime(timestamp_to_datetime(t), tz),
_ => timestamp_to_datetime(t),
};
let dt = Self::add_month(ts, d.months, d.negative, d.saturating)?;
let dt = Self::add_month(ts, d.months, d.negative);
new_t = match tz {
#[cfg(feature = "timezones")]
Some(tz) => datetime_to_timestamp(try_localize_datetime(dt, tz, Ambiguous::Raise)?),
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-time/src/windows/group_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ pub(crate) fn group_by_values_iter_lookbehind(
// We have `period == -offset`, so `t + offset + period` is equal to `t`,
// and `upper` is trivially equal to `t` itself. Using the trivial calculation,
// instead of `upper = lower + period`, avoids issues around
// `t - 1mo_saturating + 1mo_saturating` not round-tripping.
// `t - 1mo + 1mo` not round-tripping.
let upper = t;
let b = Bounds::new(lower, upper);
let slice = &time[..start_offset];
Expand Down
24 changes: 9 additions & 15 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
deprecate_nonkeyword_arguments,
deprecate_renamed_function,
deprecate_renamed_parameter,
deprecate_saturating,
)
from polars.utils.various import (
_prepare_row_count_args,
Expand Down Expand Up @@ -5198,10 +5199,6 @@ def rolling(
Or combine them:
"3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
Suffix with `"_saturating"` to indicate that dates too large for
their month should saturate at the largest date (e.g. 2022-02-29 -> 2022-02-28)
instead of erroring.
By "calendar day", we mean the corresponding time on the next day (which may
not be 24 hours, due to daylight savings). Similarly for "calendar week",
"calendar month", "calendar quarter", and "calendar year".
Expand Down Expand Up @@ -5287,6 +5284,8 @@ def rolling(
└─────────────────────┴───────┴───────┴───────┘
"""
period = deprecate_saturating(period)
offset = deprecate_saturating(offset)
return RollingGroupBy(
self,
index_column=index_column,
Expand Down Expand Up @@ -5440,10 +5439,6 @@ def group_by_dynamic(
Or combine them:
"3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
Suffix with `"_saturating"` to indicate that dates too large for
their month should saturate at the largest date (e.g. 2022-02-29 -> 2022-02-28)
instead of erroring.
By "calendar day", we mean the corresponding time on the next day (which may
not be 24 hours, due to daylight savings). Similarly for "calendar week",
"calendar month", "calendar quarter", and "calendar year".
Expand Down Expand Up @@ -5617,6 +5612,9 @@ def group_by_dynamic(
└─────────────────┴─────────────────┴─────┴─────────────────┘
""" # noqa: W505
every = deprecate_saturating(every)
period = deprecate_saturating(period)
offset = deprecate_saturating(offset)
return DynamicGroupBy(
self,
index_column=index_column,
Expand Down Expand Up @@ -5664,9 +5662,6 @@ def upsample(
- "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
Suffix with `"_saturating"` to indicate that dates too large for
their month should saturate at the largest date (e.g. 2022-02-29 -> 2022-02-28)
instead of erroring.
By "calendar day", we mean the corresponding time on the next day (which may
not be 24 hours, due to daylight savings). Similarly for "calendar week",
Expand Down Expand Up @@ -5728,6 +5723,8 @@ def upsample(
└─────────────────────┴────────┴────────┘
"""
every = deprecate_saturating(every)
offset = deprecate_saturating(offset)
if by is None:
by = []
if isinstance(by, str):
Expand Down Expand Up @@ -5823,10 +5820,6 @@ def join_asof(
Or combine them:
"3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
Suffix with `"_saturating"` to indicate that dates too large for
their month should saturate at the largest date
(e.g. 2022-02-29 -> 2022-02-28) instead of erroring.
By "calendar day", we mean the corresponding time on the next day
(which may not be 24 hours, due to daylight savings). Similarly for
"calendar week", "calendar month", "calendar quarter", and
Expand Down Expand Up @@ -5878,6 +5871,7 @@ def join_asof(
└─────────────────────┴────────────┴──────┘
"""
tolerance = deprecate_saturating(tolerance)
if not isinstance(other, DataFrame):
raise TypeError(
f"expected `other` join table to be a DataFrame, got {type(other).__name__!r}"
Expand Down
16 changes: 6 additions & 10 deletions py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.deprecation import (
deprecate_renamed_function,
deprecate_saturating,
issue_deprecation_warning,
rename_use_earliest_to_ambiguous,
)
Expand Down Expand Up @@ -94,9 +95,6 @@ def truncate(
- 3d12h4m25s # 3 days, 12 hours, 4 minutes, and 25 seconds
Suffix with `"_saturating"` to indicate that dates too large for
their month should saturate at the largest date (e.g. 2022-02-29 -> 2022-02-28)
instead of erroring.
By "calendar day", we mean the corresponding time on the next day (which may
not be 24 hours, due to daylight savings). Similarly for "calendar week",
Expand Down Expand Up @@ -173,6 +171,8 @@ def truncate(
└─────────────────────┴─────────────────────┘
"""
every = deprecate_saturating(every)
offset = deprecate_saturating(offset)
if not isinstance(every, pl.Expr):
every = _timedelta_to_pl_duration(every)

Expand Down Expand Up @@ -252,9 +252,6 @@ def round(
eg: 3d12h4m25s # 3 days, 12 hours, 4 minutes, and 25 seconds
Suffix with `"_saturating"` to indicate that dates too large for
their month should saturate at the largest date (e.g. 2022-02-29 -> 2022-02-28)
instead of erroring.
By "calendar day", we mean the corresponding time on the next day (which may
not be 24 hours, due to daylight savings). Similarly for "calendar week",
Expand Down Expand Up @@ -315,6 +312,8 @@ def round(
└─────────────────────┴─────────────────────┘
"""
every = deprecate_saturating(every)
offset = deprecate_saturating(offset)
if offset is None:
offset = "0ns"

Expand Down Expand Up @@ -1749,10 +1748,6 @@ def offset_by(self, by: str | Expr) -> Expr:
- 1y (1 calendar year)
- 1i (1 index count)
Suffix with `"_saturating"` to indicate that dates too large for
their month should saturate at the largest date
(e.g. 2022-02-29 -> 2022-02-28) instead of erroring.
By "calendar day", we mean the corresponding time on the next day (which may
not be 24 hours, due to daylight savings). Similarly for "calendar week",
"calendar month", "calendar quarter", and "calendar year".
Expand Down Expand Up @@ -1810,6 +1805,7 @@ def offset_by(self, by: str | Expr) -> Expr:
│ 2005-01-01 00:00:00 ┆ 1y ┆ 2006-01-01 00:00:00 │
└─────────────────────┴────────┴─────────────────────┘
"""
by = deprecate_saturating(by)
by = parse_as_expression(by, str_as_lit=True)
return wrap_expr(self._pyexpr.dt_offset_by(by))

Expand Down
Loading

0 comments on commit 1d65aac

Please sign in to comment.