From a6d62939ea5a8f9eed156e0420103c7bc0a96dae Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Tue, 2 Jan 2024 09:26:27 +0100 Subject: [PATCH] feat: support min_periods for temporal rolling aggregations (#13342) --- .../chunkedarray/rolling_window/dispatch.rs | 2 + .../rolling_kernels/no_nulls.rs | 39 +++++++++++--- py-polars/polars/expr/expr.py | 50 ++++++++++++++---- py-polars/polars/series/series.py | 50 ++++++++++++++---- .../unit/operations/rolling/test_rolling.py | 52 +++++++++++++++++++ 5 files changed, 166 insertions(+), 27 deletions(-) diff --git a/crates/polars-time/src/chunkedarray/rolling_window/dispatch.rs b/crates/polars-time/src/chunkedarray/rolling_window/dispatch.rs index 8f1874bae302..787274b9c02a 100644 --- a/crates/polars-time/src/chunkedarray/rolling_window/dispatch.rs +++ b/crates/polars-time/src/chunkedarray/rolling_window/dispatch.rs @@ -30,6 +30,7 @@ fn rolling_agg( Duration, &[i64], ClosedWindow, + usize, TimeUnit, Option<&TimeZone>, DynArgs, @@ -87,6 +88,7 @@ where duration, by, closed_window, + options.min_periods, tu, options.tz, options.fn_params, diff --git a/crates/polars-time/src/chunkedarray/rolling_window/rolling_kernels/no_nulls.rs b/crates/polars-time/src/chunkedarray/rolling_window/rolling_kernels/no_nulls.rs index 67ba1bb6213e..f77ef5f284cb 100644 --- a/crates/polars-time/src/chunkedarray/rolling_window/rolling_kernels/no_nulls.rs +++ b/crates/polars-time/src/chunkedarray/rolling_window/rolling_kernels/no_nulls.rs @@ -8,6 +8,7 @@ use super::*; pub(crate) fn rolling_apply_agg_window<'a, Agg, T, O>( values: &'a [T], offsets: O, + min_periods: usize, params: DynArgs, ) -> PolarsResult where @@ -32,7 +33,10 @@ where result.map(|(start, len)| { let end = start + len; - if start == end { + // On the Python side, if `min_periods` wasn't specified, it is set to + // `1`. In that case, this condition is the same as checking + // `if start == end`. + if len < (min_periods as IdxSize) { None } else { // safety: @@ -52,6 +56,7 @@ pub(crate) fn rolling_min( period: Duration, time: &[i64], closed_window: ClosedWindow, + min_periods: usize, tu: TimeUnit, tz: Option<&TimeZone>, _params: DynArgs, @@ -64,7 +69,7 @@ where Some(tz) => group_by_values_iter(period, time, closed_window, tu, tz.parse::().ok()), _ => group_by_values_iter(period, time, closed_window, tu, None), }?; - rolling_apply_agg_window::, _, _>(values, offset_iter, None) + rolling_apply_agg_window::, _, _>(values, offset_iter, min_periods, None) } #[allow(clippy::too_many_arguments)] @@ -73,6 +78,7 @@ pub(crate) fn rolling_max( period: Duration, time: &[i64], closed_window: ClosedWindow, + min_periods: usize, tu: TimeUnit, tz: Option<&TimeZone>, _params: DynArgs, @@ -85,7 +91,7 @@ where Some(tz) => group_by_values_iter(period, time, closed_window, tu, tz.parse::().ok()), _ => group_by_values_iter(period, time, closed_window, tu, None), }?; - rolling_apply_agg_window::, _, _>(values, offset_iter, None) + rolling_apply_agg_window::, _, _>(values, offset_iter, min_periods, None) } #[allow(clippy::too_many_arguments)] @@ -94,6 +100,7 @@ pub(crate) fn rolling_sum( period: Duration, time: &[i64], closed_window: ClosedWindow, + min_periods: usize, tu: TimeUnit, tz: Option<&TimeZone>, _params: DynArgs, @@ -106,7 +113,7 @@ where Some(tz) => group_by_values_iter(period, time, closed_window, tu, tz.parse::().ok()), _ => group_by_values_iter(period, time, closed_window, tu, None), }?; - rolling_apply_agg_window::, _, _>(values, offset_iter, None) + rolling_apply_agg_window::, _, _>(values, offset_iter, min_periods, None) } #[allow(clippy::too_many_arguments)] @@ -115,6 +122,7 @@ pub(crate) fn rolling_mean( period: Duration, time: &[i64], closed_window: ClosedWindow, + min_periods: usize, tu: TimeUnit, tz: Option<&TimeZone>, _params: DynArgs, @@ -127,7 +135,12 @@ where Some(tz) => group_by_values_iter(period, time, closed_window, tu, tz.parse::().ok()), _ => group_by_values_iter(period, time, closed_window, tu, None), }?; - rolling_apply_agg_window::, _, _>(values, offset_iter, None) + rolling_apply_agg_window::, _, _>( + values, + offset_iter, + min_periods, + None, + ) } #[allow(clippy::too_many_arguments)] @@ -136,6 +149,7 @@ pub(crate) fn rolling_var( period: Duration, time: &[i64], closed_window: ClosedWindow, + min_periods: usize, tu: TimeUnit, tz: Option<&TimeZone>, params: DynArgs, @@ -148,7 +162,12 @@ where Some(tz) => group_by_values_iter(period, time, closed_window, tu, tz.parse::().ok()), _ => group_by_values_iter(period, time, closed_window, tu, None), }?; - rolling_apply_agg_window::, _, _>(values, offset_iter, params) + rolling_apply_agg_window::, _, _>( + values, + offset_iter, + min_periods, + params, + ) } #[allow(clippy::too_many_arguments)] @@ -157,6 +176,7 @@ pub(crate) fn rolling_quantile( period: Duration, time: &[i64], closed_window: ClosedWindow, + min_periods: usize, tu: TimeUnit, tz: Option<&TimeZone>, params: DynArgs, @@ -169,5 +189,10 @@ where Some(tz) => group_by_values_iter(period, time, closed_window, tu, tz.parse::().ok()), _ => group_by_values_iter(period, time, closed_window, tu, None), }?; - rolling_apply_agg_window::, _, _>(values, offset_iter, params) + rolling_apply_agg_window::, _, _>( + values, + offset_iter, + min_periods, + params, + ) } diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 6cea47279ad9..422d5fe183ac 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -5701,7 +5701,10 @@ def rolling_min( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window by @@ -5911,7 +5914,10 @@ def rolling_max( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window by @@ -6144,7 +6150,10 @@ def rolling_mean( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window by @@ -6387,7 +6396,10 @@ def rolling_sum( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window by @@ -6617,7 +6629,10 @@ def rolling_std( relative contribution of each value in a window to the output. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window by @@ -6860,7 +6875,10 @@ def rolling_var( relative contribution of each value in a window to the output. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window by @@ -7102,7 +7120,10 @@ def rolling_median( relative contribution of each value in a window to the output. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window by @@ -7267,7 +7288,10 @@ def rolling_quantile( relative contribution of each value in a window to the output. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window by @@ -7469,7 +7493,10 @@ def rolling_map( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window. @@ -9431,7 +9458,10 @@ def rolling_apply( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 4ebe36a07ee0..ef3095fe51d8 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -5379,7 +5379,10 @@ def rolling_min( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window @@ -5435,7 +5438,10 @@ def rolling_max( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window @@ -5491,7 +5497,10 @@ def rolling_mean( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window @@ -5547,7 +5556,10 @@ def rolling_sum( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window @@ -5604,7 +5616,10 @@ def rolling_std( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window ddof @@ -5664,7 +5679,10 @@ def rolling_var( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window ddof @@ -5724,7 +5742,10 @@ def rolling_map( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window. @@ -5769,7 +5790,10 @@ def rolling_median( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window @@ -5834,7 +5858,10 @@ def rolling_quantile( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window @@ -7042,7 +7069,10 @@ def rolling_apply( elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to window size. + a result. If None, it will be set equal to: + + - the window size, if `window_size` is a fixed integer + - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window diff --git a/py-polars/tests/unit/operations/rolling/test_rolling.py b/py-polars/tests/unit/operations/rolling/test_rolling.py index af6ef9425613..4c8179bfd766 100644 --- a/py-polars/tests/unit/operations/rolling/test_rolling.py +++ b/py-polars/tests/unit/operations/rolling/test_rolling.py @@ -845,3 +845,55 @@ def test_rolling_median_2() -> None: assert df.select( pl.col("x").rolling_median(window_size=100).sum() ).item() == pytest.approx(26.60506093611384) + + +@pytest.mark.parametrize( + ("dates", "closed", "expected"), + [ + ( + [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)], + "right", + [None, 3, 5], + ), + ( + [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)], + "left", + [None, None, 3], + ), + ( + [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)], + "both", + [None, 3, 6], + ), + ( + [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)], + "none", + [None, None, None], + ), + ( + [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 4)], + "right", + [None, 3, None], + ), + ( + [date(2020, 1, 1), date(2020, 1, 3), date(2020, 1, 4)], + "right", + [None, None, 5], + ), + ( + [date(2020, 1, 1), date(2020, 1, 3), date(2020, 1, 5)], + "right", + [None, None, None], + ), + ], +) +def test_rolling_min_periods( + dates: list[date], closed: ClosedInterval, expected: list[int] +) -> None: + df = pl.DataFrame({"date": dates, "value": [1, 2, 3]}).sort("date") + result = df.select( + pl.col("value").rolling_sum( + window_size="2d", by="date", min_periods=2, closed=closed + ) + )["value"] + assert_series_equal(result, pl.Series("value", expected, pl.Int64))