Skip to content

Commit

Permalink
feat: warn if by column is not sorted in rolling aggregations (as o…
Browse files Browse the repository at this point in the history
…pposed to raising), add warn_if_unsorted argument (#12398)
  • Loading branch information
MarcoGorelli authored Nov 20, 2023
1 parent be6b565 commit b860c7f
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 21 deletions.
14 changes: 13 additions & 1 deletion crates/polars-plan/src/dsl/function_expr/rolling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,19 @@ fn convert<'a>(
),
dt => polars_bail!(opq = expr_name, got = dt, expected = "date/datetime"),
};
ensure_sorted_arg(&by, expr_name)?;
if by.is_sorted_flag() != IsSorted::Ascending && options.warn_if_unsorted {
polars_warn!(format!(
"Series is not known to be sorted by `by` column in {} operation.\n\
\n\
To silence this warning, you may want to try:\n\
- sorting your data by your `by` column beforehand;\n\
- setting `.set_sorted()` if you already know your data is sorted;\n\
- passing `warn_if_unsorted=False` if this warning is a false-positive\n \
(this is known to happen when combining rolling aggregations with `over`);\n\n\
before passing calling the rolling aggregation function.\n",
expr_name
));
}
let by = by.datetime().unwrap();
let by_values = by.cont_slice().map_err(|_| {
polars_err!(
Expand Down
2 changes: 0 additions & 2 deletions crates/polars-plan/src/dsl/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#![allow(ambiguous_glob_reexports)]
//! Domain specific language for the Lazy API.
#[cfg(feature = "rolling_window")]
use polars_core::utils::ensure_sorted_arg;
#[cfg(feature = "dtype-categorical")]
pub mod cat;
#[cfg(feature = "dtype-categorical")]
Expand Down
3 changes: 3 additions & 0 deletions crates/polars-time/src/chunkedarray/rolling_window/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ pub struct RollingOptions {
/// Optional parameters for the rolling function
#[cfg_attr(feature = "serde", serde(skip))]
pub fn_params: DynArgs,
/// Warn if data is not known to be sorted by `by` column (if passed)
pub warn_if_unsorted: bool,
}

impl Default for RollingOptions {
Expand All @@ -43,6 +45,7 @@ impl Default for RollingOptions {
by: None,
closed_window: None,
fn_params: None,
warn_if_unsorted: true,
}
}
}
Expand Down
59 changes: 53 additions & 6 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5558,6 +5558,7 @@ def rolling_min(
center: bool = False,
by: str | None = None,
closed: ClosedInterval = "left",
warn_if_unsorted: bool = True,
) -> Self:
"""
Apply a rolling min (moving min) over the values in this array.
Expand Down Expand Up @@ -5625,6 +5626,9 @@ def rolling_min(
closed : {'left', 'right', 'both', 'none'}
Define which sides of the temporal interval are closed (inclusive); only
applicable if `by` has been set.
warn_if_unsorted
Warn if data is not known to be sorted by `by` column (if passed).
Experimental.
Warnings
--------
Expand Down Expand Up @@ -5751,7 +5755,7 @@ def rolling_min(
)
return self._from_pyexpr(
self._pyexpr.rolling_min(
window_size, weights, min_periods, center, by, closed
window_size, weights, min_periods, center, by, closed, warn_if_unsorted
)
)

Expand All @@ -5765,6 +5769,7 @@ def rolling_max(
center: bool = False,
by: str | None = None,
closed: ClosedInterval = "left",
warn_if_unsorted: bool = True,
) -> Self:
"""
Apply a rolling max (moving max) over the values in this array.
Expand Down Expand Up @@ -5828,6 +5833,9 @@ def rolling_max(
closed : {'left', 'right', 'both', 'none'}
Define which sides of the temporal interval are closed (inclusive); only
applicable if `by` has been set.
warn_if_unsorted
Warn if data is not known to be sorted by `by` column (if passed).
Experimental.
Warnings
--------
Expand Down Expand Up @@ -5981,7 +5989,7 @@ def rolling_max(
)
return self._from_pyexpr(
self._pyexpr.rolling_max(
window_size, weights, min_periods, center, by, closed
window_size, weights, min_periods, center, by, closed, warn_if_unsorted
)
)

Expand All @@ -5995,6 +6003,7 @@ def rolling_mean(
center: bool = False,
by: str | None = None,
closed: ClosedInterval = "left",
warn_if_unsorted: bool = True,
) -> Self:
"""
Apply a rolling mean (moving mean) over the values in this array.
Expand Down Expand Up @@ -6062,6 +6071,9 @@ def rolling_mean(
closed : {'left', 'right', 'both', 'none'}
Define which sides of the temporal interval are closed (inclusive); only
applicable if `by` has been set.
warn_if_unsorted
Warn if data is not known to be sorted by `by` column (if passed).
Experimental.
Warnings
--------
Expand Down Expand Up @@ -6215,7 +6227,13 @@ def rolling_mean(
)
return self._from_pyexpr(
self._pyexpr.rolling_mean(
window_size, weights, min_periods, center, by, closed
window_size,
weights,
min_periods,
center,
by,
closed,
warn_if_unsorted,
)
)

Expand All @@ -6229,6 +6247,7 @@ def rolling_sum(
center: bool = False,
by: str | None = None,
closed: ClosedInterval = "left",
warn_if_unsorted: bool = True,
) -> Self:
"""
Apply a rolling sum (moving sum) over the values in this array.
Expand Down Expand Up @@ -6292,6 +6311,9 @@ def rolling_sum(
closed : {'left', 'right', 'both', 'none'}
Define which sides of the temporal interval are closed (inclusive); only
applicable if `by` has been set.
warn_if_unsorted
Warn if data is not known to be sorted by `by` column (if passed).
Experimental.
Warnings
--------
Expand Down Expand Up @@ -6445,7 +6467,7 @@ def rolling_sum(
)
return self._from_pyexpr(
self._pyexpr.rolling_sum(
window_size, weights, min_periods, center, by, closed
window_size, weights, min_periods, center, by, closed, warn_if_unsorted
)
)

Expand All @@ -6460,6 +6482,7 @@ def rolling_std(
by: str | None = None,
closed: ClosedInterval = "left",
ddof: int = 1,
warn_if_unsorted: bool = True,
) -> Self:
"""
Compute a rolling standard deviation.
Expand Down Expand Up @@ -6525,6 +6548,9 @@ def rolling_std(
applicable if `by` has been set.
ddof
"Delta Degrees of Freedom": The divisor for a length N window is N - ddof
warn_if_unsorted
Warn if data is not known to be sorted by `by` column (if passed).
Experimental.
Warnings
--------
Expand Down Expand Up @@ -6678,7 +6704,14 @@ def rolling_std(
)
return self._from_pyexpr(
self._pyexpr.rolling_std(
window_size, weights, min_periods, center, by, closed, ddof
window_size,
weights,
min_periods,
center,
by,
closed,
ddof,
warn_if_unsorted,
)
)

Expand All @@ -6693,6 +6726,7 @@ def rolling_var(
by: str | None = None,
closed: ClosedInterval = "left",
ddof: int = 1,
warn_if_unsorted: bool = True,
) -> Self:
"""
Compute a rolling variance.
Expand Down Expand Up @@ -6758,6 +6792,9 @@ def rolling_var(
applicable if `by` has been set.
ddof
"Delta Degrees of Freedom": The divisor for a length N window is N - ddof
warn_if_unsorted
Warn if data is not known to be sorted by `by` column (if passed).
Experimental.
Warnings
--------
Expand Down Expand Up @@ -6918,6 +6955,7 @@ def rolling_var(
by,
closed,
ddof,
warn_if_unsorted,
)
)

Expand All @@ -6931,6 +6969,7 @@ def rolling_median(
center: bool = False,
by: str | None = None,
closed: ClosedInterval = "left",
warn_if_unsorted: bool = True,
) -> Self:
"""
Compute a rolling median.
Expand Down Expand Up @@ -6994,6 +7033,9 @@ def rolling_median(
closed : {'left', 'right', 'both', 'none'}
Define which sides of the temporal interval are closed (inclusive); only
applicable if `by` has been set.
warn_if_unsorted
Warn if data is not known to be sorted by `by` column (if passed).
Experimental.
Warnings
--------
Expand Down Expand Up @@ -7073,7 +7115,7 @@ def rolling_median(
)
return self._from_pyexpr(
self._pyexpr.rolling_median(
window_size, weights, min_periods, center, by, closed
window_size, weights, min_periods, center, by, closed, warn_if_unsorted
)
)

Expand All @@ -7089,6 +7131,7 @@ def rolling_quantile(
center: bool = False,
by: str | None = None,
closed: ClosedInterval = "left",
warn_if_unsorted: bool = True,
) -> Self:
"""
Compute a rolling quantile.
Expand Down Expand Up @@ -7156,6 +7199,9 @@ def rolling_quantile(
closed : {'left', 'right', 'both', 'none'}
Define which sides of the temporal interval are closed (inclusive); only
applicable if `by` has been set.
warn_if_unsorted
Warn if data is not known to be sorted by `by` column (if passed).
Experimental.
Warnings
--------
Expand Down Expand Up @@ -7271,6 +7317,7 @@ def rolling_quantile(
center,
by,
closed,
warn_if_unsorted,
)
)

Expand Down
Loading

0 comments on commit b860c7f

Please sign in to comment.