From 584d68020eeb8946d2f25188b72802be2836c15b Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 15 Jul 2024 10:14:26 +0100 Subject: [PATCH 1/6] feat: implement round and truncate for Duration --- .../src/dsl/function_expr/datetime.rs | 14 +- crates/polars-time/src/round.rs | 83 +++++++-- crates/polars-time/src/truncate.rs | 77 ++++++-- py-polars/polars/expr/datetime.py | 21 ++- py-polars/polars/series/datetime.py | 27 ++- .../namespaces/temporal/test_datetime.py | 173 ++++++++++++++++-- 6 files changed, 325 insertions(+), 70 deletions(-) diff --git a/crates/polars-plan/src/dsl/function_expr/datetime.rs b/crates/polars-plan/src/dsl/function_expr/datetime.rs index 1a6251a43f82..ea1c6f17c225 100644 --- a/crates/polars-plan/src/dsl/function_expr/datetime.rs +++ b/crates/polars-plan/src/dsl/function_expr/datetime.rs @@ -397,7 +397,12 @@ pub(super) fn truncate(s: &[Series]) -> PolarsResult { _ => time_series.datetime()?.truncate(None, every)?.into_series(), }, DataType::Date => time_series.date()?.truncate(None, every)?.into_series(), - dt => polars_bail!(opq = round, got = dt, expected = "date/datetime"), + DataType::Duration(_) => time_series.duration()?.truncate(None, every)?.into_series(), + dt => polars_bail!( + opq = truncate, + got = dt, + expected = "date/datetime/duration" + ), }; out.set_sorted_flag(time_series.is_sorted_flag()); Ok(out) @@ -498,7 +503,12 @@ pub(super) fn round(s: &[Series]) -> PolarsResult { .unwrap() .round(every, None)? .into_series(), - dt => polars_bail!(opq = round, got = dt, expected = "date/datetime"), + DataType::Duration(_) => time_series + .duration() + .unwrap() + .round(every, None)? + .into_series(), + dt => polars_bail!(opq = round, got = dt, expected = "date/datetime/duration"), }) } diff --git a/crates/polars-time/src/round.rs b/crates/polars-time/src/round.rs index 4bb6f2a3386f..ab0878c3d40d 100644 --- a/crates/polars-time/src/round.rs +++ b/crates/polars-time/src/round.rs @@ -21,9 +21,7 @@ impl PolarsRound for DatetimeChunked { if every.len() == 1 { if let Some(every) = every.get(0) { let every_parsed = Duration::parse(every); - if every_parsed.negative { - polars_bail!(ComputeError: "cannot round a Datetime to a negative duration") - } + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Datetime to a non-positive duration"); if (time_zone.is_none() || time_zone.as_deref() == Some("UTC")) && (every_parsed.months() == 0 && every_parsed.weeks() == 0) { @@ -76,14 +74,11 @@ impl PolarsRound for DatetimeChunked { opt_every, ) { (Some(timestamp), Some(every)) => { - let every = + let every_parsed = *duration_cache.get_or_insert_with(every, |every| Duration::parse(every)); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Date to a non-positive duration"); - if every.negative { - polars_bail!(ComputeError: "cannot round a Datetime to a negative duration") - } - - let w = Window::new(every, every, offset); + let w = Window::new(every_parsed, every_parsed, offset); func(&w, timestamp, tz).map(Some) }, _ => Ok(None), @@ -98,11 +93,9 @@ impl PolarsRound for DateChunked { let out = match every.len() { 1 => { if let Some(every) = every.get(0) { - let every = Duration::parse(every); - if every.negative { - polars_bail!(ComputeError: "cannot round a Date to a negative duration") - } - let w = Window::new(every, every, offset); + let every_parsed = Duration::parse(every); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Date to a non-positive duration"); + let w = Window::new(every_parsed, every_parsed, offset); self.try_apply_nonnull_values_generic(|t| { Ok( (w.round_ms(MILLISECONDS_IN_DAY * t as i64, None)? @@ -118,14 +111,11 @@ impl PolarsRound for DateChunked { let mut duration_cache = FastFixedCache::new((every.len() as f64).sqrt() as usize); match (opt_t, opt_every) { (Some(t), Some(every)) => { - let every = *duration_cache + let every_parsed = *duration_cache .get_or_insert_with(every, |every| Duration::parse(every)); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Date to a non-positive duration"); - if every.negative { - polars_bail!(ComputeError: "cannot round a Date to a negative duration") - } - - let w = Window::new(every, every, offset); + let w = Window::new(every_parsed, every_parsed, offset); Ok(Some( (w.round_ms(MILLISECONDS_IN_DAY * t as i64, None)? / MILLISECONDS_IN_DAY) as i32, @@ -138,3 +128,56 @@ impl PolarsRound for DateChunked { Ok(out?.into_date()) } } + +#[cfg(feature = "dtype-duration")] +impl PolarsRound for DurationChunked { + fn round(&self, every: &StringChunked, _tz: Option<&Tz>) -> PolarsResult { + if every.len() == 1 { + if let Some(every) = every.get(0) { + let every_parsed = Duration::parse(every); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Duration to a non-positive duration"); + polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot round a Duration to a non-constant duration (i.e. one that involves weeks / months)"); + let every = match self.time_unit() { + TimeUnit::Milliseconds => every_parsed.duration_ms(), + TimeUnit::Microseconds => every_parsed.duration_us(), + TimeUnit::Nanoseconds => every_parsed.duration_ns(), + }; + return Ok(self + .apply_values(|t| { + // Round half-way values away from zero + let half_away = t.signum() * every / 2; + t + half_away - (t + half_away) % every + }) + .into_duration(self.time_unit())); + } else { + return Ok(Int64Chunked::full_null(self.name(), self.len()) + .into_duration(self.time_unit())); + } + } + + // A sqrt(n) cache is not too small, not too large. + let mut duration_cache = FastFixedCache::new((every.len() as f64).sqrt() as usize); + + let out = broadcast_try_binary_elementwise(self, every, |opt_timestamp, opt_every| match ( + opt_timestamp, + opt_every, + ) { + (Some(t), Some(every)) => { + let every_parsed = + *duration_cache.get_or_insert_with(every, |every| Duration::parse(every)); + polars_ensure!(!every_parsed.negative, InvalidOperation: "cannot round a Duration to a negative duration"); + polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot round a Duration to a non-constant duration (i.e. one that involves weeks / months)"); + let every = match self.time_unit() { + TimeUnit::Milliseconds => every_parsed.duration_ms(), + TimeUnit::Microseconds => every_parsed.duration_us(), + TimeUnit::Nanoseconds => every_parsed.duration_ns(), + }; + // Round half-way values away from zero + let half_away = t.signum() * every / 2; + Ok(Some(t + half_away - (t + half_away) % every)) + }, + _ => Ok(None), + }); + Ok(out?.into_duration(self.time_unit())) + } +} diff --git a/crates/polars-time/src/truncate.rs b/crates/polars-time/src/truncate.rs index 991ce50b547a..a4cf11ab47e1 100644 --- a/crates/polars-time/src/truncate.rs +++ b/crates/polars-time/src/truncate.rs @@ -21,9 +21,7 @@ impl PolarsTruncate for DatetimeChunked { if every.len() == 1 { if let Some(every) = every.get(0) { let every_parsed = Duration::parse(every); - if every_parsed.negative { - polars_bail!(ComputeError: "cannot truncate a Datetime to a negative duration") - } + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Datetime to a non-positive duration"); if (time_zone.is_none() || time_zone.as_deref() == Some("UTC")) && (every_parsed.months() == 0 && every_parsed.weeks() == 0) { @@ -75,14 +73,11 @@ impl PolarsTruncate for DatetimeChunked { opt_every, ) { (Some(timestamp), Some(every)) => { - let every = + let every_parsed = *duration_cache.get_or_insert_with(every, |every| Duration::parse(every)); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Datetime to a non-positive duration"); - if every.negative { - polars_bail!(ComputeError: "cannot truncate a Datetime to a negative duration") - } - - let w = Window::new(every, every, offset); + let w = Window::new(every_parsed, every_parsed, offset); func(&w, timestamp, tz).map(Some) }, _ => Ok(None), @@ -97,11 +92,9 @@ impl PolarsTruncate for DateChunked { let out = match every.len() { 1 => { if let Some(every) = every.get(0) { - let every = Duration::parse(every); - if every.negative { - polars_bail!(ComputeError: "cannot truncate a Date to a negative duration") - } - let w = Window::new(every, every, offset); + let every_parsed = Duration::parse(every); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Date to a non-positive duration"); + let w = Window::new(every_parsed, every_parsed, offset); self.try_apply_nonnull_values_generic(|t| { Ok((w.truncate_ms(MILLISECONDS_IN_DAY * t as i64, None)? / MILLISECONDS_IN_DAY) as i32) @@ -115,14 +108,11 @@ impl PolarsTruncate for DateChunked { let mut duration_cache = FastFixedCache::new((every.len() as f64).sqrt() as usize); match (opt_t, opt_every) { (Some(t), Some(every)) => { - let every = *duration_cache + let every_parsed = *duration_cache .get_or_insert_with(every, |every| Duration::parse(every)); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Date to a non-positive duration"); - if every.negative { - polars_bail!(ComputeError: "cannot truncate a Date to a negative duration") - } - - let w = Window::new(every, every, offset); + let w = Window::new(every_parsed, every_parsed, offset); Ok(Some( (w.truncate_ms(MILLISECONDS_IN_DAY * t as i64, None)? / MILLISECONDS_IN_DAY) as i32, @@ -135,3 +125,50 @@ impl PolarsTruncate for DateChunked { Ok(out?.into_date()) } } + +#[cfg(feature = "dtype-duration")] +impl PolarsTruncate for DurationChunked { + fn truncate(&self, _tz: Option<&Tz>, every: &StringChunked) -> PolarsResult { + if every.len() == 1 { + if let Some(every) = every.get(0) { + let every_parsed = Duration::parse(every); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Duration to a non-positive duration"); + polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot truncate a Duration to a non-constant duration (i.e. one that involves weeks / months)"); + let every = match self.time_unit() { + TimeUnit::Milliseconds => every_parsed.duration_ms(), + TimeUnit::Microseconds => every_parsed.duration_us(), + TimeUnit::Nanoseconds => every_parsed.duration_ns(), + }; + return Ok(self + .apply_values(|t| t - t % every) + .into_duration(self.time_unit())); + } else { + return Ok(Int64Chunked::full_null(self.name(), self.len()) + .into_duration(self.time_unit())); + } + } + + // A sqrt(n) cache is not too small, not too large. + let mut duration_cache = FastFixedCache::new((every.len() as f64).sqrt() as usize); + + let out = broadcast_try_binary_elementwise(self, every, |opt_timestamp, opt_every| match ( + opt_timestamp, + opt_every, + ) { + (Some(t), Some(every)) => { + let every_parsed = + *duration_cache.get_or_insert_with(every, |every| Duration::parse(every)); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Duration to a non-positive duration"); + polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot truncate a Duration to a non-constant duration (i.e. one that involves weeks / months)"); + let every = match self.time_unit() { + TimeUnit::Milliseconds => every_parsed.duration_ms(), + TimeUnit::Microseconds => every_parsed.duration_us(), + TimeUnit::Nanoseconds => every_parsed.duration_ns(), + }; + Ok(Some(t - t % every)) + }, + _ => Ok(None), + }); + Ok(out?.into_duration(self.time_unit())) + } +} diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index cdf6ccb6516f..528b61984b92 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -152,10 +152,11 @@ def add_business_days( def truncate(self, every: str | dt.timedelta | Expr) -> Expr: """ - Divide the date/datetime range into buckets. + Divide the dates, datetimes, or durations into buckets. - Each date/datetime is mapped to the start of its bucket using the corresponding - local datetime. Note that weekly buckets start on Monday. + For dates or datetimes, each date/datetime is mapped to the start of its bucket + using the corresponding local datetime. + Note that weekly buckets start on Monday. Ambiguous results are localised using the DST offset of the original timestamp - for example, truncating `'2022-11-06 01:30:00 CST'` by `'1h'` results in `'2022-11-06 01:00:00 CST'`, whereas truncating `'2022-11-06 01:30:00 CDT'` by @@ -192,6 +193,10 @@ def truncate(self, every: str | dt.timedelta | Expr) -> Expr: not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". + Durations may not be truncated to a period length `every` containing calendar + days, weeks, months, quarters, or years, as these are not constant time + intervals. + Returns ------- Expr @@ -278,15 +283,15 @@ def truncate(self, every: str | dt.timedelta | Expr) -> Expr: @unstable() def round(self, every: str | dt.timedelta | IntoExprColumn) -> Expr: """ - Divide the date/datetime range into buckets. + Divide the dates, datetimes, or durations into buckets. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. - Each date/datetime in the first half of the interval + Each date/datetime/duration in the first half of the interval is mapped to the start of its bucket. - Each date/datetime in the second half of the interval + Each date/datetime/duration in the second half of the interval is mapped to the end of its bucket. Ambiguous results are localised using the DST offset of the original timestamp - for example, rounding `'2022-11-06 01:20:00 CST'` by `'1h'` results in @@ -326,6 +331,10 @@ def round(self, every: str | dt.timedelta | IntoExprColumn) -> Expr: not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". + Durations may not be rounded to a period length `every` containing calendar + days, weeks, months, quarters, or years, as these are not constant time + intervals. + Examples -------- >>> from datetime import timedelta, datetime diff --git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index 8c8bfb32bad8..de97e54389bd 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -1644,10 +1644,11 @@ def offset_by(self, by: str | Expr) -> Series: def truncate(self, every: str | dt.timedelta | IntoExprColumn) -> Series: """ - Divide the date/ datetime range into buckets. + Divide the dates, datetimes, or durations into buckets. - Each date/datetime is mapped to the start of its bucket using the corresponding - local datetime. Note that weekly buckets start on Monday. + For dates or datetimes, each date/datetime is mapped to the start of its bucket + using the corresponding local datetime. + Note that weekly buckets start on Monday. Ambiguous results are localised using the DST offset of the original timestamp - for example, truncating `'2022-11-06 01:30:00 CST'` by `'1h'` results in `'2022-11-06 01:00:00 CST'`, whereas truncating `'2022-11-06 01:30:00 CDT'` by @@ -1683,6 +1684,10 @@ def truncate(self, every: str | dt.timedelta | IntoExprColumn) -> Series: not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". + Durations may not be truncated to a period length `every` containing calendar + days, weeks, months, quarters, or years, as these are not constant time + intervals. + Returns ------- Series @@ -1758,17 +1763,17 @@ def truncate(self, every: str | dt.timedelta | IntoExprColumn) -> Series: @unstable() def round(self, every: str | dt.timedelta | IntoExprColumn) -> Series: """ - Divide the date/ datetime range into buckets. + Divide the dates, datetimes, or durations into buckets. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. - Each date/datetime in the first half of the interval is mapped to the start of - its bucket. - Each date/datetime in the second half of the interval is mapped to the end of - its bucket. - Ambiguous results are localized using the DST offset of the original timestamp - + Each date/datetime/duration in the first half of the interval + is mapped to the start of its bucket. + Each date/datetime/duration in the second half of the interval + is mapped to the end of its bucket. + Ambiguous results are localised using the DST offset of the original timestamp - for example, rounding `'2022-11-06 01:20:00 CST'` by `'1h'` results in `'2022-11-06 01:00:00 CST'`, whereas rounding `'2022-11-06 01:20:00 CDT'` by `'1h'` results in `'2022-11-06 01:00:00 CDT'`. @@ -1808,6 +1813,10 @@ def round(self, every: str | dt.timedelta | IntoExprColumn) -> Series: not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". + Durations may not be rounded to a period length `every` containing calendar + days, weeks, months, quarters, or years, as these are not constant time + intervals. + Examples -------- >>> from datetime import timedelta, datetime diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index c9a43984cd45..0cfdf97ada63 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -486,35 +486,116 @@ def test_truncate( assert out.dt[-1] == stop +@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) +def test_truncate_duration(time_unit: TimeUnit) -> None: + durations = pl.Series( + [ + timedelta(seconds=21), + timedelta(seconds=35), + timedelta(seconds=59), + None, + timedelta(seconds=-35), + ] + ).dt.cast_time_unit(time_unit) + + expected = pl.Series( + [ + timedelta(seconds=20), + timedelta(seconds=30), + timedelta(seconds=50), + None, + timedelta(seconds=-30), + ] + ).dt.cast_time_unit(time_unit) + + assert_series_equal(durations.dt.truncate("10s"), expected) + + +def test_truncate_duration_zero() -> None: + """Truncating to the nearest zero should raise a descriptive error.""" + durations = pl.Series([timedelta(seconds=21), timedelta(seconds=35)]) + + with pytest.raises( + InvalidOperationError, + match="cannot truncate a Duration to a non-positive duration", + ): + durations.dt.truncate("0s") + + +def test_truncate_expressions() -> None: + df = pl.DataFrame( + { + "duration": [ + timedelta(seconds=20), + timedelta(seconds=21), + timedelta(seconds=22), + ], + "every": ["3s", "4s", "5s"], + } + ) + result = df.select(pl.col("duration").dt.truncate(pl.col("every")))["duration"] + expected = pl.Series( + "duration", + [timedelta(seconds=18), timedelta(seconds=20), timedelta(seconds=20)], + ) + assert_series_equal(result, expected) + + +@pytest.mark.parametrize("every_unit", ["mo", "q", "y"]) +def test_truncated_duration_non_constant(every_unit: str) -> None: + # Duration series can't be truncated to non-constant durations + df = pl.DataFrame( + { + "durations": [timedelta(seconds=1), timedelta(seconds=2)], + "every": ["1" + every_unit, "1" + every_unit], + } + ) + + with pytest.raises(InvalidOperationError): + df["durations"].dt.truncate("1" + every_unit) + + with pytest.raises(InvalidOperationError): + df.select(pl.col("durations").dt.truncate(pl.col("every"))) + + def test_truncate_negative() -> None: """Test that truncating to a negative duration gives a helpful error message.""" df = pl.DataFrame( { "date": [date(1895, 5, 7), date(1955, 11, 5)], "datetime": [datetime(1895, 5, 7), datetime(1955, 11, 5)], - "duration": ["-1m", "1m"], + "duration": [timedelta(minutes=1), timedelta(minutes=-1)], + "every": ["-1m", "1m"], } ) - with pytest.raises( - ComputeError, match="cannot truncate a Date to a negative duration" + InvalidOperationError, match="cannot truncate a Date to a non-positive duration" ): df.select(pl.col("date").dt.truncate("-1m")) - with pytest.raises( - ComputeError, match="cannot truncate a Datetime to a negative duration" + InvalidOperationError, + match="cannot truncate a Datetime to a non-positive duration", ): df.select(pl.col("datetime").dt.truncate("-1m")) - with pytest.raises( - ComputeError, match="cannot truncate a Date to a negative duration" + InvalidOperationError, + match="cannot truncate a Duration to a non-positive duration", + ): + df.select(pl.col("duration").dt.truncate("-1m")) + with pytest.raises( + InvalidOperationError, match="cannot truncate a Date to a non-positive duration" ): - df.select(pl.col("date").dt.truncate(pl.col("duration"))) - + df.select(pl.col("date").dt.truncate(pl.col("every"))) + with pytest.raises( + InvalidOperationError, + match="cannot truncate a Datetime to a non-positive duration", + ): + df.select(pl.col("datetime").dt.truncate(pl.col("every"))) with pytest.raises( - ComputeError, match="cannot truncate a Datetime to a negative duration" + InvalidOperationError, + match="cannot truncate a Duration to a non-positive duration", ): - df.select(pl.col("datetime").dt.truncate(pl.col("duration"))) + df.select(pl.col("duration").dt.truncate(pl.col("every"))) @pytest.mark.parametrize( @@ -551,6 +632,65 @@ def test_round( assert out.dt[-1] == stop +@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) +def test_round_duration(time_unit: TimeUnit) -> None: + durations = pl.Series( + [ + timedelta(seconds=21), + timedelta(seconds=35), + timedelta(seconds=59), + None, + timedelta(seconds=-35), + ] + ).dt.cast_time_unit(time_unit) + + expected = pl.Series( + [ + timedelta(seconds=20), + timedelta(seconds=40), + timedelta(seconds=60), + None, + timedelta(seconds=-40), + ] + ).dt.cast_time_unit(time_unit) + + assert_series_equal(durations.dt.round("10s"), expected) + + +def test_round_duration_zero() -> None: + """Rounding to the nearest zero should raise a descriptive error.""" + durations = pl.Series([timedelta(seconds=21), timedelta(seconds=35)]) + + with pytest.raises( + InvalidOperationError, + match="cannot round a Duration to a non-positive duration", + ): + durations.dt.round("0s") + + +@pytest.mark.parametrize("every", ["mo", "q", "y"]) +def test_round_duration_non_constant(every: str) -> None: + # Duration series can't be rounded to non-constant durations + durations = pl.Series([timedelta(seconds=21)]) + + with pytest.raises(InvalidOperationError): + durations.dt.round("1" + every) + + +@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) +def test_round_duration_half(time_unit: TimeUnit) -> None: + # Values at halfway points should round away from zero + durations = pl.Series( + [timedelta(minutes=-30), timedelta(minutes=30), timedelta(minutes=90)] + ).dt.cast_time_unit(time_unit) + + expected = pl.Series( + [timedelta(hours=-1), timedelta(hours=1), timedelta(hours=2)] + ).dt.cast_time_unit(time_unit) + + assert_series_equal(durations.dt.round("1h"), expected) + + def test_round_expr() -> None: df = pl.DataFrame( { @@ -612,15 +752,22 @@ def test_round_expr() -> None: def test_round_negative() -> None: """Test that rounding to a negative duration gives a helpful error message.""" with pytest.raises( - ComputeError, match="cannot round a Date to a negative duration" + InvalidOperationError, match="cannot round a Date to a non-positive duration" ): pl.Series([date(1895, 5, 7)]).dt.round("-1m") with pytest.raises( - ComputeError, match="cannot round a Datetime to a negative duration" + InvalidOperationError, + match="cannot round a Datetime to a non-positive duration", ): pl.Series([datetime(1895, 5, 7)]).dt.round("-1m") + with pytest.raises( + InvalidOperationError, + match="cannot round a Duration to a non-positive duration", + ): + pl.Series([timedelta(days=1)]).dt.round("-1m") + @pytest.mark.parametrize( ("time_unit", "date_in_that_unit"), From da2199ad6ed4a5f3f3022c22372fa66a53da8708 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Tue, 16 Apr 2024 19:02:17 -0700 Subject: [PATCH 2/6] noop From fde75e9673366bbd6b7fb13ed25006425c7bccee Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:56:59 +0100 Subject: [PATCH 3/6] consistency --- crates/polars-time/src/round.rs | 10 ++++----- crates/polars-time/src/truncate.rs | 12 +++++----- .../namespaces/temporal/test_datetime.py | 22 +++++++++---------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/crates/polars-time/src/round.rs b/crates/polars-time/src/round.rs index ab0878c3d40d..f8b6d2a6201c 100644 --- a/crates/polars-time/src/round.rs +++ b/crates/polars-time/src/round.rs @@ -21,7 +21,7 @@ impl PolarsRound for DatetimeChunked { if every.len() == 1 { if let Some(every) = every.get(0) { let every_parsed = Duration::parse(every); - polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Datetime to a non-positive duration"); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Datetime to a non-positive Duration"); if (time_zone.is_none() || time_zone.as_deref() == Some("UTC")) && (every_parsed.months() == 0 && every_parsed.weeks() == 0) { @@ -76,7 +76,7 @@ impl PolarsRound for DatetimeChunked { (Some(timestamp), Some(every)) => { let every_parsed = *duration_cache.get_or_insert_with(every, |every| Duration::parse(every)); - polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Date to a non-positive duration"); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Datetime to a non-positive Duration"); let w = Window::new(every_parsed, every_parsed, offset); func(&w, timestamp, tz).map(Some) @@ -94,7 +94,7 @@ impl PolarsRound for DateChunked { 1 => { if let Some(every) = every.get(0) { let every_parsed = Duration::parse(every); - polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Date to a non-positive duration"); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Date to a non-positive Duration"); let w = Window::new(every_parsed, every_parsed, offset); self.try_apply_nonnull_values_generic(|t| { Ok( @@ -113,7 +113,7 @@ impl PolarsRound for DateChunked { (Some(t), Some(every)) => { let every_parsed = *duration_cache .get_or_insert_with(every, |every| Duration::parse(every)); - polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Date to a non-positive duration"); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Date to a non-positive Duration"); let w = Window::new(every_parsed, every_parsed, offset); Ok(Some( @@ -135,7 +135,7 @@ impl PolarsRound for DurationChunked { if every.len() == 1 { if let Some(every) = every.get(0) { let every_parsed = Duration::parse(every); - polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Duration to a non-positive duration"); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Duration to a non-positive Duration"); polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot round a Duration to a non-constant duration (i.e. one that involves weeks / months)"); let every = match self.time_unit() { TimeUnit::Milliseconds => every_parsed.duration_ms(), diff --git a/crates/polars-time/src/truncate.rs b/crates/polars-time/src/truncate.rs index a4cf11ab47e1..cbb260756157 100644 --- a/crates/polars-time/src/truncate.rs +++ b/crates/polars-time/src/truncate.rs @@ -21,7 +21,7 @@ impl PolarsTruncate for DatetimeChunked { if every.len() == 1 { if let Some(every) = every.get(0) { let every_parsed = Duration::parse(every); - polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Datetime to a non-positive duration"); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Datetime to a non-positive Duration"); if (time_zone.is_none() || time_zone.as_deref() == Some("UTC")) && (every_parsed.months() == 0 && every_parsed.weeks() == 0) { @@ -75,7 +75,7 @@ impl PolarsTruncate for DatetimeChunked { (Some(timestamp), Some(every)) => { let every_parsed = *duration_cache.get_or_insert_with(every, |every| Duration::parse(every)); - polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Datetime to a non-positive duration"); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Datetime to a non-positive Duration"); let w = Window::new(every_parsed, every_parsed, offset); func(&w, timestamp, tz).map(Some) @@ -93,7 +93,7 @@ impl PolarsTruncate for DateChunked { 1 => { if let Some(every) = every.get(0) { let every_parsed = Duration::parse(every); - polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Date to a non-positive duration"); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Date to a non-positive Duration"); let w = Window::new(every_parsed, every_parsed, offset); self.try_apply_nonnull_values_generic(|t| { Ok((w.truncate_ms(MILLISECONDS_IN_DAY * t as i64, None)? @@ -110,7 +110,7 @@ impl PolarsTruncate for DateChunked { (Some(t), Some(every)) => { let every_parsed = *duration_cache .get_or_insert_with(every, |every| Duration::parse(every)); - polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Date to a non-positive duration"); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Date to a non-positive Duration"); let w = Window::new(every_parsed, every_parsed, offset); Ok(Some( @@ -132,7 +132,7 @@ impl PolarsTruncate for DurationChunked { if every.len() == 1 { if let Some(every) = every.get(0) { let every_parsed = Duration::parse(every); - polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Duration to a non-positive duration"); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Duration to a non-positive Duration"); polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot truncate a Duration to a non-constant duration (i.e. one that involves weeks / months)"); let every = match self.time_unit() { TimeUnit::Milliseconds => every_parsed.duration_ms(), @@ -158,7 +158,7 @@ impl PolarsTruncate for DurationChunked { (Some(t), Some(every)) => { let every_parsed = *duration_cache.get_or_insert_with(every, |every| Duration::parse(every)); - polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Duration to a non-positive duration"); + polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Duration to a non-positive Duration"); polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot truncate a Duration to a non-constant duration (i.e. one that involves weeks / months)"); let every = match self.time_unit() { TimeUnit::Milliseconds => every_parsed.duration_ms(), diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index 0cfdf97ada63..fdfaef04893b 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -517,7 +517,7 @@ def test_truncate_duration_zero() -> None: with pytest.raises( InvalidOperationError, - match="cannot truncate a Duration to a non-positive duration", + match="cannot truncate a Duration to a non-positive Duration", ): durations.dt.truncate("0s") @@ -569,31 +569,31 @@ def test_truncate_negative() -> None: } ) with pytest.raises( - InvalidOperationError, match="cannot truncate a Date to a non-positive duration" + InvalidOperationError, match="cannot truncate a Date to a non-positive Duration" ): df.select(pl.col("date").dt.truncate("-1m")) with pytest.raises( InvalidOperationError, - match="cannot truncate a Datetime to a non-positive duration", + match="cannot truncate a Datetime to a non-positive Duration", ): df.select(pl.col("datetime").dt.truncate("-1m")) with pytest.raises( InvalidOperationError, - match="cannot truncate a Duration to a non-positive duration", + match="cannot truncate a Duration to a non-positive Duration", ): df.select(pl.col("duration").dt.truncate("-1m")) with pytest.raises( - InvalidOperationError, match="cannot truncate a Date to a non-positive duration" + InvalidOperationError, match="cannot truncate a Date to a non-positive Duration" ): df.select(pl.col("date").dt.truncate(pl.col("every"))) with pytest.raises( InvalidOperationError, - match="cannot truncate a Datetime to a non-positive duration", + match="cannot truncate a Datetime to a non-positive Duration", ): df.select(pl.col("datetime").dt.truncate(pl.col("every"))) with pytest.raises( InvalidOperationError, - match="cannot truncate a Duration to a non-positive duration", + match="cannot truncate a Duration to a non-positive Duration", ): df.select(pl.col("duration").dt.truncate(pl.col("every"))) @@ -663,7 +663,7 @@ def test_round_duration_zero() -> None: with pytest.raises( InvalidOperationError, - match="cannot round a Duration to a non-positive duration", + match="cannot round a Duration to a non-positive Duration", ): durations.dt.round("0s") @@ -752,19 +752,19 @@ def test_round_expr() -> None: def test_round_negative() -> None: """Test that rounding to a negative duration gives a helpful error message.""" with pytest.raises( - InvalidOperationError, match="cannot round a Date to a non-positive duration" + InvalidOperationError, match="cannot round a Date to a non-positive Duration" ): pl.Series([date(1895, 5, 7)]).dt.round("-1m") with pytest.raises( InvalidOperationError, - match="cannot round a Datetime to a non-positive duration", + match="cannot round a Datetime to a non-positive Duration", ): pl.Series([datetime(1895, 5, 7)]).dt.round("-1m") with pytest.raises( InvalidOperationError, - match="cannot round a Duration to a non-positive duration", + match="cannot round a Duration to a non-positive Duration", ): pl.Series([timedelta(days=1)]).dt.round("-1m") From fc7a3c782a8afa45eff293868b375c3c38327271 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 15 Jul 2024 16:00:38 +0100 Subject: [PATCH 4/6] consistency --- crates/polars-time/src/round.rs | 4 ++-- crates/polars-time/src/truncate.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/polars-time/src/round.rs b/crates/polars-time/src/round.rs index f8b6d2a6201c..8a304958db32 100644 --- a/crates/polars-time/src/round.rs +++ b/crates/polars-time/src/round.rs @@ -136,7 +136,7 @@ impl PolarsRound for DurationChunked { if let Some(every) = every.get(0) { let every_parsed = Duration::parse(every); polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot round a Duration to a non-positive Duration"); - polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot round a Duration to a non-constant duration (i.e. one that involves weeks / months)"); + polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot round a Duration to a non-constant Duration (i.e. one that involves weeks / months)"); let every = match self.time_unit() { TimeUnit::Milliseconds => every_parsed.duration_ms(), TimeUnit::Microseconds => every_parsed.duration_us(), @@ -166,7 +166,7 @@ impl PolarsRound for DurationChunked { let every_parsed = *duration_cache.get_or_insert_with(every, |every| Duration::parse(every)); polars_ensure!(!every_parsed.negative, InvalidOperation: "cannot round a Duration to a negative duration"); - polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot round a Duration to a non-constant duration (i.e. one that involves weeks / months)"); + polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot round a Duration to a non-constant Duration (i.e. one that involves weeks / months)"); let every = match self.time_unit() { TimeUnit::Milliseconds => every_parsed.duration_ms(), TimeUnit::Microseconds => every_parsed.duration_us(), diff --git a/crates/polars-time/src/truncate.rs b/crates/polars-time/src/truncate.rs index cbb260756157..b7380378e1cd 100644 --- a/crates/polars-time/src/truncate.rs +++ b/crates/polars-time/src/truncate.rs @@ -133,7 +133,7 @@ impl PolarsTruncate for DurationChunked { if let Some(every) = every.get(0) { let every_parsed = Duration::parse(every); polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Duration to a non-positive Duration"); - polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot truncate a Duration to a non-constant duration (i.e. one that involves weeks / months)"); + polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot truncate a Duration to a non-constant Duration (i.e. one that involves weeks / months)"); let every = match self.time_unit() { TimeUnit::Milliseconds => every_parsed.duration_ms(), TimeUnit::Microseconds => every_parsed.duration_us(), @@ -159,7 +159,7 @@ impl PolarsTruncate for DurationChunked { let every_parsed = *duration_cache.get_or_insert_with(every, |every| Duration::parse(every)); polars_ensure!(!every_parsed.negative & !every_parsed.is_zero(), InvalidOperation: "cannot truncate a Duration to a non-positive Duration"); - polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot truncate a Duration to a non-constant duration (i.e. one that involves weeks / months)"); + polars_ensure!(every_parsed.is_constant_duration(None), InvalidOperation:"cannot truncate a Duration to a non-constant Duration (i.e. one that involves weeks / months)"); let every = match self.time_unit() { TimeUnit::Milliseconds => every_parsed.duration_ms(), TimeUnit::Microseconds => every_parsed.duration_us(), From c72d3c48d370cfd3b02247a93e4590c4a5b461de Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 15 Jul 2024 16:02:52 +0100 Subject: [PATCH 5/6] move tests to more appropriate location --- .../namespaces/temporal/test_datetime.py | 317 ------------------ .../namespaces/temporal/test_round.py | 174 +++++++++- .../namespaces/temporal/test_truncate.py | 147 ++++++++ 3 files changed, 320 insertions(+), 318 deletions(-) diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index fdfaef04893b..0329400012b9 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -452,323 +452,6 @@ def test_duration_extract_times( assert_series_equal(getattr(duration.dt, unit_attr)(), expected) -@pytest.mark.parametrize( - ("time_unit", "every"), - [ - ("ms", "1h"), - ("us", "1h0m0s"), - ("ns", timedelta(hours=1)), - ], - ids=["milliseconds", "microseconds", "nanoseconds"], -) -def test_truncate( - time_unit: TimeUnit, - every: str | timedelta, -) -> None: - start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2) - s = pl.datetime_range( - start, - stop, - timedelta(minutes=30), - time_unit=time_unit, - eager=True, - ).alias(f"dates[{time_unit}]") - - # can pass strings and time-deltas - out = s.dt.truncate(every) - assert out.dt[0] == start - assert out.dt[1] == start - assert out.dt[2] == start + timedelta(hours=1) - assert out.dt[3] == start + timedelta(hours=1) - # ... - assert out.dt[-3] == stop - timedelta(hours=1) - assert out.dt[-2] == stop - timedelta(hours=1) - assert out.dt[-1] == stop - - -@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) -def test_truncate_duration(time_unit: TimeUnit) -> None: - durations = pl.Series( - [ - timedelta(seconds=21), - timedelta(seconds=35), - timedelta(seconds=59), - None, - timedelta(seconds=-35), - ] - ).dt.cast_time_unit(time_unit) - - expected = pl.Series( - [ - timedelta(seconds=20), - timedelta(seconds=30), - timedelta(seconds=50), - None, - timedelta(seconds=-30), - ] - ).dt.cast_time_unit(time_unit) - - assert_series_equal(durations.dt.truncate("10s"), expected) - - -def test_truncate_duration_zero() -> None: - """Truncating to the nearest zero should raise a descriptive error.""" - durations = pl.Series([timedelta(seconds=21), timedelta(seconds=35)]) - - with pytest.raises( - InvalidOperationError, - match="cannot truncate a Duration to a non-positive Duration", - ): - durations.dt.truncate("0s") - - -def test_truncate_expressions() -> None: - df = pl.DataFrame( - { - "duration": [ - timedelta(seconds=20), - timedelta(seconds=21), - timedelta(seconds=22), - ], - "every": ["3s", "4s", "5s"], - } - ) - result = df.select(pl.col("duration").dt.truncate(pl.col("every")))["duration"] - expected = pl.Series( - "duration", - [timedelta(seconds=18), timedelta(seconds=20), timedelta(seconds=20)], - ) - assert_series_equal(result, expected) - - -@pytest.mark.parametrize("every_unit", ["mo", "q", "y"]) -def test_truncated_duration_non_constant(every_unit: str) -> None: - # Duration series can't be truncated to non-constant durations - df = pl.DataFrame( - { - "durations": [timedelta(seconds=1), timedelta(seconds=2)], - "every": ["1" + every_unit, "1" + every_unit], - } - ) - - with pytest.raises(InvalidOperationError): - df["durations"].dt.truncate("1" + every_unit) - - with pytest.raises(InvalidOperationError): - df.select(pl.col("durations").dt.truncate(pl.col("every"))) - - -def test_truncate_negative() -> None: - """Test that truncating to a negative duration gives a helpful error message.""" - df = pl.DataFrame( - { - "date": [date(1895, 5, 7), date(1955, 11, 5)], - "datetime": [datetime(1895, 5, 7), datetime(1955, 11, 5)], - "duration": [timedelta(minutes=1), timedelta(minutes=-1)], - "every": ["-1m", "1m"], - } - ) - with pytest.raises( - InvalidOperationError, match="cannot truncate a Date to a non-positive Duration" - ): - df.select(pl.col("date").dt.truncate("-1m")) - with pytest.raises( - InvalidOperationError, - match="cannot truncate a Datetime to a non-positive Duration", - ): - df.select(pl.col("datetime").dt.truncate("-1m")) - with pytest.raises( - InvalidOperationError, - match="cannot truncate a Duration to a non-positive Duration", - ): - df.select(pl.col("duration").dt.truncate("-1m")) - with pytest.raises( - InvalidOperationError, match="cannot truncate a Date to a non-positive Duration" - ): - df.select(pl.col("date").dt.truncate(pl.col("every"))) - with pytest.raises( - InvalidOperationError, - match="cannot truncate a Datetime to a non-positive Duration", - ): - df.select(pl.col("datetime").dt.truncate(pl.col("every"))) - with pytest.raises( - InvalidOperationError, - match="cannot truncate a Duration to a non-positive Duration", - ): - df.select(pl.col("duration").dt.truncate(pl.col("every"))) - - -@pytest.mark.parametrize( - ("time_unit", "every"), - [ - ("ms", "1h"), - ("us", "1h0m0s"), - ("ns", timedelta(hours=1)), - ], - ids=["milliseconds", "microseconds", "nanoseconds"], -) -def test_round( - time_unit: TimeUnit, - every: str | timedelta, -) -> None: - start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2) - s = pl.datetime_range( - start, - stop, - timedelta(minutes=30), - time_unit=time_unit, - eager=True, - ).alias(f"dates[{time_unit}]") - - # can pass strings and time-deltas - out = s.dt.round(every) - assert out.dt[0] == start - assert out.dt[1] == start + timedelta(hours=1) - assert out.dt[2] == start + timedelta(hours=1) - assert out.dt[3] == start + timedelta(hours=2) - # ... - assert out.dt[-3] == stop - timedelta(hours=1) - assert out.dt[-2] == stop - assert out.dt[-1] == stop - - -@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) -def test_round_duration(time_unit: TimeUnit) -> None: - durations = pl.Series( - [ - timedelta(seconds=21), - timedelta(seconds=35), - timedelta(seconds=59), - None, - timedelta(seconds=-35), - ] - ).dt.cast_time_unit(time_unit) - - expected = pl.Series( - [ - timedelta(seconds=20), - timedelta(seconds=40), - timedelta(seconds=60), - None, - timedelta(seconds=-40), - ] - ).dt.cast_time_unit(time_unit) - - assert_series_equal(durations.dt.round("10s"), expected) - - -def test_round_duration_zero() -> None: - """Rounding to the nearest zero should raise a descriptive error.""" - durations = pl.Series([timedelta(seconds=21), timedelta(seconds=35)]) - - with pytest.raises( - InvalidOperationError, - match="cannot round a Duration to a non-positive Duration", - ): - durations.dt.round("0s") - - -@pytest.mark.parametrize("every", ["mo", "q", "y"]) -def test_round_duration_non_constant(every: str) -> None: - # Duration series can't be rounded to non-constant durations - durations = pl.Series([timedelta(seconds=21)]) - - with pytest.raises(InvalidOperationError): - durations.dt.round("1" + every) - - -@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) -def test_round_duration_half(time_unit: TimeUnit) -> None: - # Values at halfway points should round away from zero - durations = pl.Series( - [timedelta(minutes=-30), timedelta(minutes=30), timedelta(minutes=90)] - ).dt.cast_time_unit(time_unit) - - expected = pl.Series( - [timedelta(hours=-1), timedelta(hours=1), timedelta(hours=2)] - ).dt.cast_time_unit(time_unit) - - assert_series_equal(durations.dt.round("1h"), expected) - - -def test_round_expr() -> None: - df = pl.DataFrame( - { - "date": [ - datetime(2022, 11, 14), - datetime(2023, 10, 11), - datetime(2022, 3, 20, 5, 7, 18), - datetime(2022, 4, 3, 13, 30, 32), - None, - datetime(2022, 12, 1), - ], - "every": ["1y", "1mo", "1m", "1m", "1mo", None], - } - ) - - output = df.select( - all_expr=pl.col("date").dt.round(every=pl.col("every")), - date_lit=pl.lit(datetime(2022, 4, 3, 13, 30, 32)).dt.round( - every=pl.col("every") - ), - every_lit=pl.col("date").dt.round("1d"), - ) - - expected = pl.DataFrame( - { - "all_expr": [ - datetime(2023, 1, 1), - datetime(2023, 10, 1), - datetime(2022, 3, 20, 5, 7), - datetime(2022, 4, 3, 13, 31), - None, - None, - ], - "date_lit": [ - datetime(2022, 1, 1), - datetime(2022, 4, 1), - datetime(2022, 4, 3, 13, 31), - datetime(2022, 4, 3, 13, 31), - datetime(2022, 4, 1), - None, - ], - "every_lit": [ - datetime(2022, 11, 14), - datetime(2023, 10, 11), - datetime(2022, 3, 20), - datetime(2022, 4, 4), - None, - datetime(2022, 12, 1), - ], - } - ) - - assert_frame_equal(output, expected) - - all_lit = pl.select(all_lit=pl.lit(datetime(2022, 3, 20, 5, 7)).dt.round("1h")) - assert all_lit.to_dict(as_series=False) == {"all_lit": [datetime(2022, 3, 20, 5)]} - - -def test_round_negative() -> None: - """Test that rounding to a negative duration gives a helpful error message.""" - with pytest.raises( - InvalidOperationError, match="cannot round a Date to a non-positive Duration" - ): - pl.Series([date(1895, 5, 7)]).dt.round("-1m") - - with pytest.raises( - InvalidOperationError, - match="cannot round a Datetime to a non-positive Duration", - ): - pl.Series([datetime(1895, 5, 7)]).dt.round("-1m") - - with pytest.raises( - InvalidOperationError, - match="cannot round a Duration to a non-positive Duration", - ): - pl.Series([timedelta(days=1)]).dt.round("-1m") - - @pytest.mark.parametrize( ("time_unit", "date_in_that_unit"), [ diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_round.py b/py-polars/tests/unit/operations/namespaces/temporal/test_round.py index 1ac7acc3edcd..25b7304208cf 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_round.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_round.py @@ -9,7 +9,8 @@ import polars as pl from polars._utils.convert import parse_as_duration_string -from polars.testing import assert_series_equal +from polars.exceptions import InvalidOperationError +from polars.testing import assert_frame_equal, assert_series_equal if TYPE_CHECKING: from zoneinfo import ZoneInfo @@ -189,3 +190,174 @@ def test_round_datetime_w_expression(time_unit: TimeUnit) -> None: result = df.select(pl.col("a").dt.round(pl.col("b")))["a"] assert result[0] == datetime(2020, 1, 1) assert result[1] == datetime(2020, 1, 21) + + +@pytest.mark.parametrize( + ("time_unit", "every"), + [ + ("ms", "1h"), + ("us", "1h0m0s"), + ("ns", timedelta(hours=1)), + ], + ids=["milliseconds", "microseconds", "nanoseconds"], +) +def test_round( + time_unit: TimeUnit, + every: str | timedelta, +) -> None: + start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2) + s = pl.datetime_range( + start, + stop, + timedelta(minutes=30), + time_unit=time_unit, + eager=True, + ).alias(f"dates[{time_unit}]") + + # can pass strings and time-deltas + out = s.dt.round(every) + assert out.dt[0] == start + assert out.dt[1] == start + timedelta(hours=1) + assert out.dt[2] == start + timedelta(hours=1) + assert out.dt[3] == start + timedelta(hours=2) + # ... + assert out.dt[-3] == stop - timedelta(hours=1) + assert out.dt[-2] == stop + assert out.dt[-1] == stop + + +@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) +def test_round_duration(time_unit: TimeUnit) -> None: + durations = pl.Series( + [ + timedelta(seconds=21), + timedelta(seconds=35), + timedelta(seconds=59), + None, + timedelta(seconds=-35), + ] + ).dt.cast_time_unit(time_unit) + + expected = pl.Series( + [ + timedelta(seconds=20), + timedelta(seconds=40), + timedelta(seconds=60), + None, + timedelta(seconds=-40), + ] + ).dt.cast_time_unit(time_unit) + + assert_series_equal(durations.dt.round("10s"), expected) + + +def test_round_duration_zero() -> None: + """Rounding to the nearest zero should raise a descriptive error.""" + durations = pl.Series([timedelta(seconds=21), timedelta(seconds=35)]) + + with pytest.raises( + InvalidOperationError, + match="cannot round a Duration to a non-positive Duration", + ): + durations.dt.round("0s") + + +@pytest.mark.parametrize("every", ["mo", "q", "y"]) +def test_round_duration_non_constant(every: str) -> None: + # Duration series can't be rounded to non-constant durations + durations = pl.Series([timedelta(seconds=21)]) + + with pytest.raises(InvalidOperationError): + durations.dt.round("1" + every) + + +@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) +def test_round_duration_half(time_unit: TimeUnit) -> None: + # Values at halfway points should round away from zero + durations = pl.Series( + [timedelta(minutes=-30), timedelta(minutes=30), timedelta(minutes=90)] + ).dt.cast_time_unit(time_unit) + + expected = pl.Series( + [timedelta(hours=-1), timedelta(hours=1), timedelta(hours=2)] + ).dt.cast_time_unit(time_unit) + + assert_series_equal(durations.dt.round("1h"), expected) + + +def test_round_expr() -> None: + df = pl.DataFrame( + { + "date": [ + datetime(2022, 11, 14), + datetime(2023, 10, 11), + datetime(2022, 3, 20, 5, 7, 18), + datetime(2022, 4, 3, 13, 30, 32), + None, + datetime(2022, 12, 1), + ], + "every": ["1y", "1mo", "1m", "1m", "1mo", None], + } + ) + + output = df.select( + all_expr=pl.col("date").dt.round(every=pl.col("every")), + date_lit=pl.lit(datetime(2022, 4, 3, 13, 30, 32)).dt.round( + every=pl.col("every") + ), + every_lit=pl.col("date").dt.round("1d"), + ) + + expected = pl.DataFrame( + { + "all_expr": [ + datetime(2023, 1, 1), + datetime(2023, 10, 1), + datetime(2022, 3, 20, 5, 7), + datetime(2022, 4, 3, 13, 31), + None, + None, + ], + "date_lit": [ + datetime(2022, 1, 1), + datetime(2022, 4, 1), + datetime(2022, 4, 3, 13, 31), + datetime(2022, 4, 3, 13, 31), + datetime(2022, 4, 1), + None, + ], + "every_lit": [ + datetime(2022, 11, 14), + datetime(2023, 10, 11), + datetime(2022, 3, 20), + datetime(2022, 4, 4), + None, + datetime(2022, 12, 1), + ], + } + ) + + assert_frame_equal(output, expected) + + all_lit = pl.select(all_lit=pl.lit(datetime(2022, 3, 20, 5, 7)).dt.round("1h")) + assert all_lit.to_dict(as_series=False) == {"all_lit": [datetime(2022, 3, 20, 5)]} + + +def test_round_negative() -> None: + """Test that rounding to a negative duration gives a helpful error message.""" + with pytest.raises( + InvalidOperationError, match="cannot round a Date to a non-positive Duration" + ): + pl.Series([date(1895, 5, 7)]).dt.round("-1m") + + with pytest.raises( + InvalidOperationError, + match="cannot round a Datetime to a non-positive Duration", + ): + pl.Series([datetime(1895, 5, 7)]).dt.round("-1m") + + with pytest.raises( + InvalidOperationError, + match="cannot round a Duration to a non-positive Duration", + ): + pl.Series([timedelta(days=1)]).dt.round("-1m") diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py b/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py index f56d356b0457..7a7fee428d5b 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py @@ -9,6 +9,7 @@ import polars as pl from polars._utils.convert import parse_as_duration_string +from polars.exceptions import InvalidOperationError from polars.testing import assert_series_equal if TYPE_CHECKING: @@ -119,3 +120,149 @@ def test_fast_path_vs_slow_path(datetimes: list[datetime], every: str) -> None: # Definitely uses slowpath: expected = s.dt.truncate(pl.Series([every] * len(datetimes))) assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + ("time_unit", "every"), + [ + ("ms", "1h"), + ("us", "1h0m0s"), + ("ns", timedelta(hours=1)), + ], + ids=["milliseconds", "microseconds", "nanoseconds"], +) +def test_truncate( + time_unit: TimeUnit, + every: str | timedelta, +) -> None: + start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2) + s = pl.datetime_range( + start, + stop, + timedelta(minutes=30), + time_unit=time_unit, + eager=True, + ).alias(f"dates[{time_unit}]") + + # can pass strings and time-deltas + out = s.dt.truncate(every) + assert out.dt[0] == start + assert out.dt[1] == start + assert out.dt[2] == start + timedelta(hours=1) + assert out.dt[3] == start + timedelta(hours=1) + # ... + assert out.dt[-3] == stop - timedelta(hours=1) + assert out.dt[-2] == stop - timedelta(hours=1) + assert out.dt[-1] == stop + + +@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) +def test_truncate_duration(time_unit: TimeUnit) -> None: + durations = pl.Series( + [ + timedelta(seconds=21), + timedelta(seconds=35), + timedelta(seconds=59), + None, + timedelta(seconds=-35), + ] + ).dt.cast_time_unit(time_unit) + + expected = pl.Series( + [ + timedelta(seconds=20), + timedelta(seconds=30), + timedelta(seconds=50), + None, + timedelta(seconds=-30), + ] + ).dt.cast_time_unit(time_unit) + + assert_series_equal(durations.dt.truncate("10s"), expected) + + +def test_truncate_duration_zero() -> None: + """Truncating to the nearest zero should raise a descriptive error.""" + durations = pl.Series([timedelta(seconds=21), timedelta(seconds=35)]) + + with pytest.raises( + InvalidOperationError, + match="cannot truncate a Duration to a non-positive Duration", + ): + durations.dt.truncate("0s") + + +def test_truncate_expressions() -> None: + df = pl.DataFrame( + { + "duration": [ + timedelta(seconds=20), + timedelta(seconds=21), + timedelta(seconds=22), + ], + "every": ["3s", "4s", "5s"], + } + ) + result = df.select(pl.col("duration").dt.truncate(pl.col("every")))["duration"] + expected = pl.Series( + "duration", + [timedelta(seconds=18), timedelta(seconds=20), timedelta(seconds=20)], + ) + assert_series_equal(result, expected) + + +@pytest.mark.parametrize("every_unit", ["mo", "q", "y"]) +def test_truncated_duration_non_constant(every_unit: str) -> None: + # Duration series can't be truncated to non-constant durations + df = pl.DataFrame( + { + "durations": [timedelta(seconds=1), timedelta(seconds=2)], + "every": ["1" + every_unit, "1" + every_unit], + } + ) + + with pytest.raises(InvalidOperationError): + df["durations"].dt.truncate("1" + every_unit) + + with pytest.raises(InvalidOperationError): + df.select(pl.col("durations").dt.truncate(pl.col("every"))) + + +def test_truncate_negative() -> None: + """Test that truncating to a negative duration gives a helpful error message.""" + df = pl.DataFrame( + { + "date": [date(1895, 5, 7), date(1955, 11, 5)], + "datetime": [datetime(1895, 5, 7), datetime(1955, 11, 5)], + "duration": [timedelta(minutes=1), timedelta(minutes=-1)], + "every": ["-1m", "1m"], + } + ) + with pytest.raises( + InvalidOperationError, match="cannot truncate a Date to a non-positive Duration" + ): + df.select(pl.col("date").dt.truncate("-1m")) + with pytest.raises( + InvalidOperationError, + match="cannot truncate a Datetime to a non-positive Duration", + ): + df.select(pl.col("datetime").dt.truncate("-1m")) + with pytest.raises( + InvalidOperationError, + match="cannot truncate a Duration to a non-positive Duration", + ): + df.select(pl.col("duration").dt.truncate("-1m")) + with pytest.raises( + InvalidOperationError, match="cannot truncate a Date to a non-positive Duration" + ): + df.select(pl.col("date").dt.truncate(pl.col("every"))) + with pytest.raises( + InvalidOperationError, + match="cannot truncate a Datetime to a non-positive Duration", + ): + df.select(pl.col("datetime").dt.truncate(pl.col("every"))) + with pytest.raises( + InvalidOperationError, + match="cannot truncate a Duration to a non-positive Duration", + ): + df.select(pl.col("duration").dt.truncate(pl.col("every"))) From 1554a579a9464d9f2e3d4e84435bd1e5fe927c22 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 15 Jul 2024 17:31:47 +0100 Subject: [PATCH 6/6] align formulae between datetime and duration --- crates/polars-time/src/round.rs | 21 +++++++------------ crates/polars-time/src/truncate.rs | 14 +++++++------ .../namespaces/temporal/test_truncate.py | 2 +- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/crates/polars-time/src/round.rs b/crates/polars-time/src/round.rs index 8a304958db32..5821f3e786fb 100644 --- a/crates/polars-time/src/round.rs +++ b/crates/polars-time/src/round.rs @@ -12,6 +12,11 @@ pub trait PolarsRound { Self: Sized; } +fn simple_round(t: i64, every: i64) -> i64 { + let half_away = t.signum() * every / 2; + t + half_away - (t + half_away) % every +} + impl PolarsRound for DatetimeChunked { fn round(&self, every: &StringChunked, tz: Option<&Tz>) -> PolarsResult { let time_zone = self.time_zone(); @@ -33,11 +38,7 @@ impl PolarsRound for DatetimeChunked { TimeUnit::Nanoseconds => every_parsed.duration_ns(), }; return Ok(self - .apply_values(|t| { - // Round half-way values away from zero - let half_away = t.signum() * every / 2; - t + half_away - (t + half_away) % every - }) + .apply_values(|t| simple_round(t, every)) .into_datetime(self.time_unit(), time_zone.clone())); } else { let w = Window::new(every_parsed, every_parsed, offset); @@ -143,11 +144,7 @@ impl PolarsRound for DurationChunked { TimeUnit::Nanoseconds => every_parsed.duration_ns(), }; return Ok(self - .apply_values(|t| { - // Round half-way values away from zero - let half_away = t.signum() * every / 2; - t + half_away - (t + half_away) % every - }) + .apply_values(|t| simple_round(t, every)) .into_duration(self.time_unit())); } else { return Ok(Int64Chunked::full_null(self.name(), self.len()) @@ -172,9 +169,7 @@ impl PolarsRound for DurationChunked { TimeUnit::Microseconds => every_parsed.duration_us(), TimeUnit::Nanoseconds => every_parsed.duration_ns(), }; - // Round half-way values away from zero - let half_away = t.signum() * every / 2; - Ok(Some(t + half_away - (t + half_away) % every)) + Ok(Some(simple_round(t, every))) }, _ => Ok(None), }); diff --git a/crates/polars-time/src/truncate.rs b/crates/polars-time/src/truncate.rs index b7380378e1cd..25d6c3f2a206 100644 --- a/crates/polars-time/src/truncate.rs +++ b/crates/polars-time/src/truncate.rs @@ -12,6 +12,11 @@ pub trait PolarsTruncate { Self: Sized; } +fn simple_truncate(t: i64, every: i64) -> i64 { + let remainder = t % every; + t - (remainder + every * (remainder < 0) as i64) +} + impl PolarsTruncate for DatetimeChunked { fn truncate(&self, tz: Option<&Tz>, every: &StringChunked) -> PolarsResult { let time_zone = self.time_zone(); @@ -33,10 +38,7 @@ impl PolarsTruncate for DatetimeChunked { TimeUnit::Nanoseconds => every_parsed.duration_ns(), }; return Ok(self - .apply_values(|t| { - let remainder = t % every; - t - (remainder + every * (remainder < 0) as i64) - }) + .apply_values(|t| simple_truncate(t, every)) .into_datetime(self.time_unit(), time_zone.clone())); } else { let w = Window::new(every_parsed, every_parsed, offset); @@ -140,7 +142,7 @@ impl PolarsTruncate for DurationChunked { TimeUnit::Nanoseconds => every_parsed.duration_ns(), }; return Ok(self - .apply_values(|t| t - t % every) + .apply_values(|t: i64| simple_truncate(t, every)) .into_duration(self.time_unit())); } else { return Ok(Int64Chunked::full_null(self.name(), self.len()) @@ -165,7 +167,7 @@ impl PolarsTruncate for DurationChunked { TimeUnit::Microseconds => every_parsed.duration_us(), TimeUnit::Nanoseconds => every_parsed.duration_ns(), }; - Ok(Some(t - t % every)) + Ok(Some(simple_truncate(t, every))) }, _ => Ok(None), }); diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py b/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py index 7a7fee428d5b..868422ac65e4 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_truncate.py @@ -174,7 +174,7 @@ def test_truncate_duration(time_unit: TimeUnit) -> None: timedelta(seconds=30), timedelta(seconds=50), None, - timedelta(seconds=-30), + timedelta(seconds=-40), ] ).dt.cast_time_unit(time_unit)