From aae6d5885d89921a6c248d2cb70a917a1c305d00 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Tue, 1 Aug 2023 22:49:48 +0200 Subject: [PATCH] Add Rust side & test --- .../src/dsl/function_expr/datetime.rs | 7 ++ .../polars-plan/src/dsl/function_expr/mod.rs | 7 ++ .../src/dsl/function_expr/schema.rs | 10 ++ .../src/dsl/function_expr/temporal.rs | 94 +++++++++++++++++++ .../polars-plan/src/dsl/functions/temporal.rs | 85 ++--------------- .../tests/unit/functions/test_as_datatype.py | 25 +++++ 6 files changed, 153 insertions(+), 75 deletions(-) diff --git a/crates/polars-plan/src/dsl/function_expr/datetime.rs b/crates/polars-plan/src/dsl/function_expr/datetime.rs index c1d9054e702bf..c5af4c5f2772e 100644 --- a/crates/polars-plan/src/dsl/function_expr/datetime.rs +++ b/crates/polars-plan/src/dsl/function_expr/datetime.rs @@ -64,6 +64,11 @@ pub enum TemporalFunction { closed: ClosedWindow, }, Combine(TimeUnit), + #[cfg(feature = "timezones")] + DatetimeFunction { + time_unit: TimeUnit, + time_zone: Option, + }, } impl Display for TemporalFunction { @@ -105,6 +110,8 @@ impl Display for TemporalFunction { DateRanges { .. } => return write!(f, "date_ranges"), TimeRange { .. } => return write!(f, "time_range"), TimeRanges { .. } => return write!(f, "time_ranges"), + #[cfg(feature = "timezones")] + DatetimeFunction { .. } => return write!(f, "datetime"), Combine(_) => "combine", }; write!(f, "dt.{s}") diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index 3dd74b58331f8..7a437f5ed7951 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -799,6 +799,13 @@ impl From for SpecialEq> { None ) } + #[cfg(feature = "timezones")] + DatetimeFunction { + time_unit, + time_zone, + } => { + map_as_slice!(temporal::datetime, &time_unit, time_zone.as_deref()) + } } } } diff --git a/crates/polars-plan/src/dsl/function_expr/schema.rs b/crates/polars-plan/src/dsl/function_expr/schema.rs index 82dd321fd8532..19b164f95b3cf 100644 --- a/crates/polars-plan/src/dsl/function_expr/schema.rs +++ b/crates/polars-plan/src/dsl/function_expr/schema.rs @@ -96,6 +96,16 @@ impl FunctionExpr { DataType::List(Box::new(DataType::Time)), )); } + #[cfg(feature = "timezones")] + DatetimeFunction { + time_unit, + time_zone, + } => { + return Ok(Field::new( + "datetime", + DataType::Datetime(*time_unit, time_zone.clone()), + )); + } Combine(tu) => match mapper.with_same_dtype().unwrap().dtype { DataType::Datetime(_, tz) => DataType::Datetime(*tu, tz), DataType::Date => DataType::Datetime(*tu, None), diff --git a/crates/polars-plan/src/dsl/function_expr/temporal.rs b/crates/polars-plan/src/dsl/function_expr/temporal.rs index ae07aa4fd6360..6afbc61525b0f 100644 --- a/crates/polars-plan/src/dsl/function_expr/temporal.rs +++ b/crates/polars-plan/src/dsl/function_expr/temporal.rs @@ -6,6 +6,100 @@ use polars_time::prelude::*; use super::*; +#[cfg(feature = "timezones")] +pub(super) fn datetime( + s: &[Series], + time_unit: &TimeUnit, + time_zone: Option<&str>, +) -> PolarsResult { + use polars_core::export::chrono::NaiveDate; + use polars_core::utils::CustomIterTools; + + let year = &s[0]; + let month = &s[1]; + let day = &s[2]; + let hour = &s[3]; + let minute = &s[4]; + let second = &s[5]; + let microsecond = &s[6]; + + let max_len = s.iter().map(|s| s.len()).max().unwrap(); + + let mut year = year.cast(&DataType::Int32)?; + if year.len() < max_len { + year = year.new_from_index(0, max_len) + } + let year = year.i32()?; + + let mut month = month.cast(&DataType::UInt32)?; + if month.len() < max_len { + month = month.new_from_index(0, max_len); + } + let month = month.u32()?; + + let mut day = day.cast(&DataType::UInt32)?; + if day.len() < max_len { + day = day.new_from_index(0, max_len); + } + let day = day.u32()?; + + let mut hour = hour.cast(&DataType::UInt32)?; + if hour.len() < max_len { + hour = hour.new_from_index(0, max_len); + } + let hour = hour.u32()?; + + let mut minute = minute.cast(&DataType::UInt32)?; + if minute.len() < max_len { + minute = minute.new_from_index(0, max_len); + } + let minute = minute.u32()?; + + let mut second = second.cast(&DataType::UInt32)?; + if second.len() < max_len { + second = second.new_from_index(0, max_len); + } + let second = second.u32()?; + + let mut microsecond = microsecond.cast(&DataType::UInt32)?; + if microsecond.len() < max_len { + microsecond = microsecond.new_from_index(0, max_len); + } + let microsecond = microsecond.u32()?; + + let ca: Int64Chunked = year + .into_iter() + .zip(month) + .zip(day) + .zip(hour) + .zip(minute) + .zip(second) + .zip(microsecond) + .map(|((((((y, m), d), h), mnt), s), us)| { + if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(us)) = + (y, m, d, h, mnt, s, us) + { + NaiveDate::from_ymd_opt(y, m, d) + .and_then(|nd| nd.and_hms_micro_opt(h, mnt, s, us)) + .map(|ndt| match time_unit { + TimeUnit::Milliseconds => ndt.timestamp_millis(), + TimeUnit::Microseconds => ndt.timestamp_micros(), + TimeUnit::Nanoseconds => ndt.timestamp_nanos(), + }) + } else { + None + } + }) + .collect_trusted(); + + let mut ca = ca.into_datetime(*time_unit, None); + ca = replace_time_zone(&ca, time_zone, None)?; + + let mut s = ca.into_series(); + s.rename("datetime"); + Ok(s) +} + #[cfg(feature = "date_offset")] pub(super) fn date_offset(s: Series, offset: Duration) -> PolarsResult { let preserve_sortedness: bool; diff --git a/crates/polars-plan/src/dsl/functions/temporal.rs b/crates/polars-plan/src/dsl/functions/temporal.rs index d6b2788e7c48e..805c08f6a3c87 100644 --- a/crates/polars-plan/src/dsl/functions/temporal.rs +++ b/crates/polars-plan/src/dsl/functions/temporal.rs @@ -103,11 +103,8 @@ impl DatetimeArgs { } /// Construct a column of `Datetime` from the provided [`DatetimeArgs`]. -#[cfg(feature = "temporal")] +#[cfg(all(feature = "timezones", feature = "temporal"))] pub fn datetime(args: DatetimeArgs) -> Expr { - use polars_core::export::chrono::NaiveDate; - use polars_core::utils::CustomIterTools; - let year = args.year; let month = args.month; let day = args.day; @@ -115,79 +112,17 @@ pub fn datetime(args: DatetimeArgs) -> Expr { let minute = args.minute; let second = args.second; let microsecond = args.microsecond; + let time_unit = args.time_unit; + let time_zone = args.time_zone; - let function = SpecialEq::new(Arc::new(move |s: &mut [Series]| { - assert_eq!(s.len(), 7); - let max_len = s.iter().map(|s| s.len()).max().unwrap(); - let mut year = s[0].cast(&DataType::Int32)?; - if year.len() < max_len { - year = year.new_from_index(0, max_len) - } - let year = year.i32()?; - let mut month = s[1].cast(&DataType::UInt32)?; - if month.len() < max_len { - month = month.new_from_index(0, max_len); - } - let month = month.u32()?; - let mut day = s[2].cast(&DataType::UInt32)?; - if day.len() < max_len { - day = day.new_from_index(0, max_len); - } - let day = day.u32()?; - let mut hour = s[3].cast(&DataType::UInt32)?; - if hour.len() < max_len { - hour = hour.new_from_index(0, max_len); - } - let hour = hour.u32()?; - - let mut minute = s[4].cast(&DataType::UInt32)?; - if minute.len() < max_len { - minute = minute.new_from_index(0, max_len); - } - let minute = minute.u32()?; + let input = vec![year, month, day, hour, minute, second, microsecond]; - let mut second = s[5].cast(&DataType::UInt32)?; - if second.len() < max_len { - second = second.new_from_index(0, max_len); - } - let second = second.u32()?; - - let mut microsecond = s[6].cast(&DataType::UInt32)?; - if microsecond.len() < max_len { - microsecond = microsecond.new_from_index(0, max_len); - } - let microsecond = microsecond.u32()?; - - let ca: Int64Chunked = year - .into_iter() - .zip(month) - .zip(day) - .zip(hour) - .zip(minute) - .zip(second) - .zip(microsecond) - .map(|((((((y, m), d), h), mnt), s), us)| { - if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(us)) = - (y, m, d, h, mnt, s, us) - { - NaiveDate::from_ymd_opt(y, m, d) - .and_then(|nd| nd.and_hms_micro_opt(h, mnt, s, us)) - .map(|ndt| ndt.timestamp_micros()) - } else { - None - } - }) - .collect_trusted(); - - let mut s = ca.into_datetime(TimeUnit::Microseconds, None).into_series(); - s.rename("datetime"); - Ok(Some(s)) - }) as Arc); - - Expr::AnonymousFunction { - input: vec![year, month, day, hour, minute, second, microsecond], - function, - output_type: GetOutput::from_type(DataType::Datetime(TimeUnit::Microseconds, None)), + Expr::Function { + input, + function: FunctionExpr::TemporalExpr(TemporalFunction::DatetimeFunction { + time_unit, + time_zone, + }), options: FunctionOptions { collect_groups: ApplyOptions::ApplyFlat, allow_rename: true, diff --git a/py-polars/tests/unit/functions/test_as_datatype.py b/py-polars/tests/unit/functions/test_as_datatype.py index 73a027f040934..55373e6af1faf 100644 --- a/py-polars/tests/unit/functions/test_as_datatype.py +++ b/py-polars/tests/unit/functions/test_as_datatype.py @@ -1,10 +1,16 @@ +from __future__ import annotations + from datetime import date, datetime +from typing import TYPE_CHECKING import pytest import polars as pl from polars.testing import assert_frame_equal, assert_series_equal +if TYPE_CHECKING: + from polars.type_aliases import TimeUnit + def test_date_datetime() -> None: df = pl.DataFrame( @@ -24,6 +30,25 @@ def test_date_datetime() -> None: assert_series_equal(out["h2"], df["hour"].rename("h2")) +@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"]) +def test_datetime_time_unit(time_unit: TimeUnit) -> None: + result = pl.datetime(2022, 1, 2, time_unit=time_unit) + + assert pl.select(result.dt.year()).item() == 2022 + assert pl.select(result.dt.month()).item() == 1 + assert pl.select(result.dt.day()).item() == 2 + + +@pytest.mark.parametrize("time_zone", [None, "Europe/Amsterdam", "UTC"]) +def test_datetime_time_zone(time_zone: str | None) -> None: + result = pl.datetime(2022, 1, 2, 10, time_zone=time_zone) + + assert pl.select(result.dt.year()).item() == 2022 + assert pl.select(result.dt.month()).item() == 1 + assert pl.select(result.dt.day()).item() == 2 + assert pl.select(result.dt.hour()).item() == 10 + + def test_time() -> None: df = pl.DataFrame( {