Skip to content

Commit

Permalink
feat(rust!,python): Extend datetime expression function with time z…
Browse files Browse the repository at this point in the history
…one/time unit parameters (#10235)
  • Loading branch information
stinodego authored Aug 3, 2023
1 parent a7e524b commit 873d18e
Show file tree
Hide file tree
Showing 10 changed files with 289 additions and 107 deletions.
6 changes: 6 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ pub enum TemporalFunction {
closed: ClosedWindow,
},
Combine(TimeUnit),
DatetimeFunction {
time_unit: TimeUnit,
time_zone: Option<TimeZone>,
use_earliest: Option<bool>,
},
}

impl Display for TemporalFunction {
Expand Down Expand Up @@ -105,6 +110,7 @@ impl Display for TemporalFunction {
DateRanges { .. } => return write!(f, "date_ranges"),
TimeRange { .. } => return write!(f, "time_range"),
TimeRanges { .. } => return write!(f, "time_ranges"),
DatetimeFunction { .. } => return write!(f, "datetime"),
Combine(_) => "combine",
};
write!(f, "dt.{s}")
Expand Down
12 changes: 12 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,18 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
None
)
}
DatetimeFunction {
time_unit,
time_zone,
use_earliest,
} => {
map_as_slice!(
temporal::datetime,
&time_unit,
time_zone.as_deref(),
use_earliest
)
}
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ impl FunctionExpr {
DataType::List(Box::new(DataType::Time)),
));
}
DatetimeFunction {
time_unit,
time_zone,
use_earliest: _,
} => {
return Ok(Field::new(
"datetime",
DataType::Datetime(*time_unit, time_zone.clone()),
));
}
Combine(tu) => match mapper.with_same_dtype().unwrap().dtype {
DataType::Datetime(_, tz) => DataType::Datetime(*tu, tz),
DataType::Date => DataType::Datetime(*tu, None),
Expand Down
107 changes: 107 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,113 @@ use polars_time::prelude::*;

use super::*;

pub(super) fn datetime(
s: &[Series],
time_unit: &TimeUnit,
time_zone: Option<&str>,
use_earliest: Option<bool>,
) -> PolarsResult<Series> {
use polars_core::export::chrono::NaiveDate;
use polars_core::utils::CustomIterTools;

let year = &s[0];
let month = &s[1];
let day = &s[2];
let hour = &s[3];
let minute = &s[4];
let second = &s[5];
let microsecond = &s[6];

let max_len = s.iter().map(|s| s.len()).max().unwrap();

let mut year = year.cast(&DataType::Int32)?;
if year.len() < max_len {
year = year.new_from_index(0, max_len)
}
let year = year.i32()?;

let mut month = month.cast(&DataType::UInt32)?;
if month.len() < max_len {
month = month.new_from_index(0, max_len);
}
let month = month.u32()?;

let mut day = day.cast(&DataType::UInt32)?;
if day.len() < max_len {
day = day.new_from_index(0, max_len);
}
let day = day.u32()?;

let mut hour = hour.cast(&DataType::UInt32)?;
if hour.len() < max_len {
hour = hour.new_from_index(0, max_len);
}
let hour = hour.u32()?;

let mut minute = minute.cast(&DataType::UInt32)?;
if minute.len() < max_len {
minute = minute.new_from_index(0, max_len);
}
let minute = minute.u32()?;

let mut second = second.cast(&DataType::UInt32)?;
if second.len() < max_len {
second = second.new_from_index(0, max_len);
}
let second = second.u32()?;

let mut microsecond = microsecond.cast(&DataType::UInt32)?;
if microsecond.len() < max_len {
microsecond = microsecond.new_from_index(0, max_len);
}
let microsecond = microsecond.u32()?;

let ca: Int64Chunked = year
.into_iter()
.zip(month)
.zip(day)
.zip(hour)
.zip(minute)
.zip(second)
.zip(microsecond)
.map(|((((((y, m), d), h), mnt), s), us)| {
if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(us)) =
(y, m, d, h, mnt, s, us)
{
NaiveDate::from_ymd_opt(y, m, d)
.and_then(|nd| nd.and_hms_micro_opt(h, mnt, s, us))
.map(|ndt| match time_unit {
TimeUnit::Milliseconds => ndt.timestamp_millis(),
TimeUnit::Microseconds => ndt.timestamp_micros(),
TimeUnit::Nanoseconds => ndt.timestamp_nanos(),
})
} else {
None
}
})
.collect_trusted();

let ca = match time_zone {
#[cfg(feature = "timezones")]
Some(_) => {
let mut ca = ca.into_datetime(*time_unit, None);
ca = replace_time_zone(&ca, time_zone, use_earliest)?;
ca
}
_ => {
polars_ensure!(
time_zone.is_none() && use_earliest.is_none(),
ComputeError: "cannot make use of the `time_zone` and `use_earliest` arguments without the 'timezones' feature enabled."
);
ca.into_datetime(*time_unit, None)
}
};

let mut s = ca.into_series();
s.rename("datetime");
Ok(s)
}

#[cfg(feature = "date_offset")]
pub(super) fn date_offset(s: Series, offset: Duration) -> PolarsResult<Series> {
let preserve_sortedness: bool;
Expand Down
127 changes: 48 additions & 79 deletions crates/polars-plan/src/dsl/functions/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,26 @@ pub struct DatetimeArgs {
pub minute: Expr,
pub second: Expr,
pub microsecond: Expr,
pub time_unit: TimeUnit,
pub time_zone: Option<TimeZone>,
pub use_earliest: Option<bool>,
}

impl Default for DatetimeArgs {
fn default() -> Self {
Self {
year: lit(1970),
month: lit(1),
day: lit(1),
hour: lit(0),
minute: lit(0),
second: lit(0),
microsecond: lit(0),
time_unit: TimeUnit::Microseconds,
time_zone: None,
use_earliest: None,
}
}
}

impl DatetimeArgs {
Expand All @@ -47,10 +67,7 @@ impl DatetimeArgs {
year,
month,
day,
hour: lit(0),
minute: lit(0),
second: lit(0),
microsecond: lit(0),
..Default::default()
}
}

Expand Down Expand Up @@ -78,102 +95,54 @@ impl DatetimeArgs {
impl_unit_setter!(with_minute(minute));
impl_unit_setter!(with_second(second));
impl_unit_setter!(with_microsecond(microsecond));

/// Returns these arguments with `time_unit` replaced; all other fields are kept.
pub fn with_time_unit(mut self, time_unit: TimeUnit) -> Self {
    self.time_unit = time_unit;
    self
}
/// Returns these arguments with `time_zone` replaced; all other fields are kept.
///
/// Only available with the `timezones` feature.
#[cfg(feature = "timezones")]
pub fn with_time_zone(mut self, time_zone: Option<TimeZone>) -> Self {
    self.time_zone = time_zone;
    self
}
/// Returns these arguments with `use_earliest` replaced; all other fields are kept.
///
/// Only available with the `timezones` feature.
#[cfg(feature = "timezones")]
pub fn with_use_earliest(mut self, use_earliest: Option<bool>) -> Self {
    self.use_earliest = use_earliest;
    self
}
}

/// Construct a column of `Datetime` from the provided [`DatetimeArgs`].
#[cfg(feature = "temporal")]
pub fn datetime(args: DatetimeArgs) -> Expr {
use polars_core::export::chrono::NaiveDate;
use polars_core::utils::CustomIterTools;

let year = args.year;
let month = args.month;
let day = args.day;
let hour = args.hour;
let minute = args.minute;
let second = args.second;
let microsecond = args.microsecond;
let time_unit = args.time_unit;
let time_zone = args.time_zone;
let use_earliest = args.use_earliest;

let function = SpecialEq::new(Arc::new(move |s: &mut [Series]| {
assert_eq!(s.len(), 7);
let max_len = s.iter().map(|s| s.len()).max().unwrap();
let mut year = s[0].cast(&DataType::Int32)?;
if year.len() < max_len {
year = year.new_from_index(0, max_len)
}
let year = year.i32()?;
let mut month = s[1].cast(&DataType::UInt32)?;
if month.len() < max_len {
month = month.new_from_index(0, max_len);
}
let month = month.u32()?;
let mut day = s[2].cast(&DataType::UInt32)?;
if day.len() < max_len {
day = day.new_from_index(0, max_len);
}
let day = day.u32()?;
let mut hour = s[3].cast(&DataType::UInt32)?;
if hour.len() < max_len {
hour = hour.new_from_index(0, max_len);
}
let hour = hour.u32()?;

let mut minute = s[4].cast(&DataType::UInt32)?;
if minute.len() < max_len {
minute = minute.new_from_index(0, max_len);
}
let minute = minute.u32()?;

let mut second = s[5].cast(&DataType::UInt32)?;
if second.len() < max_len {
second = second.new_from_index(0, max_len);
}
let second = second.u32()?;

let mut microsecond = s[6].cast(&DataType::UInt32)?;
if microsecond.len() < max_len {
microsecond = microsecond.new_from_index(0, max_len);
}
let microsecond = microsecond.u32()?;
let input = vec![year, month, day, hour, minute, second, microsecond];

let ca: Int64Chunked = year
.into_iter()
.zip(month)
.zip(day)
.zip(hour)
.zip(minute)
.zip(second)
.zip(microsecond)
.map(|((((((y, m), d), h), mnt), s), us)| {
if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(us)) =
(y, m, d, h, mnt, s, us)
{
NaiveDate::from_ymd_opt(y, m, d)
.and_then(|nd| nd.and_hms_micro_opt(h, mnt, s, us))
.map(|ndt| ndt.timestamp_micros())
} else {
None
}
})
.collect_trusted();

Ok(Some(
ca.into_datetime(TimeUnit::Microseconds, None).into_series(),
))
}) as Arc<dyn SeriesUdf>);

Expr::AnonymousFunction {
input: vec![year, month, day, hour, minute, second, microsecond],
function,
output_type: GetOutput::from_type(DataType::Datetime(TimeUnit::Microseconds, None)),
Expr::Function {
input,
function: FunctionExpr::TemporalExpr(TemporalFunction::DatetimeFunction {
time_unit,
time_zone,
use_earliest,
}),
options: FunctionOptions {
collect_groups: ApplyOptions::ApplyFlat,
allow_rename: true,
input_wildcard_expansion: true,
fmt_str: "datetime",
..Default::default()
},
}
.alias("datetime")
}

/// Arguments used by `duration` in order to produce an `Expr` of `Duration`
Expand Down
14 changes: 8 additions & 6 deletions py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,10 @@ def truncate(
Offset the window
use_earliest
Determine how to deal with ambiguous datetimes:
- None (default): raise;
- True: use the earliest datetime;
- False: use the latest datetime.
- ``None`` (default): raise
- ``True``: use the earliest datetime
- ``False``: use the latest datetime
Notes
-----
Expand Down Expand Up @@ -1507,9 +1508,10 @@ def replace_time_zone(
Time zone for the `Datetime` expression. Pass `None` to unset time zone.
use_earliest
Determine how to deal with ambiguous datetimes:
- None (default): raise;
- True: use the earliest datetime;
- False: use the latest datetime.
- ``None`` (default): raise
- ``True``: use the earliest datetime
- ``False``: use the latest datetime
Examples
--------
Expand Down
Loading

0 comments on commit 873d18e

Please sign in to comment.