Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add dt.replace #19708

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
31 changes: 31 additions & 0 deletions crates/polars-plan/src/dsl/dt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,4 +331,35 @@ impl DateLikeNameSpace {
TemporalFunction::TotalNanoseconds,
))
}

/// Replace individual date/time components of a temporal value.
///
/// Builds a `TemporalFunction::Replace` expression whose inputs are, in order:
/// `self`, `year`, `month`, `day`, `hour`, `minute`, `second`, `microsecond`
/// and `ambiguous`.
///
/// The first three component parameters are named after how the `replace`
/// implementation in `function_expr/datetime.rs` consumes them: it reads
/// input 1 as the year, input 2 as the month and input 3 as the day. Argument
/// positions are unchanged, so existing positional callers behave exactly as
/// before; only the previously swapped `day`/`year` labels were corrected.
///
/// `non_existent` is stored on the `TemporalFunction::Replace` variant rather
/// than being passed as an input expression.
// One expression per replaceable component, hence the long argument list.
#[allow(clippy::too_many_arguments)]
pub fn replace(
    self,
    year: Expr,
    month: Expr,
    day: Expr,
    hour: Expr,
    minute: Expr,
    second: Expr,
    microsecond: Expr,
    ambiguous: Expr,
    non_existent: NonExistent,
) -> Expr {
    self.0.map_many_private(
        FunctionExpr::TemporalExpr(TemporalFunction::Replace(non_existent)),
        // Order matters: the compute function indexes these positionally.
        &[
            year,
            month,
            day,
            hour,
            minute,
            second,
            microsecond,
            ambiguous,
        ],
        false,
        None,
    )
}
}
45 changes: 45 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ pub enum TemporalFunction {
#[cfg(feature = "timezones")]
DSTOffset,
Round,
Replace(NonExistent),
#[cfg(feature = "timezones")]
ReplaceTimeZone(Option<TimeZone>, NonExistent),
Combine(TimeUnit),
Expand Down Expand Up @@ -117,6 +118,7 @@ impl TemporalFunction {
#[cfg(feature = "timezones")]
DSTOffset => mapper.with_dtype(DataType::Duration(TimeUnit::Milliseconds)),
Round => mapper.with_same_dtype(),
Replace(_non_existent) => mapper.with_same_dtype(),
#[cfg(feature = "timezones")]
ReplaceTimeZone(tz, _non_existent) => mapper.map_datetime_dtype_timezone(tz.as_ref()),
DatetimeFunction {
Expand Down Expand Up @@ -187,6 +189,7 @@ impl Display for TemporalFunction {
#[cfg(feature = "timezones")]
DSTOffset => "dst_offset",
Round => "round",
Replace(_) => "replace",
#[cfg(feature = "timezones")]
ReplaceTimeZone(_, _) => "replace_time_zone",
DatetimeFunction { .. } => return write!(f, "dt.datetime"),
Expand Down Expand Up @@ -555,3 +558,45 @@ pub(super) fn round(s: &[Column]) -> PolarsResult<Column> {
dt => polars_bail!(opq = round, got = dt, expected = "date/datetime"),
})
}

/// Replace date/time components of the temporal column in `s[0]`.
///
/// Expected input layout:
/// - `s[0]`: the date or datetime column being modified,
/// - `s[1]`, `s[2]`, `s[3]`: year (cast to `Int32`), month and day (cast to `Int8`),
/// - `s[4]`, `s[5]`, `s[6]`: hour, minute and second (cast to `Int8`),
/// - `s[7]`: microsecond (cast to `Int32`),
/// - `s[8]`: ambiguous (cast to `String`).
///
/// Inputs `s[4]` onwards are only read when `s[0]` is a `Datetime`; for a
/// `Date` column only the year/month/day inputs are used.
///
/// # Errors
/// Propagates any error from the strict casts or the typed accessors, and
/// returns an invalid-operation error naming `replace` when `s[0]` is neither
/// a `Date` nor a `Datetime`.
pub(super) fn replace(s: &[Column], non_existent: NonExistent) -> PolarsResult<Column> {
    let time_series = &s[0];

    // The date components are needed for both supported dtypes.
    let s_year = s[1].strict_cast(&DataType::Int32)?;
    let s_month = s[2].strict_cast(&DataType::Int8)?;
    let s_day = s[3].strict_cast(&DataType::Int8)?;
    let year = s_year.i32()?;
    let month = s_month.i8()?;
    let day = s_day.i8()?;

    match time_series.dtype() {
        DataType::Datetime(_, _) => {
            let s_hour = s[4].strict_cast(&DataType::Int8)?;
            let s_minute = s[5].strict_cast(&DataType::Int8)?;
            let s_second = s[6].strict_cast(&DataType::Int8)?;
            let s_microsecond = s[7].strict_cast(&DataType::Int32)?;
            let hour = s_hour.i8()?;
            let minute = s_minute.i8()?;
            let second = s_second.i8()?;
            let microsecond = s_microsecond.i32()?;
            let s_ambiguous = s[8].strict_cast(&DataType::String)?;
            let ambiguous = s_ambiguous.str()?;

            // The dtype was matched above, so this accessor is not expected to fail;
            // `?` is used instead of `unwrap` to avoid a panic path regardless.
            time_series.datetime()?.replace(
                year,
                month,
                day,
                hour,
                minute,
                second,
                microsecond,
                ambiguous,
                non_existent,
            )
        },
        DataType::Date => time_series
            .date()?
            .replace(year, month, day, non_existent),
        // Report the operation as `replace` (previously mislabelled as `round`).
        dt => polars_bail!(opq = replace, got = dt, expected = "date/datetime"),
    }
}
93 changes: 32 additions & 61 deletions crates/polars-plan/src/dsl/function_expr/temporal.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#[cfg(feature = "dtype-datetime")]
use polars_time::datetime_series_from_parts;

use super::*;
use crate::{map, map_as_slice};

Expand Down Expand Up @@ -55,6 +58,7 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
#[cfg(feature = "timezones")]
DSTOffset => map!(datetime::dst_offset),
Round => map_as_slice!(datetime::round),
Replace(non_existent) => map_as_slice!(datetime::replace, non_existent),
#[cfg(feature = "timezones")]
ReplaceTimeZone(tz, non_existent) => {
map_as_slice!(dispatch::replace_time_zone, tz.as_deref(), non_existent)
Expand All @@ -70,14 +74,12 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
}
}

#[cfg(feature = "dtype-datetime")]
pub(super) fn datetime(
s: &[Column],
time_unit: &TimeUnit,
time_zone: Option<&str>,
) -> PolarsResult<Column> {
use polars_core::export::chrono::NaiveDate;
use polars_core::utils::CustomIterTools;

let year = &s[0];
let month = &s[1];
let day = &s[2];
Expand All @@ -95,91 +97,60 @@ pub(super) fn datetime(
}
let year = year.i32()?;

let mut month = month.cast(&DataType::UInt32)?;
let mut month = month.cast(&DataType::Int8)?;
if month.len() < max_len {
month = month.new_from_index(0, max_len);
}
let month = month.u32()?;
let month = month.i8()?;

let mut day = day.cast(&DataType::UInt32)?;
let mut day = day.cast(&DataType::Int8)?;
if day.len() < max_len {
day = day.new_from_index(0, max_len);
}
let day = day.u32()?;
let day = day.i8()?;

let mut hour = hour.cast(&DataType::UInt32)?;
let mut hour = hour.cast(&DataType::Int8)?;
if hour.len() < max_len {
hour = hour.new_from_index(0, max_len);
}
let hour = hour.u32()?;
let hour = hour.i8()?;

let mut minute = minute.cast(&DataType::UInt32)?;
let mut minute = minute.cast(&DataType::Int8)?;
if minute.len() < max_len {
minute = minute.new_from_index(0, max_len);
}
let minute = minute.u32()?;
let minute = minute.i8()?;

let mut second = second.cast(&DataType::UInt32)?;
let mut second = second.cast(&DataType::Int8)?;
if second.len() < max_len {
second = second.new_from_index(0, max_len);
}
let second = second.u32()?;
let second = second.i8()?;

let mut microsecond = microsecond.cast(&DataType::UInt32)?;
let mut microsecond = microsecond.cast(&DataType::Int32)?;
if microsecond.len() < max_len {
microsecond = microsecond.new_from_index(0, max_len);
}
let microsecond = microsecond.u32()?;
let microsecond = microsecond.i32()?;
let mut _ambiguous = ambiguous.cast(&DataType::String)?;
if _ambiguous.len() < max_len {
_ambiguous = _ambiguous.new_from_index(0, max_len);
}
let _ambiguous = _ambiguous.str()?;

let ca: Int64Chunked = year
.into_iter()
.zip(month)
.zip(day)
.zip(hour)
.zip(minute)
.zip(second)
.zip(microsecond)
.map(|((((((y, m), d), h), mnt), s), us)| {
if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(us)) =
(y, m, d, h, mnt, s, us)
{
NaiveDate::from_ymd_opt(y, m, d)
.and_then(|nd| nd.and_hms_micro_opt(h, mnt, s, us))
.map(|ndt| match time_unit {
TimeUnit::Milliseconds => ndt.and_utc().timestamp_millis(),
TimeUnit::Microseconds => ndt.and_utc().timestamp_micros(),
TimeUnit::Nanoseconds => ndt.and_utc().timestamp_nanos_opt().unwrap(),
})
} else {
None
}
})
.collect_trusted();

let ca = match time_zone {
#[cfg(feature = "timezones")]
Some(_) => {
let mut ca = ca.into_datetime(*time_unit, None);
ca = replace_time_zone(&ca, time_zone, _ambiguous, NonExistent::Raise)?;
ca
},
_ => {
polars_ensure!(
time_zone.is_none(),
ComputeError: "cannot make use of the `time_zone` argument without the 'timezones' feature enabled."
);
ca.into_datetime(*time_unit, None)
},
};
Comment on lines -139 to -178
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved this code into datetime_series_from_parts in polars_time/src/series/mod.rs for re-use.


let mut s = ca.into_column();
s.rename(PlSmallStr::from_static("datetime"));
Ok(s)
let ambiguous = _ambiguous.str()?;

datetime_series_from_parts(
year,
month,
day,
hour,
minute,
second,
microsecond,
ambiguous,
time_unit,
time_zone,
"datetime",
)
}

pub(super) fn combine(s: &[Column], tu: TimeUnit) -> PolarsResult<Column> {
Expand Down
29 changes: 29 additions & 0 deletions crates/polars-python/src/expr/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,35 @@ impl PyExpr {
self.inner.clone().dt().round(every.inner).into()
}

// Python-facing wrapper around `DateLikeNameSpace::replace`.
//
// Every argument is unwrapped to its inner expression and forwarded
// positionally, in the declared order; `non_existent` is unwrapped from its
// `Wrap` newtype.
//
// NOTE(review): the compute function in `function_expr/datetime.rs` reads the
// first three forwarded inputs as year, month, day — confirm that the Python
// caller passes its values in that positional order.
fn dt_replace(
    &self,
    day: Self,
    month: Self,
    year: Self,
    hour: Self,
    minute: Self,
    second: Self,
    microsecond: Self,
    ambiguous: Self,
    non_existent: Wrap<NonExistent>,
) -> Self {
    let namespace = self.inner.clone().dt();
    let replaced = namespace.replace(
        day.inner,
        month.inner,
        year.inner,
        hour.inner,
        minute.inner,
        second.inner,
        microsecond.inner,
        ambiguous.inner,
        non_existent.0,
    );
    replaced.into()
}

fn dt_combine(&self, time: Self, time_unit: Wrap<TimeUnit>) -> Self {
self.inner
.clone()
Expand Down
6 changes: 6 additions & 0 deletions crates/polars-python/src/lazyframe/visitor/expr_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ pub enum PyTemporalFunction {
BaseUtcOffset,
DSTOffset,
Round,
Replace,
ReplaceTimeZone,
Combine,
DatetimeFunction,
Expand Down Expand Up @@ -1049,6 +1050,11 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult<PyObject> {
#[cfg(feature = "timezones")]
TemporalFunction::DSTOffset => (PyTemporalFunction::DSTOffset,).into_py(py),
TemporalFunction::Round => (PyTemporalFunction::Round,).into_py(py),
TemporalFunction::Replace(non_existent) => (
PyTemporalFunction::Replace,
Into::<&str>::into(non_existent),
)
.into_py(py),
#[cfg(feature = "timezones")]
TemporalFunction::ReplaceTimeZone(time_zone, non_existent) => (
PyTemporalFunction::ReplaceTimeZone,
Expand Down
7 changes: 7 additions & 0 deletions crates/polars-time/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod month_start;
#[cfg(feature = "offset_by")]
mod offset_by;
pub mod prelude;
mod replace;
mod round;
pub mod series;
mod truncate;
Expand All @@ -33,7 +34,13 @@ pub use month_end::*;
pub use month_start::*;
#[cfg(feature = "offset_by")]
pub use offset_by::*;
#[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))]
pub use replace::*;
pub use round::*;
#[cfg(feature = "dtype-date")]
pub use series::date_series_from_parts;
#[cfg(feature = "dtype-datetime")]
pub use series::datetime_series_from_parts;
pub use truncate::*;
pub use upsample::*;
pub use windows::duration::Duration;
Expand Down
Loading
Loading