diff --git a/crates/polars-arrow/src/kernels/mod.rs b/crates/polars-arrow/src/kernels/mod.rs index 29f56bc513a3..aff9870902e5 100644 --- a/crates/polars-arrow/src/kernels/mod.rs +++ b/crates/polars-arrow/src/kernels/mod.rs @@ -25,7 +25,7 @@ pub mod take_agg; mod time; #[cfg(feature = "timezones")] -pub use time::replace_time_zone; +pub use time::convert_to_naive_local; /// Internal state of [SlicesIterator] #[derive(Debug, PartialEq)] diff --git a/crates/polars-arrow/src/kernels/time.rs b/crates/polars-arrow/src/kernels/time.rs index b9774d932020..b901c9cb4bff 100644 --- a/crates/polars-arrow/src/kernels/time.rs +++ b/crates/polars-arrow/src/kernels/time.rs @@ -1,30 +1,25 @@ -use arrow::array::PrimitiveArray; -use arrow::compute::arity::try_unary; -use arrow::datatypes::{DataType as ArrowDataType, TimeUnit}; use arrow::error::{Error as ArrowError, Result}; -use arrow::temporal_conversions::{ - timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_us_to_datetime, -}; use chrono::{LocalResult, NaiveDateTime, TimeZone}; use chrono_tz::Tz; -use crate::error::PolarsResult; - -fn convert_to_naive_local( +pub fn convert_to_naive_local( from_tz: &Tz, to_tz: &Tz, ndt: NaiveDateTime, - use_earliest: Option, + ambiguous: &str, ) -> Result { let ndt = from_tz.from_utc_datetime(&ndt).naive_local(); match to_tz.from_local_datetime(&ndt) { LocalResult::Single(dt) => Ok(dt.naive_utc()), - LocalResult::Ambiguous(dt_earliest, dt_latest) => match use_earliest { - Some(true) => Ok(dt_earliest.naive_utc()), - Some(false) => Ok(dt_latest.naive_utc()), - None => Err(ArrowError::InvalidArgumentError( - format!("datetime '{}' is ambiguous in time zone '{}'. 
Please use `use_earliest` to tell how it should be localized.", ndt, to_tz) - )) + LocalResult::Ambiguous(dt_earliest, dt_latest) => match ambiguous { + "earliest" => Ok(dt_earliest.naive_utc()), + "latest" => Ok(dt_latest.naive_utc()), + "raise" => Err(ArrowError::InvalidArgumentError( + format!("datetime '{}' is ambiguous in time zone '{}'. Please use `ambiguous` to tell how it should be localized.", ndt, to_tz) + )), + ambiguous => Err(ArrowError::InvalidArgumentError( + format!("Invalid argument {}, expected one of: \"earliest\", \"latest\", \"raise\"", ambiguous) + )), }, LocalResult::None => Err(ArrowError::InvalidArgumentError( format!( @@ -35,40 +30,3 @@ fn convert_to_naive_local( )), } } - -pub fn replace_time_zone( - arr: &PrimitiveArray, - tu: TimeUnit, - from_tz: &Tz, - to_tz: &Tz, - use_earliest: Option, -) -> PolarsResult> { - let res = match tu { - TimeUnit::Millisecond => try_unary( - arr, - |value| { - let ndt = timestamp_ms_to_datetime(value); - Ok(convert_to_naive_local(from_tz, to_tz, ndt, use_earliest)?.timestamp_millis()) - }, - ArrowDataType::Int64, - ), - TimeUnit::Microsecond => try_unary( - arr, - |value| { - let ndt = timestamp_us_to_datetime(value); - Ok(convert_to_naive_local(from_tz, to_tz, ndt, use_earliest)?.timestamp_micros()) - }, - ArrowDataType::Int64, - ), - TimeUnit::Nanosecond => try_unary( - arr, - |value| { - let ndt = timestamp_ns_to_datetime(value); - Ok(convert_to_naive_local(from_tz, to_tz, ndt, use_earliest)?.timestamp_nanos()) - }, - ArrowDataType::Int64, - ), - _ => unreachable!(), - }; - Ok(res?) 
-} diff --git a/crates/polars-io/src/csv/read_impl/mod.rs b/crates/polars-io/src/csv/read_impl/mod.rs index 6702d4779184..2b13585f80bf 100644 --- a/crates/polars-io/src/csv/read_impl/mod.rs +++ b/crates/polars-io/src/csv/read_impl/mod.rs @@ -46,7 +46,14 @@ pub(crate) fn cast_columns( (DataType::Utf8, DataType::Datetime(tu, _)) => s .utf8() .unwrap() - .as_datetime(None, *tu, false, false, None, None) + .as_datetime( + None, + *tu, + false, + false, + None, + &Utf8Chunked::from_iter(std::iter::once("raise")), + ) .map(|ca| ca.into_series()), (_, dt) => s.cast(dt), }?; diff --git a/crates/polars-ops/src/chunked_array/datetime/replace_time_zone.rs b/crates/polars-ops/src/chunked_array/datetime/replace_time_zone.rs index 82d2785f82a0..0935cf1b1ada 100644 --- a/crates/polars-ops/src/chunked_array/datetime/replace_time_zone.rs +++ b/crates/polars-ops/src/chunked_array/datetime/replace_time_zone.rs @@ -1,5 +1,10 @@ +use arrow::temporal_conversions::{ + timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_us_to_datetime, +}; +use chrono::NaiveDateTime; use chrono_tz::Tz; -use polars_arrow::kernels::replace_time_zone as replace_time_zone_kernel; +use polars_arrow::kernels::convert_to_naive_local; +use polars_core::chunked_array::ops::arity::try_binary_elementwise_values; use polars_core::prelude::*; fn parse_time_zone(s: &str) -> PolarsResult { @@ -8,26 +13,42 @@ fn parse_time_zone(s: &str) -> PolarsResult { } pub fn replace_time_zone( - ca: &DatetimeChunked, + datetime: &Logical, time_zone: Option<&str>, - use_earliest: Option, + ambiguous: &Utf8Chunked, ) -> PolarsResult { - let out: PolarsResult<_> = { - let from_tz = parse_time_zone(ca.time_zone().as_deref().unwrap_or("UTC"))?; - let to_tz = parse_time_zone(time_zone.unwrap_or("UTC"))?; - let chunks = ca.downcast_iter().map(|arr| { - replace_time_zone_kernel( - arr, - ca.time_unit().to_arrow(), - &from_tz, - &to_tz, - use_earliest, - ) - }); - let out = ChunkedArray::try_from_chunk_iter(ca.name(), chunks)?; - 
Ok(out.into_datetime(ca.time_unit(), time_zone.map(|x| x.to_string()))) + let from_tz = parse_time_zone(datetime.time_zone().as_deref().unwrap_or("UTC"))?; + let to_tz = parse_time_zone(time_zone.unwrap_or("UTC"))?; + let timestamp_to_datetime: fn(i64) -> NaiveDateTime = match datetime.time_unit() { + TimeUnit::Milliseconds => timestamp_ms_to_datetime, + TimeUnit::Microseconds => timestamp_us_to_datetime, + TimeUnit::Nanoseconds => timestamp_ns_to_datetime, }; - let mut out = out?; - out.set_sorted_flag(ca.is_sorted_flag()); + let datetime_to_timestamp: fn(NaiveDateTime) -> i64 = match datetime.time_unit() { + TimeUnit::Milliseconds => datetime_to_timestamp_ms, + TimeUnit::Microseconds => datetime_to_timestamp_us, + TimeUnit::Nanoseconds => datetime_to_timestamp_ns, + }; + let out = match ambiguous.len() { + 1 => match ambiguous.get(0) { + Some(ambiguous) => datetime.0.try_apply(|timestamp| { + let ndt = timestamp_to_datetime(timestamp); + Ok(datetime_to_timestamp(convert_to_naive_local( + &from_tz, &to_tz, ndt, ambiguous, + )?)) + }), + _ => Ok(datetime.0.apply(|_| None)), + }, + _ => { + try_binary_elementwise_values(datetime, ambiguous, |timestamp: i64, ambiguous: &str| { + let ndt = timestamp_to_datetime(timestamp); + Ok::(datetime_to_timestamp(convert_to_naive_local( + &from_tz, &to_tz, ndt, ambiguous, + )?)) + }) + }, + }; + let mut out = out?.into_datetime(datetime.time_unit(), time_zone.map(|x| x.to_string())); + out.set_sorted_flag(datetime.is_sorted_flag()); Ok(out) } diff --git a/crates/polars-plan/src/dsl/dt.rs b/crates/polars-plan/src/dsl/dt.rs index 026a6c7b2850..7e630c0fcfa2 100644 --- a/crates/polars-plan/src/dsl/dt.rs +++ b/crates/polars-plan/src/dsl/dt.rs @@ -215,11 +215,12 @@ impl DateLikeNameSpace { .map_private(FunctionExpr::TemporalExpr(TemporalFunction::TimeStamp(tu))) } - pub fn truncate(self, options: TruncateOptions) -> Expr { - self.0 - .map_private(FunctionExpr::TemporalExpr(TemporalFunction::Truncate( - options, - ))) + pub fn 
truncate(self, options: TruncateOptions, ambiguous: Expr) -> Expr { + self.0.map_many_private( + FunctionExpr::TemporalExpr(TemporalFunction::Truncate(options)), + &[ambiguous], + false, + ) } // roll backward to the first day of the month @@ -267,14 +268,12 @@ impl DateLikeNameSpace { } #[cfg(feature = "timezones")] - pub fn replace_time_zone( - self, - time_zone: Option, - use_earliest: Option, - ) -> Expr { - self.0.map_private(FunctionExpr::TemporalExpr( - TemporalFunction::ReplaceTimeZone(time_zone, use_earliest), - )) + pub fn replace_time_zone(self, time_zone: Option, ambiguous: Expr) -> Expr { + self.0.map_many_private( + FunctionExpr::TemporalExpr(TemporalFunction::ReplaceTimeZone(time_zone)), + &[ambiguous], + false, + ) } pub fn combine(self, time: Expr, tu: TimeUnit) -> Expr { diff --git a/crates/polars-plan/src/dsl/function_expr/datetime.rs b/crates/polars-plan/src/dsl/function_expr/datetime.rs index dc697d5d5e98..143fc14d80bc 100644 --- a/crates/polars-plan/src/dsl/function_expr/datetime.rs +++ b/crates/polars-plan/src/dsl/function_expr/datetime.rs @@ -42,7 +42,7 @@ pub enum TemporalFunction { DSTOffset, Round(String, String), #[cfg(feature = "timezones")] - ReplaceTimeZone(Option, Option), + ReplaceTimeZone(Option), DateRange { every: Duration, closed: ClosedWindow, @@ -67,7 +67,6 @@ pub enum TemporalFunction { DatetimeFunction { time_unit: TimeUnit, time_zone: Option, - use_earliest: Option, }, } @@ -105,7 +104,7 @@ impl Display for TemporalFunction { DSTOffset => "dst_offset", Round(..) => "round", #[cfg(feature = "timezones")] - ReplaceTimeZone(_, _) => "replace_time_zone", + ReplaceTimeZone(_) => "replace_time_zone", DateRange { .. } => return write!(f, "date_range"), DateRanges { .. } => return write!(f, "date_ranges"), TimeRange { .. 
} => return write!(f, "time_range"), @@ -147,10 +146,12 @@ pub(super) fn ordinal_day(s: &Series) -> PolarsResult { pub(super) fn time(s: &Series) -> PolarsResult { match s.dtype() { #[cfg(feature = "timezones")] - DataType::Datetime(_, Some(_)) => { - polars_ops::prelude::replace_time_zone(s.datetime().unwrap(), None, None)? - .cast(&DataType::Time) - }, + DataType::Datetime(_, Some(_)) => polars_ops::prelude::replace_time_zone( + s.datetime().unwrap(), + None, + &Utf8Chunked::from_iter(std::iter::once("raise")), + )? + .cast(&DataType::Time), DataType::Datetime(_, _) => s.datetime().unwrap().cast(&DataType::Time), DataType::Date => s.datetime().unwrap().cast(&DataType::Time), DataType::Time => Ok(s.clone()), @@ -162,8 +163,12 @@ pub(super) fn date(s: &Series) -> PolarsResult { #[cfg(feature = "timezones")] DataType::Datetime(_, Some(tz)) => { let mut out = { - polars_ops::chunked_array::replace_time_zone(s.datetime().unwrap(), None, None)? - .cast(&DataType::Date)? + polars_ops::chunked_array::replace_time_zone( + s.datetime().unwrap(), + None, + &Utf8Chunked::from_iter(std::iter::once("raise")), + )? + .cast(&DataType::Date)? }; if tz != "UTC" { // DST transitions may not preserve sortedness. @@ -181,8 +186,12 @@ pub(super) fn datetime(s: &Series) -> PolarsResult { #[cfg(feature = "timezones")] DataType::Datetime(tu, Some(tz)) => { let mut out = { - polars_ops::chunked_array::replace_time_zone(s.datetime().unwrap(), None, None)? - .cast(&DataType::Datetime(*tu, None))? + polars_ops::chunked_array::replace_time_zone( + s.datetime().unwrap(), + None, + &Utf8Chunked::from_iter(std::iter::once("raise")), + )? + .cast(&DataType::Datetime(*tu, None))? }; if tz != "UTC" { // DST transitions may not preserve sortedness. 
@@ -216,21 +225,31 @@ pub(super) fn timestamp(s: &Series, tu: TimeUnit) -> PolarsResult { s.timestamp(tu).map(|ca| ca.into_series()) } -pub(super) fn truncate(s: &Series, options: &TruncateOptions) -> PolarsResult { - let mut out = match s.dtype() { +pub(super) fn truncate(s: &[Series], options: &TruncateOptions) -> PolarsResult { + let time_series = &s[0]; + let ambiguous = &s[1].utf8().unwrap(); + let mut out = match time_series.dtype() { DataType::Datetime(_, tz) => match tz { #[cfg(feature = "timezones")] - Some(tz) => s + Some(tz) => time_series + .datetime() + .unwrap() + .truncate(options, tz.parse::().ok().as_ref(), ambiguous)? + .into_series(), + _ => time_series .datetime() .unwrap() - .truncate(options, tz.parse::().ok().as_ref())? + .truncate(options, None, ambiguous)? .into_series(), - _ => s.datetime().unwrap().truncate(options, None)?.into_series(), }, - DataType::Date => s.date().unwrap().truncate(options, None)?.into_series(), + DataType::Date => time_series + .date() + .unwrap() + .truncate(options, None, ambiguous)? 
+ .into_series(), dt => polars_bail!(opq = round, got = dt, expected = "date/datetime"), }; - out.set_sorted_flag(s.is_sorted_flag()); + out.set_sorted_flag(time_series.is_sorted_flag()); Ok(out) } diff --git a/crates/polars-plan/src/dsl/function_expr/dispatch.rs b/crates/polars-plan/src/dsl/function_expr/dispatch.rs index bb0db05515cb..d0f652c7be59 100644 --- a/crates/polars-plan/src/dsl/function_expr/dispatch.rs +++ b/crates/polars-plan/src/dsl/function_expr/dispatch.rs @@ -34,11 +34,9 @@ pub(super) fn set_sorted_flag(s: &Series, sorted: IsSorted) -> PolarsResult, - use_earliest: Option, -) -> PolarsResult { - let ca = s.datetime().unwrap(); - Ok(polars_ops::prelude::replace_time_zone(ca, time_zone, use_earliest)?.into_series()) +pub(super) fn replace_time_zone(s: &[Series], time_zone: Option<&str>) -> PolarsResult { + let s1 = &s[0]; + let ca = s1.datetime().unwrap(); + let s2 = &s[1].utf8().unwrap(); + Ok(polars_ops::prelude::replace_time_zone(ca, time_zone, s2)?.into_series()) } diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index 4d3c10f21dfa..ef3e5b1c3f8f 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -677,7 +677,7 @@ impl From for SpecialEq> { }, #[cfg(feature = "temporal")] Strptime(dtype, options) => { - map!(strings::strptime, dtype.clone(), &options) + map_as_slice!(strings::strptime, dtype.clone(), &options) }, #[cfg(feature = "concat_str")] ConcatVertical(delimiter) => map!(strings::concat, &delimiter), @@ -749,7 +749,7 @@ impl From for SpecialEq> { Nanosecond => map!(datetime::nanosecond), TimeStamp(tu) => map!(datetime::timestamp, tu), Truncate(truncate_options) => { - map!(datetime::truncate, &truncate_options) + map_as_slice!(datetime::truncate, &truncate_options) }, #[cfg(feature = "date_offset")] MonthStart => map!(datetime::month_start), @@ -761,8 +761,8 @@ impl From for SpecialEq> { DSTOffset => 
map!(datetime::dst_offset), Round(every, offset) => map!(datetime::round, &every, &offset), #[cfg(feature = "timezones")] - ReplaceTimeZone(tz, use_earliest) => { - map!(dispatch::replace_time_zone, tz.as_deref(), use_earliest) + ReplaceTimeZone(tz) => { + map_as_slice!(dispatch::replace_time_zone, tz.as_deref()) }, Combine(tu) => map_as_slice!(temporal::combine, tu), DateRange { @@ -818,14 +818,8 @@ impl From for SpecialEq> { DatetimeFunction { time_unit, time_zone, - use_earliest, } => { - map_as_slice!( - temporal::datetime, - &time_unit, - time_zone.as_deref(), - use_earliest - ) + map_as_slice!(temporal::datetime, &time_unit, time_zone.as_deref()) }, } } diff --git a/crates/polars-plan/src/dsl/function_expr/schema.rs b/crates/polars-plan/src/dsl/function_expr/schema.rs index 91c272a0f082..37760a3c4742 100644 --- a/crates/polars-plan/src/dsl/function_expr/schema.rs +++ b/crates/polars-plan/src/dsl/function_expr/schema.rs @@ -58,9 +58,7 @@ impl FunctionExpr { DSTOffset => DataType::Duration(TimeUnit::Milliseconds), Round(..) 
=> mapper.with_same_dtype().unwrap().dtype, #[cfg(feature = "timezones")] - ReplaceTimeZone(tz, _use_earliest) => { - return mapper.map_datetime_dtype_timezone(tz.as_ref()) - }, + ReplaceTimeZone(tz) => return mapper.map_datetime_dtype_timezone(tz.as_ref()), DateRange { every, closed: _, @@ -105,7 +103,6 @@ impl FunctionExpr { DatetimeFunction { time_unit, time_zone, - use_earliest: _, } => { return Ok(Field::new( "datetime", diff --git a/crates/polars-plan/src/dsl/function_expr/strings.rs b/crates/polars-plan/src/dsl/function_expr/strings.rs index 287356050f66..89a37c5dc170 100644 --- a/crates/polars-plan/src/dsl/function_expr/strings.rs +++ b/crates/polars-plan/src/dsl/function_expr/strings.rs @@ -414,16 +414,16 @@ pub(super) fn count_match(s: &Series, pat: &str) -> PolarsResult { #[cfg(feature = "temporal")] pub(super) fn strptime( - s: &Series, + s: &[Series], dtype: DataType, options: &StrptimeOptions, ) -> PolarsResult { match dtype { - DataType::Date => to_date(s, options), + DataType::Date => to_date(&s[0], options), DataType::Datetime(time_unit, time_zone) => { to_datetime(s, &time_unit, time_zone.as_ref(), options) }, - DataType::Time => to_time(s, options), + DataType::Time => to_time(&s[0], options), dt => polars_bail!(ComputeError: "not implemented for dtype {}", dt), } } @@ -491,11 +491,13 @@ fn to_date(s: &Series, options: &StrptimeOptions) -> PolarsResult { #[cfg(feature = "dtype-datetime")] fn to_datetime( - s: &Series, + s: &[Series], time_unit: &TimeUnit, time_zone: Option<&TimeZone>, options: &StrptimeOptions, ) -> PolarsResult { + let datetime_strings = &s[0].utf8().unwrap(); + let ambiguous = &s[1].utf8().unwrap(); let tz_aware = match &options.format { #[cfg(feature = "timezones")] Some(format) => TZ_AWARE_RE.is_match(format), @@ -511,30 +513,31 @@ fn to_datetime( } }; - let ca = s.utf8()?; let out = if options.exact { - ca.as_datetime( - options.format.as_deref(), - *time_unit, - options.cache, - tz_aware, - time_zone, - 
options.use_earliest, - )? - .into_series() + datetime_strings + .as_datetime( + options.format.as_deref(), + *time_unit, + options.cache, + tz_aware, + time_zone, + ambiguous, + )? + .into_series() } else { - ca.as_datetime_not_exact( - options.format.as_deref(), - *time_unit, - tz_aware, - time_zone, - options.use_earliest, - )? - .into_series() + datetime_strings + .as_datetime_not_exact( + options.format.as_deref(), + *time_unit, + tz_aware, + time_zone, + ambiguous, + )? + .into_series() }; - if options.strict && ca.null_count() != out.null_count() { - handle_temporal_parsing_error(ca, &out, options.format.as_deref(), true)?; + if options.strict && datetime_strings.null_count() != out.null_count() { + handle_temporal_parsing_error(datetime_strings, &out, options.format.as_deref(), true)?; } Ok(out.into_series()) } diff --git a/crates/polars-plan/src/dsl/function_expr/temporal.rs b/crates/polars-plan/src/dsl/function_expr/temporal.rs index 8ef177436cf7..3c75daba6e76 100644 --- a/crates/polars-plan/src/dsl/function_expr/temporal.rs +++ b/crates/polars-plan/src/dsl/function_expr/temporal.rs @@ -10,7 +10,6 @@ pub(super) fn datetime( s: &[Series], time_unit: &TimeUnit, time_zone: Option<&str>, - use_earliest: Option, ) -> PolarsResult { use polars_core::export::chrono::NaiveDate; use polars_core::utils::CustomIterTools; @@ -22,6 +21,7 @@ pub(super) fn datetime( let minute = &s[4]; let second = &s[5]; let microsecond = &s[6]; + let ambiguous = &s[7]; let max_len = s.iter().map(|s| s.len()).max().unwrap(); @@ -66,6 +66,11 @@ pub(super) fn datetime( microsecond = microsecond.new_from_index(0, max_len); } let microsecond = microsecond.u32()?; + let mut _ambiguous = ambiguous.cast(&DataType::Utf8)?; + if _ambiguous.len() < max_len { + _ambiguous = _ambiguous.new_from_index(0, max_len); + } + let _ambiguous = _ambiguous.utf8()?; let ca: Int64Chunked = year .into_iter() @@ -96,13 +101,13 @@ pub(super) fn datetime( #[cfg(feature = "timezones")] Some(_) => { let mut ca = 
ca.into_datetime(*time_unit, None); - ca = replace_time_zone(&ca, time_zone, use_earliest)?; + ca = replace_time_zone(&ca, time_zone, _ambiguous)?; ca }, _ => { polars_ensure!( - time_zone.is_none() && use_earliest.is_none(), - ComputeError: "cannot make use of the `time_zone` and `use_earliest` arguments without the 'timezones' feature enabled." + time_zone.is_none(), + ComputeError: "cannot make use of the `time_zone` argument without the 'timezones' feature enabled." ); ca.into_datetime(*time_unit, None) }, @@ -189,7 +194,7 @@ pub(super) fn combine(s: &[Series], tu: TimeUnit) -> PolarsResult { Some(tz) => Ok(polars_ops::prelude::replace_time_zone( result_naive.datetime().unwrap(), Some(tz), - None, + &Utf8Chunked::from_iter(std::iter::once("raise")), )? .into()), _ => Ok(result_naive), @@ -243,7 +248,7 @@ pub(super) fn temporal_range_dispatch( polars_ops::prelude::replace_time_zone( start.cast(&dtype)?.datetime().unwrap(), None, - None, + &Utf8Chunked::from_iter(std::iter::once("raise")), )? .into_series() .to_physical_repr() @@ -251,7 +256,7 @@ pub(super) fn temporal_range_dispatch( polars_ops::prelude::replace_time_zone( stop.cast(&dtype)?.datetime().unwrap(), None, - None, + &Utf8Chunked::from_iter(std::iter::once("raise")), )? .into_series() .to_physical_repr() @@ -359,7 +364,7 @@ pub(super) fn temporal_ranges_dispatch( polars_ops::prelude::replace_time_zone( start.cast(&dtype)?.datetime().unwrap(), None, - None, + &Utf8Chunked::from_iter(std::iter::once("raise")), )? .into_series() .to_physical_repr() @@ -367,7 +372,7 @@ pub(super) fn temporal_ranges_dispatch( polars_ops::prelude::replace_time_zone( stop.cast(&dtype)?.datetime().unwrap(), None, - None, + &Utf8Chunked::from_iter(std::iter::once("raise")), )? 
.into_series() .to_physical_repr() diff --git a/crates/polars-plan/src/dsl/functions/temporal.rs b/crates/polars-plan/src/dsl/functions/temporal.rs index 890f79b796dc..4e5211c2d88e 100644 --- a/crates/polars-plan/src/dsl/functions/temporal.rs +++ b/crates/polars-plan/src/dsl/functions/temporal.rs @@ -38,7 +38,7 @@ pub struct DatetimeArgs { pub microsecond: Expr, pub time_unit: TimeUnit, pub time_zone: Option, - pub use_earliest: Option, + pub ambiguous: Expr, } impl Default for DatetimeArgs { @@ -53,7 +53,7 @@ impl Default for DatetimeArgs { microsecond: lit(0), time_unit: TimeUnit::Microseconds, time_zone: None, - use_earliest: None, + ambiguous: lit(String::from("raise")), } } } @@ -104,11 +104,8 @@ impl DatetimeArgs { Self { time_zone, ..self } } #[cfg(feature = "timezones")] - pub fn with_use_earliest(self, use_earliest: Option) -> Self { - Self { - use_earliest, - ..self - } + pub fn with_ambiguous(self, ambiguous: Expr) -> Self { + Self { ambiguous, ..self } } } @@ -124,16 +121,24 @@ pub fn datetime(args: DatetimeArgs) -> Expr { let microsecond = args.microsecond; let time_unit = args.time_unit; let time_zone = args.time_zone; - let use_earliest = args.use_earliest; + let ambiguous = args.ambiguous; - let input = vec![year, month, day, hour, minute, second, microsecond]; + let input = vec![ + year, + month, + day, + hour, + minute, + second, + microsecond, + ambiguous, + ]; Expr::Function { input, function: FunctionExpr::TemporalExpr(TemporalFunction::DatetimeFunction { time_unit, time_zone, - use_earliest, }), options: FunctionOptions { collect_groups: ApplyOptions::ApplyFlat, diff --git a/crates/polars-plan/src/dsl/options.rs b/crates/polars-plan/src/dsl/options.rs index 07ee571099dc..7496a5ded8cf 100644 --- a/crates/polars-plan/src/dsl/options.rs +++ b/crates/polars-plan/src/dsl/options.rs @@ -23,11 +23,6 @@ pub struct StrptimeOptions { pub exact: bool, /// use a cache of unique, converted dates to apply the datetime conversion. 
pub cache: bool, - /// use earliest datetime when localizing ambiguous datetimes - /// - True: use earliest datetime - /// - False: use latest datetime - /// - None: raise - pub use_earliest: Option, } impl Default for StrptimeOptions { @@ -37,7 +32,6 @@ impl Default for StrptimeOptions { strict: true, exact: true, cache: true, - use_earliest: None, } } } diff --git a/crates/polars-plan/src/dsl/string.rs b/crates/polars-plan/src/dsl/string.rs index 863dc1768086..9b0ba4653fb2 100644 --- a/crates/polars-plan/src/dsl/string.rs +++ b/crates/polars-plan/src/dsl/string.rs @@ -135,15 +135,18 @@ impl StringNameSpace { /// Convert a Utf8 column into a Date/Datetime/Time column. #[cfg(feature = "temporal")] - pub fn strptime(self, dtype: DataType, options: StrptimeOptions) -> Expr { - self.0 - .map_private(StringFunction::Strptime(dtype, options).into()) + pub fn strptime(self, dtype: DataType, options: StrptimeOptions, ambiguous: Expr) -> Expr { + self.0.map_many_private( + StringFunction::Strptime(dtype, options).into(), + &[ambiguous], + false, + ) } /// Convert a Utf8 column into a Date column. #[cfg(feature = "dtype-date")] pub fn to_date(self, options: StrptimeOptions) -> Expr { - self.strptime(DataType::Date, options) + self.strptime(DataType::Date, options, lit("raise")) } /// Convert a Utf8 column into a Datetime column. @@ -153,6 +156,7 @@ impl StringNameSpace { time_unit: Option, time_zone: Option, options: StrptimeOptions, + ambiguous: Expr, ) -> Expr { // If time_unit is None, try to infer it from the format or set a default let time_unit = match (&options.format, time_unit) { @@ -173,13 +177,13 @@ impl StringNameSpace { (None, None) => TimeUnit::Microseconds, }; - self.strptime(DataType::Datetime(time_unit, time_zone), options) + self.strptime(DataType::Datetime(time_unit, time_zone), options, ambiguous) } /// Convert a Utf8 column into a Time column. 
#[cfg(feature = "dtype-time")] pub fn to_time(self, options: StrptimeOptions) -> Expr { - self.strptime(DataType::Time, options) + self.strptime(DataType::Time, options, lit("raise")) } /// Convert a Utf8 column into a Decimal column. diff --git a/crates/polars-time/src/chunkedarray/utf8/infer.rs b/crates/polars-time/src/chunkedarray/utf8/infer.rs index 8870a979858a..f3e559368f74 100644 --- a/crates/polars-time/src/chunkedarray/utf8/infer.rs +++ b/crates/polars-time/src/chunkedarray/utf8/infer.rs @@ -494,7 +494,7 @@ pub(crate) fn to_datetime( ca: &Utf8Chunked, tu: TimeUnit, tz: Option<&TimeZone>, - _use_earliest: Option, + _ambiguous: &Utf8Chunked, ) -> PolarsResult { match ca.first_non_null() { None => Ok(Int64Chunked::full_null(ca.name(), ca.len()).into_datetime(tu, tz.cloned())), @@ -518,7 +518,7 @@ pub(crate) fn to_datetime( Pattern::DatetimeYMDZ => infer.coerce_utf8(ca).datetime().map(|ca| { let mut ca = ca.clone(); ca.set_time_unit(tu); - polars_ops::prelude::replace_time_zone(&ca, Some("UTC"), _use_earliest) + polars_ops::prelude::replace_time_zone(&ca, Some("UTC"), _ambiguous) })?, _ => infer.coerce_utf8(ca).datetime().map(|ca| { let mut ca = ca.clone(); @@ -526,7 +526,7 @@ pub(crate) fn to_datetime( match tz { #[cfg(feature = "timezones")] Some(tz) => { - polars_ops::prelude::replace_time_zone(&ca, Some(tz), _use_earliest) + polars_ops::prelude::replace_time_zone(&ca, Some(tz), _ambiguous) }, _ => Ok(ca), } diff --git a/crates/polars-time/src/chunkedarray/utf8/mod.rs b/crates/polars-time/src/chunkedarray/utf8/mod.rs index e1236b9dd626..37dc9a7ab2f6 100644 --- a/crates/polars-time/src/chunkedarray/utf8/mod.rs +++ b/crates/polars-time/src/chunkedarray/utf8/mod.rs @@ -235,7 +235,7 @@ pub trait Utf8Methods: AsUtf8 { tu: TimeUnit, tz_aware: bool, tz: Option<&TimeZone>, - _use_earliest: Option, + _ambiguous: &Utf8Chunked, ) -> PolarsResult { let utf8_ca = self.as_utf8(); let fmt = match fmt { @@ -289,7 +289,7 @@ pub trait Utf8Methods: AsUtf8 { (false, Some(tz)) 
=> polars_ops::prelude::replace_time_zone( &ca.into_datetime(tu, None), Some(tz), - _use_earliest, + _ambiguous, ), #[cfg(feature = "timezones")] (true, _) => Ok(ca.into_datetime(tu, Some("UTC".to_string()))), @@ -382,12 +382,12 @@ pub trait Utf8Methods: AsUtf8 { cache: bool, tz_aware: bool, tz: Option<&TimeZone>, - use_earliest: Option, + ambiguous: &Utf8Chunked, ) -> PolarsResult { let utf8_ca = self.as_utf8(); let fmt = match fmt { Some(fmt) => fmt, - None => return infer::to_datetime(utf8_ca, tu, tz, use_earliest), + None => return infer::to_datetime(utf8_ca, tu, tz, ambiguous), }; let fmt = strptime::compile_fmt(fmt)?; let cache = cache && utf8_ca.len() > 50; @@ -507,7 +507,7 @@ pub trait Utf8Methods: AsUtf8 { Some(tz) => polars_ops::prelude::replace_time_zone( &ca.into_datetime(tu, None), Some(tz), - use_earliest, + ambiguous, ), _ => Ok(ca.into_datetime(tu, None)), } diff --git a/crates/polars-time/src/group_by/dynamic.rs b/crates/polars-time/src/group_by/dynamic.rs index 56e2bb969f34..0364096189c6 100644 --- a/crates/polars-time/src/group_by/dynamic.rs +++ b/crates/polars-time/src/group_by/dynamic.rs @@ -682,7 +682,14 @@ mod test { "2020-01-08 23:16:43", ], ) - .as_datetime(None, tu, false, false, None, None)? + .as_datetime( + None, + tu, + false, + false, + None, + &Utf8Chunked::from_iter(std::iter::once("raise")), + )? .into_series(); date.set_sorted_flag(IsSorted::Ascending); let a = Series::new("a", [3, 7, 5, 9, 2, 1]); @@ -722,7 +729,14 @@ mod test { "2020-01-08 23:16:43", ], ) - .as_datetime(None, TimeUnit::Milliseconds, false, false, None, None)? + .as_datetime( + None, + TimeUnit::Milliseconds, + false, + false, + None, + &Utf8Chunked::from_iter(std::iter::once("raise")), + )? 
.into_series(); date.set_sorted_flag(IsSorted::Ascending); diff --git a/crates/polars-time/src/month_start.rs b/crates/polars-time/src/month_start.rs index 4f6ed5e6f164..f317d3852112 100644 --- a/crates/polars-time/src/month_start.rs +++ b/crates/polars-time/src/month_start.rs @@ -43,7 +43,7 @@ pub(crate) fn roll_backward( let ndt = NaiveDateTime::new(date, time); let t = match tz { #[cfg(feature = "timezones")] - Some(tz) => datetime_to_timestamp(localize_datetime(ndt, tz, None)?), + Some(tz) => datetime_to_timestamp(localize_datetime(ndt, tz, "raise")?), _ => datetime_to_timestamp(ndt), }; Ok(t) diff --git a/crates/polars-time/src/truncate.rs b/crates/polars-time/src/truncate.rs index ef250864963e..bc233c51a662 100644 --- a/crates/polars-time/src/truncate.rs +++ b/crates/polars-time/src/truncate.rs @@ -1,6 +1,7 @@ #[cfg(feature = "dtype-date")] use polars_arrow::export::arrow::temporal_conversions::{MILLISECONDS, SECONDS_IN_DAY}; use polars_arrow::time_zone::Tz; +use polars_core::chunked_array::ops::arity::try_binary_elementwise_values; use polars_core::prelude::*; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -13,19 +14,27 @@ pub struct TruncateOptions { pub every: String, /// Offset of the window pub offset: String, - /// How to deal with ambiguous datetimes - pub use_earliest: Option, } pub trait PolarsTruncate { - fn truncate(&self, options: &TruncateOptions, tz: Option<&Tz>) -> PolarsResult + fn truncate( + &self, + options: &TruncateOptions, + tz: Option<&Tz>, + ambiguous: &Utf8Chunked, + ) -> PolarsResult where Self: Sized; } #[cfg(feature = "dtype-datetime")] impl PolarsTruncate for DatetimeChunked { - fn truncate(&self, options: &TruncateOptions, tz: Option<&Tz>) -> PolarsResult { + fn truncate( + &self, + options: &TruncateOptions, + tz: Option<&Tz>, + ambiguous: &Utf8Chunked, + ) -> PolarsResult { let every = Duration::parse(&options.every); let offset = Duration::parse(&options.offset); let w = Window::new(every, every, offset); @@ 
-36,22 +45,38 @@ impl PolarsTruncate for DatetimeChunked { TimeUnit::Milliseconds => Window::truncate_ms, }; - Ok(self - .try_apply(|t| func(&w, t, tz, options.use_earliest))? - .into_datetime(self.time_unit(), self.time_zone().clone())) + let out = match ambiguous.len() { + 1 => match ambiguous.get(0) { + Some(ambiguous) => self + .0 + .try_apply(|timestamp| func(&w, timestamp, tz, ambiguous)), + _ => Ok(self.0.apply(|_| None)), + }, + _ => { + try_binary_elementwise_values(self, ambiguous, |timestamp: i64, ambiguous: &str| { + func(&w, timestamp, tz, ambiguous) + }) + }, + }; + Ok(out?.into_datetime(self.time_unit(), self.time_zone().clone())) } } #[cfg(feature = "dtype-date")] impl PolarsTruncate for DateChunked { - fn truncate(&self, options: &TruncateOptions, _tz: Option<&Tz>) -> PolarsResult { + fn truncate( + &self, + options: &TruncateOptions, + _tz: Option<&Tz>, + _ambiguous: &Utf8Chunked, + ) -> PolarsResult { let every = Duration::parse(&options.every); let offset = Duration::parse(&options.offset); let w = Window::new(every, every, offset); Ok(self .try_apply(|t| { const MSECS_IN_DAY: i64 = MILLISECONDS * SECONDS_IN_DAY; - Ok((w.truncate_ms(MSECS_IN_DAY * t as i64, None, None)? / MSECS_IN_DAY) as i32) + Ok((w.truncate_ms(MSECS_IN_DAY * t as i64, None, "raise")? / MSECS_IN_DAY) as i32) })? .into_date()) } diff --git a/crates/polars-time/src/utils.rs b/crates/polars-time/src/utils.rs index eddaef8f6e29..21edd285941f 100644 --- a/crates/polars-time/src/utils.rs +++ b/crates/polars-time/src/utils.rs @@ -15,20 +15,23 @@ use polars_core::prelude::{polars_bail, PolarsResult, TimeUnit}; pub(crate) fn localize_datetime( ndt: NaiveDateTime, tz: &Tz, - use_earliest: Option, + ambiguous: &str, ) -> PolarsResult { // e.g. 
'2021-01-01 03:00' -> '2021-01-01 03:00CDT' match tz.from_local_datetime(&ndt) { LocalResult::Single(tz) => Ok(tz.naive_utc()), - LocalResult::Ambiguous(dt_earliest, dt_latest) => match use_earliest { - Some(true) => Ok(dt_earliest.naive_utc()), - Some(false) => Ok(dt_latest.naive_utc()), - None => polars_bail!(ComputeError: + LocalResult::Ambiguous(dt_earliest, dt_latest) => match ambiguous { + "earliest" => Ok(dt_earliest.naive_utc()), + "latest" => Ok(dt_latest.naive_utc()), + "raise" => polars_bail!(ComputeError: format!("datetime '{}' is ambiguous in time zone '{}'. \ - Please use `use_earliest` to tell how it should be localized. \ - If you got here from a function which doesn't have a `use_earliest` argument, \ + Please use `ambiguous` to tell how it should be localized. \ + If you got here from a function which doesn't have a `ambiguous` argument, \ please open an issue at https://github.com/pola-rs/polars/issues.", ndt, tz) ), + ambiguous => polars_bail!(ComputeError: + format!("Invalid argument {}, expected one of: \"earliest\", \"latest\", \"raise\"", ambiguous) + ), }, LocalResult::None => { polars_bail!( @@ -49,19 +52,19 @@ pub(crate) fn localize_timestamp(timestamp: i64, tu: TimeUnit, tz: Tz) -> Polars match tu { TimeUnit::Nanoseconds => { Ok( - localize_datetime(timestamp_ns_to_datetime(timestamp), &tz, None)? + localize_datetime(timestamp_ns_to_datetime(timestamp), &tz, "raise")? .timestamp_nanos(), ) }, TimeUnit::Microseconds => { Ok( - localize_datetime(timestamp_us_to_datetime(timestamp), &tz, None)? + localize_datetime(timestamp_us_to_datetime(timestamp), &tz, "raise")? .timestamp_micros(), ) }, TimeUnit::Milliseconds => { Ok( - localize_datetime(timestamp_ms_to_datetime(timestamp), &tz, None)? + localize_datetime(timestamp_ms_to_datetime(timestamp), &tz, "raise")? 
.timestamp_millis(), ) }, diff --git a/crates/polars-time/src/windows/duration.rs b/crates/polars-time/src/windows/duration.rs index 263a95974a61..d87032d03804 100644 --- a/crates/polars-time/src/windows/duration.rs +++ b/crates/polars-time/src/windows/duration.rs @@ -438,7 +438,7 @@ impl Duration { nsecs_to_unit: F, timestamp_to_datetime: G, datetime_to_timestamp: J, - _use_earliest: Option, + _ambiguous: &str, ) -> PolarsResult where F: Fn(i64) -> i64, @@ -466,7 +466,7 @@ impl Duration { Some(tz) => Ok(datetime_to_timestamp(localize_datetime( timestamp_to_datetime(t - remainder), tz, - _use_earliest, + _ambiguous, )?)), _ => Ok(t - remainder), } @@ -486,7 +486,7 @@ impl Duration { Some(tz) => Ok(datetime_to_timestamp(localize_datetime( first_day_of_week.and_time(NaiveTime::default()), tz, - _use_earliest, + _ambiguous, )?)), _ => Ok(datetime_to_timestamp( first_day_of_week.and_time(NaiveTime::default()), @@ -512,7 +512,7 @@ impl Duration { Some(tz) => Ok(datetime_to_timestamp(localize_datetime( timestamp_to_datetime(t - remainder), tz, - _use_earliest, + _ambiguous, )?)), _ => Ok(t - remainder), } @@ -541,9 +541,7 @@ impl Duration { match tz { #[cfg(feature = "timezones")] Some(tz) => Ok(datetime_to_timestamp(localize_datetime( - dt, - tz, - _use_earliest, + dt, tz, _ambiguous, )?)), _ => Ok(datetime_to_timestamp(dt)), } @@ -556,55 +554,40 @@ impl Duration { // Truncate the given ns timestamp by the window boundary. #[inline] - pub fn truncate_ns( - &self, - t: i64, - tz: Option<&Tz>, - use_earliest: Option, - ) -> PolarsResult { + pub fn truncate_ns(&self, t: i64, tz: Option<&Tz>, ambiguous: &str) -> PolarsResult { self.truncate_impl( t, tz, |nsecs| nsecs, timestamp_ns_to_datetime, datetime_to_timestamp_ns, - use_earliest, + ambiguous, ) } // Truncate the given ns timestamp by the window boundary. 
#[inline] - pub fn truncate_us( - &self, - t: i64, - tz: Option<&Tz>, - use_earliest: Option, - ) -> PolarsResult { + pub fn truncate_us(&self, t: i64, tz: Option<&Tz>, ambiguous: &str) -> PolarsResult { self.truncate_impl( t, tz, |nsecs| nsecs / 1000, timestamp_us_to_datetime, datetime_to_timestamp_us, - use_earliest, + ambiguous, ) } // Truncate the given ms timestamp by the window boundary. #[inline] - pub fn truncate_ms( - &self, - t: i64, - tz: Option<&Tz>, - use_earliest: Option, - ) -> PolarsResult { + pub fn truncate_ms(&self, t: i64, tz: Option<&Tz>, ambiguous: &str) -> PolarsResult { self.truncate_impl( t, tz, |nsecs| nsecs / 1_000_000, timestamp_ms_to_datetime, datetime_to_timestamp_ms, - use_earliest, + ambiguous, ) } @@ -633,7 +616,7 @@ impl Duration { let dt = Self::add_month(ts, d.months, d.negative, d.saturating)?; new_t = match tz { #[cfg(feature = "timezones")] - Some(tz) => datetime_to_timestamp(localize_datetime(dt, tz, None)?), + Some(tz) => datetime_to_timestamp(localize_datetime(dt, tz, "raise")?), _ => datetime_to_timestamp(dt), }; } @@ -649,7 +632,7 @@ impl Duration { new_t = datetime_to_timestamp(localize_datetime( timestamp_to_datetime(new_t), tz, - None, + "raise", )?); }, _ => new_t += if d.negative { -t_weeks } else { t_weeks }, @@ -667,7 +650,7 @@ impl Duration { new_t = datetime_to_timestamp(localize_datetime( timestamp_to_datetime(new_t), tz, - None, + "raise", )?); }, _ => new_t += if d.negative { -t_days } else { t_days }, diff --git a/crates/polars-time/src/windows/window.rs b/crates/polars-time/src/windows/window.rs index 4aa51611fcb3..48e5fc3bcf69 100644 --- a/crates/polars-time/src/windows/window.rs +++ b/crates/polars-time/src/windows/window.rs @@ -31,88 +31,73 @@ impl Window { } /// Truncate the given ns timestamp by the window boundary. 
- pub fn truncate_ns( - &self, - t: i64, - tz: Option<&Tz>, - use_earliest: Option, - ) -> PolarsResult { - let t = self.every.truncate_ns(t, tz, use_earliest)?; + pub fn truncate_ns(&self, t: i64, tz: Option<&Tz>, ambiguous: &str) -> PolarsResult { + let t = self.every.truncate_ns(t, tz, ambiguous)?; self.offset.add_ns(t, tz) } pub fn truncate_no_offset_ns(&self, t: i64, tz: Option<&Tz>) -> PolarsResult { - self.every.truncate_ns(t, tz, None) + self.every.truncate_ns(t, tz, "raise") } /// Truncate the given us timestamp by the window boundary. - pub fn truncate_us( - &self, - t: i64, - tz: Option<&Tz>, - use_earliest: Option, - ) -> PolarsResult { - let t = self.every.truncate_us(t, tz, use_earliest)?; + pub fn truncate_us(&self, t: i64, tz: Option<&Tz>, ambiguous: &str) -> PolarsResult { + let t = self.every.truncate_us(t, tz, ambiguous)?; self.offset.add_us(t, tz) } pub fn truncate_no_offset_us(&self, t: i64, tz: Option<&Tz>) -> PolarsResult { - self.every.truncate_us(t, tz, None) + self.every.truncate_us(t, tz, "raise") } - pub fn truncate_ms( - &self, - t: i64, - tz: Option<&Tz>, - use_earliest: Option, - ) -> PolarsResult { - let t = self.every.truncate_ms(t, tz, use_earliest)?; + pub fn truncate_ms(&self, t: i64, tz: Option<&Tz>, ambiguous: &str) -> PolarsResult { + let t = self.every.truncate_ms(t, tz, ambiguous)?; self.offset.add_ms(t, tz) } #[inline] pub fn truncate_no_offset_ms(&self, t: i64, tz: Option<&Tz>) -> PolarsResult { - self.every.truncate_ms(t, tz, None) + self.every.truncate_ms(t, tz, "raise") } /// Round the given ns timestamp by the window boundary. pub fn round_ns(&self, t: i64, tz: Option<&Tz>) -> PolarsResult { let t = t + self.every.duration_ns() / 2_i64; - self.truncate_ns(t, tz, None) + self.truncate_ns(t, tz, "raise") } /// Round the given us timestamp by the window boundary. 
pub fn round_us(&self, t: i64, tz: Option<&Tz>) -> PolarsResult { let t = t + self.every.duration_ns() / (2 * timeunit_scale(ArrowTimeUnit::Nanosecond, ArrowTimeUnit::Microsecond) as i64); - self.truncate_us(t, tz, None) + self.truncate_us(t, tz, "raise") } /// Round the given ms timestamp by the window boundary. pub fn round_ms(&self, t: i64, tz: Option<&Tz>) -> PolarsResult { let t = t + self.every.duration_ns() / (2 * timeunit_scale(ArrowTimeUnit::Nanosecond, ArrowTimeUnit::Millisecond) as i64); - self.truncate_ms(t, tz, None) + self.truncate_ms(t, tz, "raise") } /// returns the bounds for the earliest window bounds /// that contains the given time t. For underlapping windows that /// do not contain time t, the window directly after time t will be returned. pub fn get_earliest_bounds_ns(&self, t: i64, tz: Option<&Tz>) -> PolarsResult { - let start = self.truncate_ns(t, tz, None)?; + let start = self.truncate_ns(t, tz, "raise")?; let stop = self.period.add_ns(start, tz)?; Ok(Bounds::new_checked(start, stop)) } pub fn get_earliest_bounds_us(&self, t: i64, tz: Option<&Tz>) -> PolarsResult { - let start = self.truncate_us(t, tz, None)?; + let start = self.truncate_us(t, tz, "raise")?; let stop = self.period.add_us(start, tz)?; Ok(Bounds::new_checked(start, stop)) } pub fn get_earliest_bounds_ms(&self, t: i64, tz: Option<&Tz>) -> PolarsResult { - let start = self.truncate_ms(t, tz, None)?; + let start = self.truncate_ms(t, tz, "raise")?; let stop = self.period.add_ms(start, tz)?; Ok(Bounds::new_checked(start, stop)) diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index ca5ea8c9f7d9..08012e099835 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -9,12 +9,13 @@ from polars.utils._parse_expr_input import parse_as_expression from polars.utils._wrap import wrap_expr from polars.utils.convert import _timedelta_to_pl_duration +from polars.utils.deprecation import rename_use_earliest_to_ambiguous if 
TYPE_CHECKING: from datetime import timedelta from polars import Expr - from polars.type_aliases import EpochTimeUnit, TimeUnit + from polars.type_aliases import Ambiguous, EpochTimeUnit, TimeUnit class ExprDateTimeNameSpace: @@ -31,6 +32,7 @@ def truncate( offset: str | timedelta | None = None, *, use_earliest: bool | None = None, + ambiguous: Ambiguous | Expr = "raise", ) -> Expr: """ Divide the date/datetime range into buckets. @@ -51,6 +53,15 @@ def truncate( - ``True``: use the earliest datetime - ``False``: use the latest datetime + .. deprecated:: 0.19.0 + Use `ambiguous` instead + ambiguous + Determine how to deal with ambiguous datetimes: + + - ``'raise'`` (default): raise + - ``'earliest'``: use the earliest datetime + - ``'latest'``: use the latest datetime + Notes ----- The ``every`` and ``offset`` argument are created with the @@ -148,8 +159,7 @@ def truncate( └─────────────────────┴─────────────────────┘ If crossing daylight savings time boundaries, you may want to use - `use_earliest` and combine with :func:`~polars.Series.dt.dst_offset` - and :func:`~polars.when`: + `ambiguous` and combine with :func:`~polars.Series.dt.dst_offset`: >>> df = ( ... pl.date_range( @@ -178,10 +188,17 @@ def truncate( │ 2020-10-25 02:15:00 GMT │ └─────────────────────────────┘ + >>> ambiguous_mapping = { + ... timedelta(hours=1): "earliest", + ... timedelta(hours=0): "latest", + ... } >>> df.select( - ... pl.when(pl.col("date").dt.dst_offset() == pl.duration(hours=1)) - ... .then(pl.col("date").dt.truncate("30m", use_earliest=True)) - ... .otherwise(pl.col("date").dt.truncate("30m", use_earliest=False)) + ... pl.col("date").dt.truncate( + ... "30m", + ... ambiguous=( + ... pl.col("date").dt.dst_offset().map_dict(ambiguous_mapping) + ... ), + ... ) ...
) shape: (7, 1) ┌─────────────────────────────┐ @@ -198,6 +215,9 @@ def truncate( │ 2020-10-25 02:00:00 GMT │ └─────────────────────────────┘ """ + ambiguous = rename_use_earliest_to_ambiguous(use_earliest, ambiguous) + if not isinstance(ambiguous, pl.Expr): + ambiguous = F.lit(ambiguous) if offset is None: offset = "0ns" @@ -205,7 +225,7 @@ def truncate( self._pyexpr.dt_truncate( _timedelta_to_pl_duration(every), _timedelta_to_pl_duration(offset), - use_earliest, + ambiguous._pyexpr, ) ) @@ -1492,7 +1512,11 @@ def convert_time_zone(self, time_zone: str) -> Expr: return wrap_expr(self._pyexpr.dt_convert_time_zone(time_zone)) def replace_time_zone( - self, time_zone: str | None, *, use_earliest: bool | None = None + self, + time_zone: str | None, + *, + use_earliest: bool | None = None, + ambiguous: Ambiguous | Expr = "raise", ) -> Expr: """ Replace time zone for an expression of type Datetime. @@ -1511,6 +1535,15 @@ def replace_time_zone( - ``True``: use the earliest datetime - ``False``: use the latest datetime + .. deprecated:: 0.19.0 + Use `ambiguous` instead + ambiguous + Determine how to deal with ambiguous datetimes: + + - ``'raise'`` (default): raise + - ``'earliest'``: use the earliest datetime + - ``'latest'``: use the latest datetime + Examples -------- >>> from datetime import datetime @@ -1553,42 +1586,37 @@ def replace_time_zone( ... "2018-10-28 02:00", ... "2018-10-28 02:30", ... "2018-10-28 02:00", - ... "2018-10-28 02:30", ... ] >>> df = pl.DataFrame( ... { ... "ts": pl.Series(dates).str.strptime(pl.Datetime), - ... "DST": [True, True, True, False, False], + ... "ambiguous": ["earliest", "earliest", "latest", "latest"], ... } ... ) >>> df.with_columns( - ... ts_localized=pl.when(pl.col("DST")) - ... .then( - ... pl.col("ts").dt.replace_time_zone( - ... "Europe/Brussels", use_earliest=True - ... ) - ... ) - ... .otherwise( - ... pl.col("ts").dt.replace_time_zone( - ... "Europe/Brussels", use_earliest=False - ... ) + ... 
ts_localized=pl.col("ts").dt.replace_time_zone( + ... "Europe/Brussels", ambiguous=pl.col("ambiguous") ... ) ... ) - shape: (5, 3) - ┌─────────────────────┬───────┬───────────────────────────────┐ - │ ts ┆ DST ┆ ts_localized │ - │ --- ┆ --- ┆ --- │ - │ datetime[μs] ┆ bool ┆ datetime[μs, Europe/Brussels] │ - ╞═════════════════════╪═══════╪═══════════════════════════════╡ - │ 2018-10-28 01:30:00 ┆ true ┆ 2018-10-28 01:30:00 CEST │ - │ 2018-10-28 02:00:00 ┆ true ┆ 2018-10-28 02:00:00 CEST │ - │ 2018-10-28 02:30:00 ┆ true ┆ 2018-10-28 02:30:00 CEST │ - │ 2018-10-28 02:00:00 ┆ false ┆ 2018-10-28 02:00:00 CET │ - │ 2018-10-28 02:30:00 ┆ false ┆ 2018-10-28 02:30:00 CET │ - └─────────────────────┴───────┴───────────────────────────────┘ - - """ - return wrap_expr(self._pyexpr.dt_replace_time_zone(time_zone, use_earliest)) + shape: (4, 3) + ┌─────────────────────┬───────────┬───────────────────────────────┐ + │ ts ┆ ambiguous ┆ ts_localized │ + │ --- ┆ --- ┆ --- │ + │ datetime[μs] ┆ str ┆ datetime[μs, Europe/Brussels] │ + ╞═════════════════════╪═══════════╪═══════════════════════════════╡ + │ 2018-10-28 01:30:00 ┆ earliest ┆ 2018-10-28 01:30:00 CEST │ + │ 2018-10-28 02:00:00 ┆ earliest ┆ 2018-10-28 02:00:00 CEST │ + │ 2018-10-28 02:30:00 ┆ latest ┆ 2018-10-28 02:30:00 CET │ + │ 2018-10-28 02:00:00 ┆ latest ┆ 2018-10-28 02:00:00 CET │ + └─────────────────────┴───────────┴───────────────────────────────┘ + + """ + ambiguous = rename_use_earliest_to_ambiguous(use_earliest, ambiguous) + if not isinstance(ambiguous, pl.Expr): + ambiguous = F.lit(ambiguous) + return wrap_expr( + self._pyexpr.dt_replace_time_zone(time_zone, ambiguous._pyexpr) + ) def days(self) -> Expr: """ diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index 5897eb77488e..b1d548e9fc61 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -3,15 +3,19 @@ import warnings from typing import TYPE_CHECKING +import polars._reexport as pl +from polars import 
functions as F from polars.datatypes import Date, Datetime, Time, py_type_to_dtype from polars.exceptions import ChronoFormatWarning from polars.utils._parse_expr_input import parse_as_expression from polars.utils._wrap import wrap_expr +from polars.utils.deprecation import rename_use_earliest_to_ambiguous from polars.utils.various import find_stacklevel if TYPE_CHECKING: from polars import Expr from polars.type_aliases import ( + Ambiguous, PolarsDataType, PolarsTemporalType, TimeUnit, @@ -83,6 +87,7 @@ def to_datetime( exact: bool = True, cache: bool = True, use_earliest: bool | None = None, + ambiguous: Ambiguous | Expr = "raise", ) -> Expr: """ Convert a Utf8 column into a Datetime column. @@ -119,6 +124,15 @@ def to_datetime( - ``True``: use the earliest datetime - ``False``: use the latest datetime + .. deprecated:: 0.19.0 + Use `ambiguous` instead + ambiguous + Determine how to deal with ambiguous datetimes: + + - ``'raise'`` (default): raise + - ``'earliest'``: use the earliest datetime + - ``'latest'``: use the latest datetime + Examples -------- >>> s = pl.Series(["2020-01-01 01:00Z", "2020-01-01 02:00Z"]) @@ -131,6 +145,9 @@ def to_datetime( ] """ _validate_format_argument(format) + ambiguous = rename_use_earliest_to_ambiguous(use_earliest, ambiguous) + if not isinstance(ambiguous, pl.Expr): + ambiguous = F.lit(ambiguous) return wrap_expr( self._pyexpr.str_to_datetime( format, @@ -139,7 +156,7 @@ def to_datetime( strict, exact, cache, - use_earliest, + ambiguous._pyexpr, ) ) @@ -190,6 +207,7 @@ def strptime( exact: bool = True, cache: bool = True, use_earliest: bool | None = None, + ambiguous: Ambiguous | Expr = "raise", ) -> Expr: """ Convert a Utf8 column into a Date/Datetime/Time column. @@ -221,6 +239,15 @@ def strptime( - ``True``: use the earliest datetime - ``False``: use the latest datetime + .. 
deprecated:: 0.19.0 + Use `ambiguous` instead + ambiguous + Determine how to deal with ambiguous datetimes: + + - ``'raise'`` (default): raise + - ``'earliest'``: use the earliest datetime + - ``'latest'``: use the latest datetime + Notes ----- When converting to a Datetime type, the time unit is inferred from the format @@ -281,6 +308,7 @@ def strptime( exact=exact, cache=cache, use_earliest=use_earliest, + ambiguous=ambiguous, ) elif dtype == Time: return self.to_time(format, strict=strict, cache=cache) diff --git a/py-polars/polars/functions/as_datatype.py b/py-polars/polars/functions/as_datatype.py index 80db1ead67f2..ec71f94f6a7c 100644 --- a/py-polars/polars/functions/as_datatype.py +++ b/py-polars/polars/functions/as_datatype.py @@ -10,6 +10,7 @@ parse_as_list_of_expressions, ) from polars.utils._wrap import wrap_expr +from polars.utils.deprecation import rename_use_earliest_to_ambiguous with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr @@ -19,7 +20,7 @@ from typing import Literal from polars import Expr, Series - from polars.type_aliases import IntoExpr, SchemaDict, TimeUnit + from polars.type_aliases import Ambiguous, IntoExpr, SchemaDict, TimeUnit def datetime_( @@ -34,6 +35,7 @@ def datetime_( time_unit: TimeUnit = "us", time_zone: str | None = None, use_earliest: bool | None = None, + ambiguous: Ambiguous | Expr = "raise", ) -> Expr: """ Create a Polars literal expression of type Datetime. @@ -65,6 +67,15 @@ def datetime_( - ``True``: use the earliest datetime - ``False``: use the latest datetime + .. deprecated:: 0.19.0 + Use `ambiguous` instead + ambiguous + Determine how to deal with ambiguous datetimes: + + - ``'raise'`` (default): raise + - ``'earliest'``: use the earliest datetime + - ``'latest'``: use the latest datetime + Returns ------- @@ -72,6 +83,9 @@ def datetime_( Expression of data type :class:`Datetime`. 
""" + ambiguous = parse_as_expression( + rename_use_earliest_to_ambiguous(use_earliest, ambiguous), str_as_lit=True + ) year_expr = parse_as_expression(year) month_expr = parse_as_expression(month) day_expr = parse_as_expression(day) @@ -96,7 +110,7 @@ def datetime_( microsecond, time_unit, time_zone, - use_earliest, + ambiguous, ) ) diff --git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index d3c9e08c0d26..d55681c2d25d 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -12,7 +12,7 @@ from polars import Expr, Series from polars.polars import PySeries - from polars.type_aliases import EpochTimeUnit, TimeUnit + from polars.type_aliases import Ambiguous, EpochTimeUnit, TimeUnit @expr_dispatch @@ -1142,7 +1142,11 @@ def convert_time_zone(self, time_zone: str) -> Series: """ def replace_time_zone( - self, time_zone: str | None, *, use_earliest: bool | None = None + self, + time_zone: str | None, + *, + use_earliest: bool | None = None, + ambiguous: Ambiguous | Series = "raise", ) -> Series: """ Replace time zone for a Series of type Datetime. @@ -1161,6 +1165,15 @@ def replace_time_zone( - ``True``: use the earliest datetime - ``False``: use the latest datetime + .. deprecated:: 0.19.0 + Use `ambiguous` instead + ambiguous + Determine how to deal with ambiguous datetimes: + + - ``'raise'`` (default): raise + - ``'earliest'``: use the earliest datetime + - ``'latest'``: use the latest datetime + Examples -------- >>> from datetime import datetime @@ -1203,39 +1216,29 @@ def replace_time_zone( ... "2018-10-28 02:00", ... "2018-10-28 02:30", ... "2018-10-28 02:00", - ... "2018-10-28 02:30", ... ] >>> df = pl.DataFrame( ... { ... "ts": pl.Series(dates).str.strptime(pl.Datetime), - ... "DST": [True, True, True, False, False], + ... "ambiguous": ["earliest", "earliest", "earliest", "latest"], ... } ... ) >>> df.with_columns( - ... ts_localized=pl.when(pl.col("DST")) - ... .then( - ... 
pl.col("ts").dt.replace_time_zone( - ... "Europe/Brussels", use_earliest=True - ... ) - ... ) - ... .otherwise( - ... pl.col("ts").dt.replace_time_zone( - ... "Europe/Brussels", use_earliest=False - ... ) + ... ts_localized=pl.col("ts").dt.replace_time_zone( + ... "Europe/Brussels", ambiguous=pl.col("ambiguous") ... ) ... ) - shape: (5, 3) - ┌─────────────────────┬───────┬───────────────────────────────┐ - │ ts ┆ DST ┆ ts_localized │ - │ --- ┆ --- ┆ --- │ - │ datetime[μs] ┆ bool ┆ datetime[μs, Europe/Brussels] │ - ╞═════════════════════╪═══════╪═══════════════════════════════╡ - │ 2018-10-28 01:30:00 ┆ true ┆ 2018-10-28 01:30:00 CEST │ - │ 2018-10-28 02:00:00 ┆ true ┆ 2018-10-28 02:00:00 CEST │ - │ 2018-10-28 02:30:00 ┆ true ┆ 2018-10-28 02:30:00 CEST │ - │ 2018-10-28 02:00:00 ┆ false ┆ 2018-10-28 02:00:00 CET │ - │ 2018-10-28 02:30:00 ┆ false ┆ 2018-10-28 02:30:00 CET │ - └─────────────────────┴───────┴───────────────────────────────┘ + shape: (4, 3) + ┌─────────────────────┬───────────┬───────────────────────────────┐ + │ ts ┆ ambiguous ┆ ts_localized │ + │ --- ┆ --- ┆ --- │ + │ datetime[μs] ┆ str ┆ datetime[μs, Europe/Brussels] │ + ╞═════════════════════╪═══════════╪═══════════════════════════════╡ + │ 2018-10-28 01:30:00 ┆ earliest ┆ 2018-10-28 01:30:00 CEST │ + │ 2018-10-28 02:00:00 ┆ earliest ┆ 2018-10-28 02:00:00 CEST │ + │ 2018-10-28 02:30:00 ┆ earliest ┆ 2018-10-28 02:30:00 CEST │ + │ 2018-10-28 02:00:00 ┆ latest ┆ 2018-10-28 02:00:00 CET │ + └─────────────────────┴───────────┴───────────────────────────────┘ """ @@ -1595,6 +1598,7 @@ def truncate( offset: str | dt.timedelta | None = None, *, use_earliest: bool | None = None, + ambiguous: Ambiguous | Series = "raise", ) -> Series: """ Divide the date/ datetime range into buckets. @@ -1615,6 +1619,15 @@ def truncate( - ``True``: use the earliest datetime - ``False``: use the latest datetime + .. 
deprecated:: 0.19.0 + Use `ambiguous` instead + ambiguous + Determine how to deal with ambiguous datetimes: + + - ``'raise'`` (default): raise + - ``'earliest'``: use the earliest datetime + - ``'latest'``: use the latest datetime + Notes ----- The ``every`` and ``offset`` argument are created with the @@ -1738,11 +1751,14 @@ def truncate( 2020-10-25 02:15:00 GMT ] - >>> pl.select( - ... pl.when(ser.dt.dst_offset() == pl.duration(hours=1)) - ... .then(ser.dt.truncate("30m", use_earliest=True)) - ... .otherwise(ser.dt.truncate("30m", use_earliest=False)) - ... )["date"] + >>> ( + ... ser.dt.truncate( + ... "30m", + ... ambiguous=(ser.dt.dst_offset() == pl.duration(hours=1)).map_dict( + ... {True: "earliest", False: "latest"} + ... ), + ... ) + ... ) shape: (7,) Series: 'date' [datetime[μs, Europe/London]] [ diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index 1ae8d9c9002e..70e8f4832540 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -8,6 +8,7 @@ from polars import Expr, Series from polars.polars import PySeries from polars.type_aliases import ( + Ambiguous, PolarsDataType, PolarsTemporalType, TimeUnit, @@ -79,6 +80,7 @@ def to_datetime( cache: bool = True, utc: bool | None = None, use_earliest: bool | None = None, + ambiguous: Ambiguous | Series = "raise", ) -> Series: """ Convert a Utf8 column into a Datetime column. @@ -124,6 +126,15 @@ def to_datetime( - ``True``: use the earliest datetime - ``False``: use the latest datetime + .. 
deprecated:: 0.19.0 + Use `ambiguous` instead + ambiguous + Determine how to deal with ambiguous datetimes: + + - ``'raise'`` (default): raise + - ``'earliest'``: use the earliest datetime + - ``'latest'``: use the latest datetime + Examples -------- >>> s = pl.Series(["2020-01-01 01:00Z", "2020-01-01 02:00Z"]) @@ -181,6 +192,7 @@ def strptime( exact: bool = True, cache: bool = True, use_earliest: bool | None = None, + ambiguous: Ambiguous | Series = "raise", ) -> Series: """ Convert a Utf8 column into a Date/Datetime/Time column. @@ -212,6 +224,15 @@ def strptime( - ``True``: use the earliest datetime - ``False``: use the latest datetime + .. deprecated:: 0.19.0 + Use `ambiguous` instead + ambiguous + Determine how to deal with ambiguous datetimes: + + - ``'raise'`` (default): raise + - ``'earliest'``: use the earliest datetime + - ``'latest'``: use the latest datetime + Notes ----- When converting to a Datetime type, the time unit is inferred from the format diff --git a/py-polars/polars/type_aliases.py b/py-polars/polars/type_aliases.py index e87d7ade9b12..d3713e68ae6f 100644 --- a/py-polars/polars/type_aliases.py +++ b/py-polars/polars/type_aliases.py @@ -139,6 +139,7 @@ ] # ListToStructWidthStrategy # The following have no equivalent on the Rust side +Ambiguous: TypeAlias = Literal["earliest", "latest", "raise"] ConcatMethod = Literal[ "vertical", "vertical_relaxed", "diagonal", "horizontal", "align" ] diff --git a/py-polars/polars/utils/deprecation.py b/py-polars/polars/utils/deprecation.py index db1948cf8913..4f3abdac0f03 100644 --- a/py-polars/polars/utils/deprecation.py +++ b/py-polars/polars/utils/deprecation.py @@ -9,6 +9,9 @@ if TYPE_CHECKING: import sys + from typing import Mapping + + from polars.type_aliases import Ambiguous if sys.version_info >= (3, 10): from typing import ParamSpec @@ -17,6 +20,13 @@ P = ParamSpec("P") T = TypeVar("T") +if TYPE_CHECKING: + from polars import Expr + +USE_EARLIEST_TO_AMBIGUOUS: Mapping[bool, Ambiguous] = { + True: 
"earliest", + False: "latest", +} def issue_deprecation_warning(message: str, *, version: str) -> None: @@ -225,3 +235,21 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: return wrapper return decorate + + +def rename_use_earliest_to_ambiguous( + use_earliest: bool | None, ambiguous: Ambiguous | Expr +) -> Ambiguous | Expr: + """Issue deprecation warning if deprecated `use_earliest` argument is used.""" + if isinstance(use_earliest, bool): + ambiguous = USE_EARLIEST_TO_AMBIGUOUS[use_earliest] + warnings.warn( + "The argument 'use_earliest' in 'replace_time_zone' is deprecated. " + f"Please replace `use_earliest={use_earliest}` with " + f"`ambiguous='{ambiguous}'`. Note that this new argument can also " + "accept expressions.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) + return ambiguous + return ambiguous diff --git a/py-polars/src/expr/datetime.rs b/py-polars/src/expr/datetime.rs index 2369b0b93eee..486bd794b4d8 100644 --- a/py-polars/src/expr/datetime.rs +++ b/py-polars/src/expr/datetime.rs @@ -42,23 +42,20 @@ impl PyExpr { } #[cfg(feature = "timezones")] - fn dt_replace_time_zone(&self, time_zone: Option, use_earliest: Option) -> Self { + #[pyo3(signature = (time_zone, ambiguous))] + fn dt_replace_time_zone(&self, time_zone: Option, ambiguous: Self) -> Self { self.inner .clone() .dt() - .replace_time_zone(time_zone, use_earliest) + .replace_time_zone(time_zone, ambiguous.inner) .into() } - fn dt_truncate(&self, every: String, offset: String, use_earliest: Option) -> Self { + fn dt_truncate(&self, every: String, offset: String, ambiguous: Self) -> Self { self.inner .clone() .dt() - .truncate(TruncateOptions { - every, - offset, - use_earliest, - }) + .truncate(TruncateOptions { every, offset }, ambiguous.inner) .into() } diff --git a/py-polars/src/expr/string.rs b/py-polars/src/expr/string.rs index 429de9ad04cc..936276a43995 100644 --- a/py-polars/src/expr/string.rs +++ b/py-polars/src/expr/string.rs @@ -18,12 +18,11 @@ impl PyExpr { 
strict, exact, cache, - use_earliest: None, }; self.inner.clone().str().to_date(options).into() } - #[pyo3(signature = (format, time_unit, time_zone, strict, exact, cache, use_earliest))] + #[pyo3(signature = (format, time_unit, time_zone, strict, exact, cache, ambiguous))] #[allow(clippy::too_many_arguments)] fn str_to_datetime( &self, @@ -33,19 +32,23 @@ impl PyExpr { strict: bool, exact: bool, cache: bool, - use_earliest: Option, + ambiguous: Self, ) -> Self { let options = StrptimeOptions { format, strict, exact, cache, - use_earliest, }; self.inner .clone() .str() - .to_datetime(time_unit.map(|tu| tu.0), time_zone, options) + .to_datetime( + time_unit.map(|tu| tu.0), + time_zone, + options, + ambiguous.inner, + ) .into() } @@ -56,7 +59,6 @@ impl PyExpr { strict, cache, exact: true, - use_earliest: None, }; self.inner.clone().str().to_time(options).into() } diff --git a/py-polars/src/functions/lazy.rs b/py-polars/src/functions/lazy.rs index fbf78e5ae957..843af02bb254 100644 --- a/py-polars/src/functions/lazy.rs +++ b/py-polars/src/functions/lazy.rs @@ -220,7 +220,7 @@ pub fn cumreduce(lambda: PyObject, exprs: Vec) -> PyExpr { #[allow(clippy::too_many_arguments)] #[pyfunction] -#[pyo3(signature = (year, month, day, hour=None, minute=None, second=None, microsecond=None, time_unit=Wrap(TimeUnit::Microseconds), time_zone=None, use_earliest=None))] +#[pyo3(signature = (year, month, day, hour=None, minute=None, second=None, microsecond=None, time_unit=Wrap(TimeUnit::Microseconds), time_zone=None, ambiguous=None))] pub fn datetime( year: PyExpr, month: PyExpr, @@ -231,12 +231,16 @@ pub fn datetime( microsecond: Option, time_unit: Wrap, time_zone: Option, - use_earliest: Option, + ambiguous: Option, ) -> PyExpr { let year = year.inner; let month = month.inner; let day = day.inner; set_unwrapped_or_0!(hour, minute, second, microsecond); + let ambiguous = ambiguous + .map(|e| e.inner) + .unwrap_or(dsl::lit(String::from("raise"))); + println!("ambiguous: {:?}", 
ambiguous); let time_unit = time_unit.0; let args = DatetimeArgs { @@ -249,7 +253,7 @@ pub fn datetime( microsecond, time_unit, time_zone, - use_earliest, + ambiguous, }; dsl::datetime(args).into() } diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py index 269b3947bc7d..d78547d33e9d 100644 --- a/py-polars/tests/unit/datatypes/test_temporal.py +++ b/py-polars/tests/unit/datatypes/test_temporal.py @@ -22,7 +22,7 @@ if TYPE_CHECKING: from zoneinfo import ZoneInfo - from polars.type_aliases import PolarsTemporalType, StartBy, TimeUnit + from polars.type_aliases import Ambiguous, PolarsTemporalType, StartBy, TimeUnit else: from polars.utils.convert import get_zoneinfo as ZoneInfo @@ -2112,36 +2112,196 @@ def test_replace_time_zone_from_naive() -> None: @pytest.mark.parametrize( - ("use_earliest", "expected"), + ("ambiguous", "expected"), [ ( - False, + "latest", datetime(2018, 10, 28, 2, 30, fold=0, tzinfo=ZoneInfo("Europe/Brussels")), ), ( - True, + "earliest", datetime(2018, 10, 28, 2, 30, fold=1, tzinfo=ZoneInfo("Europe/Brussels")), ), ], ) def test_replace_time_zone_ambiguous_with_use_earliest( - use_earliest: bool, expected: datetime + ambiguous: Ambiguous, expected: datetime ) -> None: ts = pl.Series(["2018-10-28 02:30:00"]).str.strptime(pl.Datetime) - result = ts.dt.replace_time_zone( - "Europe/Brussels", use_earliest=use_earliest - ).item() + result = ts.dt.replace_time_zone("Europe/Brussels", ambiguous=ambiguous).item() assert result == expected def test_replace_time_zone_ambiguous_raises() -> None: ts = pl.Series(["2018-10-28 02:30:00"]).str.strptime(pl.Datetime) with pytest.raises( - ArrowError, match="Please use `use_earliest` to tell how it should be localized" + ArrowError, match="Please use `ambiguous` to tell how it should be localized" ): ts.dt.replace_time_zone("Europe/Brussels") +def test_use_earliest_deprecation() -> None: + # strptime + with pytest.warns( + DeprecationWarning, + 
match="Please replace `use_earliest=True` with `ambiguous='earliest'`", + ): + result = pl.Series(["2020-10-25 01:00"]).str.strptime( + pl.Datetime("us", "Europe/London"), use_earliest=True + ) + expected = pl.Series(["2020-10-25 01:00"]).str.strptime( + pl.Datetime("us", "Europe/London"), ambiguous="earliest" + ) + assert_series_equal(result, expected) + with pytest.warns( + DeprecationWarning, + match="Please replace `use_earliest=False` with `ambiguous='latest'`", + ): + result = pl.Series(["2020-10-25 01:00"]).str.strptime( + pl.Datetime("us", "Europe/London"), use_earliest=False + ) + expected = pl.Series(["2020-10-25 01:00"]).str.strptime( + pl.Datetime("us", "Europe/London"), ambiguous="latest" + ) + assert_series_equal(result, expected) + + # truncate + ser = pl.Series(["2020-10-25 01:00"]).str.to_datetime( + time_zone="Europe/London", ambiguous="latest" + ) + with pytest.warns( + DeprecationWarning, + match="Please replace `use_earliest=True` with `ambiguous='earliest'`", + ): + result = ser.dt.truncate("1h", use_earliest=True) + expected = ser.dt.truncate("1h", ambiguous="earliest") + assert_series_equal(result, expected) + with pytest.warns( + DeprecationWarning, + match="Please replace `use_earliest=True` with `ambiguous='earliest'`", + ): + result = ser.dt.truncate("1h", use_earliest=True) + expected = ser.dt.truncate("1h", ambiguous="earliest") + assert_series_equal(result, expected) + + # replace_time_zone + ser = pl.Series([datetime(2020, 10, 25, 1)]) + with pytest.warns( + DeprecationWarning, + match="Please replace `use_earliest=True` with `ambiguous='earliest'`", + ): + result = ser.dt.replace_time_zone("Europe/London", use_earliest=True) + expected = ser.dt.replace_time_zone("Europe/London", ambiguous="earliest") + assert_series_equal(result, expected) + with pytest.warns( + DeprecationWarning, + match="Please replace `use_earliest=False` with `ambiguous='latest'`", + ): + result = ser.dt.replace_time_zone("Europe/London", use_earliest=False) + 
expected = ser.dt.replace_time_zone("Europe/London", ambiguous="latest") + assert_series_equal(result, expected) + + # pl.datetime + with pytest.warns( + DeprecationWarning, + match="Please replace `use_earliest=True` with `ambiguous='earliest'`", + ): + result = pl.select(pl.datetime(2020, 10, 25, 1, use_earliest=True))["datetime"] + expected = pl.select(pl.datetime(2020, 10, 25, 1, ambiguous="earliest"))["datetime"] + assert_series_equal(result, expected) + with pytest.warns( + DeprecationWarning, + match="Please replace `use_earliest=False` with `ambiguous='latest'`", + ): + result = pl.select(pl.datetime(2020, 10, 25, 1, use_earliest=False))["datetime"] + expected = pl.select(pl.datetime(2020, 10, 25, 1, ambiguous="latest"))["datetime"] + assert_series_equal(result, expected) + + +def test_ambiguous_expressions() -> None: + # strptime + df = pl.DataFrame( + { + "ts": ["2020-10-25 01:00"] * 2, + "ambiguous": ["earliest", "latest"], + } + ) + result = df.select( + pl.col("ts").str.strptime( + pl.Datetime("us", "Europe/London"), ambiguous=pl.col("ambiguous") + ) + )["ts"] + expected = pl.Series("ts", [1603584000000000, 1603587600000000]).cast( + pl.Datetime("us", "Europe/London") + ) + assert_series_equal(result, expected) + + # truncate + df = pl.DataFrame( + { + "ts": [datetime(2020, 10, 25, 1), datetime(2020, 10, 25, 1)], + "ambiguous": ["earliest", "latest"], + } + ) + df = df.with_columns( + pl.col("ts").dt.replace_time_zone( + "Europe/London", ambiguous=pl.col("ambiguous") + ) + ) + result = df.select(pl.col("ts").dt.truncate("1h", ambiguous=pl.col("ambiguous")))[ + "ts" + ] + expected = pl.Series("ts", [1603584000000000, 1603587600000000]).cast( + pl.Datetime("us", "Europe/London") + ) + assert_series_equal(result, expected) + + # replace_time_zone + df = pl.DataFrame( + { + "ts": [datetime(2020, 10, 25, 1), datetime(2020, 10, 25, 1)], + "ambiguous": ["earliest", "latest"], + } + ) + result = df.select( + pl.col("ts").dt.replace_time_zone( + 
"Europe/London", ambiguous=pl.col("ambiguous") + ) + )["ts"] + expected = pl.Series("ts", [1603584000000000, 1603587600000000]).cast( + pl.Datetime("us", "Europe/London") + ) + assert_series_equal(result, expected) + + # pl.datetime + df = pl.DataFrame( + { + "year": [2020] * 2, + "month": [10] * 2, + "day": [25] * 2, + "hour": [1] * 2, + "minute": [0] * 2, + "ambiguous": ["earliest", "latest"], + } + ) + result = df.select( + pl.datetime( + "year", + "month", + "day", + "hour", + "minute", + time_zone="Europe/London", + ambiguous=pl.col("ambiguous"), + ) + )["datetime"] + expected = pl.DataFrame( + {"datetime": [1603584000000000, 1603587600000000]}, + schema={"datetime": pl.Datetime("us", "Europe/London")}, + )["datetime"] + assert_series_equal(result, expected) + + def test_unlocalize() -> None: tz_naive = pl.Series(["2020-01-01 03:00:00"]).str.strptime(pl.Datetime) tz_aware = tz_naive.dt.replace_time_zone("UTC").dt.convert_time_zone( @@ -2442,8 +2602,8 @@ def test_truncate_use_earliest() -> None: ) result = df.select( pl.when(pl.col("use_earliest")) - .then(pl.col("date").dt.truncate("30m", use_earliest=True)) - .otherwise(pl.col("date").dt.truncate("30m", use_earliest=False)) + .then(pl.col("date").dt.truncate("30m", ambiguous="earliest")) + .otherwise(pl.col("date").dt.truncate("30m", ambiguous="latest")) ) expected = pl.date_range( date(2020, 10, 25), diff --git a/py-polars/tests/unit/functions/test_as_datatype.py b/py-polars/tests/unit/functions/test_as_datatype.py index 9442e4a5ff13..2f12b19097e6 100644 --- a/py-polars/tests/unit/functions/test_as_datatype.py +++ b/py-polars/tests/unit/functions/test_as_datatype.py @@ -62,7 +62,7 @@ def test_datetime_ambiguous_time_zone() -> None: def test_datetime_ambiguous_time_zone_use_earliest() -> None: expr = pl.datetime( - 2018, 10, 28, 2, 30, time_zone="Europe/Brussels", use_earliest=True + 2018, 10, 28, 2, 30, time_zone="Europe/Brussels", ambiguous="earliest" ) result = pl.select(expr).item() diff --git 
a/py-polars/tests/unit/namespaces/test_strptime.py b/py-polars/tests/unit/namespaces/test_strptime.py index 2e62967442d0..878078b56747 100644 --- a/py-polars/tests/unit/namespaces/test_strptime.py +++ b/py-polars/tests/unit/namespaces/test_strptime.py @@ -631,14 +631,14 @@ def test_to_time_format_warning() -> None: def test_to_datetime_use_earliest(exact: bool) -> None: result = ( pl.Series(["2020-10-25 01:00"]) - .str.to_datetime(time_zone="Europe/London", use_earliest=True, exact=exact) + .str.to_datetime(time_zone="Europe/London", ambiguous="earliest", exact=exact) .item() ) expected = datetime(2020, 10, 25, 1, fold=0, tzinfo=ZoneInfo("Europe/London")) assert result == expected result = ( pl.Series(["2020-10-25 01:00"]) - .str.to_datetime(time_zone="Europe/London", use_earliest=False, exact=exact) + .str.to_datetime(time_zone="Europe/London", ambiguous="latest", exact=exact) .item() ) expected = datetime(2020, 10, 25, 1, fold=1, tzinfo=ZoneInfo("Europe/London")) @@ -655,7 +655,7 @@ def test_strptime_use_earliest(exact: bool) -> None: result = ( pl.Series(["2020-10-25 01:00"]) .str.strptime( - pl.Datetime("us", "Europe/London"), use_earliest=True, exact=exact + pl.Datetime("us", "Europe/London"), ambiguous="earliest", exact=exact ) .item() ) @@ -664,7 +664,7 @@ def test_strptime_use_earliest(exact: bool) -> None: result = ( pl.Series(["2020-10-25 01:00"]) .str.strptime( - pl.Datetime("us", "Europe/London"), use_earliest=False, exact=exact + pl.Datetime("us", "Europe/London"), ambiguous="latest", exact=exact ) .item() )