From 713789509c50c12eec2200e62151098892eb6893 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Thu, 25 Jul 2024 10:21:11 +0100 Subject: [PATCH] feat: Raise informative error instead of panicking when passing invalid directives to `to_string` for Date dtype (#17670) --- .../src/chunked_array/ops/apply.rs | 48 ++++++++++ .../src/chunked_array/temporal/date.rs | 30 ++----- .../src/chunked_array/temporal/datetime.rs | 89 +++++-------------- .../src/series/implementations/date.rs | 2 +- crates/polars-time/src/series/mod.rs | 2 +- crates/polars/tests/it/core/pivot.rs | 2 +- py-polars/src/map/series.rs | 18 ++-- .../tests/unit/datatypes/test_temporal.py | 8 +- 8 files changed, 95 insertions(+), 104 deletions(-) diff --git a/crates/polars-core/src/chunked_array/ops/apply.rs b/crates/polars-core/src/chunked_array/ops/apply.rs index 145c898b0bee..d24cfdbd29a0 100644 --- a/crates/polars-core/src/chunked_array/ops/apply.rs +++ b/crates/polars-core/src/chunked_array/ops/apply.rs @@ -115,6 +115,54 @@ where ChunkedArray::try_from_chunk_iter(self.name(), iter) } + + pub fn apply_into_string_amortized<'a, F>(&'a self, mut f: F) -> StringChunked + where + F: FnMut(T::Physical<'a>, &mut String), + { + let mut buf = String::new(); + let chunks = self + .downcast_iter() + .map(|arr| { + let mut mutarr = MutablePlString::with_capacity(arr.len()); + arr.iter().for_each(|opt| match opt { + None => mutarr.push_null(), + Some(v) => { + buf.clear(); + f(v, &mut buf); + mutarr.push_value(&buf) + }, + }); + mutarr.freeze() + }) + .collect::>(); + ChunkedArray::from_chunk_iter(self.name(), chunks) + } + + pub fn try_apply_into_string_amortized<'a, F, E>(&'a self, mut f: F) -> Result + where + F: FnMut(T::Physical<'a>, &mut String) -> Result<(), E>, + { + let mut buf = String::new(); + let chunks = self + .downcast_iter() + .map(|arr| { + let mut mutarr = MutablePlString::with_capacity(arr.len()); + for opt in arr.iter() { + match opt { + None => mutarr.push_null(), + Some(v) => { + buf.clear(); + f(v, &mut buf)?; + mutarr.push_value(&buf) + }, + }; + } + Ok(mutarr.freeze()) + }) + .collect::>(); + ChunkedArray::try_from_chunk_iter(self.name(), chunks) + } } fn apply_in_place_impl(name: &str, chunks: Vec, f: F) -> ChunkedArray diff --git a/crates/polars-core/src/chunked_array/temporal/date.rs b/crates/polars-core/src/chunked_array/temporal/date.rs index f517f5acc8ef..26e52d7d2f0f 100644 --- a/crates/polars-core/src/chunked_array/temporal/date.rs +++ b/crates/polars-core/src/chunked_array/temporal/date.rs @@ -32,34 +32,20 @@ impl DateChunked { /// Convert from Date into String with the given format. /// See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html). - pub fn to_string(&self, format: &str) -> StringChunked { - let mut ca: StringChunked = self.apply_kernel_cast(&|arr| { - let mut buf = String::new(); - let mut mutarr = MutablePlString::with_capacity(arr.len()); - - for opt in arr.into_iter() { - match opt { - None => mutarr.push_null(), - Some(v) => { - buf.clear(); - let datefmt = date32_to_date(*v).format(format); - write!(buf, "{datefmt}").unwrap(); - mutarr.push_value(&buf) - }, - } - } - - mutarr.freeze().boxed() - }); - ca.rename(self.name()); - ca + pub fn to_string(&self, format: &str) -> PolarsResult { + let datefmt_f = |ndt: NaiveDate| ndt.format(format); + self.try_apply_into_string_amortized(|val, buf| { + let ndt = date32_to_date(val); + write!(buf, "{}", datefmt_f(ndt)) + }) + .map_err(|_| polars_err!(ComputeError: "cannot format Date with format '{}'", format)) } /// Convert from Date into String with the given format. /// See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html). /// /// Alias for `to_string`. - pub fn strftime(&self, format: &str) -> StringChunked { + pub fn strftime(&self, format: &str) -> PolarsResult { self.to_string(format) } diff --git a/crates/polars-core/src/chunked_array/temporal/datetime.rs b/crates/polars-core/src/chunked_array/temporal/datetime.rs index adacd1c790a1..838bc2cd9527 100644 --- a/crates/polars-core/src/chunked_array/temporal/datetime.rs +++ b/crates/polars-core/src/chunked_array/temporal/datetime.rs @@ -3,7 +3,6 @@ use std::fmt::Write; use arrow::temporal_conversions::{ timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_us_to_datetime, }; -use chrono::format::{DelayedFormat, StrftimeItems}; #[cfg(feature = "timezones")] use chrono::TimeZone as TimeZoneTrait; @@ -11,47 +10,6 @@ use super::*; use crate::prelude::DataType::Datetime; use crate::prelude::*; -fn apply_datefmt_f<'a>( - arr: &PrimitiveArray, - conversion_f: fn(i64) -> NaiveDateTime, - datefmt_f: impl Fn(NaiveDateTime) -> DelayedFormat>, -) -> ArrayRef { - let mut buf = String::new(); - let mut mutarr = MutableBinaryViewArray::::with_capacity(arr.len()); - for opt in arr.into_iter() { - match opt { - None => mutarr.push_null(), - Some(v) => { - buf.clear(); - let converted = conversion_f(*v); - let datefmt = datefmt_f(converted); - write!(buf, "{datefmt}").unwrap(); - mutarr.push_value(&buf) - }, - } - } - mutarr.freeze().boxed() -} - -#[cfg(feature = "timezones")] -fn format_tz( - tz: Tz, - arr: &PrimitiveArray, - fmt: &str, - conversion_f: fn(i64) -> NaiveDateTime, -) -> ArrayRef { - let datefmt_f = |ndt| tz.from_utc_datetime(&ndt).format(fmt); - apply_datefmt_f(arr, conversion_f, datefmt_f) -} -fn format_naive( - arr: &PrimitiveArray, - fmt: &str, - conversion_f: fn(i64) -> NaiveDateTime, -) -> ArrayRef { - let datefmt_f = |ndt: NaiveDateTime| ndt.format(fmt); - apply_datefmt_f(arr, conversion_f, datefmt_f) -} - impl DatetimeChunked { pub fn as_datetime_iter(&self) -> impl TrustedLen> + '_ { let func = match self.time_unit() { @@ -84,40 +42,35 @@ impl DatetimeChunked { /// Convert from Datetime into String with the given format. /// See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html). pub fn to_string(&self, format: &str) -> PolarsResult { - #[cfg(feature = "timezones")] - use chrono::Utc; let conversion_f = match self.time_unit() { TimeUnit::Nanoseconds => timestamp_ns_to_datetime, TimeUnit::Microseconds => timestamp_us_to_datetime, TimeUnit::Milliseconds => timestamp_ms_to_datetime, }; - let dt = NaiveDate::from_ymd_opt(2001, 1, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap(); - let mut fmted = String::new(); - match self.time_zone() { - #[cfg(feature = "timezones")] - Some(_) => write!( - fmted, - "{}", - Utc.from_local_datetime(&dt).earliest().unwrap().format(format) - ) - .map_err( - |_| polars_err!(ComputeError: "cannot format DateTime with format '{}'", format), - )?, - _ => write!(fmted, "{}", dt.format(format)).map_err( - |_| polars_err!(ComputeError: "cannot format NaiveDateTime with format '{}'", format), - )?, - }; - let mut ca: StringChunked = match self.time_zone() { #[cfg(feature = "timezones")] - Some(time_zone) => self.apply_kernel_cast(&|arr| { - format_tz(time_zone.parse::().unwrap(), arr, format, conversion_f) - }), - _ => self.apply_kernel_cast(&|arr| format_naive(arr, format, conversion_f)), + Some(time_zone) => { + let parsed_time_zone = time_zone.parse::().expect("already validated"); + let datefmt_f = |ndt| parsed_time_zone.from_utc_datetime(&ndt).format(format); + self.try_apply_into_string_amortized(|val, buf| { + let ndt = conversion_f(val); + write!(buf, "{}", datefmt_f(ndt)) + } + ).map_err( + |_| polars_err!(ComputeError: "cannot format timezone-aware Datetime with format '{}'", format), + )? + }, + _ => { + let datefmt_f = |ndt: NaiveDateTime| ndt.format(format); + self.try_apply_into_string_amortized(|val, buf| { + let ndt = conversion_f(val); + write!(buf, "{}", datefmt_f(ndt)) + } + ).map_err( + |_| polars_err!(ComputeError: "cannot format timezone-naive Datetime with format '{}'", format), + )? + }, }; ca.rename(self.name()); Ok(ca) diff --git a/crates/polars-core/src/series/implementations/date.rs b/crates/polars-core/src/series/implementations/date.rs index 44a8ee6b32bf..0585952da0ce 100644 --- a/crates/polars-core/src/series/implementations/date.rs +++ b/crates/polars-core/src/series/implementations/date.rs @@ -242,7 +242,7 @@ impl SeriesTrait for SeriesWrap { .into_series() .date() .unwrap() - .to_string("%Y-%m-%d") + .to_string("%Y-%m-%d")? .into_series()), #[cfg(feature = "dtype-datetime")] DataType::Datetime(_, _) => { diff --git a/crates/polars-time/src/series/mod.rs b/crates/polars-time/src/series/mod.rs index c3fbf9aac635..4564addab4ca 100644 --- a/crates/polars-time/src/series/mod.rs +++ b/crates/polars-time/src/series/mod.rs @@ -232,7 +232,7 @@ pub trait TemporalMethods: AsSeries { let s = self.as_series(); match s.dtype() { #[cfg(feature = "dtype-date")] - DataType::Date => s.date().map(|ca| ca.to_string(format).into_series()), + DataType::Date => s.date().map(|ca| Ok(ca.to_string(format)?.into_series()))?, #[cfg(feature = "dtype-datetime")] DataType::Datetime(_, _) => s .datetime() diff --git a/crates/polars/tests/it/core/pivot.rs b/crates/polars/tests/it/core/pivot.rs index e6e507be3163..b0e1b13ca9f4 100644 --- a/crates/polars/tests/it/core/pivot.rs +++ b/crates/polars/tests/it/core/pivot.rs @@ -42,7 +42,7 @@ fn test_pivot_date_() -> PolarsResult<()> { )?; out.try_apply("1", |s| { let ca = s.date()?; - Ok(ca.to_string("%Y-%d-%m")) + ca.to_string("%Y-%d-%m") })?; let expected = df![ diff --git a/py-polars/src/map/series.rs b/py-polars/src/map/series.rs index c4a4288d9d97..9ec530002429 100644 --- a/py-polars/src/map/series.rs +++ b/py-polars/src/map/series.rs @@ -83,7 +83,7 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>( } } else if out.is_instance_of::() { let first = out.extract::>>()?; - applyer.apply_to_struct(py, lambda, null_count, first.0) + applyer.apply_into_struct(py, lambda, null_count, first.0) } // this succeeds for numpy ints as well, where checking if it is pyint fails // we do this later in the chain so that we don't extract integers from string chars. @@ -128,7 +128,7 @@ pub trait ApplyLambda<'a> { ) -> PyResult; // Used to store a struct type - fn apply_to_struct( + fn apply_into_struct( &'a self, py: Python, lambda: &Bound<'a, PyAny>, @@ -253,7 +253,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked { .into()) } - fn apply_to_struct( + fn apply_into_struct( &'a self, py: Python, lambda: &Bound<'a, PyAny>, @@ -546,7 +546,7 @@ where .into()) } - fn apply_to_struct( + fn apply_into_struct( &'a self, py: Python, lambda: &Bound<'a, PyAny>, @@ -832,7 +832,7 @@ impl<'a> ApplyLambda<'a> for StringChunked { .into()) } - fn apply_to_struct( + fn apply_into_struct( &'a self, py: Python, lambda: &Bound<'a, PyAny>, @@ -1155,7 +1155,7 @@ impl<'a> ApplyLambda<'a> for ListChunked { .into()) } - fn apply_to_struct( + fn apply_into_struct( &'a self, py: Python, lambda: &Bound<'a, PyAny>, @@ -1570,7 +1570,7 @@ impl<'a> ApplyLambda<'a> for ArrayChunked { .into()) } - fn apply_to_struct( + fn apply_into_struct( &'a self, py: Python, lambda: &Bound<'a, PyAny>, @@ -1976,7 +1976,7 @@ impl<'a> ApplyLambda<'a> for ObjectChunked { .into()) } - fn apply_to_struct( + fn apply_into_struct( &'a self, _py: Python, lambda: &Bound<'a, PyAny>, @@ -2257,7 +2257,7 @@ impl<'a> ApplyLambda<'a> for StructChunked { Ok(self.clone().into_series().into()) } - fn apply_to_struct( + fn apply_into_struct( &'a self, _py: Python, lambda: &Bound<'a, PyAny>, diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py index 0c4ef2cc8ad2..02a80701630c 100644 --- a/py-polars/tests/unit/datatypes/test_temporal.py +++ b/py-polars/tests/unit/datatypes/test_temporal.py @@ -1651,11 +1651,15 @@ def test_tz_aware_truncate() -> None: def test_to_string_invalid_format() -> None: tz_naive = pl.Series(["2020-01-01"]).str.strptime(pl.Datetime) with pytest.raises( - ComputeError, match="cannot format NaiveDateTime with format '%z'" + ComputeError, match="cannot format timezone-naive Datetime with format '%z'" ): tz_naive.dt.to_string("%z") - with pytest.raises(ComputeError, match="cannot format DateTime with format '%q'"): + with pytest.raises( + ComputeError, match="cannot format timezone-aware Datetime with format '%q'" + ): tz_naive.dt.replace_time_zone("UTC").dt.to_string("%q") + with pytest.raises(ComputeError, match="cannot format Date with format '%q'"): + tz_naive.dt.date().dt.to_string("%q") def test_tz_aware_to_string() -> None: