Skip to content

Commit

Permalink
feat: Raise informative error instead of panicking when passing inval…
Browse files Browse the repository at this point in the history
…id directives to `to_string` for Date dtype (#17670)
  • Loading branch information
MarcoGorelli authored Jul 25, 2024
1 parent 903d891 commit 7137895
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 104 deletions.
48 changes: 48 additions & 0 deletions crates/polars-core/src/chunked_array/ops/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,54 @@ where

ChunkedArray::try_from_chunk_iter(self.name(), iter)
}

pub fn apply_into_string_amortized<'a, F>(&'a self, mut f: F) -> StringChunked
where
F: FnMut(T::Physical<'a>, &mut String),
{
let mut buf = String::new();
let chunks = self
.downcast_iter()
.map(|arr| {
let mut mutarr = MutablePlString::with_capacity(arr.len());
arr.iter().for_each(|opt| match opt {
None => mutarr.push_null(),
Some(v) => {
buf.clear();
f(v, &mut buf);
mutarr.push_value(&buf)
},
});
mutarr.freeze()
})
.collect::<Vec<_>>();
ChunkedArray::from_chunk_iter(self.name(), chunks)
}

pub fn try_apply_into_string_amortized<'a, F, E>(&'a self, mut f: F) -> Result<StringChunked, E>
where
F: FnMut(T::Physical<'a>, &mut String) -> Result<(), E>,
{
let mut buf = String::new();
let chunks = self
.downcast_iter()
.map(|arr| {
let mut mutarr = MutablePlString::with_capacity(arr.len());
for opt in arr.iter() {
match opt {
None => mutarr.push_null(),
Some(v) => {
buf.clear();
f(v, &mut buf)?;
mutarr.push_value(&buf)
},
};
}
Ok(mutarr.freeze())
})
.collect::<Vec<_>>();
ChunkedArray::try_from_chunk_iter(self.name(), chunks)
}
}

fn apply_in_place_impl<S, F>(name: &str, chunks: Vec<ArrayRef>, f: F) -> ChunkedArray<S>
Expand Down
30 changes: 8 additions & 22 deletions crates/polars-core/src/chunked_array/temporal/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,34 +32,20 @@ impl DateChunked {

/// Convert from Date into String with the given format.
/// See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).
pub fn to_string(&self, format: &str) -> StringChunked {
let mut ca: StringChunked = self.apply_kernel_cast(&|arr| {
let mut buf = String::new();
let mut mutarr = MutablePlString::with_capacity(arr.len());

for opt in arr.into_iter() {
match opt {
None => mutarr.push_null(),
Some(v) => {
buf.clear();
let datefmt = date32_to_date(*v).format(format);
write!(buf, "{datefmt}").unwrap();
mutarr.push_value(&buf)
},
}
}

mutarr.freeze().boxed()
});
ca.rename(self.name());
ca
pub fn to_string(&self, format: &str) -> PolarsResult<StringChunked> {
let datefmt_f = |ndt: NaiveDate| ndt.format(format);
self.try_apply_into_string_amortized(|val, buf| {
let ndt = date32_to_date(val);
write!(buf, "{}", datefmt_f(ndt))
})
.map_err(|_| polars_err!(ComputeError: "cannot format Date with format '{}'", format))
}

/// Convert from Date into String with the given format.
/// See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).
///
/// Alias for `to_string`.
pub fn strftime(&self, format: &str) -> StringChunked {
pub fn strftime(&self, format: &str) -> PolarsResult<StringChunked> {
self.to_string(format)
}

Expand Down
89 changes: 21 additions & 68 deletions crates/polars-core/src/chunked_array/temporal/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,55 +3,13 @@ use std::fmt::Write;
use arrow::temporal_conversions::{
timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_us_to_datetime,
};
use chrono::format::{DelayedFormat, StrftimeItems};
#[cfg(feature = "timezones")]
use chrono::TimeZone as TimeZoneTrait;

use super::*;
use crate::prelude::DataType::Datetime;
use crate::prelude::*;

fn apply_datefmt_f<'a>(
arr: &PrimitiveArray<i64>,
conversion_f: fn(i64) -> NaiveDateTime,
datefmt_f: impl Fn(NaiveDateTime) -> DelayedFormat<StrftimeItems<'a>>,
) -> ArrayRef {
let mut buf = String::new();
let mut mutarr = MutableBinaryViewArray::<str>::with_capacity(arr.len());
for opt in arr.into_iter() {
match opt {
None => mutarr.push_null(),
Some(v) => {
buf.clear();
let converted = conversion_f(*v);
let datefmt = datefmt_f(converted);
write!(buf, "{datefmt}").unwrap();
mutarr.push_value(&buf)
},
}
}
mutarr.freeze().boxed()
}

#[cfg(feature = "timezones")]
fn format_tz(
tz: Tz,
arr: &PrimitiveArray<i64>,
fmt: &str,
conversion_f: fn(i64) -> NaiveDateTime,
) -> ArrayRef {
let datefmt_f = |ndt| tz.from_utc_datetime(&ndt).format(fmt);
apply_datefmt_f(arr, conversion_f, datefmt_f)
}
fn format_naive(
arr: &PrimitiveArray<i64>,
fmt: &str,
conversion_f: fn(i64) -> NaiveDateTime,
) -> ArrayRef {
let datefmt_f = |ndt: NaiveDateTime| ndt.format(fmt);
apply_datefmt_f(arr, conversion_f, datefmt_f)
}

impl DatetimeChunked {
pub fn as_datetime_iter(&self) -> impl TrustedLen<Item = Option<NaiveDateTime>> + '_ {
let func = match self.time_unit() {
Expand Down Expand Up @@ -84,40 +42,35 @@ impl DatetimeChunked {
/// Convert from Datetime into String with the given format.
/// See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).
pub fn to_string(&self, format: &str) -> PolarsResult<StringChunked> {
#[cfg(feature = "timezones")]
use chrono::Utc;
let conversion_f = match self.time_unit() {
TimeUnit::Nanoseconds => timestamp_ns_to_datetime,
TimeUnit::Microseconds => timestamp_us_to_datetime,
TimeUnit::Milliseconds => timestamp_ms_to_datetime,
};

let dt = NaiveDate::from_ymd_opt(2001, 1, 1)
.unwrap()
.and_hms_opt(0, 0, 0)
.unwrap();
let mut fmted = String::new();
match self.time_zone() {
#[cfg(feature = "timezones")]
Some(_) => write!(
fmted,
"{}",
Utc.from_local_datetime(&dt).earliest().unwrap().format(format)
)
.map_err(
|_| polars_err!(ComputeError: "cannot format DateTime with format '{}'", format),
)?,
_ => write!(fmted, "{}", dt.format(format)).map_err(
|_| polars_err!(ComputeError: "cannot format NaiveDateTime with format '{}'", format),
)?,
};

let mut ca: StringChunked = match self.time_zone() {
#[cfg(feature = "timezones")]
Some(time_zone) => self.apply_kernel_cast(&|arr| {
format_tz(time_zone.parse::<Tz>().unwrap(), arr, format, conversion_f)
}),
_ => self.apply_kernel_cast(&|arr| format_naive(arr, format, conversion_f)),
Some(time_zone) => {
let parsed_time_zone = time_zone.parse::<Tz>().expect("already validated");
let datefmt_f = |ndt| parsed_time_zone.from_utc_datetime(&ndt).format(format);
self.try_apply_into_string_amortized(|val, buf| {
let ndt = conversion_f(val);
write!(buf, "{}", datefmt_f(ndt))
}
).map_err(
|_| polars_err!(ComputeError: "cannot format timezone-aware Datetime with format '{}'", format),
)?
},
_ => {
let datefmt_f = |ndt: NaiveDateTime| ndt.format(format);
self.try_apply_into_string_amortized(|val, buf| {
let ndt = conversion_f(val);
write!(buf, "{}", datefmt_f(ndt))
}
).map_err(
|_| polars_err!(ComputeError: "cannot format timezone-naive Datetime with format '{}'", format),
)?
},
};
ca.rename(self.name());
Ok(ca)
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/series/implementations/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ impl SeriesTrait for SeriesWrap<DateChunked> {
.into_series()
.date()
.unwrap()
.to_string("%Y-%m-%d")
.to_string("%Y-%m-%d")?
.into_series()),
#[cfg(feature = "dtype-datetime")]
DataType::Datetime(_, _) => {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-time/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ pub trait TemporalMethods: AsSeries {
let s = self.as_series();
match s.dtype() {
#[cfg(feature = "dtype-date")]
DataType::Date => s.date().map(|ca| ca.to_string(format).into_series()),
DataType::Date => s.date().map(|ca| Ok(ca.to_string(format)?.into_series()))?,
#[cfg(feature = "dtype-datetime")]
DataType::Datetime(_, _) => s
.datetime()
Expand Down
2 changes: 1 addition & 1 deletion crates/polars/tests/it/core/pivot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ fn test_pivot_date_() -> PolarsResult<()> {
)?;
out.try_apply("1", |s| {
let ca = s.date()?;
Ok(ca.to_string("%Y-%d-%m"))
ca.to_string("%Y-%d-%m")
})?;

let expected = df![
Expand Down
18 changes: 9 additions & 9 deletions py-polars/src/map/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
}
} else if out.is_instance_of::<PyDict>() {
let first = out.extract::<Wrap<AnyValue<'_>>>()?;
applyer.apply_to_struct(py, lambda, null_count, first.0)
applyer.apply_into_struct(py, lambda, null_count, first.0)
}
// this succeeds for numpy ints as well, where checking if it is pyint fails
// we do this later in the chain so that we don't extract integers from string chars.
Expand Down Expand Up @@ -128,7 +128,7 @@ pub trait ApplyLambda<'a> {
) -> PyResult<PySeries>;

// Used to store a struct type
fn apply_to_struct(
fn apply_into_struct(
&'a self,
py: Python,
lambda: &Bound<'a, PyAny>,
Expand Down Expand Up @@ -253,7 +253,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
.into())
}

fn apply_to_struct(
fn apply_into_struct(
&'a self,
py: Python,
lambda: &Bound<'a, PyAny>,
Expand Down Expand Up @@ -546,7 +546,7 @@ where
.into())
}

fn apply_to_struct(
fn apply_into_struct(
&'a self,
py: Python,
lambda: &Bound<'a, PyAny>,
Expand Down Expand Up @@ -832,7 +832,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
.into())
}

fn apply_to_struct(
fn apply_into_struct(
&'a self,
py: Python,
lambda: &Bound<'a, PyAny>,
Expand Down Expand Up @@ -1155,7 +1155,7 @@ impl<'a> ApplyLambda<'a> for ListChunked {
.into())
}

fn apply_to_struct(
fn apply_into_struct(
&'a self,
py: Python,
lambda: &Bound<'a, PyAny>,
Expand Down Expand Up @@ -1570,7 +1570,7 @@ impl<'a> ApplyLambda<'a> for ArrayChunked {
.into())
}

fn apply_to_struct(
fn apply_into_struct(
&'a self,
py: Python,
lambda: &Bound<'a, PyAny>,
Expand Down Expand Up @@ -1976,7 +1976,7 @@ impl<'a> ApplyLambda<'a> for ObjectChunked<ObjectValue> {
.into())
}

fn apply_to_struct(
fn apply_into_struct(
&'a self,
_py: Python,
lambda: &Bound<'a, PyAny>,
Expand Down Expand Up @@ -2257,7 +2257,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
Ok(self.clone().into_series().into())
}

fn apply_to_struct(
fn apply_into_struct(
&'a self,
_py: Python,
lambda: &Bound<'a, PyAny>,
Expand Down
8 changes: 6 additions & 2 deletions py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1651,11 +1651,15 @@ def test_tz_aware_truncate() -> None:
def test_to_string_invalid_format() -> None:
tz_naive = pl.Series(["2020-01-01"]).str.strptime(pl.Datetime)
with pytest.raises(
ComputeError, match="cannot format NaiveDateTime with format '%z'"
ComputeError, match="cannot format timezone-naive Datetime with format '%z'"
):
tz_naive.dt.to_string("%z")
with pytest.raises(ComputeError, match="cannot format DateTime with format '%q'"):
with pytest.raises(
ComputeError, match="cannot format timezone-aware Datetime with format '%q'"
):
tz_naive.dt.replace_time_zone("UTC").dt.to_string("%q")
with pytest.raises(ComputeError, match="cannot format Date with format '%q'"):
tz_naive.dt.date().dt.to_string("%q")


def test_tz_aware_to_string() -> None:
Expand Down

0 comments on commit 7137895

Please sign in to comment.