Skip to content

Commit

Permalink
fix(rust, python): fix date/datetime parsing for short inputs with exact=False (#10231)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Aug 2, 2023
1 parent d834458 commit d434aee
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 46 deletions.
74 changes: 28 additions & 46 deletions crates/polars-time/src/chunkedarray/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,54 +29,33 @@ fn datetime_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
where
F: Fn(&str, &str) -> chrono::ParseResult<K>,
{
[
// 21/12/31 12:54:98
"%y/%m/%d %H:%M:%S",
// 2021-12-31 24:58:01
"%Y-%m-%d %H:%M:%S",
// 21/12/31 24:58:01
"%y/%m/%d %H:%M:%S",
//210319 23:58:50
"%y%m%d %H:%M:%S",
// 2021/12/31 12:54:98
"%Y/%m/%d %H:%M:%S",
// 2021-12-31 24:58:01
"%Y-%m-%d %H:%M:%S",
// 2021/12/31 24:58:01
"%Y/%m/%d %H:%M:%S",
// 20210319 23:58:50
"%Y%m%d %H:%M:%S",
// note: '%F' cannot be parsed by polars native parser
// 2019-04-18T02:45:55
"%Y-%m-%dT%H:%M:%S",
// 2019-04-18T02:45:55[...]
// milliseconds
"%Y-%m-%d %H:%M:%S.%3f",
"%Y-%m-%dT%H:%M:%S.%3f",
// microseconds
"%Y-%m-%d %H:%M:%S.%6f",
"%Y-%m-%dT%H:%M:%S.%6f",
// nanoseconds
"%Y-%m-%d %H:%M:%S.%9f",
"%Y-%m-%dT%H:%M:%S.%9f",
]
.into_iter()
.find(|&fmt| convert(val, fmt).is_ok())
let result = patterns::DATETIME_Y_M_D
.iter()
.find(|fmt| convert(val, fmt).is_ok())
.copied();
result.or_else(|| {
patterns::DATETIME_D_M_Y
.iter()
.find(|fmt| convert(val, fmt).is_ok())
.copied()
})
}

fn date_pattern<F, K>(val: &str, convert: F) -> Option<&'static str>
// (string, fmt) -> PolarsResult
where
F: Fn(&str, &str) -> chrono::ParseResult<K>,
{
[
// 2021-12-31
"%Y-%m-%d", // 31-12-2021
"%d-%m-%Y", // 2021319 (2021-03-19)
"%Y%m%d",
]
.into_iter()
.find(|&fmt| convert(val, fmt).is_ok())
let result = patterns::DATE_Y_M_D
.iter()
.find(|fmt| convert(val, fmt).is_ok())
.copied();
result.or_else(|| {
patterns::DATE_D_M_Y
.iter()
.find(|fmt| convert(val, fmt).is_ok())
.copied()
})
}

struct ParseErrorByteCopy(ParseErrorKind);
Expand Down Expand Up @@ -113,10 +92,13 @@ fn get_first_val(ca: &Utf8Chunked) -> PolarsResult<&str> {
#[cfg(feature = "dtype-datetime")]
fn sniff_fmt_datetime(ca_utf8: &Utf8Chunked) -> PolarsResult<&'static str> {
let val = get_first_val(ca_utf8)?;
if let Some(pattern) = datetime_pattern(val, NaiveDateTime::parse_from_str) {
return Ok(pattern);
match datetime_pattern(val, NaiveDateTime::parse_from_str) {
Some(pattern) => Ok(pattern),
None => match datetime_pattern(val, NaiveDate::parse_from_str) {
Some(pattern) => Ok(pattern),
None => polars_bail!(parse_fmt_idk = "datetime"),
},
}
polars_bail!(parse_fmt_idk = "date");
}

#[cfg(feature = "dtype-date")]
Expand Down Expand Up @@ -216,7 +198,7 @@ pub trait Utf8Methods: AsUtf8 {
Some(mut s) => {
let fmt_len = fmt.len();

for i in 1..(s.len() - fmt_len) {
for i in 1..(s.len().saturating_sub(fmt_len)) {
if s.is_empty() {
return None;
}
Expand Down Expand Up @@ -273,7 +255,7 @@ pub trait Utf8Methods: AsUtf8 {
Some(mut s) => {
let fmt_len = fmt.len();

for i in 1..(s.len() - fmt_len) {
for i in 1..(s.len().saturating_sub(fmt_len)) {
if s.is_empty() {
return None;
}
Expand Down
14 changes: 14 additions & 0 deletions py-polars/tests/unit/namespaces/test_strptime.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,20 @@ def test_to_date_non_exact_strptime() -> None:
s.str.to_date(format, strict=True, exact=True)


# Regression test for short, unparseable elements when parsing with exact=False
# (the "10223" suffix presumably refers to the originating issue number — confirm).
# Each case builds a two-element Series: a valid "2019-01-01" followed by a
# one- or two-character string, then calls the `str` namespace method named by
# `attr` (to_date or to_datetime) with exact=False and no explicit format.
@pytest.mark.parametrize(
("value", "attr"),
[
("a", "to_date"),
("ab", "to_date"),
("a", "to_datetime"),
("ab", "to_datetime"),
],
)
def test_non_exact_short_elements_10223(value: str, attr: str) -> None:
# The call must raise pl.ComputeError with a message matching the regex below.
with pytest.raises(pl.ComputeError, match="strict conversion to .* failed"):
getattr(pl.Series(["2019-01-01", value]).str, attr)(exact=False)


@pytest.mark.parametrize(
("offset", "time_zone", "tzinfo", "format"),
[
Expand Down

0 comments on commit d434aee

Please sign in to comment.