Skip to content

Commit

Permalink
further micro-optimisation, additional tests
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Nov 9, 2024
1 parent 34283e6 commit 620a23f
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 35 deletions.
45 changes: 29 additions & 16 deletions crates/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -991,11 +991,11 @@ pub fn fmt_duration_string(mut v: i64, unit: TimeUnit, iso: bool) -> String {
// Polars: "3d 22m 55s 1ms"
// ISO: "P3DT22M55.001S"
//
// The parts (days, hours, minutes, seconds) occur in the same order in
// the parts (days, hours, minutes, seconds) occur in the same order in
// each string, so we use the same code to generate each of them, with
// only the separators and the 'seconds' part differing.
//
// Ref: https://en.wikipedia.org/wiki/ISO_8601#Durations
// ref: https://en.wikipedia.org/wiki/ISO_8601#Durations
if v == 0 {
return if iso {
"PT0S".to_string()
Expand All @@ -1015,6 +1015,7 @@ pub fn fmt_duration_string(mut v: i64, unit: TimeUnit, iso: bool) -> String {

let mut s = String::with_capacity(32);
let mut buffer = itoa::Buffer::new();
let mut wrote_part = false;
if iso {
if v < 0 {
// negative sign before "P" indicates that the entire ISO duration is negative.
Expand All @@ -1034,34 +1035,46 @@ pub fn fmt_duration_string(mut v: i64, unit: TimeUnit, iso: bool) -> String {
(v % sizes[i - 1]) / size
};
if whole_num != 0 || (iso && i == 3) {
s.push_str(buffer.format(whole_num));
if iso {
// (index 3 => 'seconds' part): the ISO version writes
// fractional seconds, not integer nano/micro/milliseconds.
if i == 3 {
let secs = match unit {
TimeUnit::Nanoseconds => format!(".{:09}", v % size),
TimeUnit::Microseconds => format!(".{:06}", v % size),
TimeUnit::Milliseconds => format!(".{:03}", v % size),
};
s.push_str(secs.trim_end_matches('0'));
if s.ends_with('.') {
s.pop();
if i != 3 {
// days, hours, minutes
s.push_str(buffer.format(whole_num));
s.push_str(ISO_DURATION_PARTS[i]);
} else {
// (index 3 => 'seconds' part): the ISO version writes
// fractional seconds, not integer nano/micro/milliseconds.
// if both the whole and fractional seconds are zero, "0S" is only
// written when no other part has been written yet.
let fractional_part = v % size;
if whole_num == 0 && fractional_part == 0 {
if !wrote_part {
s.push_str("0S")
}
} else {
s.push_str(buffer.format(whole_num));
if fractional_part != 0 {
let secs = match unit {
TimeUnit::Nanoseconds => format!(".{:09}", fractional_part),
TimeUnit::Microseconds => format!(".{:06}", fractional_part),
TimeUnit::Milliseconds => format!(".{:03}", fractional_part),
};
s.push_str(secs.trim_end_matches('0'));
}
s.push_str(ISO_DURATION_PARTS[i]);
}
}
s.push_str(ISO_DURATION_PARTS[i]);

// (index 0 => 'days' part): after writing days above (if non-zero)
// the ISO duration string requires a `T` before the time part.
if i == 0 {
s.push('T');
}
} else {
s.push_str(buffer.format(whole_num));
s.push_str(DURATION_PARTS[i]);
if v % size != 0 {
s.push(' ');
}
}
wrote_part = true;
} else if iso && i == 0 {
// always need to write the `T` separator for ISO
// durations, even if there is no 'days' part.
Expand Down
77 changes: 59 additions & 18 deletions py-polars/tests/unit/datatypes/test_duration.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,28 @@ def test_duration_cum_sum() -> None:
assert df.schema["A"].is_(duration_dtype) is False


def test_duration_cast() -> None:
durations = [
timedelta(days=180, seconds=56789, microseconds=987654),
timedelta(days=0, seconds=64875, microseconds=8884),
timedelta(days=2, hours=23, seconds=4975, milliseconds=1),
timedelta(hours=1, seconds=1, milliseconds=1, microseconds=1),
timedelta(seconds=-42, milliseconds=-42),
timedelta(days=-1),
None,
]
def test_duration_to_string() -> None:
df = pl.DataFrame(
{
"td": [
timedelta(days=180, seconds=56789, microseconds=987654),
timedelta(days=0, seconds=64875, microseconds=8884),
timedelta(days=2, hours=23, seconds=4975, milliseconds=1),
timedelta(hours=1, seconds=1, milliseconds=1, microseconds=1),
timedelta(seconds=-42, milliseconds=-42),
None,
]
},
schema={"td": pl.Duration("us")},
)

df = pl.DataFrame({"td": durations}, schema={"td": pl.Duration("us")})
df_cast = df.select(
df_str = df.select(
td_ms=pl.col("td").cast(pl.Duration("ms")),
td_int=pl.col("td").cast(pl.Int64),
td_str_iso=pl.col("td").dt.to_string(),
td_str_pl=pl.col("td").dt.to_string("polars"),
)
assert df_cast.schema == {
assert df_str.schema == {
"td_ms": pl.Duration(time_unit="ms"),
"td_int": pl.Int64,
"td_str_iso": pl.String,
Expand All @@ -55,7 +58,6 @@ def test_duration_cast() -> None:
timedelta(days=2, hours=23, seconds=4975, milliseconds=1),
timedelta(hours=1, seconds=1, milliseconds=1),
timedelta(seconds=-42, milliseconds=-42),
timedelta(days=-1),
None,
],
"td_int": [
Expand All @@ -64,7 +66,6 @@ def test_duration_cast() -> None:
260575001000,
3601001001,
-42042000,
-86400000000,
None,
],
"td_str_iso": [
Expand All @@ -73,7 +74,6 @@ def test_duration_cast() -> None:
"P3DT22M55.001S",
"PT1H1.001001S",
"-PT42.042S",
"-P1DT0S",
None,
],
"td_str_pl": [
Expand All @@ -82,13 +82,54 @@ def test_duration_cast() -> None:
"3d 22m 55s 1ms",
"1h 1s 1001µs",
"-42s -42ms",
"-1d",
None,
],
},
schema_overrides={"td_ms": pl.Duration(time_unit="ms")},
)
assert_frame_equal(expected, df_cast)
assert_frame_equal(expected, df_str)

# each individual unit (weeks down to microseconds), positive and negative, plus zero
df = pl.DataFrame(
{
"td_ns": [
timedelta(weeks=1),
timedelta(days=1),
timedelta(hours=1),
timedelta(minutes=1),
timedelta(seconds=1),
timedelta(milliseconds=1),
timedelta(microseconds=1),
timedelta(seconds=0),
timedelta(microseconds=-1),
timedelta(milliseconds=-1),
timedelta(seconds=-1),
timedelta(minutes=-1),
timedelta(hours=-1),
timedelta(days=-1),
timedelta(weeks=-1),
]
},
schema={"td_ns": pl.Duration("ns")},
)
df_str = df.select(pl.col("td_ns").dt.to_string("iso"))
assert df_str["td_ns"].to_list() == [
"P7D",
"P1D",
"PT1H",
"PT1M",
"PT1S",
"PT0.001S",
"PT0.000001S",
"PT0S",
"-PT0.000001S",
"-PT0.001S",
"-PT1S",
"-PT1M",
"-PT1H",
"-P1D",
"-P7D",
]


def test_duration_std_var() -> None:
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1131,7 +1131,7 @@ def test_temporal_to_string_iso_default() -> None:
"0µs",
],
"td": [
"-P1DT42.S",
"-P1DT42S",
"P13DT14H0.001001S",
"PT0S",
],
Expand Down

0 comments on commit 620a23f

Please sign in to comment.