Skip to content

Commit

Permalink
refactor(python): Clean up conversion utils (#11789)
Browse files Browse the repository at this point in the history
Co-authored-by: Stijn de Gooijer <[email protected]>
  • Loading branch information
rancomp and stinodego authored Nov 9, 2023
1 parent ff38da7 commit e436e4f
Showing 1 changed file with 46 additions and 47 deletions.
93 changes: 46 additions & 47 deletions py-polars/polars/utils/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,19 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103
_views: list[Reversible[Any]] = [{}.keys(), {}.values(), {}.items()]
_reverse_mapping_views = tuple(type(reversed(view)) for view in _views)

SECONDS_PER_DAY = 60 * 60 * 24
SECONDS_PER_DAY = 86_400
SECONDS_PER_HOUR = 3_600
NS_PER_SECOND = 1_000_000_000
US_PER_SECOND = 1_000_000
MS_PER_SECOND = 1_000

EPOCH = datetime(1970, 1, 1).replace(tzinfo=None)
EPOCH_UTC = datetime(1970, 1, 1, tzinfo=timezone.utc)

_fromtimestamp = datetime.fromtimestamp


def _timestamp_in_seconds(dt: datetime) -> int:
du = dt - EPOCH_UTC
return du.days * 86400 + du.seconds
return du.days * SECONDS_PER_DAY + du.seconds


@overload
Expand Down Expand Up @@ -84,7 +87,7 @@ def _timedelta_to_pl_duration(td: timedelta | str | None) -> str | None:
else:
corrected_d = td.days + 1
d = corrected_d and f"{corrected_d}d" or "-"
corrected_seconds = 24 * 3600 - (td.seconds + (td.microseconds > 0))
corrected_seconds = SECONDS_PER_DAY - (td.seconds + (td.microseconds > 0))
s = corrected_seconds and f"{corrected_seconds}s" or ""
us = td.microseconds and f"{10**6 - td.microseconds}us" or ""

Expand All @@ -97,70 +100,66 @@ def _negate_duration(duration: str) -> str:
return f"-{duration}"


def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit) -> int:
def _time_to_pl_time(t: time) -> int:
t = t.replace(tzinfo=timezone.utc)
seconds = t.hour * SECONDS_PER_HOUR + t.minute * 60 + t.second
microseconds = t.microsecond
return seconds * NS_PER_SECOND + microseconds * 1_000


def _date_to_pl_date(d: date) -> int:
dt = datetime.combine(d, datetime.min.time()).replace(tzinfo=timezone.utc)
return int(dt.timestamp()) // SECONDS_PER_DAY


def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int:
"""Convert a python datetime to a timestamp in given time unit."""
if dt.tzinfo is None:
# Make sure to use UTC rather than system time zone.
dt = dt.replace(tzinfo=timezone.utc)
microseconds = dt.microsecond
seconds = _timestamp_in_seconds(dt)
if time_unit == "ns":
micros = dt.microsecond
return 1_000 * (_timestamp_in_seconds(dt) * 1_000_000 + micros)
return seconds * NS_PER_SECOND + microseconds * 1_000
elif time_unit == "us" or time_unit is None:
micros = dt.microsecond
return _timestamp_in_seconds(dt) * 1_000_000 + micros
return seconds * US_PER_SECOND + microseconds
elif time_unit == "ms":
millis = dt.microsecond // 1000
return _timestamp_in_seconds(dt) * 1_000 + millis
else:
raise ValueError(
f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}"
)


def _time_to_pl_time(t: time) -> int:
t = t.replace(tzinfo=timezone.utc)
return int((t.hour * 3_600 + t.minute * 60 + t.second) * 1e9 + t.microsecond * 1e3)


def _date_to_pl_date(d: date) -> int:
dt = datetime.combine(d, datetime.min.time()).replace(tzinfo=timezone.utc)
return int(dt.timestamp()) // (3600 * 24)
return seconds * MS_PER_SECOND + microseconds // 1_000
raise ValueError(
f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}"
)


def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int:
"""Convert a Python timedelta object to a total number of subseconds."""
microseconds = td.microseconds
seconds = td.days * SECONDS_PER_DAY + td.seconds
if time_unit == "ns":
subseconds = td.microseconds * 1_000
subseconds_per_second = 1_000_000_000
return seconds * NS_PER_SECOND + microseconds * 1_000
elif time_unit == "us" or time_unit is None:
return seconds * US_PER_SECOND + microseconds
elif time_unit == "ms":
subseconds = td.microseconds // 1_000
subseconds_per_second = 1_000
else:
subseconds = td.microseconds
subseconds_per_second = 1_000_000

subseconds += td.seconds * subseconds_per_second
subseconds += td.days * SECONDS_PER_DAY * subseconds_per_second

return subseconds
return seconds * MS_PER_SECOND + microseconds // 1_000


def _to_python_time(value: int) -> time:
"""Convert polars int64 (ns) timestamp to python time object."""
if value == 0:
return time(microsecond=0)
else:
seconds, nanoseconds = divmod(value, 1_000_000_000)
seconds, nanoseconds = divmod(value, NS_PER_SECOND)
minutes, seconds = divmod(seconds, 60)
hours, minutes = divmod(minutes, 60)
return time(
hour=hours, minute=minutes, second=seconds, microsecond=nanoseconds // 1000
hour=hours, minute=minutes, second=seconds, microsecond=nanoseconds // 1_000
)


def _to_python_timedelta(value: int | float, time_unit: TimeUnit = "ns") -> timedelta:
def _to_python_timedelta(
value: int | float, time_unit: TimeUnit | None = "ns"
) -> timedelta:
if time_unit == "ns":
return timedelta(microseconds=value // 1e3)
return timedelta(microseconds=value // 1_000)
elif time_unit == "us":
return timedelta(microseconds=value)
elif time_unit == "ms":
Expand All @@ -174,7 +173,7 @@ def _to_python_timedelta(value: int | float, time_unit: TimeUnit = "ns") -> time
@lru_cache(256)
def _to_python_date(value: int | float) -> date:
"""Convert polars int64 timestamp to Python date."""
return (EPOCH_UTC + timedelta(seconds=value * 86400)).date()
return (EPOCH_UTC + timedelta(seconds=value * SECONDS_PER_DAY)).date()


def _to_python_datetime(
Expand All @@ -187,23 +186,23 @@ def _to_python_datetime(
if time_unit == "us":
return EPOCH + timedelta(microseconds=value)
elif time_unit == "ns":
return EPOCH + timedelta(microseconds=value // 1000)
return EPOCH + timedelta(microseconds=value // 1_000)
elif time_unit == "ms":
return EPOCH + timedelta(milliseconds=value)
else:
raise ValueError(
f"`time_unit` must be one of {{'ns','us','ms'}}, got {time_unit!r}"
f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}"
)
elif _ZONEINFO_AVAILABLE:
if time_unit == "us":
dt = EPOCH_UTC + timedelta(microseconds=value)
elif time_unit == "ns":
dt = EPOCH_UTC + timedelta(microseconds=value // 1000)
dt = EPOCH_UTC + timedelta(microseconds=value // 1_000)
elif time_unit == "ms":
dt = EPOCH_UTC + timedelta(milliseconds=value)
else:
raise ValueError(
f"`time_unit` must be one of {{'ns','us','ms'}}, got {time_unit!r}"
f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}"
)
return _localize(dt, time_zone)
else:
Expand Down

0 comments on commit e436e4f

Please sign in to comment.