Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(python): Clean up conversion utils #11789

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 46 additions & 47 deletions py-polars/polars/utils/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,19 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103
_views: list[Reversible[Any]] = [{}.keys(), {}.values(), {}.items()]
_reverse_mapping_views = tuple(type(reversed(view)) for view in _views)

SECONDS_PER_DAY = 60 * 60 * 24
SECONDS_PER_DAY = 86_400
SECONDS_PER_HOUR = 3_600
NS_PER_SECOND = 1_000_000_000
US_PER_SECOND = 1_000_000
MS_PER_SECOND = 1_000

EPOCH = datetime(1970, 1, 1).replace(tzinfo=None)
EPOCH_UTC = datetime(1970, 1, 1, tzinfo=timezone.utc)

_fromtimestamp = datetime.fromtimestamp


def _timestamp_in_seconds(dt: datetime) -> int:
du = dt - EPOCH_UTC
return du.days * 86400 + du.seconds
return du.days * SECONDS_PER_DAY + du.seconds


@overload
Expand Down Expand Up @@ -84,7 +87,7 @@ def _timedelta_to_pl_duration(td: timedelta | str | None) -> str | None:
else:
corrected_d = td.days + 1
d = corrected_d and f"{corrected_d}d" or "-"
corrected_seconds = 24 * 3600 - (td.seconds + (td.microseconds > 0))
corrected_seconds = SECONDS_PER_DAY - (td.seconds + (td.microseconds > 0))
s = corrected_seconds and f"{corrected_seconds}s" or ""
us = td.microseconds and f"{10**6 - td.microseconds}us" or ""

Expand All @@ -97,70 +100,66 @@ def _negate_duration(duration: str) -> str:
return f"-{duration}"


def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit) -> int:
def _time_to_pl_time(t: time) -> int:
t = t.replace(tzinfo=timezone.utc)
seconds = t.hour * SECONDS_PER_HOUR + t.minute * 60 + t.second
microseconds = t.microsecond
return seconds * NS_PER_SECOND + microseconds * 1_000


def _date_to_pl_date(d: date) -> int:
dt = datetime.combine(d, datetime.min.time()).replace(tzinfo=timezone.utc)
return int(dt.timestamp()) // SECONDS_PER_DAY


def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int:
"""Convert a python datetime to a timestamp in given time unit."""
if dt.tzinfo is None:
# Make sure to use UTC rather than system time zone.
dt = dt.replace(tzinfo=timezone.utc)
microseconds = dt.microsecond
seconds = _timestamp_in_seconds(dt)
if time_unit == "ns":
micros = dt.microsecond
return 1_000 * (_timestamp_in_seconds(dt) * 1_000_000 + micros)
return seconds * NS_PER_SECOND + microseconds * 1_000
elif time_unit == "us" or time_unit is None:
micros = dt.microsecond
return _timestamp_in_seconds(dt) * 1_000_000 + micros
return seconds * US_PER_SECOND + microseconds
elif time_unit == "ms":
millis = dt.microsecond // 1000
return _timestamp_in_seconds(dt) * 1_000 + millis
else:
raise ValueError(
f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}"
)


def _time_to_pl_time(t: time) -> int:
t = t.replace(tzinfo=timezone.utc)
return int((t.hour * 3_600 + t.minute * 60 + t.second) * 1e9 + t.microsecond * 1e3)


def _date_to_pl_date(d: date) -> int:
dt = datetime.combine(d, datetime.min.time()).replace(tzinfo=timezone.utc)
return int(dt.timestamp()) // (3600 * 24)
return seconds * MS_PER_SECOND + microseconds // 1_000
raise ValueError(
f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}"
)


def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int:
"""Convert a Python timedelta object to a total number of subseconds."""
microseconds = td.microseconds
seconds = td.days * SECONDS_PER_DAY + td.seconds
if time_unit == "ns":
subseconds = td.microseconds * 1_000
subseconds_per_second = 1_000_000_000
return seconds * NS_PER_SECOND + microseconds * 1_000
elif time_unit == "us" or time_unit is None:
return seconds * US_PER_SECOND + microseconds
elif time_unit == "ms":
subseconds = td.microseconds // 1_000
subseconds_per_second = 1_000
else:
subseconds = td.microseconds
subseconds_per_second = 1_000_000

subseconds += td.seconds * subseconds_per_second
subseconds += td.days * SECONDS_PER_DAY * subseconds_per_second

return subseconds
return seconds * MS_PER_SECOND + microseconds // 1_000


def _to_python_time(value: int) -> time:
"""Convert polars int64 (ns) timestamp to python time object."""
if value == 0:
return time(microsecond=0)
else:
seconds, nanoseconds = divmod(value, 1_000_000_000)
seconds, nanoseconds = divmod(value, NS_PER_SECOND)
minutes, seconds = divmod(seconds, 60)
hours, minutes = divmod(minutes, 60)
return time(
hour=hours, minute=minutes, second=seconds, microsecond=nanoseconds // 1000
hour=hours, minute=minutes, second=seconds, microsecond=nanoseconds // 1_000
)


def _to_python_timedelta(value: int | float, time_unit: TimeUnit = "ns") -> timedelta:
def _to_python_timedelta(
value: int | float, time_unit: TimeUnit | None = "ns"
) -> timedelta:
if time_unit == "ns":
return timedelta(microseconds=value // 1e3)
return timedelta(microseconds=value // 1_000)
elif time_unit == "us":
return timedelta(microseconds=value)
elif time_unit == "ms":
Expand All @@ -174,7 +173,7 @@ def _to_python_timedelta(value: int | float, time_unit: TimeUnit = "ns") -> time
@lru_cache(256)
def _to_python_date(value: int | float) -> date:
"""Convert polars int64 timestamp to Python date."""
return (EPOCH_UTC + timedelta(seconds=value * 86400)).date()
return (EPOCH_UTC + timedelta(seconds=value * SECONDS_PER_DAY)).date()
rancomp marked this conversation as resolved.
Show resolved Hide resolved


def _to_python_datetime(
Expand All @@ -187,23 +186,23 @@ def _to_python_datetime(
if time_unit == "us":
return EPOCH + timedelta(microseconds=value)
elif time_unit == "ns":
return EPOCH + timedelta(microseconds=value // 1000)
return EPOCH + timedelta(microseconds=value // 1_000)
elif time_unit == "ms":
return EPOCH + timedelta(milliseconds=value)
else:
raise ValueError(
f"`time_unit` must be one of {{'ns','us','ms'}}, got {time_unit!r}"
f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}"
)
elif _ZONEINFO_AVAILABLE:
if time_unit == "us":
dt = EPOCH_UTC + timedelta(microseconds=value)
elif time_unit == "ns":
dt = EPOCH_UTC + timedelta(microseconds=value // 1000)
dt = EPOCH_UTC + timedelta(microseconds=value // 1_000)
elif time_unit == "ms":
dt = EPOCH_UTC + timedelta(milliseconds=value)
else:
raise ValueError(
f"`time_unit` must be one of {{'ns','us','ms'}}, got {time_unit!r}"
f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}"
)
return _localize(dt, time_zone)
else:
Expand Down