From 1405d00e9da161dfe24941afd2f415f55f6460bb Mon Sep 17 00:00:00 2001 From: ran Date: Mon, 16 Oct 2023 21:50:18 +0300 Subject: [PATCH 01/22] seconds_per_day --- py-polars/polars/utils/convert.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 85caae3266b3..d7146c349ffd 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -54,7 +54,7 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 def _timestamp_in_seconds(dt: datetime) -> int: du = dt - EPOCH_UTC - return du.days * 86400 + du.seconds + return du.days * SECONDS_PER_DAY + du.seconds @overload @@ -84,7 +84,7 @@ def _timedelta_to_pl_duration(td: timedelta | str | None) -> str | None: else: corrected_d = td.days + 1 d = corrected_d and f"{corrected_d}d" or "-" - corrected_seconds = 24 * 3600 - (td.seconds + (td.microseconds > 0)) + corrected_seconds = SECONDS_PER_DAY - (td.seconds + (td.microseconds > 0)) s = corrected_seconds and f"{corrected_seconds}s" or "" us = td.microseconds and f"{10**6 - td.microseconds}us" or "" @@ -124,7 +124,7 @@ def _time_to_pl_time(t: time) -> int: def _date_to_pl_date(d: date) -> int: dt = datetime.combine(d, datetime.min.time()).replace(tzinfo=timezone.utc) - return int(dt.timestamp()) // (3600 * 24) + return int(dt.timestamp()) // SECONDS_PER_DAY def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: @@ -174,7 +174,7 @@ def _to_python_timedelta(value: int | float, time_unit: TimeUnit = "ns") -> time @lru_cache(256) def _to_python_date(value: int | float) -> date: """Convert polars int64 timestamp to Python date.""" - return (EPOCH_UTC + timedelta(seconds=value * 86400)).date() + return (EPOCH_UTC + timedelta(seconds=value * SECONDS_PER_DAY)).date() def _to_python_datetime( From 83829cfe8aecda5476deb29dad6b5ae02133497d Mon Sep 17 00:00:00 2001 From: ran Date: Mon, 16 Oct 2023 22:58:13 +0300 Subject: [PATCH 02/22] add validate time unit method --- py-polars/polars/utils/convert.py | 38 +++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index d7146c349ffd..c34465d06394 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -48,6 +48,13 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 SECONDS_PER_DAY = 60 * 60 * 24 EPOCH = datetime(1970, 1, 1).replace(tzinfo=None) EPOCH_UTC = datetime(1970, 1, 1, tzinfo=timezone.utc) +ADMISSABLE_TIME_UNITS = {"ns", "us", "ms"} + +def _validate_time_unit(time_unit: TimeUnit) -> None: + if time_unit not in ADMISSABLE_TIME_UNITS: + raise ValueError( + f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" + ) _fromtimestamp = datetime.fromtimestamp @@ -102,19 +109,20 @@ def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: if dt.tzinfo is None: # Make sure to use UTC rather than system time zone. dt = dt.replace(tzinfo=timezone.utc) + if time_unit is None: + time_unit = "us" + _validate_time_unit(time_unit) + if time_unit == "ns": micros = dt.microsecond return 1_000 * (_timestamp_in_seconds(dt) * 1_000_000 + micros) - elif time_unit == "us" or time_unit is None: + elif time_unit == "us": micros = dt.microsecond return _timestamp_in_seconds(dt) * 1_000_000 + micros elif time_unit == "ms": millis = dt.microsecond // 1000 return _timestamp_in_seconds(dt) * 1_000 + millis - else: - raise ValueError( - f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" - ) + def _time_to_pl_time(t: time) -> int: @@ -129,6 +137,10 @@ def _date_to_pl_date(d: date) -> int: def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: """Convert a Python timedelta object to a total number of subseconds.""" + if time_unit is None: + time_unit = "us" + _validate_time_unit(time_unit) + if time_unit == "ns": subseconds = td.microseconds * 1_000 subseconds_per_second = 1_000_000_000 @@ -159,16 +171,14 @@ def _to_python_time(value: int) -> time: def _to_python_timedelta(value: int | float, time_unit: TimeUnit = "ns") -> timedelta: + _validate_time_unit(time_unit) + if time_unit == "ns": return timedelta(microseconds=value // 1e3) elif time_unit == "us": return timedelta(microseconds=value) elif time_unit == "ms": return timedelta(milliseconds=value) - else: - raise ValueError( - f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" - ) @lru_cache(256) @@ -184,27 +194,21 @@ def _to_python_datetime( ) -> datetime: """Convert polars int64 timestamp to Python datetime.""" if not time_zone: + _validate_time_unit(time_unit) if time_unit == "us": return EPOCH + timedelta(microseconds=value) elif time_unit == "ns": return EPOCH + timedelta(microseconds=value // 1000) elif time_unit == "ms": return EPOCH + timedelta(milliseconds=value) - else: - raise ValueError( - f"`time_unit` must be one of {{'ns','us','ms'}}, got {time_unit!r}" - ) elif _ZONEINFO_AVAILABLE: + _validate_time_unit(time_unit) if time_unit == "us": dt = EPOCH_UTC + timedelta(microseconds=value) elif time_unit == "ns": dt = EPOCH_UTC + timedelta(microseconds=value // 1000) elif time_unit == "ms": dt = EPOCH_UTC + timedelta(milliseconds=value) - else: - raise ValueError( - f"`time_unit` must be one of {{'ns','us','ms'}}, got {time_unit!r}" - ) return _localize(dt, time_zone) else: raise ImportError( From 194c81d73ddafb7848bd9cc40b308cba1d0c7a14 Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 09:16:49 +0300 Subject: [PATCH 03/22] get VALID_TIME_UNITS directly from TimeUnit --- py-polars/polars/utils/convert.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index c34465d06394..68421d161326 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -4,16 +4,16 @@ from datetime import datetime, time, timedelta, timezone from decimal import Context from functools import lru_cache -from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar, overload +from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar, overload, get_args from polars.dependencies import _ZONEINFO_AVAILABLE, zoneinfo +from polars.type_aliases import TimeUnit if TYPE_CHECKING: from collections.abc import Reversible from datetime import date, tzinfo from decimal import Decimal - from polars.type_aliases import TimeUnit if sys.version_info >= (3, 10): from typing import ParamSpec @@ -48,12 +48,12 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 SECONDS_PER_DAY = 60 * 60 * 24 EPOCH = datetime(1970, 1, 1).replace(tzinfo=None) EPOCH_UTC = datetime(1970, 1, 1, tzinfo=timezone.utc) -ADMISSABLE_TIME_UNITS = {"ns", "us", "ms"} +VALID_TIME_UNITS = get_args(TimeUnit) def _validate_time_unit(time_unit: TimeUnit) -> None: - if time_unit not in ADMISSABLE_TIME_UNITS: + if time_unit not in VALID_TIME_UNITS: raise ValueError( - f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" + f"`time_unit` must be one of {set(VALID_TIME_UNITS)}, got {time_unit!r}" ) _fromtimestamp = datetime.fromtimestamp From ce6ca8f6f38d47589e34e3e4bc0174452eb86a9f Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 10:23:24 +0300 Subject: [PATCH 04/22] linting and more clean up --- py-polars/polars/utils/convert.py | 35 ++++++++++--------------------- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 68421d161326..59aa028cdb34 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -14,7 +14,6 @@ from datetime import date, tzinfo from decimal import Decimal - if sys.version_info >= (3, 10): from typing import ParamSpec else: @@ -50,12 +49,14 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 EPOCH_UTC = datetime(1970, 1, 1, tzinfo=timezone.utc) VALID_TIME_UNITS = get_args(TimeUnit) + def _validate_time_unit(time_unit: TimeUnit) -> None: if time_unit not in VALID_TIME_UNITS: raise ValueError( f"`time_unit` must be one of {set(VALID_TIME_UNITS)}, got {time_unit!r}" ) + _fromtimestamp = datetime.fromtimestamp @@ -113,16 +114,14 @@ def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: time_unit = "us" _validate_time_unit(time_unit) + micros = dt.microsecond + seconds = _timestamp_in_seconds(dt) if time_unit == "ns": - micros = dt.microsecond - return 1_000 * (_timestamp_in_seconds(dt) * 1_000_000 + micros) + return 1_000 * (seconds * 1_000_000 + micros) elif time_unit == "us": - micros = dt.microsecond - return _timestamp_in_seconds(dt) * 1_000_000 + micros + return seconds * 1_000_000 + micros elif time_unit == "ms": - millis = dt.microsecond // 1000 - return _timestamp_in_seconds(dt) * 1_000 + millis - + return seconds * 1_000 + micros // 1_000 def _time_to_pl_time(t: time) -> int: @@ -166,7 +165,7 @@ def _to_python_time(value: int) -> time: minutes, seconds = divmod(seconds, 60) hours, minutes = divmod(minutes, 60) return time( - hour=hours, minute=minutes, second=seconds, microsecond=nanoseconds // 1000 + hour=hours, minute=minutes, second=seconds, microsecond=nanoseconds // 1_000 ) @@ -174,7 +173,7 @@ def _to_python_timedelta(value: int | float, time_unit: TimeUnit = "ns") -> time _validate_time_unit(time_unit) if time_unit == "ns": - return timedelta(microseconds=value // 1e3) + return timedelta(microseconds=value // 1_000) elif time_unit == "us": return timedelta(microseconds=value) elif time_unit == "ms": @@ -194,21 +193,9 @@ def _to_python_datetime( ) -> datetime: """Convert polars int64 timestamp to Python datetime.""" if not time_zone: - _validate_time_unit(time_unit) - if time_unit == "us": - return EPOCH + timedelta(microseconds=value) - elif time_unit == "ns": - return EPOCH + timedelta(microseconds=value // 1000) - elif time_unit == "ms": - return EPOCH + timedelta(milliseconds=value) + return EPOCH + _to_python_timedelta(value, time_unit) elif _ZONEINFO_AVAILABLE: - _validate_time_unit(time_unit) - if time_unit == "us": - dt = EPOCH_UTC + timedelta(microseconds=value) - elif time_unit == "ns": - dt = EPOCH_UTC + timedelta(microseconds=value // 1000) - elif time_unit == "ms": - dt = EPOCH_UTC + timedelta(milliseconds=value) + dt = EPOCH_UTC + _to_python_timedelta(value, time_unit) return _localize(dt, time_zone) else: raise ImportError( From b37c25e05834a053c214cdc59e07e5c806d6706c Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 11:03:27 +0300 Subject: [PATCH 05/22] repetitions in calculating subseconds --- py-polars/polars/utils/convert.py | 52 ++++++++++++------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 59aa028cdb34..f2cbf27d59e3 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -105,24 +105,6 @@ def _negate_duration(duration: str) -> str: return f"-{duration}" -def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: - """Convert a python datetime to a timestamp in given time unit.""" - if dt.tzinfo is None: - # Make sure to use UTC rather than system time zone. - dt = dt.replace(tzinfo=timezone.utc) - if time_unit is None: - time_unit = "us" - _validate_time_unit(time_unit) - - micros = dt.microsecond - seconds = _timestamp_in_seconds(dt) - if time_unit == "ns": - return 1_000 * (seconds * 1_000_000 + micros) - elif time_unit == "us": - return seconds * 1_000_000 + micros - elif time_unit == "ms": - return seconds * 1_000 + micros // 1_000 - def _time_to_pl_time(t: time) -> int: t = t.replace(tzinfo=timezone.utc) @@ -133,27 +115,33 @@ def _date_to_pl_date(d: date) -> int: dt = datetime.combine(d, datetime.min.time()).replace(tzinfo=timezone.utc) return int(dt.timestamp()) // SECONDS_PER_DAY - -def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: - """Convert a Python timedelta object to a total number of subseconds.""" +def _seconds_and_micros_to_subseconds(seconds: int, micros:int, time_unit: TimeUnit | None) -> int: if time_unit is None: time_unit = "us" _validate_time_unit(time_unit) if time_unit == "ns": - subseconds = td.microseconds * 1_000 - subseconds_per_second = 1_000_000_000 + return 1_000 * (seconds * 1_000_000 + micros) + elif time_unit == "us": + return seconds * 1_000_000 + micros elif time_unit == "ms": - subseconds = td.microseconds // 1_000 - subseconds_per_second = 1_000 - else: - subseconds = td.microseconds - subseconds_per_second = 1_000_000 + return seconds * 1_000 + micros // 1_000 + +def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: + """Convert a python datetime to a timestamp in given time unit.""" + if dt.tzinfo is None: + # Make sure to use UTC rather than system time zone. + dt = dt.replace(tzinfo=timezone.utc) + micros = dt.microsecond + seconds = _timestamp_in_seconds(dt) + return _seconds_and_micros_to_subseconds(seconds=seconds, subseconds=micros, time_unit=time_unit) - subseconds += td.seconds * subseconds_per_second - subseconds += td.days * SECONDS_PER_DAY * subseconds_per_second - return subseconds +def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: + """Convert a Python timedelta object to a total number of subseconds.""" + micros = td.microsecond + seconds = td.days * SECONDS_PER_DAY + td.seconds + return _seconds_and_micros_to_subseconds(seconds=seconds, subseconds=micros, time_unit=time_unit) def _to_python_time(value: int) -> time: @@ -169,7 +157,7 @@ def _to_python_time(value: int) -> time: ) -def _to_python_timedelta(value: int | float, time_unit: TimeUnit = "ns") -> timedelta: +def _to_python_timedelta(value: int | float, time_unit: TimeUnit | None = "ns") -> timedelta: _validate_time_unit(time_unit) if time_unit == "ns": From 69835541d58a1b101952a7c0d1fccca141f98ef1 Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 11:04:33 +0300 Subject: [PATCH 06/22] black and typing --- py-polars/polars/utils/convert.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index f2cbf27d59e3..afcad7f9065d 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -105,7 +105,6 @@ def _negate_duration(duration: str) -> str: return f"-{duration}" - def _time_to_pl_time(t: time) -> int: t = t.replace(tzinfo=timezone.utc) return int((t.hour * 3_600 + t.minute * 60 + t.second) * 1e9 + t.microsecond * 1e3) @@ -115,7 +114,10 @@ def _date_to_pl_date(d: date) -> int: dt = datetime.combine(d, datetime.min.time()).replace(tzinfo=timezone.utc) return int(dt.timestamp()) // SECONDS_PER_DAY -def _seconds_and_micros_to_subseconds(seconds: int, micros:int, time_unit: TimeUnit | None) -> int: + +def _seconds_and_micros_to_subseconds( + seconds: int, micros: int, time_unit: TimeUnit | None +) -> int: if time_unit is None: time_unit = "us" _validate_time_unit(time_unit) @@ -127,6 +129,7 @@ def _seconds_and_micros_to_subseconds(seconds: int, micros:int, time_unit: TimeU elif time_unit == "ms": return seconds * 1_000 + micros // 1_000 + def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: """Convert a python datetime to a timestamp in given time unit.""" if dt.tzinfo is None: @@ -134,14 +137,18 @@ def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: dt = dt.replace(tzinfo=timezone.utc) micros = dt.microsecond seconds = _timestamp_in_seconds(dt) - return _seconds_and_micros_to_subseconds(seconds=seconds, subseconds=micros, time_unit=time_unit) + return _seconds_and_micros_to_subseconds( + seconds=seconds, subseconds=micros, time_unit=time_unit + ) def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: """Convert a Python timedelta object to a total number of subseconds.""" micros = td.microsecond seconds = td.days * SECONDS_PER_DAY + td.seconds - return _seconds_and_micros_to_subseconds(seconds=seconds, subseconds=micros, time_unit=time_unit) + return _seconds_and_micros_to_subseconds( + seconds=seconds, subseconds=micros, time_unit=time_unit + ) def _to_python_time(value: int) -> time: @@ -157,7 +164,9 @@ def _to_python_time(value: int) -> time: ) -def _to_python_timedelta(value: int | float, time_unit: TimeUnit | None = "ns") -> timedelta: +def _to_python_timedelta( + value: int | float, time_unit: TimeUnit | None = "ns" +) -> timedelta: _validate_time_unit(time_unit) if time_unit == "ns": From a14d0724dd977e4cd683f07eb4ee8a94a6ae11c6 Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 11:08:01 +0300 Subject: [PATCH 07/22] pre-commit --- py-polars/polars/utils/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index afcad7f9065d..8b1f4ef7ff36 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -4,7 +4,7 @@ from datetime import datetime, time, timedelta, timezone from decimal import Context from functools import lru_cache -from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar, overload, get_args +from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar, get_args, overload from polars.dependencies import _ZONEINFO_AVAILABLE, zoneinfo from polars.type_aliases import TimeUnit From 8e158d8f934b560c6fa2d86441a715c1b400d431 Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 11:18:40 +0300 Subject: [PATCH 08/22] .micrsecond and .microseconds?! --- py-polars/polars/utils/convert.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 8b1f4ef7ff36..df62602df98a 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -138,16 +138,16 @@ def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: micros = dt.microsecond seconds = _timestamp_in_seconds(dt) return _seconds_and_micros_to_subseconds( - seconds=seconds, subseconds=micros, time_unit=time_unit + seconds=seconds, micros=micros, time_unit=time_unit ) def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: """Convert a Python timedelta object to a total number of subseconds.""" - micros = td.microsecond + micros = td.microseconds seconds = td.days * SECONDS_PER_DAY + td.seconds return _seconds_and_micros_to_subseconds( - seconds=seconds, subseconds=micros, time_unit=time_unit + seconds=seconds, micros=micros, time_unit=time_unit ) From 052c8deee86c83301ba412fb592fea03f7c338eb Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 17:42:39 +0300 Subject: [PATCH 09/22] default time_unit is us, remove None --- py-polars/polars/utils/convert.py | 31 ++++++++++-------------- py-polars/tests/unit/utils/test_utils.py | 2 -- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index df62602df98a..4dc3e5b38c2d 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -116,38 +116,36 @@ def _date_to_pl_date(d: date) -> int: def _seconds_and_micros_to_subseconds( - seconds: int, micros: int, time_unit: TimeUnit | None + seconds: int, micro_seconds: int, time_unit: TimeUnit ) -> int: - if time_unit is None: - time_unit = "us" + """Convert seconds and microseconds to subseconds in given time unit.""" _validate_time_unit(time_unit) - if time_unit == "ns": - return 1_000 * (seconds * 1_000_000 + micros) + return 1_000 * (seconds * 1_000_000 + micro_seconds) elif time_unit == "us": - return seconds * 1_000_000 + micros + return seconds * 1_000_000 + micro_seconds elif time_unit == "ms": - return seconds * 1_000 + micros // 1_000 + return seconds * 1_000 + micro_seconds // 1_000 -def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: +def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit = "us") -> int: """Convert a python datetime to a timestamp in given time unit.""" if dt.tzinfo is None: # Make sure to use UTC rather than system time zone. dt = dt.replace(tzinfo=timezone.utc) - micros = dt.microsecond + micro_seconds = dt.microsecond seconds = _timestamp_in_seconds(dt) return _seconds_and_micros_to_subseconds( - seconds=seconds, micros=micros, time_unit=time_unit + seconds=seconds, micro_seconds=micro_seconds, time_unit=time_unit ) -def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: +def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit = "us") -> int: """Convert a Python timedelta object to a total number of subseconds.""" - micros = td.microseconds + micro_seconds = td.microseconds seconds = td.days * SECONDS_PER_DAY + td.seconds return _seconds_and_micros_to_subseconds( - seconds=seconds, micros=micros, time_unit=time_unit + seconds=seconds, micro_seconds=micro_seconds, time_unit=time_unit ) @@ -164,11 +162,8 @@ def _to_python_time(value: int) -> time: ) -def _to_python_timedelta( - value: int | float, time_unit: TimeUnit | None = "ns" -) -> timedelta: +def _to_python_timedelta(value: int | float, time_unit: TimeUnit = "ns") -> timedelta: _validate_time_unit(time_unit) - if time_unit == "ns": return timedelta(microseconds=value // 1_000) elif time_unit == "us": @@ -185,7 +180,7 @@ def _to_python_date(value: int | float) -> date: def _to_python_datetime( value: int | float, - time_unit: TimeUnit | None = "ns", + time_unit: TimeUnit = "ns", time_zone: str | None = None, ) -> datetime: """Convert polars int64 timestamp to Python datetime.""" diff --git a/py-polars/tests/unit/utils/test_utils.py b/py-polars/tests/unit/utils/test_utils.py index 623a011ae202..90591b5f2b9f 100644 --- a/py-polars/tests/unit/utils/test_utils.py +++ b/py-polars/tests/unit/utils/test_utils.py @@ -60,8 +60,6 @@ def test_timedelta_to_pl_timedelta() -> None: assert out == 86_400_000_000 out = _timedelta_to_pl_timedelta(timedelta(days=1), "ms") assert out == 86_400_000 - out = _timedelta_to_pl_timedelta(timedelta(days=1), time_unit=None) - assert out == 86_400_000_000 @pytest.mark.parametrize( From 2aee87082c38b198b405114714d714bdf3d281f2 Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 18:57:26 +0300 Subject: [PATCH 10/22] allow time_unit=None again --- py-polars/polars/utils/convert.py | 18 ++++++++++-------- py-polars/tests/unit/utils/test_utils.py | 2 ++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 4dc3e5b38c2d..f258a175dd18 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -7,13 +7,14 @@ from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar, get_args, overload from polars.dependencies import _ZONEINFO_AVAILABLE, zoneinfo -from polars.type_aliases import TimeUnit if TYPE_CHECKING: from collections.abc import Reversible from datetime import date, tzinfo from decimal import Decimal + from polars.type_aliases import TimeUnit + if sys.version_info >= (3, 10): from typing import ParamSpec else: @@ -47,13 +48,12 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 SECONDS_PER_DAY = 60 * 60 * 24 EPOCH = datetime(1970, 1, 1).replace(tzinfo=None) EPOCH_UTC = datetime(1970, 1, 1, tzinfo=timezone.utc) -VALID_TIME_UNITS = get_args(TimeUnit) def _validate_time_unit(time_unit: TimeUnit) -> None: - if time_unit not in VALID_TIME_UNITS: + if time_unit not in ("ms", "us", "ns"): raise ValueError( - f"`time_unit` must be one of {set(VALID_TIME_UNITS)}, got {time_unit!r}" + f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" ) @@ -116,9 +116,11 @@ def _date_to_pl_date(d: date) -> int: def _seconds_and_micros_to_subseconds( - seconds: int, micro_seconds: int, time_unit: TimeUnit + seconds: int, micro_seconds: int, time_unit: TimeUnit | None ) -> int: """Convert seconds and microseconds to subseconds in given time unit.""" + if time_unit is None: + time_unit = "us" _validate_time_unit(time_unit) if time_unit == "ns": return 1_000 * (seconds * 1_000_000 + micro_seconds) @@ -128,7 +130,7 @@ def _seconds_and_micros_to_subseconds( return seconds * 1_000 + micro_seconds // 1_000 -def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit = "us") -> int: +def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: """Convert a python datetime to a timestamp in given time unit.""" if dt.tzinfo is None: # Make sure to use UTC rather than system time zone. @@ -140,7 +142,7 @@ def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit = "us") -> int: ) -def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit = "us") -> int: +def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: """Convert a Python timedelta object to a total number of subseconds.""" micro_seconds = td.microseconds seconds = td.days * SECONDS_PER_DAY + td.seconds @@ -180,7 +182,7 @@ def _to_python_date(value: int | float) -> date: def _to_python_datetime( value: int | float, - time_unit: TimeUnit = "ns", + time_unit: TimeUnit | None = "ns", time_zone: str | None = None, ) -> datetime: """Convert polars int64 timestamp to Python datetime.""" diff --git a/py-polars/tests/unit/utils/test_utils.py b/py-polars/tests/unit/utils/test_utils.py index 90591b5f2b9f..623a011ae202 100644 --- a/py-polars/tests/unit/utils/test_utils.py +++ b/py-polars/tests/unit/utils/test_utils.py @@ -60,6 +60,8 @@ def test_timedelta_to_pl_timedelta() -> None: assert out == 86_400_000_000 out = _timedelta_to_pl_timedelta(timedelta(days=1), "ms") assert out == 86_400_000 + out = _timedelta_to_pl_timedelta(timedelta(days=1), time_unit=None) + assert out == 86_400_000_000 @pytest.mark.parametrize( From 2d2708f8929c19598ef316142e2590f1e02bec74 Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 19:12:26 +0300 Subject: [PATCH 11/22] linting --- py-polars/polars/utils/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index f258a175dd18..ccd8178de657 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -4,7 +4,7 @@ from datetime import datetime, time, timedelta, timezone from decimal import Context from functools import lru_cache -from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar, get_args, overload +from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeVar, overload from polars.dependencies import _ZONEINFO_AVAILABLE, zoneinfo From 3392245427d50df302b499fc1fc305e4ae5bbe5c Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 19:13:50 +0300 Subject: [PATCH 12/22] validate type --- py-polars/polars/utils/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index ccd8178de657..c329c47971c3 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -50,7 +50,7 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 EPOCH_UTC = datetime(1970, 1, 1, tzinfo=timezone.utc) -def _validate_time_unit(time_unit: TimeUnit) -> None: +def _validate_time_unit(time_unit: Any) -> None: if time_unit not in ("ms", "us", "ns"): raise ValueError( f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" From 485e0d3dad0d798df0c65de90392b5dfb606fef5 Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 19:25:10 +0300 Subject: [PATCH 13/22] mypy stuff --- py-polars/polars/utils/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index c329c47971c3..b997eb6c2812 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -164,7 +164,7 @@ def _to_python_time(value: int) -> time: ) -def _to_python_timedelta(value: int | float, time_unit: TimeUnit = "ns") -> timedelta: +def _to_python_timedelta(value: int | float, time_unit: TimeUnit | None = "ns") -> timedelta: _validate_time_unit(time_unit) if time_unit == "ns": return timedelta(microseconds=value // 1_000) From 2ae7b05d9f86999897e19a46ba54e8367be912ed Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 19:26:24 +0300 Subject: [PATCH 14/22] pre-commit --- py-polars/polars/utils/convert.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index b997eb6c2812..86e932afde70 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -164,7 +164,9 @@ def _to_python_time(value: int) -> time: ) -def _to_python_timedelta(value: int | float, time_unit: TimeUnit | None = "ns") -> timedelta: +def _to_python_timedelta( + value: int | float, time_unit: TimeUnit | None = "ns" +) -> timedelta: _validate_time_unit(time_unit) if time_unit == "ns": return timedelta(microseconds=value // 1_000) From a20403ab56af8328c2ccc63810182fc0a95817c9 Mon Sep 17 00:00:00 2001 From: ran Date: Tue, 17 Oct 2023 23:09:03 +0300 Subject: [PATCH 15/22] handle None --- py-polars/polars/utils/convert.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 86e932afde70..6817646f3f1d 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -167,6 +167,8 @@ def _to_python_time(value: int) -> time: def _to_python_timedelta( value: int | float, time_unit: TimeUnit | None = "ns" ) -> timedelta: + if time_unit is None: + time_unit = "us" _validate_time_unit(time_unit) if time_unit == "ns": return timedelta(microseconds=value // 1_000) From 29253e06f7fe1a16a019f7a113f66894c4c31d92 Mon Sep 17 00:00:00 2001 From: ran Date: Wed, 18 Oct 2023 09:05:58 +0300 Subject: [PATCH 16/22] _fromtimestamp variable introduced 6mo ago but never used --- py-polars/polars/utils/convert.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 6817646f3f1d..880772ed8149 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -57,8 +57,6 @@ def _validate_time_unit(time_unit: Any) -> None: ) -_fromtimestamp = datetime.fromtimestamp - def _timestamp_in_seconds(dt: datetime) -> int: du = dt - EPOCH_UTC From 92885266671923552807283638e2bca16c9a3af7 Mon Sep 17 00:00:00 2001 From: ran Date: Wed, 18 Oct 2023 09:53:28 +0300 Subject: [PATCH 17/22] remove validation func --- py-polars/polars/utils/convert.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 880772ed8149..c80127fc3fc7 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -50,14 +50,6 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 EPOCH_UTC = datetime(1970, 1, 1, tzinfo=timezone.utc) -def _validate_time_unit(time_unit: Any) -> None: - if time_unit not in ("ms", "us", "ns"): - raise ValueError( - f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" - ) - - - def _timestamp_in_seconds(dt: datetime) -> int: du = dt - EPOCH_UTC return du.days * SECONDS_PER_DAY + du.seconds @@ -105,7 +97,9 @@ def _negate_duration(duration: str) -> str: def _time_to_pl_time(t: time) -> int: t = t.replace(tzinfo=timezone.utc) - return int((t.hour * 3_600 + t.minute * 60 + t.second) * 1e9 + t.microsecond * 1e3) + return ( + t.hour * 3_600 + t.minute * 60 + t.second + ) * 1_000_000_000 + t.microsecond * 1_000 def _date_to_pl_date(d: date) -> int: @@ -117,15 +111,15 @@ def _seconds_and_micros_to_subseconds( seconds: int, micro_seconds: int, time_unit: TimeUnit | None ) -> int: """Convert seconds and microseconds to subseconds in given time unit.""" - if time_unit is None: - time_unit = "us" - _validate_time_unit(time_unit) if time_unit == "ns": return 1_000 * (seconds * 1_000_000 + micro_seconds) - elif time_unit == "us": + elif time_unit == "us" or time_unit is None: return seconds * 1_000_000 + micro_seconds elif time_unit == "ms": return seconds * 1_000 + micro_seconds // 1_000 + raise ValueError( + f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" + ) def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: @@ -165,15 +159,15 @@ def _to_python_time(value: int) -> time: def _to_python_timedelta( value: int | float, time_unit: TimeUnit | None = "ns" ) -> timedelta: - if time_unit is None: - time_unit = "us" - _validate_time_unit(time_unit) if time_unit == "ns": return timedelta(microseconds=value // 1_000) - elif time_unit == "us": + elif time_unit == "us" or time_unit is None: return timedelta(microseconds=value) elif time_unit == "ms": return timedelta(milliseconds=value) + raise ValueError( + f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" + ) @lru_cache(256) From af466df6df566d454f2cb487059e4471b4ffe9b6 Mon Sep 17 00:00:00 2001 From: ran Date: Wed, 18 Oct 2023 10:13:02 +0300 Subject: [PATCH 18/22] revert function encapsulation for the sake of perf --- py-polars/polars/utils/convert.py | 57 +++++++++++++++++++------------ 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index c80127fc3fc7..ac3a3c8f43db 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -107,10 +107,13 @@ def _date_to_pl_date(d: date) -> int: return int(dt.timestamp()) // SECONDS_PER_DAY -def _seconds_and_micros_to_subseconds( - seconds: int, micro_seconds: int, time_unit: TimeUnit | None -) -> int: - """Convert seconds and microseconds to subseconds in given time unit.""" +def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: + """Convert a python datetime to a timestamp in given time unit.""" + if dt.tzinfo is None: + # Make sure to use UTC rather than system time zone. + dt = dt.replace(tzinfo=timezone.utc) + micro_seconds = dt.microsecond + seconds = _timestamp_in_seconds(dt) if time_unit == "ns": return 1_000 * (seconds * 1_000_000 + micro_seconds) elif time_unit == "us" or time_unit is None: @@ -122,24 +125,18 @@ def _seconds_and_micros_to_subseconds( ) -def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: - """Convert a python datetime to a timestamp in given time unit.""" - if dt.tzinfo is None: - # Make sure to use UTC rather than system time zone. - dt = dt.replace(tzinfo=timezone.utc) - micro_seconds = dt.microsecond - seconds = _timestamp_in_seconds(dt) - return _seconds_and_micros_to_subseconds( - seconds=seconds, micro_seconds=micro_seconds, time_unit=time_unit - ) - - def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: """Convert a Python timedelta object to a total number of subseconds.""" micro_seconds = td.microseconds seconds = td.days * SECONDS_PER_DAY + td.seconds - return _seconds_and_micros_to_subseconds( - seconds=seconds, micro_seconds=micro_seconds, time_unit=time_unit + if time_unit == "ns": + return 1_000 * (seconds * 1_000_000 + micro_seconds) + elif time_unit == "us" or time_unit is None: + return seconds * 1_000_000 + micro_seconds + elif time_unit == "ms": + return seconds * 1_000 + micro_seconds // 1_000 + raise ValueError( + f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" ) @@ -161,7 +158,7 @@ def _to_python_timedelta( ) -> timedelta: if time_unit == "ns": return timedelta(microseconds=value // 1_000) - elif time_unit == "us" or time_unit is None: + elif time_unit == "us": return timedelta(microseconds=value) elif time_unit == "ms": return timedelta(milliseconds=value) @@ -183,9 +180,27 @@ def _to_python_datetime( ) -> datetime: """Convert polars int64 timestamp to Python datetime.""" if not time_zone: - return EPOCH + _to_python_timedelta(value, time_unit) + if time_unit == "ns": + return EPOCH + timedelta(microseconds=value // 1_000) + elif time_unit == "us": + return EPOCH + timedelta(microseconds=value) + elif time_unit == "ms": + return EPOCH + timedelta(milliseconds=value) + else: + raise ValueError( + f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" + ) elif _ZONEINFO_AVAILABLE: - dt = EPOCH_UTC + _to_python_timedelta(value, time_unit) + if time_unit == "ns": + dt = EPOCH_UTC + timedelta(microseconds=value // 1_000) + elif time_unit == "us": + dt = EPOCH_UTC + timedelta(microseconds=value) + elif time_unit == "ms": + dt = EPOCH_UTC + timedelta(milliseconds=value) + else: + raise ValueError( + f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" + ) return _localize(dt, time_zone) else: raise ImportError( From 0bb8d0bd6a12c860c0f09bdc907b06ceb91bc238 Mon Sep 17 00:00:00 2001 From: ran Date: Thu, 19 Oct 2023 21:08:08 +0300 Subject: [PATCH 19/22] suggested changes --- py-polars/polars/utils/convert.py | 40 ++++++++++++++----------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index ac3a3c8f43db..52d074d70c2d 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -45,7 +45,7 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 _views: list[Reversible[Any]] = [{}.keys(), {}.values(), {}.items()] _reverse_mapping_views = tuple(type(reversed(view)) for view in _views) -SECONDS_PER_DAY = 60 * 60 * 24 +SECONDS_PER_DAY = 86_400 EPOCH = datetime(1970, 1, 1).replace(tzinfo=None) EPOCH_UTC = datetime(1970, 1, 1, tzinfo=timezone.utc) @@ -112,32 +112,28 @@ def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: if dt.tzinfo is None: # Make sure to use UTC rather than system time zone. dt = dt.replace(tzinfo=timezone.utc) - micro_seconds = dt.microsecond + microseconds = dt.microsecond seconds = _timestamp_in_seconds(dt) if time_unit == "ns": - return 1_000 * (seconds * 1_000_000 + micro_seconds) + return 1_000 * (seconds * 1_000_000 + microseconds) elif time_unit == "us" or time_unit is None: - return seconds * 1_000_000 + micro_seconds + return seconds * 1_000_000 + microseconds elif time_unit == "ms": - return seconds * 1_000 + micro_seconds // 1_000 + return seconds * 1_000 + microseconds // 1_000 raise ValueError( f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" ) - def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: """Convert a Python timedelta object to a total number of subseconds.""" - micro_seconds = td.microseconds + microseconds = td.microseconds seconds = td.days * SECONDS_PER_DAY + td.seconds if time_unit == "ns": - return 1_000 * (seconds * 1_000_000 + micro_seconds) + return 1_000 * (seconds * 1_000_000 + microseconds) elif time_unit == "us" or time_unit is None: - return seconds * 1_000_000 + micro_seconds + return seconds * 1_000_000 + microseconds elif time_unit == "ms": - return seconds * 1_000 + micro_seconds // 1_000 - raise ValueError( - f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" - ) + return seconds * 1_000 + microseconds // 1_000 def _to_python_time(value: int) -> time: @@ -163,7 +159,7 @@ def _to_python_timedelta( elif time_unit == "ms": return timedelta(milliseconds=value) raise ValueError( - f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" + f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" ) @@ -180,26 +176,26 @@ def _to_python_datetime( ) -> datetime: """Convert polars int64 timestamp to Python datetime.""" if not time_zone: - if time_unit == "ns": - return EPOCH + timedelta(microseconds=value // 1_000) - elif time_unit == "us": + if time_unit == "us": return EPOCH + timedelta(microseconds=value) + elif time_unit == "ns": + return EPOCH + timedelt (microseconds=value // 1_000) elif time_unit == "ms": return EPOCH + timedelta(milliseconds=value) else: raise ValueError( - f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" + f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" ) elif _ZONEINFO_AVAILABLE: - if time_unit == "ns": - dt = EPOCH_UTC + timedelta(microseconds=value // 1_000) - elif time_unit == "us": + if time_unit == "us": dt = EPOCH_UTC + timedelta(microseconds=value) + elif time_unit == "ns": + dt = EPOCH_UTC + timedelta(microseconds=value // 1_000) elif time_unit == "ms": dt = EPOCH_UTC + timedelta(milliseconds=value) else: raise ValueError( - f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" + f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" ) return _localize(dt, time_zone) else: From 3ba669787e9e389652d828baf30f11283c028f43 Mon Sep 17 00:00:00 2001 From: ran Date: Thu, 19 Oct 2023 21:16:28 +0300 Subject: [PATCH 20/22] accidental typo --- py-polars/polars/utils/convert.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 52d074d70c2d..7d0bb979ee58 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -124,6 +124,7 @@ def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" ) + def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int: """Convert a Python timedelta object to a total number of subseconds.""" microseconds = td.microseconds @@ -179,7 +180,7 @@ def _to_python_datetime( if time_unit == "us": return EPOCH + timedelta(microseconds=value) elif time_unit == "ns": - return EPOCH + timedelt (microseconds=value // 1_000) + return EPOCH + timedelta(microseconds=value // 1_000) elif time_unit == "ms": return EPOCH + timedelta(milliseconds=value) else: From ef91ad4ad5d534a80c72575c9cc229a71010f27a Mon Sep 17 00:00:00 2001 From: ran Date: Sat, 28 Oct 2023 10:23:38 +0300 Subject: [PATCH 21/22] conflict --- py-polars/polars/utils/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index ec380519b8f6..85caae3266b3 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -97,7 +97,7 @@ def _negate_duration(duration: str) -> str: return f"-{duration}" -def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit) -> int: +def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: """Convert a python datetime to a timestamp in given time unit.""" if dt.tzinfo is None: # Make sure to use UTC rather than system time zone. From a7b74e6994d945d8e2ce6c68f19bf325f673a62c Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Thu, 9 Nov 2023 15:46:02 +0100 Subject: [PATCH 22/22] Further cleanup --- py-polars/polars/utils/convert.py | 32 ++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index 7d0bb979ee58..14dc5e574f63 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -46,6 +46,11 @@ def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 _reverse_mapping_views = tuple(type(reversed(view)) for view in _views) SECONDS_PER_DAY = 86_400 +SECONDS_PER_HOUR = 3_600 +NS_PER_SECOND = 1_000_000_000 +US_PER_SECOND = 1_000_000 +MS_PER_SECOND = 1_000 + EPOCH = datetime(1970, 1, 1).replace(tzinfo=None) EPOCH_UTC = datetime(1970, 1, 1, tzinfo=timezone.utc) @@ -97,9 +102,9 @@ def _negate_duration(duration: str) -> str: def _time_to_pl_time(t: time) -> int: t = t.replace(tzinfo=timezone.utc) - return ( - t.hour * 3_600 + t.minute * 60 + t.second - ) * 1_000_000_000 + t.microsecond * 1_000 + seconds = t.hour * SECONDS_PER_HOUR + t.minute * 60 + t.second + microseconds = t.microsecond + return seconds * NS_PER_SECOND + microseconds * 1_000 def _date_to_pl_date(d: date) -> int: @@ -115,11 +120,11 @@ def _datetime_to_pl_timestamp(dt: datetime, time_unit: TimeUnit | None) -> int: microseconds = dt.microsecond seconds = _timestamp_in_seconds(dt) if time_unit == "ns": - return 1_000 * (seconds * 1_000_000 + microseconds) + return seconds * NS_PER_SECOND + microseconds * 1_000 elif time_unit == "us" or time_unit is None: - return seconds * 1_000_000 + microseconds + return seconds * US_PER_SECOND + microseconds elif time_unit == "ms": - return seconds * 1_000 + microseconds // 1_000 + return seconds * MS_PER_SECOND + microseconds // 1_000 raise ValueError( f"`time_unit` must be one of {{'ms', 'us', 'ns'}}, got {time_unit!r}" ) @@ -130,11 +135,11 @@ def _timedelta_to_pl_timedelta(td: timedelta, time_unit: TimeUnit | None) -> int microseconds = td.microseconds seconds = td.days * SECONDS_PER_DAY + td.seconds if time_unit == "ns": - return 1_000 * (seconds * 1_000_000 + microseconds) + return seconds * NS_PER_SECOND + microseconds * 1_000 elif time_unit == "us" or time_unit is None: - return seconds * 1_000_000 + microseconds + return seconds * US_PER_SECOND + microseconds elif time_unit == "ms": - return seconds * 1_000 + microseconds // 1_000 + return seconds * MS_PER_SECOND + microseconds // 1_000 def _to_python_time(value: int) -> time: @@ -142,7 +147,7 @@ def _to_python_time(value: int) -> time: if value == 0: return time(microsecond=0) else: - seconds, nanoseconds = divmod(value, 1_000_000_000) + seconds, nanoseconds = divmod(value, NS_PER_SECOND) minutes, seconds = divmod(seconds, 60) hours, minutes = divmod(minutes, 60) return time( @@ -159,9 +164,10 @@ def _to_python_timedelta( return timedelta(microseconds=value) elif time_unit == "ms": return timedelta(milliseconds=value) - raise ValueError( - f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" - ) + else: + raise ValueError( + f"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {time_unit!r}" + ) @lru_cache(256)