Commit

Merge branch 'main' into bug-cut-non-nano
jbrockmendel committed Nov 23, 2023
2 parents 72bca9d + 1c606d5 commit da1e76b
Showing 62 changed files with 557 additions and 500 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/unit-tests.yml
@@ -88,14 +88,15 @@ jobs:
name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }}
env:
PATTERN: ${{ matrix.pattern }}
EXTRA_APT: ${{ matrix.extra_apt || '' }}
LANG: ${{ matrix.lang || 'C.UTF-8' }}
LC_ALL: ${{ matrix.lc_all || '' }}
PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
TEST_ARGS: ${{ matrix.test_args || '' }}
PYTEST_WORKERS: 'auto'
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
# Clipboard tests
QT_QPA_PLATFORM: offscreen
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
@@ -145,8 +146,8 @@ jobs:
fetch-depth: 0

- name: Extra installs
# xsel for clipboard tests
run: sudo apt-get update && sudo apt-get install -y xsel ${{ env.EXTRA_APT }}
run: sudo apt-get update && sudo apt-get install -y ${{ matrix.extra_apt }}
if: ${{ matrix.extra_apt }}

- name: Generate extra locales
# These extra locales will be available for locale.setlocale() calls in tests
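The clipboard tests no longer install `xsel`; they now rely on PyQt running against Qt's offscreen platform (`QT_QPA_PLATFORM: offscreen`), which exposes a clipboard without a display. A minimal sketch of such a headless round-trip, assuming PyQt5 and the pandas `clipboard` extra are installed (the DataFrame contents are illustrative):

```python
import os

# Qt's offscreen platform provides a clipboard even when no display is available.
os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
df.to_clipboard(index=False)   # write through the Qt clipboard backend
print(pd.read_clipboard())     # read it back
```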
3 changes: 2 additions & 1 deletion ci/deps/actions-310.yaml
@@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
@@ -42,6 +43,7 @@ dependencies:
- psycopg2>=2.9.6
- pyarrow>=10.0.1
- pymysql>=1.0.2
- pyqt>=5.15.9
- pyreadstat>=1.2.0
- pytables>=3.8.0
- python-calamine>=0.1.6
@@ -58,5 +60,4 @@ dependencies:
- pip:
- adbc-driver-postgresql>=0.8.0
- adbc-driver-sqlite>=0.8.0
- pyqt5>=5.15.8
- tzdata>=2022.7
3 changes: 2 additions & 1 deletion ci/deps/actions-311-downstream_compat.yaml
@@ -16,6 +16,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
@@ -43,6 +44,7 @@ dependencies:
- psycopg2>=2.9.6
- pyarrow>=10.0.1
- pymysql>=1.0.2
- pyqt>=5.15.9
- pyreadstat>=1.2.0
- pytables>=3.8.0
- python-calamine>=0.1.6
@@ -73,5 +75,4 @@ dependencies:
- adbc-driver-postgresql>=0.8.0
- adbc-driver-sqlite>=0.8.0
- dataframe-api-compat>=0.1.7
- pyqt5>=5.15.8
- tzdata>=2022.7
3 changes: 2 additions & 1 deletion ci/deps/actions-311.yaml
@@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
@@ -38,6 +39,7 @@ dependencies:
- numexpr>=2.8.4
- odfpy>=1.4.1
- qtpy>=2.3.0
- pyqt>=5.15.9
- openpyxl>=3.1.0
- psycopg2>=2.9.6
- pyarrow>=10.0.1
@@ -58,5 +60,4 @@ dependencies:
- pip:
- adbc-driver-postgresql>=0.8.0
- adbc-driver-sqlite>=0.8.0
- pyqt5>=5.15.8
- tzdata>=2022.7
3 changes: 2 additions & 1 deletion ci/deps/actions-39-minimum_versions.yaml
@@ -17,6 +17,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
@@ -44,6 +45,7 @@ dependencies:
- psycopg2=2.9.6
- pyarrow=10.0.1
- pymysql=1.0.2
- pyqt=5.15.9
- pyreadstat=1.2.0
- pytables=3.8.0
- python-calamine=0.1.6
@@ -61,5 +63,4 @@ dependencies:
- adbc-driver-postgresql==0.8.0
- adbc-driver-sqlite==0.8.0
- dataframe-api-compat==0.1.7
- pyqt5==5.15.8
- tzdata==2022.7
3 changes: 2 additions & 1 deletion ci/deps/actions-39.yaml
@@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
@@ -42,6 +43,7 @@ dependencies:
- psycopg2>=2.9.6
- pyarrow>=10.0.1
- pymysql>=1.0.2
- pyqt>=5.15.9
- pyreadstat>=1.2.0
- pytables>=3.8.0
- python-calamine>=0.1.6
@@ -58,5 +60,4 @@ dependencies:
- pip:
- adbc-driver-postgresql>=0.8.0
- adbc-driver-sqlite>=0.8.0
- pyqt5>=5.15.8
- tzdata>=2022.7
2 changes: 2 additions & 0 deletions ci/deps/circle-310-arm64.yaml
@@ -15,6 +15,7 @@ dependencies:
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-localserver>=0.7.1
- pytest-qt>=4.2.0
- boto3

# required dependencies
@@ -42,6 +43,7 @@ dependencies:
- psycopg2>=2.9.6
- pyarrow>=10.0.1
- pymysql>=1.0.2
- pyqt>=5.15.9
- pyreadstat>=1.2.0
- pytables>=3.8.0
- python-calamine>=0.1.6
2 changes: 1 addition & 1 deletion doc/source/getting_started/install.rst
@@ -397,7 +397,7 @@ Installable with ``pip install "pandas[clipboard]"``.
========================= ================== =============== =============================================================
Dependency Minimum Version pip extra Notes
========================= ================== =============== =============================================================
PyQt4/PyQt5 5.15.8 clipboard Clipboard I/O
PyQt4/PyQt5 5.15.9 clipboard Clipboard I/O
qtpy 2.3.0 clipboard Clipboard I/O
========================= ================== =============== =============================================================

3 changes: 3 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
@@ -447,6 +447,7 @@ Datetimelike
- Bug in :func:`testing.assert_extension_array_equal` that could use the wrong unit when comparing resolutions (:issue:`55730`)
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`)
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing mixed-type objects with a mix of timezones or mix of timezone-awareness failing to raise ``ValueError`` (:issue:`55693`)
- Bug in :meth:`DatetimeIndex.shift` with non-nanosecond resolution incorrectly returning with nanosecond resolution (:issue:`56117`)
- Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`)
- Bug in :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` always caching :meth:`Index.is_unique` as ``True`` when first value in index is ``NaT`` (:issue:`55755`)
- Bug in :meth:`Index.view` to a datetime64 dtype with non-supported resolution incorrectly raising (:issue:`55710`)
@@ -461,6 +462,7 @@ Datetimelike
- Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`)
- Bug in parsing datetime strings with nanosecond resolution with non-ISO8601 formats incorrectly truncating sub-microsecond components (:issue:`56051`)
- Bug in parsing datetime strings with sub-second resolution and trailing zeros incorrectly inferring second or millisecond resolution (:issue:`55737`)
- Bug in the results of :func:`pd.to_datetime` with a floating-dtype argument with ``unit`` not matching the pointwise results of :class:`Timestamp` (:issue:`56037`)
-

Timedelta
@@ -589,6 +591,7 @@ Other
- Bug in :meth:`DataFrame.from_dict` which would always sort the rows of the created :class:`DataFrame` (:issue:`55683`)
- Bug in rendering ``inf`` values inside a :class:`DataFrame` with the ``use_inf_as_na`` option enabled (:issue:`55483`)
- Bug in rendering a :class:`Series` with a :class:`MultiIndex` when one of the index level's names is 0 not having that name displayed (:issue:`55415`)
- Bug in the error message when assigning an empty dataframe to a column (:issue:`55956`)
-

.. ***DO NOT USE THIS SECTION***
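As a hedged illustration of the `DatetimeIndex.shift` entry added above (GH 56117); the dtypes shown are assumptions based on the changelog wording, not output copied from the test suite:

```python
import pandas as pd

dti = pd.DatetimeIndex(["2020-01-01", "2020-01-02"]).as_unit("s")
shifted = dti.shift(1, freq="D")
# expected after the fix: the second resolution is preserved
print(shifted.dtype)  # datetime64[s] (previously came back as datetime64[ns])
```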
2 changes: 2 additions & 0 deletions environment.yml
@@ -16,6 +16,8 @@ dependencies:
- pytest>=7.3.2
- pytest-cov
- pytest-xdist>=2.2.0
- pytest-qt>=4.2.0
- pyqt>=5.15.9
- coverage

# required dependencies
33 changes: 20 additions & 13 deletions pandas/_libs/tslib.pyx
@@ -642,11 +642,16 @@ cpdef array_to_datetime(
utc=utc,
creso=state.creso,
)

# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(state.creso)
result = iresult.view(f"M8[{abbrev}]").reshape(result.shape)
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# i.e. we never encountered anything non-NaT, default to "s". This
# ensures that insert and concat-like operations with NaT
# do not upcast units
result = iresult.view("M8[s]").reshape(result.shape)
else:
# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(state.creso)
result = iresult.view(f"M8[{abbrev}]").reshape(result.shape)
return result, tz_out


@@ -823,14 +828,16 @@ def array_to_datetime_with_tz(
# We encountered mismatched resolutions, need to re-parse with
# the correct one.
return array_to_datetime_with_tz(values, tz=tz, creso=creso)

# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(creso)
result = result.view(f"M8[{abbrev}]")
elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# We didn't find any non-NaT to infer from, default to "ns"
result = result.view("M8[ns]")
elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# i.e. we never encountered anything non-NaT, default to "s". This
# ensures that insert and concat-like operations with NaT
# do not upcast units
result = result.view("M8[s]")
else:
# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(creso)
result = result.view(f"M8[{abbrev}]")
else:
abbrev = npy_unit_to_abbrev(creso)
result = result.view(f"M8[{abbrev}]")
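The change above makes all-NaT parses default to second resolution instead of nanosecond, so insert- and concat-like operations with NaT do not upcast the unit. A hedged sketch of the assumed effect (the resulting dtype is inferred from the code comments, not taken from this diff):

```python
import pandas as pd

# With nothing non-NaT to infer a resolution from, the parse is assumed to
# default to "s" after this change.
out = pd.to_datetime(["NaT", None])
print(out.dtype)  # assumed: datetime64[s] (previously datetime64[ns])
```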
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/conversion.pxd
@@ -45,7 +45,7 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1

cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1
cpdef (int64_t, int) precision_from_unit(
cdef (int64_t, int) precision_from_unit(
NPY_DATETIMEUNIT in_reso, NPY_DATETIMEUNIT out_reso=*
)

5 changes: 1 addition & 4 deletions pandas/_libs/tslibs/conversion.pyi
@@ -8,8 +8,5 @@ import numpy as np
DT64NS_DTYPE: np.dtype
TD64NS_DTYPE: np.dtype

def precision_from_unit(
in_reso: int,
out_reso: int = ...,
) -> tuple[int, int]: ... # (int64_t, _)
def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...
def cast_from_unit_vectorized(values: np.ndarray, unit: str) -> np.ndarray: ...
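With `precision_from_unit` now declared `cdef`, it is no longer reachable from Python; per the updated stub, `cast_from_unit_vectorized` is the Python-visible entry point. A hedged sketch of how it could be called (this is an internal API and may change; the sample values and printed output are assumptions):

```python
import numpy as np

from pandas._libs.tslibs.conversion import cast_from_unit_vectorized

vals = np.array([1.0, 2.5, np.nan], dtype="f8")
# Returns int64 nanoseconds since the epoch; NaN maps to the NaT sentinel.
i8 = cast_from_unit_vectorized(vals, unit="s")
print(i8.view("M8[ns]"))  # assumed: 1970-01-01 00:00:01, 00:00:02.5, NaT
```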
79 changes: 78 additions & 1 deletion pandas/_libs/tslibs/conversion.pyx
@@ -1,8 +1,11 @@
cimport cython

import numpy as np

cimport numpy as cnp
from libc.math cimport log10
from numpy cimport (
float64_t,
int32_t,
int64_t,
)
@@ -37,6 +40,7 @@ from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
NPY_FR_ns,
NPY_FR_us,
astype_overflowsafe,
check_dts_bounds,
convert_reso,
dts_to_iso_string,
@@ -74,6 +78,7 @@ from pandas._libs.tslibs.tzconversion cimport (
from pandas._libs.tslibs.util cimport (
is_float_object,
is_integer_object,
is_nan,
)

# ----------------------------------------------------------------------
@@ -86,6 +91,78 @@ TD64NS_DTYPE = np.dtype("m8[ns]")
# ----------------------------------------------------------------------
# Unit Conversion Helpers

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.overflowcheck(True)
def cast_from_unit_vectorized(
ndarray values,
str unit,
):
"""
Vectorized analogue to cast_from_unit.
"""
cdef:
int64_t m
int p
NPY_DATETIMEUNIT in_reso, out_reso
Py_ssize_t i

assert values.dtype.kind == "f"

if unit in "YM":
if not (((values % 1) == 0) | np.isnan(values)).all():
# GH#47267 it is clear that 2 "M" corresponds to 1970-02-01,
# but not clear what 2.5 "M" corresponds to, so we will
# disallow that case.
raise ValueError(
f"Conversion of non-round float with unit={unit} "
"is ambiguous"
)

# GH#47266 go through np.datetime64 to avoid weird results e.g. with "Y"
# and 150 we'd get 2120-01-01 09:00:00
values = values.astype(f"M8[{unit}]")
dtype = np.dtype("M8[ns]")
return astype_overflowsafe(values, dtype=dtype, copy=False).view("i8")

in_reso = abbrev_to_npy_unit(unit)
out_reso = abbrev_to_npy_unit("ns")
m, p = precision_from_unit(in_reso, out_reso)

cdef:
ndarray[int64_t] base, out
ndarray[float64_t] frac
tuple shape = (<object>values).shape

out = np.empty(shape, dtype="i8")
base = np.empty(shape, dtype="i8")
frac = np.empty(shape, dtype="f8")

for i in range(len(values)):
if is_nan(values[i]):
base[i] = NPY_NAT
else:
base[i] = <int64_t>values[i]
frac[i] = values[i] - base[i]

if p:
frac = np.round(frac, p)

try:
for i in range(len(values)):
if base[i] == NPY_NAT:
out[i] = NPY_NAT
else:
out[i] = <int64_t>(base[i] * m) + <int64_t>(frac[i] * m)
except (OverflowError, FloatingPointError) as err:
# FloatingPointError can be issued if we have float dtype and have
# set np.errstate(over="raise")
raise OutOfBoundsDatetime(
f"cannot convert input {values[i]} with the unit '{unit}'"
) from err
return out


cdef int64_t cast_from_unit(
object ts,
str unit,
@@ -155,7 +232,7 @@ cdef int64_t cast_from_unit(
) from err


cpdef (int64_t, int) precision_from_unit(
cdef (int64_t, int) precision_from_unit(
NPY_DATETIMEUNIT in_reso,
NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns,
):
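The new `cast_from_unit_vectorized` splits each float into an integer base and a fractional remainder, rounds the fraction to the unit's precision, and scales the two parts separately so large values keep their sub-unit component; this underlies the whatsnew entry about `to_datetime` with a floating array and `unit` matching pointwise `Timestamp` results (GH 56037). A rough NumPy rendition of that idea, using illustrative factor/precision values rather than the library's own:

```python
import numpy as np

def cast_from_unit_sketch(values: np.ndarray, m: int, p: int) -> np.ndarray:
    """Scale float `values` by `m` (e.g. ns per second); NaN maps to iNaT."""
    nan_mask = np.isnan(values)
    base = np.where(nan_mask, 0.0, values).astype(np.int64)   # integer part
    frac = np.where(nan_mask, 0.0, values - base)             # fractional part
    if p:
        frac = np.round(frac, p)                              # limit to unit precision
    out = base * m + (frac * m).astype(np.int64)              # scale parts separately
    return np.where(nan_mask, np.iinfo(np.int64).min, out)    # iNaT sentinel for NaN

# 1.5 seconds -> 1_500_000_000 ns; NaN -> iNaT
print(cast_from_unit_sketch(np.array([1.5, np.nan]), m=1_000_000_000, p=9))
```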
14 changes: 10 additions & 4 deletions pandas/_libs/tslibs/strptime.pyx
@@ -489,10 +489,16 @@ def array_strptime(
creso=state.creso,
)

# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(state.creso)
result = iresult.base.view(f"M8[{abbrev}]")
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# i.e. we never encountered anything non-NaT, default to "s". This
# ensures that insert and concat-like operations with NaT
# do not upcast units
result = iresult.base.view("M8[s]")
else:
# Otherwise we can use the single reso that we encountered and avoid
# a second pass.
abbrev = npy_unit_to_abbrev(state.creso)
result = iresult.base.view(f"M8[{abbrev}]")
return result, result_timezone.base


(Diffs for the remaining changed files were not loaded on this page.)
