From 187e2b050e916e88ec9f0c8f68f4e7cd04fcf07b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 1 May 2023 11:10:32 -0700 Subject: [PATCH] BUG: pd.array with non-nano (#52859) * BUG: pd.array with non-nano * GH ref --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/construction.py | 41 +++++++++++++++++++------------ pandas/tests/arrays/test_array.py | 31 ++++++++--------------- 3 files changed, 36 insertions(+), 37 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3d61dc0709d93..35f9f623bf8ef 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -332,6 +332,7 @@ Numeric Conversion ^^^^^^^^^^ - Bug in :func:`DataFrame.style.to_latex` and :func:`DataFrame.style.to_html` if the DataFrame contains integers with more digits than can be represented by floating point double precision (:issue:`52272`) +- Bug in :func:`array` when given a ``datetime64`` or ``timedelta64`` dtype with unit of "s", "us", or "ms" returning :class:`PandasArray` instead of :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`52859`) - Bug in :meth:`ArrowDtype.numpy_dtype` returning nanosecond units for non-nanosecond ``pyarrow.timestamp`` and ``pyarrow.duration`` types (:issue:`51800`) - Bug in :meth:`DataFrame.__repr__` incorrectly raising a ``TypeError`` when the dtype of a column is ``np.record`` (:issue:`48526`) - Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index d626afa0c6e79..fc82cdaf7bf7d 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -19,7 +19,11 @@ from numpy import ma from pandas._libs import lib -from pandas._libs.tslibs.period import Period +from pandas._libs.tslibs import ( + Period, + get_unit_from_dtype, + is_supported_unit, +) from pandas._typing import ( AnyArrayLike, ArrayLike, @@ -28,10 +32,7 @@ T, ) -from pandas.core.dtypes.base import ( - ExtensionDtype, - _registry as registry, -) +from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, construct_1d_object_array_from_listlike, @@ -42,12 +43,10 @@ maybe_promote, ) from pandas.core.dtypes.common import ( - is_datetime64_ns_dtype, is_dtype_equal, - is_extension_array_dtype, is_list_like, is_object_dtype, - is_timedelta64_ns_dtype, + pandas_dtype, ) from pandas.core.dtypes.dtypes import PandasDtype from pandas.core.dtypes.generic import ( @@ -310,8 +309,8 @@ def array( data = extract_array(data, extract_numpy=True) # this returns None for not-found dtypes. - if isinstance(dtype, str): - dtype = registry.find(dtype) or dtype + if dtype is not None: + dtype = pandas_dtype(dtype) if isinstance(data, ExtensionArray) and ( dtype is None or is_dtype_equal(dtype, data.dtype) @@ -321,8 +320,8 @@ def array( return data.copy() return data - if is_extension_array_dtype(dtype): - cls = cast(ExtensionDtype, dtype).construct_array_type() + if isinstance(dtype, ExtensionDtype): + cls = dtype.construct_array_type() return cls._from_sequence(data, dtype=dtype, copy=copy) if dtype is None: @@ -365,12 +364,22 @@ def array( return BooleanArray._from_sequence(data, copy=copy) # Pandas overrides NumPy for - # 1. datetime64[ns] - # 2. timedelta64[ns] + # 1. datetime64[ns,us,ms,s] + # 2. timedelta64[ns,us,ms,s] # so that a DatetimeArray is returned. - if is_datetime64_ns_dtype(dtype): + if ( + lib.is_np_dtype(dtype, "M") + # error: Argument 1 to "py_get_unit_from_dtype" has incompatible type + # "Optional[dtype[Any]]"; expected "dtype[Any]" + and is_supported_unit(get_unit_from_dtype(dtype)) # type: ignore[arg-type] + ): return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy) - elif is_timedelta64_ns_dtype(dtype): + if ( + lib.is_np_dtype(dtype, "m") + # error: Argument 1 to "py_get_unit_from_dtype" has incompatible type + # "Optional[dtype[Any]]"; expected "dtype[Any]" + and is_supported_unit(get_unit_from_dtype(dtype)) # type: ignore[arg-type] + ): return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) return PandasArray._from_sequence(data, dtype=dtype, copy=copy) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 59e5c6fa2dda3..337cdaa26a3d4 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -5,8 +5,6 @@ import pytest import pytz -from pandas.core.dtypes.base import _registry as registry - import pandas as pd import pandas._testing as tm from pandas.api.extensions import register_extension_dtype @@ -80,6 +78,11 @@ np.dtype("datetime64[ns]"), DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")), ), + ( + [1, 2], + np.dtype("datetime64[s]"), + DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[s]")), + ), ( np.array([1, 2], dtype="datetime64[ns]"), None, @@ -119,6 +122,11 @@ np.dtype("timedelta64[ns]"), TimedeltaArray._from_sequence(["1H", "2H"]), ), + ( + np.array([1, 2], dtype="m8[s]"), + np.dtype("timedelta64[s]"), + TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[s]")), + ), ( pd.TimedeltaIndex(["1H", "2H"]), None, @@ -404,25 +412,6 @@ def test_array_unboxes(index_or_series): tm.assert_equal(result, expected) -@pytest.fixture -def registry_without_decimal(): - """Fixture yielding 'registry' with no DecimalDtype entries""" - idx = registry.dtypes.index(DecimalDtype) - registry.dtypes.pop(idx) - yield - registry.dtypes.append(DecimalDtype) - - -def test_array_not_registered(registry_without_decimal): - # check we aren't on it - assert registry.find("decimal") is None - data = [decimal.Decimal("1"), decimal.Decimal("2")] - - result = pd.array(data, dtype=DecimalDtype) - expected = DecimalArray._from_sequence(data) - tm.assert_equal(result, expected) - - def test_array_to_numpy_na(): # GH#40638 arr = pd.array([pd.NA, 1], dtype="string")