diff --git a/.circleci/config.yml b/.circleci/config.yml
index dfaade1d69c75..ac9db5f451bf3 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -47,8 +47,8 @@ jobs:
       - run:
           name: Build aarch64 wheels
           command: |
-            pip3 install cibuildwheel==2.12.1
-            cibuildwheel --output-dir wheelhouse
+            pip3 install cibuildwheel==2.14.1
+            cibuildwheel --prerelease-pythons --output-dir wheelhouse
           environment:
             CIBW_BUILD: << parameters.cibw-build >>

@@ -91,4 +91,5 @@ workflows:
                 only: /^v.*/
           matrix:
             parameters:
-              cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64"]
+              # TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
+              cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64"]#, "cp312-manylinux_aarch64"]
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index ac83f50578573..a9651ae26934b 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -311,12 +311,16 @@ jobs:
     # To freeze this file, uncomment out the ``if: false`` condition, and migrate the jobs
     # to the corresponding posix/windows-macos/sdist etc. workflows.
     # Feel free to modify this comment as necessary.
-    if: false # Uncomment this to freeze the workflow, comment it to unfreeze
+    #if: false # Uncomment this to freeze the workflow, comment it to unfreeze
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-22.04, macOS-latest, windows-latest]
+        # TODO: Disable macOS for now, Github Actions bug where python is not
+        # symlinked correctly to 3.12
+        # xref https://github.com/actions/setup-python/issues/701
+        #os: [ubuntu-22.04, macOS-latest, windows-latest]
+        os: [ubuntu-22.04, windows-latest]

     timeout-minutes: 180

@@ -340,21 +344,21 @@ jobs:
     - name: Set up Python Dev Version
       uses: actions/setup-python@v4
       with:
-        python-version: '3.11-dev'
+        python-version: '3.12-dev'

     - name: Install dependencies
       run: |
         python --version
-        python -m pip install --upgrade pip setuptools wheel
+        python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.0.1 meson-python==0.13.1
         python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
         python -m pip install git+https://github.com/nedbat/coveragepy.git
         python -m pip install versioneer[toml]
-        python -m pip install python-dateutil pytz cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
+        python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
         python -m pip list

     - name: Build Pandas
       run: |
-        python -m pip install -e . --no-build-isolation --no-index
+        python -m pip install -ve . --no-build-isolation --no-index

     - name: Build Version
       run: |
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index e692b337f4b0d..759cacb299550 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -93,7 +93,8 @@ jobs:
         - [macos-12, macosx_*]
         - [windows-2022, win_amd64]
         # TODO: support PyPy?
-        python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]
+        # TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
+        python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]#, ["cp312", "3.12"]]
     env:
       IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
       IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
@@ -117,6 +118,7 @@ jobs:
       #with:
       #  package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }}
       env:
+        CIBW_PRERELEASE_PYTHONS: True
         CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

     - name: Set up Python
diff --git a/meson.build b/meson.build
index adbf87f8e3390..a927b59abeaf9 100644
--- a/meson.build
+++ b/meson.build
@@ -27,6 +27,13 @@ versioneer = files('generate_version.py')
 add_project_arguments('-DNPY_NO_DEPRECATED_API=0', language : 'c')
 add_project_arguments('-DNPY_NO_DEPRECATED_API=0', language : 'cpp')

+# Allow supporting older numpys than the version compiled against
+# Set the define to the min supported version of numpy for pandas
+# e.g. right now this is targeting numpy 1.21+
+add_project_arguments('-DNPY_TARGET_VERSION=NPY_1_21_API_VERSION', language : 'c')
+add_project_arguments('-DNPY_TARGET_VERSION=NPY_1_21_API_VERSION', language : 'cpp')
+
+
 if fs.exists('_version_meson.py')
   py.install_sources('_version_meson.py', pure: false, subdir: 'pandas')
 else
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index bfcfd5c74351a..434517474d8bd 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -19,6 +19,7 @@
     ISMUSL,
     PY310,
     PY311,
+    PY312,
     PYPY,
 )
 import pandas.compat.compressors
@@ -189,5 +190,6 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
     "ISMUSL",
     "PY310",
     "PY311",
+    "PY312",
     "PYPY",
 ]
diff --git a/pandas/compat/_constants.py b/pandas/compat/_constants.py
index 7ef427604ee06..7bc3fbaaefebf 100644
--- a/pandas/compat/_constants.py
+++ b/pandas/compat/_constants.py
@@ -15,6 +15,7 @@
 PY310 = sys.version_info >= (3, 10)
 PY311 = sys.version_info >= (3, 11)
+PY312 = sys.version_info >= (3, 12)
 PYPY = platform.python_implementation() == "PyPy"
 ISMUSL = "musl" in (sysconfig.get_config_var("HOST_GNU_TYPE") or "")
 REF_COUNT = 2 if PY311 else 3

@@ -24,5 +25,6 @@
     "ISMUSL",
     "PY310",
     "PY311",
+    "PY312",
     "PYPY",
 ]
diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
index f8c8e6d87ff13..2f94856702465 100644
--- a/pandas/core/computation/expr.py
+++ b/pandas/core/computation/expr.py
@@ -543,15 +543,18 @@ def visit_UnaryOp(self, node, **kwargs):
     def visit_Name(self, node, **kwargs):
         return self.term_type(node.id, self.env, **kwargs)

+    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
     def visit_NameConstant(self, node, **kwargs) -> Term:
         return self.const_type(node.value, self.env)

+    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
     def visit_Num(self, node, **kwargs) -> Term:
-        return self.const_type(node.n, self.env)
+        return self.const_type(node.value, self.env)

     def visit_Constant(self, node, **kwargs) -> Term:
-        return self.const_type(node.n, self.env)
+        return self.const_type(node.value, self.env)

+    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
     def visit_Str(self, node, **kwargs):
         name = self.env.add_tmp(node.s)
         return self.term_type(name, self.env)
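Background note, not part of the patch: since Python 3.8, ast.parse produces ast.Constant nodes, and the legacy ast.Num / ast.Str / ast.NameConstant classes and their .n / .s accessors are deprecated aliases, which is why the visitors above read node.value. A minimal standalone sketch of the stdlib behaviour this relies on (illustrative only, stdlib names only):

    import ast

    # Numeric literals parse to Constant on every supported Python version.
    node = ast.parse("37", mode="eval").body
    print(type(node).__name__)  # "Constant" on Python 3.8+ (Num/Str are deprecated aliases)
    print(node.value)           # 37 -- preferred over the deprecated node.n / node.s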
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 7213c6093abbf..2f044905d33f9 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+from collections import abc
 from datetime import datetime
 import functools
 from itertools import zip_longest
@@ -3788,6 +3789,11 @@ def get_loc(self, key):
         try:
             return self._engine.get_loc(casted_key)
         except KeyError as err:
+            if isinstance(casted_key, slice) or (
+                isinstance(casted_key, abc.Iterable)
+                and any(isinstance(x, slice) for x in casted_key)
+            ):
+                raise InvalidIndexError(key)
             raise KeyError(key) from err
         except TypeError:
             # If we have a listlike key, _check_indexing_error will raise
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 1617b7c750c3c..d5a292335a5f6 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -32,7 +32,10 @@
     to_offset,
 )
 from pandas.compat.numpy import function as nv
-from pandas.errors import NullFrequencyError
+from pandas.errors import (
+    InvalidIndexError,
+    NullFrequencyError,
+)
 from pandas.util._decorators import (
     Appender,
     cache_readonly,
@@ -165,7 +168,7 @@ def __contains__(self, key: Any) -> bool:
         hash(key)
         try:
             self.get_loc(key)
-        except (KeyError, TypeError, ValueError):
+        except (KeyError, TypeError, ValueError, InvalidIndexError):
             return False
         return True

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 9fb83b3d55df9..ebe37d605ecbb 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -742,7 +742,12 @@ def _get_setitem_indexer(self, key):

         ax = self.obj._get_axis(0)

-        if isinstance(ax, MultiIndex) and self.name != "iloc" and is_hashable(key):
+        if (
+            isinstance(ax, MultiIndex)
+            and self.name != "iloc"
+            and is_hashable(key)
+            and not isinstance(key, slice)
+        ):
             with suppress(KeyError, InvalidIndexError):
                 # TypeError e.g. passed a bool
                 return ax.get_loc(key)
@@ -1063,6 +1068,14 @@ def _getitem_nested_tuple(self, tup: tuple):
         # we have a nested tuple so have at least 1 multi-index level
         # we should be able to match up the dimensionality here

+        def _contains_slice(x: object) -> bool:
+            # Check if object is a slice or a tuple containing a slice
+            if isinstance(x, tuple):
+                return any(isinstance(v, slice) for v in x)
+            elif isinstance(x, slice):
+                return True
+            return False
+
         for key in tup:
             check_dict_or_set_indexers(key)

@@ -1073,7 +1086,10 @@ def _getitem_nested_tuple(self, tup: tuple):
             if self.name != "loc":
                 # This should never be reached, but let's be explicit about it
                 raise ValueError("Too many indices")  # pragma: no cover
-            if all(is_hashable(x) or com.is_null_slice(x) for x in tup):
+            if all(
+                (is_hashable(x) and not _contains_slice(x)) or com.is_null_slice(x)
+                for x in tup
+            ):
                 # GH#10521 Series should reduce MultiIndex dimensions instead of
                 # DataFrame, IndexingError is not raised when slice(None,None,None)
                 # with one row.
@@ -1422,7 +1438,15 @@ def _convert_to_indexer(self, key, axis: AxisInt):
         ):
             raise IndexingError("Too many indexers")

-        if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)):
+        # Slices are not valid keys passed in by the user,
+        # even though they are hashable in Python 3.12
+        contains_slice = False
+        if isinstance(key, tuple):
+            contains_slice = any(isinstance(v, slice) for v in key)
+
+        if is_scalar(key) or (
+            isinstance(labels, MultiIndex) and is_hashable(key) and not contains_slice
+        ):
             # Otherwise get_loc will raise InvalidIndexError
             # if we are a label return me
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 21cce4c491e14..8c5eea02c60d8 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1022,7 +1022,12 @@ def __getitem__(self, key):
         elif key_is_scalar:
             return self._get_value(key)

-        if is_hashable(key):
+        # Convert generator to list before going through hashable part
+        # (We will iterate through the generator there to check for slices)
+        if is_iterator(key):
+            key = list(key)
+
+        if is_hashable(key) and not isinstance(key, slice):
             # Otherwise index.get_value will raise InvalidIndexError
             try:
                 # For labels that don't resolve as scalars like tuples and frozensets
@@ -1042,9 +1047,6 @@ def __getitem__(self, key):
             # Do slice check before somewhat-costly is_bool_indexer
             return self._getitem_slice(key)

-        if is_iterator(key):
-            key = list(key)
-
         if com.is_bool_indexer(key):
             key = check_bool_indexer(self.index, key)
             key = np.asarray(key, dtype=bool)
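Background note, not part of the patch: slice objects became hashable in Python 3.12, so is_hashable(key) alone no longer rules them out; that is what the explicit slice checks in the indexing code above guard against. A minimal sketch of the version difference (illustrative only, stdlib names only):

    import sys

    try:
        hash(slice(0, 1))
        print(sys.version_info[:2], "-> slice is hashable")    # Python 3.12+
    except TypeError:
        print(sys.version_info[:2], "-> slice is unhashable")  # Python 3.11 and earlier

    # Hence the pattern used above: pair is_hashable(key) with an explicit
    # "is this a slice / does this tuple contain a slice" test.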
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 1079bf3ee2067..467e8d2c3ff58 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -2070,6 +2070,11 @@ class SQLiteTable(SQLTable):
     """

     def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+
+        self._register_date_adapters()
+
+    def _register_date_adapters(self) -> None:
         # GH 8341
         # register an adapter callable for datetime.time object
         import sqlite3
@@ -2080,8 +2085,27 @@ def _adapt_time(t) -> str:
             # This is faster than strftime
             return f"{t.hour:02d}:{t.minute:02d}:{t.second:02d}.{t.microsecond:06d}"

+        # Also register adapters for date/datetime and co
+        # xref https://docs.python.org/3.12/library/sqlite3.html#adapter-and-converter-recipes
+        # Python 3.12+ doesn't auto-register adapters for us anymore
+
+        adapt_date_iso = lambda val: val.isoformat()
+        adapt_datetime_iso = lambda val: val.isoformat()
+        adapt_datetime_epoch = lambda val: int(val.timestamp())
+
         sqlite3.register_adapter(time, _adapt_time)
-        super().__init__(*args, **kwargs)
+
+        sqlite3.register_adapter(date, adapt_date_iso)
+        sqlite3.register_adapter(datetime, adapt_datetime_iso)
+        sqlite3.register_adapter(datetime, adapt_datetime_epoch)
+
+        convert_date = lambda val: date.fromisoformat(val.decode())
+        convert_datetime = lambda val: datetime.fromisoformat(val.decode())
+        convert_timestamp = lambda val: datetime.fromtimestamp(int(val))
+
+        sqlite3.register_converter("date", convert_date)
+        sqlite3.register_converter("datetime", convert_datetime)
+        sqlite3.register_converter("timestamp", convert_timestamp)

     def sql_schema(self) -> str:
         return str(";\n".join(self.table))
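For context, not from the diff: Python 3.12 deprecates the default date/datetime adapters and converters that sqlite3 previously registered implicitly, which is why SQLiteTable now registers them explicitly. A standalone sketch of the same recipe (illustrative only; the connection, table, and column names here are made up for the example):

    import sqlite3
    from datetime import date, datetime

    # Explicit adapters (Python object -> SQLite value) and converters
    # (SQLite value -> Python object), mirroring the stdlib recipes.
    sqlite3.register_adapter(date, lambda val: val.isoformat())
    sqlite3.register_adapter(datetime, lambda val: val.isoformat())
    sqlite3.register_converter("date", lambda val: date.fromisoformat(val.decode()))
    sqlite3.register_converter("datetime", lambda val: datetime.fromisoformat(val.decode()))

    con = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES)
    con.execute("CREATE TABLE t (d date, ts datetime)")
    con.execute("INSERT INTO t VALUES (?, ?)", (date(2023, 1, 1), datetime(2023, 1, 1, 12, 0)))
    print(con.execute("SELECT d, ts FROM t").fetchone())  # round-trips as date/datetime objects
    con.close()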
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index c376b43f4d4dd..83c672bc87e14 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -501,7 +501,7 @@ def _validate_names(self) -> None:
             children = self.iterparse[next(iter(self.iterparse))]
         else:
             parent = self.xml_doc.find(self.xpath, namespaces=self.namespaces)
-            children = parent.findall("*") if parent else []
+            children = parent.findall("*") if parent is not None else []

         if is_list_like(self.names):
             if len(self.names) < len(children):
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index e6e1363603e09..e986fb5db9992 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -9,6 +9,7 @@
 import numpy as np
 import pytest

+from pandas.compat import PY312
 from pandas.errors import (
     NumExprClobberingError,
     PerformanceWarning,
@@ -561,22 +562,16 @@ def test_unary_in_array(self):
         # TODO: 2022-01-29: result return list with numexpr 2.7.3 in CI
         # but cannot reproduce locally
         result = np.array(
-            pd.eval(
-                "[-True, True, ~True, +True,"
-                "-False, False, ~False, +False,"
-                "-37, 37, ~37, +37]"
-            ),
+            pd.eval("[-True, True, +True, -False, False, +False, -37, 37, ~37, +37]"),
             dtype=np.object_,
         )
         expected = np.array(
             [
                 -True,
                 True,
-                ~True,
                 +True,
                 -False,
                 False,
-                ~False,
                 +False,
                 -37,
                 37,
@@ -705,9 +700,13 @@ def test_disallow_python_keywords(self):

     def test_true_false_logic(self):
         # GH 25823
-        assert pd.eval("not True") == -2
-        assert pd.eval("not False") == -1
-        assert pd.eval("True and not True") == 0
+        # This behavior is deprecated in Python 3.12
+        with tm.maybe_produces_warning(
+            DeprecationWarning, PY312, check_stacklevel=False
+        ):
+            assert pd.eval("not True") == -2
+            assert pd.eval("not False") == -1
+            assert pd.eval("True and not True") == 0

     def test_and_logic_string_match(self):
         # GH 25823
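For context, not from the diff: pd.eval evaluates "not" through bitwise inversion (which is why pd.eval("not True") is -2), and inverting a bool emits a DeprecationWarning on Python 3.12; that is what the maybe_produces_warning wrapper above and the filterwarnings marker in the next file account for. A tiny sketch of the underlying Python deprecation (illustrative only):

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        print(~True)  # -2: the bool is inverted as its underlying int
    print([str(w.message) for w in caught])  # DeprecationWarning on 3.12, empty list before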
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
index 562f2fbe55c25..3b2a5ae902888 100644
--- a/pandas/tests/frame/indexing/test_where.py
+++ b/pandas/tests/frame/indexing/test_where.py
@@ -143,6 +143,8 @@ def _check_align(df, cond, other, check_dtypes=True):
         check_dtypes = all(not issubclass(s.type, np.integer) for s in df.dtypes)
         _check_align(df, cond, np.nan, check_dtypes=check_dtypes)

+    # Ignore deprecation warning in Python 3.12 for inverting a bool
+    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
     def test_where_invalid(self):
         # invalid conditions
         df = DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"])
diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py
index 3bc55786e1d2f..26c92e1f93865 100644
--- a/pandas/tests/indexes/test_indexing.py
+++ b/pandas/tests/indexes/test_indexing.py
@@ -176,10 +176,8 @@ def test_contains_requires_hashable_raises(self, index):

 class TestGetLoc:
     def test_get_loc_non_hashable(self, index):
-        # MultiIndex and Index raise TypeError, others InvalidIndexError
-
-        with pytest.raises((TypeError, InvalidIndexError), match="slice"):
-            index.get_loc(slice(0, 1))
+        with pytest.raises(InvalidIndexError, match="[0, 1]"):
+            index.get_loc([0, 1])

     def test_get_loc_non_scalar_hashable(self, index):
         # GH52877
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index d4d8d909adef6..eecacf29de872 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -717,7 +717,11 @@ def test_date_parser_int_bug(all_parsers):
         StringIO(data),
         index_col=0,
         parse_dates=[0],
-        date_parser=lambda x: datetime.utcfromtimestamp(int(x)),
+        # Note: we must pass tz and then drop the tz attribute
+        # (if we don't CI will flake out depending on the runner's local time)
+        date_parser=lambda x: datetime.fromtimestamp(int(x), tz=timezone.utc).replace(
+            tzinfo=None
+        ),
     )
     expected = DataFrame(
         [
diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py
index 025a612dc47d2..b8b05af609aa2 100644
--- a/pandas/tests/io/parser/test_quoting.py
+++ b/pandas/tests/io/parser/test_quoting.py
@@ -40,7 +40,7 @@ def test_bad_quote_char(all_parsers, kwargs, msg):
     "quoting,msg",
     [
         ("foo", '"quoting" must be an integer|Argument'),
-        (5, 'bad "quoting" value'),  # quoting must be in the range [0, 3]
+        (10, 'bad "quoting" value'),  # quoting must be in the range [0, 3]
     ],
 )
 def test_bad_quoting(all_parsers, quoting, msg):
diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py
index 2dac346bc54d5..f5c9c576abc24 100644
--- a/pandas/tests/scalar/timestamp/test_arithmetic.py
+++ b/pandas/tests/scalar/timestamp/test_arithmetic.py
@@ -197,7 +197,7 @@ def test_radd_tdscalar(self, td, fixed_now_ts):
         ],
     )
     def test_timestamp_add_timedelta64_unit(self, other, expected_difference):
-        now = datetime.utcnow()
+        now = datetime.now(timezone.utc)
         ts = Timestamp(now).as_unit("ns")
         result = ts + other
         valdiff = result._value - ts._value
diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py
index 283a3a9e7148d..f4f6361023b46 100644
--- a/pandas/tests/scalar/timestamp/test_timestamp.py
+++ b/pandas/tests/scalar/timestamp/test_timestamp.py
@@ -306,7 +306,7 @@ def compare(x, y):

         compare(Timestamp.now(), datetime.now())
         compare(Timestamp.now("UTC"), datetime.now(pytz.timezone("UTC")))
-        compare(Timestamp.utcnow(), datetime.utcnow())
+        compare(Timestamp.utcnow(), datetime.now(timezone.utc))
         compare(Timestamp.today(), datetime.today())
         current_time = calendar.timegm(datetime.now().utctimetuple())

@@ -326,7 +326,7 @@ def compare(x, y):
             datetime.fromtimestamp(current_time, utc),
         )

-        date_component = datetime.utcnow()
+        date_component = datetime.now(timezone.utc)
         time_component = (date_component + timedelta(minutes=10)).time()
         compare(
             Timestamp.combine(date_component, time_component),
@@ -345,7 +345,7 @@ def compare(x, y):

         compare(Timestamp.now(), datetime.now())
         compare(Timestamp.now("UTC"), datetime.now(tzutc()))
-        compare(Timestamp.utcnow(), datetime.utcnow())
+        compare(Timestamp.utcnow(), datetime.now(timezone.utc))
         compare(Timestamp.today(), datetime.today())
         current_time = calendar.timegm(datetime.now().utctimetuple())

@@ -356,7 +356,7 @@ def compare(x, y):
             Timestamp.fromtimestamp(current_time), datetime.fromtimestamp(current_time)
         )

-        date_component = datetime.utcnow()
+        date_component = datetime.now(timezone.utc)
         time_component = (date_component + timedelta(minutes=10)).time()
         compare(
             Timestamp.combine(date_component, time_component),
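For context, not part of the patch: datetime.utcnow() and datetime.utcfromtimestamp() emit a DeprecationWarning on Python 3.12, which is why the tests above switch to timezone-aware calls. A minimal sketch of the replacements used in this patch (illustrative only, stdlib names only):

    from datetime import datetime, timezone

    aware_now = datetime.now(timezone.utc)                    # replaces datetime.utcnow()
    aware_epoch = datetime.fromtimestamp(0, tz=timezone.utc)  # replaces datetime.utcfromtimestamp(0)

    # Where a naive UTC datetime is still needed (as in test_parse_dates above),
    # convert first and then drop the tzinfo:
    naive_now = aware_now.replace(tzinfo=None)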
diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py
index dfc8afbdf3acb..3190759f9812e 100644
--- a/pandas/tests/series/indexing/test_indexing.py
+++ b/pandas/tests/series/indexing/test_indexing.py
@@ -230,9 +230,9 @@ def test_basic_getitem_setitem_corner(datetime_series):
     # OK
     msg = r"unhashable type(: 'slice')?"
     with pytest.raises(TypeError, match=msg):
-        datetime_series[[5, slice(None, None)]]
+        datetime_series[[5, [None, None]]]
     with pytest.raises(TypeError, match=msg):
-        datetime_series[[5, slice(None, None)]] = 2
+        datetime_series[[5, [None, None]]] = 2


 def test_slice(string_series, object_series, using_copy_on_write):
diff --git a/pyproject.toml b/pyproject.toml
index 76cd3d21c1fcd..815f7ab7e286f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,11 @@ requires = [
     "meson==1.0.1",
     "wheel",
     "Cython>=0.29.33,<3",  # Note: sync with setup.py, environment.yml and asv.conf.json
-    "oldest-supported-numpy>=2022.8.16",
+    # Note: numpy 1.25 has a backwards compatible C API by default
+    # we don't want to force users to compile with 1.25 though
+    # (Ideally, in the future, though, oldest-supported-numpy can be dropped when our min numpy is 1.25.x)
+    "oldest-supported-numpy>=2022.8.16; python_version<'3.12'",
+    "numpy>=1.21.6; python_version>='3.12'",
    "versioneer[toml]"
 ]