From f793bcb7c0b176983cd1f75c0a12658bf55b2484 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Thu, 16 Nov 2023 09:50:37 +0100 Subject: [PATCH] build(python): Support Python 3.12 (#12094) Co-authored-by: alexander-beedie --- .github/workflows/benchmark.yml | 2 +- .github/workflows/docs-global.yml | 2 +- .github/workflows/docs-python.yml | 2 +- .github/workflows/lint-python.yml | 4 +-- .github/workflows/test-bytecode-parser.yml | 2 +- .github/workflows/test-python.yml | 19 +++++++---- docs/development/contributing/index.md | 2 +- py-polars/polars/dataframe/frame.py | 2 +- py-polars/polars/functions/as_datatype.py | 2 +- py-polars/polars/lazyframe/frame.py | 2 +- py-polars/pyproject.toml | 5 ++- py-polars/requirements-dev.txt | 7 +++- py-polars/tests/docs/run_doctest.py | 10 ++++++ py-polars/tests/unit/io/test_database_read.py | 14 ++++++++ .../tests/unit/io/test_database_write.py | 32 +++++++++++++------ py-polars/tests/unit/test_polars_import.py | 17 ++++++---- 16 files changed, 90 insertions(+), 34 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 254da13172e3..bbeacf4e73d2 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -33,7 +33,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: '3.12' - name: Create virtual environment run: | diff --git a/.github/workflows/docs-global.yml b/.github/workflows/docs-global.yml index af62d25fc4cb..a5fa95678864 100644 --- a/.github/workflows/docs-global.yml +++ b/.github/workflows/docs-global.yml @@ -45,7 +45,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: '3.12' - name: Create virtual environment run: | diff --git a/.github/workflows/docs-python.yml b/.github/workflows/docs-python.yml index 07366d178afa..b36b10e3c95e 100644 --- a/.github/workflows/docs-python.yml +++ b/.github/workflows/docs-python.yml @@ -31,7 +31,7 @@ 
jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: '3.12' cache: pip cache-dependency-path: py-polars/docs/requirements-docs.txt diff --git a/.github/workflows/lint-python.yml b/.github/workflows/lint-python.yml index cddcf41e1cd7..3765cdd25b23 100644 --- a/.github/workflows/lint-python.yml +++ b/.github/workflows/lint-python.yml @@ -23,7 +23,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: '3.12' - name: Install Python dependencies run: pip install -r requirements-lint.txt @@ -42,7 +42,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.11'] + python-version: ['3.8', '3.12'] defaults: run: working-directory: py-polars diff --git a/.github/workflows/test-bytecode-parser.yml b/.github/workflows/test-bytecode-parser.yml index b46b30012992..082b38f40ec4 100644 --- a/.github/workflows/test-bytecode-parser.yml +++ b/.github/workflows/test-bytecode-parser.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index 7a6d8cd1bb55..e5c1940bc538 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -34,11 +34,11 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest] - python-version: ['3.8', '3.11'] - exclude: + os: [ubuntu-latest] + python-version: ['3.8', '3.11', '3.12'] + include: - os: windows-latest - python-version: '3.8' + python-version: '3.12' steps: - uses: actions/checkout@v4 @@ -77,20 +77,25 @@ jobs: - name: Run tests and report coverage if: github.ref_name != 'main' - run: pytest --cov -n auto --dist loadgroup -m "not benchmark and not docs" + env: + # TODO: Re-enable coverage for Ubuntu + Python 3.12 tests + # Currently skipped 
due to performance issues in coverage: + # https://github.com/nedbat/coveragepy/issues/1665 + COV: ${{ !(matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12') && '--cov' || '' }} + run: pytest $COV -n auto --dist loadgroup -m "not benchmark and not docs" - name: Run tests async reader tests if: github.ref_name != 'main' && matrix.os != 'windows-latest' run: POLARS_FORCE_ASYNC=1 pytest -m "not benchmark and not docs" tests/unit/io/ - name: Run doctests - if: github.ref_name != 'main' && matrix.os != 'windows-latest' + if: github.ref_name != 'main' && matrix.python-version == '3.12' && matrix.os == 'ubuntu-latest' run: | python tests/docs/run_doctest.py pytest tests/docs/test_user_guide.py -m docs - name: Check import without optional dependencies - if: github.ref_name != 'main' && matrix.os != 'windows-latest' + if: github.ref_name != 'main' && matrix.python-version == '3.12' && matrix.os == 'ubuntu-latest' run: | declare -a deps=("pandas" "pyarrow" diff --git a/docs/development/contributing/index.md b/docs/development/contributing/index.md index db6c89f7735a..5f062b74bee4 100644 --- a/docs/development/contributing/index.md +++ b/docs/development/contributing/index.md @@ -64,7 +64,7 @@ rustup toolchain install nightly --component miri ``` Next, install Python, for example using [pyenv](https://github.com/pyenv/pyenv#installation). -We recommend using the latest Python version (`3.11`). +We recommend using the latest Python version (`3.12`). Make sure you deactivate any active virtual environments or conda environments, as the steps below will create a new virtual environment for Polars. You will need Python even if you intend to work on the Rust code only, as we rely on the Python tests to verify all functionality. 
diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index c0a2a5fa6363..19700f2a2d22 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -1261,7 +1261,7 @@ def schema(self) -> SchemaDict: ... } ... ) >>> df.schema - OrderedDict([('foo', Int64), ('bar', Float64), ('ham', Utf8)]) + OrderedDict({'foo': Int64, 'bar': Float64, 'ham': Utf8}) """ return OrderedDict(zip(self.columns, self.dtypes)) diff --git a/py-polars/polars/functions/as_datatype.py b/py-polars/polars/functions/as_datatype.py index 5100291eae4d..0b328b0ad1ba 100644 --- a/py-polars/polars/functions/as_datatype.py +++ b/py-polars/polars/functions/as_datatype.py @@ -459,7 +459,7 @@ def struct( Use keyword arguments to easily name each struct field. >>> df.select(pl.struct(p="int", q="bool").alias("my_struct")).schema - OrderedDict([('my_struct', Struct([Field('p', Int64), Field('q', Boolean)]))]) + OrderedDict({'my_struct': Struct([Field('p', Int64), Field('q', Boolean)])}) """ pyexprs = parse_as_list_of_expressions(*exprs, **named_exprs) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 98a3cf172d39..4d305d3cdb93 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -731,7 +731,7 @@ def schema(self) -> SchemaDict: ... } ... 
) >>> lf.schema - OrderedDict([('foo', Int64), ('bar', Float64), ('ham', Utf8)]) + OrderedDict({'foo': Int64, 'bar': Float64, 'ham': Utf8}) """ return OrderedDict(self._ldf.schema()) diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index 51caf893c4f4..8c2da790a236 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -26,6 +26,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Rust", "Topic :: Scientific/Engineering", ] @@ -214,7 +215,9 @@ filterwarnings = [ # Ignore warnings issued by dependency internals "ignore:.*is_sparse is deprecated.*:FutureWarning", "ignore:FigureCanvasAgg is non-interactive:UserWarning", - # Introspection under PyCharm IDE can generate this in 3.12 + "ignore:datetime.datetime.utcfromtimestamp\\(\\) is deprecated.*:DeprecationWarning", + "ignore:datetime.datetime.utcnow\\(\\) is deprecated.*:DeprecationWarning", + # Introspection under PyCharm IDE can generate this in Python 3.12 "ignore:.*co_lnotab is deprecated, use co_lines.*:DeprecationWarning", ] xfail_strict = true diff --git a/py-polars/requirements-dev.txt b/py-polars/requirements-dev.txt index e5b1f4203d4d..d5950b552d05 100644 --- a/py-polars/requirements-dev.txt +++ b/py-polars/requirements-dev.txt @@ -26,11 +26,16 @@ tzdata; platform_system == 'Windows' # Database SQLAlchemy adbc_driver_sqlite; python_version >= '3.9' and platform_system != 'Windows' -connectorx +# TODO: Remove version constraint for connectorx when Python 3.12 is supported: +# https://github.com/sfu-db/connector-x/issues/527 +connectorx; python_version <= '3.11' # Cloud cloudpickle fsspec s3fs[boto3] +# TODO: Unpin and remove aiohttp here when 3.9.0 is released: +# https://github.com/aio-libs/aiohttp/issues/7739 +aiohttp==3.9.0b1 # Spreadsheet ezodf lxml diff --git a/py-polars/tests/docs/run_doctest.py 
b/py-polars/tests/docs/run_doctest.py index 1d279088b2b4..ae3d8bf38feb 100644 --- a/py-polars/tests/docs/run_doctest.py +++ b/py-polars/tests/docs/run_doctest.py @@ -72,6 +72,16 @@ def modules_in_path(p: Path) -> Iterator[ModuleType]: # Set doctests to fail on warnings warnings.simplefilter("error", DeprecationWarning) + warnings.filterwarnings( + "ignore", + message="datetime.datetime.utcfromtimestamp\\(\\) is deprecated.*", + category=DeprecationWarning, + ) + warnings.filterwarnings( + "ignore", + message="datetime.datetime.utcnow\\(\\) is deprecated.*", + category=DeprecationWarning, + ) OutputChecker = doctest.OutputChecker diff --git a/py-polars/tests/unit/io/test_database_read.py b/py-polars/tests/unit/io/test_database_read.py index 824ffb989fd2..fc5295bea59e 100644 --- a/py-polars/tests/unit/io/test_database_read.py +++ b/py-polars/tests/unit/io/test_database_read.py @@ -34,6 +34,12 @@ def adbc_sqlite_connect(*args: Any, **kwargs: Any) -> Any: def create_temp_sqlite_db(test_db: str) -> None: Path(test_db).unlink(missing_ok=True) + def convert_date(val: bytes) -> date: + """Convert ISO 8601 date to datetime.date object.""" + return date.fromisoformat(val.decode()) + + sqlite3.register_converter("date", convert_date) + # NOTE: at the time of writing adcb/connectorx have weak SQLite support (poor or # no bool/date/datetime dtypes, for example) and there is a bug in connectorx that # causes float rounding < py 3.11, hence we are only testing/storing simple values @@ -183,6 +189,10 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any: # noqa: D102 schema_overrides={"id": pl.UInt8}, ), id="uri: connectorx", + marks=pytest.mark.skipif( + sys.version_info > (3, 11), + reason="connectorx cannot be installed on Python 3.12 yet.", + ), ), pytest.param( *DatabaseReadTestParams( @@ -584,6 +594,10 @@ def test_read_database_exceptions( read_database(**params) +@pytest.mark.skipif( + sys.version_info > (3, 11), + reason="connectorx cannot be installed on Python 3.12 
yet.", +) @pytest.mark.parametrize( "uri", [ diff --git a/py-polars/tests/unit/io/test_database_write.py b/py-polars/tests/unit/io/test_database_write.py index 8a4227fe564c..4b19c1c4bb2d 100644 --- a/py-polars/tests/unit/io/test_database_write.py +++ b/py-polars/tests/unit/io/test_database_write.py @@ -23,12 +23,16 @@ def adbc_sqlite_driver_version(*args: Any, **kwargs: Any) -> str: return "n/a" -@pytest.mark.write_disk() -@pytest.mark.parametrize("engine", ["adbc", "sqlalchemy"]) +@pytest.mark.skipif( + sys.version_info > (3, 11), + reason="connectorx cannot be installed on Python 3.12 yet.", +) @pytest.mark.skipif( sys.version_info < (3, 9) or sys.platform == "win32", reason="adbc_driver_sqlite not available below Python 3.9 / on Windows", ) +@pytest.mark.write_disk() +@pytest.mark.parametrize("engine", ["adbc", "sqlalchemy"]) def test_write_database_create(engine: DbWriteEngine, tmp_path: Path) -> None: df = pl.DataFrame( { @@ -51,12 +55,16 @@ def test_write_database_create(engine: DbWriteEngine, tmp_path: Path) -> None: assert_frame_equal(result, df) -@pytest.mark.write_disk() -@pytest.mark.parametrize("engine", ["adbc", "sqlalchemy"]) +@pytest.mark.skipif( + sys.version_info > (3, 11), + reason="connectorx cannot be installed on Python 3.12 yet.", +) @pytest.mark.skipif( sys.version_info < (3, 9) or sys.platform == "win32", reason="adbc_driver_sqlite not available below Python 3.9 / on Windows", ) +@pytest.mark.write_disk() +@pytest.mark.parametrize("engine", ["adbc", "sqlalchemy"]) def test_write_database_append(engine: DbWriteEngine, tmp_path: Path) -> None: df = pl.DataFrame( { @@ -96,6 +104,10 @@ def test_write_database_append(engine: DbWriteEngine, tmp_path: Path) -> None: assert_frame_equal(result, pl.concat([df, df])) +@pytest.mark.skipif( + sys.version_info < (3, 9) or sys.platform == "win32", + reason="adbc_driver_sqlite not available below Python 3.9 / on Windows", +) @pytest.mark.write_disk() @pytest.mark.parametrize( "engine", @@ -106,13 +118,15 @@ 
def test_write_database_append(engine: DbWriteEngine, tmp_path: Path) -> None: reason="ADBC SQLite driver has a bug with quoted/qualified table names", ), ), - "sqlalchemy", + pytest.param( + "sqlalchemy", + marks=pytest.mark.skipif( + sys.version_info > (3, 11), + reason="connectorx cannot be installed on Python 3.12 yet.", + ), + ), ], ) -@pytest.mark.skipif( - sys.version_info < (3, 9) or sys.platform == "win32", - reason="adbc_driver_sqlite not available below Python 3.9 / on Windows", -) def test_write_database_create_quoted_tablename( engine: DbWriteEngine, tmp_path: Path ) -> None: diff --git a/py-polars/tests/unit/test_polars_import.py b/py-polars/tests/unit/test_polars_import.py index 7093f69d59e1..51beee946e93 100644 --- a/py-polars/tests/unit/test_polars_import.py +++ b/py-polars/tests/unit/test_polars_import.py @@ -28,12 +28,11 @@ def _import_timings() -> bytes: # assemble suitable command to get polars module import timing; # run in a separate process to ensure clean timing results. cmd = f'{sys.executable} -X importtime -c "import polars"' - output = ( + return ( subprocess.run(cmd, shell=True, capture_output=True) .stderr.replace(b"import time:", b"") .strip() ) - return output def _import_timings_as_frame(n_tries: int) -> tuple[pl.DataFrame, int]: @@ -56,9 +55,15 @@ def _import_timings_as_frame(n_tries: int) -> tuple[pl.DataFrame, int]: import_timings.append(df_import) - # note: if a qualifying import time was already achieved, we won't get here - df_fastest_import = sorted(import_timings, key=_import_time_from_frame)[0] - return df_fastest_import, _import_time_from_frame(df_fastest_import) + # note: if a qualifying import time was already achieved, we won't get here. 
+ # if we do, let's see all the failed timings to help see what's going on: + import_times = [_import_time_from_frame(df) for df in import_timings] + msg = "\n".join(f"({idx}) {tm:,}μs" for idx, tm in enumerate(import_times)) + min_max = f"Min => {min(import_times):,}μs, Max => {max(import_times):,}μs)" + print(f"\nImport times achieved over {n_tries} tries:\n{min_max}\n\n{msg}") + + sorted_timing_frames = sorted(import_timings, key=_import_time_from_frame) + return sorted_timing_frames[0], min(import_times) @pytest.mark.skipif(sys.platform == "win32", reason="Unreliable on Windows") @@ -70,7 +75,7 @@ def test_polars_import() -> None: # note: reduce noise by allowing up to 'n' tries (but return immediately if/when # a qualifying time is achieved, so we don't waste time running unnecessary tests) - df_import, polars_import_time = _import_timings_as_frame(n_tries=5) + df_import, polars_import_time = _import_timings_as_frame(n_tries=10) with pl.Config( # get a complete view of what's going on in case of failure