From 4c19115a0397e946acfa1835cb6d7ccb2fcd2c59 Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Wed, 24 Jul 2024 06:39:25 +0200 Subject: [PATCH] feat(python): Optimise `read_excel` when using "calamine" engine with the latest `fastexcel` (#17735) Signed-off-by: Luka Peschke --- py-polars/polars/io/spreadsheet/functions.py | 9 +++++++-- py-polars/requirements-dev.txt | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/py-polars/polars/io/spreadsheet/functions.py b/py-polars/polars/io/spreadsheet/functions.py index 8efc3f315b24..36e0ef9a462d 100644 --- a/py-polars/polars/io/spreadsheet/functions.py +++ b/py-polars/polars/io/spreadsheet/functions.py @@ -7,6 +7,7 @@ from typing import IO, TYPE_CHECKING, Any, Callable, NoReturn, Sequence, overload import polars._reexport as pl +from polars import from_arrow from polars import functions as F from polars._utils.deprecation import ( deprecate_renamed_parameter, @@ -882,8 +883,12 @@ def _read_spreadsheet_calamine( read_options["dtypes"] = parser_dtypes - ws = parser.load_sheet_by_name(name=sheet_name, **read_options) - df = ws.to_polars() + if fastexcel_version < (0, 11, 2): + ws = parser.load_sheet_by_name(name=sheet_name, **read_options) + df = ws.to_polars() + else: + ws_arrow = parser.load_sheet_eager(sheet_name, **read_options) + df = from_arrow(ws_arrow) # note: even if we applied parser dtypes we still re-apply schema_overrides # natively as we can refine integer/float types, temporal precision, etc. diff --git a/py-polars/requirements-dev.txt b/py-polars/requirements-dev.txt index 41ec214869e1..e9a964206fc0 100644 --- a/py-polars/requirements-dev.txt +++ b/py-polars/requirements-dev.txt @@ -38,7 +38,7 @@ cloudpickle fsspec s3fs[boto3] # Spreadsheet -fastexcel>=0.9 +fastexcel>=0.11.5 openpyxl xlsx2csv xlsxwriter