Skip to content

Commit

Permalink
fix(python): improved xlsx2csv defaults for read_excel
Browse files (browse the repository at this point in the history)
  • Loading branch information
alexander-beedie committed Oct 28, 2023
1 parent ec2876a commit b6797f2
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
29 changes: 20 additions & 9 deletions py-polars/polars/io/spreadsheet/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def read_excel(
The ``openpyxl`` package can also be used to parse Excel data; it has slightly
better default type detection, but is slower than ``xlsx2csv``. If you have a sheet
that is better read using this package you can set the engine as "openpyxl" (if you
use this engine then both `xlsx2csv_options` and `read_csv_options` cannot be set).
use this engine then neither `xlsx2csv_options` nor `read_csv_options` can be set).
>>> pl.read_excel(
... source="test.xlsx",
Expand All @@ -231,13 +231,24 @@ def read_excel(
... ) # doctest: +SKIP
"""
if xlsx2csv_options is None:
xlsx2csv_options = {}

if read_csv_options is None:
read_csv_options = {"truncate_ragged_lines": True}
elif "truncate_ragged_lines" not in read_csv_options:
read_csv_options["truncate_ragged_lines"] = True
if engine is None:
engine = "xlsx2csv"

# establish good default values
if engine == "xlsx2csv":
if xlsx2csv_options is None:
xlsx2csv_options = {}
if read_csv_options is None:
read_csv_options = {}

read_csv_options.setdefault("truncate_ragged_lines", True)
for option, value in {
"exclude_hidden_sheets": False,
"skip_empty_lines": False,
"skip_hidden_rows": False,
"floatformat": "%f",
}.items():
xlsx2csv_options.setdefault(option, value)

return _read_spreadsheet(
sheet_id,
Expand Down Expand Up @@ -394,7 +405,7 @@ def _read_spreadsheet(
sheet_id: int | Sequence[int] | None,
sheet_name: str | list[str] | tuple[str] | None,
source: str | BytesIO | Path | BinaryIO | bytes,
engine: Literal["xlsx2csv", "openpyxl", "pyxlsb", "ods"] | None,
engine: Literal["xlsx2csv", "openpyxl", "pyxlsb", "ods"],
engine_options: dict[str, Any] | None = None,
read_csv_options: dict[str, Any] | None = None,
schema_overrides: SchemaDict | None = None,
Expand Down
Binary file modified py-polars/tests/unit/io/files/example.xlsx
Binary file not shown.

0 comments on commit b6797f2

Please sign in to comment.