diff --git a/docs/src/python/user-guide/misc/styling.py b/docs/src/python/user-guide/misc/styling.py new file mode 100644 index 000000000000..5b570e4f1e49 --- /dev/null +++ b/docs/src/python/user-guide/misc/styling.py @@ -0,0 +1,151 @@ +# --8<-- [start:dataframe] +import polars as pl +import polars.selectors as cs + +path = "docs/data/iris.csv" + +df = ( + pl.scan_csv(path) + .group_by("species") + .agg(cs.starts_with("petal").mean().round(3)) + .collect() +) +print(df) +# --8<-- [end:dataframe] + +# --8<-- [start:structure-header] +df.style.tab_header(title="Iris Data", subtitle="Mean measurement values per species") +# --8<-- [end:structure-header] + +# --8<-- [start:structure-header-out] +print( + df.style.tab_header( + title="Iris Data", subtitle="Mean measurement values per species" + ).as_raw_html() +) +# --8<-- [end:structure-header-out] + + +# --8<-- [start:structure-stub] +df.style.tab_stub(rowname_col="species") +# --8<-- [end:structure-stub] + +# --8<-- [start:structure-stub-out] +print(df.style.tab_stub(rowname_col="species").as_raw_html()) +# --8<-- [end:structure-stub-out] + +# --8<-- [start:structure-spanner] +( + df.style.tab_spanner("Petal", cs.starts_with("petal")).cols_label( + petal_length="Length", petal_width="Width" + ) +) +# --8<-- [end:structure-spanner] + +# --8<-- [start:structure-spanner-out] +print( + df.style.tab_spanner("Petal", cs.starts_with("petal")) + .cols_label(petal_length="Length", petal_width="Width") + .as_raw_html() +) +# --8<-- [end:structure-spanner-out] + +# --8<-- [start:format-number] +df.style.fmt_number("petal_width", decimals=1) +# --8<-- [end:format-number] + + +# --8<-- [start:format-number-out] +print(df.style.fmt_number("petal_width", decimals=1).as_raw_html()) +# --8<-- [end:format-number-out] + + +# --8<-- [start:style-simple] +from great_tables import loc, style + +df.style.tab_style( + style.fill("yellow"), + loc.body( + rows=pl.col("petal_length") == pl.col("petal_length").max(), + ), +) +# --8<-- 
[end:style-simple] + +# --8<-- [start:style-simple-out] +from great_tables import loc, style + +print( + df.style.tab_style( + style.fill("yellow"), + loc.body( + rows=pl.col("petal_length") == pl.col("petal_length").max(), + ), + ).as_raw_html() +) +# --8<-- [end:style-simple-out] + + +# --8<-- [start:style-bold-column] +from great_tables import loc, style + +df.style.tab_style( + style.text(weight="bold"), + loc.body(columns="species"), +) +# --8<-- [end:style-bold-column] + +# --8<-- [start:style-bold-column-out] +from great_tables import loc, style + +print( + df.style.tab_style( + style.text(weight="bold"), + loc.body(columns="species"), + ).as_raw_html() +) +# --8<-- [end:style-bold-column-out] + +# --8<-- [start:full-example] +from great_tables import loc, style + +( + df.style.tab_header( + title="Iris Data", subtitle="Mean measurement values per species" + ) + .tab_stub(rowname_col="species") + .cols_label(petal_length="Length", petal_width="Width") + .tab_spanner("Petal", cs.starts_with("petal")) + .fmt_number("petal_width", decimals=2) + .tab_style( + style.fill("yellow"), + loc.body( + rows=pl.col("petal_length") == pl.col("petal_length").max(), + ), + ) +) +# --8<-- [end:full-example] + +# --8<-- [start:full-example-out] +from great_tables import loc, style + +print( + df.style.tab_header( + title="Iris Data", subtitle="Mean measurement values per species" + ) + .tab_stub(rowname_col="species") + .cols_label(petal_length="Length", petal_width="Width") + .tab_spanner("Petal", cs.starts_with("petal")) + .fmt_number("petal_width", decimals=2) + .tab_style( + style.fill("yellow"), + loc.body( + rows=pl.col("petal_length") == pl.col("petal_length").max(), + ), + ) + .tab_style( + style.text(weight="bold"), + loc.body(columns="species"), + ) + .as_raw_html() +) +# --8<-- [end:full-example-out] diff --git a/docs/user-guide/installation.md b/docs/user-guide/installation.md index 03ac7f534bfc..5795f7905fce 100644 --- a/docs/user-guide/installation.md +++ 
b/docs/user-guide/installation.md @@ -89,6 +89,7 @@ pip install 'polars[numpy,fsspec]' | xlsx2csv | Support for reading from Excel files | | deltalake | Support for reading from Delta Lake Tables | | plot | Support for plotting Dataframes | +| style | Support for styling Dataframes | | timezone | Timezone support, only needed if 1. you are on Python < 3.9 and/or 2. you are on Windows, otherwise no dependencies will be installed | ### Rust diff --git a/docs/user-guide/misc/styling.md b/docs/user-guide/misc/styling.md new file mode 100644 index 000000000000..57a8d44d75e1 --- /dev/null +++ b/docs/user-guide/misc/styling.md @@ -0,0 +1,65 @@ +# Styling + +Data in a Polars `DataFrame` can be styled for presentation using the `DataFrame.style` property. This returns a `GT` object from [Great Tables](https://posit-dev.github.io/great-tables/articles/intro.html), which enables structuring, formatting, and styling for table display. + +{{code_block('user-guide/misc/styling','dataframe',[])}} + +```python exec="on" result="text" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:dataframe" +``` + +## Structure: add header title + +{{code_block('user-guide/misc/styling','structure-header',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:structure-header-out" +``` + +## Structure: add row stub + +{{code_block('user-guide/misc/styling','structure-stub',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:structure-stub-out" +``` + +## Structure: add column spanner + +{{code_block('user-guide/misc/styling','structure-spanner',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:structure-spanner-out" +``` + +## Format: limit decimal places + +{{code_block('user-guide/misc/styling','format-number',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- 
"python/user-guide/misc/styling.py:format-number-out" +``` + +## Style: highlight max row + +{{code_block('user-guide/misc/styling','style-simple',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:style-simple-out" +``` + +## Style: bold species column + +{{code_block('user-guide/misc/styling','style-bold-column',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:style-bold-column-out" +``` + +## Full example + +{{code_block('user-guide/misc/styling','full-example',[])}} + +```python exec="on" session="user-guide/misc/styling" +--8<-- "python/user-guide/misc/styling.py:full-example-out" +``` diff --git a/mkdocs.yml b/mkdocs.yml index 6673d17741ce..c0394149aa11 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -81,6 +81,7 @@ nav: - Misc: - user-guide/misc/multiprocessing.md - user-guide/misc/visualization.md + - user-guide/misc/styling.md - user-guide/misc/comparison.md - API reference: api/index.md diff --git a/py-polars/docs/source/reference/dataframe/index.rst b/py-polars/docs/source/reference/dataframe/index.rst index 509b1e3f5ede..21f7cac4c046 100644 --- a/py-polars/docs/source/reference/dataframe/index.rst +++ b/py-polars/docs/source/reference/dataframe/index.rst @@ -17,6 +17,7 @@ This page gives an overview of all public DataFrame methods. modify_select miscellaneous plot + style .. currentmodule:: polars diff --git a/py-polars/docs/source/reference/dataframe/style.rst b/py-polars/docs/source/reference/dataframe/style.rst new file mode 100644 index 000000000000..ebe9a3fabf2e --- /dev/null +++ b/py-polars/docs/source/reference/dataframe/style.rst @@ -0,0 +1,7 @@ +===== +Style +===== + +.. currentmodule:: polars + +.. 
autoproperty:: DataFrame.style \ No newline at end of file diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 25c4a3cdfea7..daf69b373b2f 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -76,12 +76,14 @@ UInt64, ) from polars.dependencies import ( + _GREAT_TABLES_AVAILABLE, _HVPLOT_AVAILABLE, _PANDAS_AVAILABLE, _PYARROW_AVAILABLE, _check_for_numpy, _check_for_pandas, _check_for_pyarrow, + great_tables, hvplot, import_optional, ) @@ -112,6 +114,7 @@ import jax import numpy.typing as npt import torch + from great_tables import GT from hvplot.plotting.core import hvPlotTabularPolars from xlsxwriter import Workbook @@ -607,6 +610,62 @@ def plot(self) -> hvPlotTabularPolars: hvplot.post_patch() return hvplot.plotting.core.hvPlotTabularPolars(self) + @property + @unstable() + def style(self) -> GT: + """ + Create a Great Table for styling. + + .. warning:: + This functionality is currently considered **unstable**. It may be + changed at any point without it being considered a breaking change. + + Polars does not implement styling logic itself, but instead defers to + the Great Tables package. Please see the `Great Tables reference <https://posit-dev.github.io/great-tables/reference/>`_ + for more information and documentation. + + Examples + -------- + Import some styling helpers, and create example data: + + >>> import polars.selectors as cs + >>> from great_tables import loc, style + >>> df = pl.DataFrame( + ... { + ... "site_id": [0, 1, 2], + ... "measure_a": [5, 4, 6], + ... "measure_b": [7, 3, 3], + ... } + ... ) + + Emphasize the site_id as row names: + + >>> df.style.tab_stub(rowname_col="site_id") # doctest: +SKIP + + Fill the background for the highest measure_a value row: + + >>> df.style.tab_style( + ... style.fill("yellow"), + ... loc.body(rows=pl.col("measure_a") == pl.col("measure_a").max()), + ... ) # doctest: +SKIP + + Put a spanner (high-level label) over measure columns: + + >>> df.style.tab_spanner( + ... 
"Measures", cs.starts_with("measure") + ... ) # doctest: +SKIP + + Format measure_b values to two decimal places: + + >>> df.style.fmt_number("measure_b", decimals=2) # doctest: +SKIP + + """ + if not _GREAT_TABLES_AVAILABLE: + msg = "great_tables is required for `.style`" + raise ModuleNotFoundError(msg) + + return great_tables.GT(self) + @property def shape(self) -> tuple[int, int]: """ diff --git a/py-polars/polars/dependencies.py b/py-polars/polars/dependencies.py index d970fb5673ff..0917cb179c6a 100644 --- a/py-polars/polars/dependencies.py +++ b/py-polars/polars/dependencies.py @@ -11,6 +11,7 @@ _DELTALAKE_AVAILABLE = True _FSSPEC_AVAILABLE = True _GEVENT_AVAILABLE = True +_GREAT_TABLES_AVAILABLE = True _HVPLOT_AVAILABLE = True _HYPOTHESIS_AVAILABLE = True _NUMPY_AVAILABLE = True @@ -152,6 +153,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]: import deltalake import fsspec import gevent + import great_tables import hvplot import hypothesis import numpy @@ -175,6 +177,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]: # heavy/optional third party libs deltalake, _DELTALAKE_AVAILABLE = _lazy_import("deltalake") fsspec, _FSSPEC_AVAILABLE = _lazy_import("fsspec") + great_tables, _GREAT_TABLES_AVAILABLE = _lazy_import("great_tables") hvplot, _HVPLOT_AVAILABLE = _lazy_import("hvplot") hypothesis, _HYPOTHESIS_AVAILABLE = _lazy_import("hypothesis") numpy, _NUMPY_AVAILABLE = _lazy_import("numpy") @@ -301,6 +304,7 @@ def import_optional( "deltalake", "fsspec", "gevent", + "great_tables", "hvplot", "numpy", "pandas", diff --git a/py-polars/polars/meta/versions.py b/py-polars/polars/meta/versions.py index 6a5eb8a788f1..02b71c6a92bb 100644 --- a/py-polars/polars/meta/versions.py +++ b/py-polars/polars/meta/versions.py @@ -69,6 +69,7 @@ def _get_dependency_info() -> dict[str, str]: "fastexcel", "fsspec", "gevent", + "great_tables", "hvplot", "matplotlib", "nest_asyncio", diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml 
index bd6cc2954a26..7a377e566f79 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -56,11 +56,12 @@ plot = ["hvplot >= 0.9.1"] pyarrow = ["pyarrow >= 7.0.0"] pydantic = ["pydantic"] sqlalchemy = ["sqlalchemy", "pandas"] +style = ["great-tables >= 0.8.0"] timezone = ["backports.zoneinfo; python_version < '3.9'", "tzdata; platform_system == 'Windows'"] xlsx2csv = ["xlsx2csv >= 0.8.0"] xlsxwriter = ["xlsxwriter"] all = [ - "polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,iceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]", + "polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,style,numpy,pandas,plot,pyarrow,pydantic,iceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]", ] [tool.maturin] @@ -89,6 +90,7 @@ module = [ "deltalake.*", "fsspec.*", "gevent", + "great_tables", "hvplot.*", "jax.*", "kuzu", diff --git a/py-polars/requirements-dev.txt b/py-polars/requirements-dev.txt index eaf3ea2c2804..aae62fe07ede 100644 --- a/py-polars/requirements-dev.txt +++ b/py-polars/requirements-dev.txt @@ -47,6 +47,8 @@ zstandard # Plotting hvplot>=0.9.1 matplotlib +# Styling +great-tables>=0.8.0; python_version >= '3.9' # Other gevent nest_asyncio