Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python): Add DataFrame.style namespace #16809

Merged
merged 12 commits into from
Jun 13, 2024
151 changes: 151 additions & 0 deletions docs/src/python/user-guide/misc/styling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# Setup for the styling user-guide page. This section is both displayed and
# executed by the page: it lazily scans the iris CSV, groups by "species",
# takes the mean of every column whose name starts with "petal" (rounded to
# 3 decimals), and collects the result into `df`, which the later snippets
# style through `df.style`.
# --8<-- [start:dataframe]
import polars as pl
import polars.selectors as cs

path = "docs/data/iris.csv"

df = (
pl.scan_csv(path)
.group_by("species")
.agg(cs.starts_with("petal").mean().round(3))
.collect()
)
print(df)
# --8<-- [end:dataframe]

# "Structure: add header title" example. `structure-header` is the code shown
# in the user guide; `structure-header-out` is the variant the page executes,
# which prints the same table through `as_raw_html()`. Keep the two in sync.
# --8<-- [start:structure-header]
df.style.tab_header(title="Iris Data", subtitle="Mean measurement values per species")
# --8<-- [end:structure-header]

# --8<-- [start:structure-header-out]
print(
df.style.tab_header(
title="Iris Data", subtitle="Mean measurement values per species"
).as_raw_html()
)
# --8<-- [end:structure-header-out]


# "Structure: add row stub" example: the "species" column is used as the row
# names. `structure-stub` is displayed; `structure-stub-out` is executed and
# prints the table through `as_raw_html()`.
# --8<-- [start:structure-stub]
df.style.tab_stub(rowname_col="species")
# --8<-- [end:structure-stub]

# --8<-- [start:structure-stub-out]
print(df.style.tab_stub(rowname_col="species").as_raw_html())
# --8<-- [end:structure-stub-out]

# "Structure: add column spanner" example: a "Petal" spanner is placed over
# the columns selected by `cs.starts_with("petal")`, and those columns are
# relabelled "Length" / "Width". `structure-spanner` is displayed;
# `structure-spanner-out` is executed and prints through `as_raw_html()`.
# --8<-- [start:structure-spanner]
(
df.style.tab_spanner("Petal", cs.starts_with("petal")).cols_label(
petal_length="Length", petal_width="Width"
)
)
# --8<-- [end:structure-spanner]

# --8<-- [start:structure-spanner-out]
print(
df.style.tab_spanner("Petal", cs.starts_with("petal"))
.cols_label(petal_length="Length", petal_width="Width")
.as_raw_html()
)
# --8<-- [end:structure-spanner-out]

# "Format: limit decimal places" example: "petal_width" is formatted with one
# decimal. `format-number` is displayed; `format-number-out` is executed and
# prints the table through `as_raw_html()`.
# --8<-- [start:format-number]
df.style.fmt_number("petal_width", decimals=1)
# --8<-- [end:format-number]


# --8<-- [start:format-number-out]
print(df.style.fmt_number("petal_width", decimals=1).as_raw_html())
# --8<-- [end:format-number-out]


# "Style: highlight max row" example: a yellow fill is applied to the body
# rows where "petal_length" equals the column maximum. `style-simple` is
# displayed; `style-simple-out` is executed and prints through
# `as_raw_html()`. Both sections repeat the `great_tables` import so that
# each snippet is self-contained.
# --8<-- [start:style-simple]
from great_tables import loc, style

df.style.tab_style(
style.fill("yellow"),
loc.body(
rows=pl.col("petal_length") == pl.col("petal_length").max(),
),
)
# --8<-- [end:style-simple]

# --8<-- [start:style-simple-out]
from great_tables import loc, style

print(
df.style.tab_style(
style.fill("yellow"),
loc.body(
rows=pl.col("petal_length") == pl.col("petal_length").max(),
),
).as_raw_html()
)
# --8<-- [end:style-simple-out]


# "Style: bold species column" example: bold text weight is applied to the
# body cells of the "species" column. `style-bold-column` is displayed;
# `style-bold-column-out` is executed and prints through `as_raw_html()`.
# --8<-- [start:style-bold-column]
from great_tables import loc, style

df.style.tab_style(
style.text(weight="bold"),
loc.body(columns="species"),
)
# --8<-- [end:style-bold-column]

# --8<-- [start:style-bold-column-out]
from great_tables import loc, style

print(
df.style.tab_style(
style.text(weight="bold"),
loc.body(columns="species"),
).as_raw_html()
)
# --8<-- [end:style-bold-column-out]

# "Full example": the code displayed in the user guide. It must list exactly
# the same chain of calls as `full-example-out` (the variant the page
# executes), otherwise the rendered table is not the one this code produces.
# --8<-- [start:full-example]
from great_tables import loc, style

(
    df.style.tab_header(
        title="Iris Data", subtitle="Mean measurement values per species"
    )
    .tab_stub(rowname_col="species")
    .cols_label(petal_length="Length", petal_width="Width")
    .tab_spanner("Petal", cs.starts_with("petal"))
    .fmt_number("petal_width", decimals=2)
    .tab_style(
        style.fill("yellow"),
        loc.body(
            rows=pl.col("petal_length") == pl.col("petal_length").max(),
        ),
    )
    .tab_style(
        style.text(weight="bold"),
        loc.body(columns="species"),
    )
)
# --8<-- [end:full-example]

# Executed counterpart of the "Full example" section: header, row stub,
# column labels, spanner, number formatting, yellow fill on the max
# "petal_length" row and bold "species" cells, printed through
# `as_raw_html()`.
# --8<-- [start:full-example-out]
from great_tables import loc, style

print(
df.style.tab_header(
title="Iris Data", subtitle="Mean measurement values per species"
)
.tab_stub(rowname_col="species")
.cols_label(petal_length="Length", petal_width="Width")
.tab_spanner("Petal", cs.starts_with("petal"))
.fmt_number("petal_width", decimals=2)
.tab_style(
style.fill("yellow"),
loc.body(
rows=pl.col("petal_length") == pl.col("petal_length").max(),
),
)
.tab_style(
style.text(weight="bold"),
loc.body(columns="species"),
)
.as_raw_html()
)
# --8<-- [end:full-example-out]
65 changes: 65 additions & 0 deletions docs/user-guide/misc/styling.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Styling

Data in a Polars `DataFrame` can be styled for presentation using the `DataFrame.style` property. This returns a `GT` object from [Great Tables](https://posit-dev.github.io/great-tables/articles/intro.html), which enables structuring, formatting, and styling for table display.

{{code_block('user-guide/misc/styling','dataframe',[])}}

```python exec="on" result="text" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:dataframe"
```

## Structure: add header title

{{code_block('user-guide/misc/styling','structure-header',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:structure-header-out"
```

## Structure: add row stub

{{code_block('user-guide/misc/styling','structure-stub',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:structure-stub-out"
```

## Structure: add column spanner

{{code_block('user-guide/misc/styling','structure-spanner',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:structure-spanner-out"
```

## Format: limit decimal places

{{code_block('user-guide/misc/styling','format-number',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:format-number-out"
```

## Style: highlight max row

{{code_block('user-guide/misc/styling','style-simple',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:style-simple-out"
```

## Style: bold species column

{{code_block('user-guide/misc/styling','style-bold-column',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:style-bold-column-out"
```

## Full example

{{code_block('user-guide/misc/styling','full-example',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:full-example-out"
```
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ nav:
- Misc:
- user-guide/misc/multiprocessing.md
- user-guide/misc/visualization.md
- user-guide/misc/styling.md
- user-guide/misc/comparison.md

- API reference: api/index.md
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/dataframe/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ This page gives an overview of all public DataFrame methods.
modify_select
miscellaneous
plot
style

.. currentmodule:: polars

Expand Down
7 changes: 7 additions & 0 deletions py-polars/docs/source/reference/dataframe/style.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
=====
Style
=====

.. currentmodule:: polars

.. autoproperty:: DataFrame.style
59 changes: 59 additions & 0 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,14 @@
UInt64,
)
from polars.dependencies import (
_GREAT_TABLES_AVAILABLE,
_HVPLOT_AVAILABLE,
_PANDAS_AVAILABLE,
_PYARROW_AVAILABLE,
_check_for_numpy,
_check_for_pandas,
_check_for_pyarrow,
great_tables,
hvplot,
import_optional,
)
Expand Down Expand Up @@ -112,6 +114,7 @@
import jax
import numpy.typing as npt
import torch
from great_tables import GT
from hvplot.plotting.core import hvPlotTabularPolars
from xlsxwriter import Workbook

Expand Down Expand Up @@ -607,6 +610,62 @@ def plot(self) -> hvPlotTabularPolars:
hvplot.post_patch()
return hvplot.plotting.core.hvPlotTabularPolars(self)

@property
@unstable()
def style(self) -> GT:
    """
    Wrap this DataFrame in a Great Tables `GT` object for styled display.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    All styling logic lives in the Great Tables package; Polars only hands
    the frame over to it. Please see the `Great Tables reference <https://posit-dev.github.io/great-tables/reference/>`_
    for more information and documentation.

    Raises
    ------
    ModuleNotFoundError
        If the optional `great_tables` dependency is not available.

    Examples
    --------
    Import some styling helpers, and create example data:

    >>> import polars.selectors as cs
    >>> from great_tables import loc, style
    >>> df = pl.DataFrame(
    ...     {
    ...         "site_id": [0, 1, 2],
    ...         "measure_a": [5, 4, 6],
    ...         "measure_b": [7, 3, 3],
    ...     }
    ... )

    Emphasize the site_id as row names:

    >>> df.style.tab_stub(rowname_col="site_id")  # doctest: +SKIP

    Fill the background for the highest measure_a value row:

    >>> df.style.tab_style(
    ...     style.fill("yellow"),
    ...     loc.body(rows=pl.col("measure_a") == pl.col("measure_a").max()),
    ... )  # doctest: +SKIP

    Put a spanner (high-level label) over measure columns:

    >>> df.style.tab_spanner(
    ...     "Measures", cs.starts_with("measure")
    ... )  # doctest: +SKIP

    Format measure_b values to two decimal places:

    >>> df.style.fmt_number("measure_b", decimals=2)  # doctest: +SKIP

    """
    # Happy path first: hand the frame to Great Tables when it is available.
    if _GREAT_TABLES_AVAILABLE:
        return great_tables.GT(self)

    msg = "great_tables is required for `.style`"
    raise ModuleNotFoundError(msg)

@property
def shape(self) -> tuple[int, int]:
"""
Expand Down
4 changes: 4 additions & 0 deletions py-polars/polars/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
_DELTALAKE_AVAILABLE = True
_FSSPEC_AVAILABLE = True
_GEVENT_AVAILABLE = True
_GREAT_TABLES_AVAILABLE = True
_HVPLOT_AVAILABLE = True
_HYPOTHESIS_AVAILABLE = True
_NUMPY_AVAILABLE = True
Expand Down Expand Up @@ -152,6 +153,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]:
import deltalake
import fsspec
import gevent
import great_tables
import hvplot
import hypothesis
import numpy
Expand All @@ -175,6 +177,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]:
# heavy/optional third party libs
deltalake, _DELTALAKE_AVAILABLE = _lazy_import("deltalake")
fsspec, _FSSPEC_AVAILABLE = _lazy_import("fsspec")
great_tables, _GREAT_TABLES_AVAILABLE = _lazy_import("great_tables")
hvplot, _HVPLOT_AVAILABLE = _lazy_import("hvplot")
hypothesis, _HYPOTHESIS_AVAILABLE = _lazy_import("hypothesis")
numpy, _NUMPY_AVAILABLE = _lazy_import("numpy")
Expand Down Expand Up @@ -301,6 +304,7 @@ def import_optional(
"deltalake",
"fsspec",
"gevent",
"great_tables",
"hvplot",
"numpy",
"pandas",
Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/meta/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def _get_dependency_info() -> dict[str, str]:
"fastexcel",
"fsspec",
"gevent",
"great_tables",
"hvplot",
"matplotlib",
"nest_asyncio",
Expand Down
4 changes: 3 additions & 1 deletion py-polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,12 @@ plot = ["hvplot >= 0.9.1"]
pyarrow = ["pyarrow >= 7.0.0"]
pydantic = ["pydantic"]
sqlalchemy = ["sqlalchemy", "pandas"]
style = ["great-tables >= 0.8.0"]
MarcoGorelli marked this conversation as resolved.
Show resolved Hide resolved
timezone = ["backports.zoneinfo; python_version < '3.9'", "tzdata; platform_system == 'Windows'"]
xlsx2csv = ["xlsx2csv >= 0.8.0"]
xlsxwriter = ["xlsxwriter"]
all = [
"polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,iceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]",
"polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,style,numpy,pandas,plot,pyarrow,pydantic,iceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]",
]

[tool.maturin]
Expand Down Expand Up @@ -89,6 +90,7 @@ module = [
"deltalake.*",
"fsspec.*",
"gevent",
"great_tables",
"hvplot.*",
"jax.*",
"kuzu",
Expand Down
2 changes: 2 additions & 0 deletions py-polars/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ zstandard
# Plotting
hvplot>=0.9.1
matplotlib
# Styling
great-tables>=0.8.0; python_version >= '3.9'
# Other
gevent
nest_asyncio
Expand Down