From 716cd5683fdbda20d619b021c9c999bd57fc73b6 Mon Sep 17 00:00:00 2001 From: Alicja <32685541+alicja-januszkiewicz@users.noreply.github.com> Date: Mon, 16 Oct 2023 08:34:48 +0100 Subject: [PATCH] feat(rust): right-align numeric columns (#7475) Co-authored-by: alexander-beedie Co-authored-by: Alexander Beedie --- crates/polars-core/src/config.rs | 1 + crates/polars-core/src/fmt.rs | 47 +++++++++--- py-polars/polars/config.py | 111 ++++++++++++++++++++++++++- py-polars/src/functions/meta.rs | 13 ++++ py-polars/src/lib.rs | 4 + py-polars/tests/unit/test_cfg.py | 128 ++++++++++++++++++++++++++++++- 6 files changed, 289 insertions(+), 15 deletions(-) diff --git a/crates/polars-core/src/config.rs b/crates/polars-core/src/config.rs index 14d6130b3c6f..72ddf90fa639 100644 --- a/crates/polars-core/src/config.rs +++ b/crates/polars-core/src/config.rs @@ -3,6 +3,7 @@ pub(crate) const FMT_MAX_COLS: &str = "POLARS_FMT_MAX_COLS"; pub(crate) const FMT_MAX_ROWS: &str = "POLARS_FMT_MAX_ROWS"; pub(crate) const FMT_STR_LEN: &str = "POLARS_FMT_STR_LEN"; pub(crate) const FMT_TABLE_CELL_ALIGNMENT: &str = "POLARS_FMT_TABLE_CELL_ALIGNMENT"; +pub(crate) const FMT_TABLE_CELL_NUMERIC_ALIGNMENT: &str = "POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT"; pub(crate) const FMT_TABLE_DATAFRAME_SHAPE_BELOW: &str = "POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW"; pub(crate) const FMT_TABLE_FORMATTING: &str = "POLARS_FMT_TABLE_FORMATTING"; pub(crate) const FMT_TABLE_HIDE_COLUMN_DATA_TYPES: &str = "POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES"; diff --git a/crates/polars-core/src/fmt.rs b/crates/polars-core/src/fmt.rs index e480007cd8e0..24758ca320aa 100644 --- a/crates/polars-core/src/fmt.rs +++ b/crates/polars-core/src/fmt.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::fmt::{Debug, Display, Formatter, Write}; use std::sync::atomic::{AtomicU8, Ordering}; +use std::sync::RwLock; use std::{fmt, str}; #[cfg(any( @@ -33,6 +34,7 @@ pub enum FloatFmt { Full, } static FLOAT_FMT: AtomicU8 = AtomicU8::new(FloatFmt::Mixed as 
u8); +static FLOAT_PRECISION: RwLock> = RwLock::new(None); pub fn get_float_fmt() -> FloatFmt { match FLOAT_FMT.load(Ordering::Relaxed) { @@ -42,10 +44,18 @@ pub fn get_float_fmt() -> FloatFmt { } } +pub fn get_float_precision() -> Option { + *FLOAT_PRECISION.read().unwrap() +} + pub fn set_float_fmt(fmt: FloatFmt) { FLOAT_FMT.store(fmt as u8, Ordering::Relaxed) } +pub fn set_float_precision(precision: Option) { + *FLOAT_PRECISION.write().unwrap() = precision; +} + macro_rules! format_array { ($f:ident, $a:expr, $dtype:expr, $name:expr, $array_type:expr) => {{ write!( @@ -655,19 +665,24 @@ impl Display for DataFrame { } // set alignment of cells, if defined - if std::env::var(FMT_TABLE_CELL_ALIGNMENT).is_ok() { - // for (column_index, column) in table.column_iter_mut().enumerate() { + if std::env::var(FMT_TABLE_CELL_ALIGNMENT).is_ok() + | std::env::var(FMT_TABLE_CELL_NUMERIC_ALIGNMENT).is_ok() + { let str_preset = std::env::var(FMT_TABLE_CELL_ALIGNMENT) .unwrap_or_else(|_| "DEFAULT".to_string()); - for column in table.column_iter_mut() { - if str_preset == "RIGHT" { - column.set_cell_alignment(CellAlignment::Right); - } else if str_preset == "LEFT" { - column.set_cell_alignment(CellAlignment::Left); - } else if str_preset == "CENTER" { - column.set_cell_alignment(CellAlignment::Center); - } else { - column.set_cell_alignment(CellAlignment::Left); + let num_preset = std::env::var(FMT_TABLE_CELL_NUMERIC_ALIGNMENT) + .unwrap_or_else(|_| str_preset.to_string()); + for (column_index, column) in table.column_iter_mut().enumerate() { + let dtype = fields[column_index].data_type(); + let mut preset = str_preset.as_str(); + if dtype.is_numeric() { + preset = num_preset.as_str(); + } + match preset { + "RIGHT" => column.set_cell_alignment(CellAlignment::Right), + "LEFT" => column.set_cell_alignment(CellAlignment::Left), + "CENTER" => column.set_cell_alignment(CellAlignment::Center), + _ => {}, } } } @@ -709,6 +724,16 @@ const SCIENTIFIC_BOUND: f64 = 999999.0; fn fmt_float(f: 
&mut Formatter<'_>, width: usize, v: T) -> fmt::Result { let v: f64 = NumCast::from(v).unwrap(); + + let float_precision = get_float_precision(); + + if let Some(precision) = float_precision { + if format!("{v:.precision$}", precision = precision).len() > 19 { + return write!(f, "{v:>width$.precision$e}", precision = precision); + } + return write!(f, "{v:>width$.precision$}", precision = precision); + } + if matches!(get_float_fmt(), FloatFmt::Full) { return write!(f, "{v:>width$}"); } diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py index 80f471714e38..2d557d914354 100644 --- a/py-polars/polars/config.py +++ b/py-polars/polars/config.py @@ -11,15 +11,23 @@ from polars.utils.various import normalize_filepath -# dummy func required (so docs build) +# dummy funcs required here (so that docs build) def _get_float_fmt() -> str: # pragma: no cover return "n/a" +def _get_float_precision() -> int: + return -1 + + # note: module not available when building docs with contextlib.suppress(ImportError): from polars.polars import get_float_fmt as _get_float_fmt # type: ignore[no-redef] + from polars.polars import ( # type: ignore[no-redef] + get_float_precision as _get_float_precision, + ) from polars.polars import set_float_fmt as _set_float_fmt + from polars.polars import set_float_precision as _set_float_precision if sys.version_info >= (3, 10): @@ -60,7 +68,9 @@ def _get_float_fmt() -> str: # pragma: no cover "POLARS_FMT_MAX_COLS", "POLARS_FMT_MAX_ROWS", "POLARS_FMT_STR_LEN", + "POLARS_FMT_NUM_LEN", "POLARS_FMT_TABLE_CELL_ALIGNMENT", + "POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT", "POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW", "POLARS_FMT_TABLE_FORMATTING", "POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES", @@ -77,7 +87,10 @@ def _get_float_fmt() -> str: # pragma: no cover # vars that set the rust env directly should declare themselves here as the Config # method name paired with a callable that returns the current state of that value: -_POLARS_CFG_DIRECT_VARS = 
{"set_fmt_float": _get_float_fmt} +_POLARS_CFG_DIRECT_VARS = { + "set_fmt_float": _get_float_fmt, + "set_float_precision": _get_float_precision, +} class Config(contextlib.ContextDecorator): @@ -253,6 +266,7 @@ def restore_defaults(cls) -> type[Config]: # apply any 'direct' setting values cls.set_fmt_float() + cls.set_float_precision() return cls @classmethod @@ -348,7 +362,7 @@ def state( } if not env_only: for cfg_methodname, get_value in _POLARS_CFG_DIRECT_VARS.items(): - config_state[cfg_methodname] = get_value() + config_state[cfg_methodname] = get_value() # type: ignore[assignment] return config_state @@ -428,6 +442,47 @@ def set_auto_structify(cls, active: bool | None = False) -> type[Config]: os.environ["POLARS_AUTO_STRUCTIFY"] = str(int(active)) return cls + @classmethod + def set_float_precision(cls, precision: int | None = None) -> type[Config]: + """ + Control the number of decimal places displayed for floating point values. + + Parameters + ---------- + precision : int + Number of decimal places to display; set to ``None`` to revert to the + default/standard behaviour. + + Notes + ----- + When setting this to a larger value you should ensure that you are aware of both + the limitations of floating point representations, and of the precision of the + data that you are looking at. + + This setting only applies to Float32 and Float64 dtypes; it does not cover + Decimal dtype values (which are displayed at their native level of precision). + + Examples + -------- + >>> from math import pi, e + >>> df = pl.DataFrame({"const": ["pi", "e"], "value": [pi, e]}) + >>> with pl.Config(float_precision=15): + ... print(repr(df)) + ... 
+ shape: (2, 2) + ┌───────┬───────────────────┐ + │ const ┆ value │ + │ --- ┆ --- │ + │ str ┆ f64 │ + ╞═══════╪═══════════════════╡ + │ pi ┆ 3.141592653589793 │ + │ e ┆ 2.718281828459045 │ + └───────┴───────────────────┘ + + """ + _set_float_precision(precision) + return cls + @classmethod def set_fmt_float(cls, fmt: FloatFmt | None = "mixed") -> type[Config]: """ @@ -647,6 +702,56 @@ def set_tbl_cell_alignment( os.environ["POLARS_FMT_TABLE_CELL_ALIGNMENT"] = format return cls + @classmethod + def set_tbl_cell_numeric_alignment( + cls, format: Literal["LEFT", "CENTER", "RIGHT"] | None + ) -> type[Config]: + """ + Set table cell alignment for numeric columns. + + Parameters + ---------- + format : str + * "LEFT": left aligned + * "CENTER": center aligned + * "RIGHT": right aligned + + Examples + -------- + >>> from datetime import date + >>> df = pl.DataFrame( + ... { + ... "abc": [11, 2, 333], + ... "mno": [date.today(), None, date.today()], + ... "xyz": [True, False, None], + ... } + ... ) + >>> pl.Config.set_tbl_cell_numeric_alignment("RIGHT") # doctest: +SKIP + # ... + # shape: (3, 3) + # ┌─────┬────────────┬───────┐ + # │ abc ┆ mno ┆ xyz │ + # │ --- ┆ --- ┆ --- │ + # │ i64 ┆ date ┆ bool │ + # ╞═════╪════════════╪═══════╡ + # │ 11 ┆ 2023-09-05 ┆ true │ + # │ 2 ┆ null ┆ false │ + # │ 333 ┆ 2023-09-05 ┆ null │ + # └─────┴────────────┴───────┘ + + Raises + ------ + ValueError: if alignment string not recognised. 
+ + """ + if format is None: + os.environ.pop("POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT", None) + elif format not in {"LEFT", "CENTER", "RIGHT"}: + raise ValueError(f"invalid alignment: {format!r}") + else: + os.environ["POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT"] = format + return cls + @classmethod def set_tbl_cols(cls, n: int | None) -> type[Config]: """ diff --git a/py-polars/src/functions/meta.rs b/py-polars/src/functions/meta.rs index 467c65ffc133..80f69e37a05d 100644 --- a/py-polars/src/functions/meta.rs +++ b/py-polars/src/functions/meta.rs @@ -46,3 +46,16 @@ pub fn get_float_fmt() -> PyResult { }; Ok(strfmt.to_string()) } + +#[pyfunction] +pub fn set_float_precision(precision: Option) -> PyResult<()> { + use polars_core::fmt::set_float_precision; + set_float_precision(precision); + Ok(()) +} + +#[pyfunction] +pub fn get_float_precision() -> PyResult> { + use polars_core::fmt::get_float_precision; + Ok(get_float_precision()) +} diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs index 06c5763c91e9..7a599f1c1261 100644 --- a/py-polars/src/lib.rs +++ b/py-polars/src/lib.rs @@ -229,6 +229,10 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> { .unwrap(); m.add_wrapped(wrap_pyfunction!(functions::meta::get_float_fmt)) .unwrap(); + m.add_wrapped(wrap_pyfunction!(functions::meta::set_float_precision)) + .unwrap(); + m.add_wrapped(wrap_pyfunction!(functions::meta::get_float_precision)) + .unwrap(); // Functions - misc m.add_wrapped(wrap_pyfunction!(functions::misc::dtype_str_repr)) diff --git a/py-polars/tests/unit/test_cfg.py b/py-polars/tests/unit/test_cfg.py index 72a9e22d276a..54b766e3cdaf 100644 --- a/py-polars/tests/unit/test_cfg.py +++ b/py-polars/tests/unit/test_cfg.py @@ -7,7 +7,7 @@ import pytest import polars as pl -from polars.config import _POLARS_CFG_ENV_VARS, _get_float_fmt +from polars.config import _POLARS_CFG_ENV_VARS, _get_float_fmt, _get_float_precision @pytest.fixture(autouse=True) @@ -509,6 +509,121 @@ def 
test_shape_format_for_big_numbers() -> None: ) +def test_numeric_right_alignment() -> None: + pl.Config.set_tbl_cell_numeric_alignment("RIGHT") + + df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + assert ( + str(df) == "shape: (3, 3)\n" + "┌─────┬─────┬─────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ i64 ┆ i64 ┆ i64 │\n" + "╞═════╪═════╪═════╡\n" + "│ 1 ┆ 4 ┆ 7 │\n" + "│ 2 ┆ 5 ┆ 8 │\n" + "│ 3 ┆ 6 ┆ 9 │\n" + "└─────┴─────┴─────┘" + ) + + df = pl.DataFrame( + {"a": [1.1, 2.22, 3.333], "b": [4.0, 5.0, 6.0], "c": [7.0, 8.0, 9.0]} + ) + with pl.Config(): + pl.Config.set_fmt_float("full") + assert ( + str(df) == "shape: (3, 3)\n" + "┌───────┬─────┬─────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ f64 ┆ f64 ┆ f64 │\n" + "╞═══════╪═════╪═════╡\n" + "│ 1.1 ┆ 4 ┆ 7 │\n" + "│ 2.22 ┆ 5 ┆ 8 │\n" + "│ 3.333 ┆ 6 ┆ 9 │\n" + "└───────┴─────┴─────┘" + ) + + with pl.Config(fmt_float="mixed"): + assert ( + str(df) == "shape: (3, 3)\n" + "┌───────┬─────┬─────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ f64 ┆ f64 ┆ f64 │\n" + "╞═══════╪═════╪═════╡\n" + "│ 1.1 ┆ 4.0 ┆ 7.0 │\n" + "│ 2.22 ┆ 5.0 ┆ 8.0 │\n" + "│ 3.333 ┆ 6.0 ┆ 9.0 │\n" + "└───────┴─────┴─────┘" + ) + + with pl.Config(float_precision=6): + assert str(df) == ( + "shape: (3, 3)\n" + "┌──────────┬──────────┬──────────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ f64 ┆ f64 ┆ f64 │\n" + "╞══════════╪══════════╪══════════╡\n" + "│ 1.100000 ┆ 4.000000 ┆ 7.000000 │\n" + "│ 2.220000 ┆ 5.000000 ┆ 8.000000 │\n" + "│ 3.333000 ┆ 6.000000 ┆ 9.000000 │\n" + "└──────────┴──────────┴──────────┘" + ) + with pl.Config(float_precision=None): + assert ( + str(df) == "shape: (3, 3)\n" + "┌───────┬─────┬─────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ f64 ┆ f64 ┆ f64 │\n" + "╞═══════╪═════╪═════╡\n" + "│ 1.1 ┆ 4.0 ┆ 7.0 │\n" + "│ 2.22 ┆ 5.0 ┆ 8.0 │\n" + "│ 3.333 ┆ 6.0 ┆ 9.0 │\n" + "└───────┴─────┴─────┘" + ) + + df = pl.DataFrame( + {"a": [1.1, 22.2, 3.33], "b": [444, 
55.5, 6.6], "c": [77.7, 8888, 9.9999]} + ) + with pl.Config(fmt_float="full", float_precision=1): + assert ( + str(df) == "shape: (3, 3)\n" + "┌──────┬───────┬────────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ f64 ┆ f64 ┆ f64 │\n" + "╞══════╪═══════╪════════╡\n" + "│ 1.1 ┆ 444.0 ┆ 77.7 │\n" + "│ 22.2 ┆ 55.5 ┆ 8888.0 │\n" + "│ 3.3 ┆ 6.6 ┆ 10.0 │\n" + "└──────┴───────┴────────┘" + ) + + df = pl.DataFrame( + { + "a": [1100000000000000000.1, 22200000000000000.2, 33330000000000000.33333], + "b": [40000000000000000000.0, 5, 600000000000000000.0], + "c": [700000.0, 80000000000000000.0, 900], + } + ) + with pl.Config(float_precision=2): + assert ( + str(df) == "shape: (3, 3)\n" + "┌─────────┬─────────┬───────────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ f64 ┆ f64 ┆ f64 │\n" + "╞═════════╪═════════╪═══════════╡\n" + "│ 1.10e18 ┆ 4.00e19 ┆ 700000.00 │\n" + "│ 2.22e16 ┆ 5.00 ┆ 8.00e16 │\n" + "│ 3.33e16 ┆ 6.00e17 ┆ 900.00 │\n" + "└─────────┴─────────┴───────────┘" + ) + + @pytest.mark.write_disk() def test_config_load_save(tmp_path: Path) -> None: for file in ( @@ -520,6 +635,7 @@ def test_config_load_save(tmp_path: Path) -> None: pl.Config.set_tbl_cols(12) pl.Config.set_verbose(True) pl.Config.set_fmt_float("full") + pl.Config.set_float_precision(6) assert os.environ.get("POLARS_VERBOSE") == "1" if file is None: @@ -533,6 +649,8 @@ def test_config_load_save(tmp_path: Path) -> None: # ...modify the same options... pl.Config.set_tbl_cols(10) pl.Config.set_verbose(False) + pl.Config.set_fmt_float("mixed") + pl.Config.set_float_precision(2) assert os.environ.get("POLARS_VERBOSE") == "0" # ...load back from config file/string... 
@@ -555,6 +673,7 @@ def test_config_load_save(tmp_path: Path) -> None: assert os.environ.get("POLARS_FMT_MAX_COLS") == "12" assert os.environ.get("POLARS_VERBOSE") == "1" assert _get_float_fmt() == "full" + assert _get_float_precision() == 6 # restore all default options (unsets from env) pl.Config.restore_defaults() @@ -565,6 +684,7 @@ def test_config_load_save(tmp_path: Path) -> None: assert os.environ.get("POLARS_FMT_MAX_COLS") is None assert os.environ.get("POLARS_VERBOSE") is None assert _get_float_fmt() == "mixed" + assert _get_float_precision() is None # ref: #11094 with pl.Config( @@ -659,6 +779,12 @@ def test_set_fmt_str_lengths_invalid_length() -> None: ("POLARS_FMT_MAX_ROWS", "set_tbl_rows", 3, "3"), ("POLARS_FMT_STR_LEN", "set_fmt_str_lengths", 42, "42"), ("POLARS_FMT_TABLE_CELL_ALIGNMENT", "set_tbl_cell_alignment", "RIGHT", "RIGHT"), + ( + "POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT", + "set_tbl_cell_numeric_alignment", + "RIGHT", + "RIGHT", + ), ("POLARS_FMT_TABLE_HIDE_COLUMN_NAMES", "set_tbl_hide_column_names", True, "1"), ( "POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW",