From 716cd5683fdbda20d619b021c9c999bd57fc73b6 Mon Sep 17 00:00:00 2001
From: Alicja <32685541+alicja-januszkiewicz@users.noreply.github.com>
Date: Mon, 16 Oct 2023 08:34:48 +0100
Subject: [PATCH] feat(rust): right-align numeric columns (#7475)

Co-authored-by: alexander-beedie <alexander.m.beedie@icloud.com>
Co-authored-by: Alexander Beedie <alexander-beedie@users.noreply.github.com>
---
 crates/polars-core/src/config.rs |   1 +
 crates/polars-core/src/fmt.rs    |  47 +++++++++---
 py-polars/polars/config.py       | 111 ++++++++++++++++++++++++++-
 py-polars/src/functions/meta.rs  |  13 ++++
 py-polars/src/lib.rs             |   4 +
 py-polars/tests/unit/test_cfg.py | 128 ++++++++++++++++++++++++++++++-
 6 files changed, 289 insertions(+), 15 deletions(-)
diff --git a/crates/polars-core/src/config.rs b/crates/polars-core/src/config.rs
index 14d6130b3c6f..72ddf90fa639 100644
--- a/crates/polars-core/src/config.rs
+++ b/crates/polars-core/src/config.rs
@@ -3,6 +3,7 @@ pub(crate) const FMT_MAX_COLS: &str = "POLARS_FMT_MAX_COLS";
 pub(crate) const FMT_MAX_ROWS: &str = "POLARS_FMT_MAX_ROWS";
 pub(crate) const FMT_STR_LEN: &str = "POLARS_FMT_STR_LEN";
 pub(crate) const FMT_TABLE_CELL_ALIGNMENT: &str = "POLARS_FMT_TABLE_CELL_ALIGNMENT";
+pub(crate) const FMT_TABLE_CELL_NUMERIC_ALIGNMENT: &str = "POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT";
 pub(crate) const FMT_TABLE_DATAFRAME_SHAPE_BELOW: &str = "POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW";
 pub(crate) const FMT_TABLE_FORMATTING: &str = "POLARS_FMT_TABLE_FORMATTING";
 pub(crate) const FMT_TABLE_HIDE_COLUMN_DATA_TYPES: &str = "POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES";
diff --git a/crates/polars-core/src/fmt.rs b/crates/polars-core/src/fmt.rs
index e480007cd8e0..24758ca320aa 100644
--- a/crates/polars-core/src/fmt.rs
+++ b/crates/polars-core/src/fmt.rs
@@ -2,6 +2,7 @@
 use std::borrow::Cow;
 use std::fmt::{Debug, Display, Formatter, Write};
 use std::sync::atomic::{AtomicU8, Ordering};
+use std::sync::RwLock;
 use std::{fmt, str};
 
 #[cfg(any(
@@ -33,6 +34,7 @@ pub enum FloatFmt {
     Full,
 }
 static FLOAT_FMT: AtomicU8 = AtomicU8::new(FloatFmt::Mixed as u8);
+static FLOAT_PRECISION: RwLock<Option<usize>> = RwLock::new(None);
 
 pub fn get_float_fmt() -> FloatFmt {
     match FLOAT_FMT.load(Ordering::Relaxed) {
@@ -42,10 +44,18 @@ pub fn get_float_fmt() -> FloatFmt {
     }
 }
 
+pub fn get_float_precision() -> Option<usize> {
+    *FLOAT_PRECISION.read().unwrap()
+}
+
 pub fn set_float_fmt(fmt: FloatFmt) {
     FLOAT_FMT.store(fmt as u8, Ordering::Relaxed)
 }
 
+pub fn set_float_precision(precision: Option<usize>) {
+    *FLOAT_PRECISION.write().unwrap() = precision;
+}
+
 macro_rules! format_array {
     ($f:ident, $a:expr, $dtype:expr, $name:expr, $array_type:expr) => {{
         write!(
@@ -655,19 +665,24 @@ impl Display for DataFrame {
             }
 
             // set alignment of cells, if defined
-            if std::env::var(FMT_TABLE_CELL_ALIGNMENT).is_ok() {
-                // for (column_index, column) in table.column_iter_mut().enumerate() {
+            if std::env::var(FMT_TABLE_CELL_ALIGNMENT).is_ok()
+                || std::env::var(FMT_TABLE_CELL_NUMERIC_ALIGNMENT).is_ok()
+            {
                 let str_preset = std::env::var(FMT_TABLE_CELL_ALIGNMENT)
                     .unwrap_or_else(|_| "DEFAULT".to_string());
-                for column in table.column_iter_mut() {
-                    if str_preset == "RIGHT" {
-                        column.set_cell_alignment(CellAlignment::Right);
-                    } else if str_preset == "LEFT" {
-                        column.set_cell_alignment(CellAlignment::Left);
-                    } else if str_preset == "CENTER" {
-                        column.set_cell_alignment(CellAlignment::Center);
-                    } else {
-                        column.set_cell_alignment(CellAlignment::Left);
+                let num_preset = std::env::var(FMT_TABLE_CELL_NUMERIC_ALIGNMENT)
+                    .unwrap_or_else(|_| str_preset.to_string());
+                for (column_index, column) in table.column_iter_mut().enumerate() {
+                    let dtype = fields[column_index].data_type();
+                    let mut preset = str_preset.as_str();
+                    if dtype.is_numeric() {
+                        preset = num_preset.as_str();
+                    }
+                    match preset {
+                        "RIGHT" => column.set_cell_alignment(CellAlignment::Right),
+                        "LEFT" => column.set_cell_alignment(CellAlignment::Left),
+                        "CENTER" => column.set_cell_alignment(CellAlignment::Center),
+                        _ => {}, // unrecognised preset: leave the column alignment untouched
                     }
                 }
             }
@@ -709,6 +724,16 @@ const SCIENTIFIC_BOUND: f64 = 999999.0;
 
 fn fmt_float<T: Num + NumCast>(f: &mut Formatter<'_>, width: usize, v: T) -> fmt::Result {
     let v: f64 = NumCast::from(v).unwrap();
+
+    let float_precision = get_float_precision();
+
+    if let Some(precision) = float_precision {
+        if format!("{v:.precision$}", precision = precision).len() > 19 {
+            return write!(f, "{v:>width$.precision$e}", precision = precision);
+        }
+        return write!(f, "{v:>width$.precision$}", precision = precision);
+    }
+
     if matches!(get_float_fmt(), FloatFmt::Full) {
         return write!(f, "{v:>width$}");
     }
diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py
index 80f471714e38..2d557d914354 100644
--- a/py-polars/polars/config.py
+++ b/py-polars/polars/config.py
@@ -11,15 +11,23 @@
 from polars.utils.various import normalize_filepath
 
 
-# dummy func required (so docs build)
+# dummy funcs required here (so that docs build)
 def _get_float_fmt() -> str:  # pragma: no cover
     return "n/a"
 
 
+def _get_float_precision() -> int:  # pragma: no cover
+    return -1
+
+
 # note: module not available when building docs
 with contextlib.suppress(ImportError):
     from polars.polars import get_float_fmt as _get_float_fmt  # type: ignore[no-redef]
+    from polars.polars import (  # type: ignore[no-redef]
+        get_float_precision as _get_float_precision,
+    )
     from polars.polars import set_float_fmt as _set_float_fmt
+    from polars.polars import set_float_precision as _set_float_precision
 
 
 if sys.version_info >= (3, 10):
@@ -60,7 +68,9 @@ def _get_float_fmt() -> str:  # pragma: no cover
     "POLARS_FMT_MAX_COLS",
     "POLARS_FMT_MAX_ROWS",
     "POLARS_FMT_STR_LEN",
+    "POLARS_FMT_NUM_LEN",
     "POLARS_FMT_TABLE_CELL_ALIGNMENT",
+    "POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT",
     "POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW",
     "POLARS_FMT_TABLE_FORMATTING",
     "POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES",
@@ -77,7 +87,10 @@ def _get_float_fmt() -> str:  # pragma: no cover
 
 # vars that set the rust env directly should declare themselves here as the Config
 # method name paired with a callable that returns the current state of that value:
-_POLARS_CFG_DIRECT_VARS = {"set_fmt_float": _get_float_fmt}
+_POLARS_CFG_DIRECT_VARS = {
+    "set_fmt_float": _get_float_fmt,
+    "set_float_precision": _get_float_precision,
+}
 
 
 class Config(contextlib.ContextDecorator):
@@ -253,6 +266,7 @@ def restore_defaults(cls) -> type[Config]:
 
         # apply any 'direct' setting values
         cls.set_fmt_float()
+        cls.set_float_precision()
         return cls
 
     @classmethod
@@ -348,7 +362,7 @@ def state(
         }
         if not env_only:
             for cfg_methodname, get_value in _POLARS_CFG_DIRECT_VARS.items():
-                config_state[cfg_methodname] = get_value()
+                config_state[cfg_methodname] = get_value()  # type: ignore[assignment]
 
         return config_state
 
@@ -428,6 +442,47 @@ def set_auto_structify(cls, active: bool | None = False) -> type[Config]:
             os.environ["POLARS_AUTO_STRUCTIFY"] = str(int(active))
         return cls
 
+    @classmethod
+    def set_float_precision(cls, precision: int | None = None) -> type[Config]:
+        """
+        Control the number of decimal places displayed for floating point values.
+
+        Parameters
+        ----------
+        precision : int, optional
+            Number of decimal places to display; set to ``None`` to revert to the
+            default/standard behaviour.
+
+        Notes
+        -----
+        When setting this to a larger value you should ensure that you are aware of both
+        the limitations of floating point representations, and of the precision of the
+        data that you are looking at.
+
+        This setting only applies to Float32 and Float64 dtypes; it does not cover
+        Decimal dtype values (which are displayed at their native level of precision).
+
+        Examples
+        --------
+        >>> from math import pi, e
+        >>> df = pl.DataFrame({"const": ["pi", "e"], "value": [pi, e]})
+        >>> with pl.Config(float_precision=15):
+        ...     print(repr(df))
+        ...
+        shape: (2, 2)
+        ┌───────┬───────────────────┐
+        │ const ┆ value             │
+        │ ---   ┆ ---               │
+        │ str   ┆ f64               │
+        ╞═══════╪═══════════════════╡
+        │ pi    ┆ 3.141592653589793 │
+        │ e     ┆ 2.718281828459045 │
+        └───────┴───────────────────┘
+
+        """
+        _set_float_precision(precision)
+        return cls
+
     @classmethod
     def set_fmt_float(cls, fmt: FloatFmt | None = "mixed") -> type[Config]:
         """
@@ -647,6 +702,56 @@ def set_tbl_cell_alignment(
             os.environ["POLARS_FMT_TABLE_CELL_ALIGNMENT"] = format
         return cls
 
+    @classmethod
+    def set_tbl_cell_numeric_alignment(
+        cls, format: Literal["LEFT", "CENTER", "RIGHT"] | None
+    ) -> type[Config]:
+        """
+        Set table cell alignment for numeric columns.
+
+        Parameters
+        ----------
+        format : str
+            * "LEFT": left aligned
+            * "CENTER": center aligned
+            * "RIGHT": right aligned
+
+        Examples
+        --------
+        >>> from datetime import date
+        >>> df = pl.DataFrame(
+        ...     {
+        ...         "abc": [11, 2, 333],
+        ...         "mno": [date.today(), None, date.today()],
+        ...         "xyz": [True, False, None],
+        ...     }
+        ... )
+        >>> pl.Config.set_tbl_cell_numeric_alignment("RIGHT")  # doctest: +SKIP
+        # ...
+        # shape: (3, 3)
+        # ┌─────┬────────────┬───────┐
+        # │ abc ┆ mno        ┆ xyz   │
+        # │ --- ┆ ---        ┆ ---   │
+        # │ i64 ┆ date       ┆ bool  │
+        # ╞═════╪════════════╪═══════╡
+        # │  11 ┆ 2023-09-05 ┆ true  │
+        # │   2 ┆ null       ┆ false │
+        # │ 333 ┆ 2023-09-05 ┆ null  │
+        # └─────┴────────────┴───────┘
+
+        Raises
+        ------
+        ValueError: if alignment string not recognised.
+
+        """
+        if format is None:
+            os.environ.pop("POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT", None)
+        elif format not in {"LEFT", "CENTER", "RIGHT"}:
+            raise ValueError(f"invalid alignment: {format!r}")
+        else:
+            os.environ["POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT"] = format
+        return cls
+
     @classmethod
     def set_tbl_cols(cls, n: int | None) -> type[Config]:
         """
diff --git a/py-polars/src/functions/meta.rs b/py-polars/src/functions/meta.rs
index 467c65ffc133..80f69e37a05d 100644
--- a/py-polars/src/functions/meta.rs
+++ b/py-polars/src/functions/meta.rs
@@ -46,3 +46,16 @@ pub fn get_float_fmt() -> PyResult<String> {
     };
     Ok(strfmt.to_string())
 }
+
+#[pyfunction]
+pub fn set_float_precision(precision: Option<usize>) -> PyResult<()> {
+    use polars_core::fmt::set_float_precision;
+    set_float_precision(precision);
+    Ok(())
+}
+
+#[pyfunction]
+pub fn get_float_precision() -> PyResult<Option<usize>> {
+    use polars_core::fmt::get_float_precision;
+    Ok(get_float_precision())
+}
diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs
index 06c5763c91e9..7a599f1c1261 100644
--- a/py-polars/src/lib.rs
+++ b/py-polars/src/lib.rs
@@ -229,6 +229,10 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> {
         .unwrap();
     m.add_wrapped(wrap_pyfunction!(functions::meta::get_float_fmt))
         .unwrap();
+    m.add_wrapped(wrap_pyfunction!(functions::meta::set_float_precision))
+        .unwrap();
+    m.add_wrapped(wrap_pyfunction!(functions::meta::get_float_precision))
+        .unwrap();
 
     // Functions - misc
     m.add_wrapped(wrap_pyfunction!(functions::misc::dtype_str_repr))
diff --git a/py-polars/tests/unit/test_cfg.py b/py-polars/tests/unit/test_cfg.py
index 72a9e22d276a..54b766e3cdaf 100644
--- a/py-polars/tests/unit/test_cfg.py
+++ b/py-polars/tests/unit/test_cfg.py
@@ -7,7 +7,7 @@
 import pytest
 
 import polars as pl
-from polars.config import _POLARS_CFG_ENV_VARS, _get_float_fmt
+from polars.config import _POLARS_CFG_ENV_VARS, _get_float_fmt, _get_float_precision
 
 
 @pytest.fixture(autouse=True)
@@ -509,6 +509,121 @@ def test_shape_format_for_big_numbers() -> None:
     )
 
 
+def test_numeric_right_alignment() -> None:
+    pl.Config.set_tbl_cell_numeric_alignment("RIGHT")
+
+    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
+    assert (
+        str(df) == "shape: (3, 3)\n"
+        "┌─────┬─────┬─────┐\n"
+        "│   a ┆   b ┆   c │\n"
+        "│ --- ┆ --- ┆ --- │\n"
+        "│ i64 ┆ i64 ┆ i64 │\n"
+        "╞═════╪═════╪═════╡\n"
+        "│   1 ┆   4 ┆   7 │\n"
+        "│   2 ┆   5 ┆   8 │\n"
+        "│   3 ┆   6 ┆   9 │\n"
+        "└─────┴─────┴─────┘"
+    )
+
+    df = pl.DataFrame(
+        {"a": [1.1, 2.22, 3.333], "b": [4.0, 5.0, 6.0], "c": [7.0, 8.0, 9.0]}
+    )
+    with pl.Config():
+        pl.Config.set_fmt_float("full")
+        assert (
+            str(df) == "shape: (3, 3)\n"
+            "┌───────┬─────┬─────┐\n"
+            "│     a ┆   b ┆   c │\n"
+            "│   --- ┆ --- ┆ --- │\n"
+            "│   f64 ┆ f64 ┆ f64 │\n"
+            "╞═══════╪═════╪═════╡\n"
+            "│   1.1 ┆   4 ┆   7 │\n"
+            "│  2.22 ┆   5 ┆   8 │\n"
+            "│ 3.333 ┆   6 ┆   9 │\n"
+            "└───────┴─────┴─────┘"
+        )
+
+    with pl.Config(fmt_float="mixed"):
+        assert (
+            str(df) == "shape: (3, 3)\n"
+            "┌───────┬─────┬─────┐\n"
+            "│     a ┆   b ┆   c │\n"
+            "│   --- ┆ --- ┆ --- │\n"
+            "│   f64 ┆ f64 ┆ f64 │\n"
+            "╞═══════╪═════╪═════╡\n"
+            "│   1.1 ┆ 4.0 ┆ 7.0 │\n"
+            "│  2.22 ┆ 5.0 ┆ 8.0 │\n"
+            "│ 3.333 ┆ 6.0 ┆ 9.0 │\n"
+            "└───────┴─────┴─────┘"
+        )
+
+    with pl.Config(float_precision=6):
+        assert str(df) == (
+            "shape: (3, 3)\n"
+            "┌──────────┬──────────┬──────────┐\n"
+            "│        a ┆        b ┆        c │\n"
+            "│      --- ┆      --- ┆      --- │\n"
+            "│      f64 ┆      f64 ┆      f64 │\n"
+            "╞══════════╪══════════╪══════════╡\n"
+            "│ 1.100000 ┆ 4.000000 ┆ 7.000000 │\n"
+            "│ 2.220000 ┆ 5.000000 ┆ 8.000000 │\n"
+            "│ 3.333000 ┆ 6.000000 ┆ 9.000000 │\n"
+            "└──────────┴──────────┴──────────┘"
+        )
+        with pl.Config(float_precision=None):
+            assert (
+                str(df) == "shape: (3, 3)\n"
+                "┌───────┬─────┬─────┐\n"
+                "│     a ┆   b ┆   c │\n"
+                "│   --- ┆ --- ┆ --- │\n"
+                "│   f64 ┆ f64 ┆ f64 │\n"
+                "╞═══════╪═════╪═════╡\n"
+                "│   1.1 ┆ 4.0 ┆ 7.0 │\n"
+                "│  2.22 ┆ 5.0 ┆ 8.0 │\n"
+                "│ 3.333 ┆ 6.0 ┆ 9.0 │\n"
+                "└───────┴─────┴─────┘"
+            )
+
+    df = pl.DataFrame(
+        {"a": [1.1, 22.2, 3.33], "b": [444, 55.5, 6.6], "c": [77.7, 8888, 9.9999]}
+    )
+    with pl.Config(fmt_float="full", float_precision=1):
+        assert (
+            str(df) == "shape: (3, 3)\n"
+            "┌──────┬───────┬────────┐\n"
+            "│    a ┆     b ┆      c │\n"
+            "│  --- ┆   --- ┆    --- │\n"
+            "│  f64 ┆   f64 ┆    f64 │\n"
+            "╞══════╪═══════╪════════╡\n"
+            "│  1.1 ┆ 444.0 ┆   77.7 │\n"
+            "│ 22.2 ┆  55.5 ┆ 8888.0 │\n"
+            "│  3.3 ┆   6.6 ┆   10.0 │\n"
+            "└──────┴───────┴────────┘"
+        )
+
+    df = pl.DataFrame(
+        {
+            "a": [1100000000000000000.1, 22200000000000000.2, 33330000000000000.33333],
+            "b": [40000000000000000000.0, 5, 600000000000000000.0],
+            "c": [700000.0, 80000000000000000.0, 900],
+        }
+    )
+    with pl.Config(float_precision=2):
+        assert (
+            str(df) == "shape: (3, 3)\n"
+            "┌─────────┬─────────┬───────────┐\n"
+            "│       a ┆       b ┆         c │\n"
+            "│     --- ┆     --- ┆       --- │\n"
+            "│     f64 ┆     f64 ┆       f64 │\n"
+            "╞═════════╪═════════╪═══════════╡\n"
+            "│ 1.10e18 ┆ 4.00e19 ┆ 700000.00 │\n"
+            "│ 2.22e16 ┆    5.00 ┆   8.00e16 │\n"
+            "│ 3.33e16 ┆ 6.00e17 ┆    900.00 │\n"
+            "└─────────┴─────────┴───────────┘"
+        )
+
+
 @pytest.mark.write_disk()
 def test_config_load_save(tmp_path: Path) -> None:
     for file in (
@@ -520,6 +635,7 @@ def test_config_load_save(tmp_path: Path) -> None:
         pl.Config.set_tbl_cols(12)
         pl.Config.set_verbose(True)
         pl.Config.set_fmt_float("full")
+        pl.Config.set_float_precision(6)
         assert os.environ.get("POLARS_VERBOSE") == "1"
 
         if file is None:
@@ -533,6 +649,8 @@ def test_config_load_save(tmp_path: Path) -> None:
         # ...modify the same options...
         pl.Config.set_tbl_cols(10)
         pl.Config.set_verbose(False)
+        pl.Config.set_fmt_float("mixed")
+        pl.Config.set_float_precision(2)
         assert os.environ.get("POLARS_VERBOSE") == "0"
 
         # ...load back from config file/string...
@@ -555,6 +673,7 @@ def test_config_load_save(tmp_path: Path) -> None:
         assert os.environ.get("POLARS_FMT_MAX_COLS") == "12"
         assert os.environ.get("POLARS_VERBOSE") == "1"
         assert _get_float_fmt() == "full"
+        assert _get_float_precision() == 6
 
         # restore all default options (unsets from env)
         pl.Config.restore_defaults()
@@ -565,6 +684,7 @@ def test_config_load_save(tmp_path: Path) -> None:
         assert os.environ.get("POLARS_FMT_MAX_COLS") is None
         assert os.environ.get("POLARS_VERBOSE") is None
         assert _get_float_fmt() == "mixed"
+        assert _get_float_precision() is None
 
     # ref: #11094
     with pl.Config(
@@ -659,6 +779,12 @@ def test_set_fmt_str_lengths_invalid_length() -> None:
         ("POLARS_FMT_MAX_ROWS", "set_tbl_rows", 3, "3"),
         ("POLARS_FMT_STR_LEN", "set_fmt_str_lengths", 42, "42"),
         ("POLARS_FMT_TABLE_CELL_ALIGNMENT", "set_tbl_cell_alignment", "RIGHT", "RIGHT"),
+        (
+            "POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT",
+            "set_tbl_cell_numeric_alignment",
+            "RIGHT",
+            "RIGHT",
+        ),
         ("POLARS_FMT_TABLE_HIDE_COLUMN_NAMES", "set_tbl_hide_column_names", True, "1"),
         (
             "POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW",