diff --git a/crates/polars-core/src/config.rs b/crates/polars-core/src/config.rs index d941137244064..765ba090ca36d 100644 --- a/crates/polars-core/src/config.rs +++ b/crates/polars-core/src/config.rs @@ -3,6 +3,7 @@ pub(crate) const FMT_MAX_COLS: &str = "POLARS_FMT_MAX_COLS"; pub(crate) const FMT_MAX_ROWS: &str = "POLARS_FMT_MAX_ROWS"; pub(crate) const FMT_STR_LEN: &str = "POLARS_FMT_STR_LEN"; pub(crate) const FMT_TABLE_CELL_ALIGNMENT: &str = "POLARS_FMT_TABLE_CELL_ALIGNMENT"; +pub(crate) const FMT_TABLE_CELL_NUMERIC_ALIGNMENT: &str = "POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT"; pub(crate) const FMT_TABLE_DATAFRAME_SHAPE_BELOW: &str = "POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW"; pub(crate) const FMT_TABLE_FORMATTING: &str = "POLARS_FMT_TABLE_FORMATTING"; pub(crate) const FMT_TABLE_HIDE_COLUMN_DATA_TYPES: &str = "POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES"; diff --git a/crates/polars-core/src/fmt.rs b/crates/polars-core/src/fmt.rs index a309cc249a87e..4f8658b365e2a 100644 --- a/crates/polars-core/src/fmt.rs +++ b/crates/polars-core/src/fmt.rs @@ -33,6 +33,7 @@ pub enum FloatFmt { Full, } static FLOAT_FMT: AtomicU8 = AtomicU8::new(FloatFmt::Mixed as u8); +static FLOAT_PRECISION: AtomicU8 = AtomicU8::new(u8::MAX); pub fn get_float_fmt() -> FloatFmt { match FLOAT_FMT.load(Ordering::Relaxed) { @@ -42,10 +43,18 @@ pub fn get_float_fmt() -> FloatFmt { } } +pub fn get_float_precision() -> u8 { + FLOAT_PRECISION.load(Ordering::Relaxed) +} + pub fn set_float_fmt(fmt: FloatFmt) { FLOAT_FMT.store(fmt as u8, Ordering::Relaxed) } +pub fn set_float_precision(precision: u8) { + FLOAT_PRECISION.store(precision, Ordering::Relaxed) +} + macro_rules! format_array { ($f:ident, $a:expr, $dtype:expr, $name:expr, $array_type:expr) => {{ write!( @@ -656,19 +665,24 @@ impl Display for DataFrame { } // set alignment of cells, if defined - if std::env::var(FMT_TABLE_CELL_ALIGNMENT).is_ok() { - // for (column_index, column) in table.column_iter_mut().enumerate() { + if std::env::var(FMT_TABLE_CELL_ALIGNMENT).is_ok() + | std::env::var(FMT_TABLE_CELL_NUMERIC_ALIGNMENT).is_ok() + { let str_preset = std::env::var(FMT_TABLE_CELL_ALIGNMENT) .unwrap_or_else(|_| "DEFAULT".to_string()); - for column in table.column_iter_mut() { - if str_preset == "RIGHT" { - column.set_cell_alignment(CellAlignment::Right); - } else if str_preset == "LEFT" { - column.set_cell_alignment(CellAlignment::Left); - } else if str_preset == "CENTER" { - column.set_cell_alignment(CellAlignment::Center); - } else { - column.set_cell_alignment(CellAlignment::Left); + let num_preset = std::env::var(FMT_TABLE_CELL_NUMERIC_ALIGNMENT) + .unwrap_or_else(|_| str_preset.to_string()); + for (column_index, column) in table.column_iter_mut().enumerate() { + let dtype = fields[column_index].data_type(); + let mut preset = str_preset.as_str(); + if dtype.to_physical().is_numeric() { + preset = num_preset.as_str(); + } + match preset { + "RIGHT" => column.set_cell_alignment(CellAlignment::Right), + "LEFT" => column.set_cell_alignment(CellAlignment::Left), + "CENTER" => column.set_cell_alignment(CellAlignment::Center), + _ => {} } } } @@ -710,6 +724,15 @@ const SCIENTIFIC_BOUND: f64 = 999999.0; fn fmt_float(f: &mut Formatter<'_>, width: usize, v: T) -> fmt::Result { let v: f64 = NumCast::from(v).unwrap(); + + let precision = get_float_precision(); + if precision != u8::MAX { + if format!("{v:.precision$}", precision = precision as usize).len() > 19 { + return write!(f, "{v:>width$.precision$e}", precision = precision as usize); + } + return write!(f, "{v:>width$.precision$}", precision = precision as usize); + } + if matches!(get_float_fmt(), FloatFmt::Full) { return write!(f, "{v:>width$}"); } diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py index 6ada4300c22d1..8b76a57aeb6e8 100644 --- a/py-polars/polars/config.py +++ b/py-polars/polars/config.py @@ -14,10 +14,18 @@ def _get_float_fmt() -> str: # pragma: no cover return "n/a" +def _get_float_precision() -> str: + return "n/a" + + # note: module not available when building docs with contextlib.suppress(ImportError): from polars.polars import get_float_fmt as _get_float_fmt # type: ignore[no-redef] + from polars.polars import ( # type: ignore[no-redef] + get_float_precision as _get_float_precision, + ) from polars.polars import set_float_fmt as _set_float_fmt + from polars.polars import set_float_precision as _set_float_precision if TYPE_CHECKING: from types import TracebackType @@ -35,7 +43,9 @@ def _get_float_fmt() -> str: # pragma: no cover "POLARS_FMT_MAX_COLS", "POLARS_FMT_MAX_ROWS", "POLARS_FMT_STR_LEN", + "POLARS_FMT_NUM_LEN", "POLARS_FMT_TABLE_CELL_ALIGNMENT", + "POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT", "POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW", "POLARS_FMT_TABLE_FORMATTING", "POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES", @@ -51,7 +61,10 @@ def _get_float_fmt() -> str: # pragma: no cover # vars that set the rust env directly should declare themselves here as the Config # method name paired with a callable that returns the current state of that value: -_POLARS_CFG_DIRECT_VARS = {"set_fmt_float": _get_float_fmt} +_POLARS_CFG_DIRECT_VARS = { + "set_fmt_float": _get_float_fmt, + "set_float_precision": _get_float_precision, +} class Config(contextlib.ContextDecorator): @@ -186,6 +199,7 @@ def restore_defaults(cls) -> type[Config]: # apply any 'direct' setting values cls.set_fmt_float() + cls.set_float_precision() return cls @classmethod @@ -441,6 +455,46 @@ def set_tbl_cell_alignment( os.environ["POLARS_FMT_TABLE_CELL_ALIGNMENT"] = format return cls + @classmethod + def set_tbl_cell_numeric_alignment( + cls, format: Literal["LEFT", "CENTER", "RIGHT"] + ) -> type[Config]: + """ + Set table cell alignment for numeric columns. + + Parameters + ---------- + format : str + * "LEFT": left aligned + * "CENTER": center aligned + * "RIGHT": right aligned + + Examples + -------- + >>> df = pl.DataFrame( + ... {"column_abc": [11, 2, 333], "column_xyz": [True, False, True]} + ... ) + >>> pl.Config.set_tbl_cell_numeric_alignment("RIGHT") # doctest: +SKIP + # ... + # shape: (3, 2) + # ┌────────────┬────────────┐ + # │ column_abc ┆ column_xyz │ + # │ --- ┆ --- │ + # │ i64 ┆ bool │ + # ╞════════════╪════════════╡ + # │ 11 ┆ true │ + # │ 2 ┆ false │ + # │ 333 ┆ true │ + # └────────────┴────────────┘ + + Raises + ------ + KeyError: if alignment string not recognised. + + """ + os.environ["POLARS_FMT_TABLE_CELL_NUMERIC_ALIGNMENT"] = format + return cls + @classmethod def set_tbl_cols(cls, n: int) -> type[Config]: """ @@ -775,3 +829,17 @@ def set_verbose(cls, active: bool = True) -> type[Config]: """Enable additional verbose/debug logging.""" os.environ["POLARS_VERBOSE"] = str(int(active)) return cls + + @classmethod + def set_float_precision(cls, precision: str = "255") -> type[Config]: + """ + Control how floating point values are displayed. + + Parameters + ---------- + precision : int + Number of decimal places to display + + """ + _set_float_precision(precision) + return cls diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs index b7513cafa4332..38d6593d92bd9 100644 --- a/py-polars/src/lib.rs +++ b/py-polars/src/lib.rs @@ -66,6 +66,580 @@ static ALLOC: Jemalloc = Jemalloc; #[cfg(any(not(target_os = "linux"), use_mimalloc))] static ALLOC: MiMalloc = MiMalloc; +#[pyfunction] +fn col(name: &str) -> dsl::PyExpr { + dsl::col(name) +} + +#[pyfunction] +fn count() -> dsl::PyExpr { + dsl::count() +} + +#[pyfunction] +fn first() -> dsl::PyExpr { + dsl::first() +} + +#[pyfunction] +fn last() -> dsl::PyExpr { + dsl::last() +} + +#[pyfunction] +fn cols(names: Vec) -> dsl::PyExpr { + dsl::cols(names) +} + +#[pyfunction] +fn dtype_cols(dtypes: Vec>) -> PyResult { + let dtypes = vec_extract_wrapped(dtypes); + Ok(dsl::dtype_cols(dtypes)) +} + +#[pyfunction] +fn dtype_str_repr(dtype: Wrap) -> PyResult { + let dtype = dtype.0; + Ok(dtype.to_string()) +} + +#[pyfunction] +fn lit(value: &PyAny, allow_object: bool) -> PyResult { + dsl::lit(value, allow_object) +} + +#[pyfunction] +fn binary_expr(l: dsl::PyExpr, op: u8, r: dsl::PyExpr) -> dsl::PyExpr { + dsl::binary_expr(l, op, r) +} + +#[pyfunction] +fn fold(acc: PyExpr, lambda: PyObject, exprs: Vec) -> PyExpr { + dsl::fold(acc, lambda, exprs) +} + +#[pyfunction] +fn reduce(lambda: PyObject, exprs: Vec) -> PyExpr { + dsl::reduce(lambda, exprs) +} + +#[pyfunction] +fn cumfold(acc: PyExpr, lambda: PyObject, exprs: Vec, include_init: bool) -> PyExpr { + dsl::cumfold(acc, lambda, exprs, include_init) +} + +#[pyfunction] +fn cumreduce(lambda: PyObject, exprs: Vec) -> PyExpr { + dsl::cumreduce(lambda, exprs) +} + +#[pyfunction] +fn arange(start: PyExpr, end: PyExpr, step: i64) -> PyExpr { + polars_rs::lazy::dsl::arange(start.inner, end.inner, step).into() +} + +#[pyfunction] +fn repeat(value: &PyAny, n_times: PyExpr) -> PyResult { + if let Ok(true) = value.is_instance_of::() { + let val = value.extract::().unwrap(); + Ok(polars_rs::lazy::dsl::repeat(val, n_times.inner).into()) + } else if let Ok(int) = value.downcast::() { + let val = int.extract::().unwrap(); + + if val >= i32::MIN as i64 && val <= i32::MAX as i64 { + Ok(polars_rs::lazy::dsl::repeat(val as i32, n_times.inner).into()) + } else { + Ok(polars_rs::lazy::dsl::repeat(val, n_times.inner).into()) + } + } else if let Ok(float) = value.downcast::() { + let val = float.extract::().unwrap(); + Ok(polars_rs::lazy::dsl::repeat(val, n_times.inner).into()) + } else if let Ok(pystr) = value.downcast::() { + let val = pystr + .to_str() + .expect("could not transform Python string to Rust Unicode"); + Ok(polars_rs::lazy::dsl::repeat(val, n_times.inner).into()) + } else if value.is_none() { + Ok(polars_rs::lazy::dsl::repeat(Null {}, n_times.inner).into()) + } else { + Err(PyValueError::new_err(format!( + "could not convert value {:?} as a Literal", + value.str()? + ))) + } +} + +#[pyfunction] +fn pearson_corr(a: dsl::PyExpr, b: dsl::PyExpr, ddof: u8) -> dsl::PyExpr { + polars_rs::lazy::dsl::pearson_corr(a.inner, b.inner, ddof).into() +} + +#[pyfunction] +fn spearman_rank_corr( + a: dsl::PyExpr, + b: dsl::PyExpr, + ddof: u8, + propagate_nans: bool, +) -> dsl::PyExpr { + #[cfg(feature = "propagate_nans")] + { + polars_rs::lazy::dsl::spearman_rank_corr(a.inner, b.inner, ddof, propagate_nans).into() + } + #[cfg(not(feature = "propagate_nans"))] + { + panic!("activate 'popagate_nans'") + } +} + +#[pyfunction] +fn cov(a: dsl::PyExpr, b: dsl::PyExpr) -> dsl::PyExpr { + polars_rs::lazy::dsl::cov(a.inner, b.inner).into() +} + +#[pyfunction] +fn arg_sort_by(by: Vec, descending: Vec) -> dsl::PyExpr { + let by = by + .into_iter() + .map(|e| e.inner) + .collect::>(); + polars_rs::lazy::dsl::arg_sort_by(by, &descending).into() +} + +#[pyfunction] +fn when(predicate: PyExpr) -> dsl::When { + dsl::when(predicate) +} + +const VERSION: &str = env!("CARGO_PKG_VERSION"); +#[pyfunction] +fn get_polars_version() -> &'static str { + VERSION +} + +#[pyfunction] +fn enable_string_cache(toggle: bool) { + polars_rs::enable_string_cache(toggle) +} + +#[pyfunction] +fn using_string_cache() -> bool { + polars_rs::using_string_cache() +} + +#[pyfunction] +fn concat_str(s: Vec, separator: &str) -> dsl::PyExpr { + let s = s.into_iter().map(|e| e.inner).collect::>(); + polars_rs::lazy::dsl::concat_str(s, separator).into() +} + +#[pyfunction] +fn concat_lst(s: Vec) -> PyResult { + let s = s.into_iter().map(|e| e.inner).collect::>(); + let expr = polars_rs::lazy::dsl::concat_lst(s).map_err(PyPolarsErr::from)?; + Ok(expr.into()) +} + +macro_rules! set_unwrapped_or_0 { + ($($var:ident),+ $(,)?) => { + $(let $var = $var.map(|e| e.inner).unwrap_or(polars_rs::lazy::dsl::lit(0));)+ + }; +} + +#[pyfunction] +fn py_datetime( + year: dsl::PyExpr, + month: dsl::PyExpr, + day: dsl::PyExpr, + hour: Option, + minute: Option, + second: Option, + microsecond: Option, +) -> dsl::PyExpr { + let year = year.inner; + let month = month.inner; + let day = day.inner; + + set_unwrapped_or_0!(hour, minute, second, microsecond); + + let args = DatetimeArgs { + year, + month, + day, + hour, + minute, + second, + microsecond, + }; + + polars_rs::lazy::dsl::datetime(args).into() +} + +#[allow(clippy::too_many_arguments)] +#[pyfunction] +fn py_duration( + days: Option, + seconds: Option, + nanoseconds: Option, + microseconds: Option, + milliseconds: Option, + minutes: Option, + hours: Option, + weeks: Option, +) -> dsl::PyExpr { + set_unwrapped_or_0!( + days, + seconds, + nanoseconds, + microseconds, + milliseconds, + minutes, + hours, + weeks, + ); + + let args = DurationArgs { + days, + seconds, + nanoseconds, + microseconds, + milliseconds, + minutes, + hours, + weeks, + }; + + polars_rs::lazy::dsl::duration(args).into() +} + +#[pyfunction] +fn concat_df(dfs: &PyAny, py: Python) -> PyResult { + use polars_core::error::PolarsResult; + use polars_core::utils::rayon::prelude::*; + + let mut iter = dfs.iter()?; + let first = iter.next().unwrap()?; + + let first_rdf = get_df(first)?; + let identity_df = first_rdf.clear(); + + let mut rdfs: Vec> = vec![Ok(first_rdf)]; + + for item in iter { + let rdf = get_df(item?)?; + rdfs.push(Ok(rdf)); + } + + let identity = || Ok(identity_df.clone()); + + let df = py + .allow_threads(|| { + polars_core::POOL.install(|| { + rdfs.into_par_iter() + .fold(identity, |acc: PolarsResult, df| { + let mut acc = acc?; + acc.vstack_mut(&df?)?; + Ok(acc) + }) + .reduce(identity, |acc, df| { + let mut acc = acc?; + acc.vstack_mut(&df?)?; + Ok(acc) + }) + }) + }) + .map_err(PyPolarsErr::from)?; + + Ok(df.into()) +} + +#[pyfunction] +fn concat_lf(seq: &PyAny, rechunk: bool, parallel: bool) -> PyResult { + let len = seq.len()?; + let mut lfs = Vec::with_capacity(len); + + for res in seq.iter()? { + let item = res?; + let lf = get_lf(item)?; + lfs.push(lf); + } + + let lf = polars_rs::lazy::dsl::concat(lfs, rechunk, parallel).map_err(PyPolarsErr::from)?; + Ok(lf.into()) +} + +#[pyfunction] +fn py_diag_concat_df(dfs: &PyAny) -> PyResult { + let iter = dfs.iter()?; + + let dfs = iter + .map(|item| { + let item = item?; + get_df(item) + }) + .collect::>>()?; + + let df = diag_concat_df(&dfs).map_err(PyPolarsErr::from)?; + Ok(df.into()) +} + +#[pyfunction] +fn py_diag_concat_lf(lfs: &PyAny, rechunk: bool, parallel: bool) -> PyResult { + let iter = lfs.iter()?; + + let lfs = iter + .map(|item| { + let item = item?; + get_lf(item) + }) + .collect::>>()?; + + let lf = polars_rs::lazy::dsl::functions::diag_concat_lf(lfs, rechunk, parallel) + .map_err(PyPolarsErr::from)?; + Ok(lf.into()) +} + +#[pyfunction] +fn py_hor_concat_df(dfs: &PyAny) -> PyResult { + let iter = dfs.iter()?; + + let dfs = iter + .map(|item| { + let item = item?; + get_df(item) + }) + .collect::>>()?; + + let df = hor_concat_df(&dfs).map_err(PyPolarsErr::from)?; + Ok(df.into()) +} + +#[pyfunction] +fn concat_series(series: &PyAny) -> PyResult { + let mut iter = series.iter()?; + let first = iter.next().unwrap()?; + + let mut s = get_series(first)?; + + for res in iter { + let item = res?; + let item = get_series(item)?; + s.append(&item).map_err(PyPolarsErr::from)?; + } + Ok(s.into()) +} + +#[cfg(feature = "ipc")] +#[pyfunction] +fn ipc_schema(py: Python, py_f: PyObject) -> PyResult { + use polars_core::export::arrow::io::ipc::read::read_file_metadata; + let metadata = match get_either_file(py_f, false)? { + EitherRustPythonFile::Rust(mut r) => { + read_file_metadata(&mut r).map_err(PyPolarsErr::from)? + } + EitherRustPythonFile::Py(mut r) => read_file_metadata(&mut r).map_err(PyPolarsErr::from)?, + }; + + let dict = PyDict::new(py); + for field in metadata.schema.fields { + let dt: Wrap = Wrap((&field.data_type).into()); + dict.set_item(field.name, dt.to_object(py))?; + } + Ok(dict.to_object(py)) +} + +#[cfg(feature = "parquet")] +#[pyfunction] +fn parquet_schema(py: Python, py_f: PyObject) -> PyResult { + use polars_core::export::arrow::io::parquet::read::{infer_schema, read_metadata}; + + let metadata = match get_either_file(py_f, false)? { + EitherRustPythonFile::Rust(mut r) => read_metadata(&mut r).map_err(PyPolarsErr::from)?, + EitherRustPythonFile::Py(mut r) => read_metadata(&mut r).map_err(PyPolarsErr::from)?, + }; + let arrow_schema = infer_schema(&metadata).map_err(PyPolarsErr::from)?; + + let dict = PyDict::new(py); + for field in arrow_schema.fields { + let dt: Wrap = Wrap((&field.data_type).into()); + dict.set_item(field.name, dt.to_object(py))?; + } + Ok(dict.to_object(py)) +} + +#[pyfunction] +fn collect_all(lfs: Vec, py: Python) -> PyResult> { + use polars_core::utils::rayon::prelude::*; + + let out = py.allow_threads(|| { + polars_core::POOL.install(|| { + lfs.par_iter() + .map(|lf| { + let df = lf.ldf.clone().collect()?; + Ok(PyDataFrame::new(df)) + }) + .collect::>>() + .map_err(PyPolarsErr::from) + }) + }); + + Ok(out?) +} + +#[pyfunction] +#[pyo3(signature = (pyexpr, lambda, output_type, apply_groups, returns_scalar))] +pub fn map_mul( + py: Python, + pyexpr: Vec, + lambda: PyObject, + output_type: Option>, + apply_groups: bool, + returns_scalar: bool, +) -> PyExpr { + lazy::map_mul( + &pyexpr, + py, + lambda, + output_type, + apply_groups, + returns_scalar, + ) +} + +#[pyfunction] +fn py_date_range( + start: i64, + stop: i64, + every: &str, + closed: Wrap, + name: &str, + time_unit: Wrap, + time_zone: Option, +) -> PyResult { + let date_range = polars_rs::time::date_range_impl( + name, + start, + stop, + Duration::parse(every), + closed.0, + time_unit.0, + time_zone.as_ref(), + ) + .map_err(PyPolarsErr::from)?; + Ok(date_range.into_series().into()) +} + +#[pyfunction] +fn py_date_range_lazy( + start: PyExpr, + end: PyExpr, + every: &str, + closed: Wrap, + name: String, + time_zone: Option, +) -> PyExpr { + let start = start.inner; + let end = end.inner; + let every = Duration::parse(every); + polars_rs::lazy::dsl::functions::date_range(name, start, end, every, closed.0, time_zone).into() +} + +#[pyfunction] +fn min_exprs(exprs: Vec) -> PyExpr { + let exprs = exprs.to_exprs(); + polars_rs::lazy::dsl::min_exprs(exprs).into() +} + +#[pyfunction] +fn max_exprs(exprs: Vec) -> PyExpr { + let exprs = exprs.to_exprs(); + polars_rs::lazy::dsl::max_exprs(exprs).into() +} + +#[pyfunction] +fn coalesce_exprs(exprs: Vec) -> PyExpr { + let exprs = exprs.to_exprs(); + polars_rs::lazy::dsl::coalesce(&exprs).into() +} + +#[pyfunction] +fn sum_exprs(exprs: Vec) -> PyExpr { + let exprs = exprs.to_exprs(); + polars_rs::lazy::dsl::sum_exprs(exprs).into() +} + +#[pyfunction] +fn as_struct(exprs: Vec) -> PyExpr { + let exprs = exprs.to_exprs(); + polars_rs::lazy::dsl::as_struct(&exprs).into() +} + +#[pyfunction] +fn arg_where(condition: PyExpr) -> PyExpr { + polars_rs::lazy::dsl::arg_where(condition.inner).into() +} + +#[pyfunction] +fn get_index_type(py: Python) -> PyObject { + Wrap(IDX_DTYPE).to_object(py) +} + +#[pyfunction] +fn threadpool_size() -> usize { + POOL.current_num_threads() +} + +#[pyfunction] +fn set_float_fmt(fmt: &str) -> PyResult<()> { + use polars_core::fmt::{set_float_fmt, FloatFmt}; + let fmt = match fmt { + "full" => FloatFmt::Full, + "mixed" => FloatFmt::Mixed, + e => { + return Err(PyValueError::new_err(format!( + "fmt must be one of {{'full', 'mixed'}}, got {e}", + ))) + } + }; + set_float_fmt(fmt); + Ok(()) +} + +#[pyfunction] +fn get_float_fmt() -> PyResult { + use polars_core::fmt::{get_float_fmt, FloatFmt}; + let strfmt = match get_float_fmt() { + FloatFmt::Full => "full", + FloatFmt::Mixed => "mixed", + }; + Ok(strfmt.to_string()) +} + +#[pyfunction] +fn set_float_precision(precision: &str) -> PyResult<()> { + use polars_core::fmt::set_float_precision; + let precision_u8 = match precision.parse::() { + Ok(value) => value, + Err(e) => { + return Err(PyValueError::new_err(format!( + "precision must be a number between 0-16, got {e}", + ))) + } + }; + if precision_u8 > 16 && precision_u8 != u8::MAX { + return Err(PyValueError::new_err(format!( + "maximum supported float precision is 16, got {precision_u8}", + ))); + } + set_float_precision(precision_u8); + Ok(()) +} + +#[pyfunction] +fn get_float_precision() -> PyResult { + use polars_core::fmt::get_float_precision; + Ok(get_float_precision().to_string()) +} + #[pymodule] fn polars(py: Python, m: &PyModule) -> PyResult<()> { // Classes @@ -267,5 +841,73 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> { pyo3_built!(py, build, "build", "time", "deps", "features", "host", "target", "git"), )?; + m.add_class::().unwrap(); + m.add_class::().unwrap(); + m.add_class::().unwrap(); + m.add_class::().unwrap(); + m.add_class::().unwrap(); + #[cfg(feature = "csv")] + m.add_class::().unwrap(); + #[cfg(feature = "sql")] + m.add_class::().unwrap(); + m.add_wrapped(wrap_pyfunction!(col)).unwrap(); + m.add_wrapped(wrap_pyfunction!(count)).unwrap(); + m.add_wrapped(wrap_pyfunction!(first)).unwrap(); + m.add_wrapped(wrap_pyfunction!(last)).unwrap(); + m.add_wrapped(wrap_pyfunction!(cols)).unwrap(); + m.add_wrapped(wrap_pyfunction!(dtype_cols)).unwrap(); + m.add_wrapped(wrap_pyfunction!(dtype_str_repr)).unwrap(); + m.add_wrapped(wrap_pyfunction!(lit)).unwrap(); + m.add_wrapped(wrap_pyfunction!(fold)).unwrap(); + m.add_wrapped(wrap_pyfunction!(cumfold)).unwrap(); + m.add_wrapped(wrap_pyfunction!(reduce)).unwrap(); + m.add_wrapped(wrap_pyfunction!(cumreduce)).unwrap(); + m.add_wrapped(wrap_pyfunction!(binary_expr)).unwrap(); + m.add_wrapped(wrap_pyfunction!(arange)).unwrap(); + m.add_wrapped(wrap_pyfunction!(pearson_corr)).unwrap(); + m.add_wrapped(wrap_pyfunction!(cov)).unwrap(); + m.add_wrapped(wrap_pyfunction!(arg_sort_by)).unwrap(); + m.add_wrapped(wrap_pyfunction!(when)).unwrap(); + m.add_wrapped(wrap_pyfunction!(get_polars_version)).unwrap(); + m.add_wrapped(wrap_pyfunction!(enable_string_cache)) + .unwrap(); + m.add_wrapped(wrap_pyfunction!(using_string_cache)).unwrap(); + m.add_wrapped(wrap_pyfunction!(concat_str)).unwrap(); + m.add_wrapped(wrap_pyfunction!(concat_lst)).unwrap(); + m.add_wrapped(wrap_pyfunction!(concat_df)).unwrap(); + m.add_wrapped(wrap_pyfunction!(concat_lf)).unwrap(); + m.add_wrapped(wrap_pyfunction!(concat_series)).unwrap(); + #[cfg(feature = "ipc")] + m.add_wrapped(wrap_pyfunction!(ipc_schema)).unwrap(); + #[cfg(feature = "parquet")] + m.add_wrapped(wrap_pyfunction!(parquet_schema)).unwrap(); + m.add_wrapped(wrap_pyfunction!(collect_all)).unwrap(); + m.add_wrapped(wrap_pyfunction!(spearman_rank_corr)).unwrap(); + m.add_wrapped(wrap_pyfunction!(map_mul)).unwrap(); + m.add_wrapped(wrap_pyfunction!(py_diag_concat_df)).unwrap(); + m.add_wrapped(wrap_pyfunction!(py_diag_concat_lf)).unwrap(); + m.add_wrapped(wrap_pyfunction!(py_hor_concat_df)).unwrap(); + m.add_wrapped(wrap_pyfunction!(py_datetime)).unwrap(); + m.add_wrapped(wrap_pyfunction!(py_duration)).unwrap(); + m.add_wrapped(wrap_pyfunction!(py_date_range)).unwrap(); + m.add_wrapped(wrap_pyfunction!(py_date_range_lazy)).unwrap(); + m.add_wrapped(wrap_pyfunction!(sum_exprs)).unwrap(); + m.add_wrapped(wrap_pyfunction!(min_exprs)).unwrap(); + m.add_wrapped(wrap_pyfunction!(max_exprs)).unwrap(); + m.add_wrapped(wrap_pyfunction!(as_struct)).unwrap(); + m.add_wrapped(wrap_pyfunction!(repeat)).unwrap(); + m.add_wrapped(wrap_pyfunction!(threadpool_size)).unwrap(); + m.add_wrapped(wrap_pyfunction!(arg_where)).unwrap(); + m.add_wrapped(wrap_pyfunction!(get_index_type)).unwrap(); + m.add_wrapped(wrap_pyfunction!(coalesce_exprs)).unwrap(); + m.add_wrapped(wrap_pyfunction!(set_float_fmt)).unwrap(); + m.add_wrapped(wrap_pyfunction!(get_float_fmt)).unwrap(); + #[cfg(feature = "object")] + m.add_wrapped(wrap_pyfunction!(register_object_builder)) + .unwrap(); + m.add_wrapped(wrap_pyfunction!(set_float_precision)) + .unwrap(); + m.add_wrapped(wrap_pyfunction!(get_float_precision)) + .unwrap(); Ok(()) } diff --git a/py-polars/tests/unit/test_cfg.py b/py-polars/tests/unit/test_cfg.py index c76f9a682eb82..aa2bd72838fc6 100644 --- a/py-polars/tests/unit/test_cfg.py +++ b/py-polars/tests/unit/test_cfg.py @@ -7,7 +7,7 @@ import pytest import polars as pl -from polars.config import _get_float_fmt +from polars.config import _get_float_fmt, _get_float_precision from polars.exceptions import StringCacheMismatchError from polars.testing import assert_frame_equal @@ -506,6 +506,101 @@ def test_shape_format_for_big_numbers() -> None: ) +def test_numeric_right_alignment() -> None: + pl.Config.set_tbl_cell_numeric_alignment("RIGHT") + + df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + assert ( + str(df) == "shape: (3, 3)\n" + "┌─────┬─────┬─────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ i64 ┆ i64 ┆ i64 │\n" + "╞═════╪═════╪═════╡\n" + "│ 1 ┆ 4 ┆ 7 │\n" + "│ 2 ┆ 5 ┆ 8 │\n" + "│ 3 ┆ 6 ┆ 9 │\n" + "└─────┴─────┴─────┘" + ) + + df = pl.DataFrame( + {"a": [1.1, 2.22, 3.333], "b": [4.0, 5.0, 6.0], "c": [7.0, 8.0, 9.0]} + ) + with pl.Config(): + pl.Config.set_fmt_float("full") + assert ( + str(df) == "shape: (3, 3)\n" + "┌───────┬─────┬─────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ f64 ┆ f64 ┆ f64 │\n" + "╞═══════╪═════╪═════╡\n" + "│ 1.1 ┆ 4 ┆ 7 │\n" + "│ 2.22 ┆ 5 ┆ 8 │\n" + "│ 3.333 ┆ 6 ┆ 9 │\n" + "└───────┴─────┴─────┘" + ) + + with pl.Config(): + pl.Config.set_fmt_float("mixed") + assert ( + str(df) == "shape: (3, 3)\n" + "┌───────┬─────┬─────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ f64 ┆ f64 ┆ f64 │\n" + "╞═══════╪═════╪═════╡\n" + "│ 1.1 ┆ 4.0 ┆ 7.0 │\n" + "│ 2.22 ┆ 5.0 ┆ 8.0 │\n" + "│ 3.333 ┆ 6.0 ┆ 9.0 │\n" + "└───────┴─────┴─────┘" + ) + + df = pl.DataFrame( + {"a": [1.1, 22.2, 3.33], "b": [444, 55.5, 6.6], "c": [77.7, 8888, 9.9999]} + ) + with pl.Config(): + pl.Config.set_fmt_float("full") + pl.Config.set_float_precision(1) + assert ( + str(df) == "shape: (3, 3)\n" + "┌──────┬───────┬────────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ f64 ┆ f64 ┆ f64 │\n" + "╞══════╪═══════╪════════╡\n" + "│ 1.1 ┆ 444.0 ┆ 77.7 │\n" + "│ 22.2 ┆ 55.5 ┆ 8888.0 │\n" + "│ 3.3 ┆ 6.6 ┆ 10.0 │\n" + "└──────┴───────┴────────┘" + ) + + df = pl.DataFrame( + { + "a": [1100000000000000000.1, 22200000000000000.2, 33330000000000000.33333], + "b": [40000000000000000000.0, 5, 600000000000000000.0], + "c": [700000.0, 80000000000000000.0, 900], + } + ) + with pl.Config(): + pl.Config.set_float_precision(2) + assert ( + str(df) == "shape: (3, 3)\n" + "┌─────────┬─────────┬───────────┐\n" + "│ a ┆ b ┆ c │\n" + "│ --- ┆ --- ┆ --- │\n" + "│ f64 ┆ f64 ┆ f64 │\n" + "╞═════════╪═════════╪═══════════╡\n" + "│ 1.10e18 ┆ 4.00e19 ┆ 700000.00 │\n" + "│ 2.22e16 ┆ 5.00 ┆ 8.00e16 │\n" + "│ 3.33e16 ┆ 6.00e17 ┆ 900.00 │\n" + "└─────────┴─────────┴───────────┘" + ) + # test nonsensical float precision raises an error + with pytest.raises(ValueError): + pl.Config.set_float_precision(50) + + def test_string_cache() -> None: df1 = pl.DataFrame({"a": ["foo", "bar", "ham"], "b": [1, 2, 3]}) df2 = pl.DataFrame({"a": ["foo", "spam", "eggs"], "c": [3, 2, 2]}) @@ -540,6 +635,7 @@ def test_config_load_save(tmp_path: Path) -> None: pl.Config.set_tbl_cols(12) pl.Config.set_verbose(True) pl.Config.set_fmt_float("full") + pl.Config.set_float_precision(6) assert os.environ.get("POLARS_VERBOSE") == "1" cfg = pl.Config.save(file) @@ -549,6 +645,8 @@ def test_config_load_save(tmp_path: Path) -> None: # ...modify the same options... pl.Config.set_tbl_cols(10) pl.Config.set_verbose(False) + pl.Config.set_fmt_float("mixed") + pl.Config.set_float_precision("2") assert os.environ.get("POLARS_VERBOSE") == "0" # ...load back from config... @@ -560,6 +658,7 @@ def test_config_load_save(tmp_path: Path) -> None: assert os.environ.get("POLARS_FMT_MAX_COLS") == "12" assert os.environ.get("POLARS_VERBOSE") == "1" assert _get_float_fmt() == "full" + assert _get_float_precision() == 6 # restore all default options (unsets from env) pl.Config.restore_defaults() @@ -570,6 +669,7 @@ def test_config_load_save(tmp_path: Path) -> None: assert os.environ.get("POLARS_FMT_MAX_COLS") is None assert os.environ.get("POLARS_VERBOSE") is None assert _get_float_fmt() == "mixed" + assert _get_float_precision() == 255 def test_config_scope() -> None: