From 0dd810463b94469749221d3deeebaf7151ad711f Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Thu, 13 Jul 2023 23:17:00 +0200 Subject: [PATCH 01/37] docs(python): add logo `link` entry to sphinx conf and factor-out website root paths (#9864) --- py-polars/docs/source/conf.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/py-polars/docs/source/conf.py b/py-polars/docs/source/conf.py index ccf9d53ec62f..31916f3c6106 100644 --- a/py-polars/docs/source/conf.py +++ b/py-polars/docs/source/conf.py @@ -90,18 +90,26 @@ autosummary_generate = True numpydoc_show_class_members = False +# key site root paths +static_assets_root = "https://raw.githubusercontent.com/pola-rs/polars-static/master" +github_root = "https://github.com/pola-rs/polars" +web_root = "https://pola-rs.github.io" + html_theme_options = { "external_links": [ { "name": "User Guide", - "url": "https://pola-rs.github.io/polars-book/user-guide/index.html", + "url": f"{web_root}/polars-book/user-guide/index.html", + }, + { + "name": "Powered by Xomnia", + "url": "https://www.xomnia.com/", }, - {"name": "Powered by Xomnia", "url": "https://www.xomnia.com/"}, ], "icon_links": [ { "name": "GitHub", - "url": "https://github.com/pola-rs/polars", + "url": github_root, "icon": "fa-brands fa-github", }, { @@ -116,8 +124,9 @@ }, ], "logo": { - "image_light": "https://raw.githubusercontent.com/pola-rs/polars-static/master/logos/polars-logo-dark-medium.png", - "image_dark": "https://raw.githubusercontent.com/pola-rs/polars-static/master/logos/polars-logo-dimmed-medium.png", + "image_light": f"{static_assets_root}/logos/polars-logo-dark-medium.png", + "image_dark": f"{static_assets_root}/logos/polars-logo-dimmed-medium.png", + "link": f"{web_root}/polars/py-polars/html/reference/index.html", }, } @@ -125,12 +134,12 @@ { "rel": "icon", "sizes": "32x32", - "href": "https://raw.githubusercontent.com/pola-rs/polars-static/master/icons/favicon-32x32.png", + "href": f"{static_assets_root}/icons/favicon-32x32.png", }, { "rel": "apple-touch-icon", "sizes": "180x180", - "href": "https://raw.githubusercontent.com/pola-rs/polars-static/master/icons/touchicon-180x180.png", + "href": f"{static_assets_root}/icons/touchicon-180x180.png", }, ] @@ -195,10 +204,7 @@ def linkcode_resolve(domain, info): polars_root = os.path.abspath(f"{conf_dir_path}/../../polars") fn = os.path.relpath(fn, start=polars_root) - - return ( - f"https://github.com/pola-rs/polars/blob/main/py-polars/polars/{fn}{linespec}" - ) + return f"{github_root}/blob/main/py-polars/polars/{fn}{linespec}" def _minify_classpaths(s: str) -> str: From ef69f4bf32e3cfadf4546f216ec762cceaa0006c Mon Sep 17 00:00:00 2001 From: messense Date: Fri, 14 Jul 2023 13:58:14 +0800 Subject: [PATCH 02/37] perf(python): Use `pyo3::intern` to avoid needlessly recreating PyString (#9853) --- py-polars/Cargo.lock | 2 +- py-polars/src/conversion.rs | 143 ++++++++++++++++++++---------------- 2 files changed, 80 insertions(+), 65 deletions(-) diff --git a/py-polars/Cargo.lock b/py-polars/Cargo.lock index 29b6b3fd89ae..0bcb9e8e6f1b 100644 --- a/py-polars/Cargo.lock +++ b/py-polars/Cargo.lock @@ -1718,7 +1718,7 @@ dependencies = [ [[package]] name = "py-polars" -version = "0.18.6" +version = "0.18.7" dependencies = [ "ahash", "built", diff --git a/py-polars/src/conversion.rs b/py-polars/src/conversion.rs index d6620682386e..ff63d360a75d 100644 --- a/py-polars/src/conversion.rs +++ b/py-polars/src/conversion.rs @@ -23,7 +23,7 @@ use pyo3::prelude::*; use pyo3::types::{ 
PyBool, PyBytes, PyDict, PyFloat, PyList, PySequence, PyString, PyTuple, PyType, }; -use pyo3::{PyAny, PyResult}; +use pyo3::{intern, PyAny, PyResult}; use smartstring::alias::String as SmartString; use crate::error::PyPolarsErr; @@ -71,17 +71,17 @@ impl From for Wrap { // extract a Rust DataFrame from a python DataFrame, that is DataFrame> pub(crate) fn get_df(obj: &PyAny) -> PyResult { - let pydf = obj.getattr("_df")?; + let pydf = obj.getattr(intern!(obj.py(), "_df"))?; Ok(pydf.extract::()?.df) } pub(crate) fn get_lf(obj: &PyAny) -> PyResult { - let pydf = obj.getattr("_ldf")?; + let pydf = obj.getattr(intern!(obj.py(), "_ldf"))?; Ok(pydf.extract::()?.ldf) } pub(crate) fn get_series(obj: &PyAny) -> PyResult { - let pydf = obj.getattr("_s")?; + let pydf = obj.getattr(intern!(obj.py(), "_s"))?; Ok(pydf.extract::()?.series) } @@ -226,11 +226,11 @@ impl IntoPy for Wrap> { s.into_py(py) } AnyValue::Date(v) => { - let convert = utils.getattr("_to_python_date").unwrap(); + let convert = utils.getattr(intern!(py, "_to_python_date")).unwrap(); convert.call1((v,)).unwrap().into_py(py) } AnyValue::Datetime(v, time_unit, time_zone) => { - let convert = utils.getattr("_to_python_datetime").unwrap(); + let convert = utils.getattr(intern!(py, "_to_python_datetime")).unwrap(); let time_unit = time_unit.to_ascii(); convert .call1((v, time_unit, time_zone.as_ref().map(|s| s.as_str()))) @@ -238,12 +238,12 @@ impl IntoPy for Wrap> { .into_py(py) } AnyValue::Duration(v, time_unit) => { - let convert = utils.getattr("_to_python_timedelta").unwrap(); + let convert = utils.getattr(intern!(py, "_to_python_timedelta")).unwrap(); let time_unit = time_unit.to_ascii(); convert.call1((v, time_unit)).unwrap().into_py(py) } AnyValue::Time(v) => { - let convert = utils.getattr("_to_python_time").unwrap(); + let convert = utils.getattr(intern!(py, "_to_python_time")).unwrap(); convert.call1((v,)).unwrap().into_py(py) } AnyValue::Array(v, _) | AnyValue::List(v) => PySeries::new(v).to_list(), @@ -262,7 +262,7 @@ impl IntoPy for Wrap> { AnyValue::Binary(v) => v.into_py(py), AnyValue::BinaryOwned(v) => v.into_py(py), AnyValue::Decimal(v, scale) => { - let convert = utils.getattr("_to_python_decimal").unwrap(); + let convert = utils.getattr(intern!(py, "_to_python_decimal")).unwrap(); const N: usize = 3; let mut buf = [0_u128; N]; let n_digits = decimal_to_digits(v.abs(), &mut buf); @@ -287,84 +287,88 @@ impl ToPyObject for Wrap { let pl = POLARS.as_ref(py); match &self.0 { - DataType::Int8 => pl.getattr("Int8").unwrap().into(), - DataType::Int16 => pl.getattr("Int16").unwrap().into(), - DataType::Int32 => pl.getattr("Int32").unwrap().into(), - DataType::Int64 => pl.getattr("Int64").unwrap().into(), - DataType::UInt8 => pl.getattr("UInt8").unwrap().into(), - DataType::UInt16 => pl.getattr("UInt16").unwrap().into(), - DataType::UInt32 => pl.getattr("UInt32").unwrap().into(), - DataType::UInt64 => pl.getattr("UInt64").unwrap().into(), - DataType::Float32 => pl.getattr("Float32").unwrap().into(), - DataType::Float64 => pl.getattr("Float64").unwrap().into(), + DataType::Int8 => pl.getattr(intern!(py, "Int8")).unwrap().into(), + DataType::Int16 => pl.getattr(intern!(py, "Int16")).unwrap().into(), + DataType::Int32 => pl.getattr(intern!(py, "Int32")).unwrap().into(), + DataType::Int64 => pl.getattr(intern!(py, "Int64")).unwrap().into(), + DataType::UInt8 => pl.getattr(intern!(py, "UInt8")).unwrap().into(), + DataType::UInt16 => pl.getattr(intern!(py, "UInt16")).unwrap().into(), + DataType::UInt32 => pl.getattr(intern!(py, 
"UInt32")).unwrap().into(), + DataType::UInt64 => pl.getattr(intern!(py, "UInt64")).unwrap().into(), + DataType::Float32 => pl.getattr(intern!(py, "Float32")).unwrap().into(), + DataType::Float64 => pl.getattr(intern!(py, "Float64")).unwrap().into(), DataType::Decimal(precision, scale) => pl - .getattr("Decimal") + .getattr(intern!(py, "Decimal")) .unwrap() .call1((*scale, *precision)) .unwrap() .into(), - DataType::Boolean => pl.getattr("Boolean").unwrap().into(), - DataType::Utf8 => pl.getattr("Utf8").unwrap().into(), - DataType::Binary => pl.getattr("Binary").unwrap().into(), + DataType::Boolean => pl.getattr(intern!(py, "Boolean")).unwrap().into(), + DataType::Utf8 => pl.getattr(intern!(py, "Utf8")).unwrap().into(), + DataType::Binary => pl.getattr(intern!(py, "Binary")).unwrap().into(), DataType::Array(inner, size) => { let inner = Wrap(*inner.clone()).to_object(py); - let list_class = pl.getattr("Array").unwrap(); + let list_class = pl.getattr(intern!(py, "Array")).unwrap(); list_class.call1((*size, inner)).unwrap().into() } DataType::List(inner) => { let inner = Wrap(*inner.clone()).to_object(py); - let list_class = pl.getattr("List").unwrap(); + let list_class = pl.getattr(intern!(py, "List")).unwrap(); list_class.call1((inner,)).unwrap().into() } - DataType::Date => pl.getattr("Date").unwrap().into(), + DataType::Date => pl.getattr(intern!(py, "Date")).unwrap().into(), DataType::Datetime(tu, tz) => { - let datetime_class = pl.getattr("Datetime").unwrap(); + let datetime_class = pl.getattr(intern!(py, "Datetime")).unwrap(); datetime_class .call1((tu.to_ascii(), tz.clone())) .unwrap() .into() } DataType::Duration(tu) => { - let duration_class = pl.getattr("Duration").unwrap(); + let duration_class = pl.getattr(intern!(py, "Duration")).unwrap(); duration_class.call1((tu.to_ascii(),)).unwrap().into() } #[cfg(feature = "object")] - DataType::Object(_) => pl.getattr("Object").unwrap().into(), - DataType::Categorical(_) => pl.getattr("Categorical").unwrap().into(), - DataType::Time => pl.getattr("Time").unwrap().into(), + DataType::Object(_) => pl.getattr(intern!(py, "Object")).unwrap().into(), + DataType::Categorical(_) => pl.getattr(intern!(py, "Categorical")).unwrap().into(), + DataType::Time => pl.getattr(intern!(py, "Time")).unwrap().into(), DataType::Struct(fields) => { - let field_class = pl.getattr("Field").unwrap(); + let field_class = pl.getattr(intern!(py, "Field")).unwrap(); let iter = fields.iter().map(|fld| { let name = fld.name().as_str(); let dtype = Wrap(fld.data_type().clone()).to_object(py); field_class.call1((name, dtype)).unwrap() }); let fields = PyList::new(py, iter); - let struct_class = pl.getattr("Struct").unwrap(); + let struct_class = pl.getattr(intern!(py, "Struct")).unwrap(); struct_class.call1((fields,)).unwrap().into() } - DataType::Null => pl.getattr("Null").unwrap().into(), - DataType::Unknown => pl.getattr("Unknown").unwrap().into(), + DataType::Null => pl.getattr(intern!(py, "Null")).unwrap().into(), + DataType::Unknown => pl.getattr(intern!(py, "Unknown")).unwrap().into(), } } } impl FromPyObject<'_> for Wrap { fn extract(ob: &PyAny) -> PyResult { - let name = ob.getattr("name")?.str()?.to_str()?; - let dtype = ob.getattr("dtype")?.extract::>()?; + let py = ob.py(); + let name = ob.getattr(intern!(py, "name"))?.str()?.to_str()?; + let dtype = ob + .getattr(intern!(py, "dtype"))? 
+ .extract::>()?; Ok(Wrap(Field::new(name, dtype.0))) } } impl FromPyObject<'_> for Wrap { fn extract(ob: &PyAny) -> PyResult { + let py = ob.py(); let type_name = ob.get_type().name()?; let dtype = match type_name { "DataTypeClass" => { // just the class, not an object - let name = ob.getattr("__name__")?.str()?.to_str()?; + let name = ob.getattr(intern!(py, "__name__"))?.str()?.to_str()?; match name { "UInt8" => DataType::UInt8, "UInt16" => DataType::UInt16, @@ -400,36 +404,36 @@ impl FromPyObject<'_> for Wrap { } } "Duration" => { - let time_unit = ob.getattr("time_unit").unwrap(); + let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap(); let time_unit = time_unit.extract::>()?.0; DataType::Duration(time_unit) } "Datetime" => { - let time_unit = ob.getattr("time_unit").unwrap(); + let time_unit = ob.getattr(intern!(py, "time_unit")).unwrap(); let time_unit = time_unit.extract::>()?.0; - let time_zone = ob.getattr("time_zone").unwrap(); + let time_zone = ob.getattr(intern!(py, "time_zone")).unwrap(); let time_zone = time_zone.extract()?; DataType::Datetime(time_unit, time_zone) } "Decimal" => { - let precision = ob.getattr("precision")?.extract()?; - let scale = ob.getattr("scale")?.extract()?; + let precision = ob.getattr(intern!(py, "precision"))?.extract()?; + let scale = ob.getattr(intern!(py, "scale"))?.extract()?; DataType::Decimal(precision, Some(scale)) } "List" => { - let inner = ob.getattr("inner").unwrap(); + let inner = ob.getattr(intern!(py, "inner")).unwrap(); let inner = inner.extract::>()?; DataType::List(Box::new(inner.0)) } "Array" => { - let inner = ob.getattr("inner").unwrap(); - let width = ob.getattr("width").unwrap(); + let inner = ob.getattr(intern!(py, "inner")).unwrap(); + let width = ob.getattr(intern!(py, "width")).unwrap(); let inner = inner.extract::>()?; let width = width.extract::()?; DataType::Array(Box::new(inner.0), width) } "Struct" => { - let fields = ob.getattr("fields")?; + let fields = ob.getattr(intern!(py, "fields"))?; let fields = fields .extract::>>()? 
.into_iter() @@ -503,7 +507,7 @@ impl ToPyObject for Wrap<&StructChunked> { impl ToPyObject for Wrap<&DurationChunked> { fn to_object(&self, py: Python) -> PyObject { let utils = UTILS.as_ref(py); - let convert = utils.getattr("_to_python_timedelta").unwrap(); + let convert = utils.getattr(intern!(py, "_to_python_timedelta")).unwrap(); let time_unit = Wrap(self.0.time_unit()).to_object(py); let iter = self .0 @@ -516,7 +520,7 @@ impl ToPyObject for Wrap<&DurationChunked> { impl ToPyObject for Wrap<&DatetimeChunked> { fn to_object(&self, py: Python) -> PyObject { let utils = UTILS.as_ref(py); - let convert = utils.getattr("_to_python_datetime").unwrap(); + let convert = utils.getattr(intern!(py, "_to_python_datetime")).unwrap(); let time_unit = Wrap(self.0.time_unit()).to_object(py); let time_zone = self.0.time_zone().to_object(py); let iter = self @@ -530,7 +534,7 @@ impl ToPyObject for Wrap<&DatetimeChunked> { impl ToPyObject for Wrap<&TimeChunked> { fn to_object(&self, py: Python) -> PyObject { let utils = UTILS.as_ref(py); - let convert = utils.getattr("_to_python_time").unwrap(); + let convert = utils.getattr(intern!(py, "_to_python_time")).unwrap(); let iter = self .0 .into_iter() @@ -542,7 +546,7 @@ impl ToPyObject for Wrap<&TimeChunked> { impl ToPyObject for Wrap<&DateChunked> { fn to_object(&self, py: Python) -> PyObject { let utils = UTILS.as_ref(py); - let convert = utils.getattr("_to_python_date").unwrap(); + let convert = utils.getattr(intern!(py, "_to_python_date")).unwrap(); let iter = self .0 .into_iter() @@ -554,7 +558,7 @@ impl ToPyObject for Wrap<&DateChunked> { impl ToPyObject for Wrap<&DecimalChunked> { fn to_object(&self, py: Python) -> PyObject { let utils = UTILS.as_ref(py); - let convert = utils.getattr("_to_python_decimal").unwrap(); + let convert = utils.getattr(intern!(py, "_to_python_decimal")).unwrap(); let py_scale = (-(self.0.scale() as i32)).to_object(py); // if we don't know precision, the only safe bet is to set it to 39 let py_precision = self.0.precision().unwrap_or(39).to_object(py); @@ -611,7 +615,7 @@ fn convert_date(ob: &PyAny) -> PyResult> { Python::with_gil(|py| { let date = UTILS .as_ref(py) - .getattr("_date_to_pl_date") + .getattr(intern!(py, "_date_to_pl_date")) .unwrap() .call1((ob,)) .unwrap(); @@ -624,7 +628,9 @@ fn convert_datetime(ob: &PyAny) -> PyResult> { // windows #[cfg(target_arch = "windows")] let (seconds, microseconds) = { - let convert = UTILS.getattr(py, "_datetime_for_anyvalue_windows").unwrap(); + let convert = UTILS + .getattr(py, intern!(py, "_datetime_for_anyvalue_windows")) + .unwrap(); let out = convert.call1(py, (ob,)).unwrap(); let out: (i64, i64) = out.extract(py).unwrap(); out @@ -632,7 +638,9 @@ fn convert_datetime(ob: &PyAny) -> PyResult> { // unix #[cfg(not(target_arch = "windows"))] let (seconds, microseconds) = { - let convert = UTILS.getattr(py, "_datetime_for_anyvalue").unwrap(); + let convert = UTILS + .getattr(py, intern!(py, "_datetime_for_anyvalue")) + .unwrap(); let out = convert.call1(py, (ob,)).unwrap(); let out: (i64, i64) = out.extract(py).unwrap(); out @@ -737,7 +745,7 @@ impl<'s> FromPyObject<'s> for Wrap> { } fn get_series_el(ob: &PyAny) -> PyResult>> { - let py_pyseries = ob.getattr("_s").unwrap(); + let py_pyseries = ob.getattr(intern!(ob.py(), "_s")).unwrap(); let series = py_pyseries.extract::().unwrap().series; Ok(Wrap(AnyValue::List(series))) } @@ -755,9 +763,9 @@ impl<'s> FromPyObject<'s> for Wrap> { Python::with_gil(|py| { let td = UTILS .as_ref(py) - .getattr("_timedelta_to_pl_timedelta") + 
.getattr(intern!(py, "_timedelta_to_pl_timedelta")) .unwrap() - .call1((ob, "us")) + .call1((ob, intern!(py, "us"))) .unwrap(); let v = td.extract::().unwrap(); Ok(Wrap(AnyValue::Duration(v, TimeUnit::Microseconds))) @@ -768,7 +776,7 @@ impl<'s> FromPyObject<'s> for Wrap> { Python::with_gil(|py| { let time = UTILS .as_ref(py) - .getattr("_time_to_pl_time") + .getattr(intern!(py, "_time_to_pl_time")) .unwrap() .call1((ob,)) .unwrap(); @@ -778,8 +786,11 @@ impl<'s> FromPyObject<'s> for Wrap> { } fn get_decimal(ob: &PyAny) -> PyResult> { - let (sign, digits, exp): (i8, Vec, i32) = - ob.call_method0("as_tuple").unwrap().extract().unwrap(); + let (sign, digits, exp): (i8, Vec, i32) = ob + .call_method0(intern!(ob.py(), "as_tuple")) + .unwrap() + .extract() + .unwrap(); // note: using Vec is not the most efficient thing here (input is a tuple) let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| { PyErr::from(PyPolarsErr::Other( @@ -820,7 +831,7 @@ impl<'s> FromPyObject<'s> for Wrap> { get_struct } else if ob.is_instance_of::() || ob.is_instance_of::() { get_list - } else if ob.hasattr("_s").unwrap() { + } else if ob.hasattr(intern!(py, "_s")).unwrap() { get_series_el } // TODO: this heap allocs on failure @@ -845,8 +856,12 @@ impl<'s> FromPyObject<'s> for Wrap> { // Can't use pyo3::types::PyDateTime with abi3-py37 feature, // so need this workaround instead of `isinstance(ob, datetime)`. - let bases = - ob.get_type().getattr("__bases__").unwrap().iter().unwrap(); + let bases = ob + .get_type() + .getattr(intern!(py, "__bases__")) + .unwrap() + .iter() + .unwrap(); for base in bases { let parent_type = base.unwrap().str().unwrap().to_str().unwrap(); From 1f440c8b038317aca9572c7ba42ed76f2a7ee4fb Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Fri, 14 Jul 2023 10:09:07 +0200 Subject: [PATCH 03/37] feat(rust, python): `polars_warn!` macro (#9868) --- polars/polars-core/src/prelude.rs | 4 ++- polars/polars-error/src/lib.rs | 4 +++ polars/polars-error/src/warning.rs | 32 +++++++++++++++++++ .../polars-lazy/polars-plan/src/dsl/string.rs | 1 - polars/polars-lazy/src/frame/mod.rs | 2 +- .../src/physical_plan/expressions/mod.rs | 2 +- py-polars/Cargo.lock | 1 + py-polars/Cargo.toml | 1 + py-polars/polars/utils/__init__.py | 3 +- py-polars/polars/utils/various.py | 9 ++++++ py-polars/src/on_startup.rs | 20 ++++++++++-- 11 files changed, 72 insertions(+), 7 deletions(-) create mode 100644 polars/polars-error/src/warning.rs diff --git a/polars/polars-core/src/prelude.rs b/polars/polars-core/src/prelude.rs index 922bb0aab783..891eb02e7ec5 100644 --- a/polars/polars-core/src/prelude.rs +++ b/polars/polars-core/src/prelude.rs @@ -32,7 +32,9 @@ pub use crate::chunked_array::temporal::conversion::*; pub use crate::chunked_array::ChunkedArray; pub(crate) use crate::chunked_array::{to_array, ChunkIdIter}; pub use crate::datatypes::*; -pub use crate::error::{polars_bail, polars_ensure, polars_err, PolarsError, PolarsResult}; +pub use crate::error::{ + polars_bail, polars_ensure, polars_err, polars_warn, PolarsError, PolarsResult, +}; #[cfg(feature = "asof_join")] pub use crate::frame::asof_join::*; pub use crate::frame::explode::MeltArgs; diff --git a/polars/polars-error/src/lib.rs b/polars/polars-error/src/lib.rs index 572922b61e64..57cc9446427c 100644 --- a/polars/polars-error/src/lib.rs +++ b/polars/polars-error/src/lib.rs @@ -1,9 +1,13 @@ +mod warning; + use std::borrow::Cow; use std::error::Error; use std::fmt::{self, Display, Formatter}; use std::ops::Deref; use std::{env, io}; +pub use 
warning::*;
+
 #[derive(Debug)]
 pub struct ErrString(Cow<'static, str>);
diff --git a/polars/polars-error/src/warning.rs b/polars/polars-error/src/warning.rs
new file mode 100644
index 000000000000..4a2edd77d533
--- /dev/null
+++ b/polars/polars-error/src/warning.rs
@@ -0,0 +1,32 @@
+type WarningFunction = fn(&str);
+static mut WARNING_FUNCTION: Option<WarningFunction> = None;
+
+/// Set the function that will be called by the `polars_warn!` macro.
+/// You can use this to set logging in polars.
+///
+/// # Safety
+/// The caller must ensure there is no other thread accessing this function
+/// or calling `polars_warn!`.
+pub unsafe fn set_warning_function(function: WarningFunction) {
+    WARNING_FUNCTION = Some(function)
+}
+
+fn eprintln(fmt: &str) {
+    eprintln!("{}", fmt);
+}
+
+pub fn get_warning_function() -> WarningFunction {
+    unsafe { WARNING_FUNCTION.unwrap_or(eprintln) }
+}
+#[macro_export]
+macro_rules! polars_warn {
+    ($fmt:literal, $($arg:tt)+) => {
+        {{
+            let func = $crate::get_warning_function();
+            func(format!($fmt, $($arg)+).as_ref())
+        }}
+    };
+    ($($arg:tt)+) => {
+        polars_warn!("{}", $($arg)+);
+    };
+}
diff --git a/polars/polars-lazy/polars-plan/src/dsl/string.rs b/polars/polars-lazy/polars-plan/src/dsl/string.rs
index f9eb6acfd61b..8d878d4be5a9 100644
--- a/polars/polars-lazy/polars-plan/src/dsl/string.rs
+++ b/polars/polars-lazy/polars-plan/src/dsl/string.rs
@@ -1,4 +1,3 @@
-use polars_arrow::array::ValueSize;
 #[cfg(feature = "dtype-struct")]
 use polars_arrow::export::arrow::array::{MutableArray, MutableUtf8Array};
 #[cfg(feature = "dtype-struct")]
diff --git a/polars/polars-lazy/src/frame/mod.rs b/polars/polars-lazy/src/frame/mod.rs
index b7a305c3445b..593333c96f82 100644
--- a/polars/polars-lazy/src/frame/mod.rs
+++ b/polars/polars-lazy/src/frame/mod.rs
@@ -482,7 +482,7 @@ impl LazyFrame {
         let streaming = self.opt_state.streaming;
         #[cfg(feature = "cse")]
         if streaming && self.opt_state.common_subplan_elimination {
-            eprintln!("Cannot combine 'streaming' with 'common_subplan_elimination'. CSE will be turned off.");
+            polars_warn!("Cannot combine 'streaming' with 'common_subplan_elimination'.
CSE will be turned off."); opt_state.common_subplan_elimination = false; } let lp_top = optimize(self.logical_plan, opt_state, lp_arena, expr_arena, scratch)?; diff --git a/polars/polars-lazy/src/physical_plan/expressions/mod.rs b/polars/polars-lazy/src/physical_plan/expressions/mod.rs index ef8c731f52a4..befd3655be99 100644 --- a/polars/polars-lazy/src/physical_plan/expressions/mod.rs +++ b/polars/polars-lazy/src/physical_plan/expressions/mod.rs @@ -402,7 +402,7 @@ impl<'a> AggregationContext<'a> { #[cfg(debug_assertions)] { if self.groups.len() > s.len() { - eprintln!("groups may be out of bounds; more groups than elements in a series is only possible in dynamic groupby") + polars_warn!("groups may be out of bounds; more groups than elements in a series is only possible in dynamic groupby") } } diff --git a/py-polars/Cargo.lock b/py-polars/Cargo.lock index 0bcb9e8e6f1b..9e09aa089cdc 100644 --- a/py-polars/Cargo.lock +++ b/py-polars/Cargo.lock @@ -1733,6 +1733,7 @@ dependencies = [ "polars", "polars-algo", "polars-core", + "polars-error", "polars-lazy", "pyo3", "pyo3-built", diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml index f8d06264b6da..9cf2d39685f9 100644 --- a/py-polars/Cargo.toml +++ b/py-polars/Cargo.toml @@ -24,6 +24,7 @@ numpy = "0.19" once_cell = "1" polars-algo = { path = "../polars/polars-algo", default-features = false } polars-core = { path = "../polars/polars-core", features = ["python"], default-features = false } +polars-error = { path = "../polars/polars-error" } polars-lazy = { path = "../polars/polars-lazy", features = ["python"], default-features = false } pyo3 = { version = "0.19", features = ["abi3-py38", "extension-module", "multiple-pymethods"] } pyo3-built = { version = "0.4", optional = true } diff --git a/py-polars/polars/utils/__init__.py b/py-polars/polars/utils/__init__.py index b955557aee12..97b5d6caa20a 100644 --- a/py-polars/polars/utils/__init__.py +++ b/py-polars/polars/utils/__init__.py @@ -19,7 +19,7 @@ ) from polars.utils.meta import get_idx_type, get_index_type, threadpool_size from polars.utils.show_versions import show_versions -from polars.utils.various import NoDefault, no_default +from polars.utils.various import NoDefault, _polars_warn, no_default __all__ = [ "NoDefault", @@ -41,4 +41,5 @@ "_to_python_timedelta", "_datetime_for_anyvalue", "_datetime_for_anyvalue_windows", + "_polars_warn", ] diff --git a/py-polars/polars/utils/various.py b/py-polars/polars/utils/various.py index ec15af5ed3a5..b5a50cfd9b49 100644 --- a/py-polars/polars/utils/various.py +++ b/py-polars/polars/utils/various.py @@ -4,6 +4,7 @@ import os import re import sys +import warnings from collections.abc import MappingView, Sized from enum import Enum from typing import TYPE_CHECKING, Any, Generator, Iterable, Literal, Sequence, TypeVar @@ -404,3 +405,11 @@ def _get_stack_locals( return objects stack_frame = stack_frame.f_back return objects + + +# this is called from rust +def _polars_warn(msg: str) -> None: + warnings.warn( + msg, + stacklevel=find_stacklevel(), + ) diff --git a/py-polars/src/on_startup.rs b/py-polars/src/on_startup.rs index be19ebe17a08..f0bbbb17c84c 100644 --- a/py-polars/src/on_startup.rs +++ b/py-polars/src/on_startup.rs @@ -8,12 +8,13 @@ use polars_core::chunked_array::object::registry::AnonymousObjectBuilder; use polars_core::error::PolarsError::ComputeError; use polars_core::error::PolarsResult; use polars_core::frame::DataFrame; +use pyo3::intern; use pyo3::prelude::*; use crate::apply::lazy::{call_lambda_with_series, ToSeries}; use 
crate::dataframe::PyDataFrame; use crate::prelude::{python_udf, ObjectValue}; -use crate::py_modules::POLARS; +use crate::py_modules::{POLARS, UTILS}; use crate::Wrap; fn python_function_caller_series(s: Series, lambda: &PyObject) -> PolarsResult { @@ -56,6 +57,19 @@ fn python_function_caller_df(df: DataFrame, lambda: &PyObject) -> PolarsResult Date: Fri, 14 Jul 2023 10:13:20 +0200 Subject: [PATCH 04/37] chore(rust): Clean up workspace definition (#9861) --- .github/workflows/lint-rust.yml | 2 +- Cargo.toml | 25 ++++++------------------- polars/polars-core/Cargo.toml | 1 + 3 files changed, 8 insertions(+), 20 deletions(-) diff --git a/.github/workflows/lint-rust.yml b/.github/workflows/lint-rust.yml index a21f0b32c520..61cedbc4472e 100644 --- a/.github/workflows/lint-rust.yml +++ b/.github/workflows/lint-rust.yml @@ -60,7 +60,7 @@ jobs: save-if: ${{ github.ref_name == 'main' }} - name: Run cargo clippy - run: cargo clippy -- -D warnings + run: cargo clippy --all-targets -- -D warnings rustfmt: if: github.ref_name != 'main' diff --git a/Cargo.toml b/Cargo.toml index 56bdc542e159..6fa06baf9c27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,25 +3,12 @@ resolver = "2" members = [ "polars", "polars-cli", - "polars/polars-core", - "polars/polars-io", - "polars/polars-time", - "polars/polars-utils", - "polars/polars-ops", - "polars/polars-algo", - "polars/polars-lazy", - "polars/polars-lazy/polars-plan", - "polars/polars-lazy/polars-pipe", - "polars/polars-sql", - "polars/polars-error", - "polars/polars-row", - "polars/polars-json", - "examples/read_csv", - "examples/read_json", - "examples/read_parquet", - "examples/read_parquet_cloud", - "examples/string_filter", - "examples/python_rust_compiled_function", + "polars/polars-*", + "polars/polars-lazy/polars-*", + "examples/*", +] +exclude = [ + "examples/datasets", ] [workspace.package] diff --git a/polars/polars-core/Cargo.toml b/polars/polars-core/Cargo.toml index 0b7821d04df2..a6ccda4ace62 100644 --- a/polars/polars-core/Cargo.toml +++ b/polars/polars-core/Cargo.toml @@ -183,6 +183,7 @@ wasm-timer = "0.2.5" [dev-dependencies] bincode = "1" +serde_json = "1" [package.metadata.docs.rs] # not all because arrow 4.3 does not compile with simd From 7b0527c02910653449c257db0756af8e4a102894 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Fri, 14 Jul 2023 10:27:38 +0200 Subject: [PATCH 05/37] feat(rust,python,cli): add `LENGTH` and `OCTET_LENGTH` string functions for SQL (#9860) --- .../polars-plan/src/dsl/function_expr/mod.rs | 8 +- .../src/dsl/function_expr/strings.rs | 149 ++++++++++-------- .../polars-lazy/polars-plan/src/dsl/string.rs | 14 +- polars/polars-sql/src/functions.rs | 15 ++ py-polars/src/expr/string.rs | 20 +-- py-polars/tests/unit/test_sql.py | 21 +++ 6 files changed, 138 insertions(+), 89 deletions(-) diff --git a/polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs b/polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs index 52b1922bffb4..3ffc1abe0380 100644 --- a/polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs +++ b/polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs @@ -618,6 +618,9 @@ impl From for SpecialEq> { match func { #[cfg(feature = "regex")] Contains { literal, strict } => map_as_slice!(strings::contains, literal, strict), + CountMatch(pat) => { + map!(strings::count_match, &pat) + } EndsWith { .. } => map_as_slice!(strings::ends_with), StartsWith { .. 
} => map_as_slice!(strings::starts_with), Extract { pat, group_index } => { @@ -626,9 +629,8 @@ impl From for SpecialEq> { ExtractAll => { map_as_slice!(strings::extract_all) } - CountMatch(pat) => { - map!(strings::count_match, &pat) - } + NChars => map!(strings::n_chars), + Length => map!(strings::lengths), #[cfg(feature = "string_justify")] Zfill(alignment) => { map!(strings::zfill, alignment) diff --git a/polars/polars-lazy/polars-plan/src/dsl/function_expr/strings.rs b/polars/polars-lazy/polars-plan/src/dsl/function_expr/strings.rs index da1d7350a76a..60486b1cc073 100644 --- a/polars/polars-lazy/polars-plan/src/dsl/function_expr/strings.rs +++ b/polars/polars-lazy/polars-plan/src/dsl/function_expr/strings.rs @@ -17,37 +17,39 @@ use super::*; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Clone, PartialEq, Debug, Eq, Hash)] pub enum StringFunction { + #[cfg(feature = "concat_str")] + ConcatHorizontal(String), + #[cfg(feature = "concat_str")] + ConcatVertical(String), #[cfg(feature = "regex")] Contains { literal: bool, strict: bool, }, - StartsWith, + CountMatch(String), EndsWith, + Explode, Extract { pat: String, group_index: usize, }, - #[cfg(feature = "string_justify")] - Zfill(usize), + ExtractAll, + #[cfg(feature = "string_from_radix")] + FromRadix(u32, bool), + NChars, + Length, #[cfg(feature = "string_justify")] LJust { width: usize, fillchar: char, }, - #[cfg(feature = "string_justify")] - RJust { - width: usize, - fillchar: char, + Lowercase, + LStrip(Option), + #[cfg(feature = "extract_jsonpath")] + JsonExtract { + dtype: Option, + infer_schema_len: Option, }, - ExtractAll, - CountMatch(String), - #[cfg(feature = "temporal")] - Strptime(DataType, StrptimeOptions), - #[cfg(feature = "concat_str")] - ConcatVertical(String), - #[cfg(feature = "concat_str")] - ConcatHorizontal(String), #[cfg(feature = "regex")] Replace { // negative is replace all @@ -55,56 +57,58 @@ pub enum StringFunction { n: i64, literal: bool, }, - Uppercase, - Lowercase, - #[cfg(feature = "nightly")] - Titlecase, - Strip(Option), + #[cfg(feature = "string_justify")] + RJust { + width: usize, + fillchar: char, + }, RStrip(Option), - LStrip(Option), - #[cfg(feature = "string_from_radix")] - FromRadix(u32, bool), Slice(i64, Option), - Explode, + StartsWith, + Strip(Option), + #[cfg(feature = "temporal")] + Strptime(DataType, StrptimeOptions), #[cfg(feature = "dtype-decimal")] ToDecimal(usize), - #[cfg(feature = "extract_jsonpath")] - JsonExtract { - dtype: Option, - infer_schema_len: Option, - }, + #[cfg(feature = "nightly")] + Titlecase, + Uppercase, + #[cfg(feature = "string_justify")] + Zfill(usize), } impl StringFunction { pub(super) fn get_field(&self, mapper: FieldsMapper) -> PolarsResult { use StringFunction::*; match self { + #[cfg(feature = "concat_str")] + ConcatVertical(_) | ConcatHorizontal(_) => mapper.with_same_dtype(), #[cfg(feature = "regex")] Contains { .. } => mapper.with_dtype(DataType::Boolean), + CountMatch(_) => mapper.with_dtype(DataType::UInt32), EndsWith | StartsWith => mapper.with_dtype(DataType::Boolean), + Explode => mapper.with_same_dtype(), Extract { .. } => mapper.with_same_dtype(), ExtractAll => mapper.with_dtype(DataType::List(Box::new(DataType::Utf8))), - CountMatch(_) => mapper.with_dtype(DataType::UInt32), - #[cfg(feature = "string_justify")] - Zfill { .. } | LJust { .. } | RJust { .. 
} => mapper.with_same_dtype(), - #[cfg(feature = "temporal")] - Strptime(dtype, _) => mapper.with_dtype(dtype.clone()), - #[cfg(feature = "concat_str")] - ConcatVertical(_) | ConcatHorizontal(_) => mapper.with_same_dtype(), + #[cfg(feature = "string_from_radix")] + FromRadix { .. } => mapper.with_dtype(DataType::Int32), + #[cfg(feature = "extract_jsonpath")] + JsonExtract { dtype, .. } => mapper.with_opt_dtype(dtype.clone()), + Length => mapper.with_dtype(DataType::UInt32), + NChars => mapper.with_dtype(DataType::UInt32), #[cfg(feature = "regex")] Replace { .. } => mapper.with_same_dtype(), - Uppercase | Lowercase | Strip(_) | LStrip(_) | RStrip(_) | Slice(_, _) => { - mapper.with_same_dtype() - } + #[cfg(feature = "temporal")] + Strptime(dtype, _) => mapper.with_dtype(dtype.clone()), #[cfg(feature = "nightly")] Titlecase => mapper.with_same_dtype(), - #[cfg(feature = "string_from_radix")] - FromRadix { .. } => mapper.with_dtype(DataType::Int32), - Explode => mapper.with_same_dtype(), #[cfg(feature = "dtype-decimal")] ToDecimal(_) => mapper.with_dtype(DataType::Decimal(None, None)), - #[cfg(feature = "extract_jsonpath")] - JsonExtract { dtype, .. } => mapper.with_opt_dtype(dtype.clone()), + Uppercase | Lowercase | Strip(_) | LStrip(_) | RStrip(_) | Slice(_, _) => { + mapper.with_same_dtype() + } + #[cfg(feature = "string_justify")] + Zfill { .. } | LJust { .. } | RJust { .. } => mapper.with_same_dtype(), } } } @@ -114,42 +118,43 @@ impl Display for StringFunction { let s = match self { #[cfg(feature = "regex")] StringFunction::Contains { .. } => "contains", - StringFunction::StartsWith { .. } => "starts_with", + StringFunction::CountMatch(_) => "count_match", StringFunction::EndsWith { .. } => "ends_with", StringFunction::Extract { .. } => "extract", - #[cfg(feature = "string_justify")] - StringFunction::Zfill(_) => "zfill", + #[cfg(feature = "concat_str")] + StringFunction::ConcatHorizontal(_) => "concat_horizontal", + #[cfg(feature = "concat_str")] + StringFunction::ConcatVertical(_) => "concat_vertical", + StringFunction::Explode => "explode", + StringFunction::ExtractAll => "extract_all", + #[cfg(feature = "string_from_radix")] + StringFunction::FromRadix { .. } => "from_radix", + #[cfg(feature = "extract_jsonpath")] + StringFunction::JsonExtract { .. } => "json_extract", #[cfg(feature = "string_justify")] StringFunction::LJust { .. } => "str.ljust", + StringFunction::LStrip(_) => "lstrip", + StringFunction::Length => "str_lengths", + StringFunction::Lowercase => "lowercase", + StringFunction::NChars => "n_chars", #[cfg(feature = "string_justify")] StringFunction::RJust { .. } => "rjust", - StringFunction::ExtractAll => "extract_all", - StringFunction::CountMatch(_) => "count_match", - #[cfg(feature = "temporal")] - StringFunction::Strptime(_, _) => "strptime", - #[cfg(feature = "concat_str")] - StringFunction::ConcatVertical(_) => "concat_vertical", - #[cfg(feature = "concat_str")] - StringFunction::ConcatHorizontal(_) => "concat_horizontal", + StringFunction::RStrip(_) => "rstrip", #[cfg(feature = "regex")] StringFunction::Replace { .. } => "replace", - StringFunction::Uppercase => "uppercase", - StringFunction::Lowercase => "lowercase", + StringFunction::Slice(_, _) => "str_slice", + StringFunction::StartsWith { .. 
} => "starts_with", + StringFunction::Strip(_) => "strip", + #[cfg(feature = "temporal")] + StringFunction::Strptime(_, _) => "strptime", #[cfg(feature = "nightly")] StringFunction::Titlecase => "titlecase", - StringFunction::Strip(_) => "strip", - StringFunction::LStrip(_) => "lstrip", - StringFunction::RStrip(_) => "rstrip", - #[cfg(feature = "string_from_radix")] - StringFunction::FromRadix { .. } => "from_radix", - StringFunction::Slice(_, _) => "str_slice", - StringFunction::Explode => "explode", #[cfg(feature = "dtype-decimal")] StringFunction::ToDecimal(_) => "to_decimal", - #[cfg(feature = "extract_jsonpath")] - StringFunction::JsonExtract { .. } => "json_extract", + StringFunction::Uppercase => "uppercase", + #[cfg(feature = "string_justify")] + StringFunction::Zfill(_) => "zfill", }; - write!(f, "str.{s}") } } @@ -170,6 +175,16 @@ pub(super) fn titlecase(s: &Series) -> PolarsResult { Ok(ca.to_titlecase().into_series()) } +pub(super) fn n_chars(s: &Series) -> PolarsResult { + let ca = s.utf8()?; + Ok(ca.str_n_chars().into_series()) +} + +pub(super) fn lengths(s: &Series) -> PolarsResult { + let ca = s.utf8()?; + Ok(ca.str_lengths().into_series()) +} + #[cfg(feature = "regex")] pub(super) fn contains(s: &[Series], literal: bool, strict: bool) -> PolarsResult { let ca = &s[0].utf8()?; diff --git a/polars/polars-lazy/polars-plan/src/dsl/string.rs b/polars/polars-lazy/polars-plan/src/dsl/string.rs index 8d878d4be5a9..fe5409a3ef86 100644 --- a/polars/polars-lazy/polars-plan/src/dsl/string.rs +++ b/polars/polars-lazy/polars-plan/src/dsl/string.rs @@ -455,7 +455,7 @@ impl StringNameSpace { } #[cfg(feature = "string_from_radix")] - /// Parse string in base radix into decimal + /// Parse string in base radix into decimal. pub fn from_radix(self, radix: u32, strict: bool) -> Expr { self.0 .map_private(FunctionExpr::StringExpr(StringFunction::FromRadix( @@ -463,6 +463,18 @@ impl StringNameSpace { ))) } + /// Return the number of characters in the string (not bytes). + pub fn n_chars(self) -> Expr { + self.0 + .map_private(FunctionExpr::StringExpr(StringFunction::NChars)) + } + + /// Return the number of bytes in the string (not characters). + pub fn lengths(self) -> Expr { + self.0 + .map_private(FunctionExpr::StringExpr(StringFunction::Length)) + } + /// Slice the string values. 
pub fn str_slice(self, start: i64, length: Option) -> Expr { self.0 diff --git a/polars/polars-sql/src/functions.rs b/polars/polars-sql/src/functions.rs index 34a73e9faaa1..3507becbac7b 100644 --- a/polars/polars-sql/src/functions.rs +++ b/polars/polars-sql/src/functions.rs @@ -161,6 +161,11 @@ pub(crate) enum PolarsSqlFunctions { /// SELECT LEFT(column_1, 3) from df; /// ``` Left, + /// SQL 'length' function (characters) + /// ```sql + /// SELECT LENGTH(column_1) from df; + /// ``` + Length, /// SQL 'lower' function /// ```sql /// SELECT LOWER(column_1) from df; @@ -171,6 +176,11 @@ pub(crate) enum PolarsSqlFunctions { /// SELECT LTRIM(column_1) from df; /// ``` LTrim, + /// SQL 'octet_length' function (bytes) + /// ```sql + /// SELECT OCTET_LENGTH(column_1) from df; + /// ``` + OctetLength, /// SQL 'regexp_like' function /// ```sql /// SELECT REGEXP_LIKE(column_1,'xyz', 'i') from df; @@ -368,6 +378,7 @@ impl PolarsSqlFunctions { "ltrim", "max", "min", + "octet_length", "pow", "radians", "round", @@ -428,9 +439,11 @@ impl TryFrom<&'_ SQLFunction> for PolarsSqlFunctions { // String functions // ---- "ends_with" => Self::EndsWith, + "length" => Self::Length, "left" => Self::Left, "lower" => Self::Lower, "ltrim" => Self::LTrim, + "octet_length" => Self::OctetLength, "regexp_like" => Self::RegexpLike, "rtrim" => Self::RTrim, "starts_with" => Self::StartsWith, @@ -532,6 +545,7 @@ impl SqlFunctionVisitor<'_> { } })) }), + Length => self.visit_unary(|e| e.str().n_chars()), Lower => self.visit_unary(|e| e.str().to_lowercase()), LTrim => match function.args.len() { 1 => self.visit_unary(|e| e.str().lstrip(None)), @@ -541,6 +555,7 @@ impl SqlFunctionVisitor<'_> { function.args.len() ), }, + OctetLength => self.visit_unary(|e| e.str().lengths()), RegexpLike => match function.args.len() { 2 => self.visit_binary(|e, s| e.str().contains(s, true)), 3 => self.try_visit_ternary(|e, pat, flags| { diff --git a/py-polars/src/expr/string.rs b/py-polars/src/expr/string.rs index 033d686f6b1e..3dc76dc2c1a9 100644 --- a/py-polars/src/expr/string.rs +++ b/py-polars/src/expr/string.rs @@ -89,27 +89,11 @@ impl PyExpr { } fn str_lengths(&self) -> Self { - let function = |s: Series| { - let ca = s.utf8()?; - Ok(Some(ca.str_lengths().into_series())) - }; - self.clone() - .inner - .map(function, GetOutput::from_type(DataType::UInt32)) - .with_fmt("str.lengths") - .into() + self.inner.clone().str().lengths().into() } fn str_n_chars(&self) -> Self { - let function = |s: Series| { - let ca = s.utf8()?; - Ok(Some(ca.str_n_chars().into_series())) - }; - self.clone() - .inner - .map(function, GetOutput::from_type(DataType::UInt32)) - .with_fmt("str.n_chars") - .into() + self.inner.clone().str().n_chars().into() } #[cfg(feature = "lazy_regex")] diff --git a/py-polars/tests/unit/test_sql.py b/py-polars/tests/unit/test_sql.py index 3e2fefe9a1d0..27347cf8edd2 100644 --- a/py-polars/tests/unit/test_sql.py +++ b/py-polars/tests/unit/test_sql.py @@ -682,6 +682,27 @@ def test_sql_round_ndigits_errors() -> None: ctx.execute("SELECT ROUND(n,-1) AS n FROM df") +def test_sql_string_lengths() -> None: + df = pl.DataFrame({"words": ["Café", None, "東京"]}) + + with pl.SQLContext(frame=df) as ctx: + res = ctx.execute( + """ + SELECT + words, + LENGTH(words) AS n_chars, + OCTET_LENGTH(words) AS n_bytes + FROM frame + """ + ).collect() + + assert res.to_dict(False) == { + "words": ["Café", None, "東京"], + "n_chars": [4, None, 2], + "n_bytes": [5, None, 6], + } + + def test_sql_substr() -> None: df = pl.DataFrame( { From 
bae44a0a49ea72a2686451f1ea5a7a19e5a0cb44 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Fri, 14 Jul 2023 10:51:46 +0200 Subject: [PATCH 06/37] feat(rust, python): Expr.cat.get_categories expression (#9869) --- .../logical/categorical/builder.rs | 13 ++++++---- polars/polars-lazy/polars-plan/src/dsl/cat.rs | 5 ++++ .../polars-plan/src/dsl/function_expr/cat.rs | 17 ++++++++++++- .../reference/expressions/categories.rst | 1 + .../source/reference/series/categories.rst | 1 + py-polars/polars/expr/categorical.py | 24 +++++++++++++++++++ py-polars/polars/series/categorical.py | 18 ++++++++++++++ py-polars/src/expr/categorical.rs | 4 ++++ .../tests/unit/namespaces/test_categorical.py | 6 +++++ 9 files changed, 84 insertions(+), 5 deletions(-) diff --git a/polars/polars-core/src/chunked_array/logical/categorical/builder.rs b/polars/polars-core/src/chunked_array/logical/categorical/builder.rs index dd1b2d7f112d..93b46dd9475d 100644 --- a/polars/polars-core/src/chunked_array/logical/categorical/builder.rs +++ b/polars/polars-core/src/chunked_array/logical/categorical/builder.rs @@ -92,14 +92,19 @@ impl RevMapping { !self.is_global() } - /// Get the length of the [`RevMapping`] - pub fn len(&self) -> usize { + /// Get the categories in this RevMapping + pub fn get_categories(&self) -> &Utf8Array { match self { - Self::Global(_, a, _) => a.len(), - Self::Local(a) => a.len(), + Self::Global(_, a, _) => a, + Self::Local(a) => a, } } + /// Get the length of the [`RevMapping`] + pub fn len(&self) -> usize { + self.get_categories().len() + } + /// Categorical to str pub fn get(&self, idx: u32) -> &str { match self { diff --git a/polars/polars-lazy/polars-plan/src/dsl/cat.rs b/polars/polars-lazy/polars-plan/src/dsl/cat.rs index 60e234acde60..e4a8601e73aa 100644 --- a/polars/polars-lazy/polars-plan/src/dsl/cat.rs +++ b/polars/polars-lazy/polars-plan/src/dsl/cat.rs @@ -20,4 +20,9 @@ impl CategoricalNameSpace { self.0 .map_private(CategoricalFunction::SetOrdering { lexical }.into()) } + + pub fn get_categories(self) -> Expr { + self.0 + .map_private(CategoricalFunction::GetCategories.into()) + } } diff --git a/polars/polars-lazy/polars-plan/src/dsl/function_expr/cat.rs b/polars/polars-lazy/polars-plan/src/dsl/function_expr/cat.rs index 07eeae7cf143..455ae39e8805 100644 --- a/polars/polars-lazy/polars-plan/src/dsl/function_expr/cat.rs +++ b/polars/polars-lazy/polars-plan/src/dsl/function_expr/cat.rs @@ -5,11 +5,16 @@ use crate::map; #[derive(Clone, PartialEq, Debug, Eq, Hash)] pub enum CategoricalFunction { SetOrdering { lexical: bool }, + GetCategories, } impl CategoricalFunction { pub(super) fn get_field(&self, mapper: FieldsMapper) -> PolarsResult { - mapper.with_dtype(DataType::Boolean) + use CategoricalFunction::*; + match self { + SetOrdering { .. } => mapper.with_same_dtype(), + GetCategories => mapper.with_dtype(DataType::Utf8), + } } } @@ -18,6 +23,7 @@ impl Display for CategoricalFunction { use CategoricalFunction::*; let s = match self { SetOrdering { .. 
} => "set_ordering", + GetCategories => "get_categories", }; write!(f, "{s}") } @@ -28,6 +34,7 @@ impl From for SpecialEq> { use CategoricalFunction::*; match func { SetOrdering { lexical } => map!(set_ordering, lexical), + GetCategories => map!(get_categories), } } } @@ -43,3 +50,11 @@ fn set_ordering(s: &Series, lexical: bool) -> PolarsResult { ca.set_lexical_sorted(lexical); Ok(ca.into_series()) } + +fn get_categories(s: &Series) -> PolarsResult { + // categorical check + let ca = s.categorical()?; + let DataType::Categorical(Some(rev_map)) = ca.dtype() else { unreachable!() }; + let arr = rev_map.get_categories().clone().boxed(); + Series::try_from((ca.name(), arr)) +} diff --git a/py-polars/docs/source/reference/expressions/categories.rst b/py-polars/docs/source/reference/expressions/categories.rst index 674702e23595..cd99c9a91d7a 100644 --- a/py-polars/docs/source/reference/expressions/categories.rst +++ b/py-polars/docs/source/reference/expressions/categories.rst @@ -9,4 +9,5 @@ The following methods are available under the `expr.cat` attribute. :toctree: api/ :template: autosummary/accessor_method.rst + Expr.cat.get_categories Expr.cat.set_ordering diff --git a/py-polars/docs/source/reference/series/categories.rst b/py-polars/docs/source/reference/series/categories.rst index 5b0c1e70e40b..1fd15ba47a32 100644 --- a/py-polars/docs/source/reference/series/categories.rst +++ b/py-polars/docs/source/reference/series/categories.rst @@ -9,4 +9,5 @@ The following methods are available under the `Series.cat` attribute. :toctree: api/ :template: autosummary/accessor_method.rst + Series.cat.get_categories Series.cat.set_ordering diff --git a/py-polars/polars/expr/categorical.py b/py-polars/polars/expr/categorical.py index 7fdc19b01663..ea25036d16e6 100644 --- a/py-polars/polars/expr/categorical.py +++ b/py-polars/polars/expr/categorical.py @@ -55,3 +55,27 @@ def set_ordering(self, ordering: CategoricalOrdering) -> Expr: """ return wrap_expr(self._pyexpr.cat_set_ordering(ordering)) + + def get_categories(self) -> Expr: + """ + Get the categories stored in this data type. + + Examples + -------- + >>> df = pl.Series( + ... "cats", ["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical + ... ).to_frame() + >>> df.select(pl.col("cats").cat.get_categories()) + shape: (3, 1) + ┌──────┐ + │ cats │ + │ --- │ + │ str │ + ╞══════╡ + │ foo │ + │ bar │ + │ ham │ + └──────┘ + + """ + return wrap_expr(self._pyexpr.cat_get_categories()) diff --git a/py-polars/polars/series/categorical.py b/py-polars/polars/series/categorical.py index f19a2ec8c2dd..29720880f94b 100644 --- a/py-polars/polars/series/categorical.py +++ b/py-polars/polars/series/categorical.py @@ -56,3 +56,21 @@ def set_ordering(self, ordering: CategoricalOrdering) -> Series: └──────┴──────┘ """ + + def get_categories(self) -> Series: + """ + Get the categories stored in this data type. 
+ + Examples + -------- + >>> s = pl.Series(["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical) + >>> s.cat.get_categories() + shape: (3,) + Series: '' [str] + [ + "foo" + "bar" + "ham" + ] + + """ diff --git a/py-polars/src/expr/categorical.rs b/py-polars/src/expr/categorical.rs index 8bbfe0b752ec..80e85e97315e 100644 --- a/py-polars/src/expr/categorical.rs +++ b/py-polars/src/expr/categorical.rs @@ -9,4 +9,8 @@ impl PyExpr { fn cat_set_ordering(&self, ordering: Wrap) -> Self { self.inner.clone().cat().set_ordering(ordering.0).into() } + + fn cat_get_categories(&self) -> Self { + self.inner.clone().cat().get_categories().into() + } } diff --git a/py-polars/tests/unit/namespaces/test_categorical.py b/py-polars/tests/unit/namespaces/test_categorical.py index 1159c2160a30..b2c2a5157864 100644 --- a/py-polars/tests/unit/namespaces/test_categorical.py +++ b/py-polars/tests/unit/namespaces/test_categorical.py @@ -72,3 +72,9 @@ def test_sort_categoricals_6014() -> None: assert out.to_dict(False) == {"key": ["bbb", "aaa", "ccc"]} out = df2.sort("key") assert out.to_dict(False) == {"key": ["aaa", "bbb", "ccc"]} + + +def test_categorical_get_categories() -> None: + assert pl.Series( + "cats", ["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical + ).cat.get_categories().to_list() == ["foo", "bar", "ham"] From f83e277d78da3238176de2027f23df0fef42565d Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Fri, 14 Jul 2023 13:47:09 +0200 Subject: [PATCH 07/37] fix(rust, python): fmt unknown dtype (#9872) --- polars/polars-core/src/datatypes/dtype.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polars/polars-core/src/datatypes/dtype.rs b/polars/polars-core/src/datatypes/dtype.rs index cd3adc9c5f0d..57245791fb1c 100644 --- a/polars/polars-core/src/datatypes/dtype.rs +++ b/polars/polars-core/src/datatypes/dtype.rs @@ -332,7 +332,7 @@ impl Display for DataType { DataType::Categorical(_) => "cat", #[cfg(feature = "dtype-struct")] DataType::Struct(fields) => return write!(f, "struct[{}]", fields.len()), - DataType::Unknown => unreachable!(), + DataType::Unknown => "unknown", }; f.write_str(s) } From 5937c2251d782931e751ce025ebb39f2f5a91ab2 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Fri, 14 Jul 2023 14:43:52 +0200 Subject: [PATCH 08/37] feat(rust, python): allow set_sorted in streaming (#9876) --- polars/polars-core/src/series/series_trait.rs | 3 +++ .../polars-plan/src/dsl/function_expr/dispatch.rs | 6 ++++++ .../polars-lazy/polars-plan/src/dsl/function_expr/mod.rs | 3 +++ .../polars-plan/src/dsl/function_expr/schema.rs | 1 + polars/polars-lazy/polars-plan/src/dsl/mod.rs | 8 +------- polars/polars-lazy/src/physical_plan/streaming/checks.rs | 4 ++++ 6 files changed, 18 insertions(+), 7 deletions(-) diff --git a/polars/polars-core/src/series/series_trait.rs b/polars/polars-core/src/series/series_trait.rs index 95d04c772b2d..5a5871720861 100644 --- a/polars/polars-core/src/series/series_trait.rs +++ b/polars/polars-core/src/series/series_trait.rs @@ -4,6 +4,8 @@ use std::borrow::Cow; use std::sync::Arc; use polars_arrow::prelude::QuantileInterpolOptions; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; #[cfg(feature = "object")] use crate::chunked_array::object::PolarsObjectSafe; @@ -11,6 +13,7 @@ pub use crate::prelude::ChunkCompare; use crate::prelude::*; #[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum IsSorted { Ascending, Descending, diff --git 
a/polars/polars-lazy/polars-plan/src/dsl/function_expr/dispatch.rs b/polars/polars-lazy/polars-plan/src/dsl/function_expr/dispatch.rs
index 831eb2bbff8c..5ca34b389efa 100644
--- a/polars/polars-lazy/polars-plan/src/dsl/function_expr/dispatch.rs
+++ b/polars/polars-lazy/polars-plan/src/dsl/function_expr/dispatch.rs
@@ -26,3 +26,9 @@ pub(super) fn interpolate(s: &Series, method: InterpolationMethod) -> PolarsResu
 pub(super) fn to_physical(s: &Series) -> PolarsResult<Series> {
     Ok(s.to_physical_repr().into_owned())
 }
+
+pub(super) fn set_sorted_flag(s: &Series, sorted: IsSorted) -> PolarsResult<Series> {
+    let mut s = s.clone();
+    s.set_sorted_flag(sorted);
+    Ok(s)
+}
diff --git a/polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs b/polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs
index 3ffc1abe0380..0493c231a429 100644
--- a/polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs
+++ b/polars/polars-lazy/polars-plan/src/dsl/function_expr/mod.rs
@@ -230,6 +230,7 @@ pub enum FunctionExpr {
         seed: Option<u64>,
         fixed_seed: bool,
     },
+    SetSortedFlag(IsSorted),
 }
 
 impl Display for FunctionExpr {
@@ -335,6 +336,7 @@ impl Display for FunctionExpr {
             ToPhysical => "to_physical",
             #[cfg(feature = "random")]
             Random { method, .. } => method.into(),
+            SetSortedFlag(_) => "set_sorted",
         };
         write!(f, "{s}")
     }
@@ -607,6 +609,7 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
                 seed,
                 fixed_seed
             ),
+            SetSortedFlag(sorted) => map!(dispatch::set_sorted_flag, sorted),
         }
     }
 }
diff --git a/polars/polars-lazy/polars-plan/src/dsl/function_expr/schema.rs b/polars/polars-lazy/polars-plan/src/dsl/function_expr/schema.rs
index 00a031d3d63f..fc0f414b4c77 100644
--- a/polars/polars-lazy/polars-plan/src/dsl/function_expr/schema.rs
+++ b/polars/polars-lazy/polars-plan/src/dsl/function_expr/schema.rs
@@ -250,6 +250,7 @@ impl FunctionExpr {
             ToPhysical => mapper.to_physical_type(),
             #[cfg(feature = "random")]
             Random { .. } => mapper.with_same_dtype(),
+            SetSortedFlag(_) => mapper.with_same_dtype(),
         }
     }
 }
diff --git a/polars/polars-lazy/polars-plan/src/dsl/mod.rs b/polars/polars-lazy/polars-plan/src/dsl/mod.rs
index 5698a956f740..05d7923a3b82 100644
--- a/polars/polars-lazy/polars-plan/src/dsl/mod.rs
+++ b/polars/polars-lazy/polars-plan/src/dsl/mod.rs
@@ -1746,13 +1746,7 @@ impl Expr {
     /// This can lead to incorrect results if this `Series` is not sorted!!
     /// Use with care!
     pub fn set_sorted_flag(self, sorted: IsSorted) -> Expr {
-        self.apply(
-            move |mut s| {
-                s.set_sorted_flag(sorted);
-                Ok(Some(s))
-            },
-            GetOutput::same_type(),
-        )
+        self.apply_private(FunctionExpr::SetSortedFlag(sorted))
     }
diff --git a/polars/polars-lazy/src/physical_plan/streaming/checks.rs b/polars/polars-lazy/src/physical_plan/streaming/checks.rs
index 70c00053ef02..47d51f2067ee 100644
--- a/polars/polars-lazy/src/physical_plan/streaming/checks.rs
+++ b/polars/polars-lazy/src/physical_plan/streaming/checks.rs
@@ -21,6 +21,10 @@ pub(super) fn is_streamable(node: Node, expr_arena: &Arena<AExpr>, context: Cont
     let mut seen_column = false;
     let mut seen_lit_range = false;
     let all = expr_arena.iter(node).all(|(_, ae)| match ae {
+        AExpr::Function {
+            function: FunctionExpr::SetSortedFlag(_),
+            ..
+        } => true,
         AExpr::Function { options, .. } | AExpr::AnonymousFunction { options, ..
} => match context { Context::Default => matches!( From e14238305900fd776c0684f7c2a1530d1e5d8918 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Fri, 14 Jul 2023 14:45:07 +0200 Subject: [PATCH 09/37] fix(python,rust,cli): preserve expression aliases when parsing SQL with `pl.sql_expr` (#9875) --- polars/polars-sql/src/functions.rs | 3 ++- polars/polars-sql/src/sql_expr.rs | 16 +++++++++++----- py-polars/tests/unit/test_sql.py | 14 ++++++++++++-- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/polars/polars-sql/src/functions.rs b/polars/polars-sql/src/functions.rs index 3507becbac7b..70a75718a12e 100644 --- a/polars/polars-sql/src/functions.rs +++ b/polars/polars-sql/src/functions.rs @@ -380,6 +380,7 @@ impl PolarsSqlFunctions { "min", "octet_length", "pow", + "power", "radians", "round", "rtrim", @@ -432,7 +433,7 @@ impl TryFrom<&'_ SQLFunction> for PolarsSqlFunctions { "log10" => Self::Log10, "log1p" => Self::Log1p, "log2" => Self::Log2, - "pow" => Self::Pow, + "pow" | "power" => Self::Pow, "round" => Self::Round, // ---- diff --git a/polars/polars-sql/src/sql_expr.rs b/polars/polars-sql/src/sql_expr.rs index 8450c4d9d1db..85e99ff8dcc3 100644 --- a/polars/polars-sql/src/sql_expr.rs +++ b/polars/polars-sql/src/sql_expr.rs @@ -5,8 +5,8 @@ use polars_lazy::prelude::*; use polars_plan::prelude::{col, lit, when}; use sqlparser::ast::{ ArrayAgg, BinaryOperator as SQLBinaryOperator, BinaryOperator, DataType as SQLDataType, - Expr as SqlExpr, Function as SQLFunction, JoinConstraint, OrderByExpr, TrimWhereField, - UnaryOperator, Value as SqlValue, + Expr as SqlExpr, Function as SQLFunction, JoinConstraint, OrderByExpr, SelectItem, + TrimWhereField, UnaryOperator, Value as SqlValue, }; use sqlparser::dialect::GenericDialect; use sqlparser::parser::{Parser, ParserOptions}; @@ -532,8 +532,14 @@ pub fn sql_expr>(s: S) -> PolarsResult { }); let mut ast = parser.try_with_sql(s.as_ref()).map_err(to_compute_err)?; + let expr = ast.parse_select_item().map_err(to_compute_err)?; - let expr = ast.parse_expr().map_err(to_compute_err)?; - - parse_sql_expr(&expr, &ctx) + Ok(match &expr { + SelectItem::ExprWithAlias { expr, alias } => { + let expr = parse_sql_expr(expr, &ctx)?; + expr.alias(&alias.value) + } + SelectItem::UnnamedExpr(expr) => parse_sql_expr(expr, &ctx)?, + _ => polars_bail!(InvalidOperation: "Unable to parse '{}' as Expr", s.as_ref()), + }) } diff --git a/py-polars/tests/unit/test_sql.py b/py-polars/tests/unit/test_sql.py index 27347cf8edd2..d2c9869ce718 100644 --- a/py-polars/tests/unit/test_sql.py +++ b/py-polars/tests/unit/test_sql.py @@ -773,7 +773,17 @@ def test_sql_expr() -> None: df = pl.DataFrame({"a": [1, 2, 3], "b": ["xyz", "abcde", None]}) sql_exprs = ( pl.sql_expr("MIN(a)"), - pl.sql_expr("SUBSTR(b,1,2)"), + pl.sql_expr("POWER(a,a) AS aa"), + pl.sql_expr("SUBSTR(b,1,2) AS b2"), + ) + expected = pl.DataFrame( + {"a": [1, 1, 1], "aa": [1, 4, 27], "b2": ["yz", "bc", None]} ) - expected = pl.DataFrame({"a": [1, 1, 1], "b": ["yz", "bc", None]}) assert df.select(sql_exprs).frame_equal(expected) + + # expect expressions that can't reasonably be parsed as expressions to raise + # (for example: those that explicitly reference tables and/or use wildcards) + with pytest.raises( + pl.InvalidOperationError, match=r"Unable to parse 'xyz\.\*' as Expr" + ): + pl.sql_expr("xyz.*") From cb04b5be178139b62bd4c6e06ca96aa6e8e7fe91 Mon Sep 17 00:00:00 2001 From: chielP Date: Fri, 14 Jul 2023 16:47:33 +0200 Subject: [PATCH 10/37] fix(rust): Allow None as exponent (#9880) --- 
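A minimal sketch of the user-facing behavior this fix targets (placed here, after the
`---`, so `git am` ignores it; the series name is illustrative, and the expected output
follows the test added to py-polars/tests/unit/test_series.py in this patch):

    import polars as pl

    a = pl.Series([1, 2])

    # Previously a null exponent raised ComputeError ("exponent is null").
    # After this fix it broadcasts to an all-null Float64 result:
    print(a ** None)  # -> [null, null], dtype: Float64
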
.../polars-lazy/polars-plan/src/dsl/function_expr/pow.rs | 8 ++++---- py-polars/polars/expr/expr.py | 2 +- py-polars/polars/series/series.py | 4 ++-- py-polars/tests/unit/test_series.py | 1 + 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/polars/polars-lazy/polars-plan/src/dsl/function_expr/pow.rs b/polars/polars-lazy/polars-plan/src/dsl/function_expr/pow.rs index 09f7298ac301..bf8a7e92e0b0 100644 --- a/polars/polars-lazy/polars-plan/src/dsl/function_expr/pow.rs +++ b/polars/polars-lazy/polars-plan/src/dsl/function_expr/pow.rs @@ -12,13 +12,13 @@ where ChunkedArray: IntoSeries, { let dtype = T::get_dtype(); - let exponent = exponent.cast(&dtype)?; + let exponent = exponent.strict_cast(&dtype)?; let exponent = base.unpack_series_matching_type(&exponent).unwrap(); if exponent.len() == 1 { - let exponent_value = exponent - .get(0) - .ok_or_else(|| polars_err!(ComputeError: "exponent is null"))?; + let Some(exponent_value) = exponent.get(0) else { + return Ok(Some(Series::full_null(base.name(), base.len(), &dtype))) + }; let s = match exponent_value.to_f64().unwrap() { a if a == 1.0 => base.clone().into_series(), // specialized sqrt will ensure (-inf)^0.5 = NaN diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 7fe6103e859c..768bed40458d 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -4568,7 +4568,7 @@ def truediv(self, other: Any) -> Self: """ return self.__truediv__(other) - def pow(self, exponent: int | float | Series | Expr) -> Self: + def pow(self, exponent: int | float | None | Series | Expr) -> Self: """ Method equivalent of exponentiation operator ``expr ** exponent``. diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index efff8054ef61..b36da7732e86 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -828,7 +828,7 @@ def __rmul__(self, other: Any) -> Series: raise ValueError("first cast to integer before multiplying datelike dtypes") return self._arithmetic(other, "mul", "mul_<>") - def __pow__(self, exponent: int | float | Series) -> Series: + def __pow__(self, exponent: int | float | None | Series) -> Series: return self.pow(exponent) def __rpow__(self, other: Any) -> Series: @@ -1421,7 +1421,7 @@ def product(self) -> int | float: """Reduce this Series to the product value.""" return self.to_frame().select(F.col(self.name).product()).to_series().item() - def pow(self, exponent: int | float | Series) -> Series: + def pow(self, exponent: int | float | None | Series) -> Series: """ Raise to the power of the given exponent. 
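The expression-level method accepts the same widened type, per the annotation
change to `Expr.pow` above. A minimal sketch (output shape assumed from the
Series behaviour; only the Series case is covered by the test below):

    import polars as pl

    df = pl.DataFrame({"a": [1.0, 2.0]})

    # The Expr method now also accepts None, matching the Series behaviour,
    # so this should produce a column of nulls rather than raising.
    print(df.select(pl.col("a").pow(None)))
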
diff --git a/py-polars/tests/unit/test_series.py b/py-polars/tests/unit/test_series.py index 715150f5b5f0..5aa1215934b1 100644 --- a/py-polars/tests/unit/test_series.py +++ b/py-polars/tests/unit/test_series.py @@ -382,6 +382,7 @@ def test_power() -> None: assert_series_equal(a**a, pl.Series([1.0, 4.0], dtype=Float64)) assert_series_equal(b**b, pl.Series([None, 4.0], dtype=Float64)) assert_series_equal(a**b, pl.Series([None, 4.0], dtype=Float64)) + assert_series_equal(a**None, pl.Series([None] * len(a), dtype=Float64)) with pytest.raises(ValueError): c**2 with pytest.raises(pl.ColumnNotFoundError): From 9291ee10fb84335ec8a5aa55c3f994cbfbf0b2cb Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 14 Jul 2023 17:11:08 +0200 Subject: [PATCH 11/37] depr(python): Deprecate functions series input (#9878) --- py-polars/polars/functions/lazy.py | 78 ++++++++++++------- .../tests/unit/functions/test_functions.py | 42 +++++++--- py-polars/tests/unit/test_df.py | 14 ---- 3 files changed, 85 insertions(+), 49 deletions(-) diff --git a/py-polars/polars/functions/lazy.py b/py-polars/polars/functions/lazy.py index ae0337ecb9b4..f970b4f36c9d 100644 --- a/py-polars/polars/functions/lazy.py +++ b/py-polars/polars/functions/lazy.py @@ -1,6 +1,7 @@ from __future__ import annotations import contextlib +import warnings from datetime import date, datetime, time, timedelta from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence, overload @@ -26,6 +27,7 @@ _time_to_pl_time, _timedelta_to_pl_timedelta, ) +from polars.utils.various import find_stacklevel with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr @@ -320,6 +322,11 @@ def count(column: str | Series | None = None) -> Expr | int: return wrap_expr(plr.count()) if isinstance(column, pl.Series): + warnings.warn( + "passing a Series to `count` is deprecated. Use `Series.len()` instead.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) return column.len() return col(column).count() @@ -377,6 +384,11 @@ def std(column: str | Series, ddof: int = 1) -> Expr | float | None: """ if isinstance(column, pl.Series): + warnings.warn( + "passing a Series to `std` is deprecated. Use `Series.std()` instead.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) return column.std(ddof) return col(column).std(ddof) @@ -421,6 +433,11 @@ def var(column: str | Series, ddof: int = 1) -> Expr | float | None: """ if isinstance(column, pl.Series): + warnings.warn( + "passing a Series to `var` is deprecated. Use `Series.var()` instead.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) return column.var(ddof) return col(column).var(ddof) @@ -451,11 +468,14 @@ def mean(column: str | Series) -> Expr | float | None: ╞═════╡ │ 4.0 │ └─────┘ - >>> pl.mean(df["a"]) - 4.0 """ if isinstance(column, pl.Series): + warnings.warn( + "passing a Series to `mean` is deprecated. Use `Series.mean()` instead.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) return column.mean() return col(column).mean() @@ -486,8 +506,6 @@ def avg(column: str | Series) -> Expr | float: ╞═════╡ │ 4.0 │ └─────┘ - >>> pl.avg(df["a"]) - 4.0 """ return mean(column) @@ -519,11 +537,14 @@ def median(column: str | Series) -> Expr | float | int | None: ╞═════╡ │ 3.0 │ └─────┘ - >>> pl.median(df["a"]) - 3.0 """ if isinstance(column, pl.Series): + warnings.warn( + "passing a Series to `median` is deprecated. 
Use `Series.median()` instead.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) return column.median() return col(column).median() @@ -554,11 +575,14 @@ def n_unique(column: str | Series) -> Expr | int: ╞═════╡ │ 2 │ └─────┘ - >>> pl.n_unique(df["a"]) - 2 """ if isinstance(column, pl.Series): + warnings.warn( + "passing a Series to `n_unique` is deprecated. Use `Series.n_unique()` instead.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) return column.n_unique() return col(column).n_unique() @@ -643,14 +667,17 @@ def first(column: str | Series | None = None) -> Expr | Any: ╞═════╡ │ 1 │ └─────┘ - >>> pl.first(df["a"]) - 1 """ if column is None: return wrap_expr(plr.first()) if isinstance(column, pl.Series): + warnings.warn( + "passing a Series to `first` is deprecated. Use `series[0]` instead.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) if column.len() > 0: return column[0] else: @@ -706,14 +733,17 @@ def last(column: str | Series | None = None) -> Expr: ╞═════╡ │ 3 │ └─────┘ - >>> pl.last(df["a"]) - 3 """ if column is None: return wrap_expr(plr.last()) if isinstance(column, pl.Series): + warnings.warn( + "passing a Series to `last` is deprecated. Use `series[-1]` instead.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) if column.len() > 0: return column[-1] else: @@ -766,16 +796,14 @@ def head(column: str | Series, n: int = 10) -> Expr | Series: │ 1 │ │ 8 │ └─────┘ - >>> pl.head(df["a"], 2) - shape: (2,) - Series: 'a' [i64] - [ - 1 - 8 - ] """ if isinstance(column, pl.Series): + warnings.warn( + "passing a Series to `head` is deprecated. Use `Series.head()` instead.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) return column.head(n) return col(column).head(n) @@ -825,16 +853,14 @@ def tail(column: str | Series, n: int = 10) -> Expr | Series: │ 8 │ │ 3 │ └─────┘ - >>> pl.tail(df["a"], 2) - shape: (2,) - Series: 'a' [i64] - [ - 8 - 3 - ] """ if isinstance(column, pl.Series): + warnings.warn( + "passing a Series to `tail` is deprecated. 
Use `Series.tail()` instead.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) return column.tail(n) return col(column).tail(n) diff --git a/py-polars/tests/unit/functions/test_functions.py b/py-polars/tests/unit/functions/test_functions.py index 464f551aa0d7..cc5d0cca9595 100644 --- a/py-polars/tests/unit/functions/test_functions.py +++ b/py-polars/tests/unit/functions/test_functions.py @@ -7,7 +7,7 @@ import pytest import polars as pl -from polars.testing import assert_frame_equal +from polars.testing import assert_frame_equal, assert_series_equal def test_concat_align() -> None: @@ -375,7 +375,8 @@ def test_lazy_functions() -> None: df = pl.DataFrame({"a": ["foo", "bar", "2"], "b": [1, 2, 3], "c": [1.0, 2.0, 3.0]}) out = df.select(pl.count("a")) assert list(out["a"]) == [3] - assert pl.count(df["a"]) == 3 + with pytest.deprecated_call(): + assert pl.count(df["a"]) == 3 out = df.select( [ pl.var("b").alias("1"), @@ -392,10 +393,12 @@ def test_lazy_functions() -> None: ) expected = 1.0 assert np.isclose(out.to_series(0), expected) - assert np.isclose(pl.var(df["b"]), expected) # type: ignore[arg-type] + with pytest.deprecated_call(): + assert np.isclose(pl.var(df["b"]), expected) # type: ignore[arg-type] expected = 1.0 assert np.isclose(out.to_series(1), expected) - assert np.isclose(pl.std(df["b"]), expected) # type: ignore[arg-type] + with pytest.deprecated_call(): + assert np.isclose(pl.std(df["b"]), expected) # type: ignore[arg-type] expected = 3 assert np.isclose(out.to_series(2), expected) with pytest.deprecated_call(): @@ -410,19 +413,24 @@ def test_lazy_functions() -> None: assert np.isclose(pl.sum(df["b"]), expected) expected = 2 assert np.isclose(out.to_series(5), expected) - assert np.isclose(pl.mean(df["b"]), expected) + with pytest.deprecated_call(): + assert np.isclose(pl.mean(df["b"]), expected) expected = 2 assert np.isclose(out.to_series(6), expected) - assert np.isclose(pl.median(df["b"]), expected) + with pytest.deprecated_call(): + assert np.isclose(pl.median(df["b"]), expected) expected = 3 assert np.isclose(out.to_series(7), expected) - assert np.isclose(pl.n_unique(df["b"]), expected) + with pytest.deprecated_call(): + assert np.isclose(pl.n_unique(df["b"]), expected) expected = 1 assert np.isclose(out.to_series(8), expected) - assert np.isclose(pl.first(df["b"]), expected) + with pytest.deprecated_call(): + assert np.isclose(pl.first(df["b"]), expected) expected = 3 assert np.isclose(out.to_series(9), expected) - assert np.isclose(pl.last(df["b"]), expected) + with pytest.deprecated_call(): + assert np.isclose(pl.last(df["b"]), expected) # regex selection out = df.select( @@ -435,3 +443,19 @@ def test_lazy_functions() -> None: assert out.rows() == [ ({"a": "foo", "b": 3}, {"b": 1, "c": 1.0}, {"a": None, "c": 6.0}) ] + + +def test_head_tail(fruits_cars: pl.DataFrame) -> None: + res_expr = fruits_cars.select([pl.head("A", 2)]) + with pytest.deprecated_call(): + res_series = pl.head(fruits_cars["A"], 2) + expected = pl.Series("A", [1, 2]) + assert_series_equal(res_expr.to_series(0), expected) + assert_series_equal(res_series, expected) + + res_expr = fruits_cars.select([pl.tail("A", 2)]) + with pytest.deprecated_call(): + res_series = pl.tail(fruits_cars["A"], 2) + expected = pl.Series("A", [4, 5]) + assert_series_equal(res_expr.to_series(0), expected) + assert_series_equal(res_series, expected) diff --git a/py-polars/tests/unit/test_df.py b/py-polars/tests/unit/test_df.py index 366152fa1152..6f1ac0d0e019 100644 --- a/py-polars/tests/unit/test_df.py +++ 
b/py-polars/tests/unit/test_df.py
@@ -2967,20 +2967,6 @@ def test_fill_null_limits() -> None:
     }
 
 
-def test_head_tail(fruits_cars: pl.DataFrame) -> None:
-    res_expr = fruits_cars.select([pl.head("A", 2)])
-    res_series = pl.head(fruits_cars["A"], 2)
-    expected = pl.Series("A", [1, 2])
-    assert_series_equal(res_expr.to_series(0), expected)
-    assert_series_equal(res_series, expected)
-
-    res_expr = fruits_cars.select([pl.tail("A", 2)])
-    res_series = pl.tail(fruits_cars["A"], 2)
-    expected = pl.Series("A", [4, 5])
-    assert_series_equal(res_expr.to_series(0), expected)
-    assert_series_equal(res_series, expected)
-
-
 def test_lower_bound_upper_bound(fruits_cars: pl.DataFrame) -> None:
     res_expr = fruits_cars.select(pl.col("A").lower_bound())
     assert res_expr.item() == -9223372036854775808

From eb2797e90802ea59aa7fa917b73787d0c35aa745 Mon Sep 17 00:00:00 2001
From: Marco Edward Gorelli
Date: Fri, 14 Jul 2023 19:00:13 +0100
Subject: [PATCH 12/37] docs(python): note ordering guarantee for groupby
 (#9879)

Co-authored-by: Stijn de Gooijer
---
 py-polars/polars/dataframe/frame.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
index 41c59e2643f5..8e7b11ebd046 100644
--- a/py-polars/polars/dataframe/frame.py
+++ b/py-polars/polars/dataframe/frame.py
@@ -4641,6 +4641,15 @@ def groupby(
             Setting this to ``True`` prevents the query from
             running on the streaming engine.
 
+        .. note::
+            Within each group, the order of rows is always preserved, regardless
+            of this argument.
+
+        Returns
+        -------
+        GroupBy
+            Object which can be used to perform aggregations.
+
         Examples
         --------
         Group by one column and call ``agg`` to compute the grouped sum of another

From cde0be24e8f7c0e450dd7b2deae001cd52973916 Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Fri, 14 Jul 2023 20:26:34 +0200
Subject: [PATCH 13/37] feat(rust, python): respect and allow more options in
 eager json parsing (#9882)

---
 polars/polars-core/src/schema.rs              |   7 ++
 polars/polars-io/src/json/mod.rs              | 104 +++++++++++++-----
 polars/polars-io/src/ndjson/buffer.rs         |  44 ++++++--
 polars/polars-io/src/ndjson/core.rs           |  38 +++++--
 polars/polars-io/src/utils.rs                 |  11 ++
 .../physical_plan/executors/scan/ndjson.rs    |   2 +-
 py-polars/polars/dataframe/frame.py           |  28 ++++-
 py-polars/polars/io/json.py                   |  26 ++++-
 py-polars/polars/io/ndjson.py                 |  32 +++++-
 py-polars/src/dataframe.rs                    |  89 +++++++++------
 py-polars/tests/unit/io/test_json.py          |  47 ++++++++
 11 files changed, 338 insertions(+), 90 deletions(-)

diff --git a/polars/polars-core/src/schema.rs b/polars/polars-core/src/schema.rs
index dd05c02e0dea..caac67990add 100644
--- a/polars/polars-core/src/schema.rs
+++ b/polars/polars-core/src/schema.rs
@@ -195,6 +195,13 @@ impl Schema {
             .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name))
     }
 
+    /// Get a mutable reference to the dtype of the field named `name`, or `Err(PolarsErr)` if the field doesn't exist
+    pub fn try_get_mut(&mut self, name: &str) -> PolarsResult<&mut DataType> {
+        self.inner
+            .get_mut(name)
+            .ok_or_else(|| polars_err!(SchemaFieldNotFound: "{}", name))
+    }
+
     /// Return all data about the field named `name`: its index in the schema, its name, and its dtype
     ///
     /// Returns `Some((index, &name, &dtype))` if the field exists, `None` if it doesn't.
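For orientation before the reader changes below, a minimal sketch of how the
new options surface in Python (argument names as added to `read_ndjson` in
this patch; the sample data is illustrative only):

    import io

    import polars as pl

    jsonl = b'{"a": 1, "b": "x"}\n{"a": 2, "b": null}\n'

    # "a" is forced to Int32 via schema_overrides while "b" is inferred;
    # with ignore_errors=True, values that cannot be parsed under the
    # resulting schema become null instead of raising.
    df = pl.read_ndjson(
        io.BytesIO(jsonl),
        schema_overrides={"a": pl.Int32},
        ignore_errors=True,
    )
    print(df.schema)  # expected: {'a': Int32, 'b': Utf8}
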
diff --git a/polars/polars-io/src/json/mod.rs b/polars/polars-io/src/json/mod.rs index 76cb6b452c10..79a246d5637b 100644 --- a/polars/polars-io/src/json/mod.rs +++ b/polars/polars-io/src/json/mod.rs @@ -157,20 +157,22 @@ where /// Reads JSON in one of the formats in [`JsonFormat`] into a DataFrame. #[must_use] -pub struct JsonReader +pub struct JsonReader<'a, R> where R: MmapBytesReader, { reader: R, rechunk: bool, + ignore_errors: bool, infer_schema_len: Option, batch_size: usize, projection: Option>, - schema: Option, + schema: Option, + schema_overwrite: Option<&'a Schema>, json_format: JsonFormat, } -impl SerReader for JsonReader +impl<'a, R> SerReader for JsonReader<'a, R> where R: MmapBytesReader, { @@ -178,10 +180,12 @@ where JsonReader { reader, rechunk: true, + ignore_errors: false, infer_schema_len: Some(100), batch_size: 8192, projection: None, schema: None, + schema_overwrite: None, json_format: JsonFormat::Json, } } @@ -201,32 +205,63 @@ where let out = match self.json_format { JsonFormat::Json => { + polars_ensure!(!self.ignore_errors, InvalidOperation: "'ignore_errors' only supported in ndjson"); let mut bytes = rb.deref().to_vec(); let json_value = simd_json::to_borrowed_value(&mut bytes).map_err(to_compute_err)?; - // likely struct type - let dtype = if let BorrowedValue::Array(values) = &json_value { - // struct types may have missing fields so find supertype - let dtype = values - .iter() - .take(self.infer_schema_len.unwrap_or(usize::MAX)) - .map(|value| { - infer(value) - .map_err(PolarsError::from) - .map(|dt| DataType::from(&dt)) - }) - .fold_first_(|l, r| { - let l = l?; - let r = r?; - try_get_supertype(&l, &r) - }) - .unwrap()?; - let dtype = DataType::List(Box::new(dtype)); - dtype.to_arrow() + // struct type + let dtype = if let Some(mut schema) = self.schema { + if let Some(overwrite) = self.schema_overwrite { + let mut_schema = Arc::make_mut(&mut schema); + overwrite_schema(mut_schema, overwrite)?; + } + DataType::Struct(schema.iter_fields().collect()).to_arrow() } else { - infer(&json_value)? 
+ // infer + if let BorrowedValue::Array(values) = &json_value { + polars_ensure!(self.schema_overwrite.is_none() && self.schema.is_none(), ComputeError: "schema arguments not yet supported for Array json"); + + // struct types may have missing fields so find supertype + let dtype = values + .iter() + .take(self.infer_schema_len.unwrap_or(usize::MAX)) + .map(|value| { + infer(value) + .map_err(PolarsError::from) + .map(|dt| DataType::from(&dt)) + }) + .fold_first_(|l, r| { + let l = l?; + let r = r?; + try_get_supertype(&l, &r) + }) + .unwrap()?; + let dtype = DataType::List(Box::new(dtype)); + dtype.to_arrow() + } else { + let dtype = infer(&json_value)?; + if let Some(overwrite) = self.schema_overwrite { + let ArrowDataType::Struct(fields) = dtype else { + polars_bail!(ComputeError: "can only deserialize json objects") + }; + + let mut schema = Schema::from_iter(fields.iter()); + overwrite_schema(&mut schema, overwrite)?; + + DataType::Struct( + schema + .into_iter() + .map(|(name, dt)| Field::new(&name, dt)) + .collect(), + ) + .to_arrow() + } else { + dtype + } + } }; + let arr = polars_json::json::deserialize(&json_value, dtype)?; let arr = arr.as_any().downcast_ref::().ok_or_else( || polars_err!(ComputeError: "can only deserialize json objects"), @@ -237,12 +272,14 @@ where let mut json_reader = CoreJsonReader::new( rb, None, - None, + self.schema, + self.schema_overwrite, None, 1024, // sample size 1 << 18, false, self.infer_schema_len, + self.ignore_errors, )?; let mut df: DataFrame = json_reader.as_df()?; if self.rechunk { @@ -252,6 +289,7 @@ where } }?; + // TODO! Ensure we don't materialize the columns we don't need if let Some(proj) = &self.projection { out.select(proj) } else { @@ -260,13 +298,19 @@ where } } -impl JsonReader +impl<'a, R> JsonReader<'a, R> where R: MmapBytesReader, { /// Set the JSON file's schema - pub fn with_schema(mut self, schema: &Schema) -> Self { - self.schema = Some(schema.to_arrow()); + pub fn with_schema(mut self, schema: SchemaRef) -> Self { + self.schema = Some(schema); + self + } + + /// Overwrite parts of the inferred schema. + pub fn with_schema_overwrite(mut self, schema: &'a Schema) -> Self { + self.schema_overwrite = Some(schema); self } @@ -305,4 +349,10 @@ where self.json_format = format; self } + + /// Return a `null` if an error occurs during parsing. 
+ pub fn with_ignore_errors(mut self, ignore: bool) -> Self { + self.ignore_errors = ignore; + self + } } diff --git a/polars/polars-io/src/ndjson/buffer.rs b/polars/polars-io/src/ndjson/buffer.rs index c5f9635a2168..654190777ea5 100644 --- a/polars/polars-io/src/ndjson/buffer.rs +++ b/polars/polars-io/src/ndjson/buffer.rs @@ -20,19 +20,23 @@ impl<'a> Hash for BufferKey<'a> { } } -pub(crate) struct Buffer<'a>(&'a str, AnyValueBuffer<'a>); +pub(crate) struct Buffer<'a> { + name: &'a str, + ignore_errors: bool, + buf: AnyValueBuffer<'a>, +} impl Buffer<'_> { pub fn into_series(self) -> Series { - let mut s = self.1.into_series(); - s.rename(self.0); + let mut s = self.buf.into_series(); + s.rename(self.name); s } #[inline] pub(crate) fn add(&mut self, value: &Value) -> PolarsResult<()> { use AnyValueBuffer::*; - match &mut self.1 { + match &mut self.buf { Boolean(buf) => { match value { Value::Static(StaticNode::Bool(b)) => buf.append_value(*b), @@ -109,7 +113,7 @@ impl Buffer<'_> { Ok(()) } All(dtype, buf) => { - let av = deserialize_all(value, dtype)?; + let av = deserialize_all(value, dtype, self.ignore_errors)?; buf.push(av); Ok(()) } @@ -117,19 +121,27 @@ impl Buffer<'_> { } } pub fn add_null(&mut self) { - self.1.add(AnyValue::Null).expect("should not fail"); + self.buf.add(AnyValue::Null).expect("should not fail"); } } pub(crate) fn init_buffers( schema: &Schema, capacity: usize, + ignore_errors: bool, ) -> PolarsResult> { schema .iter() .map(|(name, dtype)| { let av_buf = (dtype, capacity).into(); let key = KnownKey::from(name.as_str()); - Ok((BufferKey(key), Buffer(name, av_buf))) + Ok(( + BufferKey(key), + Buffer { + name, + buf: av_buf, + ignore_errors, + }, + )) }) .collect() } @@ -163,7 +175,11 @@ where }) } -fn deserialize_all<'a>(json: &Value, dtype: &DataType) -> PolarsResult> { +fn deserialize_all<'a>( + json: &Value, + dtype: &DataType, + ignore_errors: bool, +) -> PolarsResult> { let out = match json { Value::Static(StaticNode::Bool(b)) => AnyValue::Boolean(*b), Value::Static(StaticNode::I64(i)) => AnyValue::Int64(*i), @@ -173,11 +189,14 @@ fn deserialize_all<'a>(json: &Value, dtype: &DataType) -> PolarsResult AnyValue::Utf8Owned(s.as_ref().into()), Value::Array(arr) => { let Some(inner_dtype) = dtype.inner_dtype() else { + if ignore_errors { + return Ok(AnyValue::Null) + } polars_bail!(ComputeError: "expected list/array in json value, got {}", dtype); }; let vals: Vec = arr .iter() - .map(|val| deserialize_all(val, inner_dtype)) + .map(|val| deserialize_all(val, inner_dtype, ignore_errors)) .collect::>()?; let s = Series::from_any_values_and_dtype("", &vals, inner_dtype, false)?; AnyValue::List(s) @@ -191,7 +210,7 @@ fn deserialize_all<'a>(json: &Value, dtype: &DataType) -> PolarsResult(json: &Value, dtype: &DataType) -> PolarsResult>>()?; AnyValue::StructOwned(Box::new((vals, fields.clone()))) } else { + if ignore_errors { + return Ok(AnyValue::Null); + } polars_bail!( - ComputeError: "expected {dtype} in json value, got object", + ComputeError: "expected {} in json value, got object", dtype, ); } } diff --git a/polars/polars-io/src/ndjson/core.rs b/polars/polars-io/src/ndjson/core.rs index 79a11501f20d..9417967e1e0e 100644 --- a/polars/polars-io/src/ndjson/core.rs +++ b/polars/polars-io/src/ndjson/core.rs @@ -1,4 +1,3 @@ -use std::borrow::Cow; use std::fs::File; use std::io::Cursor; use std::path::PathBuf; @@ -30,9 +29,11 @@ where n_threads: Option, infer_schema_len: Option, chunk_size: usize, - schema: Option<&'a Schema>, + schema: Option, + schema_overwrite: 
Option<&'a Schema>, path: Option, low_memory: bool, + ignore_errors: bool, } impl<'a, R> JsonLineReader<'a, R> @@ -43,10 +44,16 @@ where self.n_rows = num_rows; self } - pub fn with_schema(mut self, schema: &'a Schema) -> Self { + pub fn with_schema(mut self, schema: SchemaRef) -> Self { self.schema = Some(schema); self } + + pub fn with_schema_overwrite(mut self, schema: &'a Schema) -> Self { + self.schema_overwrite = Some(schema); + self + } + pub fn with_rechunk(mut self, rechunk: bool) -> Self { self.rechunk = rechunk; self @@ -102,9 +109,11 @@ where n_threads: None, infer_schema_len: Some(128), schema: None, + schema_overwrite: None, path: None, chunk_size: 1 << 18, low_memory: false, + ignore_errors: false, } } fn finish(mut self) -> PolarsResult { @@ -114,11 +123,13 @@ where reader_bytes, self.n_rows, self.schema, + self.schema_overwrite, self.n_threads, 1024, // sample size self.chunk_size, self.low_memory, self.infer_schema_len, + self.ignore_errors, )?; let mut df: DataFrame = json_reader.as_df()?; @@ -132,28 +143,31 @@ where pub(crate) struct CoreJsonReader<'a> { reader_bytes: Option>, n_rows: Option, - schema: Cow<'a, Schema>, + schema: SchemaRef, n_threads: Option, sample_size: usize, chunk_size: usize, low_memory: bool, + ignore_errors: bool, } impl<'a> CoreJsonReader<'a> { #[allow(clippy::too_many_arguments)] pub(crate) fn new( reader_bytes: ReaderBytes<'a>, n_rows: Option, - schema: Option<&'a Schema>, + schema: Option, + schema_overwrite: Option<&Schema>, n_threads: Option, sample_size: usize, chunk_size: usize, low_memory: bool, infer_schema_len: Option, + ignore_errors: bool, ) -> PolarsResult> { let reader_bytes = reader_bytes; - let schema = match schema { - Some(schema) => Cow::Borrowed(schema), + let mut schema = match schema { + Some(schema) => schema, None => { let bytes: &[u8] = &reader_bytes; let mut cursor = Cursor::new(bytes); @@ -161,9 +175,14 @@ impl<'a> CoreJsonReader<'a> { let data_type = polars_json::ndjson::infer(&mut cursor, infer_schema_len)?; let schema = StructArray::get_fields(&data_type).iter().collect(); - Cow::Owned(schema) + Arc::new(schema) } }; + if let Some(overwriting_schema) = schema_overwrite { + let schema = Arc::make_mut(&mut schema); + overwrite_schema(schema, overwriting_schema)?; + } + Ok(CoreJsonReader { reader_bytes: Some(reader_bytes), schema, @@ -172,6 +191,7 @@ impl<'a> CoreJsonReader<'a> { n_threads, chunk_size, low_memory, + ignore_errors, }) } fn parse_json(&mut self, mut n_threads: usize, bytes: &[u8]) -> PolarsResult { @@ -212,7 +232,7 @@ impl<'a> CoreJsonReader<'a> { file_chunks .into_par_iter() .map(|(start_pos, stop_at_nbytes)| { - let mut buffers = init_buffers(&self.schema, capacity)?; + let mut buffers = init_buffers(&self.schema, capacity, self.ignore_errors)?; parse_lines(&bytes[start_pos..stop_at_nbytes], &mut buffers)?; DataFrame::new( buffers diff --git a/polars/polars-io/src/utils.rs b/polars/polars-io/src/utils.rs index 0c2297fd5218..e9785bf3c629 100644 --- a/polars/polars-io/src/utils.rs +++ b/polars/polars-io/src/utils.rs @@ -106,6 +106,17 @@ pub(crate) fn update_row_counts2(dfs: &mut [DataFrame], offset: IdxSize) { } } +#[cfg(feature = "json")] +pub(crate) fn overwrite_schema( + schema: &mut Schema, + overwriting_schema: &Schema, +) -> PolarsResult<()> { + for (k, value) in overwriting_schema.iter() { + *schema.try_get_mut(k)? 
= value.clone(); + } + Ok(()) +} + #[cfg(test)] mod tests { use std::path::PathBuf; diff --git a/polars/polars-lazy/src/physical_plan/executors/scan/ndjson.rs b/polars/polars-lazy/src/physical_plan/executors/scan/ndjson.rs index 8687a22b3f74..d9e2cb70d63d 100644 --- a/polars/polars-lazy/src/physical_plan/executors/scan/ndjson.rs +++ b/polars/polars-lazy/src/physical_plan/executors/scan/ndjson.rs @@ -10,7 +10,7 @@ impl AnonymousScan for LazyJsonLineReader { fn scan(&self, scan_opts: AnonymousScanOptions) -> PolarsResult { let schema = scan_opts.output_schema.unwrap_or(scan_opts.schema); JsonLineReader::from_path(&self.path)? - .with_schema(&schema) + .with_schema(schema) .with_rechunk(self.rechunk) .with_chunk_size(self.batch_size) .low_memory(self.low_memory) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 8e7b11ebd046..dc648c8a2883 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -991,7 +991,13 @@ def _read_ipc( return self @classmethod - def _read_json(cls, source: str | Path | IOBase | bytes) -> Self: + def _read_json( + cls, + source: str | Path | IOBase | bytes, + *, + schema: SchemaDefinition | None = None, + schema_overrides: SchemaDefinition | None = None, + ) -> Self: """ Read into a DataFrame from a JSON file. @@ -1008,11 +1014,20 @@ def _read_json(cls, source: str | Path | IOBase | bytes) -> Self: source = normalise_filepath(source) self = cls.__new__(cls) - self._df = PyDataFrame.read_json(source, False) + self._df = PyDataFrame.read_json( + source, schema=schema, schema_overrides=schema_overrides + ) return self @classmethod - def _read_ndjson(cls, source: str | Path | IOBase | bytes) -> Self: + def _read_ndjson( + cls, + source: str | Path | IOBase | bytes, + *, + schema: SchemaDefinition | None = None, + schema_overrides: SchemaDefinition | None = None, + ignore_errors: bool = False, + ) -> Self: """ Read into a DataFrame from a newline delimited JSON file. @@ -1029,7 +1044,12 @@ def _read_ndjson(cls, source: str | Path | IOBase | bytes) -> Self: source = normalise_filepath(source) self = cls.__new__(cls) - self._df = PyDataFrame.read_ndjson(source) + self._df = PyDataFrame.read_ndjson( + source, + ignore_errors=ignore_errors, + schema=schema, + schema_overrides=schema_overrides, + ) return self @property diff --git a/py-polars/polars/io/json.py b/py-polars/polars/io/json.py index b72872efb437..f8c13adfe05a 100644 --- a/py-polars/polars/io/json.py +++ b/py-polars/polars/io/json.py @@ -9,9 +9,15 @@ from pathlib import Path from polars import DataFrame + from polars.type_aliases import SchemaDefinition -def read_json(source: str | Path | IOBase | bytes) -> DataFrame: +def read_json( + source: str | Path | IOBase | bytes, + *, + schema: SchemaDefinition | None = None, + schema_overrides: SchemaDefinition | None = None, +) -> DataFrame: """ Read into a DataFrame from a JSON file. @@ -19,10 +25,26 @@ def read_json(source: str | Path | IOBase | bytes) -> DataFrame: ---------- source Path to a file or a file-like object. + schema : Sequence of str, (str,DataType) pairs, or a {str:DataType,} dict + The DataFrame schema may be declared in several ways: + + * As a dict of {name:type} pairs; if type is None, it will be auto-inferred. + * As a list of column names; in this case types are automatically inferred. + * As a list of (name,type) pairs; this is equivalent to the dictionary form. 
+
+        If you supply a list of column names that does not match the names in the
+        underlying data, the names given here will overwrite them. The number
+        of names given in the schema should match the underlying data dimensions.
+    schema_overrides : dict, default None
+        Support type specification or override of one or more columns; note that
+        any dtypes inferred from the schema param will be overridden.
 
     See Also
     --------
     read_ndjson
 
     """
-    return pl.DataFrame._read_json(source)
+    return pl.DataFrame._read_json(
+        source, schema=schema, schema_overrides=schema_overrides
+    )
diff --git a/py-polars/polars/io/ndjson.py b/py-polars/polars/io/ndjson.py
index e6607caa26b7..847a5ba30337 100644
--- a/py-polars/polars/io/ndjson.py
+++ b/py-polars/polars/io/ndjson.py
@@ -11,9 +11,16 @@
     from io import IOBase
 
     from polars import DataFrame, LazyFrame
+    from polars.type_aliases import SchemaDefinition
 
 
-def read_ndjson(source: str | Path | IOBase | bytes) -> DataFrame:
+def read_ndjson(
+    source: str | Path | IOBase | bytes,
+    *,
+    schema: SchemaDefinition | None = None,
+    schema_overrides: SchemaDefinition | None = None,
+    ignore_errors: bool = False,
+) -> DataFrame:
     """
     Read into a DataFrame from a newline delimited JSON file.
 
@@ -21,9 +28,30 @@ def read_ndjson(source: str | Path | IOBase | bytes) -> DataFrame:
     ----------
     source
         Path to a file or a file-like object.
+    schema : Sequence of str, (str,DataType) pairs, or a {str:DataType,} dict
+        The DataFrame schema may be declared in several ways:
+
+        * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
+        * As a list of column names; in this case types are automatically inferred.
+        * As a list of (name,type) pairs; this is equivalent to the dictionary form.
+
+        If you supply a list of column names that does not match the names in the
+        underlying data, the names given here will overwrite them. The number
+        of names given in the schema should match the underlying data dimensions.
+    schema_overrides : dict, default None
+        Support type specification or override of one or more columns; note that
+        any dtypes inferred from the schema param will be overridden.
+    ignore_errors
+        Return `Null` if parsing fails because of schema mismatches.
""" - return pl.DataFrame._read_ndjson(source) + return pl.DataFrame._read_ndjson( + source, + schema=schema, + schema_overrides=schema_overrides, + ignore_errors=ignore_errors, + ) def scan_ndjson( diff --git a/py-polars/src/dataframe.rs b/py-polars/src/dataframe.rs index 2c03c27dbb32..3d8cf8de109a 100644 --- a/py-polars/src/dataframe.rs +++ b/py-polars/src/dataframe.rs @@ -351,41 +351,46 @@ impl PyDataFrame { #[staticmethod] #[cfg(feature = "json")] - pub fn read_json(py_f: &PyAny, json_lines: bool) -> PyResult { + pub fn read_json( + py_f: &PyAny, + schema: Option>, + schema_overrides: Option>, + ) -> PyResult { + // memmap the file first let mmap_bytes_r = get_mmap_bytes_reader(py_f)?; - if json_lines { - let out = JsonReader::new(mmap_bytes_r) - .with_json_format(JsonFormat::JsonLines) - .finish() - .map_err(|e| PyPolarsErr::Other(format!("{e}")))?; - Ok(out.into()) - } else { - // memmap the file first - let mmap_bytes_r = get_mmap_bytes_reader(py_f)?; - let mmap_read: ReaderBytes = (&mmap_bytes_r).into(); - let bytes = mmap_read.deref(); - - // Happy path is our column oriented json as that is most performant - // on failure we try - match serde_json::from_slice::(bytes) { - Ok(df) => Ok(df.into()), - Err(e) => { - let msg = format!("{e}"); - // parsing succeeded, but the dataframe was invalid - if msg.contains("successful parse invalid data") { - let e = PyPolarsErr::from(PolarsError::ComputeError(msg.into())); - Err(PyErr::from(e)) + let mmap_read: ReaderBytes = (&mmap_bytes_r).into(); + let bytes = mmap_read.deref(); + + // Happy path is our column oriented json as that is most performant + // on failure we try + match serde_json::from_slice::(bytes) { + Ok(df) => Ok(df.into()), + Err(e) => { + let msg = format!("{e}"); + // parsing succeeded, but the dataframe was invalid + if msg.contains("successful parse invalid data") { + let e = PyPolarsErr::from(PolarsError::ComputeError(msg.into())); + Err(PyErr::from(e)) + } + // parsing error + // try arrow json reader instead + // this is row oriented + else { + let mut builder = + JsonReader::new(mmap_bytes_r).with_json_format(JsonFormat::Json); + + if let Some(schema) = schema { + builder = builder.with_schema(Arc::new(schema.0)); } - // parsing error - // try arrow json reader instead - // this is row oriented - else { - let out = JsonReader::new(mmap_bytes_r) - .with_json_format(JsonFormat::Json) - .finish() - .map_err(|e| PyPolarsErr::Other(format!("{e}")))?; - Ok(out.into()) + + if let Some(schema) = schema_overrides.as_ref() { + builder = builder.with_schema_overwrite(&schema.0); } + + let out = builder + .finish() + .map_err(|e| PyPolarsErr::Other(format!("{e}")))?; + Ok(out.into()) } } } @@ -393,11 +398,27 @@ impl PyDataFrame { #[staticmethod] #[cfg(feature = "json")] - pub fn read_ndjson(py_f: &PyAny) -> PyResult { + pub fn read_ndjson( + py_f: &PyAny, + ignore_errors: bool, + schema: Option>, + schema_overrides: Option>, + ) -> PyResult { let mmap_bytes_r = get_mmap_bytes_reader(py_f)?; - let out = JsonReader::new(mmap_bytes_r) + let mut builder = JsonReader::new(mmap_bytes_r) .with_json_format(JsonFormat::JsonLines) + .with_ignore_errors(ignore_errors); + + if let Some(schema) = schema { + builder = builder.with_schema(Arc::new(schema.0)); + } + + if let Some(schema) = schema_overrides.as_ref() { + builder = builder.with_schema_overwrite(&schema.0); + } + + let out = builder .finish() .map_err(|e| PyPolarsErr::Other(format!("{e}")))?; Ok(out.into()) diff --git a/py-polars/tests/unit/io/test_json.py 
b/py-polars/tests/unit/io/test_json.py index 4bb40ba2e43b..f762e045a404 100644 --- a/py-polars/tests/unit/io/test_json.py +++ b/py-polars/tests/unit/io/test_json.py @@ -152,3 +152,50 @@ def test_json_deserialize_9687() -> None: result = pl.read_json(json.dumps(response).encode()) assert result.to_dict(False) == {k: [v] for k, v in response.items()} + + +def test_ndjson_ignore_errors() -> None: + # this schema is inconsistent as "value" is string and object + jsonl = r"""{"Type":"insert","Key":[1],"SeqNo":1,"Timestamp":1,"Fields":[{"Name":"added_id","Value":2},{"Name":"body","Value":{"a": 1}}]} + {"Type":"insert","Key":[1],"SeqNo":1,"Timestamp":1,"Fields":[{"Name":"added_id","Value":2},{"Name":"body","Value":{"a": 1}}]}""" + + buf = io.BytesIO(jsonl.encode()) + + # check if we can replace with nulls + assert pl.read_ndjson(buf, ignore_errors=True).to_dict(False) == { + "Type": ["insert", "insert"], + "Key": [[1], [1]], + "SeqNo": [1, 1], + "Timestamp": [1, 1], + "Fields": [ + [{"Name": "added_id", "Value": "2"}, {"Name": "body", "Value": None}], + [{"Name": "added_id", "Value": "2"}, {"Name": "body", "Value": None}], + ], + } + + schema = { + "Fields": pl.List( + pl.Struct([pl.Field("Name", pl.Utf8), pl.Field("Value", pl.Int64)]) + ) + } + # schema argument only parses Fields + assert pl.read_ndjson(buf, schema=schema, ignore_errors=True).to_dict(False) == { + "Fields": [ + [{"Name": "added_id", "Value": 2}, {"Name": "body", "Value": None}], + [{"Name": "added_id", "Value": 2}, {"Name": "body", "Value": None}], + ] + } + + # schema_overrides argument does schema inference, but overrides Fields + assert pl.read_ndjson(buf, schema_overrides=schema, ignore_errors=True).to_dict( + False + ) == { + "Type": ["insert", "insert"], + "Key": [[1], [1]], + "SeqNo": [1, 1], + "Timestamp": [1, 1], + "Fields": [ + [{"Name": "added_id", "Value": 2}, {"Name": "body", "Value": None}], + [{"Name": "added_id", "Value": 2}, {"Name": "body", "Value": None}], + ], + } From 147944c95a11643cf77da3b467938f8a35d6ed9e Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Fri, 14 Jul 2023 20:27:31 +0200 Subject: [PATCH 14/37] feat(python): convenience support for parsing a list of SQL strings with `sql_expr` (#9881) --- py-polars/polars/functions/lazy.py | 38 ++++++++++++++++++++++++++---- py-polars/tests/unit/test_sql.py | 12 ++++++---- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/py-polars/polars/functions/lazy.py b/py-polars/polars/functions/lazy.py index f970b4f36c9d..97a2a9aefa7f 100644 --- a/py-polars/polars/functions/lazy.py +++ b/py-polars/polars/functions/lazy.py @@ -2093,17 +2093,29 @@ def rolling_corr( ) -def sql_expr(sql: str) -> Expr: +@overload +def sql_expr(sql: str) -> Expr: # type: ignore[misc] + ... + + +@overload +def sql_expr(sql: Sequence[str]) -> list[Expr]: + ... + + +def sql_expr(sql: str | Sequence[str]) -> Expr | list[Expr]: """ - Parse a SQL expression to a polars expression. + Parse one or more SQL expressions to polars expression(s). Parameters ---------- sql - SQL expression + One or more SQL expressions. Examples -------- + Parse a single SQL expression: + >>> df = pl.DataFrame({"a": [2, 1]}) >>> expr = pl.sql_expr("MAX(a)") >>> df.select(expr) @@ -2115,5 +2127,23 @@ def sql_expr(sql: str) -> Expr: ╞═════╡ │ 2 │ └─────┘ + + Parse multiple SQL expressions: + + >>> df.with_columns( + ... *pl.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"]), + ... 
) + shape: (2, 3) + ┌─────┬─────┬───────┐ + │ a ┆ a_a ┆ a_txt │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ f64 ┆ str │ + ╞═════╪═════╪═══════╡ + │ 2 ┆ 4.0 ┆ 2 │ + │ 1 ┆ 1.0 ┆ 1 │ + └─────┴─────┴───────┘ """ - return wrap_expr(plr.sql_expr(sql)) + if isinstance(sql, str): + return wrap_expr(plr.sql_expr(sql)) + else: + return [wrap_expr(plr.sql_expr(q)) for q in sql] diff --git a/py-polars/tests/unit/test_sql.py b/py-polars/tests/unit/test_sql.py index d2c9869ce718..86a78ddddfd2 100644 --- a/py-polars/tests/unit/test_sql.py +++ b/py-polars/tests/unit/test_sql.py @@ -771,15 +771,17 @@ def test_register_context() -> None: def test_sql_expr() -> None: df = pl.DataFrame({"a": [1, 2, 3], "b": ["xyz", "abcde", None]}) - sql_exprs = ( - pl.sql_expr("MIN(a)"), - pl.sql_expr("POWER(a,a) AS aa"), - pl.sql_expr("SUBSTR(b,1,2) AS b2"), + sql_exprs = pl.sql_expr( + [ + "MIN(a)", + "POWER(a,a) AS aa", + "SUBSTR(b,1,2) AS b2", + ] ) expected = pl.DataFrame( {"a": [1, 1, 1], "aa": [1, 4, 27], "b2": ["yz", "bc", None]} ) - assert df.select(sql_exprs).frame_equal(expected) + assert df.select(*sql_exprs).frame_equal(expected) # expect expressions that can't reasonably be parsed as expressions to raise # (for example: those that explicitly reference tables and/or use wildcards) From 134d43edeb5d462a77b8d6475ea7746a44656342 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Havelund=20Welling?= Date: Fri, 14 Jul 2023 20:28:42 +0200 Subject: [PATCH 15/37] feat(rust): access `OptState` in `LazyFrame` to unit-test optimization toggle methods. (#9883) --- polars/polars-lazy/polars-plan/src/frame/opt_state.rs | 2 +- polars/polars-lazy/src/frame/mod.rs | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/polars/polars-lazy/polars-plan/src/frame/opt_state.rs b/polars/polars-lazy/polars-plan/src/frame/opt_state.rs index f99ae1f4f8b8..28cf448c76f9 100644 --- a/polars/polars-lazy/polars-plan/src/frame/opt_state.rs +++ b/polars/polars-lazy/polars-plan/src/frame/opt_state.rs @@ -1,4 +1,4 @@ -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] /// State of the allowed optimizations pub struct OptState { pub projection_pushdown: bool, diff --git a/polars/polars-lazy/src/frame/mod.rs b/polars/polars-lazy/src/frame/mod.rs index 593333c96f82..ed3641d0b001 100644 --- a/polars/polars-lazy/src/frame/mod.rs +++ b/polars/polars-lazy/src/frame/mod.rs @@ -110,6 +110,11 @@ impl LazyFrame { } } + /// Get current optimizations + pub fn get_current_optimizations(&self) -> OptState { + self.opt_state + } + /// Set allowed optimizations pub fn with_optimizations(mut self, opt_state: OptState) -> Self { self.opt_state = opt_state; From 9672a410bc60833b572a4c06599c26740e97f2ee Mon Sep 17 00:00:00 2001 From: Josh Magarick Date: Fri, 14 Jul 2023 23:08:05 -0700 Subject: [PATCH 16/37] refactor(rust): Rolling quantile and median use DynArgs (#9867) --- .../polars-arrow/src/kernels/rolling/mod.rs | 8 +- .../src/kernels/rolling/no_nulls/quantile.rs | 347 +++++----------- .../src/kernels/rolling/nulls/quantile.rs | 379 ++++++------------ .../src/kernels/rolling/window.rs | 11 +- polars/polars-arrow/src/prelude.rs | 2 +- .../src/frame/groupby/aggregations/mod.rs | 32 +- polars/polars-lazy/polars-plan/src/dsl/mod.rs | 9 +- .../src/chunkedarray/rolling_window/floats.rs | 63 +-- .../src/chunkedarray/rolling_window/ints.rs | 11 +- .../src/chunkedarray/rolling_window/mod.rs | 9 +- .../rolling_kernels/no_nulls.rs | 21 + polars/polars-time/src/series/_trait.rs | 18 +- .../src/series/implementations/floats.rs | 9 +- 
.../src/series/implementations/integers.rs | 9 +- polars/tests/it/core/rolling_window.rs | 104 +++-- py-polars/src/expr/general.rs | 17 +- 16 files changed, 369 insertions(+), 680 deletions(-) diff --git a/polars/polars-arrow/src/kernels/rolling/mod.rs b/polars/polars-arrow/src/kernels/rolling/mod.rs index 12ad13b47f1f..0b1f2343ae88 100644 --- a/polars/polars-arrow/src/kernels/rolling/mod.rs +++ b/polars/polars-arrow/src/kernels/rolling/mod.rs @@ -10,7 +10,7 @@ use std::sync::Arc; use arrow::array::PrimitiveArray; use arrow::bitmap::{Bitmap, MutableBitmap}; use arrow::types::NativeType; -use num_traits::{Bounded, Float, NumCast, One, ToPrimitive, Zero}; +use num_traits::{Bounded, Float, NumCast, One, Zero}; use window::*; use crate::data_types::IsFloat; @@ -142,3 +142,9 @@ where pub struct RollingVarParams { pub ddof: u8, } + +#[derive(Clone, Copy, Debug)] +pub struct RollingQuantileParams { + pub prob: f64, + pub interpol: QuantileInterpolOptions, +} diff --git a/polars/polars-arrow/src/kernels/rolling/no_nulls/quantile.rs b/polars/polars-arrow/src/kernels/rolling/no_nulls/quantile.rs index 2eaa76309f01..c790e2a1706b 100644 --- a/polars/polars-arrow/src/kernels/rolling/no_nulls/quantile.rs +++ b/polars/polars-arrow/src/kernels/rolling/no_nulls/quantile.rs @@ -5,199 +5,139 @@ use polars_error::polars_ensure; use super::QuantileInterpolOptions::*; use super::*; -use crate::index::IdxSize; -use crate::trusted_len::TrustedLen; -// used by agg_quantile -pub fn rolling_quantile_by_iter( - values: &[T], - quantile: f64, - interpolation: QuantileInterpolOptions, - offsets: O, -) -> ArrayRef -where - O: Iterator + TrustedLen, - T: std::iter::Sum - + NativeType - + Copy - + PartialOrd - + ToPrimitive - + NumCast - + Add - + Sub - + Div - + Mul - + IsFloat, -{ - if values.is_empty() { - let out: Vec = vec![]; - return Box::new(PrimitiveArray::new(T::PRIMITIVE.into(), out.into(), None)); - } - - let mut sorted_window = SortedBuf::new(values, 0, 1); - - let out = offsets - .map(|(start, len)| { - let end = start + len; - - // safety: - // we are in bounds - if start == end { - None - } else { - let window = unsafe { sorted_window.update(start as usize, end as usize) }; - Some(compute_quantile2(window, quantile, interpolation)) - } - }) - .collect::>(); - - Box::new(out) +pub struct QuantileWindow<'a, T: NativeType + IsFloat + PartialOrd> { + sorted: SortedBuf<'a, T>, + prob: f64, + interpol: QuantileInterpolOptions, } -pub(crate) fn compute_quantile2( - vals: &[T], - quantile: f64, - interpolation: QuantileInterpolOptions, -) -> T -where - T: std::iter::Sum - + Copy - + PartialOrd - + ToPrimitive - + NumCast - + Add - + Sub - + Div - + Mul - + IsFloat, +impl< + 'a, + T: NativeType + + IsFloat + + Float + + std::iter::Sum + + AddAssign + + SubAssign + + Div + + NumCast + + One + + Zero + + PartialOrd + + Sub, + > RollingAggWindowNoNulls<'a, T> for QuantileWindow<'a, T> { - let length = vals.len(); - - let mut idx = match interpolation { - QuantileInterpolOptions::Nearest => ((length as f64) * quantile) as usize, - QuantileInterpolOptions::Lower - | QuantileInterpolOptions::Midpoint - | QuantileInterpolOptions::Linear => ((length as f64 - 1.0) * quantile).floor() as usize, - QuantileInterpolOptions::Higher => ((length as f64 - 1.0) * quantile).ceil() as usize, - }; - - idx = std::cmp::min(idx, length - 1); + fn new(slice: &'a [T], start: usize, end: usize, params: DynArgs) -> Self { + let params = params.unwrap(); + let params = params.downcast_ref::().unwrap(); + Self { + sorted: SortedBuf::new(slice, 
start, end), + prob: params.prob, + interpol: params.interpol, + } + } - match interpolation { - QuantileInterpolOptions::Midpoint => { - let top_idx = ((length as f64 - 1.0) * quantile).ceil() as usize; - if top_idx == idx { - // safety - // we are in bounds - unsafe { *vals.get_unchecked(idx) } - } else { - // safety - // we are in bounds - let (mid, mid_plus_1) = - unsafe { (*vals.get_unchecked(idx), *vals.get_unchecked(idx + 1)) }; + unsafe fn update(&mut self, start: usize, end: usize) -> T { + let vals = self.sorted.update(start, end); + let length = vals.len(); - (mid + mid_plus_1) / T::from::(2.0f64).unwrap() + let mut idx = match self.interpol { + QuantileInterpolOptions::Nearest => ((length as f64) * self.prob) as usize, + QuantileInterpolOptions::Lower + | QuantileInterpolOptions::Midpoint + | QuantileInterpolOptions::Linear => { + ((length as f64 - 1.0) * self.prob).floor() as usize } - } - QuantileInterpolOptions::Linear => { - let float_idx = (length as f64 - 1.0) * quantile; - let top_idx = f64::ceil(float_idx) as usize; - - if top_idx == idx { + QuantileInterpolOptions::Higher => ((length as f64 - 1.0) * self.prob).ceil() as usize, + }; + + idx = std::cmp::min(idx, length - 1); + + match self.interpol { + QuantileInterpolOptions::Midpoint => { + let top_idx = ((length as f64 - 1.0) * self.prob).ceil() as usize; + if top_idx == idx { + // safety + // we are in bounds + unsafe { *vals.get_unchecked(idx) } + } else { + // safety + // we are in bounds + let (mid, mid_plus_1) = + unsafe { (*vals.get_unchecked(idx), *vals.get_unchecked(idx + 1)) }; + + (mid + mid_plus_1) / T::from::(2.0f64).unwrap() + } + } + QuantileInterpolOptions::Linear => { + let float_idx = (length as f64 - 1.0) * self.prob; + let top_idx = f64::ceil(float_idx) as usize; + + if top_idx == idx { + // safety + // we are in bounds + unsafe { *vals.get_unchecked(idx) } + } else { + let proportion = T::from(float_idx - idx as f64).unwrap(); + proportion * (vals[top_idx] - vals[idx]) + vals[idx] + } + } + _ => { // safety // we are in bounds unsafe { *vals.get_unchecked(idx) } - } else { - let proportion = T::from(float_idx - idx as f64).unwrap(); - proportion * (vals[top_idx] - vals[idx]) + vals[idx] } } - _ => { - // safety - // we are in bounds - unsafe { *vals.get_unchecked(idx) } - } } } -pub fn rolling_median( - values: &[T], - window_size: usize, - min_periods: usize, - center: bool, - weights: Option<&[f64]>, - _params: DynArgs, -) -> PolarsResult -where - T: NativeType - + std::iter::Sum - + PartialOrd - + ToPrimitive - + NumCast - + Add - + Sub - + Div - + Mul - + Zero - + IsFloat, -{ - rolling_quantile( - values, - 0.5, - QuantileInterpolOptions::Linear, - window_size, - min_periods, - center, - weights, - ) -} - pub fn rolling_quantile( values: &[T], - quantile: f64, - interpolation: QuantileInterpolOptions, window_size: usize, min_periods: usize, center: bool, weights: Option<&[f64]>, + params: DynArgs, ) -> PolarsResult where T: NativeType - + std::iter::Sum - + PartialOrd - + ToPrimitive - + NumCast - + Add - + Sub + + IsFloat + + Float + + std::iter::Sum + + AddAssign + + SubAssign + Div - + Mul + + NumCast + + One + Zero - + IsFloat, + + PartialOrd + + Sub, { let offset_fn = match center { true => det_offsets_center, false => det_offsets, }; match weights { - None => Ok(rolling_apply_quantile( + None => rolling_apply_agg_window::, _, _>( values, - quantile, - interpolation, window_size, min_periods, offset_fn, - compute_quantile2, - )), + params, + ), Some(weights) => { let wsum = 
weights.iter().sum(); polars_ensure!( wsum != 0.0, ComputeError: "Weighted quantile is undefined if weights sum to 0" ); + let params = params.unwrap(); + let params = params.downcast_ref::().unwrap(); Ok(rolling_apply_weighted_quantile( values, - quantile, - interpolation, + params.prob, + params.interpol, window_size, min_periods, offset_fn, @@ -208,43 +148,6 @@ where } } -fn rolling_apply_quantile( - values: &[T], - quantile: f64, - interpolation: QuantileInterpolOptions, - window_size: usize, - min_periods: usize, - det_offsets_fn: Fo, - aggregator: Fa, -) -> ArrayRef -where - Fo: Fn(Idx, WindowSize, Len) -> (Start, End), - Fa: Fn(&[T], f64, QuantileInterpolOptions) -> T, - T: Debug + NativeType + IsFloat + PartialOrd, -{ - let len = values.len(); - let (start, end) = det_offsets_fn(0, window_size, len); - let mut sorted_window = SortedBuf::new(values, start, end); - - let out = (0..len) - .map(|idx| { - let (start, end) = det_offsets_fn(idx, window_size, len); - - // Safety: - // we are in bounds - let window = unsafe { sorted_window.update(start, end) }; - aggregator(window, quantile, interpolation) - }) - .collect_trusted::>(); - - let validity = create_validity(min_periods, len, window_size, det_offsets_fn); - Box::new(PrimitiveArray::new( - T::PRIMITIVE.into(), - out.into(), - validity.map(|b| b.into()), - )) -} - #[inline] fn compute_wq(buf: &[(T, f64)], p: f64, wsum: f64, interp: QuantileInterpolOptions) -> T where @@ -348,73 +251,31 @@ mod test { #[test] fn test_rolling_median() { let values = &[1.0, 2.0, 3.0, 4.0]; - - let out = rolling_quantile( - values, - 0.5, - QuantileInterpolOptions::Linear, - 2, - 2, - false, - None, - ) - .unwrap(); + let med_pars = Some(Arc::new(RollingQuantileParams { + prob: 0.5, + interpol: Linear, + }) as Arc); + let out = rolling_quantile(values, 2, 2, false, None, med_pars.clone()).unwrap(); let out = out.as_any().downcast_ref::>().unwrap(); let out = out.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out, &[None, Some(1.5), Some(2.5), Some(3.5)]); - let out = rolling_quantile( - values, - 0.5, - QuantileInterpolOptions::Linear, - 2, - 1, - false, - None, - ) - .unwrap(); + let out = rolling_quantile(values, 2, 1, false, None, med_pars.clone()).unwrap(); let out = out.as_any().downcast_ref::>().unwrap(); let out = out.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out, &[Some(1.0), Some(1.5), Some(2.5), Some(3.5)]); - let out = rolling_quantile( - values, - 0.5, - QuantileInterpolOptions::Linear, - 4, - 1, - false, - None, - ) - .unwrap(); + let out = rolling_quantile(values, 4, 1, false, None, med_pars.clone()).unwrap(); let out = out.as_any().downcast_ref::>().unwrap(); let out = out.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out, &[Some(1.0), Some(1.5), Some(2.0), Some(2.5)]); - let out = rolling_quantile( - values, - 0.5, - QuantileInterpolOptions::Linear, - 4, - 1, - true, - None, - ) - .unwrap(); + let out = rolling_quantile(values, 4, 1, true, None, med_pars.clone()).unwrap(); let out = out.as_any().downcast_ref::>().unwrap(); let out = out.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out, &[Some(1.5), Some(2.0), Some(2.5), Some(3.0)]); - let out = rolling_quantile( - values, - 0.5, - QuantileInterpolOptions::Linear, - 4, - 4, - true, - None, - ) - .unwrap(); + let out = rolling_quantile(values, 4, 4, true, None, med_pars.clone()).unwrap(); let out = out.as_any().downcast_ref::>().unwrap(); let out = out.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out, &[None, None, Some(2.5), 
None]); @@ -433,18 +294,26 @@ mod test { ]; for interpol in interpol_options { + let min_pars = Some(Arc::new(RollingQuantileParams { + prob: 0.0, + interpol, + }) as Arc); let out1 = rolling_min(values, 2, 2, false, None, None).unwrap(); let out1 = out1.as_any().downcast_ref::>().unwrap(); let out1 = out1.into_iter().map(|v| v.copied()).collect::>(); - let out2 = rolling_quantile(values, 0.0, interpol, 2, 2, false, None).unwrap(); + let out2 = rolling_quantile(values, 2, 2, false, None, min_pars).unwrap(); let out2 = out2.as_any().downcast_ref::>().unwrap(); let out2 = out2.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out1, out2); + let max_pars = Some(Arc::new(RollingQuantileParams { + prob: 1.0, + interpol, + }) as Arc); let out1 = rolling_max(values, 2, 2, false, None, None).unwrap(); let out1 = out1.as_any().downcast_ref::>().unwrap(); let out1 = out1.into_iter().map(|v| v.copied()).collect::>(); - let out2 = rolling_quantile(values, 1.0, interpol, 2, 2, false, None).unwrap(); + let out2 = rolling_quantile(values, 2, 2, false, None, max_pars).unwrap(); let out2 = out2.as_any().downcast_ref::>().unwrap(); let out2 = out2.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out1, out2); diff --git a/polars/polars-arrow/src/kernels/rolling/nulls/quantile.rs b/polars/polars-arrow/src/kernels/rolling/nulls/quantile.rs index 337971272715..08602957a7c9 100644 --- a/polars/polars-arrow/src/kernels/rolling/nulls/quantile.rs +++ b/polars/polars-arrow/src/kernels/rolling/nulls/quantile.rs @@ -1,291 +1,134 @@ use super::*; -use crate::index::IdxSize; -use crate::trusted_len::TrustedLen; -// used by agg_quantile -#[allow(clippy::too_many_arguments)] -pub fn rolling_quantile_by_iter( - values: &[T], - bitmap: &Bitmap, - quantile: f64, - interpolation: QuantileInterpolOptions, - offsets: O, -) -> ArrayRef -where - O: Iterator + TrustedLen, - T: std::iter::Sum - + NativeType - + Copy - + PartialOrd - + ToPrimitive - + NumCast - + Add - + Sub - + Div - + Mul - + IsFloat - + AddAssign - + Zero, -{ - if values.is_empty() { - let out: Vec = vec![]; - return Box::new(PrimitiveArray::new(T::PRIMITIVE.into(), out.into(), None)); - } - - let len = values.len(); - // Safety - // we are in bounds - let mut sorted_window = unsafe { SortedBufNulls::new(values, bitmap, 0, 1) }; - - let mut validity = MutableBitmap::with_capacity(len); - validity.extend_constant(len, true); - - let out = offsets - .enumerate() - .map(|(idx, (start, len))| { - let end = start + len; - - if start == end { - validity.set(idx, false); - T::default() - } else { - // safety - // we are in bounds - unsafe { sorted_window.update(start as usize, end as usize) }; - let null_count = sorted_window.null_count; - let window = sorted_window.window(); - - match compute_quantile(window, null_count, quantile, interpolation, 1) { - Some(val) => val, - None => { - validity.set(idx, false); - T::default() - } - } - } - }) - .collect_trusted::>(); - - Box::new(PrimitiveArray::new( - T::PRIMITIVE.into(), - out.into(), - Some(validity.into()), - )) +pub struct QuantileWindow<'a, T: NativeType + IsFloat + PartialOrd> { + sorted: SortedBufNulls<'a, T>, + prob: f64, + interpol: QuantileInterpolOptions, } -#[allow(clippy::too_many_arguments)] -fn rolling_apply_quantile( - values: &[T], - bitmap: &Bitmap, - quantile: f64, - interpolation: QuantileInterpolOptions, - window_size: usize, - min_periods: usize, - det_offsets_fn: Fo, - aggregator: Fa, -) -> ArrayRef -where - Fo: Fn(Idx, WindowSize, Len) -> (Start, End) + Copy, - // &[Option] -> 
window values - // usize -> null_count - // f764 -> quantile - // QuantileInterpolOptions -> Interpolation option - // usize -> min_periods - Fa: Fn(&[Option], usize, f64, QuantileInterpolOptions, usize) -> Option, - T: Default + NativeType + IsFloat + PartialOrd, +impl< + 'a, + T: NativeType + + IsFloat + + Float + + std::iter::Sum + + AddAssign + + SubAssign + + Div + + NumCast + + One + + Zero + + PartialOrd + + Sub, + > RollingAggWindowNulls<'a, T> for QuantileWindow<'a, T> { - let len = values.len(); - let (start, end) = det_offsets_fn(0, window_size, len); - // Safety - // we are in bounds - let mut sorted_window = unsafe { SortedBufNulls::new(values, bitmap, start, end) }; - - let mut validity = match create_validity(min_periods, len, window_size, det_offsets_fn) { - Some(v) => v, - None => { - let mut validity = MutableBitmap::with_capacity(len); - validity.extend_constant(len, true); - validity + unsafe fn new( + slice: &'a [T], + validity: &'a Bitmap, + start: usize, + end: usize, + params: DynArgs, + ) -> Self { + let params = params.unwrap(); + let params = params.downcast_ref::().unwrap(); + Self { + sorted: SortedBufNulls::new(slice, validity, start, end), + prob: params.prob, + interpol: params.interpol, } - }; - - let out = (0..len) - .map(|idx| { - let (start, end) = det_offsets_fn(idx, window_size, len); - - // safety - // we are in bounds - unsafe { sorted_window.update(start, end) }; - let null_count = sorted_window.null_count; - let window = sorted_window.window(); - - match aggregator(window, null_count, quantile, interpolation, min_periods) { - Some(val) => val, - None => { - validity.set(idx, false); - T::default() - } - } - }) - .collect_trusted::>(); - - Box::new(PrimitiveArray::new( - T::PRIMITIVE.into(), - out.into(), - Some(validity.into()), - )) -} - -fn compute_quantile( - values: &[Option], - null_count: usize, - quantile: f64, - interpolation: QuantileInterpolOptions, - min_periods: usize, -) -> Option -where - T: NativeType - + std::iter::Sum - + Zero - + AddAssign - + PartialOrd - + ToPrimitive - + NumCast - + Default - + Add - + Sub - + Div - + Mul - + IsFloat, -{ - if (values.len() - null_count) < min_periods { - return None; } - // slice off nulls - let values = &values[null_count..]; - let length = values.len(); - let mut idx = match interpolation { - QuantileInterpolOptions::Nearest => ((length as f64) * quantile) as usize, - QuantileInterpolOptions::Lower - | QuantileInterpolOptions::Midpoint - | QuantileInterpolOptions::Linear => ((length as f64 - 1.0) * quantile).floor() as usize, - QuantileInterpolOptions::Higher => ((length as f64 - 1.0) * quantile).ceil() as usize, - }; - - idx = std::cmp::min(idx, length - 1); - - // we can unwrap because we sliced of the nulls - match interpolation { - QuantileInterpolOptions::Midpoint => { - let top_idx = ((length as f64 - 1.0) * quantile).ceil() as usize; - Some( - (values[idx].unwrap() + values[top_idx].unwrap()) / T::from::(2.0f64).unwrap(), - ) + unsafe fn update(&mut self, start: usize, end: usize) -> Option { + let (values, null_count) = self.sorted.update(start, end); + // The min periods_issue will be taken care of when actually rolling + if null_count == values.len() { + return None; } - QuantileInterpolOptions::Linear => { - let float_idx = (length as f64 - 1.0) * quantile; - let top_idx = f64::ceil(float_idx) as usize; + // Nulls are guaranteed to be at the front + let values = &values[null_count..]; + let length = values.len(); + + let mut idx = match self.interpol { + 
QuantileInterpolOptions::Nearest => ((length as f64) * self.prob) as usize,
+            QuantileInterpolOptions::Lower
+            | QuantileInterpolOptions::Midpoint
+            | QuantileInterpolOptions::Linear => {
+                ((length as f64 - 1.0) * self.prob).floor() as usize
+            }
+            QuantileInterpolOptions::Higher => ((length as f64 - 1.0) * self.prob).ceil() as usize,
+        };
+
+        idx = std::cmp::min(idx, length - 1);
 
-            if top_idx == idx {
-                Some(values[idx].unwrap())
-            } else {
-                let proportion = T::from(float_idx - idx as f64).unwrap();
+        // we can unwrap because we sliced off the nulls
+        match self.interpol {
+            QuantileInterpolOptions::Midpoint => {
+                let top_idx = ((length as f64 - 1.0) * self.prob).ceil() as usize;
                 Some(
-                    proportion * (values[top_idx].unwrap() - values[idx].unwrap())
-                        + values[idx].unwrap(),
+                    (values[idx].unwrap() + values[top_idx].unwrap())
+                        / T::from::<f64>(2.0f64).unwrap(),
                 )
             }
+            QuantileInterpolOptions::Linear => {
+                let float_idx = (length as f64 - 1.0) * self.prob;
+                let top_idx = f64::ceil(float_idx) as usize;
+
+                if top_idx == idx {
+                    Some(values[idx].unwrap())
+                } else {
+                    let proportion = T::from(float_idx - idx as f64).unwrap();
+                    Some(
+                        proportion * (values[top_idx].unwrap() - values[idx].unwrap())
+                            + values[idx].unwrap(),
+                    )
+                }
+            }
+            _ => Some(values[idx].unwrap()),
         }
-        _ => Some(values[idx].unwrap()),
     }
-}
 
-pub fn rolling_median<T>(
-    arr: &PrimitiveArray<T>,
-    window_size: usize,
-    min_periods: usize,
-    center: bool,
-    weights: Option<&[f64]>,
-    _params: DynArgs,
-) -> ArrayRef
-where
-    T: NativeType
-        + std::iter::Sum
-        + Zero
-        + AddAssign
-        + Copy
-        + PartialOrd
-        + ToPrimitive
-        + NumCast
-        + Default
-        + Add<Output = T>
-        + Sub<Output = T>
-        + Div<Output = T>
-        + Mul<Output = T>
-        + IsFloat,
-{
-    rolling_quantile(
-        arr,
-        0.5,
-        QuantileInterpolOptions::Linear,
-        window_size,
-        min_periods,
-        center,
-        weights,
-    )
+
+    fn is_valid(&self, min_periods: usize) -> bool {
+        self.sorted.is_valid(min_periods)
+    }
 }
 
 pub fn rolling_quantile<T>(
     arr: &PrimitiveArray<T>,
-    quantile: f64,
-    interpolation: QuantileInterpolOptions,
     window_size: usize,
     min_periods: usize,
     center: bool,
     weights: Option<&[f64]>,
+    params: DynArgs,
 ) -> ArrayRef
 where
     T: NativeType
+        + IsFloat
+        + Float
         + std::iter::Sum
-        + Zero
         + AddAssign
-        + Copy
-        + PartialOrd
-        + ToPrimitive
-        + NumCast
-        + Default
-        + Add
-        + Sub
+        + SubAssign
         + Div<Output = T>
-        + Mul
-        + IsFloat,
+        + NumCast
+        + One
+        + Zero
+        + PartialOrd
+        + Sub<Output = T>,
 {
     if weights.is_some() {
         panic!("weights not yet supported on array with null values")
     }
-    if center {
-        rolling_apply_quantile(
-            arr.values().as_slice(),
-            arr.validity().as_ref().unwrap(),
-            quantile,
-            interpolation,
-            window_size,
-            min_periods,
-            det_offsets_center,
-            compute_quantile,
-        )
-    } else {
-        rolling_apply_quantile(
-            arr.values().as_slice(),
-            arr.validity().as_ref().unwrap(),
-            quantile,
-            interpolation,
-            window_size,
-            min_periods,
-            det_offsets,
-            compute_quantile,
-        )
-    }
+    let offset_fn = match center {
+        true => det_offsets_center,
+        false => det_offsets,
+    };
+    rolling_apply_agg_window::<QuantileWindow<_>, _, _>(
+        arr.values().as_slice(),
+        arr.validity().as_ref().unwrap(),
+        window_size,
+        min_periods,
+        offset_fn,
+        params,
+    )
 }
 
 #[cfg(test)]
@@ -304,28 +147,32 @@ mod test {
             buf,
             Some(Bitmap::from(&[true, false, true, true])),
         );
+        let med_pars = Some(Arc::new(RollingQuantileParams {
+            prob: 0.5,
+            interpol: QuantileInterpolOptions::Linear,
+        }) as Arc<dyn Any + Send + Sync>);
 
-        let out = rolling_quantile(arr, 0.5, QuantileInterpolOptions::Linear, 2, 2, false, None);
+        let out = rolling_quantile(arr, 2, 2, false, None, med_pars.clone());
        let out =
out.as_any().downcast_ref::>().unwrap(); let out = out.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out, &[None, None, None, Some(3.5)]); - let out = rolling_quantile(arr, 0.5, QuantileInterpolOptions::Linear, 2, 1, false, None); + let out = rolling_quantile(arr, 2, 1, false, None, med_pars.clone()); let out = out.as_any().downcast_ref::>().unwrap(); let out = out.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out, &[Some(1.0), Some(1.0), Some(3.0), Some(3.5)]); - let out = rolling_quantile(arr, 0.5, QuantileInterpolOptions::Linear, 4, 1, false, None); + let out = rolling_quantile(arr, 4, 1, false, None, med_pars.clone()); let out = out.as_any().downcast_ref::>().unwrap(); let out = out.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out, &[Some(1.0), Some(1.0), Some(2.0), Some(3.0)]); - let out = rolling_quantile(arr, 0.5, QuantileInterpolOptions::Linear, 4, 1, true, None); + let out = rolling_quantile(arr, 4, 1, true, None, med_pars.clone()); let out = out.as_any().downcast_ref::>().unwrap(); let out = out.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out, &[Some(1.0), Some(2.0), Some(3.0), Some(3.5)]); - let out = rolling_quantile(arr, 0.5, QuantileInterpolOptions::Linear, 4, 4, true, None); + let out = rolling_quantile(arr, 4, 4, true, None, med_pars.clone()); let out = out.as_any().downcast_ref::>().unwrap(); let out = out.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out, &[None, None, None, None]); @@ -350,18 +197,26 @@ mod test { ]; for interpol in interpol_options { + let min_pars = Some(Arc::new(RollingQuantileParams { + prob: 0.0, + interpol, + }) as Arc); let out1 = rolling_min(values, 2, 1, false, None, None); let out1 = out1.as_any().downcast_ref::>().unwrap(); let out1 = out1.into_iter().map(|v| v.copied()).collect::>(); - let out2 = rolling_quantile(values, 0.0, interpol, 2, 1, false, None); + let out2 = rolling_quantile(values, 2, 1, false, None, min_pars); let out2 = out2.as_any().downcast_ref::>().unwrap(); let out2 = out2.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out1, out2); + let max_pars = Some(Arc::new(RollingQuantileParams { + prob: 1.0, + interpol, + }) as Arc); let out1 = rolling_max(values, 2, 1, false, None, None); let out1 = out1.as_any().downcast_ref::>().unwrap(); let out1 = out1.into_iter().map(|v| v.copied()).collect::>(); - let out2 = rolling_quantile(values, 1.0, interpol, 2, 1, false, None); + let out2 = rolling_quantile(values, 2, 1, false, None, max_pars); let out2 = out2.as_any().downcast_ref::>().unwrap(); let out2 = out2.into_iter().map(|v| v.copied()).collect::>(); assert_eq!(out1, out2); diff --git a/polars/polars-arrow/src/kernels/rolling/window.rs b/polars/polars-arrow/src/kernels/rolling/window.rs index 2ab2d07ce10a..7471c4e2f174 100644 --- a/polars/polars-arrow/src/kernels/rolling/window.rs +++ b/polars/polars-arrow/src/kernels/rolling/window.rs @@ -166,15 +166,11 @@ impl<'a, T: NativeType + IsFloat + PartialOrd> SortedBufNulls<'a, T> { out } - pub(super) fn window(&self) -> &[Option] { - &self.buf - } - /// Update the window position by setting the `start` index and the `end` index. 
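    /// Returns the freshly updated window (sorted, with nulls at the front) together
    /// with its current null count, replacing the removed `window()` accessor.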
/// # Safety /// The caller must ensure that `start` and `end` are within bounds of `self.slice` /// - pub(super) unsafe fn update(&mut self, start: usize, end: usize) { + pub(super) unsafe fn update(&mut self, start: usize, end: usize) -> (&[Option], usize) { // swap the whole buffer if start >= self.last_end { self.fill_and_sort_buf(start, end); @@ -221,6 +217,11 @@ impl<'a, T: NativeType + IsFloat + PartialOrd> SortedBufNulls<'a, T> { } self.last_start = start; self.last_end = end; + (&self.buf, self.null_count) + } + + pub(super) fn is_valid(&self, min_periods: usize) -> bool { + ((self.last_end - self.last_start) - self.null_count) >= min_periods } } diff --git a/polars/polars-arrow/src/prelude.rs b/polars/polars-arrow/src/prelude.rs index ba928d6b2f43..e1b1fd012f2a 100644 --- a/polars/polars-arrow/src/prelude.rs +++ b/polars/polars-arrow/src/prelude.rs @@ -6,7 +6,7 @@ pub use crate::bitmap::mutable::MutableBitmapExtension; pub use crate::data_types::*; pub use crate::index::*; pub use crate::kernels::rolling::no_nulls::QuantileInterpolOptions; -pub use crate::kernels::rolling::{DynArgs, RollingVarParams}; +pub use crate::kernels::rolling::{DynArgs, RollingQuantileParams, RollingVarParams}; pub type LargeStringArray = Utf8Array; pub type LargeBinaryArray = BinaryArray; diff --git a/polars/polars-core/src/frame/groupby/aggregations/mod.rs b/polars/polars-core/src/frame/groupby/aggregations/mod.rs index b7d3445e601d..c9400cd7d14c 100644 --- a/polars/polars-core/src/frame/groupby/aggregations/mod.rs +++ b/polars/polars-core/src/frame/groupby/aggregations/mod.rs @@ -12,10 +12,10 @@ use num_traits::{Bounded, Float, Num, NumCast, ToPrimitive, Zero}; use polars_arrow::data_types::IsFloat; use polars_arrow::kernels::rolling; use polars_arrow::kernels::rolling::no_nulls::{ - MaxWindow, MeanWindow, MinWindow, RollingAggWindowNoNulls, SumWindow, VarWindow, + MaxWindow, MeanWindow, MinWindow, QuantileWindow, RollingAggWindowNoNulls, SumWindow, VarWindow, }; use polars_arrow::kernels::rolling::nulls::RollingAggWindowNulls; -use polars_arrow::kernels::rolling::{DynArgs, RollingVarParams}; +use polars_arrow::kernels::rolling::{DynArgs, RollingQuantileParams, RollingVarParams}; use polars_arrow::kernels::take_agg::*; use polars_arrow::prelude::QuantileInterpolOptions; use polars_arrow::trusted_len::TrustedLenPush; @@ -271,6 +271,7 @@ where ChunkedArray: QuantileDispatcher, ChunkedArray: IntoSeries, K: PolarsNumericType, + ::Native: num_traits::Float, { let invalid_quantile = !(0.0..=1.0).contains(&quantile); if invalid_quantile { @@ -298,19 +299,25 @@ where let values = arr.values().as_slice(); let offset_iter = groups.iter().map(|[first, len]| (*first, *len)); let arr = match arr.validity() { - None => rolling::no_nulls::rolling_quantile_by_iter( + None => _rolling_apply_agg_window_no_nulls::, _, _>( values, - quantile, - interpol, - offset_iter, - ), - Some(validity) => rolling::nulls::rolling_quantile_by_iter( - values, - validity, - quantile, - interpol, offset_iter, + Some(Arc::new(RollingQuantileParams { + prob: quantile, + interpol, + })), ), + Some(validity) => { + _rolling_apply_agg_window_nulls::, _, _>( + values, + validity, + offset_iter, + Some(Arc::new(RollingQuantileParams { + prob: quantile, + interpol, + })), + ) + } }; // the rolling kernels works on the dtype, this is not yet the float // output type we need. 
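The `DynArgs` slot threaded through these kernels is a type-erased parameter channel: the caller packs a params struct behind a shared `Any` handle and each kernel downcasts it back, as `QuantileWindow::new` does above. A minimal self-contained sketch of that pattern, using plain `std::any` and hypothetical names rather than the actual polars types:

    use std::any::Any;
    use std::sync::Arc;

    // Assumed shape of the type-erased argument slot (a stand-in for the polars alias).
    type DynArgs = Option<Arc<dyn Any + Send + Sync>>;

    struct QuantileParams {
        prob: f64,
    }

    // A kernel recovers the concrete params it expects with a downcast.
    fn quantile_kernel(params: DynArgs) -> f64 {
        let params = params.unwrap();
        let params = params.downcast_ref::<QuantileParams>().unwrap();
        params.prob
    }

    fn main() {
        let args: DynArgs = Some(Arc::new(QuantileParams { prob: 0.5 }));
        assert_eq!(quantile_kernel(args), 0.5);
    }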
@@ -342,6 +349,7 @@ where ChunkedArray: QuantileDispatcher, ChunkedArray: IntoSeries, K: PolarsNumericType, + ::Native: num_traits::Float, { match groups { GroupsProxy::Idx(groups) => { diff --git a/polars/polars-lazy/polars-plan/src/dsl/mod.rs b/polars/polars-lazy/polars-plan/src/dsl/mod.rs index 05d7923a3b82..3cf8f1cf29ae 100644 --- a/polars/polars-lazy/polars-plan/src/dsl/mod.rs +++ b/polars/polars-lazy/polars-plan/src/dsl/mod.rs @@ -1352,17 +1352,12 @@ impl Expr { /// /// See: [`RollingAgg::rolling_quantile`] #[cfg(feature = "rolling_window")] - pub fn rolling_quantile( - self, - quantile: f64, - interpolation: QuantileInterpolOptions, - options: RollingOptions, - ) -> Expr { + pub fn rolling_quantile(self, options: RollingOptions) -> Expr { self.finish_rolling( options, "rolling_quantile", "rolling_quantile_by", - Arc::new(move |s, options| s.rolling_quantile(quantile, interpolation, options)), + Arc::new(|s, options| s.rolling_quantile(options)), GetOutput::float_type(), ) } diff --git a/polars/polars-time/src/chunkedarray/rolling_window/floats.rs b/polars/polars-time/src/chunkedarray/rolling_window/floats.rs index 98f645876b30..6a87c6ca4943 100644 --- a/polars/polars-time/src/chunkedarray/rolling_window/floats.rs +++ b/polars/polars-time/src/chunkedarray/rolling_window/floats.rs @@ -80,58 +80,35 @@ where /// A window of length `window_size` will traverse the array. The values that fill this window /// will (optionally) be weighted according to the `weights` vector. fn rolling_median(&self, options: RollingOptionsImpl) -> PolarsResult { - if options.by.is_some() { - panic!("'rolling by' not yet supported for 'rolling_median', consider using 'groupby_rolling'") - } + // At the last possible second, right before we do computations, make sure we're using the + // right quantile parameters to get a median. This also lets us have the convenience of + // calling `rolling_median` from Rust without a bunch of dedicated functions that just call + // out to the `rolling_quantile` anyway. + let mut options = options.clone(); + options.fn_params = Some(Arc::new(RollingQuantileParams { + prob: 0.5, + interpol: QuantileInterpolOptions::Linear, + }) as Arc); rolling_agg( &self.0, options, - &rolling::no_nulls::rolling_median, - &rolling::nulls::rolling_median, - None, + &rolling::no_nulls::rolling_quantile, + &rolling::nulls::rolling_quantile, + Some(&super::rolling_kernels::no_nulls::rolling_quantile), ) } /// Apply a rolling quantile (moving quantile) over the values in this array. /// A window of length `window_size` will traverse the array. The values that fill this window /// will (optionally) be weighted according to the `weights` vector. 
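    /// With `prob: 0.5` and `interpol: Linear` in `fn_params` this reproduces the
    /// `rolling_median` above exactly.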
- fn rolling_quantile( - &self, - quantile: f64, - interpolation: QuantileInterpolOptions, - options: RollingOptionsImpl, - ) -> PolarsResult { - if options.by.is_some() { - panic!("'rolling by' not yet supported for 'rolling_quantile', consider using 'groupby_rolling'") - } - - let options: RollingOptionsFixedWindow = options.into(); - check_input(options.window_size, options.min_periods)?; - let ca = self.0.rechunk(); - - let arr = ca.downcast_iter().next().unwrap(); - let arr = match self.0.has_validity() { - false => rolling::no_nulls::rolling_quantile( - arr.values(), - quantile, - interpolation, - options.window_size, - options.min_periods, - options.center, - options.weights.as_deref(), - ) - .unwrap(), - _ => rolling::nulls::rolling_quantile( - arr, - quantile, - interpolation, - options.window_size, - options.min_periods, - options.center, - options.weights.as_deref(), - ), - }; - Series::try_from((self.0.name(), arr)) + fn rolling_quantile(&self, options: RollingOptionsImpl) -> PolarsResult { + rolling_agg( + &self.0, + options, + &rolling::no_nulls::rolling_quantile, + &rolling::nulls::rolling_quantile, + Some(&super::rolling_kernels::no_nulls::rolling_quantile), + ) } fn rolling_var(&self, options: RollingOptionsImpl) -> PolarsResult { diff --git a/polars/polars-time/src/chunkedarray/rolling_window/ints.rs b/polars/polars-time/src/chunkedarray/rolling_window/ints.rs index 0e9b163a6ec7..a25664f98cae 100644 --- a/polars/polars-time/src/chunkedarray/rolling_window/ints.rs +++ b/polars/polars-time/src/chunkedarray/rolling_window/ints.rs @@ -32,15 +32,8 @@ where self.0.cast(&DataType::Float64)?.rolling_median(options) } - fn rolling_quantile( - &self, - quantile: f64, - interpolation: QuantileInterpolOptions, - options: RollingOptionsImpl, - ) -> PolarsResult { - self.0 - .cast(&DataType::Float64)? - .rolling_quantile(quantile, interpolation, options) + fn rolling_quantile(&self, options: RollingOptionsImpl) -> PolarsResult { + self.0.cast(&DataType::Float64)?.rolling_quantile(options) } fn rolling_min(&self, options: RollingOptionsImpl) -> PolarsResult { diff --git a/polars/polars-time/src/chunkedarray/rolling_window/mod.rs b/polars/polars-time/src/chunkedarray/rolling_window/mod.rs index 72ba20addd2c..ead6d9012eb9 100644 --- a/polars/polars-time/src/chunkedarray/rolling_window/mod.rs +++ b/polars/polars-time/src/chunkedarray/rolling_window/mod.rs @@ -14,8 +14,6 @@ use polars_arrow::data_types::IsFloat; use polars_arrow::export::arrow; #[cfg(feature = "rolling_window")] use polars_arrow::kernels::rolling; -#[cfg(feature = "rolling_window")] -use polars_arrow::prelude::QuantileInterpolOptions; use polars_core::prelude::*; #[cfg(feature = "rolling_window")] @@ -191,12 +189,7 @@ pub trait RollingAgg { /// Apply a rolling quantile (moving quantile) over the values in this array. /// A window of length `window_size` will traverse the array. The values that fill this window /// will (optionally) be weighted according to the `weights` vector. - fn rolling_quantile( - &self, - quantile: f64, - interpolation: QuantileInterpolOptions, - options: RollingOptionsImpl, - ) -> PolarsResult; + fn rolling_quantile(&self, options: RollingOptionsImpl) -> PolarsResult; /// Apply a rolling var (moving var) over the values in this array. /// A window of length `window_size` will traverse the array. 
The values that fill this window diff --git a/polars/polars-time/src/chunkedarray/rolling_window/rolling_kernels/no_nulls.rs b/polars/polars-time/src/chunkedarray/rolling_window/rolling_kernels/no_nulls.rs index 67522451833e..79a349009ead 100644 --- a/polars/polars-time/src/chunkedarray/rolling_window/rolling_kernels/no_nulls.rs +++ b/polars/polars-time/src/chunkedarray/rolling_window/rolling_kernels/no_nulls.rs @@ -150,3 +150,24 @@ where }; rolling_apply_agg_window::, _, _>(values, offset_iter, params) } + +#[allow(clippy::too_many_arguments)] +pub(crate) fn rolling_quantile( + values: &[T], + period: Duration, + time: &[i64], + closed_window: ClosedWindow, + tu: TimeUnit, + tz: Option<&TimeZone>, + params: DynArgs, +) -> PolarsResult +where + T: NativeType + Float + std::iter::Sum + SubAssign + AddAssign + IsFloat, +{ + let offset_iter = match tz { + #[cfg(feature = "timezones")] + Some(tz) => groupby_values_iter(period, time, closed_window, tu, tz.parse::().ok()), + _ => groupby_values_iter(period, time, closed_window, tu, None), + }; + rolling_apply_agg_window::, _, _>(values, offset_iter, params) +} diff --git a/polars/polars-time/src/series/_trait.rs b/polars/polars-time/src/series/_trait.rs index 03722e5ac06c..a4a913f39131 100644 --- a/polars/polars-time/src/series/_trait.rs +++ b/polars/polars-time/src/series/_trait.rs @@ -34,12 +34,7 @@ pub trait SeriesOpsTime { } /// Apply a rolling quantile to a Series. #[cfg(feature = "rolling_window")] - fn rolling_quantile( - &self, - _quantile: f64, - _interpolation: QuantileInterpolOptions, - _options: RollingOptionsImpl, - ) -> PolarsResult { + fn rolling_quantile(&self, _options: RollingOptionsImpl) -> PolarsResult { invalid_operation!(self) } @@ -85,16 +80,9 @@ impl SeriesOpsTime for Series { } /// Apply a rolling quantile to a Series. 
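    /// The probability and interpolation strategy now travel in
    /// `RollingOptionsImpl::fn_params` as a `RollingQuantileParams`.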
#[cfg(feature = "rolling_window")] - fn rolling_quantile( - &self, - quantile: f64, - interpolation: QuantileInterpolOptions, - options: RollingOptionsImpl, - ) -> PolarsResult { - self.to_ops() - .rolling_quantile(quantile, interpolation, options) + fn rolling_quantile(&self, options: RollingOptionsImpl) -> PolarsResult { + self.to_ops().rolling_quantile(options) } - #[cfg(feature = "rolling_window")] fn rolling_min(&self, options: RollingOptionsImpl) -> PolarsResult { self.to_ops().rolling_min(options) diff --git a/polars/polars-time/src/series/implementations/floats.rs b/polars/polars-time/src/series/implementations/floats.rs index e47e792483cd..d6a8d9377dab 100644 --- a/polars/polars-time/src/series/implementations/floats.rs +++ b/polars/polars-time/src/series/implementations/floats.rs @@ -27,13 +27,8 @@ where } #[cfg(feature = "rolling_window")] - fn rolling_quantile( - &self, - quantile: f64, - interpolation: QuantileInterpolOptions, - options: RollingOptionsImpl, - ) -> PolarsResult { - RollingAgg::rolling_quantile(self, quantile, interpolation, options) + fn rolling_quantile(&self, options: RollingOptionsImpl) -> PolarsResult { + RollingAgg::rolling_quantile(self, options) } #[cfg(feature = "rolling_window")] diff --git a/polars/polars-time/src/series/implementations/integers.rs b/polars/polars-time/src/series/implementations/integers.rs index 1d35649ef93e..8ede537ba771 100644 --- a/polars/polars-time/src/series/implementations/integers.rs +++ b/polars/polars-time/src/series/implementations/integers.rs @@ -24,13 +24,8 @@ where } #[cfg(feature = "rolling_window")] - fn rolling_quantile( - &self, - quantile: f64, - interpolation: QuantileInterpolOptions, - options: RollingOptionsImpl, - ) -> PolarsResult { - RollingAgg::rolling_quantile(self, quantile, interpolation, options) + fn rolling_quantile(&self, options: RollingOptionsImpl) -> PolarsResult { + RollingAgg::rolling_quantile(self, options) } #[cfg(feature = "rolling_window")] diff --git a/polars/tests/it/core/rolling_window.rs b/polars/tests/it/core/rolling_window.rs index 6d037a5722db..babb587f3dba 100644 --- a/polars/tests/it/core/rolling_window.rs +++ b/polars/tests/it/core/rolling_window.rs @@ -1,3 +1,7 @@ +use std::any::Any; + +use polars_core::prelude::QuantileInterpolOptions::Linear; + use super::*; #[test] @@ -309,29 +313,27 @@ fn test_median_quantile_types() { }) .unwrap(); + let rq_params = Some(Arc::new(RollingQuantileParams { + prob: 0.3, + interpol: Linear, + }) as Arc); let rol_quantile = s - .rolling_quantile( - 0.3, - QuantileInterpolOptions::Linear, - RollingOptionsImpl { - window_size: Duration::new(2), - min_periods: 1, - ..Default::default() - }, - ) + .rolling_quantile(RollingOptionsImpl { + window_size: Duration::new(2), + min_periods: 1, + fn_params: rq_params.clone(), + ..Default::default() + }) .unwrap(); let rol_quantile_weighted = s - .rolling_quantile( - 0.3, - QuantileInterpolOptions::Linear, - RollingOptionsImpl { - window_size: Duration::new(2), - min_periods: 1, - weights: Some(vec![1.0, 2.0]), - ..Default::default() - }, - ) + .rolling_quantile(RollingOptionsImpl { + window_size: Duration::new(2), + min_periods: 1, + weights: Some(vec![1.0, 2.0]), + fn_params: rq_params.clone(), + ..Default::default() + }) .unwrap(); assert_eq!(*rol_med.dtype(), DataType::Float64); @@ -358,28 +360,22 @@ fn test_median_quantile_types() { .unwrap(); let rol_quantile = s - .rolling_quantile( - 0.3, - QuantileInterpolOptions::Linear, - RollingOptionsImpl { - window_size: Duration::new(2), - min_periods: 1, - 
..Default::default() - }, - ) + .rolling_quantile(RollingOptionsImpl { + window_size: Duration::new(2), + min_periods: 1, + fn_params: rq_params.clone(), + ..Default::default() + }) .unwrap(); let rol_quantile_weighted = s - .rolling_quantile( - 0.3, - QuantileInterpolOptions::Linear, - RollingOptionsImpl { - window_size: Duration::new(2), - min_periods: 1, - weights: Some(vec![1.0, 2.0]), - ..Default::default() - }, - ) + .rolling_quantile(RollingOptionsImpl { + window_size: Duration::new(2), + min_periods: 1, + weights: Some(vec![1.0, 2.0]), + fn_params: rq_params.clone(), + ..Default::default() + }) .unwrap(); assert_eq!(*rol_med.dtype(), DataType::Float32); @@ -406,28 +402,22 @@ fn test_median_quantile_types() { .unwrap(); let rol_quantile = s1 - .rolling_quantile( - 0.3, - QuantileInterpolOptions::Linear, - RollingOptionsImpl { - window_size: Duration::new(2), - min_periods: 1, - ..Default::default() - }, - ) + .rolling_quantile(RollingOptionsImpl { + window_size: Duration::new(2), + min_periods: 1, + fn_params: rq_params.clone(), + ..Default::default() + }) .unwrap(); let rol_quantile_weighted = s1 - .rolling_quantile( - 0.3, - QuantileInterpolOptions::Linear, - RollingOptionsImpl { - window_size: Duration::new(2), - min_periods: 1, - weights: Some(vec![1.0, 2.0]), - ..Default::default() - }, - ) + .rolling_quantile(RollingOptionsImpl { + window_size: Duration::new(2), + min_periods: 1, + weights: Some(vec![1.0, 2.0]), + fn_params: rq_params.clone(), + ..Default::default() + }) .unwrap(); assert_eq!(*rol_med.dtype(), DataType::Float64); diff --git a/py-polars/src/expr/general.rs b/py-polars/src/expr/general.rs index 8a6ea0834bf7..fa7d0bf638e3 100644 --- a/py-polars/src/expr/general.rs +++ b/py-polars/src/expr/general.rs @@ -925,9 +925,12 @@ impl PyExpr { center, by, closed_window: closed.map(|c| c.0), - ..Default::default() + fn_params: Some(Arc::new(RollingQuantileParams { + prob: 0.5, + interpol: QuantileInterpolOptions::Linear, + }) as Arc), }; - self.inner.clone().rolling_median(options).into() + self.inner.clone().rolling_quantile(options).into() } #[pyo3(signature = (quantile, interpolation, window_size, weights, min_periods, center, by, closed))] @@ -950,13 +953,13 @@ impl PyExpr { center, by, closed_window: closed.map(|c| c.0), - ..Default::default() + fn_params: Some(Arc::new(RollingQuantileParams { + prob: quantile, + interpol: interpolation.0, + }) as Arc), }; - self.inner - .clone() - .rolling_quantile(quantile, interpolation.0, options) - .into() + self.inner.clone().rolling_quantile(options).into() } fn rolling_skew(&self, window_size: usize, bias: bool) -> Self { From 003db95ddaefd2b995c0c280102c91c74d58043a Mon Sep 17 00:00:00 2001 From: Josh Magarick Date: Sat, 15 Jul 2023 00:15:57 -0700 Subject: [PATCH 17/37] perf(rust, python): Rolling min/max for partially sorted data (#9819) --- .../src/kernels/rolling/no_nulls/min_max.rs | 545 +++++++----------- 1 file changed, 211 insertions(+), 334 deletions(-) diff --git a/polars/polars-arrow/src/kernels/rolling/no_nulls/min_max.rs b/polars/polars-arrow/src/kernels/rolling/no_nulls/min_max.rs index 58de1a0ec9ee..42be11b9d9ae 100644 --- a/polars/polars-arrow/src/kernels/rolling/no_nulls/min_max.rs +++ b/polars/polars-arrow/src/kernels/rolling/no_nulls/min_max.rs @@ -3,180 +3,206 @@ use no_nulls::{rolling_apply_agg_window, RollingAggWindowNoNulls}; use super::*; -pub struct SortedMinMax<'a, T: NativeType> { - slice: &'a [T], +#[inline] +fn new_is_min(old: &T, new: &T) -> bool { + compare_fn_nan_min(old, new).is_ge() } -impl<'a, 
T: NativeType> RollingAggWindowNoNulls<'a, T> for SortedMinMax<'a, T> { - fn new(slice: &'a [T], _start: usize, _end: usize, _params: DynArgs) -> Self { - Self { slice } - } - - #[inline] - unsafe fn update(&mut self, start: usize, _end: usize) -> T { - *self.slice.get_unchecked(start) - } +#[inline] +fn new_is_max(old: &T, new: &T) -> bool { + compare_fn_nan_max(old, new).is_le() } #[inline] -unsafe fn get_min_and_idx(slice: &[T], start: usize, end: usize) -> Option<(usize, &T)> +unsafe fn get_min_and_idx( + slice: &[T], + start: usize, + end: usize, + sorted_to: usize, +) -> Option<(usize, &T)> where T: NativeType + IsFloat + PartialOrd, { - // Reversed because min_by returns the first min if there's a tie but we want the last - slice - .get_unchecked(start..end) - .iter() - .enumerate() - .rev() - .min_by(|&a, &b| compare_fn_nan_min(a.1, b.1)) -} - -pub struct MinWindow<'a, T: NativeType + PartialOrd + IsFloat> { - slice: &'a [T], - min: T, - min_idx: usize, - last_start: usize, - last_end: usize, -} - -impl<'a, T: NativeType + IsFloat + PartialOrd> RollingAggWindowNoNulls<'a, T> for MinWindow<'a, T> { - fn new(slice: &'a [T], start: usize, end: usize, _params: DynArgs) -> Self { - let (idx, min) = - unsafe { get_min_and_idx(slice, start, end).unwrap_or((0, &slice[start])) }; - Self { - slice, - min: *min, - min_idx: start + idx, - last_start: start, - last_end: end, - } - } - - unsafe fn update(&mut self, start: usize, end: usize) -> T { - //For details see: https://github.com/pola-rs/polars/pull/9277#issuecomment-1581401692 - self.last_start = start; // Don't care where the last one started - let old_last_end = self.last_end; // But we need this - self.last_end = end; - - let entering_start = std::cmp::max(old_last_end, start); - let entering = get_min_and_idx(self.slice, entering_start, end); - let empty_overlap = old_last_end <= start; - - if entering.is_some_and(|em| compare_fn_nan_min(&self.min, em.1).is_ge() || empty_overlap) { - // If the entering min <= the current min return early, since no value in the overlap can be smaller than either. - self.min = *entering.unwrap().1; - self.min_idx = entering_start + entering.unwrap().0; - return self.min; - } else if self.min_idx >= start || empty_overlap { - // If the entering min isn't the smallest but the current min is between start and end we can still ignore the overlap - return self.min; - } - // Otherwise get the min of the overlapping window and the entering min - match (get_min_and_idx(self.slice, start, old_last_end), entering) { - (Some(pm), Some(em)) => { - if compare_fn_nan_min(pm.1, em.1).is_ge() { - self.min = *em.1; - self.min_idx = entering_start + em.0; + if sorted_to >= end { + // If we're sorted past the end we can just take the first element because this function + // won't be called on intervals that contain the previous min + Some((start, slice.get_unchecked(start))) + } else if sorted_to <= start { + // We have to inspect the whole range + // Reversed because min_by returns the first min if there's a tie but we want the last + slice + .get_unchecked(start..end) + .iter() + .enumerate() + .rev() + .min_by(|&a, &b| compare_fn_nan_min(a.1, b.1)) + .map(|v| (v.0 + start, v.1)) + } else { + // It's sorted in range start..sorted_to. 
Compare slice[start] to min over sorted_to..end + let s = (start, slice.get_unchecked(start)); + slice + .get_unchecked(sorted_to..end) + .iter() + .enumerate() + .rev() + .min_by(|&a, &b| compare_fn_nan_min(a.1, b.1)) + .map(|v| { + if new_is_min(s.1, v.1) { + (v.0 + sorted_to, v.1) } else { - self.min = *pm.1; - self.min_idx = start + pm.0; + s } - } - (Some(pm), None) => { - self.min = *pm.1; - self.min_idx = start + pm.0; - } - (None, Some(em)) => { - self.min = *em.1; - self.min_idx = entering_start + em.0; - } - // We shouldn't reach this, but it means - (None, None) => {} - } - - self.min + }) } } #[inline] -unsafe fn get_max_and_idx(slice: &[T], start: usize, end: usize) -> Option<(usize, &T)> +unsafe fn get_max_and_idx( + slice: &[T], + start: usize, + end: usize, + sorted_to: usize, +) -> Option<(usize, &T)> where T: NativeType + IsFloat + PartialOrd, { + if sorted_to >= end { + Some((start, slice.get_unchecked(start))) + } else if sorted_to <= start { + slice + .get_unchecked(start..end) + .iter() + .enumerate() + .max_by(|&a, &b| compare_fn_nan_max(a.1, b.1)) + .map(|v| (v.0 + start, v.1)) + } else { + let s = (start, slice.get_unchecked(start)); + slice + .get_unchecked(sorted_to..end) + .iter() + .enumerate() + .max_by(|&a, &b| compare_fn_nan_max(a.1, b.1)) + .map(|v| { + if new_is_max(s.1, v.1) { + (v.0 + sorted_to, v.1) + } else { + s + } + }) + } +} + +#[inline] +fn n_sorted_past_min(slice: &[T]) -> usize { slice - .get_unchecked(start..end) - .iter() - .enumerate() - .max_by(|&a, &b| compare_fn_nan_max(a.1, b.1)) + .windows(2) + .position(|x| compare_fn_nan_min(&x[0], &x[1]).is_gt()) + .unwrap_or(slice.len() - 1) } -pub struct MaxWindow<'a, T: NativeType> { - slice: &'a [T], - max: T, - max_idx: usize, - last_start: usize, - last_end: usize, +#[inline] +fn n_sorted_past_max(slice: &[T]) -> usize { + slice + .windows(2) + .position(|x| compare_fn_nan_max(&x[0], &x[1]).is_lt()) + .unwrap_or(slice.len() - 1) } -impl<'a, T: NativeType + IsFloat + PartialOrd> RollingAggWindowNoNulls<'a, T> for MaxWindow<'a, T> { - fn new(slice: &'a [T], start: usize, end: usize, _params: DynArgs) -> Self { - let (idx, max) = - unsafe { get_max_and_idx(slice, start, end).unwrap_or((0, &slice[start])) }; - Self { - slice, - max: *max, - max_idx: start + idx, - last_start: start, - last_end: end, +// Min and max really are the same thing up to a difference in comparison direction, as represented +// here by helpers we pass in. Making both with a macro helps keep behavior synchronized +macro_rules! minmax_window { + ($m_window:tt, $get_m_and_idx:ident, $new_is_m:ident, $n_sorted_past:ident) => { + pub struct $m_window<'a, T: NativeType + PartialOrd + IsFloat> { + slice: &'a [T], + m: T, + m_idx: usize, + sorted_to: usize, + last_start: usize, + last_end: usize, } - } - unsafe fn update(&mut self, start: usize, end: usize) -> T { - self.last_start = start; // Don't care where the last one started - let old_last_end = self.last_end; // But we need this - self.last_end = end; + impl<'a, T: NativeType + IsFloat + PartialOrd> $m_window<'a, T> { + #[inline] + unsafe fn update_m_and_m_idx(&mut self, m_and_idx: (usize, &T)) { + self.m = *m_and_idx.1; + self.m_idx = m_and_idx.0; + if self.sorted_to <= self.m_idx { + // Track how far past the current extremum values are sorted. Direction depends on min/max + // Tracking sorted ranges lets us only do comparisons when we have to. 
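+                    // Illustrative (hypothetical input, not from the patch): for
+                    // slice = [1, 2, 3, 0] with a new min at m_idx = 0,
+                    // n_sorted_past_min(&slice[0..]) returns 2, so sorted_to becomes
+                    // 0 + 1 + 2 = 3 and any later window with end <= 3 can read
+                    // slice[start] directly instead of scanning.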
+ self.sorted_to = + self.m_idx + 1 + $n_sorted_past(&self.slice.get_unchecked(self.m_idx..)); + } + } + } - let entering_start = std::cmp::max(old_last_end, start); - let entering = get_max_and_idx(self.slice, entering_start, end); - let empty_overlap = old_last_end < start; + impl<'a, T: NativeType + IsFloat + PartialOrd> RollingAggWindowNoNulls<'a, T> + for $m_window<'a, T> + { + fn new(slice: &'a [T], start: usize, end: usize, _params: DynArgs) -> Self { + let (idx, m) = + unsafe { $get_m_and_idx(slice, start, end, 0).unwrap_or((0, &slice[start])) }; + Self { + slice, + m: *m, + m_idx: idx, + sorted_to: idx + 1 + $n_sorted_past(&slice[idx..]), + last_start: start, + last_end: end, + } + } - if entering.is_some_and(|em| compare_fn_nan_max(&self.max, em.1).is_le() || empty_overlap) { - // If the entering max >= the current max return early, since no value in the overlap can be larger than either. - self.max = *entering.unwrap().1; - self.max_idx = entering_start + entering.unwrap().0; - return self.max; - } else if self.max_idx >= start || empty_overlap { - // If the entering max isn't the largest but the current max is between start and end we can still ignore the overlap - return self.max; - } - // Otherwise get the max of the overlapping window and the entering max - match (get_max_and_idx(self.slice, start, old_last_end), entering) { - (Some(pm), Some(em)) => { - if compare_fn_nan_max(pm.1, em.1).is_le() { - self.max = *em.1; - self.max_idx = entering_start + em.0; + unsafe fn update(&mut self, start: usize, end: usize) -> T { + //For details see: https://github.com/pola-rs/polars/pull/9277#issuecomment-1581401692 + self.last_start = start; // Don't care where the last one started + let old_last_end = self.last_end; // But we need this + self.last_end = end; + let entering_start = std::cmp::max(old_last_end, start); + let entering = if end - entering_start == 1 { + // Faster in the special, but common, case of a fixed window rolling by one + Some((entering_start, self.slice.get_unchecked(entering_start))) + } else if old_last_end == end { + // Edge case for shrinking windows + None } else { - self.max = *pm.1; - self.max_idx = start + pm.0; + $get_m_and_idx(self.slice, entering_start, end, self.sorted_to) + }; + let empty_overlap = old_last_end <= start; + + if entering.is_some_and(|em| $new_is_m(&self.m, em.1) || empty_overlap) { + // The entering extremum "beats" the previous extremum so we can ignore the overlap + self.update_m_and_m_idx(entering.unwrap()); + return self.m; + } else if self.m_idx >= start || empty_overlap { + // The previous extremum didn't drop off. 
Keep it + return self.m; } + // Otherwise get the min of the overlapping window and the entering min + match ( + $get_m_and_idx(self.slice, start, old_last_end, self.sorted_to), + entering, + ) { + (Some(pm), Some(em)) => { + if $new_is_m(pm.1, em.1) { + self.update_m_and_m_idx(em); + } else { + self.update_m_and_m_idx(pm); + } + } + (Some(pm), None) => self.update_m_and_m_idx(pm), + (None, Some(em)) => self.update_m_and_m_idx(em), + // This would mean both the entering and previous windows are empty + (None, None) => unreachable!(), + } + + self.m } - (Some(pm), None) => { - self.max = *pm.1; - self.max_idx = start + pm.0; - } - (None, Some(em)) => { - self.max = *em.1; - self.max_idx = entering_start + em.0; - } - // We shouldn't reach this, but it means - (None, None) => {} } - - self.max - } + }; } +minmax_window!(MinWindow, get_min_and_idx, new_is_min, n_sorted_past_min); +minmax_window!(MaxWindow, get_max_and_idx, new_is_max, n_sorted_past_max); + pub(crate) fn compute_min_weights(values: &[T], weights: &[T]) -> T where T: NativeType + PartialOrd + std::ops::Mul, @@ -206,206 +232,57 @@ where max } -pub fn is_reverse_sorted_max(values: &[T]) -> bool { - values - .windows(2) - .all(|w| match compare_fn_nan_min(&w[0], &w[1]) { - Ordering::Equal => true, - Ordering::Greater => true, - Ordering::Less => false, - }) -} - -pub fn rolling_max( - values: &[T], - window_size: usize, - min_periods: usize, - center: bool, - weights: Option<&[f64]>, - _params: DynArgs, -) -> PolarsResult -where - T: NativeType + PartialOrd + IsFloat + Bounded + NumCast + Mul, -{ - match (center, weights) { - (true, None) => { - // will be O(n2) if we don't take this path we hope that we hit an early return on not sorted data - if is_reverse_sorted_max(values) { - rolling_apply_agg_window::, _, _>( +// Same as the window definition. The dispatch is identical up to the name. +macro_rules! 
rolling_minmax_func { + ($rolling_m:ident, $window:tt, $wtd_f:ident) => { + pub fn $rolling_m( + values: &[T], + window_size: usize, + min_periods: usize, + center: bool, + weights: Option<&[f64]>, + _params: DynArgs, + ) -> PolarsResult + where + T: NativeType + PartialOrd + IsFloat + Bounded + NumCast + Mul, + { + let offset_fn = match center { + true => det_offsets_center, + false => det_offsets, + }; + match weights { + None => rolling_apply_agg_window::<$window<_>, _, _>( values, window_size, min_periods, - det_offsets_center, + offset_fn, None, - ) - } else { - rolling_apply_agg_window::, _, _>( - values, - window_size, - min_periods, - det_offsets_center, - None, - ) - } - } - (false, None) => { - if is_reverse_sorted_max(values) { - rolling_apply_agg_window::, _, _>( - values, - window_size, - min_periods, - det_offsets, - None, - ) - } else { - rolling_apply_agg_window::, _, _>( - values, - window_size, - min_periods, - det_offsets, - None, - ) + ), + Some(weights) => { + assert!( + T::is_float(), + "implementation error, should only be reachable by float types" + ); + let weights = weights + .iter() + .map(|v| NumCast::from(*v).unwrap()) + .collect::>(); + no_nulls::rolling_apply_weights( + values, + window_size, + min_periods, + offset_fn, + $wtd_f, + &weights, + ) + } } } - (true, Some(weights)) => { - assert!( - T::is_float(), - "implementation error, should only be reachable by float types" - ); - let weights = weights - .iter() - .map(|v| NumCast::from(*v).unwrap()) - .collect::>(); - no_nulls::rolling_apply_weights( - values, - window_size, - min_periods, - det_offsets_center, - compute_max_weights, - &weights, - ) - } - (false, Some(weights)) => { - assert!( - T::is_float(), - "implementation error, should only be reachable by float types" - ); - let weights = weights - .iter() - .map(|v| NumCast::from(*v).unwrap()) - .collect::>(); - no_nulls::rolling_apply_weights( - values, - window_size, - min_periods, - det_offsets, - compute_max_weights, - &weights, - ) - } - } + }; } -pub fn is_sorted_min(values: &[T]) -> bool { - values - .windows(2) - .all(|w| match compare_fn_nan_min(&w[0], &w[1]) { - Ordering::Equal => true, - Ordering::Less => true, - Ordering::Greater => false, - }) -} - -pub fn rolling_min( - values: &[T], - window_size: usize, - min_periods: usize, - center: bool, - weights: Option<&[f64]>, - _params: DynArgs, -) -> PolarsResult -where - T: NativeType + PartialOrd + NumCast + Mul + Bounded + IsFloat, -{ - match (center, weights) { - (true, None) => { - // will be O(n2) if we don't take this path we hope that we hit an early return on not sorted data - if is_sorted_min(values) { - rolling_apply_agg_window::, _, _>( - values, - window_size, - min_periods, - det_offsets_center, - None, - ) - } else { - rolling_apply_agg_window::, _, _>( - values, - window_size, - min_periods, - det_offsets_center, - None, - ) - } - } - (false, None) => { - // will be O(n2) - if is_sorted_min(values) { - rolling_apply_agg_window::, _, _>( - values, - window_size, - min_periods, - det_offsets, - None, - ) - } else { - rolling_apply_agg_window::, _, _>( - values, - window_size, - min_periods, - det_offsets, - None, - ) - } - } - (true, Some(weights)) => { - assert!( - T::is_float(), - "implementation error, should only be reachable by float types" - ); - let weights = weights - .iter() - .map(|v| NumCast::from(*v).unwrap()) - .collect::>(); - no_nulls::rolling_apply_weights( - values, - window_size, - min_periods, - det_offsets_center, - compute_min_weights, - &weights, - ) - } - 
(false, Some(weights)) => { - assert!( - T::is_float(), - "implementation error, should only be reachable by float types" - ); - let weights = weights - .iter() - .map(|v| NumCast::from(*v).unwrap()) - .collect::>(); - no_nulls::rolling_apply_weights( - values, - window_size, - min_periods, - det_offsets, - compute_min_weights, - &weights, - ) - } - } -} +rolling_minmax_func!(rolling_min, MinWindow, compute_min_weights); +rolling_minmax_func!(rolling_max, MaxWindow, compute_max_weights); #[cfg(test)] mod test { From 5810a1dc9081eb0e8da32cca209d0a289cf79e52 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sat, 15 Jul 2023 09:57:32 +0200 Subject: [PATCH 18/37] fix(rust, python): sum aggregation empty set is 0, not null (#9894) --- .../src/chunked_array/upstream_traits.rs | 11 ++--- .../src/frame/groupby/aggregations/mod.rs | 41 +++++++++++++++---- polars/polars-lazy/src/tests/queries.rs | 2 +- polars/tests/it/lazy/groupby.rs | 2 +- py-polars/polars/expr/expr.py | 32 +++++++-------- .../unit/operations/test_aggregations.py | 4 ++ .../tests/unit/operations/test_rolling.py | 8 +++- .../tests/unit/operations/test_window.py | 2 +- py-polars/tests/unit/test_queries.py | 2 +- 9 files changed, 66 insertions(+), 38 deletions(-) diff --git a/polars/polars-core/src/chunked_array/upstream_traits.rs b/polars/polars-core/src/chunked_array/upstream_traits.rs index 4c3dbb4ae232..45e514c42234 100644 --- a/polars/polars-core/src/chunked_array/upstream_traits.rs +++ b/polars/polars-core/src/chunked_array/upstream_traits.rs @@ -421,14 +421,9 @@ where fn from_par_iter>(iter: I) -> Self { // Get linkedlist filled with different vec result from different threads let vectors = collect_into_linked_list(iter); - let capacity: usize = get_capacity_from_par_results(&vectors); - - let mut av = Vec::::with_capacity(capacity); - for v in vectors { - av.extend_from_slice(&v) - } - let arr = to_array::(av, None); - unsafe { NoNull::new(ChunkedArray::from_chunks("", vec![arr])) } + let vectors = vectors.into_iter().collect::>(); + let values = flatten_par(&vectors); + NoNull::new(ChunkedArray::new_vec("", values)) } } diff --git a/polars/polars-core/src/frame/groupby/aggregations/mod.rs b/polars/polars-core/src/frame/groupby/aggregations/mod.rs index c9400cd7d14c..2c17f80fc3c2 100644 --- a/polars/polars-core/src/frame/groupby/aggregations/mod.rs +++ b/polars/polars-core/src/frame/groupby/aggregations/mod.rs @@ -29,6 +29,7 @@ use crate::frame::groupby::GroupsIndicator; use crate::prelude::*; use crate::series::implementations::SeriesWrap; use crate::series::IsSorted; +use crate::utils::NoNull; use crate::{apply_method_physical_integer, POOL}; fn idx2usize(idx: &[IdxSize]) -> impl Iterator + ExactSizeIterator + '_ { @@ -167,6 +168,17 @@ where ca.into_series() } +// same helper as `_agg_helper_idx` but for aggregations that don't return an Option +pub fn _agg_helper_idx_no_null(groups: &GroupsIdx, f: F) -> Series +where + F: Fn((IdxSize, &Vec)) -> T::Native + Send + Sync, + T: PolarsNumericType, + ChunkedArray: IntoSeries, +{ + let ca: NoNull> = POOL.install(|| groups.into_par_iter().map(f).collect()); + ca.into_inner().into_series() +} + // helper that iterates on the `all: Vec` collection // this doesn't have traverse the `first: Vec` memory and is therefore faster fn agg_helper_idx_on_all(groups: &GroupsIdx, f: F) -> Series @@ -189,6 +201,16 @@ where ca.into_series() } +pub fn _agg_helper_slice_no_null(groups: &[[IdxSize; 2]], f: F) -> Series +where + F: Fn([IdxSize; 2]) -> T::Native + Send + Sync, + T: PolarsNumericType, + 
ChunkedArray: IntoSeries, +{ + let ca: NoNull> = POOL.install(|| groups.par_iter().copied().map(f).collect()); + ca.into_inner().into_series() +} + #[inline(always)] fn take_min(a: T, b: T) -> T { if a < b { @@ -548,19 +570,19 @@ where let ca = self.rechunk(); let arr = ca.downcast_iter().next().unwrap(); let no_nulls = arr.null_count() == 0; - _agg_helper_idx::(groups, |(first, idx)| { + _agg_helper_idx_no_null::(groups, |(first, idx)| { debug_assert!(idx.len() <= self.len()); if idx.is_empty() { - None + T::Native::zero() } else if idx.len() == 1 { - arr.get(first as usize) + arr.get(first as usize).unwrap_or(T::Native::zero()) } else if no_nulls { - Some(take_agg_no_null_primitive_iter_unchecked( + take_agg_no_null_primitive_iter_unchecked( arr, idx2usize(idx), |a, b| a + b, T::Native::zero(), - )) + ) } else { take_agg_primitive_iter_unchecked::( arr, @@ -569,6 +591,7 @@ where T::Native::zero(), idx.len() as IdxSize, ) + .unwrap_or(T::Native::zero()) } }) } @@ -593,14 +616,14 @@ where }; Self::from_chunks("", vec![arr]).into_series() } else { - _agg_helper_slice::(groups, |[first, len]| { + _agg_helper_slice_no_null::(groups, |[first, len]| { debug_assert!(len <= self.len() as IdxSize); match len { - 0 => None, - 1 => self.get(first as usize), + 0 => T::Native::zero(), + 1 => self.get(first as usize).unwrap_or(T::Native::zero()), _ => { let arr_group = _slice_from_offsets(self, first, len); - arr_group.sum() + arr_group.sum().unwrap_or(T::Native::zero()) } } }) diff --git a/polars/polars-lazy/src/tests/queries.rs b/polars/polars-lazy/src/tests/queries.rs index a1bc18dd27f7..2a6dc72fc4f9 100644 --- a/polars/polars-lazy/src/tests/queries.rs +++ b/polars/polars-lazy/src/tests/queries.rs @@ -907,7 +907,7 @@ fn test_lazy_groupby_filter() -> PolarsResult<()> { assert_eq!( Vec::from(out.column("b_sum").unwrap().i32().unwrap()), - [Some(6), None, None] + [Some(6), Some(0), Some(0)] ); assert_eq!( Vec::from(out.column("b_first").unwrap().i32().unwrap()), diff --git a/polars/tests/it/lazy/groupby.rs b/polars/tests/it/lazy/groupby.rs index c74cf3363ed0..6e3ed7e09666 100644 --- a/polars/tests/it/lazy/groupby.rs +++ b/polars/tests/it/lazy/groupby.rs @@ -27,7 +27,7 @@ fn test_filter_sort_diff_2984() -> PolarsResult<()> { .sort("group", Default::default()) .collect()?; - assert_eq!(Vec::from(out.column("id")?.i32()?), &[Some(1), None]); + assert_eq!(Vec::from(out.column("id")?.i32()?), &[Some(1), Some(0)]); Ok(()) } diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 768bed40458d..f1e7853c618d 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -3492,14 +3492,14 @@ def filter(self, predicate: Expr) -> Self: ... ] ... ).sort("group_col") shape: (2, 3) - ┌───────────┬──────┬─────┐ - │ group_col ┆ lt ┆ gte │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 │ - ╞═══════════╪══════╪═════╡ - │ g1 ┆ 1 ┆ 2 │ - │ g2 ┆ null ┆ 3 │ - └───────────┴──────┴─────┘ + ┌───────────┬─────┬─────┐ + │ group_col ┆ lt ┆ gte │ + │ --- ┆ --- ┆ --- │ + │ str ┆ i64 ┆ i64 │ + ╞═══════════╪═════╪═════╡ + │ g1 ┆ 1 ┆ 2 │ + │ g2 ┆ 0 ┆ 3 │ + └───────────┴─────┴─────┘ """ return self._from_pyexpr(self._pyexpr.filter(predicate._pyexpr)) @@ -3530,14 +3530,14 @@ def where(self, predicate: Expr) -> Self: ... ] ... 
).sort("group_col") shape: (2, 3) - ┌───────────┬──────┬─────┐ - │ group_col ┆ lt ┆ gte │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 │ - ╞═══════════╪══════╪═════╡ - │ g1 ┆ 1 ┆ 2 │ - │ g2 ┆ null ┆ 3 │ - └───────────┴──────┴─────┘ + ┌───────────┬─────┬─────┐ + │ group_col ┆ lt ┆ gte │ + │ --- ┆ --- ┆ --- │ + │ str ┆ i64 ┆ i64 │ + ╞═══════════╪═════╪═════╡ + │ g1 ┆ 1 ┆ 2 │ + │ g2 ┆ 0 ┆ 3 │ + └───────────┴─────┴─────┘ """ return self.filter(predicate) diff --git a/py-polars/tests/unit/operations/test_aggregations.py b/py-polars/tests/unit/operations/test_aggregations.py index 425729fc95e9..b0e731c3c536 100644 --- a/py-polars/tests/unit/operations/test_aggregations.py +++ b/py-polars/tests/unit/operations/test_aggregations.py @@ -276,3 +276,7 @@ def test_sum_empty_and_null_set() -> None: series = pl.Series("a", [None]) assert series.sum() == 0 + + df = pl.DataFrame({"a": [None, None, None], "b": [1, 1, 1]}) + assert df.select(pl.sum("a")).item() == 0.0 + assert df.groupby("b").agg(pl.sum("a"))["a"].item() == 0.0 diff --git a/py-polars/tests/unit/operations/test_rolling.py b/py-polars/tests/unit/operations/test_rolling.py index 9218de269fb1..81096064da74 100644 --- a/py-polars/tests/unit/operations/test_rolling.py +++ b/py-polars/tests/unit/operations/test_rolling.py @@ -50,7 +50,13 @@ def test_rolling_kernels_and_groupby_rolling( out1 = example_df.select( [ pl.col("dt"), - pl.col("values").rolling_sum(period, by="dt", closed=closed).alias("sum"), + # this differs from groupby aggregation because the empty window is + # null here + # where the sum aggregation of an empty set is 0 + pl.col("values") + .rolling_sum(period, by="dt", closed=closed) + .fill_null(0) + .alias("sum"), pl.col("values").rolling_var(period, by="dt", closed=closed).alias("var"), pl.col("values").rolling_mean(period, by="dt", closed=closed).alias("mean"), pl.col("values").rolling_std(period, by="dt", closed=closed).alias("std"), diff --git a/py-polars/tests/unit/operations/test_window.py b/py-polars/tests/unit/operations/test_window.py index 54643364cee5..67ccd2c9ce63 100644 --- a/py-polars/tests/unit/operations/test_window.py +++ b/py-polars/tests/unit/operations/test_window.py @@ -308,7 +308,7 @@ def test_window_5868() -> None: df = pl.DataFrame({"a": [None, 1, 2, 3, 3, 3, 4, 4]}) result = df.select(pl.col("a").sum().over("a")).get_column("a") - expected = pl.Series("a", [None, 1, 2, 9, 9, 9, 8, 8]) + expected = pl.Series("a", [0, 1, 2, 9, 9, 9, 8, 8]) assert_series_equal(result, expected) result = ( diff --git a/py-polars/tests/unit/test_queries.py b/py-polars/tests/unit/test_queries.py index 3e333b852df1..6f658157c275 100644 --- a/py-polars/tests/unit/test_queries.py +++ b/py-polars/tests/unit/test_queries.py @@ -194,7 +194,7 @@ def test_groupby_agg_equals_zero_3535() -> None: ).to_dict(False) == { "key": ["aa", "bb", "cc"], "val1": [10, 0, -99], - "val2": [None, 0.0, 10.5], + "val2": [0.0, 0.0, 10.5], } From c8a98f9d4c0fb0273e59b51b653dd6dcec187f99 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sat, 15 Jul 2023 10:03:15 +0200 Subject: [PATCH 19/37] fix(python): Handle `DataFrame.vstack` stacking itself (#9895) --- py-polars/polars/dataframe/frame.py | 24 ++++++---- py-polars/polars/series/series.py | 4 +- py-polars/src/dataframe.rs | 24 +++++----- .../tests/unit/{ => dataframe}/test_df.py | 16 ------- py-polars/tests/unit/dataframe/test_vstack.py | 46 +++++++++++++++++++ py-polars/tests/unit/test_lazy.py | 2 +- 6 files changed, 77 insertions(+), 39 deletions(-) rename py-polars/tests/unit/{ => 
dataframe}/test_df.py (99%) create mode 100644 py-polars/tests/unit/dataframe/test_vstack.py diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index dc648c8a2883..f3f1a71b8f0d 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -5811,16 +5811,17 @@ def hstack( else: return self._from_pydf(self._df.hstack([s._s for s in columns])) - def vstack(self, df: DataFrame, *, in_place: bool = False) -> Self: + @deprecated_alias(df="other") + def vstack(self, other: DataFrame, *, in_place: bool = False) -> Self: """ Grow this DataFrame vertically by stacking a DataFrame to it. Parameters ---------- - df + other DataFrame to stack. in_place - Modify in place + Modify in place. Examples -------- @@ -5853,12 +5854,19 @@ def vstack(self, df: DataFrame, *, in_place: bool = False) -> Self: """ if in_place: - self._df.vstack_mut(df._df) - return self - else: - return self._from_pydf(self._df.vstack(df._df)) + try: + self._df.vstack_mut(other._df) + return self + except RuntimeError as exc: + if str(exc) == "Already mutably borrowed": + self._df.vstack_mut(other._df.clone()) + return self + else: + raise exc + + return self._from_pydf(self._df.vstack(other._df)) - def extend(self, other: Self) -> Self: + def extend(self, other: DataFrame) -> Self: """ Extend the memory backed by this `DataFrame` with the values from `other`. diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index b36da7732e86..3f7b1a3f5c38 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -2358,12 +2358,12 @@ def append(self, other: Series, *, append_chunks: bool = True) -> Series: self._s.append(other._s) else: self._s.extend(other._s) + return self except RuntimeError as exc: if str(exc) == "Already mutably borrowed": - self.append(other.clone(), append_chunks=append_chunks) + return self.append(other.clone(), append_chunks=append_chunks) else: raise exc - return self def filter(self, predicate: Series | list[bool]) -> Self: """ diff --git a/py-polars/src/dataframe.rs b/py-polars/src/dataframe.rs index 3d8cf8de109a..11aeb760cb60 100644 --- a/py-polars/src/dataframe.rs +++ b/py-polars/src/dataframe.rs @@ -940,33 +940,33 @@ impl PyDataFrame { self.df.width() } + pub fn hstack(&self, columns: Vec) -> PyResult { + let columns = columns.to_series(); + let df = self.df.hstack(&columns).map_err(PyPolarsErr::from)?; + Ok(df.into()) + } + pub fn hstack_mut(&mut self, columns: Vec) -> PyResult<()> { let columns = columns.to_series(); self.df.hstack_mut(&columns).map_err(PyPolarsErr::from)?; Ok(()) } - pub fn hstack(&self, columns: Vec) -> PyResult { - let columns = columns.to_series(); - let df = self.df.hstack(&columns).map_err(PyPolarsErr::from)?; + pub fn vstack(&self, other: &PyDataFrame) -> PyResult { + let df = self.df.vstack(&other.df).map_err(PyPolarsErr::from)?; Ok(df.into()) } - pub fn extend(&mut self, df: &PyDataFrame) -> PyResult<()> { - self.df.extend(&df.df).map_err(PyPolarsErr::from)?; + pub fn vstack_mut(&mut self, other: &PyDataFrame) -> PyResult<()> { + self.df.vstack_mut(&other.df).map_err(PyPolarsErr::from)?; Ok(()) } - pub fn vstack_mut(&mut self, df: &PyDataFrame) -> PyResult<()> { - self.df.vstack_mut(&df.df).map_err(PyPolarsErr::from)?; + pub fn extend(&mut self, other: &PyDataFrame) -> PyResult<()> { + self.df.extend(&other.df).map_err(PyPolarsErr::from)?; Ok(()) } - pub fn vstack(&mut self, df: &PyDataFrame) -> PyResult { - let df = 
self.df.vstack(&df.df).map_err(PyPolarsErr::from)?; - Ok(df.into()) - } - pub fn drop_in_place(&mut self, name: &str) -> PyResult { let s = self.df.drop_in_place(name).map_err(PyPolarsErr::from)?; Ok(PySeries { series: s }) diff --git a/py-polars/tests/unit/test_df.py b/py-polars/tests/unit/dataframe/test_df.py similarity index 99% rename from py-polars/tests/unit/test_df.py rename to py-polars/tests/unit/dataframe/test_df.py index 6f1ac0d0e019..a2056ee74bef 100644 --- a/py-polars/tests/unit/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -704,22 +704,6 @@ def test_hstack_dataframe(in_place: bool) -> None: assert_frame_equal(df_out, expected) -@pytest.mark.parametrize("in_place", [True, False]) -def test_vstack(in_place: bool) -> None: - df1 = pl.DataFrame({"foo": [1, 2], "bar": [6, 7], "ham": ["a", "b"]}) - df2 = pl.DataFrame({"foo": [3, 4], "bar": [8, 9], "ham": ["c", "d"]}) - - expected = pl.DataFrame( - {"foo": [1, 2, 3, 4], "bar": [6, 7, 8, 9], "ham": ["a", "b", "c", "d"]} - ) - - out = df1.vstack(df2, in_place=in_place) - if in_place: - assert_frame_equal(df1, expected) - else: - assert_frame_equal(out, expected) - - def test_extend() -> None: with pl.StringCache(): df1 = pl.DataFrame( diff --git a/py-polars/tests/unit/dataframe/test_vstack.py b/py-polars/tests/unit/dataframe/test_vstack.py new file mode 100644 index 000000000000..ecf88a2f987f --- /dev/null +++ b/py-polars/tests/unit/dataframe/test_vstack.py @@ -0,0 +1,46 @@ +import pytest + +import polars as pl +from polars.testing import assert_frame_equal + + +@pytest.fixture() +def df1() -> pl.DataFrame: + return pl.DataFrame({"foo": [1, 2], "bar": [6, 7], "ham": ["a", "b"]}) + + +@pytest.fixture() +def df2() -> pl.DataFrame: + return pl.DataFrame({"foo": [3, 4], "bar": [8, 9], "ham": ["c", "d"]}) + + +def test_vstack(df1: pl.DataFrame, df2: pl.DataFrame) -> None: + result = df1.vstack(df2) + expected = pl.DataFrame( + {"foo": [1, 2, 3, 4], "bar": [6, 7, 8, 9], "ham": ["a", "b", "c", "d"]} + ) + assert_frame_equal(result, expected) + + +def test_vstack_in_place(df1: pl.DataFrame, df2: pl.DataFrame) -> None: + df1.vstack(df2, in_place=True) + expected = pl.DataFrame( + {"foo": [1, 2, 3, 4], "bar": [6, 7, 8, 9], "ham": ["a", "b", "c", "d"]} + ) + assert_frame_equal(df1, expected) + + +def test_vstack_self(df1: pl.DataFrame) -> None: + result = df1.vstack(df1) + expected = pl.DataFrame( + {"foo": [1, 2, 1, 2], "bar": [6, 7, 6, 7], "ham": ["a", "b", "a", "b"]} + ) + assert_frame_equal(result, expected) + + +def test_vstack_self_in_place(df1: pl.DataFrame) -> None: + df1.vstack(df1, in_place=True) + expected = pl.DataFrame( + {"foo": [1, 2, 1, 2], "bar": [6, 7, 6, 7], "ham": ["a", "b", "a", "b"]} + ) + assert_frame_equal(df1, expected) diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py index 1d885bcd893c..377ccb624b55 100644 --- a/py-polars/tests/unit/test_lazy.py +++ b/py-polars/tests/unit/test_lazy.py @@ -1099,7 +1099,7 @@ def test_lazy_concat(df: pl.DataFrame) -> None: out = pl.concat([df.lazy(), df.lazy()]).collect() assert out.shape == shape - assert_frame_equal(out, df.vstack(df.clone())) + assert_frame_equal(out, df.vstack(df)) def test_self_join() -> None: From edc9894b1e5737ab6a0e1ba7b4c35e82d9f1ba21 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sat, 15 Jul 2023 10:49:07 +0200 Subject: [PATCH 20/37] feat(rust, python): pass through unknown schema in unnest (#9896) --- .../src/logical_plan/functions/mod.rs | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 
deletions(-) diff --git a/polars/polars-lazy/polars-plan/src/logical_plan/functions/mod.rs b/polars/polars-lazy/polars-plan/src/logical_plan/functions/mod.rs index 540ce8962960..550040af3b73 100644 --- a/polars/polars-lazy/polars-plan/src/logical_plan/functions/mod.rs +++ b/polars/polars-lazy/polars-plan/src/logical_plan/functions/mod.rs @@ -196,15 +196,23 @@ impl FunctionNode { let mut new_schema = Schema::with_capacity(input_schema.len() * 2); for (name, dtype) in input_schema.iter() { if _columns.iter().any(|item| item.as_ref() == name.as_str()) { - if let DataType::Struct(flds) = dtype { - for fld in flds { - new_schema - .with_column(fld.name().clone(), fld.data_type().clone()); + match dtype { + DataType::Struct(flds) => { + for fld in flds { + new_schema.with_column( + fld.name().clone(), + fld.data_type().clone(), + ); + } + } + DataType::Unknown => { + // pass through unknown + } + _ => { + polars_bail!( + SchemaMismatch: "expected struct dtype, got: `{}`", dtype + ); } - } else { - polars_bail!( - SchemaMismatch: "expected struct dtype, got: `{}`", dtype - ); } } else { new_schema.with_column(name.clone(), dtype.clone()); From af8596582a7f1c300d225cc5ab5aba067a0434c8 Mon Sep 17 00:00:00 2001 From: Bela Stoyan Date: Sat, 15 Jul 2023 10:49:31 +0200 Subject: [PATCH 21/37] fix(rust,python) respect original series dtype when constructing `LitIter` (#9886) --- .../physical_plan/expressions/group_iter.rs | 55 ++++++++++++------- py-polars/tests/unit/test_schema.py | 22 ++++++++ 2 files changed, 57 insertions(+), 20 deletions(-) diff --git a/polars/polars-lazy/src/physical_plan/expressions/group_iter.rs b/polars/polars-lazy/src/physical_plan/expressions/group_iter.rs index c7813506f18b..048b53e7e649 100644 --- a/polars/polars-lazy/src/physical_plan/expressions/group_iter.rs +++ b/polars/polars-lazy/src/physical_plan/expressions/group_iter.rs @@ -14,22 +14,29 @@ impl<'a> AggregationContext<'a> { self.groups(); let s = self.series().rechunk(); let name = if keep_names { s.name() } else { "" }; - Box::new(LitIter::new( - s.array_ref(0).clone(), - self.groups.len(), - name, - )) + // safety: dtype is correct + unsafe { + Box::new(LitIter::new( + s.array_ref(0).clone(), + self.groups.len(), + s._dtype(), + name, + )) + } } AggState::AggregatedFlat(_) => { self.groups(); let s = self.series(); let name = if keep_names { s.name() } else { "" }; - Box::new(FlatIter::new( - s.array_ref(0).clone(), - self.groups.len(), - s.dtype(), - name, - )) + // safety: dtype is correct + unsafe { + Box::new(FlatIter::new( + s.array_ref(0).clone(), + self.groups.len(), + s.dtype(), + name, + )) + } } AggState::AggregatedList(_) => { let s = self.series(); @@ -59,8 +66,15 @@ struct LitIter<'a> { } impl<'a> LitIter<'a> { - fn new(array: ArrayRef, len: usize, name: &str) -> Self { - let mut series_container = Box::pin(Series::try_from((name, array.clone())).unwrap()); + /// # Safety + /// Caller must ensure the given `logical` dtype belongs to `array`. 
+ unsafe fn new(array: ArrayRef, len: usize, logical: &DataType, name: &str) -> Self { + let mut series_container = Box::pin(Series::from_chunks_and_dtype_unchecked( + name, + vec![array], + logical, + )); + let ref_s = &mut *series_container as *mut Series; Self { offset: 0, @@ -100,13 +114,14 @@ struct FlatIter<'a> { } impl<'a> FlatIter<'a> { - fn new(array: ArrayRef, len: usize, logical: &DataType, name: &str) -> Self { - let mut series_container = Box::pin( - Series::try_from((name, array.clone())) - .unwrap() - .cast(logical) - .unwrap(), - ); + /// # Safety + /// Caller must ensure the given `logical` dtype belongs to `array`. + unsafe fn new(array: ArrayRef, len: usize, logical: &DataType, name: &str) -> Self { + let mut series_container = Box::pin(Series::from_chunks_and_dtype_unchecked( + name, + vec![array.clone()], + logical, + )); let ref_s = &mut *series_container as *mut Series; Self { array, diff --git a/py-polars/tests/unit/test_schema.py b/py-polars/tests/unit/test_schema.py index 3f3a28cc0767..d88bc41df905 100644 --- a/py-polars/tests/unit/test_schema.py +++ b/py-polars/tests/unit/test_schema.py @@ -1,5 +1,6 @@ from __future__ import annotations +from datetime import date, timedelta from typing import Any import pytest @@ -513,3 +514,24 @@ def test_concat_vertically_relaxed() -> None: "a": [1.0, 0.2, 1.0, 2.0], "b": [None, 0.1, 2.0, 1.0], } + + +def test_lit_iter_schema() -> None: + df = pl.DataFrame( + { + "key": ["A", "A", "A", "A"], + "dates": [ + date(1970, 1, 1), + date(1970, 1, 1), + date(1970, 1, 2), + date(1970, 1, 3), + ], + } + ) + + assert df.groupby("key").agg(pl.col("dates").unique() + timedelta(days=1)).to_dict( + False + ) == { + "key": ["A"], + "dates": [[date(1970, 1, 2), date(1970, 1, 3), date(1970, 1, 4)]], + } From 672922491bac1f144747d39b864106d90010fd1e Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sat, 15 Jul 2023 11:50:03 +0200 Subject: [PATCH 22/37] rust polars 0.31.0 (#9898) --- Cargo.toml | 8 ++++---- polars-cli/Cargo.toml | 6 +++--- polars/Cargo.toml | 14 +++++++------- polars/polars-algo/Cargo.toml | 6 +++--- polars/polars-arrow/Cargo.toml | 2 +- polars/polars-core/Cargo.toml | 8 ++++---- polars/polars-error/Cargo.toml | 2 +- polars/polars-io/Cargo.toml | 12 ++++++------ polars/polars-json/Cargo.toml | 6 +++--- polars/polars-lazy/Cargo.toml | 18 +++++++++--------- polars/polars-lazy/polars-pipe/Cargo.toml | 14 +++++++------- polars/polars-lazy/polars-plan/Cargo.toml | 12 ++++++------ polars/polars-ops/Cargo.toml | 8 ++++---- .../src/chunked_array/array/min_max.rs | 2 +- .../src/chunked_array/list/any_all.rs | 2 +- polars/polars-row/Cargo.toml | 4 ++-- polars/polars-sql/Cargo.toml | 8 ++++---- polars/polars-time/Cargo.toml | 8 ++++---- 18 files changed, 70 insertions(+), 70 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6fa06baf9c27..be81549ee883 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ exclude = [ ] [workspace.package] -version = "0.30.0" +version = "0.31.1" [workspace.dependencies] rayon = "1.6" @@ -33,11 +33,11 @@ strum_macros = "0.25" [workspace.dependencies.arrow] package = "arrow2" # git = "https://github.com/jorgecarleitao/arrow2" -git = "https://github.com/ritchie46/arrow2" +# git = "https://github.com/ritchie46/arrow2" # rev = "2d2e7053f9a50810bfe9cecff25ab39089aef98e" # path = "../arrow2" -branch = "polars_2023-06-26" -version = "0.17" +# branch = "polars_2023-06-26" +version = "0.17.2" default-features = false features = [ "compute_aggregate", diff --git a/polars-cli/Cargo.toml b/polars-cli/Cargo.toml 
index 8eca407c93ed..f6d8bd3c56c9 100644 --- a/polars-cli/Cargo.toml +++ b/polars-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "polars-cli" -version = "0.2.0" +version = "0.3.0" edition = "2021" license = "MIT" repository = "https://github.com/pola-rs/polars" @@ -28,11 +28,11 @@ ciborium = "0.2.0" clap = { version = "4.2.2", features = ["derive", "cargo"] } nu-ansi-term = { version = "0.47.0", optional = true } once_cell.workspace = true -polars = { version = "0.30.0", path = "../polars", features = ["lazy", "sql", "dtype-full", "serde-lazy"] } +polars = { version = "0.31.1", path = "../polars", features = ["lazy", "sql", "dtype-full", "serde-lazy"] } reedline = { version = "0.21.0" } serde = { version = "1.0.160", features = ["derive"] } sqlparser = "0.34" tmp_env = "0.1.1" [target.'cfg(target_os = "linux")'.dependencies] -jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] } +jemallocator = { version = "0.5.0", features = ["disable_initial_exec_tls"] } diff --git a/polars/Cargo.toml b/polars/Cargo.toml index cdd74c68a9cd..48a3ef38dc15 100644 --- a/polars/Cargo.toml +++ b/polars/Cargo.toml @@ -312,13 +312,13 @@ bench = [ ] [dependencies] -polars-algo = { version = "0.30.0", path = "./polars-algo", optional = true } -polars-core = { version = "0.30.0", path = "./polars-core", features = ["docs"], default-features = false } -polars-io = { version = "0.30.0", path = "./polars-io", features = [], default-features = false, optional = true } -polars-lazy = { version = "0.30.0", path = "./polars-lazy", features = [], default-features = false, optional = true } -polars-ops = { version = "0.30.0", path = "./polars-ops" } -polars-sql = { version = "0.30.0", path = "./polars-sql", default-features = false, optional = true } -polars-time = { version = "0.30.0", path = "./polars-time", default-features = false, optional = true } +polars-algo = { version = "0.31.1", path = "./polars-algo", optional = true } +polars-core = { version = "0.31.1", path = "./polars-core", features = ["docs"], default-features = false } +polars-io = { version = "0.31.1", path = "./polars-io", features = [], default-features = false, optional = true } +polars-lazy = { version = "0.31.1", path = "./polars-lazy", features = [], default-features = false, optional = true } +polars-ops = { version = "0.31.1", path = "./polars-ops" } +polars-sql = { version = "0.31.1", path = "./polars-sql", default-features = false, optional = true } +polars-time = { version = "0.31.1", path = "./polars-time", default-features = false, optional = true } # enable js feature for getrandom to work in wasm [target.'cfg(target_family = "wasm")'.dependencies.getrandom] diff --git a/polars/polars-algo/Cargo.toml b/polars/polars-algo/Cargo.toml index 8f3d38b2659e..f3b2dc979ccf 100644 --- a/polars/polars-algo/Cargo.toml +++ b/polars/polars-algo/Cargo.toml @@ -9,9 +9,9 @@ description = "Algorithms built upon Polars primitives" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -polars-core = { version = "0.30.0", path = "../polars-core", features = ["dtype-categorical", "asof_join"], default-features = false } -polars-lazy = { version = "0.30.0", path = "../polars-lazy", features = ["asof_join", "concat_str", "strings"] } -polars-ops = { version = "0.30.0", path = "../polars-ops", features = ["dtype-categorical", "asof_join"], default-features = false } +polars-core = { version = "0.31.1", path = "../polars-core", features = ["dtype-categorical", "asof_join"], 
default-features = false } +polars-lazy = { version = "0.31.1", path = "../polars-lazy", features = ["asof_join", "concat_str", "strings"] } +polars-ops = { version = "0.31.1", path = "../polars-ops", features = ["dtype-categorical", "asof_join"], default-features = false } [package.metadata.docs.rs] all-features = true diff --git a/polars/polars-arrow/Cargo.toml b/polars/polars-arrow/Cargo.toml index ad7f29ad3334..ef42608fe1cc 100644 --- a/polars/polars-arrow/Cargo.toml +++ b/polars/polars-arrow/Cargo.toml @@ -17,7 +17,7 @@ ethnum = { version = "1.3.2", optional = true } hashbrown.workspace = true multiversion.workspace = true num-traits.workspace = true -polars-error = { version = "0.30.0", path = "../polars-error" } +polars-error = { version = "0.31.1", path = "../polars-error" } serde = { version = "1", features = ["derive"], optional = true } thiserror.workspace = true diff --git a/polars/polars-core/Cargo.toml b/polars/polars-core/Cargo.toml index a6ccda4ace62..f09c883b08c8 100644 --- a/polars/polars-core/Cargo.toml +++ b/polars/polars-core/Cargo.toml @@ -162,10 +162,10 @@ ndarray = { version = "0.15", optional = true, default_features = false } num-traits.workspace = true object_store = { version = "0.6.0", default-features = false, optional = true } once_cell.workspace = true -polars-arrow = { version = "0.30.0", path = "../polars-arrow", features = ["compute"] } -polars-error = { version = "0.30.0", path = "../polars-error" } -polars-row = { version = "0.30.0", path = "../polars-row" } -polars-utils = { version = "0.30.0", path = "../polars-utils" } +polars-arrow = { version = "0.31.1", path = "../polars-arrow", features = ["compute"] } +polars-error = { version = "0.31.1", path = "../polars-error" } +polars-row = { version = "0.31.1", path = "../polars-row" } +polars-utils = { version = "0.31.1", path = "../polars-utils" } rand = { version = "0.8", optional = true, features = ["small_rng", "std"] } rand_distr = { version = "0.4", optional = true } rayon.workspace = true diff --git a/polars/polars-error/Cargo.toml b/polars/polars-error/Cargo.toml index a5ded6c1bfe3..1f4fb3a9a393 100644 --- a/polars/polars-error/Cargo.toml +++ b/polars/polars-error/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "polars-error" -version = "0.30.0" +version.workspace = true edition = "2021" license = "MIT" repository = "https://github.com/pola-rs/polars" diff --git a/polars/polars-io/Cargo.toml b/polars/polars-io/Cargo.toml index da041b52edf0..4c04dcd1d23c 100644 --- a/polars/polars-io/Cargo.toml +++ b/polars/polars-io/Cargo.toml @@ -67,12 +67,12 @@ memmap = { package = "memmap2", version = "0.5.2", optional = true } num-traits.workspace = true object_store = { version = "0.6.0", default-features = false, optional = true } once_cell = "1" -polars-arrow = { version = "0.30.0", path = "../polars-arrow" } -polars-core = { version = "0.30.0", path = "../polars-core", features = [], default-features = false } -polars-error = { version = "0.30.0", path = "../polars-error", default-features = false } -polars-json = { version = "0.30.0", optional = true, path = "../polars-json" } -polars-time = { version = "0.30.0", path = "../polars-time", features = [], default-features = false, optional = true } -polars-utils = { version = "0.30.0", path = "../polars-utils" } +polars-arrow = { version = "0.31.1", path = "../polars-arrow" } +polars-core = { version = "0.31.1", path = "../polars-core", features = [], default-features = false } +polars-error = { version = "0.31.1", path = "../polars-error", default-features = 
false } +polars-json = { version = "0.31.1", optional = true, path = "../polars-json" } +polars-time = { version = "0.31.1", path = "../polars-time", features = [], default-features = false, optional = true } +polars-utils = { version = "0.31.1", path = "../polars-utils" } rayon.workspace = true regex = "1.6" serde = { version = "1", features = ["derive"], optional = true } diff --git a/polars/polars-json/Cargo.toml b/polars/polars-json/Cargo.toml index 1a365e579e3b..bc3fef2e38fa 100644 --- a/polars/polars-json/Cargo.toml +++ b/polars/polars-json/Cargo.toml @@ -16,7 +16,7 @@ fallible-streaming-iterator = "0.1" hashbrown.workspace = true indexmap.workspace = true num-traits.workspace = true -polars-arrow = { version = "0.30.0", path = "../polars-arrow", default-features = false } -polars-error = { version = "0.30.0", path = "../polars-error" } -polars-utils = { version = "0.30.0", path = "../polars-utils" } +polars-arrow = { version = "0.31.1", path = "../polars-arrow", default-features = false } +polars-error = { version = "0.31.1", path = "../polars-error" } +polars-utils = { version = "0.31.1", path = "../polars-utils" } simd-json = { version = "0.10", features = ["allow-non-simd", "known-key"] } diff --git a/polars/polars-lazy/Cargo.toml b/polars/polars-lazy/Cargo.toml index ff3a9048b524..e43dba048ce8 100644 --- a/polars/polars-lazy/Cargo.toml +++ b/polars/polars-lazy/Cargo.toml @@ -17,15 +17,15 @@ ahash.workspace = true bitflags.workspace = true glob = "0.3" once_cell = "1" -polars-arrow = { version = "0.30.0", path = "../polars-arrow" } -polars-core = { version = "0.30.0", path = "../polars-core", features = ["lazy", "zip_with", "random"], default-features = false } -polars-io = { version = "0.30.0", path = "../polars-io", features = ["lazy", "csv"], default-features = false } -polars-json = { version = "0.30.0", path = "../polars-json", optional = true } -polars-ops = { version = "0.30.0", path = "../polars-ops", default-features = false } -polars-pipe = { version = "0.30.0", path = "./polars-pipe", optional = true } -polars-plan = { version = "0.30.0", path = "./polars-plan" } -polars-time = { version = "0.30.0", path = "../polars-time", optional = true } -polars-utils = { version = "0.30.0", path = "../polars-utils" } +polars-arrow = { version = "0.31.1", path = "../polars-arrow" } +polars-core = { version = "0.31.1", path = "../polars-core", features = ["lazy", "zip_with", "random"], default-features = false } +polars-io = { version = "0.31.1", path = "../polars-io", features = ["lazy", "csv"], default-features = false } +polars-json = { version = "0.31.1", path = "../polars-json", optional = true } +polars-ops = { version = "0.31.1", path = "../polars-ops", default-features = false } +polars-pipe = { version = "0.31.1", path = "./polars-pipe", optional = true } +polars-plan = { version = "0.31.1", path = "./polars-plan" } +polars-time = { version = "0.31.1", path = "../polars-time", optional = true } +polars-utils = { version = "0.31.1", path = "../polars-utils" } pyo3 = { version = "0.19", optional = true } rayon.workspace = true smartstring.workspace = true diff --git a/polars/polars-lazy/polars-pipe/Cargo.toml b/polars/polars-lazy/polars-pipe/Cargo.toml index 608648f9940d..5ee405d57da8 100644 --- a/polars/polars-lazy/polars-pipe/Cargo.toml +++ b/polars/polars-lazy/polars-pipe/Cargo.toml @@ -14,13 +14,13 @@ crossbeam-queue = { version = "0.3", optional = true } enum_dispatch = "0.3" hashbrown.workspace = true num-traits.workspace = true -polars-arrow = { version = "0.30.0", 
path = "../../polars-arrow", default-features = false } -polars-core = { version = "0.30.0", path = "../../polars-core", features = ["lazy", "zip_with", "random"], default-features = false } -polars-io = { version = "0.30.0", path = "../../polars-io", default-features = false, features = ["ipc", "async"] } -polars-ops = { version = "0.30.0", path = "../../polars-ops", features = ["search_sorted"] } -polars-plan = { version = "0.30.0", path = "../polars-plan", default-features = false, features = ["compile"] } -polars-row = { version = "0.30.0", path = "../../polars-row" } -polars-utils = { version = "0.30.0", path = "../../polars-utils", features = ["sysinfo"] } +polars-arrow = { version = "0.31.1", path = "../../polars-arrow", default-features = false } +polars-core = { version = "0.31.1", path = "../../polars-core", features = ["lazy", "zip_with", "random"], default-features = false } +polars-io = { version = "0.31.1", path = "../../polars-io", default-features = false, features = ["ipc", "async"] } +polars-ops = { version = "0.31.1", path = "../../polars-ops", features = ["search_sorted"] } +polars-plan = { version = "0.31.1", path = "../polars-plan", default-features = false, features = ["compile"] } +polars-row = { version = "0.31.1", path = "../../polars-row" } +polars-utils = { version = "0.31.1", path = "../../polars-utils", features = ["sysinfo"] } rayon.workspace = true smartstring = { version = "1" } diff --git a/polars/polars-lazy/polars-plan/Cargo.toml b/polars/polars-lazy/polars-plan/Cargo.toml index 0ef24d05de08..090f7eded60a 100644 --- a/polars/polars-lazy/polars-plan/Cargo.toml +++ b/polars/polars-lazy/polars-plan/Cargo.toml @@ -16,12 +16,12 @@ chrono-tz = { version = "0.8", optional = true } ciborium = { version = "0.2", optional = true } futures = { version = "0.3.25", optional = true } once_cell.workspace = true -polars-arrow = { version = "0.30.0", path = "../../polars-arrow" } -polars-core = { version = "0.30.0", path = "../../polars-core", features = ["lazy", "zip_with", "random"], default-features = false } -polars-io = { version = "0.30.0", path = "../../polars-io", features = ["lazy", "csv"], default-features = false } -polars-ops = { version = "0.30.0", path = "../../polars-ops", default-features = false } -polars-time = { version = "0.30.0", path = "../../polars-time", optional = true } -polars-utils = { version = "0.30.0", path = "../../polars-utils" } +polars-arrow = { version = "0.31.1", path = "../../polars-arrow" } +polars-core = { version = "0.31.1", path = "../../polars-core", features = ["lazy", "zip_with", "random"], default-features = false } +polars-io = { version = "0.31.1", path = "../../polars-io", features = ["lazy", "csv"], default-features = false } +polars-ops = { version = "0.31.1", path = "../../polars-ops", default-features = false } +polars-time = { version = "0.31.1", path = "../../polars-time", optional = true } +polars-utils = { version = "0.31.1", path = "../../polars-utils" } pyo3 = { version = "0.19", optional = true } rayon.workspace = true regex = { version = "1.6", optional = true } diff --git a/polars/polars-ops/Cargo.toml b/polars/polars-ops/Cargo.toml index dfd0ce03a412..dae80e271e0c 100644 --- a/polars/polars-ops/Cargo.toml +++ b/polars/polars-ops/Cargo.toml @@ -18,10 +18,10 @@ hex = { version = "0.4", optional = true } indexmap.workspace = true jsonpath_lib = { version = "0.3.0", optional = true, git = "https://github.com/ritchie46/jsonpath", branch = "improve_compiled" } memchr.workspace = true -polars-arrow = { version = 
"0.30.0", path = "../polars-arrow", default-features = false } -polars-core = { version = "0.30.0", path = "../polars-core", features = [], default-features = false } -polars-json = { version = "0.30.0", optional = true, path = "../polars-json", default-features = false } -polars-utils = { version = "0.30.0", path = "../polars-utils", default-features = false } +polars-arrow = { version = "0.31.1", path = "../polars-arrow", default-features = false } +polars-core = { version = "0.31.1", path = "../polars-core", features = [], default-features = false } +polars-json = { version = "0.31.1", optional = true, path = "../polars-json", default-features = false } +polars-utils = { version = "0.31.1", path = "../polars-utils", default-features = false } serde = { version = "1", features = ["derive"], optional = true } serde_json = { version = "1", optional = true } smartstring.workspace = true diff --git a/polars/polars-ops/src/chunked_array/array/min_max.rs b/polars/polars-ops/src/chunked_array/array/min_max.rs index 22744120f7bb..ac4c85ced9ed 100644 --- a/polars/polars-ops/src/chunked_array/array/min_max.rs +++ b/polars/polars-ops/src/chunked_array/array/min_max.rs @@ -30,7 +30,7 @@ where (0..values.len()) .step_by(width) .map(|start| { - let sliced = values.clone().sliced_unchecked(start, start + width); + let sliced = unsafe { values.clone().sliced_unchecked(start, start + width) }; arr_agg(sliced) }) .collect() diff --git a/polars/polars-ops/src/chunked_array/list/any_all.rs b/polars/polars-ops/src/chunked_array/list/any_all.rs index 689955056865..550863cce5a2 100644 --- a/polars/polars-ops/src/chunked_array/list/any_all.rs +++ b/polars/polars-ops/src/chunked_array/list/any_all.rs @@ -30,7 +30,7 @@ where // TODO! // we can speed this upp if the boolean array doesn't have nulls // Then we can work directly on the byte slice. 
- let val = values.clone().sliced_unchecked(start, len); + let val = unsafe { values.clone().sliced_unchecked(start, len) }; start = end; op(&val) }); diff --git a/polars/polars-row/Cargo.toml b/polars/polars-row/Cargo.toml index d3120a649b64..5ccc9a647763 100644 --- a/polars/polars-row/Cargo.toml +++ b/polars/polars-row/Cargo.toml @@ -10,5 +10,5 @@ description = "Row encodings for the Polars DataFrame library" [dependencies] arrow.workspace = true -polars-error = { version = "0.30.0", path = "../polars-error" } -polars-utils = { version = "0.30.0", path = "../polars-utils" } +polars-error = { version = "0.31.1", path = "../polars-error" } +polars-utils = { version = "0.31.1", path = "../polars-utils" } diff --git a/polars/polars-sql/Cargo.toml b/polars/polars-sql/Cargo.toml index 30586f61797a..db7871220d6b 100644 --- a/polars/polars-sql/Cargo.toml +++ b/polars/polars-sql/Cargo.toml @@ -15,10 +15,10 @@ ipc = ["polars-lazy/ipc"] parquet = ["polars-lazy/parquet"] [dependencies] -polars-arrow = { version = "0.30.0", path = "../polars-arrow", features = ["like"] } -polars-core = { version = "0.30.0", path = "../polars-core", features = [] } -polars-lazy = { version = "0.30.0", path = "../polars-lazy", features = ["compile", "strings", "cross_join", "trigonometry", "abs", "round_series", "log", "regex", "is_in", "meta", "cum_agg"] } -polars-plan = { version = "0.30.0", path = "../polars-lazy/polars-plan", features = ["compile"] } +polars-arrow = { version = "0.31.1", path = "../polars-arrow", features = ["like"] } +polars-core = { version = "0.31.1", path = "../polars-core", features = [] } +polars-lazy = { version = "0.31.1", path = "../polars-lazy", features = ["compile", "strings", "cross_join", "trigonometry", "abs", "round_series", "log", "regex", "is_in", "meta", "cum_agg"] } +polars-plan = { version = "0.31.1", path = "../polars-lazy/polars-plan", features = ["compile"] } serde = "1" serde_json = { version = "1" } # sqlparser = { git = "https://github.com/sqlparser-rs/sqlparser-rs.git", rev = "ae3b5844c839072c235965fe0d1bddc473dced87" } diff --git a/polars/polars-time/Cargo.toml b/polars/polars-time/Cargo.toml index 4eb725ff7150..e7e0ff20adee 100644 --- a/polars/polars-time/Cargo.toml +++ b/polars/polars-time/Cargo.toml @@ -15,10 +15,10 @@ chrono = { version = "0.4", default-features = false, features = ["std"] } chrono-tz = { version = "0.8", optional = true } now = "0.1" once_cell.workspace = true -polars-arrow = { version = "0.30.0", path = "../polars-arrow", features = ["compute", "temporal"] } -polars-core = { version = "0.30.0", path = "../polars-core", default-features = false, features = ["dtype-datetime", "dtype-duration", "dtype-time", "dtype-date"] } -polars-ops = { version = "0.30.0", path = "../polars-ops" } -polars-utils = { version = "0.30.0", path = "../polars-utils" } +polars-arrow = { version = "0.31.1", path = "../polars-arrow", features = ["compute", "temporal"] } +polars-core = { version = "0.31.1", path = "../polars-core", default-features = false, features = ["dtype-datetime", "dtype-duration", "dtype-time", "dtype-date"] } +polars-ops = { version = "0.31.1", path = "../polars-ops" } +polars-utils = { version = "0.31.1", path = "../polars-utils" } regex = "1.7.1" serde = { version = "1", features = ["derive"], optional = true } smartstring.workspace = true From c35b9817a8230f50694b1cb5290ffd379e4777f0 Mon Sep 17 00:00:00 2001 From: Thomas Aarholt Date: Sat, 15 Jul 2023 16:41:17 +0200 Subject: [PATCH 23/37] docs(python): Mention func_horizontal on deprecated func 
docstrings (#9863) Co-authored-by: Stijn de Gooijer --- .../polars/functions/aggregation/vertical.py | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/py-polars/polars/functions/aggregation/vertical.py b/py-polars/polars/functions/aggregation/vertical.py index 1f0e3adfbee1..fa345cbeb43c 100644 --- a/py-polars/polars/functions/aggregation/vertical.py +++ b/py-polars/polars/functions/aggregation/vertical.py @@ -40,7 +40,7 @@ def all( Otherwise, this function computes the bitwise AND horizontally across multiple columns. - **This functionality is deprecated**. + **This functionality is deprecated**, use ``pl.all_horizontal`` instead. Parameters ---------- @@ -50,6 +50,10 @@ def all( *more_exprs Additional columns to use in the aggregation, specified as positional arguments. + See Also + -------- + all_horizontal + Examples -------- Selecting all columns. @@ -126,7 +130,11 @@ def any(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr | b Otherwise, this function computes the bitwise OR horizontally across multiple columns. - **This functionality is deprecated**. + **This functionality is deprecated**, use ``pl.any_horizontal`` instead. + + See Also + -------- + any_horizontal Parameters ---------- @@ -195,7 +203,7 @@ def max(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr | A Otherwise, this function computes the maximum value horizontally across multiple columns. - **This functionality is deprecated**. + **This functionality is deprecated**, use ``pl.max_horizontal`` instead. Parameters ---------- @@ -205,6 +213,10 @@ def max(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr | A *more_exprs Additional columns to use in the aggregation, specified as positional arguments. + See Also + -------- + max_horizontal + Examples -------- Get the maximum value of a column by passing a single column name. @@ -291,7 +303,7 @@ def min( Otherwise, this function computes the minimum value horizontally across multiple columns. - **This functionality is deprecated**. + **This functionality is deprecated**, use ``pl.min_horizontal`` instead. Parameters ---------- @@ -301,6 +313,10 @@ def min( *more_exprs Additional columns to use in the aggregation, specified as positional arguments. + See Also + -------- + min_horizontal + Examples -------- Get the minimum value of a column by passing a single column name. @@ -387,7 +403,7 @@ def sum( **This functionality is deprecated**. Otherwise, this function computes the sum horizontally across multiple columns. - **This functionality is deprecated**. + **This functionality is deprecated**, use ``pl.sum_horizontal`` instead. Parameters ---------- @@ -397,6 +413,10 @@ def sum( *more_exprs Additional columns to use in the aggregation, specified as positional arguments. + See Also + -------- + sum_horizontal + Examples -------- Sum a column by name: @@ -485,7 +505,7 @@ def cumsum( Otherwise, this function computes the cumulative sum horizontally across multiple columns. - **This functionality is deprecated**. + **This functionality is deprecated**, use ``pl.cumsum_horizontal`` instead. Parameters ---------- @@ -495,6 +515,10 @@ def cumsum( *more_exprs Additional columns to use in the aggregation, specified as positional arguments. 
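The migration these docstring notes point to is mechanical. A minimal sketch of the deprecated horizontal use next to the dedicated `*_horizontal` replacement (the example frame and column names are illustrative, not taken from this patch):

    import polars as pl

    df = pl.DataFrame({"a": [1, 2], "b": [10, 20]})

    # Deprecated: passing multiple columns to pl.sum() computed a row-wise
    # (horizontal) sum and now emits a DeprecationWarning.
    # Preferred: the dedicated horizontal function.
    out = df.select(pl.sum_horizontal("a", "b").alias("row_sum"))
    # row_sum: [11, 22]

The same substitution applies to `all`, `any`, `min`, `max` and `cumsum`.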
+ See Also + -------- + cumsum_horizontal + Examples -------- >>> df = pl.DataFrame( From 2f95f84f4ed0c50eb57938d98f2b9be0ee0d9cfc Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sat, 15 Jul 2023 17:46:14 +0200 Subject: [PATCH 24/37] chore(python): Workaround for PyCharm deprecation warning (#9907) --- .../polars/functions/aggregation/vertical.py | 38 ++++++------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/py-polars/polars/functions/aggregation/vertical.py b/py-polars/polars/functions/aggregation/vertical.py index fa345cbeb43c..5d210e9fb1e8 100644 --- a/py-polars/polars/functions/aggregation/vertical.py +++ b/py-polars/polars/functions/aggregation/vertical.py @@ -100,11 +100,7 @@ def all( elif isinstance(exprs, str): return F.col(exprs).all() - warnings.warn( - "using `all` for horizontal computation is deprecated. Use `all_horizontal` instead.", - DeprecationWarning, - stacklevel=find_stacklevel(), - ) + _warn_for_deprecated_horizontal_use("all") return F.all_horizontal(exprs, *more_exprs) @@ -174,11 +170,7 @@ def any(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr | b elif isinstance(exprs, str): return F.col(exprs).any() - warnings.warn( - "using `any` for horizontal computation is deprecated. Use `any_horizontal` instead.", - DeprecationWarning, - stacklevel=find_stacklevel(), - ) + _warn_for_deprecated_horizontal_use("any") return F.any_horizontal(exprs, *more_exprs) @@ -272,11 +264,7 @@ def max(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr | A elif isinstance(exprs, str): return F.col(exprs).max() - warnings.warn( - "using `max` for horizontal computation is deprecated. Use `max_horizontal` instead.", - DeprecationWarning, - stacklevel=find_stacklevel(), - ) + _warn_for_deprecated_horizontal_use("max") return F.max_horizontal(exprs, *more_exprs) @@ -372,11 +360,7 @@ def min( elif isinstance(exprs, str): return F.col(exprs).min() - warnings.warn( - "using `min` for horizontal computation is deprecated. Use `min_horizontal` instead.", - DeprecationWarning, - stacklevel=find_stacklevel(), - ) + _warn_for_deprecated_horizontal_use("min") return F.min_horizontal(exprs, *more_exprs) @@ -473,11 +457,7 @@ def sum( elif isinstance(exprs, str): return F.col(exprs).sum() - warnings.warn( - "using `sum` for horizontal computation is deprecated. Use `sum_horizontal` instead.", - DeprecationWarning, - stacklevel=find_stacklevel(), - ) + _warn_for_deprecated_horizontal_use("sum") return F.sum_horizontal(exprs, *more_exprs) @@ -551,9 +531,13 @@ def cumsum( elif isinstance(exprs, str): return F.col(exprs).cumsum() + _warn_for_deprecated_horizontal_use("cumsum") + return F.cumsum_horizontal(exprs, *more_exprs) + + +def _warn_for_deprecated_horizontal_use(name: str) -> None: warnings.warn( - "using `cumsum` for horizontal computation is deprecated. Use `cumsum_horizontal` instead.", + f"using `{name}` for horizontal computation is deprecated. 
Use `{name}_horizontal` instead.", DeprecationWarning, stacklevel=find_stacklevel(), ) - return F.cumsum_horizontal(exprs, *more_exprs) From d6b8fb5cf894b3fd9d3b9c26194a63d359e96b63 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sat, 15 Jul 2023 17:47:12 +0200 Subject: [PATCH 25/37] chore(python): Update autolabeler (#9885) --- .github/release-drafter.yml | 40 +++++++++++++------------------------ 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index fea9adc2ad04..3a1ae627cd7e 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,8 +1,10 @@ categories: - title: 🏆 Highlights labels: highlight - - title: ⚠️ Breaking changes + - title: 💥 Breaking changes labels: breaking + - title: ⚠️ Deprecations + labels: deprecation - title: 🚀 Performance improvements labels: performance - title: ✨ Enhancements @@ -12,13 +14,8 @@ categories: - title: 🛠️ Other improvements labels: - build - - chore - - ci - documentation - - refactor - - revert - - style - - test + - internal exclude-labels: - skip-changelog @@ -28,7 +25,7 @@ change-template: '- $TITLE (#$NUMBER)' change-title-escapes: '\<*_&' replacers: # Remove conventional commits from titles - - search: '/- (build|chore|ci|docs|feat|fix|perf|refactor|release|revert|style|test)(\(.*\))?(\!)?\: /g' + - search: '/- (build|chore|depr|docs|feat|fix|perf|release)(\(.*\))?(\!)?\: /g' replace: '- ' version-resolver: @@ -39,22 +36,25 @@ version-resolver: autolabeler: - label: rust title: - - '/^(build|chore|ci|docs|feat|fix|perf|refactor|release|revert|style|test)\(.*rust.*\)/' + - '/^(build|chore|depr|docs|feat|fix|perf|release)(\(.*rust.*\))?\!?\:) /' - label: python title: - - '/^(build|chore|ci|docs|feat|fix|perf|refactor|release|revert|style|test)\(.*python.*\)/' + - '/^(build|chore|depr|docs|feat|fix|perf|release)(\(.*python.*\))?\!?\:) /' + - label: cli + title: + - '/^(build|chore|depr|docs|feat|fix|perf|release)\(.*cli.*\)\!?\:) /' # CLI tag not in global scope - label: breaking title: - - '/^(build|chore|ci|docs|feat|fix|perf|refactor|release|revert|style|test)(\(.*\))?\!\: /' + - '/^(build|chore|depr|docs|feat|fix|perf|release)(\(.*\))?\!\: /' - label: build title: - '/^build/' - - label: chore + - label: internal title: - '/^chore/' - - label: ci + - label: deprecation title: - - '/^ci/' + - '/^depr/' - label: documentation title: - '/^docs/' @@ -67,21 +67,9 @@ autolabeler: - label: performance title: - '/^perf/' - - label: refactor - title: - - '/^refactor/' - label: release title: - '/^release/' - - label: revert - title: - - '/^revert/' - - label: style - title: - - '/^style/' - - label: test - title: - - '/^test/' template: | $CHANGES From 66ce209dafb402f7f61c59db97827571c8dc39c2 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sat, 15 Jul 2023 18:00:35 +0200 Subject: [PATCH 26/37] chore: Fix autolabeler regex (#9909) --- .github/release-drafter.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index 3a1ae627cd7e..d7fb09be71da 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -36,13 +36,13 @@ version-resolver: autolabeler: - label: rust title: - - '/^(build|chore|depr|docs|feat|fix|perf|release)(\(.*rust.*\))?\!?\:) /' + - '/^(build|chore|depr|docs|feat|fix|perf|release)(\(.*rust.*\))?\!?\: /' - label: python title: - - '/^(build|chore|depr|docs|feat|fix|perf|release)(\(.*python.*\))?\!?\:) /' + - 
'/^(build|chore|depr|docs|feat|fix|perf|release)(\(.*python.*\))?\!?\: /' - label: cli title: - - '/^(build|chore|depr|docs|feat|fix|perf|release)\(.*cli.*\)\!?\:) /' # CLI tag not in global scope + - '/^(build|chore|depr|docs|feat|fix|perf|release)\(.*cli.*\)\!?\: /' # CLI tag not in global scope - label: breaking title: - '/^(build|chore|depr|docs|feat|fix|perf|release)(\(.*\))?\!\: /' From fd871ebcab4caed85ec63c9276b23d69d92b243b Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Sat, 15 Jul 2023 18:27:00 +0100 Subject: [PATCH 27/37] docs(python): add big warnings about using apply (#9906) --- py-polars/polars/dataframe/frame.py | 4 ++++ py-polars/polars/dataframe/groupby.py | 4 ++++ py-polars/polars/expr/expr.py | 4 ++++ py-polars/polars/functions/lazy.py | 4 ++++ py-polars/polars/lazyframe/groupby.py | 4 ++++ py-polars/polars/series/series.py | 4 ++++ 6 files changed, 24 insertions(+) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index f3f1a71b8f0d..6ce26c79211f 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -5682,6 +5682,10 @@ def apply( """ Apply a custom/user-defined function (UDF) over the rows of the DataFrame. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. + The UDF will receive each row as a tuple of values: ``udf(row)``. Implementing logic using a Python function is almost always _significantly_ diff --git a/py-polars/polars/dataframe/groupby.py b/py-polars/polars/dataframe/groupby.py index cad4e0a15b60..573938c59a08 100644 --- a/py-polars/polars/dataframe/groupby.py +++ b/py-polars/polars/dataframe/groupby.py @@ -252,6 +252,10 @@ def apply(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame: """ Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. + Implementing logic using a Python function is almost always _significantly_ slower and more memory intensive than implementing the same logic using the native expression API because: diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index f1e7853c618d..27dd05178986 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -3613,6 +3613,10 @@ def apply( """ Apply a custom/user-defined function (UDF) in a GroupBy or Projection context. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. + Depending on the context it has the following behavior: * Selection diff --git a/py-polars/polars/functions/lazy.py b/py-polars/polars/functions/lazy.py index 97a2a9aefa7f..3503a3b0b791 100644 --- a/py-polars/polars/functions/lazy.py +++ b/py-polars/polars/functions/lazy.py @@ -1164,6 +1164,10 @@ def apply( """ Apply a custom/user-defined function (UDF) in a GroupBy context. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. 
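The cost this warning describes is easy to see side by side. A rough sketch of a Python UDF versus the equivalent native expression (the data and the lambda are illustrative assumptions, not from the patch):

    import polars as pl

    df = pl.DataFrame({"x": [1, 2, 3]})

    # Slow: the UDF calls back into Python for every element.
    slow = df.select(pl.col("x").apply(lambda v: v * 2))

    # Fast: the same logic stays inside the native engine.
    fast = df.select(pl.col("x") * 2)

    assert slow.frame_equal(fast)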
+ Depending on the context it has the following behavior: * Select diff --git a/py-polars/polars/lazyframe/groupby.py b/py-polars/polars/lazyframe/groupby.py index fef864b8be3d..a75726cde9ec 100644 --- a/py-polars/polars/lazyframe/groupby.py +++ b/py-polars/polars/lazyframe/groupby.py @@ -163,6 +163,10 @@ def apply( """ Apply a custom/user-defined function (UDF) over the groups as a new DataFrame. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. + Using this is considered an anti-pattern. This will be very slow because: - it forces the engine to materialize the whole `DataFrames` for the groups. diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 3f7b1a3f5c38..a831c272b358 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4364,6 +4364,10 @@ def apply( """ Apply a custom/user-defined function (UDF) over elements in this Series. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. + If the function returns a different datatype, the return_dtype arg should be set, otherwise the method will fail. From ed7724ce1cb9302ffb1b7fdcea2aaf4eb9d5f765 Mon Sep 17 00:00:00 2001 From: J van Zundert Date: Sat, 15 Jul 2023 19:02:06 +0100 Subject: [PATCH 28/37] chore(python): Add various unit tests (#9903) --- py-polars/polars/config.py | 2 +- py-polars/polars/utils/show_versions.py | 2 +- py-polars/polars/utils/various.py | 6 --- py-polars/tests/unit/namespaces/test_array.py | 11 +++++ .../tests/unit/namespaces/test_struct.py | 2 + py-polars/tests/unit/test_api.py | 25 +++++++++++ py-polars/tests/unit/test_cfg.py | 43 ++++++++++++++++++- py-polars/tests/unit/test_show_graph.py | 15 +++++++ .../tests/unit/utils/test_parse_expr_input.py | 8 ++++ py-polars/tests/unit/utils/test_utils.py | 13 +++++- 10 files changed, 117 insertions(+), 10 deletions(-) create mode 100644 py-polars/tests/unit/test_show_graph.py diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py index c16f95733a59..fc4e49f86929 100644 --- a/py-polars/polars/config.py +++ b/py-polars/polars/config.py @@ -10,7 +10,7 @@ # dummy func required (so docs build) -def _get_float_fmt() -> str: +def _get_float_fmt() -> str: # pragma: no cover return "n/a" diff --git a/py-polars/polars/utils/show_versions.py b/py-polars/polars/utils/show_versions.py index ce4b39ac3719..f34db533ec26 100644 --- a/py-polars/polars/utils/show_versions.py +++ b/py-polars/polars/utils/show_versions.py @@ -87,6 +87,6 @@ def _get_dependency_version(dep_name: str) -> str: if hasattr(module, "__version__"): module_version = module.__version__ else: - module_version = importlib.metadata.version(dep_name) + module_version = importlib.metadata.version(dep_name) # pragma: no cover return module_version diff --git a/py-polars/polars/utils/various.py b/py-polars/polars/utils/various.py index b5a50cfd9b49..69d3cb502fe6 100644 --- a/py-polars/polars/utils/various.py +++ b/py-polars/polars/utils/various.py @@ -19,7 +19,6 @@ Int64, Time, Utf8, - is_polars_dtype, unpack_dtypes, ) from polars.dependencies import _PYARROW_AVAILABLE @@ -72,11 +71,6 @@ def is_bool_sequence(val: object) -> TypeGuard[Sequence[bool]]: return isinstance(val, Sequence) and _is_iterable_of(val, bool) -def is_dtype_sequence(val: object) -> TypeGuard[Sequence[PolarsDataType]]: - """Check whether the given object is a sequence of polars DataTypes.""" - return 
isinstance(val, Sequence) and all(is_polars_dtype(x) for x in val) - - def is_int_sequence(val: object) -> TypeGuard[Sequence[int]]: """Check whether the given sequence is a sequence of integers.""" return isinstance(val, Sequence) and _is_iterable_of(val, int) diff --git a/py-polars/tests/unit/namespaces/test_array.py b/py-polars/tests/unit/namespaces/test_array.py index f12b76172a56..ac69510cd8ed 100644 --- a/py-polars/tests/unit/namespaces/test_array.py +++ b/py-polars/tests/unit/namespaces/test_array.py @@ -1,6 +1,7 @@ import numpy as np import polars as pl +from polars.testing import assert_frame_equal def test_arr_min_max() -> None: @@ -14,6 +15,16 @@ def test_arr_sum() -> None: assert s.arr.sum().to_list() == [3, 7] +def test_arr_unique() -> None: + df = pl.DataFrame( + {"a": pl.Series("a", [[1, 1], [4, 3]], dtype=pl.Array(width=2, inner=pl.Int64))} + ) + + out = df.select(pl.col("a").arr.unique(maintain_order=True)) + expected = pl.DataFrame({"a": [[1], [4, 3]]}) + assert_frame_equal(out, expected) + + def test_array_to_numpy() -> None: s = pl.Series([[1, 2], [3, 4], [5, 6]], dtype=pl.Array(width=2, inner=pl.Int64)) assert (s.to_numpy() == np.array([[1, 2], [3, 4], [5, 6]])).all() diff --git a/py-polars/tests/unit/namespaces/test_struct.py b/py-polars/tests/unit/namespaces/test_struct.py index b0ab63ea094d..db9dec236160 100644 --- a/py-polars/tests/unit/namespaces/test_struct.py +++ b/py-polars/tests/unit/namespaces/test_struct.py @@ -15,6 +15,8 @@ def test_struct_various() -> None: assert s[1] == {"int": 2, "str": "b", "bool": None, "list": [3]} assert s.struct.field("list").to_list() == [[1, 2], [3]] assert s.struct.field("int").to_list() == [1, 2] + assert s.struct["list"].to_list() == [[1, 2], [3]] + assert s.struct["int"].to_list() == [1, 2] assert_frame_equal(df.to_struct("my_struct").struct.unnest(), df) assert s.struct._ipython_key_completions_() == s.struct.fields diff --git a/py-polars/tests/unit/test_api.py b/py-polars/tests/unit/test_api.py index 94e130895652..206b7173b9b2 100644 --- a/py-polars/tests/unit/test_api.py +++ b/py-polars/tests/unit/test_api.py @@ -1,5 +1,7 @@ from __future__ import annotations +import pytest + import polars as pl from polars.testing import assert_frame_equal @@ -137,3 +139,26 @@ def test_class_namespaces_are_registered() -> None: assert ( ns in namespaces ), f"{ns!r} should be registered in {pcls.__name__}._accessors" + + +def test_namespace_cannot_override_builtin() -> None: + with pytest.raises(AttributeError): + + @pl.api.register_dataframe_namespace("dt") + class CustomDt: + def __init__(self, df: pl.DataFrame): + self._df = df + + +def test_namespace_warning_on_override() -> None: + @pl.api.register_dataframe_namespace("math") + class CustomMath: + def __init__(self, df: pl.DataFrame): + self._df = df + + with pytest.raises(UserWarning): + + @pl.api.register_dataframe_namespace("math") + class CustomMath2: + def __init__(self, df: pl.DataFrame): + self._df = df diff --git a/py-polars/tests/unit/test_cfg.py b/py-polars/tests/unit/test_cfg.py index e4b9512cc537..1027a739fd51 100644 --- a/py-polars/tests/unit/test_cfg.py +++ b/py-polars/tests/unit/test_cfg.py @@ -513,7 +513,7 @@ def test_string_cache() -> None: @pytest.mark.write_disk() def test_config_load_save(tmp_path: Path) -> None: - for file in (None, tmp_path / "polars.config"): + for file in (None, tmp_path / "polars.config", str(tmp_path / "polars.config")): # set some config options... 
pl.Config.set_tbl_cols(12) pl.Config.set_verbose(True) @@ -577,3 +577,44 @@ def test_config_scope() -> None: # expect scope-exit to restore original state assert pl.Config.state() == initial_state + + +def test_config_raise_error_if_not_exist() -> None: + with pytest.raises(AttributeError), pl.Config(i_do_not_exist=True): + pass + + +def test_config_state_env_only() -> None: + pl.Config.set_verbose(False) + pl.Config.set_fmt_float("full") + + state_all = pl.Config.state(env_only=False) + state_env_only = pl.Config.state(env_only=True) + assert len(state_env_only) < len(state_all) + assert "set_fmt_float" in state_all + assert "set_fmt_float" not in state_env_only + + +def test_activate_decimals() -> None: + with pl.Config() as cfg: + cfg.activate_decimals(True) + assert os.environ.get("POLARS_ACTIVATE_DECIMAL") == "1" + cfg.activate_decimals(False) + assert "POLARS_ACTIVATE_DECIMAL" not in os.environ + + +def test_set_streaming_chunk_size() -> None: + with pl.Config() as cfg: + cfg.set_streaming_chunk_size(8) + assert os.environ.get("POLARS_STREAMING_CHUNK_SIZE") == "8" + + with pytest.raises(ValueError), pl.Config() as cfg: + cfg.set_streaming_chunk_size(0) + + +def test_set_fmt_str_lengths_invalid_length() -> None: + with pl.Config() as cfg: + with pytest.raises(ValueError): + cfg.set_fmt_str_lengths(0) + with pytest.raises(ValueError): + cfg.set_fmt_str_lengths(-2) diff --git a/py-polars/tests/unit/test_show_graph.py b/py-polars/tests/unit/test_show_graph.py new file mode 100644 index 000000000000..09a9b9484933 --- /dev/null +++ b/py-polars/tests/unit/test_show_graph.py @@ -0,0 +1,15 @@ +import polars as pl + + +def test_show_graph() -> None: + # only test raw output, otherwise we need graphviz and matplotlib + ldf = pl.LazyFrame( + { + "a": ["a", "b", "a", "b", "b", "c"], + "b": [1, 2, 3, 4, 5, 6], + "c": [6, 5, 4, 3, 2, 1], + } + ) + query = ldf.groupby("a", maintain_order=True).agg(pl.all().sum()).sort("a") + out = query.show_graph(raw_output=True) + assert isinstance(out, str) diff --git a/py-polars/tests/unit/utils/test_parse_expr_input.py b/py-polars/tests/unit/utils/test_parse_expr_input.py index 4e039382c1e7..b1eae283b31a 100644 --- a/py-polars/tests/unit/utils/test_parse_expr_input.py +++ b/py-polars/tests/unit/utils/test_parse_expr_input.py @@ -92,3 +92,11 @@ def test_parse_as_expression_structify() -> None: result = wrap_expr(parse_as_expression(pl.col("a", "b"), structify=True)) expected = pl.struct("a", "b") assert_expr_equal(result, expected) + + +def test_parse_as_expression_structify_multiple_outputs() -> None: + # note: this only works because assert_expr_equal evaluates on a dataframe with + # columns "a" and "b" + result = wrap_expr(parse_as_expression(pl.col("*"), structify=True)) + expected = pl.struct("a", "b") + assert_expr_equal(result, expected) diff --git a/py-polars/tests/unit/utils/test_utils.py b/py-polars/tests/unit/utils/test_utils.py index 388b6c2f5855..7119dab5cfed 100644 --- a/py-polars/tests/unit/utils/test_utils.py +++ b/py-polars/tests/unit/utils/test_utils.py @@ -16,7 +16,8 @@ _timedelta_to_pl_timedelta, ) from polars.utils.decorators import deprecate_nonkeyword_arguments, redirect -from polars.utils.various import parse_version +from polars.utils.meta import get_idx_type +from polars.utils.various import _in_notebook, parse_version if TYPE_CHECKING: from polars.type_aliases import TimeUnit @@ -158,3 +159,13 @@ def bar(self, upper: bool = False) -> str: return "BAZ" if upper else "baz" assert DemoClass2().foo() == "BAZ" # type: ignore[attr-defined] 
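Several of the new configuration tests above rely on `pl.Config` doubling as a context manager that restores the prior state on exit; the pattern looks roughly like this (option values are illustrative):

    import polars as pl

    with pl.Config() as cfg:
        cfg.set_tbl_cols(12)
        cfg.set_verbose(True)
        # scoped: both options apply only inside this block

    # here the previous configuration has been restored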
+ + +def test_get_idx_type_deprecation() -> None: + with pytest.deprecated_call(): + get_idx_type() + + +def test_in_notebook() -> None: + # private function, but easier to test this separately and mock it in the callers + assert not _in_notebook() From b016b9ca8e85c531f57b87a8bb94a439f26b265d Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sat, 15 Jul 2023 20:03:24 +0200 Subject: [PATCH 29/37] chore(rust): Minor makeover for Rust Makefile (#9874) --- .github/workflows/lint-rust.yml | 10 ++-- polars/Makefile | 85 +++++++++++++++++++-------------- py-polars/Makefile | 2 +- 3 files changed, 55 insertions(+), 42 deletions(-) diff --git a/.github/workflows/lint-rust.yml b/.github/workflows/lint-rust.yml index 61cedbc4472e..bc7c18399145 100644 --- a/.github/workflows/lint-rust.yml +++ b/.github/workflows/lint-rust.yml @@ -90,9 +90,9 @@ jobs: env: MIRIFLAGS: -Zmiri-disable-isolation -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows POLARS_ALLOW_EXTENSION: '1' - run: | - cargo miri test \ - --no-default-features \ - --features object \ - -p polars-core \ + run: > + cargo miri test + --no-default-features + --features object + -p polars-core -p polars-arrow diff --git a/polars/Makefile b/polars/Makefile index cdbc267f754e..8cdfaff4d96c 100644 --- a/polars/Makefile +++ b/polars/Makefile @@ -1,26 +1,46 @@ .DEFAULT_GOAL := help +SHELL=/bin/bash BASE ?= main -.PHONY: fmt check check-features clippy clippy-default test test-doc integration-tests - -fmt: +.PHONY: fmt +fmt: ## Run rustfmt and dprint cargo fmt --all dprint fmt -generate_test_files: - cargo run -p polars-cli "select * from read_csv('../examples/datasets/foods1.csv')" -o parquet > ../examples/datasets/foods1.parquet - cargo run -p polars-cli "select * from read_csv('../examples/datasets/foods1.csv')" -o arrow > ../examples/datasets/foods1.ipc - -check: +.PHONY: check +check: ## Run cargo check with all features cargo check --all-targets --all-features -clippy: +.PHONY: clippy +clippy: ## Run clippy with all features cargo clippy --all-targets --all-features -clippy-default: - cargo clippy +.PHONY: clippy-default +clippy-default: ## Run clippy with default features + cargo clippy --all-targets + +.PHONY: pre-commit +pre-commit: fmt clippy clippy-default ## Run autoformatting and linting + +.PHONY: check-features +check-features: ## Run cargo check for feature flag combinations (warning: slow) + cargo hack check --each-feature --no-dev-deps + +.PHONY: miri +miri: ## Run miri + # not tested on all features because miri does not support SIMD + # some tests are also filtered, because miri cannot deal with the rayon threadpool + # we ignore leaks because the thread pool of rayon is never killed. + MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows" \ + POLARS_ALLOW_EXTENSION=1 \ + cargo miri test \ + --no-default-features \ + --features object \ + -p polars-core \ + -p polars-arrow +.PHONY: test test: ## Run tests cargo test --all-features \ -p polars-lazy \ @@ -34,22 +54,12 @@ test: ## Run tests -- \ --test-threads=2 -integration-tests: - cargo test --all-features --test it -- --test-threads=2 - -miri: - # not tested on all features because miri does not support SIMD - # some tests are also filtered, because miri cannot deal with the rayon threadpool - # we ignore leaks because the thread pool of rayon is never killed. 
- MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows" \ - POLARS_ALLOW_EXTENSION=1 \ - cargo miri test \ - --no-default-features \ - --features object \ - -p polars-core \ - -p polars-arrow +.PHONY: integration-tests +integration-tests: ## Run integration tests + cargo test --all-features --test it -test-doc: +.PHONY: test-doc +test-doc: ## Run doc examples cargo test --doc \ -p polars-lazy \ -p polars-io \ @@ -57,19 +67,21 @@ test-doc: -p polars-arrow \ -p polars-sql -pre-commit: fmt clippy clippy-default ## Run autoformatting and linting - - -check-features: - cargo hack check --each-feature --no-dev-deps +.PHONY: generate_test_files +generate_test_files: ## Generate some datasets + cargo run -p polars-cli "select * from read_csv('../examples/datasets/foods1.csv')" -o parquet > ../examples/datasets/foods1.parquet + cargo run -p polars-cli "select * from read_csv('../examples/datasets/foods1.csv')" -o arrow > ../examples/datasets/foods1.ipc -bench-save: +.PHONY: bench-save +bench-save: ## Run benchmark and save cargo bench --features=random --bench $(BENCH) -- --save-baseline $(SAVE) -bench-cmp: +.PHONY: bench-cmp +bench-cmp: ## Run benchmark and compare cargo bench --features=random --bench $(BENCH) -- --load-baseline $(FEAT) --baseline $(BASE) -doctest: +.PHONY: doctest +doctest: ## Check that documentation builds cargo doc --all-features -p polars-arrow cargo doc --all-features -p polars-utils cargo doc --features=docs-selection -p polars-core @@ -80,7 +92,8 @@ doctest: cargo doc --features=docs-selection -p polars cargo doc --all-features -p polars-sql -publish: +.PHONY: publish +publish: ## Publish Polars crates cargo publish --allow-dirty -p polars-error cargo publish --allow-dirty -p polars-utils cargo publish --allow-dirty -p polars-row @@ -100,5 +113,5 @@ publish: .PHONY: help help: ## Display this help screen - @echo -e "\033[1mAvailable commands:\033[0m\n" + @echo -e "\033[1mAvailable commands:\033[0m" @grep -E '^[a-z.A-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-18s\033[0m %s\n", $$1, $$2}' | sort diff --git a/py-polars/Makefile b/py-polars/Makefile index ec717c3b0d09..fc636fcbcd2e 100644 --- a/py-polars/Makefile +++ b/py-polars/Makefile @@ -82,5 +82,5 @@ clean: ## Clean up caches and build artifacts .PHONY: help help: ## Display this help screen - @echo -e "\033[1mAvailable commands:\033[0m\n" + @echo -e "\033[1mAvailable commands:\033[0m" @grep -E '^[a-z.A-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-18s\033[0m %s\n", $$1, $$2}' | sort From e51326f89b6a2689ba173469107afbe76ccd8c3d Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sat, 15 Jul 2023 20:16:18 +0200 Subject: [PATCH 30/37] chore(rust): fix docs build and add to CI (#9904) --- .github/workflows/docs-rust.yml | 3 +- polars/polars-core/Cargo.toml | 1 + polars/polars-core/src/datatypes/_serde.rs | 4 +-- .../src/series/implementations/decimal.rs | 1 + polars/polars-core/src/series/ops/moment.rs | 4 +-- polars/polars-io/Cargo.toml | 11 +++++- .../polars-plan/src/dsl/functions/temporal.rs | 4 +-- .../polars-lazy/polars-plan/src/dsl/list.rs | 2 +- polars/polars-lazy/polars-plan/src/dsl/mod.rs | 2 +- py-polars/Cargo.lock | 35 ++++++++++--------- 10 files changed, 40 insertions(+), 27 deletions(-) diff --git a/.github/workflows/docs-rust.yml b/.github/workflows/docs-rust.yml index b20a479f1650..0439b307b1e0 100644 --- a/.github/workflows/docs-rust.yml +++ b/.github/workflows/docs-rust.yml @@ -32,7 +32,8 @@ 
jobs: - name: Build Rust documentation env: RUSTDOCFLAGS: --cfg docsrs -D warnings - run: cargo doc --features=docs-selection --package polars + working-directory: polars + run: make doctest - name: Create redirect to Polars crate and set no-jekyll if: ${{ github.ref_name == 'main' }} diff --git a/polars/polars-core/Cargo.toml b/polars/polars-core/Cargo.toml index f09c883b08c8..695ea1fc613e 100644 --- a/polars/polars-core/Cargo.toml +++ b/polars/polars-core/Cargo.toml @@ -128,6 +128,7 @@ docs-selection = [ "diff", "moment", "dtype-categorical", + "dtype-decimal", "rank", "diagonal_concat", "horizontal_concat", diff --git a/polars/polars-core/src/datatypes/_serde.rs b/polars/polars-core/src/datatypes/_serde.rs index 7ae368fdc30a..4277432ae5f3 100644 --- a/polars/polars-core/src/datatypes/_serde.rs +++ b/polars/polars-core/src/datatypes/_serde.rs @@ -1,8 +1,8 @@ //! Having `Object<&;static> in [`DataType`] make serde tag the `Deserialize` trait bound 'static //! even though we skip serializing `Object`. //! -//! We could use https://github.com/serde-rs/serde/issues/1712, but that gave problems caused by -//! https://github.com/rust-lang/rust/issues/96956, so we make a dummy type without static +//! We could use [serde_1712](https://github.com/serde-rs/serde/issues/1712), but that gave problems caused by +//! [rust_96956](https://github.com/rust-lang/rust/issues/96956), so we make a dummy type without static pub use arrow::datatypes::DataType as ArrowDataType; use serde::{Deserialize, Deserializer, Serialize, Serializer}; diff --git a/polars/polars-core/src/series/implementations/decimal.rs b/polars/polars-core/src/series/implementations/decimal.rs index fd7828165a64..2f054673d864 100644 --- a/polars/polars-core/src/series/implementations/decimal.rs +++ b/polars/polars-core/src/series/implementations/decimal.rs @@ -52,6 +52,7 @@ impl private::PrivateSeries for SeriesWrap { self.0.dtype() } + #[cfg(feature = "zip_with")] fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult { Ok(self .0 diff --git a/polars/polars-core/src/series/ops/moment.rs b/polars/polars-core/src/series/ops/moment.rs index 50785ae709f5..32e68945e7fa 100644 --- a/polars/polars-core/src/series/ops/moment.rs +++ b/polars/polars-core/src/series/ops/moment.rs @@ -48,7 +48,7 @@ impl Series { /// function `skewtest` can be used to determine if the skewness value /// is close enough to zero, statistically speaking. 
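At the Python level the distinction documented here surfaces through the `bias` flag; a small sketch (the sample values are an assumption for illustration):

    import polars as pl

    s = pl.Series("x", [1.0, 2.0, 3.0, 4.0, 10.0])

    s.skew()            # biased moment estimator (the default)
    s.skew(bias=False)  # corrected for statistical bias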
 ///
-    /// see: https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1024
+    /// see: [scipy](https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1024)
     pub fn skew(&self, bias: bool) -> PolarsResult<Option<f64>> {
         let mean = match self.mean() {
             Some(mean) => mean,
@@ -76,7 +76,7 @@ impl Series {
     /// If bias is `false` then the kurtosis is calculated using k statistics to
     /// eliminate bias coming from biased moment estimators
     ///
-    /// see: https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1027
+    /// see: [scipy](https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1027)
     pub fn kurtosis(&self, fisher: bool, bias: bool) -> PolarsResult<Option<f64>> {
         let mean = match self.mean() {
             Some(mean) => mean,
diff --git a/polars/polars-io/Cargo.toml b/polars/polars-io/Cargo.toml
index 4c04dcd1d23c..09209622a32d 100644
--- a/polars/polars-io/Cargo.toml
+++ b/polars/polars-io/Cargo.toml
@@ -11,7 +11,16 @@ description = "IO related logic for the Polars DataFrame library"

 [features]
 # support for arrow's json parsing
-json = ["arrow/io_json_write", "polars-json", "simd-json", "memmap", "lexical", "lexical-core", "serde_json"]
+json = [
+    "arrow/io_json_write",
+    "polars-json",
+    "simd-json",
+    "memmap",
+    "lexical",
+    "lexical-core",
+    "serde_json",
+    "dtype-struct",
+]
 # support for arrow's ipc file parsing
 ipc = ["arrow/io_ipc", "arrow/io_ipc_compression", "memmap"]
 # support for arrow's streaming ipc file parsing
diff --git a/polars/polars-lazy/polars-plan/src/dsl/functions/temporal.rs b/polars/polars-lazy/polars-plan/src/dsl/functions/temporal.rs
index c663c7e1bd62..53c556262b7a 100644
--- a/polars/polars-lazy/polars-plan/src/dsl/functions/temporal.rs
+++ b/polars/polars-lazy/polars-plan/src/dsl/functions/temporal.rs
@@ -10,7 +10,7 @@ macro_rules! impl_unit_setter {
     };
 }

-/// Arguments used by [`datetime`] in order to produce an `Expr` of `Datetime`
+/// Arguments used by `datetime` in order to produce an `Expr` of `Datetime`
 ///
 /// Construct a `DatetimeArgs` with `DatetimeArgs::new(y, m, d)`. This will set the other time units to `lit(0)`. You
 /// can then set the other fields with the `with_*` methods, or use `with_hms` to set `hour`, `minute`, and `second` all
@@ -175,7 +175,7 @@ pub fn datetime(args: DatetimeArgs) -> Expr {
         .alias("datetime")
 }

-/// Arguments used by [`duration`] in order to produce an `Expr` of `Duration`
+/// Arguments used by `duration` in order to produce an `Expr` of `Duration`
 ///
 /// To construct a `DurationArgs`, use struct literal syntax with `..Default::default()` to leave unspecified fields at
 /// their default value of `lit(0)`, as demonstrated below.
diff --git a/polars/polars-lazy/polars-plan/src/dsl/list.rs b/polars/polars-lazy/polars-plan/src/dsl/list.rs
index bc362e01aef7..5f61f6e9195e 100644
--- a/polars/polars-lazy/polars-plan/src/dsl/list.rs
+++ b/polars/polars-lazy/polars-plan/src/dsl/list.rs
@@ -243,7 +243,7 @@ impl ListNameSpace {
     ///
     /// # Schema
     ///
-    /// A polars [`LazyFrame`] needs to know the schema at all time. The caller therefore must provide
+    /// A polars `LazyFrame` needs to know the schema at all times. The caller therefore must provide
     /// an `upper_bound` of struct fields that will be set.
     /// If this is incorrect, a downstream operation may fail. For instance an `all().sum()` expression
     /// will look in the current schema to determine which columns to select.
diff --git a/polars/polars-lazy/polars-plan/src/dsl/mod.rs b/polars/polars-lazy/polars-plan/src/dsl/mod.rs index 3cf8f1cf29ae..e43b1efb3537 100644 --- a/polars/polars-lazy/polars-plan/src/dsl/mod.rs +++ b/polars/polars-lazy/polars-plan/src/dsl/mod.rs @@ -1529,7 +1529,7 @@ impl Expr { /// function `skewtest` can be used to determine if the skewness value /// is close enough to zero, statistically speaking. /// - /// see: https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1024 + /// see: [scipy](https://github.com/scipy/scipy/blob/47bb6febaa10658c72962b9615d5d5aa2513fa3a/scipy/stats/stats.py#L1024) pub fn skew(self, bias: bool) -> Expr { self.apply( move |s| { diff --git a/py-polars/Cargo.lock b/py-polars/Cargo.lock index 9e09aa089cdc..4be7e133bb57 100644 --- a/py-polars/Cargo.lock +++ b/py-polars/Cargo.lock @@ -98,8 +98,9 @@ dependencies = [ [[package]] name = "arrow2" -version = "0.17.1" -source = "git+https://github.com/ritchie46/arrow2?branch=polars_2023-06-26#e71d66689f6ebde0e01f185bad0db8ef46f5fc8e" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e44f27e89e3edd8738a07c5e2c881efaa25e69be97a816d2df051685d460670c" dependencies = [ "ahash", "arrow-format", @@ -1429,7 +1430,7 @@ dependencies = [ [[package]] name = "polars" -version = "0.30.0" +version = "0.31.1" dependencies = [ "getrandom", "polars-core", @@ -1443,7 +1444,7 @@ dependencies = [ [[package]] name = "polars-algo" -version = "0.30.0" +version = "0.31.1" dependencies = [ "polars-core", "polars-lazy", @@ -1452,7 +1453,7 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.30.0" +version = "0.31.1" dependencies = [ "arrow2", "atoi", @@ -1469,7 +1470,7 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.30.0" +version = "0.31.1" dependencies = [ "ahash", "arrow2", @@ -1502,7 +1503,7 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.30.0" +version = "0.31.1" dependencies = [ "arrow2", "regex", @@ -1511,7 +1512,7 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.30.0" +version = "0.31.1" dependencies = [ "ahash", "arrow2", @@ -1546,7 +1547,7 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.30.0" +version = "0.31.1" dependencies = [ "ahash", "arrow2", @@ -1562,7 +1563,7 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.30.0" +version = "0.31.1" dependencies = [ "ahash", "bitflags", @@ -1584,7 +1585,7 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.30.0" +version = "0.31.1" dependencies = [ "argminmax", "arrow2", @@ -1605,7 +1606,7 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.30.0" +version = "0.31.1" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -1625,7 +1626,7 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.30.0" +version = "0.31.1" dependencies = [ "ahash", "arrow2", @@ -1649,7 +1650,7 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.30.0" +version = "0.31.1" dependencies = [ "arrow2", "polars-error", @@ -1658,7 +1659,7 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.30.0" +version = "0.31.1" dependencies = [ "polars-arrow", "polars-core", @@ -1671,7 +1672,7 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.30.0" +version = "0.31.1" dependencies = [ "arrow2", "atoi", @@ -1690,7 +1691,7 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.30.0" +version = "0.31.1" dependencies = [ "ahash", "hashbrown 
0.14.0", From cc0795f7b980da385268d77216a9ae36dccdfcf4 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sat, 15 Jul 2023 20:52:54 +0200 Subject: [PATCH 31/37] fix(python): don't SO on align_frames (#9911) --- py-polars/polars/functions/eager.py | 37 +++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/py-polars/polars/functions/eager.py b/py-polars/polars/functions/eager.py index f8b6ac474164..10a2139ab25a 100644 --- a/py-polars/polars/functions/eager.py +++ b/py-polars/polars/functions/eager.py @@ -347,17 +347,34 @@ def align_frames( # create aligned master frame (this is the most expensive part; afterwards # we just subselect out the columns representing the component frames) eager = isinstance(frames[0], pl.DataFrame) - alignment_frame: LazyFrame = ( - reduce( # type: ignore[attr-defined] - lambda x, y: x.lazy().join( # type: ignore[arg-type, return-value] - y.lazy(), how=how, on=align_on, suffix=str(id(y)) - ), - frames, + + # we stackoverflow on many frames + # so we branch on an arbitrary chosen large number of frames + if len(frames) < 250: + # lazy variant + # this can SO + alignment_frame: LazyFrame = ( + reduce( # type: ignore[attr-defined] + lambda x, y: x.lazy().join( # type: ignore[arg-type, return-value] + y.lazy(), how=how, on=align_on, suffix=str(id(y)) + ), + frames, + ) + .sort(by=align_on, descending=descending) + .collect(no_optimization=True) + .lazy() + ) + else: + # eager variant + # this doesn't SO + alignment_frame = ( + reduce( + lambda x, y: x.join(y, how=how, on=align_on, suffix=str(id(y))), + frames, + ) + .sort(by=align_on, descending=descending) + .lazy() ) - .sort(by=align_on, descending=descending) - .collect() - .lazy() - ) # select-out aligned components from the master frame aligned_cols = set(alignment_frame.columns) From da4df4fe65273eaec1c21af76eb951c29e7f6dd1 Mon Sep 17 00:00:00 2001 From: J van Zundert Date: Sun, 16 Jul 2023 07:50:40 +0100 Subject: [PATCH 32/37] chore(python): Use Pathlib everywhere (#9914) --- py-polars/docs/source/conf.py | 7 ++++--- py-polars/polars/config.py | 8 ++++---- py-polars/polars/io/_utils.py | 2 +- py-polars/polars/lazyframe/frame.py | 5 ++--- py-polars/polars/utils/various.py | 15 +++++++-------- py-polars/pyproject.toml | 1 + py-polars/scripts/check_stacklevels.py | 6 +++--- py-polars/tests/benchmark/test_release.py | 5 ++--- py-polars/tests/unit/io/conftest.py | 4 +--- py-polars/tests/unit/io/test_csv.py | 8 +++----- py-polars/tests/unit/io/test_database.py | 7 ++----- py-polars/tests/unit/io/test_lazy_csv.py | 12 +++++------- py-polars/tests/unit/io/test_lazy_json.py | 4 ++-- py-polars/tests/unit/io/test_other.py | 12 ++++++------ py-polars/tests/unit/io/test_parquet.py | 6 ++---- py-polars/tests/unit/streaming/conftest.py | 4 +--- py-polars/tests/unit/test_cfg.py | 8 +++----- py-polars/tests/unit/test_sql.py | 5 ++--- 18 files changed, 51 insertions(+), 68 deletions(-) diff --git a/py-polars/docs/source/conf.py b/py-polars/docs/source/conf.py index 31916f3c6106..6c4f0891e9a6 100644 --- a/py-polars/docs/source/conf.py +++ b/py-polars/docs/source/conf.py @@ -16,11 +16,12 @@ import re import sys import warnings +from pathlib import Path import sphinx_autosummary_accessors # add polars directory -sys.path.insert(0, os.path.abspath("../..")) +sys.path.insert(0, str(Path("../..").resolve())) # -- Project information ----------------------------------------------------- @@ -200,8 +201,8 @@ def linkcode_resolve(domain, info): linespec = f"#L{lineno}-L{lineno + len(source) - 1}" if 
lineno else "" - conf_dir_path = os.path.dirname(os.path.realpath(__file__)) - polars_root = os.path.abspath(f"{conf_dir_path}/../../polars") + conf_dir_path = Path(__file__).absolute().parent + polars_root = (conf_dir_path.parent.parent / "polars").absolute() fn = os.path.relpath(fn, start=polars_root) return f"{github_root}/blob/main/py-polars/polars/{fn}{linespec}" diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py index fc4e49f86929..2ef42ad14af3 100644 --- a/py-polars/polars/config.py +++ b/py-polars/polars/config.py @@ -156,7 +156,7 @@ def load(cls, cfg: Path | str) -> type[Config]: """ options = json.loads( Path(normalise_filepath(cfg)).read_text() - if isinstance(cfg, Path) or os.path.exists(cfg) + if isinstance(cfg, Path) or Path(cfg).exists() else cfg ) os.environ.update(options.get("environment", {})) @@ -221,9 +221,9 @@ def save(cls, file: Path | str | None = None) -> str: separators=(",", ":"), ) if isinstance(file, (str, Path)): - file = os.path.abspath(normalise_filepath(file)) - Path(file).write_text(options) - return file + file = Path(normalise_filepath(file)).resolve() + file.write_text(options) + return str(file) return options diff --git a/py-polars/polars/io/_utils.py b/py-polars/polars/io/_utils.py index a0bdceb7f20e..46d55343ec27 100644 --- a/py-polars/polars/io/_utils.py +++ b/py-polars/polars/io/_utils.py @@ -166,7 +166,7 @@ def managed_file(file: Any) -> Iterator[Any]: if isinstance(file, str): file = normalise_filepath(file, check_not_dir) if has_non_utf8_non_utf8_lossy_encoding: - with open(file, encoding=encoding_str) as f: + with Path(file).open(encoding=encoding_str) as f: return _check_empty( BytesIO(f.read().encode("utf8")), context=f"{file!r}" ) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 66d91f95fa87..f4ac1d5dced5 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -894,7 +894,7 @@ def show_graph( *, optimized: bool = True, show: bool = True, - output_path: str | None = None, + output_path: str | Path | None = None, raw_output: bool = False, figsize: tuple[float, float] = (16.0, 12.0), type_coercion: bool = True, @@ -975,8 +975,7 @@ def show_graph( raise ImportError("Graphviz dot binary should be on your PATH") from None if output_path: - with Path(output_path).open(mode="wb") as file: - file.write(graph) + Path(output_path).write_bytes(graph) if not show: return None diff --git a/py-polars/polars/utils/various.py b/py-polars/polars/utils/various.py index 69d3cb502fe6..d6820aaf3b23 100644 --- a/py-polars/polars/utils/various.py +++ b/py-polars/polars/utils/various.py @@ -1,12 +1,12 @@ from __future__ import annotations import inspect -import os import re import sys import warnings from collections.abc import MappingView, Sized from enum import Enum +from pathlib import Path from typing import TYPE_CHECKING, Any, Generator, Iterable, Literal, Sequence, TypeVar import polars as pl @@ -25,7 +25,6 @@ if TYPE_CHECKING: from collections.abc import Reversible - from pathlib import Path from polars import DataFrame, Series from polars.type_aliases import PolarsDataType, PolarsIntegerType, SizeUnit @@ -183,10 +182,10 @@ def can_create_dicts_with_pyarrow(dtypes: Sequence[PolarsDataType]) -> bool: def normalise_filepath(path: str | Path, check_not_directory: bool = True) -> str: """Create a string path, expanding the home directory if present.""" - path = os.path.expanduser(path) - if check_not_directory and os.path.exists(path) and os.path.isdir(path): + 
path = Path(path).expanduser() + if check_not_directory and path.exists() and path.is_dir(): raise IsADirectoryError(f"Expected a file path; {path!r} is a directory") - return path + return str(path) def parse_version(version: Sequence[str | int]) -> tuple[int, ...]: @@ -358,15 +357,15 @@ def find_stacklevel() -> int: Taken from: https://github.com/pandas-dev/pandas/blob/ab89c53f48df67709a533b6a95ce3d911871a0a8/pandas/util/_exceptions.py#L30-L51 """ - pkg_dir = os.path.dirname(pl.__file__) - test_dir = os.path.join(pkg_dir, "tests") + pkg_dir = Path(pl.__file__).parent + test_dir = pkg_dir / "tests" # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow frame = inspect.currentframe() n = 0 while frame: fname = inspect.getfile(frame) - if fname.startswith(pkg_dir) and not fname.startswith(test_dir): + if fname.startswith(str(pkg_dir)) and not fname.startswith(str(test_dir)): frame = frame.f_back n += 1 else: diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index ca01851a144f..724f6638ce6e 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -119,6 +119,7 @@ select = [ "UP", # pyupgrade "PT", # flake8-pytest-style "RUF", # Ruff-specific rules + "PTH", # flake8-use-pathlib ] ignore = [ diff --git a/py-polars/scripts/check_stacklevels.py b/py-polars/scripts/check_stacklevels.py index 93805063acad..2ff14283ea01 100644 --- a/py-polars/scripts/check_stacklevels.py +++ b/py-polars/scripts/check_stacklevels.py @@ -7,6 +7,7 @@ import subprocess import sys from ast import NodeVisitor +from pathlib import Path # Files in which it's OK to set the stacklevel manually. # `git ls-files` lists files with forwards-slashes @@ -38,10 +39,9 @@ def visit_Call(self, node: ast.Call) -> None: for file in files: if file in EXCLUDE: continue - if not file.endswith(".py"): + if Path(file).suffix != ".py": continue - with open(file) as fd: - content = fd.read() + content = Path(file).read_text() tree = ast.parse(content) stacklevel_checker = StackLevelChecker(file) stacklevel_checker.visit(tree) diff --git a/py-polars/tests/benchmark/test_release.py b/py-polars/tests/benchmark/test_release.py index bbea4df6b928..e6ebdac7dcbe 100644 --- a/py-polars/tests/benchmark/test_release.py +++ b/py-polars/tests/benchmark/test_release.py @@ -5,7 +5,6 @@ To run these tests: pytest -m benchmark """ -import os import time from pathlib import Path from typing import cast @@ -21,12 +20,12 @@ @pytest.mark.skipif( - not (Path(os.path.dirname(__file__)) / "G1_1e7_1e2_5_0.csv").is_file(), + not (Path(__file__).parent / "G1_1e7_1e2_5_0.csv").is_file(), reason="Dataset must be generated before running this test.", ) def test_read_scan_large_csv() -> None: filename = "G1_1e7_1e2_5_0.csv" - path = Path(os.path.dirname(__file__)) / filename + path = Path(__file__).parent / filename predicate = pl.col("v2") < 5 diff --git a/py-polars/tests/unit/io/conftest.py b/py-polars/tests/unit/io/conftest.py index b488a9d29c44..fd174486b25f 100644 --- a/py-polars/tests/unit/io/conftest.py +++ b/py-polars/tests/unit/io/conftest.py @@ -1,6 +1,5 @@ from __future__ import annotations -import os from pathlib import Path import pytest @@ -8,5 +7,4 @@ @pytest.fixture() def io_files_path() -> Path: - current_dir = os.path.dirname(__file__) - return Path(current_dir) / "files" + return Path(__file__).parent / "files" diff --git a/py-polars/tests/unit/io/test_csv.py b/py-polars/tests/unit/io/test_csv.py index f4a18436e751..8f065bdff851 100644 --- a/py-polars/tests/unit/io/test_csv.py +++ 
b/py-polars/tests/unit/io/test_csv.py @@ -379,8 +379,7 @@ def test_read_csv_encoding(tmp_path: Path) -> None: ) file_path = tmp_path / "encoding.csv" - with open(file_path, "wb") as f: - f.write(bts) + file_path.write_bytes(bts) file_str = str(file_path) bytesio = io.BytesIO(bts) @@ -487,9 +486,8 @@ def test_compressed_csv(io_files_path: Path) -> None: def test_partial_decompression(foods_file_path: Path) -> None: f_out = io.BytesIO() - with open(foods_file_path, "rb") as f_read: # noqa: SIM117 - with gzip.GzipFile(fileobj=f_out, mode="w") as f: - f.write(f_read.read()) + with gzip.GzipFile(fileobj=f_out, mode="w") as f: + f.write(foods_file_path.read_bytes()) csv_bytes = f_out.getvalue() for n_rows in [1, 5, 26]: diff --git a/py-polars/tests/unit/io/test_database.py b/py-polars/tests/unit/io/test_database.py index a292af1217ca..4466a57761c6 100644 --- a/py-polars/tests/unit/io/test_database.py +++ b/py-polars/tests/unit/io/test_database.py @@ -1,8 +1,8 @@ from __future__ import annotations -import os import sys from datetime import date +from pathlib import Path from typing import TYPE_CHECKING import pytest @@ -11,8 +11,6 @@ from polars.testing import assert_frame_equal if TYPE_CHECKING: - from pathlib import Path - from polars.type_aliases import ( DbReadEngine, DbWriteEngine, @@ -35,8 +33,7 @@ def sample_df() -> pl.DataFrame: def create_temp_sqlite_db(test_db: str) -> None: import sqlite3 - if os.path.exists(test_db): - os.unlink(test_db) + Path(test_db).unlink(missing_ok=True) # NOTE: at the time of writing adcb/connectorx have weak SQLite support (poor or # no bool/date/datetime dtypes, for example) and there is a bug in connectorx that diff --git a/py-polars/tests/unit/io/test_lazy_csv.py b/py-polars/tests/unit/io/test_lazy_csv.py index cd5aea1a1e05..2eaa730b0bc8 100644 --- a/py-polars/tests/unit/io/test_lazy_csv.py +++ b/py-polars/tests/unit/io/test_lazy_csv.py @@ -42,8 +42,7 @@ def test_invalid_utf8(tmp_path: Path) -> None: bts = bytes(np.random.randint(0, 255, 200)) file_path = tmp_path / "nonutf8.csv" - with open(file_path, "wb") as f: - f.write(bts) + file_path.write_bytes(bts) a = pl.read_csv(file_path, has_header=False, encoding="utf8-lossy") b = pl.scan_csv(file_path, has_header=False, encoding="utf8-lossy").collect() @@ -192,9 +191,8 @@ def test_glob_skip_rows(tmp_path: Path) -> None: for i in range(2): file_path = tmp_path / f"test_{i}.csv" - with open(file_path, "w") as f: - f.write( - f""" + file_path.write_text( + f""" metadata goes here file number {i} foo,bar,baz @@ -202,7 +200,7 @@ def test_glob_skip_rows(tmp_path: Path) -> None: 4,5,6 7,8,9 """ - ) + ) file_path = tmp_path / "*.csv" assert pl.read_csv(file_path, skip_rows=2).to_dict(False) == { "foo": [1, 4, 7, 1, 4, 7], @@ -227,7 +225,7 @@ def test_glob_n_rows(io_files_path: Path) -> None: } -def test_scan_csv_schema_overwrite_not_projected_8483(foods_file_path: str) -> None: +def test_scan_csv_schema_overwrite_not_projected_8483(foods_file_path: Path) -> None: df = ( pl.scan_csv( foods_file_path, diff --git a/py-polars/tests/unit/io/test_lazy_json.py b/py-polars/tests/unit/io/test_lazy_json.py index 924d59aba717..8c16e9039e2c 100644 --- a/py-polars/tests/unit/io/test_lazy_json.py +++ b/py-polars/tests/unit/io/test_lazy_json.py @@ -57,8 +57,8 @@ def test_scan_with_projection(tmp_path: Path) -> None: json_bytes = bytes(json, "utf-8") file_path = tmp_path / "escape_chars.json" - with open(file_path, "wb") as f: - f.write(json_bytes) + file_path.write_bytes(json_bytes) + actual = 
pl.scan_ndjson(file_path).select(["id", "text"]).collect() expected = pl.DataFrame( diff --git a/py-polars/tests/unit/io/test_other.py b/py-polars/tests/unit/io/test_other.py index 7e7746b9ef98..8b068708bdc4 100644 --- a/py-polars/tests/unit/io/test_other.py +++ b/py-polars/tests/unit/io/test_other.py @@ -1,7 +1,7 @@ from __future__ import annotations import copy -import os.path +from pathlib import Path from typing import cast import polars as pl @@ -51,8 +51,8 @@ def test_unit_io_subdir_has_no_init() -> None: # -------------------------------------------------------------------------------- # TLDR: it can mask the builtin 'io' module, causing a fatal python error. # -------------------------------------------------------------------------------- - io_dir = os.path.dirname(__file__) - assert io_dir.endswith(f"unit{os.path.sep}io") - assert not os.path.exists( - f"{io_dir}{os.path.sep}__init__.py" - ), "Found undesirable '__init__.py' in the 'unit.io' tests subdirectory" + io_dir = Path(__file__).parent + assert io_dir.parts[-2:] == ("unit", "io") + assert not ( + io_dir / "__init__.py" + ).exists(), "Found undesirable '__init__.py' in the 'unit.io' tests subdirectory" diff --git a/py-polars/tests/unit/io/test_parquet.py b/py-polars/tests/unit/io/test_parquet.py index 556a3efbbf39..b57765242a24 100644 --- a/py-polars/tests/unit/io/test_parquet.py +++ b/py-polars/tests/unit/io/test_parquet.py @@ -1,8 +1,8 @@ from __future__ import annotations import io -import os from datetime import datetime, timezone +from pathlib import Path from typing import TYPE_CHECKING import numpy as np @@ -19,8 +19,6 @@ ) if TYPE_CHECKING: - from pathlib import Path - from polars.type_aliases import ParquetCompression @@ -513,7 +511,7 @@ def test_parquet_string_cache() -> None: def test_tz_aware_parquet_9586() -> None: result = pl.read_parquet( - os.path.join("tests", "unit", "io", "files", "tz_aware.parquet") + Path("tests") / "unit" / "io" / "files" / "tz_aware.parquet" ) expected = pl.DataFrame( {"UTC_DATETIME_ID": [datetime(2023, 6, 26, 14, 15, 0, tzinfo=timezone.utc)]} diff --git a/py-polars/tests/unit/streaming/conftest.py b/py-polars/tests/unit/streaming/conftest.py index 31e98521a2a2..b7b476474316 100644 --- a/py-polars/tests/unit/streaming/conftest.py +++ b/py-polars/tests/unit/streaming/conftest.py @@ -1,4 +1,3 @@ -import os from pathlib import Path import pytest @@ -6,5 +5,4 @@ @pytest.fixture() def io_files_path() -> Path: - current_dir = os.path.dirname(__file__) - return Path(current_dir) / ".." / "io" / "files" + return Path(__file__).parent.parent / "io" / "files" diff --git a/py-polars/tests/unit/test_cfg.py b/py-polars/tests/unit/test_cfg.py index 1027a739fd51..1d1be35d45dc 100644 --- a/py-polars/tests/unit/test_cfg.py +++ b/py-polars/tests/unit/test_cfg.py @@ -1,7 +1,8 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Iterator +from pathlib import Path +from typing import Iterator import pytest @@ -10,9 +11,6 @@ from polars.exceptions import StringCacheMismatchError from polars.testing import assert_frame_equal -if TYPE_CHECKING: - from pathlib import Path - @pytest.fixture(autouse=True) def _environ() -> Iterator[None]: @@ -531,7 +529,7 @@ def test_config_load_save(tmp_path: Path) -> None: # ...load back from config... if file is not None: - assert os.path.isfile(cfg) + assert Path(cfg).is_file() pl.Config.load(cfg) # ...and confirm the saved options were set. 
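
Aside: every hunk in this patch applies the same mechanical os.path-to-pathlib
translation. A minimal sketch of the recurring equivalences, runnable on its
own (the CSV path below is hypothetical and used only for illustration):

    from pathlib import Path

    p = Path("~/datasets/foods1.csv").expanduser()  # os.path.expanduser(path)
    here = Path(__file__).parent                    # os.path.dirname(__file__)
    resolved = p.resolve()                          # os.path.abspath(path)
    # os.path.exists(path) / os.path.isdir(path) become methods on the path:
    if p.exists() and not p.is_dir():
        data = p.read_bytes()                       # open(path, "rb").read()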
diff --git a/py-polars/tests/unit/test_sql.py b/py-polars/tests/unit/test_sql.py index 86a78ddddfd2..5f328390ac1b 100644 --- a/py-polars/tests/unit/test_sql.py +++ b/py-polars/tests/unit/test_sql.py @@ -1,6 +1,5 @@ from __future__ import annotations -import os import warnings from pathlib import Path @@ -13,8 +12,8 @@ # TODO: Do not rely on I/O for these tests @pytest.fixture() -def foods_ipc_path() -> str: - return str(Path(os.path.dirname(__file__)) / "io" / "files" / "foods1.ipc") +def foods_ipc_path() -> Path: + return Path(__file__).parent / "io" / "files" / "foods1.ipc" def test_sql_cast() -> None: From 4a12df1e5cf65f2dcbf818ab0b479fa2e6bd0905 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sun, 16 Jul 2023 08:50:53 +0200 Subject: [PATCH 33/37] feat(python): Add `Series.extend` (#9901) --- .../source/reference/series/modify_select.rst | 1 + py-polars/polars/series/series.py | 114 ++++++++++++++++-- .../polars/testing/parametric/primitives.py | 2 +- py-polars/polars/utils/_construction.py | 2 +- py-polars/tests/unit/dataframe/__init__.py | 0 py-polars/tests/unit/datatypes/test_struct.py | 50 -------- py-polars/tests/unit/series/__init__.py | 0 py-polars/tests/unit/series/test_append.py | 96 +++++++++++++++ py-polars/tests/unit/series/test_extend.py | 34 ++++++ .../tests/unit/{ => series}/test_series.py | 14 --- 10 files changed, 236 insertions(+), 77 deletions(-) create mode 100644 py-polars/tests/unit/dataframe/__init__.py create mode 100644 py-polars/tests/unit/series/__init__.py create mode 100644 py-polars/tests/unit/series/test_append.py create mode 100644 py-polars/tests/unit/series/test_extend.py rename py-polars/tests/unit/{ => series}/test_series.py (99%) diff --git a/py-polars/docs/source/reference/series/modify_select.rst b/py-polars/docs/source/reference/series/modify_select.rst index 2738415e5010..d9808a9a0651 100644 --- a/py-polars/docs/source/reference/series/modify_select.rst +++ b/py-polars/docs/source/reference/series/modify_select.rst @@ -21,6 +21,7 @@ Manipulation/selection Series.drop_nans Series.drop_nulls Series.explode + Series.extend Series.extend_constant Series.fill_nan Series.fill_null diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index a831c272b358..e7ff377258fa 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -2303,7 +2303,7 @@ def slice(self, offset: int, length: int | None = None) -> Series: """ - def append(self, other: Series, *, append_chunks: bool = True) -> Series: + def append(self, other: Series, *, append_chunks: bool | None = None) -> Self: """ Append a Series to this one. @@ -2312,6 +2312,11 @@ def append(self, other: Series, *, append_chunks: bool = True) -> Series: other Series to append. append_chunks + .. deprecated:: 0.18.8 + This argument will be removed and ``append`` will change to always + behave like ``append_chunks=True`` (the previous default). For the + behavior of ``append_chunks=False``, use ``Series.extend``. + If set to `True` the append operation will add the chunks from `other` to self. This is super cheap. @@ -2335,13 +2340,21 @@ def append(self, other: Series, *, append_chunks: bool = True) -> Series: to store them in a single `Series`. In the latter case, finish the sequence of `append_chunks` operations with a `rechunk`. + Warnings + -------- + This method modifies the series in-place. The series is returned for + convenience only. 
+ + See Also + -------- + extend Examples -------- - >>> s = pl.Series("a", [1, 2, 3]) - >>> s2 = pl.Series("b", [4, 5, 6]) - >>> s.append(s2) - shape: (6,) + >>> a = pl.Series("a", [1, 2, 3]) + >>> b = pl.Series("b", [4, 5]) + >>> a.append(b) + shape: (5,) Series: 'a' [i64] [ 1 @@ -2349,21 +2362,100 @@ def append(self, other: Series, *, append_chunks: bool = True) -> Series: 3 4 5 - 6 ] + The resulting series will consist of multiple chunks. + + >>> a.n_chunks() + 2 + """ + if append_chunks is not None: + warnings.warn( + "the `append_chunks` argument will be removed and `append` will change" + " to always behave like `append_chunks=True` (the previous default)." + " For the behavior of `append_chunks=False`, use `Series.extend`.", + DeprecationWarning, + stacklevel=find_stacklevel(), + ) + else: + append_chunks = True + + if not append_chunks: + return self.extend(other) + try: - if append_chunks: - self._s.append(other._s) + self._s.append(other._s) + except RuntimeError as exc: + if str(exc) == "Already mutably borrowed": + self._s.append(other._s.clone()) else: - self._s.extend(other._s) - return self + raise exc + return self + + def extend(self, other: Series) -> Self: + """ + Extend the memory backed by this Series with the values from another. + + Different from ``append``, which adds the chunks from ``other`` to the chunks of + this series, ``extend`` appends the data from ``other`` to the underlying memory + locations and thus may cause a reallocation (which is expensive). + + If this does `not` cause a reallocation, the resulting data structure will not + have any extra chunks and thus will yield faster queries. + + Prefer ``extend`` over ``append`` when you want to do a query after a single + append. For instance, during online operations where you add `n` rows + and rerun a query. + + Prefer ``append`` over ``extend`` when you want to append many times + before doing a query. For instance, when you read in multiple files and want + to store them in a single ``Series``. In the latter case, finish the sequence + of ``append`` operations with a `rechunk`. + + Parameters + ---------- + other + Series to extend the series with. + + Warnings + -------- + This method modifies the series in-place. The series is returned for + convenience only. + + See Also + -------- + append + + Examples + -------- + >>> a = pl.Series("a", [1, 2, 3]) + >>> b = pl.Series("b", [4, 5]) + >>> a.extend(b) + shape: (5,) + Series: 'a' [i64] + [ + 1 + 2 + 3 + 4 + 5 + ] + + The resulting series will consist of a single chunk. 
+ + >>> a.n_chunks() + 1 + + """ + try: + self._s.extend(other._s) except RuntimeError as exc: if str(exc) == "Already mutably borrowed": - return self.append(other.clone(), append_chunks=append_chunks) + self._s.extend(other._s.clone()) else: raise exc + return self def filter(self, predicate: Series | list[bool]) -> Self: """ diff --git a/py-polars/polars/testing/parametric/primitives.py b/py-polars/polars/testing/parametric/primitives.py index d6b24e2956dc..b88b6bf776b7 100644 --- a/py-polars/polars/testing/parametric/primitives.py +++ b/py-polars/polars/testing/parametric/primitives.py @@ -438,7 +438,7 @@ def draw_series(draw: DrawFn) -> Series: s = s.cast(Categorical) if series_size and (chunked or (chunked is None and draw(booleans()))): split_at = series_size // 2 - s = s[:split_at].append(s[split_at:], append_chunks=True) + s = s[:split_at].append(s[split_at:]) return s return draw_series() diff --git a/py-polars/polars/utils/_construction.py b/py-polars/polars/utils/_construction.py index d2f76fad2a9b..25efe25d6a32 100644 --- a/py-polars/polars/utils/_construction.py +++ b/py-polars/polars/utils/_construction.py @@ -288,7 +288,7 @@ def to_series_chunk(values: list[Any], dtype: PolarsDataType | None) -> Series: series = schunk dtype = series.dtype else: - series.append(schunk, append_chunks=True) + series.append(schunk) n_chunks += 1 if series is None: diff --git a/py-polars/tests/unit/dataframe/__init__.py b/py-polars/tests/unit/dataframe/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/py-polars/tests/unit/datatypes/test_struct.py b/py-polars/tests/unit/datatypes/test_struct.py index 22ec2abbd96c..72e9316448d5 100644 --- a/py-polars/tests/unit/datatypes/test_struct.py +++ b/py-polars/tests/unit/datatypes/test_struct.py @@ -5,7 +5,6 @@ import pandas as pd import pyarrow as pa -import pytest import polars as pl from polars.testing import assert_frame_equal @@ -517,55 +516,6 @@ def test_struct_order() -> None: ) == [{"a": 1, "b": 10}, {"a": 2, "b": None}] -def test_struct_schema_on_append_extend_3452() -> None: - housing1_data = [ - { - "city": "Chicago", - "address": "100 Main St", - "price": 250000, - "nbr_bedrooms": 3, - }, - { - "city": "New York", - "address": "100 First Ave", - "price": 450000, - "nbr_bedrooms": 2, - }, - ] - - housing2_data = [ - { - "address": "303 Mockingbird Lane", - "city": "Los Angeles", - "nbr_bedrooms": 2, - "price": 450000, - }, - { - "address": "404 Moldave Dr", - "city": "Miami Beach", - "nbr_bedrooms": 1, - "price": 250000, - }, - ] - housing1, housing2 = pl.Series(housing1_data), pl.Series(housing2_data) - with pytest.raises( - pl.SchemaError, - match=( - 'cannot append field with name "address" ' - 'to struct with field name "city"' - ), - ): - housing1.append(housing2, append_chunks=True) - with pytest.raises( - pl.SchemaError, - match=( - 'cannot extend field with name "address" ' - 'to struct with field name "city"' - ), - ): - housing1.append(housing2, append_chunks=False) - - def test_struct_arr_eval() -> None: df = pl.DataFrame( {"col_struct": [[{"a": 1, "b": 11}, {"a": 2, "b": 12}, {"a": 1, "b": 11}]]} diff --git a/py-polars/tests/unit/series/__init__.py b/py-polars/tests/unit/series/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/py-polars/tests/unit/series/test_append.py b/py-polars/tests/unit/series/test_append.py new file mode 100644 index 000000000000..46520ae67204 --- /dev/null +++ b/py-polars/tests/unit/series/test_append.py @@ -0,0 +1,96 @@ +import pytest + +import polars 
as pl +from polars.testing import assert_series_equal + + +def test_append() -> None: + a = pl.Series("a", [1, 2]) + b = pl.Series("b", [8, 9, None]) + + result = a.append(b) + + expected = pl.Series("a", [1, 2, 8, 9, None]) + assert_series_equal(a, expected) + assert_series_equal(result, expected) + assert a.n_chunks() == 2 + + +def test_append_deprecated_append_chunks() -> None: + a = pl.Series("a", [1, 2]) + b = pl.Series("b", [8, 9, None]) + + with pytest.deprecated_call(): + a.append(b, append_chunks=False) + + expected = pl.Series("a", [1, 2, 8, 9, None]) + assert_series_equal(a, expected) + assert a.n_chunks() == 1 + + +def test_append_self_3915() -> None: + a = pl.Series("a", [1, 2]) + + a.append(a) + + expected = pl.Series("a", [1, 2, 1, 2]) + assert_series_equal(a, expected) + assert a.n_chunks() == 2 + + +def test_append_bad_input() -> None: + a = pl.Series("a", [1, 2]) + b = a.to_frame() + + with pytest.raises(AttributeError): + a.append(b) # type: ignore[arg-type] + + +def test_struct_schema_on_append_extend_3452() -> None: + housing1_data = [ + { + "city": "Chicago", + "address": "100 Main St", + "price": 250000, + "nbr_bedrooms": 3, + }, + { + "city": "New York", + "address": "100 First Ave", + "price": 450000, + "nbr_bedrooms": 2, + }, + ] + + housing2_data = [ + { + "address": "303 Mockingbird Lane", + "city": "Los Angeles", + "nbr_bedrooms": 2, + "price": 450000, + }, + { + "address": "404 Moldave Dr", + "city": "Miami Beach", + "nbr_bedrooms": 1, + "price": 250000, + }, + ] + housing1, housing2 = pl.Series(housing1_data), pl.Series(housing2_data) + with pytest.raises( + pl.SchemaError, + match=( + 'cannot append field with name "address" ' + 'to struct with field name "city"' + ), + ): + housing1.append(housing2) + + with pytest.raises( + pl.SchemaError, + match=( + 'cannot extend field with name "address" ' + 'to struct with field name "city"' + ), + ): + housing1.extend(housing2) diff --git a/py-polars/tests/unit/series/test_extend.py b/py-polars/tests/unit/series/test_extend.py new file mode 100644 index 000000000000..9e3b71acd075 --- /dev/null +++ b/py-polars/tests/unit/series/test_extend.py @@ -0,0 +1,34 @@ +import pytest + +import polars as pl +from polars.testing import assert_series_equal + + +def test_extend() -> None: + a = pl.Series("a", [1, 2]) + b = pl.Series("b", [8, 9, None]) + + result = a.extend(b) + + expected = pl.Series("a", [1, 2, 8, 9, None]) + assert_series_equal(a, expected) + assert_series_equal(result, expected) + assert a.n_chunks() == 1 + + +def test_extend_self() -> None: + a = pl.Series("a", [1, 2]) + + a.extend(a) + + expected = pl.Series("a", [1, 2, 1, 2]) + assert_series_equal(a, expected) + assert a.n_chunks() == 1 + + +def test_extend_bad_input() -> None: + a = pl.Series("a", [1, 2]) + b = a.to_frame() + + with pytest.raises(AttributeError): + a.extend(b) # type: ignore[arg-type] diff --git a/py-polars/tests/unit/test_series.py b/py-polars/tests/unit/series/test_series.py similarity index 99% rename from py-polars/tests/unit/test_series.py rename to py-polars/tests/unit/series/test_series.py index 5aa1215934b1..624147a8b69a 100644 --- a/py-polars/tests/unit/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -409,15 +409,6 @@ def test_add_string() -> None: assert_series_equal(result, pl.Series(["pfx:hello", "pfx:weird"])) -def test_append_extend() -> None: - a = pl.Series("a", [1, 2]) - b = pl.Series("b", [8, 9, None]) - a.append(b, append_chunks=False) - expected = pl.Series("a", [1, 2, 8, 9, None]) - 
assert_series_equal(a, expected)
-    assert a.n_chunks() == 1
-
-
 @pytest.mark.parametrize(
     ("data", "expected_dtype"),
     [
@@ -2305,11 +2296,6 @@ def test_clip() -> None:
     assert s.clip(1, 10).to_list() == [1, 5, None, 10]


-def test_mutable_borrowed_append_3915() -> None:
-    s = pl.Series("s", [1, 2, 3])
-    assert s.append(s).to_list() == [1, 2, 3, 1, 2, 3]
-
-
 def test_set_at_idx() -> None:
     s = pl.Series("s", [1, 2, 3])

From bb36e4c6841523f3189f30cc6152b16d99f12c49 Mon Sep 17 00:00:00 2001
From: Josh Magarick
Date: Sat, 15 Jul 2023 23:51:36 -0700
Subject: [PATCH 34/37] feat(rust, python): Optional three-valued logic for
 any/all (#9848)

---
 .../src/chunked_array/comparison/mod.rs       | 17 ++++++
 .../src/dsl/function_expr/boolean.rs          | 32 +++++------
 polars/polars-lazy/polars-plan/src/dsl/mod.rs |  8 +--
 polars/polars-sql/src/sql_expr.rs             |  4 +-
 polars/tests/it/lazy/expressions/window.rs    |  4 +-
 py-polars/polars/expr/expr.py                 | 57 +++++++++++++++++--
 .../polars/functions/aggregation/vertical.py  |  6 +-
 py-polars/polars/series/series.py             |  8 +--
 py-polars/src/expr/general.rs                 |  8 +--
 9 files changed, 104 insertions(+), 40 deletions(-)

diff --git a/polars/polars-core/src/chunked_array/comparison/mod.rs b/polars/polars-core/src/chunked_array/comparison/mod.rs
index 1008fdd676e2..2400b568cbe7 100644
--- a/polars/polars-core/src/chunked_array/comparison/mod.rs
+++ b/polars/polars-core/src/chunked_array/comparison/mod.rs
@@ -1004,6 +1004,23 @@ impl BooleanChunked {
     pub fn any(&self) -> bool {
         self.downcast_iter().any(compute::boolean::any)
     }
+
+    // Three-valued versions which can return None
+    pub fn all_3val(&self, drop_nulls: bool) -> Option<bool> {
+        if drop_nulls || self.null_count() == 0 {
+            Some(self.all())
+        } else {
+            None
+        }
+    }
+    pub fn any_3val(&self, drop_nulls: bool) -> Option<bool> {
+        let res = self.any();
+        if drop_nulls || res {
+            Some(res)
+        } else {
+            None
+        }
+    }
 }

 // private
diff --git a/polars/polars-lazy/polars-plan/src/dsl/function_expr/boolean.rs b/polars/polars-lazy/polars-plan/src/dsl/function_expr/boolean.rs
index a71f240d913b..5575dfc31f32 100644
--- a/polars/polars-lazy/polars-plan/src/dsl/function_expr/boolean.rs
+++ b/polars/polars-lazy/polars-plan/src/dsl/function_expr/boolean.rs
@@ -8,8 +8,12 @@ use crate::wrap;
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Clone, PartialEq, Debug, Eq, Hash)]
 pub enum BooleanFunction {
-    All,
-    Any,
+    All {
+        drop_nulls: bool,
+    },
+    Any {
+        drop_nulls: bool,
+    },
     IsNot,
     IsNull,
     IsNotNull,
@@ -37,8 +41,8 @@ impl Display for BooleanFunction {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         use BooleanFunction::*;
         let s = match self {
-            All => "all",
-            Any => "any",
+            All { .. } => "all",
+            Any { ..
} => "any",
             IsNot => "is_not",
             IsNull => "is_null",
             IsNotNull => "is_not_null",
@@ -63,8 +67,8 @@ impl From<BooleanFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
     fn from(func: BooleanFunction) -> Self {
         use BooleanFunction::*;
         match func {
-            All => map!(all),
-            Any => map!(any),
+            All { drop_nulls } => map!(all, drop_nulls),
+            Any { drop_nulls } => map!(any, drop_nulls),
             IsNot => map!(is_not),
             IsNull => map!(is_null),
             IsNotNull => map!(is_not_null),
@@ -90,22 +94,14 @@ impl From<BooleanFunction> for FunctionExpr {
     }
 }

-fn all(s: &Series) -> PolarsResult<Series> {
+fn all(s: &Series, drop_nulls: bool) -> PolarsResult<Series> {
     let boolean = s.bool()?;
-    if boolean.all() {
-        Ok(Series::new(s.name(), [true]))
-    } else {
-        Ok(Series::new(s.name(), [false]))
-    }
+    Ok(Series::new(s.name(), [boolean.all_3val(drop_nulls)]))
 }

-fn any(s: &Series) -> PolarsResult<Series> {
+fn any(s: &Series, drop_nulls: bool) -> PolarsResult<Series> {
     let boolean = s.bool()?;
-    if boolean.any() {
-        Ok(Series::new(s.name(), [true]))
-    } else {
-        Ok(Series::new(s.name(), [false]))
-    }
+    Ok(Series::new(s.name(), [boolean.any_3val(drop_nulls)]))
 }

 fn is_not(s: &Series) -> PolarsResult<Series> {
diff --git a/polars/polars-lazy/polars-plan/src/dsl/mod.rs b/polars/polars-lazy/polars-plan/src/dsl/mod.rs
index e43b1efb3537..120ec51da67d 100644
--- a/polars/polars-lazy/polars-plan/src/dsl/mod.rs
+++ b/polars/polars-lazy/polars-plan/src/dsl/mod.rs
@@ -1639,8 +1639,8 @@ impl Expr {
     }

     /// Check if any boolean value is `true`
-    pub fn any(self) -> Self {
-        self.apply_private(BooleanFunction::Any.into())
+    pub fn any(self, drop_nulls: bool) -> Self {
+        self.apply_private(BooleanFunction::Any { drop_nulls }.into())
             .with_function_options(|mut opt| {
                 opt.auto_explode = true;
                 opt
@@ -1655,8 +1655,8 @@ impl Expr {
     }

     /// Check if all boolean values are `true`
-    pub fn all(self) -> Self {
-        self.apply_private(BooleanFunction::All.into())
+    pub fn all(self, drop_nulls: bool) -> Self {
+        self.apply_private(BooleanFunction::All { drop_nulls }.into())
             .with_function_options(|mut opt| {
                 opt.auto_explode = true;
                 opt
diff --git a/polars/polars-sql/src/sql_expr.rs b/polars/polars-sql/src/sql_expr.rs
index 85e99ff8dcc3..31aaabf56de7 100644
--- a/polars/polars-sql/src/sql_expr.rs
+++ b/polars/polars-sql/src/sql_expr.rs
@@ -60,8 +60,8 @@ pub(crate) struct SqlExprVisitor<'a> {
 impl SqlExprVisitor<'_> {
     fn visit_expr(&self, expr: &SqlExpr) -> PolarsResult<Expr> {
         match expr {
-            SqlExpr::AllOp(_) => Ok(self.visit_expr(expr)?.all()),
-            SqlExpr::AnyOp(expr) => Ok(self.visit_expr(expr)?.any()),
+            SqlExpr::AllOp(_) => Ok(self.visit_expr(expr)?.all(true)),
+            SqlExpr::AnyOp(expr) => Ok(self.visit_expr(expr)?.any(true)),
             SqlExpr::ArrayAgg(expr) => self.visit_arr_agg(expr),
             SqlExpr::Between {
                 expr,
diff --git a/polars/tests/it/lazy/expressions/window.rs b/polars/tests/it/lazy/expressions/window.rs
index b5c4d5fc6326..c150378d9352 100644
--- a/polars/tests/it/lazy/expressions/window.rs
+++ b/polars/tests/it/lazy/expressions/window.rs
@@ -364,8 +364,8 @@ fn test_window_exprs_any_all() -> PolarsResult<()> {
     ]?
.lazy() .select([ - col("var2").any().over([col("var1")]).alias("any"), - col("var2").all().over([col("var1")]).alias("all"), + col("var2").any(true).over([col("var1")]).alias("any"), + col("var2").all(true).over([col("var1")]).alias("all"), ]) .collect()?; diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 27dd05178986..3bacfff59766 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -311,10 +311,15 @@ def to_physical(self) -> Self: """ return self._from_pyexpr(self._pyexpr.to_physical()) - def any(self) -> Self: + def any(self, drop_nulls: bool = True) -> Self: """ Check if any boolean value in a Boolean column is `True`. + Parameters + ---------- + drop_nulls + If False, return None if there are nulls but no Trues. + Returns ------- Boolean literal @@ -331,17 +336,42 @@ def any(self) -> Self: ╞══════╪═══════╡ │ true ┆ false │ └──────┴───────┘ + >>> df = pl.DataFrame(dict(x=[None, False], y=[None, True])) + >>> df.select(pl.col("x").any(True), pl.col("y").any(True)) + shape: (1, 2) + ┌───────┬──────┐ + │ x ┆ y │ + │ --- ┆ --- │ + │ bool ┆ bool │ + ╞═══════╪══════╡ + │ false ┆ true │ + └───────┴──────┘ + >>> df.select(pl.col("x").any(False), pl.col("y").any(False)) + shape: (1, 2) + ┌──────┬──────┐ + │ x ┆ y │ + │ --- ┆ --- │ + │ bool ┆ bool │ + ╞══════╪══════╡ + │ null ┆ true │ + └──────┴──────┘ """ - return self._from_pyexpr(self._pyexpr.any()) + return self._from_pyexpr(self._pyexpr.any(drop_nulls)) - def all(self) -> Self: + def all(self, drop_nulls: bool = True) -> Self: """ Check if all boolean values in a Boolean column are `True`. This method is an expression - not to be confused with :func:`polars.all` which is a function to select all columns. + Parameters + ---------- + drop_nulls + If False, return None if there are any nulls. + + Returns ------- Boolean literal @@ -360,9 +390,28 @@ def all(self) -> Self: ╞══════╪═══════╪═══════╡ │ true ┆ false ┆ false │ └──────┴───────┴───────┘ + >>> df = pl.DataFrame(dict(x=[None, False], y=[None, True])) + >>> df.select(pl.col("x").all(True), pl.col("y").all(True)) + shape: (1, 2) + ┌───────┬───────┐ + │ x ┆ y │ + │ --- ┆ --- │ + │ bool ┆ bool │ + ╞═══════╪═══════╡ + │ false ┆ false │ + └───────┴───────┘ + >>> df.select(pl.col("x").all(False), pl.col("y").all(False)) + shape: (1, 2) + ┌──────┬──────┐ + │ x ┆ y │ + │ --- ┆ --- │ + │ bool ┆ bool │ + ╞══════╪══════╡ + │ null ┆ null │ + └──────┴──────┘ """ - return self._from_pyexpr(self._pyexpr.all()) + return self._from_pyexpr(self._pyexpr.all(drop_nulls)) def arg_true(self) -> Self: """ diff --git a/py-polars/polars/functions/aggregation/vertical.py b/py-polars/polars/functions/aggregation/vertical.py index 5d210e9fb1e8..85b3cc698dac 100644 --- a/py-polars/polars/functions/aggregation/vertical.py +++ b/py-polars/polars/functions/aggregation/vertical.py @@ -28,7 +28,7 @@ def all( @deprecated_alias(columns="exprs") def all( exprs: IntoExpr | Iterable[IntoExpr] | None = None, *more_exprs: IntoExpr -) -> Expr | bool: +) -> Expr | bool | None: """ Either return an expression representing all columns, or evaluate a bitwise AND operation. @@ -115,7 +115,9 @@ def any(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr: @deprecated_alias(columns="exprs") -def any(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr | bool: +def any( + exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr +) -> Expr | bool | None: """ Evaluate a bitwise OR operation. 
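
Aside: the new `drop_nulls` flag opts these reductions into Kleene
(three-valued) logic. A minimal sketch of the semantics, assuming a polars
build that includes this patch (`drop_nulls` defaults to True, which keeps
the old two-valued behaviour):

    import polars as pl

    s = pl.Series([None, False])

    print(s.any())                  # False: nulls are dropped by default
    print(s.any(drop_nulls=False))  # None: no True found, but nulls present
    print(s.all())                  # False
    print(s.all(drop_nulls=False))  # None: a null makes the conjunction unknown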
diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index e7ff377258fa..55d6c7fed39a 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -1196,7 +1196,7 @@ def sqrt(self) -> Series: """ - def any(self) -> bool: + def any(self, drop_nulls: bool = True) -> bool | None: """ Check if any boolean value in the column is `True`. @@ -1205,9 +1205,9 @@ def any(self) -> bool: Boolean literal """ - return self.to_frame().select(F.col(self.name).any()).to_series()[0] + return self.to_frame().select(F.col(self.name).any(drop_nulls)).to_series()[0] - def all(self) -> bool: + def all(self, drop_nulls: bool = True) -> bool | None: """ Check if all boolean values in the column are `True`. @@ -1216,7 +1216,7 @@ def all(self) -> bool: Boolean literal """ - return self.to_frame().select(F.col(self.name).all()).to_series()[0] + return self.to_frame().select(F.col(self.name).all(drop_nulls)).to_series()[0] def log(self, base: float = math.e) -> Series: """Compute the logarithm to a given base.""" diff --git a/py-polars/src/expr/general.rs b/py-polars/src/expr/general.rs index fa7d0bf638e3..f7a0f8f79352 100644 --- a/py-polars/src/expr/general.rs +++ b/py-polars/src/expr/general.rs @@ -1112,12 +1112,12 @@ impl PyExpr { .with_fmt("extend") .into() } - fn any(&self) -> Self { - self.inner.clone().any().into() + fn any(&self, drop_nulls: bool) -> Self { + self.inner.clone().any(drop_nulls).into() } - fn all(&self) -> Self { - self.inner.clone().all().into() + fn all(&self, drop_nulls: bool) -> Self { + self.inner.clone().all(drop_nulls).into() } fn log(&self, base: f64) -> Self { From 20212499e1e39649b96ede8cd28edc8f00ac669d Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sun, 16 Jul 2023 14:49:45 +0200 Subject: [PATCH 35/37] fix(python): Handle `DataFrame.extend` extending by itself (#9897) --- py-polars/polars/dataframe/frame.py | 40 ++++++--- py-polars/tests/unit/dataframe/test_df.py | 55 ------------- py-polars/tests/unit/dataframe/test_extend.py | 81 +++++++++++++++++++ py-polars/tests/unit/dataframe/test_vstack.py | 14 ++++ 4 files changed, 125 insertions(+), 65 deletions(-) create mode 100644 py-polars/tests/unit/dataframe/test_extend.py diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 6ce26c79211f..f86fbb4c45b1 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -5827,6 +5827,10 @@ def vstack(self, other: DataFrame, *, in_place: bool = False) -> Self: in_place Modify in place. + See Also + -------- + extend + Examples -------- >>> df1 = pl.DataFrame( @@ -5874,26 +5878,36 @@ def extend(self, other: DataFrame) -> Self: """ Extend the memory backed by this `DataFrame` with the values from `other`. - Different from `vstack` which adds the chunks from `other` to the chunks of this - `DataFrame` `extend` appends the data from `other` to the underlying memory - locations and thus may cause a reallocation. + Different from ``vstack`` which adds the chunks from ``other`` to the chunks of + this ``DataFrame``, ``extend`` appends the data from `other` to the underlying + memory locations and thus may cause a reallocation. If this does not cause a reallocation, the resulting data structure will not have any extra chunks and thus will yield faster queries. - Prefer `extend` over `vstack` when you want to do a query after a single append. - For instance during online operations where you add `n` rows and rerun a query. 
+ Prefer ``extend`` over ``vstack`` when you want to do a query after a single + append. For instance, during online operations where you add `n` rows and rerun + a query. - Prefer `vstack` over `extend` when you want to append many times before doing a - query. For instance when you read in multiple files and when to store them in a - single `DataFrame`. In the latter case, finish the sequence of `vstack` - operations with a `rechunk`. + Prefer ``vstack`` over ``extend`` when you want to append many times before + doing a query. For instance, when you read in multiple files and want to store + them in a single ``DataFrame``. In the latter case, finish the sequence of + ``vstack`` operations with a ``rechunk``. Parameters ---------- other DataFrame to vertically add. + Warnings + -------- + This method modifies the dataframe in-place. The dataframe is returned for + convenience only. + + See Also + -------- + vstack + Examples -------- >>> df1 = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]}) @@ -5914,7 +5928,13 @@ def extend(self, other: DataFrame) -> Self: └─────┴─────┘ """ - self._df.extend(other._df) + try: + self._df.extend(other._df) + except RuntimeError as exc: + if str(exc) == "Already mutably borrowed": + self._df.extend(other._df.clone()) + else: + raise exc return self def drop(self, columns: str | Collection[str], *more_columns: str) -> DataFrame: diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index a2056ee74bef..6739a9a6d72f 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -704,61 +704,6 @@ def test_hstack_dataframe(in_place: bool) -> None: assert_frame_equal(df_out, expected) -def test_extend() -> None: - with pl.StringCache(): - df1 = pl.DataFrame( - { - "foo": [1, 2], - "bar": [True, False], - "ham": ["a", "b"], - "cat": ["A", "B"], - "dates": [datetime(2021, 1, 1), datetime(2021, 2, 1)], - } - ).with_columns( - [ - pl.col("cat").cast(pl.Categorical), - ] - ) - df2 = pl.DataFrame( - { - "foo": [3, 4], - "bar": [True, None], - "ham": ["c", "d"], - "cat": ["C", "B"], - "dates": [datetime(2022, 9, 1), datetime(2021, 2, 1)], - } - ).with_columns( - [ - pl.col("cat").cast(pl.Categorical), - ] - ) - - df1.extend(df2) - expected = pl.DataFrame( - { - "foo": [1, 2, 3, 4], - "bar": [True, False, True, None], - "ham": ["a", "b", "c", "d"], - "cat": ["A", "B", "C", "B"], - "dates": [ - datetime(2021, 1, 1), - datetime(2021, 2, 1), - datetime(2022, 9, 1), - datetime(2021, 2, 1), - ], - } - ).with_columns( - pl.col("cat").cast(pl.Categorical), - ) - assert_frame_equal(df1, expected) - - # 8745 - df = pl.DataFrame([{"age": 1}, {"age": 2}, {"age": 3}]) - df = df[:-1] - tail = pl.DataFrame([{"age": 8}]) - assert df.extend(tail).to_dict(False) == {"age": [1, 2, 8]} - - def test_file_buffer() -> None: f = BytesIO() f.write(b"1,2,3,4,5,6\n7,8,9,10,11,12") diff --git a/py-polars/tests/unit/dataframe/test_extend.py b/py-polars/tests/unit/dataframe/test_extend.py new file mode 100644 index 000000000000..08359cc85c19 --- /dev/null +++ b/py-polars/tests/unit/dataframe/test_extend.py @@ -0,0 +1,81 @@ +from datetime import datetime + +import pytest + +import polars as pl +from polars.testing import assert_frame_equal + + +def test_extend_various_dtypes() -> None: + with pl.StringCache(): + df1 = pl.DataFrame( + { + "foo": [1, 2], + "bar": [True, False], + "ham": ["a", "b"], + "cat": ["A", "B"], + "dates": [datetime(2021, 1, 1), datetime(2021, 2, 1)], + }, + schema_overrides={"cat": 
pl.Categorical}, + ) + df2 = pl.DataFrame( + { + "foo": [3, 4], + "bar": [True, None], + "ham": ["c", "d"], + "cat": ["C", "B"], + "dates": [datetime(2022, 9, 1), datetime(2021, 2, 1)], + }, + schema_overrides={"cat": pl.Categorical}, + ) + + df1.extend(df2) + + expected = pl.DataFrame( + { + "foo": [1, 2, 3, 4], + "bar": [True, False, True, None], + "ham": ["a", "b", "c", "d"], + "cat": ["A", "B", "C", "B"], + "dates": [ + datetime(2021, 1, 1), + datetime(2021, 2, 1), + datetime(2022, 9, 1), + datetime(2021, 2, 1), + ], + }, + schema_overrides={"cat": pl.Categorical}, + ) + assert_frame_equal(df1, expected) + + +def test_extend_slice_offset_8745() -> None: + df = pl.DataFrame([{"age": 1}, {"age": 2}, {"age": 3}]) + df = df[:-1] + tail = pl.DataFrame([{"age": 8}]) + assert df.extend(tail).to_dict(False) == {"age": [1, 2, 8]} + + +def test_extend_self() -> None: + df = pl.DataFrame({"a": [1, 2], "b": [True, False]}) + + df.extend(df) + + expected = pl.DataFrame({"a": [1, 2, 1, 2], "b": [True, False, True, False]}) + assert_frame_equal(df, expected) + + +def test_extend_column_number_mismatch() -> None: + df1 = pl.DataFrame({"a": [1, 2], "b": [True, False]}) + df2 = df1.drop("a") + + with pytest.raises(pl.ShapeError): + df1.extend(df2) + + +def test_extend_column_name_mismatch() -> None: + df1 = pl.DataFrame({"a": [1, 2], "b": [True, False]}) + df2 = df1.with_columns(pl.col("a").alias("c")) + + with pytest.raises(pl.ShapeError): + df1.extend(df2) diff --git a/py-polars/tests/unit/dataframe/test_vstack.py b/py-polars/tests/unit/dataframe/test_vstack.py index ecf88a2f987f..504ae9a24b97 100644 --- a/py-polars/tests/unit/dataframe/test_vstack.py +++ b/py-polars/tests/unit/dataframe/test_vstack.py @@ -44,3 +44,17 @@ def test_vstack_self_in_place(df1: pl.DataFrame) -> None: {"foo": [1, 2, 1, 2], "bar": [6, 7, 6, 7], "ham": ["a", "b", "a", "b"]} ) assert_frame_equal(df1, expected) + + +def test_vstack_column_number_mismatch(df1: pl.DataFrame) -> None: + df2 = df1.drop("ham") + + with pytest.raises(pl.ShapeError): + df1.vstack(df2) + + +def test_vstack_column_name_mismatch(df1: pl.DataFrame) -> None: + df2 = df1.with_columns(pl.col("foo").alias("oof")) + + with pytest.raises(pl.ShapeError): + df1.vstack(df2) From f93e79665edb78dbe7e950187c6b02d15f1232e1 Mon Sep 17 00:00:00 2001 From: Marshall Date: Sun, 16 Jul 2023 16:39:45 -0400 Subject: [PATCH 36/37] refactor(python): deprecate `bins` argument and rename to `breaks` in `Series.cut` (#9913) --- py-polars/polars/expr/expr.py | 2 +- py-polars/polars/series/series.py | 15 ++++++++------- .../tests/unit/operations/test_statistics.py | 8 ++++---- py-polars/tests/unit/test_errors.py | 4 +++- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 3bacfff59766..f113a20cba29 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -3298,7 +3298,7 @@ def cut( breaks A list of unique cut points. labels - Labels to assign to bins. If given, the length must be len(probs) + 1. + Labels to assign to bins. If given, the length must be len(breaks) + 1. 
left_closed Whether intervals should be [) instead of the default of (] include_breaks diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 55d6c7fed39a..594a6ef54ba4 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -1604,9 +1604,10 @@ def to_dummies(self, separator: str = "_") -> DataFrame: """ return wrap_df(self._s.to_dummies(separator)) + @deprecated_alias(bins="breaks") def cut( self, - bins: list[float], + breaks: list[float], labels: list[str] | None = None, break_point_label: str = "break_point", category_label: str = "category", @@ -1620,11 +1621,11 @@ def cut( Parameters ---------- - bins - Bins to create. + breaks + A list of unique cut points. labels Labels to assign to the bins. If given the length of labels must be - len(bins) + 1. + len(breaks) + 1. break_point_label Name given to the breakpoint column/field. Only used if series == False or include_breaks == True @@ -1707,14 +1708,14 @@ def cut( return ( self.to_frame() .with_columns( - F.col(n).cut(bins, labels, left_closed, True).alias(n + "_bin") + F.col(n).cut(breaks, labels, left_closed, True).alias(n + "_bin") ) .unnest(n + "_bin") .rename({"brk": break_point_label, n + "_bin": category_label}) ) res = ( self.to_frame() - .select(F.col(n).cut(bins, labels, left_closed, include_breaks)) + .select(F.col(n).cut(breaks, labels, left_closed, include_breaks)) .to_series() ) if include_breaks: @@ -1743,7 +1744,7 @@ def qcut( We expect quantiles ``0.0 <= quantile <= 1`` labels Labels to assign to the quantiles. If given the length of labels must be - len(bins) + 1. + len(breaks) + 1. break_point_label Name given to the breakpoint column/field. Only used if series == False or include_breaks == True diff --git a/py-polars/tests/unit/operations/test_statistics.py b/py-polars/tests/unit/operations/test_statistics.py index 12d578d6f8b9..e052862b9b06 100644 --- a/py-polars/tests/unit/operations/test_statistics.py +++ b/py-polars/tests/unit/operations/test_statistics.py @@ -27,7 +27,7 @@ def test_corr() -> None: def test_cut() -> None: a = pl.Series("a", [v / 10 for v in range(-30, 30, 5)]) - out = cast(pl.DataFrame, a.cut(bins=[-1, 1], series=False)) + out = cast(pl.DataFrame, a.cut(breaks=[-1, 1], series=False)) assert out.shape == (12, 3) assert out.filter(pl.col("break_point") < 1e9).to_dict(False) == { @@ -50,7 +50,7 @@ def test_cut() -> None: inf = float("inf") df = pl.DataFrame({"a": list(range(5))}) ser = df.select("a").to_series() - assert cast(pl.DataFrame, ser.cut(bins=[-1, 1], series=False)).rows() == [ + assert cast(pl.DataFrame, ser.cut(breaks=[-1, 1], series=False)).rows() == [ (0.0, 1.0, "(-1, 1]"), (1.0, 1.0, "(-1, 1]"), (2.0, inf, "(1, inf]"), @@ -78,8 +78,8 @@ def test_cut() -> None: ) np.random.seed(1) a = pl.Series("a", np.random.randint(0, 10, 10)) - out = cast(pl.DataFrame, a.cut(bins=[-1, 1], series=False)) - out_s = cast(pl.Series, a.cut(bins=[-1, 1], series=True)) + out = cast(pl.DataFrame, a.cut(breaks=[-1, 1], series=False)) + out_s = cast(pl.Series, a.cut(breaks=[-1, 1], series=True)) assert out["a"].cast(int).series_equal(a) # Compare strings and categoricals without a hassle assert_frame_equal(expected_df, out, check_dtype=False) diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py index 6a551d3e98e2..afb443272591 100644 --- a/py-polars/tests/unit/test_errors.py +++ b/py-polars/tests/unit/test_errors.py @@ -43,7 +43,9 @@ def test_error_on_reducing_map() -> None: ), ): df.select( - 
pl.col("x").map(lambda x: x.cut(bins=[1, 2, 3], series=False)).over("group") + pl.col("x") + .map(lambda x: x.cut(breaks=[1, 2, 3], series=False)) + .over("group") ) From f5a8c6cf070d6e269490833587fdb17bb532a97a Mon Sep 17 00:00:00 2001 From: Ray Zhang Date: Mon, 17 Jul 2023 01:46:15 -0400 Subject: [PATCH 37/37] feat(rust, python): Add cloudpickle for serializing python UDFs (#9921) --- .../polars-lazy/polars-plan/src/dsl/python_udf.rs | 14 +++++++++----- py-polars/polars/utils/show_versions.py | 1 + py-polars/pyproject.toml | 3 ++- py-polars/requirements-dev.txt | 1 + py-polars/tests/unit/test_serde.py | 14 ++++++++++++++ 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/polars/polars-lazy/polars-plan/src/dsl/python_udf.rs b/polars/polars-lazy/polars-plan/src/dsl/python_udf.rs index e5d73d696869..46bf0d97795b 100644 --- a/polars/polars-lazy/polars-plan/src/dsl/python_udf.rs +++ b/polars/polars-lazy/polars-plan/src/dsl/python_udf.rs @@ -56,8 +56,9 @@ impl Serialize for PythonFunction { S: Serializer, { Python::with_gil(|py| { - let pickle = PyModule::import(py, "pickle") - .expect("Unable to import 'pickle'") + let pickle = PyModule::import(py, "cloudpickle") + .or(PyModule::import(py, "pickle")) + .expect("Unable to import 'cloudpickle' or 'pickle'") .getattr("dumps") .unwrap(); @@ -83,7 +84,8 @@ impl<'a> Deserialize<'a> for PythonFunction { let bytes = Vec::::deserialize(deserializer)?; Python::with_gil(|py| { - let pickle = PyModule::import(py, "pickle") + let pickle = PyModule::import(py, "cloudpickle") + .or(PyModule::import(py, "pickle")) .expect("Unable to import 'pickle'") .getattr("loads") .unwrap(); @@ -122,7 +124,8 @@ impl PythonUdfExpression { let remainder = &buf[reader.position() as usize..]; Python::with_gil(|py| { - let pickle = PyModule::import(py, "pickle") + let pickle = PyModule::import(py, "cloudpickle") + .or(PyModule::import(py, "pickle")) .expect("Unable to import 'pickle'") .getattr("loads") .unwrap(); @@ -169,7 +172,8 @@ impl SeriesUdf for PythonUdfExpression { ciborium::ser::into_writer(&self.output_type, &mut *buf).unwrap(); Python::with_gil(|py| { - let pickle = PyModule::import(py, "pickle") + let pickle = PyModule::import(py, "cloudpickle") + .or(PyModule::import(py, "pickle")) .expect("Unable to import 'pickle'") .getattr("dumps") .unwrap(); diff --git a/py-polars/polars/utils/show_versions.py b/py-polars/polars/utils/show_versions.py index f34db533ec26..2f7ff9ed42f6 100644 --- a/py-polars/polars/utils/show_versions.py +++ b/py-polars/polars/utils/show_versions.py @@ -59,6 +59,7 @@ def _get_dependency_info() -> dict[str, str]: # see the list of dependencies in pyproject.toml opt_deps = [ "adbc_driver_sqlite", + "cloudpickle", "connectorx", "deltalake", "fsspec", diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index 724f6638ce6e..b6a615424d50 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -51,8 +51,9 @@ pydantic = ["pydantic"] sqlalchemy = ["sqlalchemy", "pandas"] xlsxwriter = ["xlsxwriter"] adbc = ["adbc_driver_sqlite"] +cloudpickle = ["cloudpickle"] all = [ - "polars[pyarrow,pandas,numpy,fsspec,connectorx,xlsx2csv,deltalake,timezone,matplotlib,pydantic,sqlalchemy,xlsxwriter,adbc]", + "polars[pyarrow,pandas,numpy,fsspec,connectorx,xlsx2csv,deltalake,timezone,matplotlib,pydantic,sqlalchemy,xlsxwriter,adbc,cloudpickle]", ] [tool.mypy] diff --git a/py-polars/requirements-dev.txt b/py-polars/requirements-dev.txt index 1d05bc42f0bc..96d05a38d370 100644 --- a/py-polars/requirements-dev.txt +++ 
b/py-polars/requirements-dev.txt @@ -17,6 +17,7 @@ xlsx2csv XlsxWriter adbc_driver_sqlite; python_version >= '3.9' and platform_system != 'Windows' connectorx==0.3.2a5; python_version >= '3.8' # Latest full release is broken - unpin when 0.3.2 released +cloudpickle # Tooling hypothesis==6.79.4; python_version < '3.8' diff --git a/py-polars/tests/unit/test_serde.py b/py-polars/tests/unit/test_serde.py index 0734a8eb9b5a..fe5939e1bc56 100644 --- a/py-polars/tests/unit/test_serde.py +++ b/py-polars/tests/unit/test_serde.py @@ -152,3 +152,17 @@ def test_pickle_lazyframe_udf() -> None: q = pickle.loads(b) assert q.collect()["a"].to_list() == [2, 4, 6] + + +def test_pickle_lazyframe_nested_function_udf() -> None: + df = pl.DataFrame({"a": [1, 2, 3]}) + + # NOTE: This is only possible when we're using cloudpickle. + def inner_df_times2(df: pl.DataFrame) -> pl.DataFrame: + return df.select(pl.all() * 2) + + q = df.lazy().map(inner_df_times2) + b = pickle.dumps(q) + + q = pickle.loads(b) + assert q.collect()["a"].to_list() == [2, 4, 6]
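
A brief illustration of the `extend` vs `vstack` guidance in the docstring above, sketched with invented frame contents and assuming a polars build that includes these changes:

    import polars as pl

    # Single append followed immediately by a query: prefer `extend`, which
    # copies `other`'s data into the existing chunks of `df`, so the query
    # that follows scans contiguous memory.
    df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
    df.extend(pl.DataFrame({"foo": [10], "bar": [7]}))
    print(df.select(pl.col("foo").sum()))

    # Many appends before any query: prefer `vstack`, which only links new
    # chunks without copying, then finish with a single `rechunk`.
    parts = [pl.DataFrame({"foo": [i], "bar": [i * 2]}) for i in range(5)]
    acc = parts[0]
    for part in parts[1:]:
        acc.vstack(part, in_place=True)
    acc = acc.rechunk()

The trade-off: `extend` pays the copy up front so later queries avoid chunked scans, while `vstack` defers all copying to the final `rechunk`.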
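
The try/except added to `DataFrame.extend` exists for the self-extend case: `df.extend(df)` needs mutable access to the Rust-side frame on the left and shared access to the same frame on the right, which PyO3 surfaces as the RuntimeError "Already mutably borrowed"; the patch retries with a clone of the right-hand side. A minimal sketch of the behavior the new `test_extend_self` test pins down:

    import polars as pl

    df = pl.DataFrame({"a": [1, 2], "b": [True, False]})

    # With the borrow guard in place this appends the frame's own rows
    # in place instead of raising "Already mutably borrowed".
    df.extend(df)
    assert df.to_dict(False) == {"a": [1, 2, 1, 2], "b": [True, False, True, False]}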
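
For [PATCH 36/37], the `deprecated_alias(bins="breaks")` decorator keeps the old keyword working while steering callers to the new name. A sketch of the migration path; the warning class checked below is an assumption, based on polars' deprecation helpers conventionally emitting `DeprecationWarning`:

    import warnings

    import polars as pl

    s = pl.Series("a", [-2.0, 0.0, 2.0])

    # New spelling: `breaks` is the list of unique cut points.
    new = s.cut(breaks=[-1.0, 1.0], series=True)

    # Old spelling still works through the alias decorator, but warns.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        old = s.cut(bins=[-1.0, 1.0], series=True)
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    # `cut` returns a Categorical series; compare via Utf8 to avoid
    # string-cache mismatches between the two results.
    assert old.cast(pl.Utf8).series_equal(new.cast(pl.Utf8))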
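
For [PATCH 37/37], the reason `cloudpickle` is tried before `pickle`: the standard library serializes functions by reference (module plus qualified name), so a UDF defined inside another function, like `inner_df_times2` in the new serde test, cannot be pickled at all, whereas cloudpickle serializes the function body by value. A self-contained sketch of that difference, independent of polars; it assumes `cloudpickle` is installed, which the new requirements entry provides:

    import pickle

    import cloudpickle

    def make_udf():
        def times2(x):  # nested, so it has no importable qualified name
            return x * 2

        return times2

    udf = make_udf()

    # Plain pickle records only a module/qualname reference and fails here.
    try:
        pickle.dumps(udf)
    except (AttributeError, pickle.PicklingError):
        print("pickle cannot serialize the nested function")

    # cloudpickle ships the code object itself, so it round-trips.
    restored = cloudpickle.loads(cloudpickle.dumps(udf))
    assert restored(21) == 42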