diff --git a/.github/workflows/lint-global.yml b/.github/workflows/lint-global.yml index 95f9957b5309..031bac502548 100644 --- a/.github/workflows/lint-global.yml +++ b/.github/workflows/lint-global.yml @@ -15,4 +15,4 @@ jobs: - name: Lint Markdown and TOML uses: dprint/check@v2.2 - name: Spell Check with Typos - uses: crate-ci/typos@v1.26.8 + uses: crate-ci/typos@v1.27.2 diff --git a/Cargo.lock b/Cargo.lock index 51d28defc357..5176bd831139 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3191,7 +3191,6 @@ dependencies = [ "polars-ops", "polars-parquet", "polars-plan", - "polars-stream", "polars-time", "polars-utils", "pyo3", diff --git a/crates/polars-arrow/src/array/boolean/mod.rs b/crates/polars-arrow/src/array/boolean/mod.rs index c1a17c0f27f3..1e7efae00d49 100644 --- a/crates/polars-arrow/src/array/boolean/mod.rs +++ b/crates/polars-arrow/src/array/boolean/mod.rs @@ -357,8 +357,8 @@ impl BooleanArray { (dtype, values, validity) } - /// Creates a `[BooleanArray]` from its internal representation. - /// This is the inverted from `[BooleanArray::into_inner]` + /// Creates a [`BooleanArray`] from its internal representation. + /// This is the inverted from [`BooleanArray::into_inner`] /// /// # Safety /// Callers must ensure all invariants of this struct are upheld. diff --git a/crates/polars-arrow/src/array/primitive/mod.rs b/crates/polars-arrow/src/array/primitive/mod.rs index 6915a97a442b..ec4062fc5288 100644 --- a/crates/polars-arrow/src/array/primitive/mod.rs +++ b/crates/polars-arrow/src/array/primitive/mod.rs @@ -311,8 +311,8 @@ impl PrimitiveArray { (dtype, values, validity) } - /// Creates a `[PrimitiveArray]` from its internal representation. - /// This is the inverted from `[PrimitiveArray::into_inner]` + /// Creates a [`PrimitiveArray`] from its internal representation. 
+ /// This is the inverted from [`PrimitiveArray::into_inner`] pub fn from_inner( dtype: ArrowDataType, values: Buffer, @@ -322,8 +322,8 @@ impl PrimitiveArray { Ok(unsafe { Self::from_inner_unchecked(dtype, values, validity) }) } - /// Creates a `[PrimitiveArray]` from its internal representation. - /// This is the inverted from `[PrimitiveArray::into_inner]` + /// Creates a [`PrimitiveArray`] from its internal representation. + /// This is the inverted from [`PrimitiveArray::into_inner`] /// /// # Safety /// Callers must ensure all invariants of this struct are upheld. diff --git a/crates/polars-arrow/src/bitmap/immutable.rs b/crates/polars-arrow/src/bitmap/immutable.rs index 5b8d510dfe6c..3cb2851f56b8 100644 --- a/crates/polars-arrow/src/bitmap/immutable.rs +++ b/crates/polars-arrow/src/bitmap/immutable.rs @@ -472,8 +472,8 @@ impl Bitmap { } } - /// Creates a `[Bitmap]` from its internal representation. - /// This is the inverted from `[Bitmap::into_inner]` + /// Creates a [`Bitmap`] from its internal representation. + /// This is the inverted from [`Bitmap::into_inner`] /// /// # Safety /// Callers must ensure all invariants of this struct are upheld. diff --git a/crates/polars-compute/src/distinct_count.rs b/crates/polars-compute/src/distinct_count.rs deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/crates/polars-core/src/chunked_array/array/mod.rs b/crates/polars-core/src/chunked_array/array/mod.rs index 3e0e47a7e86a..49f0bf7ce1bf 100644 --- a/crates/polars-core/src/chunked_array/array/mod.rs +++ b/crates/polars-core/src/chunked_array/array/mod.rs @@ -81,4 +81,13 @@ impl ArrayChunked { ArrayChunked::try_from_chunk_iter(self.name().clone(), chunks) } + + /// Recurse nested types until we are at the leaf array. 
+ pub fn get_leaf_array(&self) -> Series { + let mut current = self.get_inner(); + while let Some(child_array) = current.try_array() { + current = child_array.get_inner(); + } + current + } } diff --git a/crates/polars-core/src/chunked_array/from_iterator.rs b/crates/polars-core/src/chunked_array/from_iterator.rs index ba9e8d1e6ccc..de5c3f89ee44 100644 --- a/crates/polars-core/src/chunked_array/from_iterator.rs +++ b/crates/polars-core/src/chunked_array/from_iterator.rs @@ -152,6 +152,15 @@ where } } +impl FromIterator> for ListChunked { + fn from_iter>>(iter: T) -> Self { + ListChunked::from_iter( + iter.into_iter() + .map(|c| c.map(|c| c.take_materialized_series())), + ) + } +} + impl FromIterator> for ListChunked { #[inline] fn from_iter>>(iter: I) -> Self { diff --git a/crates/polars-core/src/chunked_array/logical/categorical/mod.rs b/crates/polars-core/src/chunked_array/logical/categorical/mod.rs index 8ccd455e4bd0..2429d918e2ff 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/mod.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/mod.rs @@ -317,7 +317,7 @@ impl CategoricalChunked { } } - /// Create an `[Iterator]` that iterates over the `&str` values of the `[CategoricalChunked]`. + /// Create an [`Iterator`] that iterates over the `&str` values of the [`CategoricalChunked`]. 
pub fn iter_str(&self) -> CatIter<'_> { let iter = self.physical().into_iter(); CatIter { diff --git a/crates/polars-core/src/chunked_array/object/extension/mod.rs b/crates/polars-core/src/chunked_array/object/extension/mod.rs index f9167b200211..846ebfa5c16b 100644 --- a/crates/polars-core/src/chunked_array/object/extension/mod.rs +++ b/crates/polars-core/src/chunked_array/object/extension/mod.rs @@ -58,7 +58,7 @@ unsafe fn any_as_u8_slice(p: &T) -> &[u8] { std::slice::from_raw_parts((p as *const T) as *const u8, size_of::()) } -/// Create an extension Array that can be sent to arrow and (once wrapped in `[PolarsExtension]` will +/// Create an extension Array that can be sent to arrow and (once wrapped in [`PolarsExtension`] will /// also call drop on `T`, when the array is dropped. pub(crate) fn create_extension> + TrustedLen, T: Sized + Default>( iter: I, diff --git a/crates/polars-core/src/chunked_array/object/extension/polars_extension.rs b/crates/polars-core/src/chunked_array/object/extension/polars_extension.rs index f9a931a7846a..4c83426ca676 100644 --- a/crates/polars-core/src/chunked_array/object/extension/polars_extension.rs +++ b/crates/polars-core/src/chunked_array/object/extension/polars_extension.rs @@ -23,7 +23,7 @@ impl PolarsExtension { Self { array: Some(array) } } - /// Take the Array hold by `[PolarsExtension]` and forget polars extension, + /// Take the Array hold by [`PolarsExtension`] and forget polars extension, /// so that drop is not called pub(crate) fn take_and_forget(self) -> FixedSizeBinaryArray { let mut md = ManuallyDrop::new(self); @@ -57,15 +57,15 @@ impl PolarsExtension { } } - /// Calls the heap allocated function in the `[ExtensionSentinel]` that knows - /// how to convert the `[FixedSizeBinaryArray]` to a `Series` of type `[ObjectChunked]` + /// Calls the heap allocated function in the [`ExtensionSentinel`] that knows + /// how to convert the [`FixedSizeBinaryArray`] to a `Series` of type [`ObjectChunked`] pub(crate) unsafe fn 
get_series(&self, name: &PlSmallStr) -> Series { self.with_sentinel(|sent| { (sent.to_series_fn.as_ref().unwrap())(self.array.as_ref().unwrap(), name) }) } - // heap allocates a function that converts the binary array to a Series of `[ObjectChunked]` + // heap allocates a function that converts the binary array to a Series of [`ObjectChunked`] // the `name` will be the `name` of the output `Series` when this function is called (later). pub(crate) unsafe fn set_to_series_fn(&mut self) { let f = Box::new(move |arr: &FixedSizeBinaryArray, name: &PlSmallStr| { diff --git a/crates/polars-core/src/chunked_array/ops/fill_null.rs b/crates/polars-core/src/chunked_array/ops/fill_null.rs index 377b51afe134..c2bc3e35d364 100644 --- a/crates/polars-core/src/chunked_array/ops/fill_null.rs +++ b/crates/polars-core/src/chunked_array/ops/fill_null.rs @@ -233,7 +233,7 @@ fn fill_with_gather Vec>( let idx = bits_to_idx(validity); - Ok(unsafe { s.take_unchecked_from_slice(&idx) }) + Ok(unsafe { s.take_slice_unchecked(&idx) }) } fn fill_forward_gather(s: &Series) -> PolarsResult { diff --git a/crates/polars-core/src/chunked_array/struct_/mod.rs b/crates/polars-core/src/chunked_array/struct_/mod.rs index 625da8881117..7e45b6ad11ff 100644 --- a/crates/polars-core/src/chunked_array/struct_/mod.rs +++ b/crates/polars-core/src/chunked_array/struct_/mod.rs @@ -380,7 +380,7 @@ impl StructChunked { unsafe { DataFrame::new_no_checks(self.len(), columns) } } - /// Get access to one of this `[StructChunked]`'s fields + /// Get access to one of this [`StructChunked`]'s fields pub fn field_by_name(&self, name: &str) -> PolarsResult { self.fields_as_series() .into_iter() diff --git a/crates/polars-core/src/frame/column/arithmetic.rs b/crates/polars-core/src/frame/column/arithmetic.rs index 97907f3457b9..8018ee4527e6 100644 --- a/crates/polars-core/src/frame/column/arithmetic.rs +++ b/crates/polars-core/src/frame/column/arithmetic.rs @@ -1,70 +1,7 @@ use num_traits::{Num, NumCast}; -use 
polars_error::{polars_bail, PolarsResult}; +use polars_error::PolarsResult; use super::{Column, ScalarColumn, Series}; -use crate::utils::Container; - -fn output_length(a: &Column, b: &Column) -> PolarsResult { - match (a.len(), b.len()) { - // broadcasting - (1, o) | (o, 1) => Ok(o), - // equal - (a, b) if a == b => Ok(a), - // unequal - (a, b) => { - polars_bail!(InvalidOperation: "cannot do arithmetic operation on series of different lengths: got {} and {}", a, b) - }, - } -} - -fn unit_series_op PolarsResult>( - l: &Series, - r: &Series, - op: F, - length: usize, -) -> PolarsResult { - debug_assert!(l.len() <= 1); - debug_assert!(r.len() <= 1); - - op(l, r) - .map(|s| ScalarColumn::from_single_value_series(s, length)) - .map(Column::from) -} - -fn op_with_broadcast PolarsResult>( - l: &Column, - r: &Column, - op: F, -) -> PolarsResult { - // Here we rely on the underlying broadcast operations. - - let length = output_length(l, r)?; - match (l, r) { - (Column::Series(l), Column::Scalar(r)) => { - let r = r.as_single_value_series(); - if l.len() == 1 { - unit_series_op(l, &r, op, length) - } else { - op(l, &r).map(Column::from) - } - }, - (Column::Scalar(l), Column::Series(r)) => { - let l = l.as_single_value_series(); - if r.len() == 1 { - unit_series_op(&l, r, op, length) - } else { - op(&l, r).map(Column::from) - } - }, - (Column::Scalar(l), Column::Scalar(r)) => unit_series_op( - &l.as_single_value_series(), - &r.as_single_value_series(), - op, - length, - ), - (l, r) => op(l.as_materialized_series(), r.as_materialized_series()).map(Column::from), - } -} fn num_op_with_broadcast Series>( c: &'_ Column, @@ -90,7 +27,7 @@ macro_rules! broadcastable_ops { #[inline] fn $op(self, rhs: Self) -> Self::Output { - op_with_broadcast(&self, &rhs, |l, r| l.$op(r)) + self.try_apply_broadcasting_binary_elementwise(&rhs, |l, r| l.$op(r)) } } @@ -99,7 +36,7 @@ macro_rules! 
broadcastable_ops { #[inline] fn $op(self, rhs: Self) -> Self::Output { - op_with_broadcast(self, rhs, |l, r| l.$op(r)) + self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l.$op(r)) } } )+ diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index d21ec3ac8536..d2eec86c1b15 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -531,9 +531,38 @@ impl Column { match self { Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(), Self::Partitioned(s) => { - unsafe { s.as_materialized_series().take_unchecked(indices) }.into() + let s = s.as_materialized_series(); + unsafe { s.take_unchecked(indices) }.into() + }, + Self::Scalar(s) => { + let idxs_length = indices.len(); + let idxs_null_count = indices.null_count(); + + let scalar = ScalarColumn::from_single_value_series( + s.as_single_value_series().take_unchecked(&IdxCa::new( + indices.name().clone(), + &[0][..s.len().min(1)], + )), + idxs_length, + ); + + // We need to make sure that null values in `idx` become null values in the result + if idxs_null_count == 0 { + scalar.into_column() + } else if idxs_null_count == idxs_length { + scalar.into_nulls().into_column() + } else { + let validity = indices.rechunk_validity(); + let series = scalar.take_materialized_series(); + let name = series.name().clone(); + let dtype = series.dtype().clone(); + let mut chunks = series.into_chunks(); + assert_eq!(chunks.len(), 1); + chunks[0] = chunks[0].with_validity(validity); + unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) } + .into_column() + } }, - Self::Scalar(s) => s.resize(indices.len()).into(), } } /// # Safety @@ -543,13 +572,17 @@ impl Column { debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok()); match self { - Self::Series(s) => unsafe { s.take_unchecked_from_slice(indices) }.into(), - Self::Partitioned(s) => unsafe { - s.as_materialized_series() - 
.take_unchecked_from_slice(indices) - } + Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(), + Self::Partitioned(s) => { + let s = s.as_materialized_series(); + unsafe { s.take_slice_unchecked(indices) }.into() + }, + Self::Scalar(s) => ScalarColumn::from_single_value_series( + s.as_single_value_series() + .take_slice_unchecked(&[0][..s.len().min(1)]), + indices.len(), + ) .into(), - Self::Scalar(s) => s.resize(indices.len()).into(), } } @@ -670,15 +703,22 @@ impl Column { unsafe { self.as_materialized_series().agg_list(groups) }.into() } + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] + pub fn agg_valid_count(&self, groups: &GroupsProxy) -> Self { + // @partition-opt + // @scalar-opt + unsafe { self.as_materialized_series().agg_valid_count(groups) }.into() + } + pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self { - Series::full_null(name, size, dtype).into() - // @TODO: This causes failures - // Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size) + Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size) } pub fn is_empty(&self) -> bool { - // @scalar-opt - self.as_materialized_series().is_empty() + self.len() == 0 } pub fn reverse(&self) -> Column { @@ -689,16 +729,16 @@ impl Column { } } - pub fn equals(&self, right: &Column) -> bool { + pub fn equals(&self, other: &Column) -> bool { // @scalar-opt self.as_materialized_series() - .equals(right.as_materialized_series()) + .equals(other.as_materialized_series()) } - pub fn equals_missing(&self, right: &Column) -> bool { + pub fn equals_missing(&self, other: &Column) -> bool { // @scalar-opt self.as_materialized_series() - .equals_missing(right.as_materialized_series()) + .equals_missing(other.as_materialized_series()) } pub fn set_sorted_flag(&mut self, sorted: IsSorted) { @@ -730,11 +770,6 @@ impl Column { } } - pub fn get_data_ptr(&self) -> usize { - // @scalar-opt - 
self.as_materialized_series().get_data_ptr() - } - pub fn vec_hash(&self, build_hasher: PlRandomState, buf: &mut Vec) -> PolarsResult<()> { // @scalar-opt? self.as_materialized_series().vec_hash(build_hasher, buf) @@ -772,13 +807,6 @@ impl Column { unsafe { DataFrame::new_no_checks(self.len(), vec![self]) } } - pub fn unique_stable(&self) -> PolarsResult { - // @scalar-opt? - self.as_materialized_series() - .unique_stable() - .map(Column::from) - } - pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> { // @scalar-opt self.into_materialized_series() @@ -795,9 +823,11 @@ impl Column { } pub fn explode(&self) -> PolarsResult { - // @scalar-opt self.as_materialized_series().explode().map(Column::from) } + pub fn implode(&self) -> PolarsResult { + self.as_materialized_series().implode() + } pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult { // @scalar-opt @@ -839,8 +869,12 @@ impl Column { } pub fn drop_nulls(&self) -> Column { - // @scalar-opt - self.as_materialized_series().drop_nulls().into() + match self { + Column::Series(s) => s.drop_nulls().into_column(), + // @partition-opt + Column::Partitioned(s) => s.as_materialized_series().drop_nulls().into_column(), + Column::Scalar(s) => s.drop_nulls().into_column(), + } } pub fn is_sorted_flag(&self) -> IsSorted { @@ -849,8 +883,34 @@ impl Column { } pub fn unique(&self) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().unique().map(Column::from) + match self { + Column::Series(s) => s.unique().map(Column::from), + // @partition-opt + Column::Partitioned(s) => s.as_materialized_series().unique().map(Column::from), + Column::Scalar(s) => { + _ = s.as_single_value_series().unique()?; + if s.is_empty() { + return Ok(s.clone().into_column()); + } + + Ok(s.resize(1).into_column()) + }, + } + } + pub fn unique_stable(&self) -> PolarsResult { + match self { + Column::Series(s) => s.unique_stable().map(Column::from), + // @partition-opt + Column::Partitioned(s) => 
s.as_materialized_series().unique_stable().map(Column::from), + Column::Scalar(s) => { + _ = s.as_single_value_series().unique_stable()?; + if s.is_empty() { + return Ok(s.clone().into_column()); + } + + Ok(s.resize(1).into_column()) + }, + } } pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult { @@ -875,9 +935,26 @@ impl Column { .map(Self::from) } - pub fn filter(&self, filter: &ChunkedArray) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().filter(filter).map(Self::from) + pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult { + match self { + Column::Series(s) => s.filter(filter).map(Column::from), + Column::Partitioned(s) => s.as_materialized_series().filter(filter).map(Column::from), + Column::Scalar(s) => { + if s.is_empty() { + return Ok(s.clone().into_column()); + } + + // Broadcasting + if filter.len() == 1 { + return match filter.get(0) { + Some(true) => Ok(s.clone().into_column()), + _ => Ok(s.resize(0).into_column()), + }; + } + + Ok(s.resize(filter.sum().unwrap() as usize).into_column()) + }, + } } #[cfg(feature = "random")] @@ -949,23 +1026,16 @@ impl Column { } pub fn is_finite(&self) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().is_finite() + self.try_map_unary_elementwise_to_bool(|s| s.is_finite()) } - pub fn is_infinite(&self) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().is_infinite() + self.try_map_unary_elementwise_to_bool(|s| s.is_infinite()) } - pub fn is_nan(&self) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().is_nan() + self.try_map_unary_elementwise_to_bool(|s| s.is_nan()) } - pub fn is_not_nan(&self) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().is_not_nan() + self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan()) } pub fn wrapping_trunc_div_scalar(&self, rhs: T) -> Self @@ -1018,15 +1088,55 @@ impl Column { } pub fn bitand(&self, rhs: &Self) -> PolarsResult { - self.as_materialized_series() - 
.bitand(rhs.as_materialized_series()) - .map(Column::from) + // @partition-opt + // @scalar-opt + (self.as_materialized_series() & rhs.as_materialized_series()).map(Column::from) + } + pub fn bitor(&self, rhs: &Self) -> PolarsResult { + // @partition-opt + // @scalar-opt + (self.as_materialized_series() | rhs.as_materialized_series()).map(Column::from) + } + pub fn bitxor(&self, rhs: &Self) -> PolarsResult { + // @partition-opt + // @scalar-opt + (self.as_materialized_series() ^ rhs.as_materialized_series()).map(Column::from) + } + + pub fn try_add_owned(self, other: Self) -> PolarsResult { + match (self, other) { + (Column::Series(lhs), Column::Series(rhs)) => lhs.try_add_owned(rhs).map(Column::from), + (lhs, rhs) => lhs + rhs, + } + } + pub fn try_sub_owned(self, other: Self) -> PolarsResult { + match (self, other) { + (Column::Series(lhs), Column::Series(rhs)) => lhs.try_sub_owned(rhs).map(Column::from), + (lhs, rhs) => lhs - rhs, + } + } + pub fn try_mul_owned(self, other: Self) -> PolarsResult { + match (self, other) { + (Column::Series(lhs), Column::Series(rhs)) => lhs.try_mul_owned(rhs).map(Column::from), + (lhs, rhs) => lhs * rhs, + } } pub(crate) fn str_value(&self, index: usize) -> PolarsResult> { Ok(self.get(index)?.str_value()) } + pub fn min_reduce(&self) -> PolarsResult { + match self { + Column::Series(s) => s.min_reduce(), + Column::Partitioned(s) => s.min_reduce(), + Column::Scalar(s) => { + // We don't really want to deal with handling the full semantics here so we just + // cast to a single value series. This is a tiny bit wasteful, but probably fine. 
+ s.as_single_value_series().min_reduce() + }, + } + } pub fn max_reduce(&self) -> PolarsResult { match self { Column::Series(s) => s.max_reduce(), @@ -1038,25 +1148,108 @@ impl Column { }, } } - - pub fn min_reduce(&self) -> PolarsResult { + pub fn median_reduce(&self) -> PolarsResult { match self { - Column::Series(s) => s.min_reduce(), - Column::Partitioned(s) => s.min_reduce(), + Column::Series(s) => s.median_reduce(), + Column::Partitioned(s) => s.as_materialized_series().median_reduce(), Column::Scalar(s) => { // We don't really want to deal with handling the full semantics here so we just // cast to a single value series. This is a tiny bit wasteful, but probably fine. - s.as_single_value_series().min_reduce() + s.as_single_value_series().median_reduce() + }, + } + } + pub fn mean_reduce(&self) -> Scalar { + match self { + Column::Series(s) => s.mean_reduce(), + Column::Partitioned(s) => s.as_materialized_series().mean_reduce(), + Column::Scalar(s) => { + // We don't really want to deal with handling the full semantics here so we just + // cast to a single value series. This is a tiny bit wasteful, but probably fine. + s.as_single_value_series().mean_reduce() + }, + } + } + pub fn std_reduce(&self, ddof: u8) -> PolarsResult { + match self { + Column::Series(s) => s.std_reduce(ddof), + Column::Partitioned(s) => s.as_materialized_series().std_reduce(ddof), + Column::Scalar(s) => { + // We don't really want to deal with handling the full semantics here so we just + // cast to a single value series. This is a tiny bit wasteful, but probably fine. + s.as_single_value_series().std_reduce(ddof) + }, + } + } + pub fn var_reduce(&self, ddof: u8) -> PolarsResult { + match self { + Column::Series(s) => s.var_reduce(ddof), + Column::Partitioned(s) => s.as_materialized_series().var_reduce(ddof), + Column::Scalar(s) => { + // We don't really want to deal with handling the full semantics here so we just + // cast to a single value series. 
This is a tiny bit wasteful, but probably fine. + s.as_single_value_series().var_reduce(ddof) + }, + } + } + pub fn sum_reduce(&self) -> PolarsResult { + // @partition-opt + // @scalar-opt + self.as_materialized_series().sum_reduce() + } + pub fn and_reduce(&self) -> PolarsResult { + match self { + Column::Series(s) => s.and_reduce(), + Column::Partitioned(s) => s.and_reduce(), + Column::Scalar(s) => { + // We don't really want to deal with handling the full semantics here so we just + // cast to a single value series. This is a tiny bit wasteful, but probably fine. + s.as_single_value_series().and_reduce() }, } } + pub fn or_reduce(&self) -> PolarsResult { + match self { + Column::Series(s) => s.or_reduce(), + Column::Partitioned(s) => s.or_reduce(), + Column::Scalar(s) => { + // We don't really want to deal with handling the full semantics here so we just + // cast to a single value series. This is a tiny bit wasteful, but probably fine. + s.as_single_value_series().or_reduce() + }, + } + } + pub fn xor_reduce(&self) -> PolarsResult { + match self { + Column::Series(s) => s.xor_reduce(), + // @partition-opt + Column::Partitioned(s) => s.as_materialized_series().xor_reduce(), + Column::Scalar(s) => { + // We don't really want to deal with handling the full semantics here so we just + // cast to a single value series. This is a tiny bit wasteful, but probably fine. 
+ s.as_single_value_series().xor_reduce() + }, + } + } + pub fn n_unique(&self) -> PolarsResult { + match self { + Column::Series(s) => s.n_unique(), + Column::Partitioned(s) => s.partitions().n_unique(), + // @scalar-opt + Column::Scalar(s) => s.as_single_value_series().n_unique(), + } + } + pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult { + self.as_materialized_series() + .quantile_reduce(quantile, method) + } pub(crate) fn estimated_size(&self) -> usize { // @scalar-opt self.as_materialized_series().estimated_size() } - pub(crate) fn sort_with(&self, options: SortOptions) -> PolarsResult { + pub fn sort_with(&self, options: SortOptions) -> PolarsResult { match self { Column::Series(s) => s.sort_with(options).map(Self::from), // @partition-opt @@ -1073,17 +1266,27 @@ impl Column { } } - pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column { + pub fn map_unary_elementwise_to_bool( + &self, + f: impl Fn(&Series) -> BooleanChunked, + ) -> BooleanChunked { + self.try_map_unary_elementwise_to_bool(|s| Ok(f(s))) + .unwrap() + } + pub fn try_map_unary_elementwise_to_bool( + &self, + f: impl Fn(&Series) -> PolarsResult, + ) -> PolarsResult { match self { - Column::Series(s) => f(s).into(), - Column::Partitioned(s) => s.apply_unary_elementwise(f).into(), - Column::Scalar(s) => { - ScalarColumn::from_single_value_series(f(&s.as_single_value_series()), s.len()) - .into() - }, + Column::Series(s) => f(s), + Column::Partitioned(s) => f(s.as_materialized_series()), + Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())), } } + pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column { + self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap() + } pub fn try_apply_unary_elementwise( &self, f: impl Fn(&Series) -> PolarsResult, @@ -1099,6 +1302,98 @@ impl Column { } } + pub fn apply_broadcasting_binary_elementwise( + &self, + other: &Self, + op: impl Fn(&Series, 
&Series) -> Series, + ) -> PolarsResult { + self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs))) + } + pub fn try_apply_broadcasting_binary_elementwise( + &self, + other: &Self, + op: impl Fn(&Series, &Series) -> PolarsResult, + ) -> PolarsResult { + fn output_length(a: &Column, b: &Column) -> PolarsResult { + match (a.len(), b.len()) { + // broadcasting + (1, o) | (o, 1) => Ok(o), + // equal + (a, b) if a == b => Ok(a), + // unequal + (a, b) => { + polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b) + }, + } + } + + // Here we rely on the underlying broadcast operations. + let length = output_length(self, other)?; + match (self, other) { + (Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from), + (Column::Series(lhs), Column::Scalar(rhs)) => { + op(lhs, &rhs.as_single_value_series()).map(Column::from) + }, + (Column::Scalar(lhs), Column::Series(rhs)) => { + op(&lhs.as_single_value_series(), rhs).map(Column::from) + }, + (Column::Scalar(lhs), Column::Scalar(rhs)) => { + let lhs = lhs.as_single_value_series(); + let rhs = rhs.as_single_value_series(); + + Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column()) + }, + // @partition-opt + (lhs, rhs) => { + op(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from) + }, + } + } + + pub fn apply_binary_elementwise( + &self, + other: &Self, + f: impl Fn(&Series, &Series) -> Series, + f_lb: impl Fn(&Scalar, &Series) -> Series, + f_rb: impl Fn(&Series, &Scalar) -> Series, + ) -> Column { + self.try_apply_binary_elementwise( + other, + |lhs, rhs| Ok(f(lhs, rhs)), + |lhs, rhs| Ok(f_lb(lhs, rhs)), + |lhs, rhs| Ok(f_rb(lhs, rhs)), + ) + .unwrap() + } + pub fn try_apply_binary_elementwise( + &self, + other: &Self, + f: impl Fn(&Series, &Series) -> PolarsResult, + f_lb: impl Fn(&Scalar, &Series) -> PolarsResult, + f_rb: impl Fn(&Series, &Scalar) -> PolarsResult, + ) 
-> PolarsResult { + debug_assert_eq!(self.len(), other.len()); + + match (self, other) { + (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from), + (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from), + (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from), + (Column::Scalar(lhs), Column::Scalar(rhs)) => { + let lhs = lhs.as_single_value_series(); + let rhs = rhs.as_single_value_series(); + + Ok( + ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len()) + .into_column(), + ) + }, + // @partition-opt + (lhs, rhs) => { + f(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from) + }, + } + } + #[cfg(feature = "approx_unique")] pub fn approx_n_unique(&self) -> PolarsResult { match self { diff --git a/crates/polars-core/src/frame/column/partitioned.rs b/crates/polars-core/src/frame/column/partitioned.rs index a22e697290ec..16d4e9538634 100644 --- a/crates/polars-core/src/frame/column/partitioned.rs +++ b/crates/polars-core/src/frame/column/partitioned.rs @@ -274,4 +274,19 @@ impl PartitionedColumn { pub fn clear(&self) -> Self { Self::new_empty(self.name.clone(), self.values.dtype().clone()) } + + pub fn partitions(&self) -> &Series { + &self.values + } + pub fn partition_ends(&self) -> &[IdxSize] { + &self.ends + } + + pub fn or_reduce(&self) -> PolarsResult { + self.values.or_reduce() + } + + pub fn and_reduce(&self) -> PolarsResult { + self.values.and_reduce() + } } diff --git a/crates/polars-core/src/frame/column/scalar.rs b/crates/polars-core/src/frame/column/scalar.rs index 18e53c469960..e3d8105362c4 100644 --- a/crates/polars-core/src/frame/column/scalar.rs +++ b/crates/polars-core/src/frame/column/scalar.rs @@ -137,9 +137,10 @@ impl ScalarColumn { /// /// This will panic if the value cannot be made static or if the series has length `0`. 
pub fn from_single_value_series(series: Series, length: usize) -> Self { - debug_assert_eq!(series.len(), 1); - let value = series.get(0).unwrap(); - let value = value.into_static(); + debug_assert!(series.len() <= 1); + debug_assert!(!series.is_empty() || length == 0); + + let value = series.get(0).map_or(AnyValue::Null, |av| av.into_static()); let value = Scalar::new(series.dtype().clone(), value); ScalarColumn::new(series.name().clone(), value, length) } @@ -270,6 +271,19 @@ impl ScalarColumn { pub fn has_nulls(&self) -> bool { self.length != 0 && self.scalar.is_null() } + + pub fn drop_nulls(&self) -> Self { + if self.scalar.is_null() { + self.resize(0) + } else { + self.clone() + } + } + + pub fn into_nulls(mut self) -> Self { + self.scalar.update(AnyValue::Null); + self + } } impl IntoColumn for ScalarColumn { diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs index c12086def533..bedb8eccb060 100644 --- a/crates/polars-core/src/frame/explode.rs +++ b/crates/polars-core/src/frame/explode.rs @@ -18,7 +18,7 @@ fn get_exploded(series: &Series) -> PolarsResult<(Series, OffsetsBuffer)> { } } -/// Arguments for `[DataFrame::unpivot]` function +/// Arguments for `LazyFrame::unpivot` function #[derive(Clone, Default, Debug, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct UnpivotArgsIR { diff --git a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs index aaf24a470969..8f01ce3f291a 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs @@ -15,7 +15,7 @@ impl Series { } #[doc(hidden)] - pub fn agg_valid_count(&self, groups: &GroupsProxy) -> Series { + pub unsafe fn agg_valid_count(&self, groups: &GroupsProxy) -> Series { // Prevent a rechunk for every individual group. 
let s = if groups.len() > 1 && self.null_count() > 0 { self.rechunk() diff --git a/crates/polars-core/src/frame/group_by/mod.rs b/crates/polars-core/src/frame/group_by/mod.rs index 9dee1e1f411a..17a36dc4ddfd 100644 --- a/crates/polars-core/src/frame/group_by/mod.rs +++ b/crates/polars-core/src/frame/group_by/mod.rs @@ -233,7 +233,7 @@ impl<'df> GroupBy<'df> { /// Where second value in the tuple is a vector with all matching indexes. /// /// # Safety - /// Groups should always be in bounds of the `DataFrame` hold by this `[GroupBy]`. + /// Groups should always be in bounds of the `DataFrame` hold by this [`GroupBy`]. /// If you mutate it, you must hold that invariant. pub unsafe fn get_groups_mut(&mut self) -> &mut GroupsProxy { &mut self.groups diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index ab74c7c1d2d1..aa434fb07df7 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -206,48 +206,29 @@ impl DataFrame { } // Reduce monomorphization. - pub fn _apply_columns(&self, func: &(dyn Fn(&Series) -> Series)) -> Vec { - self.materialized_column_iter() - .map(func) - .map(Column::from) - .collect() + fn try_apply_columns( + &self, + func: &(dyn Fn(&Column) -> PolarsResult + Send + Sync), + ) -> PolarsResult> { + self.columns.iter().map(func).collect() } - // Reduce monomorphization. - pub fn _apply_columns_par( - &self, - func: &(dyn Fn(&Series) -> Series + Send + Sync), - ) -> Vec { - POOL.install(|| { - self.par_materialized_column_iter() - .map(func) - .map(Column::from) - .collect() - }) + pub fn _apply_columns(&self, func: &(dyn Fn(&Column) -> Column)) -> Vec { + self.columns.iter().map(func).collect() } - // Reduce monomorphization. 
fn try_apply_columns_par( &self, - func: &(dyn Fn(&Series) -> PolarsResult + Send + Sync), + func: &(dyn Fn(&Column) -> PolarsResult + Send + Sync), ) -> PolarsResult> { - POOL.install(|| { - self.par_materialized_column_iter() - .map(func) - .map(|s| s.map(Column::from)) - .collect() - }) + POOL.install(|| self.columns.par_iter().map(func).collect()) } - // Reduce monomorphization. - fn try_apply_columns( + pub fn _apply_columns_par( &self, - func: &(dyn Fn(&Series) -> PolarsResult + Send + Sync), - ) -> PolarsResult> { - self.materialized_column_iter() - .map(func) - .map(|s| s.map(Column::from)) - .collect() + func: &(dyn Fn(&Column) -> Column + Send + Sync), + ) -> Vec { + POOL.install(|| self.columns.par_iter().map(func).collect()) } /// Get the index of the column. @@ -565,13 +546,7 @@ impl DataFrame { /// Aggregate all the chunks in the DataFrame to a single chunk in parallel. /// This may lead to more peak memory consumption. pub fn as_single_chunk_par(&mut self) -> &mut Self { - if self.columns.iter().any(|c| { - if let Column::Series(s) = c { - s.n_chunks() > 1 - } else { - false - } - }) { + if self.columns.iter().any(|c| c.n_chunks() > 1) { self.columns = self._apply_columns_par(&|s| s.rechunk()); } self @@ -1896,12 +1871,9 @@ impl DataFrame { /// The indices must be in-bounds. 
pub unsafe fn take_unchecked_impl(&self, idx: &IdxCa, allow_threads: bool) -> Self { let cols = if allow_threads { - POOL.install(|| self._apply_columns_par(&|s| s.take_unchecked(idx))) + POOL.install(|| self._apply_columns_par(&|c| c.take_unchecked(idx))) } else { - self.materialized_column_iter() - .map(|s| s.take_unchecked(idx)) - .map(Column::from) - .collect() + self._apply_columns(&|s| s.take_unchecked(idx)) }; unsafe { DataFrame::new_no_checks(idx.len(), cols) } } @@ -1914,10 +1886,7 @@ impl DataFrame { let cols = if allow_threads { POOL.install(|| self._apply_columns_par(&|s| s.take_slice_unchecked(idx))) } else { - self.materialized_column_iter() - .map(|s| s.take_slice_unchecked(idx)) - .map(Column::from) - .collect() + self._apply_columns(&|s| s.take_slice_unchecked(idx)) }; unsafe { DataFrame::new_no_checks(idx.len(), cols) } } @@ -2567,7 +2536,6 @@ impl DataFrame { if offset == 0 && length == self.height() { return self.clone(); } - // @scalar-opt let columns = self._apply_columns_par(&|s| s.slice(offset, length)); unsafe { DataFrame::new_no_checks(length, columns) } } diff --git a/crates/polars-core/src/scalar/from.rs b/crates/polars-core/src/scalar/from.rs index 35345b2a6527..3af8671dadd1 100644 --- a/crates/polars-core/src/scalar/from.rs +++ b/crates/polars-core/src/scalar/from.rs @@ -14,6 +14,7 @@ macro_rules! impl_from { } impl_from! { + (bool, Boolean, Boolean) (i8, Int8, Int8) (i16, Int16, Int16) (i32, Int32, Int32) diff --git a/crates/polars-core/src/series/amortized_iter.rs b/crates/polars-core/src/series/amortized_iter.rs index e56a950578e0..167fbf82612c 100644 --- a/crates/polars-core/src/series/amortized_iter.rs +++ b/crates/polars-core/src/series/amortized_iter.rs @@ -3,7 +3,7 @@ use std::rc::Rc; use crate::prelude::*; -/// A `[Series]` that amortizes a few allocations during iteration. +/// A [`Series`] that amortizes a few allocations during iteration. 
#[derive(Clone)] pub struct AmortSeries { container: Rc, @@ -31,7 +31,7 @@ impl AmortSeries { } } - /// Creates a new `[UnsafeSeries]` + /// Creates a new [`UnsafeSeries`] /// /// # Safety /// Inner chunks must be from `Series` otherwise the dtype may be incorrect and lead to UB. diff --git a/crates/polars-core/src/series/arithmetic/bitops.rs b/crates/polars-core/src/series/arithmetic/bitops.rs new file mode 100644 index 000000000000..cd00e8de18db --- /dev/null +++ b/crates/polars-core/src/series/arithmetic/bitops.rs @@ -0,0 +1,65 @@ +use std::borrow::Cow; + +use polars_error::PolarsResult; + +use super::{polars_bail, BooleanChunked, ChunkedArray, DataType, IntoSeries, Series}; + +macro_rules! impl_bitop { + ($(($trait:ident, $f:ident))+) => { + $( + impl std::ops::$trait for &Series { + type Output = PolarsResult; + fn $f(self, rhs: Self) -> Self::Output { + use DataType as DT; + match self.dtype() { + DT::Boolean => { + let lhs: &BooleanChunked = self.as_ref().as_ref().as_ref(); + let rhs = lhs.unpack_series_matching_type(rhs)?; + Ok(lhs.$f(rhs).into_series()) + }, + dt if dt.is_integer() => with_match_physical_integer_polars_type!(dt, |$T| { + let lhs: &ChunkedArray<$T> = self.as_ref().as_ref().as_ref(); + + let rhs = if rhs.len() == 1 { + Cow::Owned(rhs.cast(self.dtype())?) 
+ } else { + Cow::Borrowed(rhs) + }; + + let rhs = lhs.unpack_series_matching_type(&rhs)?; + Ok(lhs.$f(&rhs).into_series()) + }), + _ => polars_bail!(opq = $f, self.dtype()), + } + } + } + impl std::ops::$trait for Series { + type Output = PolarsResult; + #[inline(always)] + fn $f(self, rhs: Self) -> Self::Output { + <&Series as std::ops::$trait>::$f(&self, &rhs) + } + } + impl std::ops::$trait<&Series> for Series { + type Output = PolarsResult; + #[inline(always)] + fn $f(self, rhs: &Series) -> Self::Output { + <&Series as std::ops::$trait>::$f(&self, rhs) + } + } + impl std::ops::$trait for &Series { + type Output = PolarsResult; + #[inline(always)] + fn $f(self, rhs: Series) -> Self::Output { + <&Series as std::ops::$trait>::$f(self, &rhs) + } + } + )+ + }; +} + +impl_bitop! { + (BitAnd, bitand) + (BitOr, bitor) + (BitXor, bitxor) +} diff --git a/crates/polars-core/src/series/arithmetic/borrowed.rs b/crates/polars-core/src/series/arithmetic/borrowed.rs index f9e5ff42139b..01065c66c1d1 100644 --- a/crates/polars-core/src/series/arithmetic/borrowed.rs +++ b/crates/polars-core/src/series/arithmetic/borrowed.rs @@ -51,6 +51,18 @@ where ChunkedArray: IntoSeries, { fn subtract(lhs: &ChunkedArray, rhs: &Series) -> PolarsResult { + #[cfg(feature = "dtype-array")] + if let Some(rhs) = rhs.try_array() { + return rhs.arithm_helper_scalar_lhs(lhs.clone().into_series(), &|l, r| l.subtract(&r)); + } + + polars_ensure!( + lhs.dtype() == rhs.dtype(), + opq = add, + rhs.dtype(), + rhs.dtype() + ); + // SAFETY: // There will be UB if a ChunkedArray is alive with the wrong datatype. // we now only create the potentially wrong dtype for a short time. 
@@ -61,6 +73,18 @@ where Ok(out.into_series()) } fn add_to(lhs: &ChunkedArray, rhs: &Series) -> PolarsResult { + #[cfg(feature = "dtype-array")] + if let Some(rhs) = rhs.try_array() { + return rhs.arithm_helper_scalar_lhs(lhs.clone().into_series(), &|l, r| l.add_to(&r)); + } + + polars_ensure!( + lhs.dtype() == rhs.dtype(), + opq = add, + rhs.dtype(), + rhs.dtype() + ); + // SAFETY: // see subtract let rhs = unsafe { lhs.unpack_series_matching_physical_type(rhs) }; @@ -68,6 +92,18 @@ where Ok(out.into_series()) } fn multiply(lhs: &ChunkedArray, rhs: &Series) -> PolarsResult { + #[cfg(feature = "dtype-array")] + if let Some(rhs) = rhs.try_array() { + return rhs.arithm_helper_scalar_lhs(lhs.clone().into_series(), &|l, r| l.multiply(&r)); + } + + polars_ensure!( + lhs.dtype() == rhs.dtype(), + opq = add, + rhs.dtype(), + rhs.dtype() + ); + // SAFETY: // see subtract let rhs = unsafe { lhs.unpack_series_matching_physical_type(rhs) }; @@ -75,6 +111,18 @@ where Ok(out.into_series()) } fn divide(lhs: &ChunkedArray, rhs: &Series) -> PolarsResult { + #[cfg(feature = "dtype-array")] + if let Some(rhs) = rhs.try_array() { + return rhs.arithm_helper_scalar_lhs(lhs.clone().into_series(), &|l, r| l.divide(&r)); + } + + polars_ensure!( + lhs.dtype() == rhs.dtype(), + opq = add, + rhs.dtype(), + rhs.dtype() + ); + // SAFETY: // see subtract let rhs = unsafe { lhs.unpack_series_matching_physical_type(rhs) }; @@ -82,6 +130,19 @@ where Ok(out.into_series()) } fn remainder(lhs: &ChunkedArray, rhs: &Series) -> PolarsResult { + #[cfg(feature = "dtype-array")] + if let Some(rhs) = rhs.try_array() { + return rhs + .arithm_helper_scalar_lhs(lhs.clone().into_series(), &|l, r| l.remainder(&r)); + } + + polars_ensure!( + lhs.dtype() == rhs.dtype(), + opq = add, + rhs.dtype(), + rhs.dtype() + ); + // SAFETY: // see subtract let rhs = unsafe { lhs.unpack_series_matching_physical_type(rhs) }; @@ -114,24 +175,6 @@ impl NumOpsDispatchInner for BooleanType { } } -#[cfg(feature = "dtype-array")] -fn 
array_shape(dt: &DataType, infer: bool) -> Vec { - fn inner(dt: &DataType, buf: &mut Vec) { - if let DataType::Array(_, size) = dt { - buf.push(ReshapeDimension::Specified( - Dimension::try_from(*size as i64).unwrap(), - )) - } - } - - let mut buf = vec![]; - if infer { - buf.push(ReshapeDimension::Infer) - } - inner(dt, &mut buf); - buf -} - #[cfg(feature = "dtype-array")] fn broadcast_array(lhs: &ArrayChunked, rhs: &Series) -> PolarsResult<(ArrayChunked, Series)> { let out = match (lhs.len(), rhs.len()) { @@ -165,19 +208,56 @@ impl ArrayChunked { ) -> PolarsResult { let (lhs, rhs) = broadcast_array(self, rhs)?; - let l_leaf_array = lhs.clone().into_series().get_leaf_array(); - let shape = array_shape(lhs.dtype(), true); + polars_ensure!( + lhs.dtype() == rhs.dtype() - let r_leaf_array = if rhs.dtype().is_numeric() && rhs.len() == 1 { - rhs.clone() - } else { - polars_ensure!(lhs.dtype() == rhs.dtype(), InvalidOperation: "can only do arithmetic of arrays of the same type and shape; got {} and {}", self.dtype(), rhs.dtype()); - rhs.get_leaf_array() - }; + // @NOTE: we allow the arithmetic operations with a scalar of the leaf array + || rhs.dtype().is_numeric() && rhs.len() == 1, + InvalidOperation: "can only do arithmetic of arrays of the same type and shape; got {} and {}", + lhs.dtype(), rhs.dtype() + ); + + let l_leaf_array = lhs.get_leaf_array(); + let r_leaf_array = rhs.get_leaf_array(); + + let mut dt = lhs.dtype(); + let mut shape = vec![ReshapeDimension::Specified( + Dimension::new(lhs.len() as u64), + )]; + while let DataType::Array(child, size) = dt { + shape.push(ReshapeDimension::Specified(Dimension::new(*size as u64))); + dt = child; + } let out = op(l_leaf_array, r_leaf_array)?; out.reshape_array(&shape) } + + fn arithm_helper_scalar_lhs( + &self, + lhs: Series, + op: &dyn Fn(Series, Series) -> PolarsResult, + ) -> PolarsResult { + polars_ensure!( + lhs.len() == 1, + InvalidOperation: "can only do arithmetic of between arrays and a scalar the leaf 
type; got {} and {}", + lhs.dtype(), self.dtype() + ); + + let r_leaf_array = self.get_leaf_array(); + let out = op(lhs, r_leaf_array)?; + + let mut dt = self.dtype(); + let mut shape = vec![ReshapeDimension::Specified(Dimension::new( + self.len() as u64 + ))]; + while let DataType::Array(child, size) = dt { + shape.push(ReshapeDimension::Specified(Dimension::new(*size as u64))); + dt = child; + } + + out.reshape_array(&shape) + } } #[cfg(feature = "dtype-array")] diff --git a/crates/polars-core/src/series/arithmetic/mod.rs b/crates/polars-core/src/series/arithmetic/mod.rs index 0a5550b7b0f3..713bd4fbece3 100644 --- a/crates/polars-core/src/series/arithmetic/mod.rs +++ b/crates/polars-core/src/series/arithmetic/mod.rs @@ -1,3 +1,4 @@ +mod bitops; mod borrowed; mod list_borrowed; mod owned; diff --git a/crates/polars-core/src/series/implementations/boolean.rs b/crates/polars-core/src/series/implementations/boolean.rs index 83bbacd12c00..b4cd48295c4c 100644 --- a/crates/polars-core/src/series/implementations/boolean.rs +++ b/crates/polars-core/src/series/implementations/boolean.rs @@ -123,21 +123,6 @@ impl SeriesTrait for SeriesWrap { Some(self.0.boxed_metadata_dyn()) } - fn bitxor(&self, other: &Series) -> PolarsResult { - let other = self.0.unpack_series_matching_type(other)?; - Ok((&self.0).bitxor(other).into_series()) - } - - fn bitand(&self, other: &Series) -> PolarsResult { - let other = self.0.unpack_series_matching_type(other)?; - Ok((&self.0).bitand(other).into_series()) - } - - fn bitor(&self, other: &Series) -> PolarsResult { - let other = self.0.unpack_series_matching_type(other)?; - Ok((&self.0).bitor(other).into_series()) - } - fn rename(&mut self, name: PlSmallStr) { self.0.rename(name); } diff --git a/crates/polars-core/src/series/implementations/floats.rs b/crates/polars-core/src/series/implementations/floats.rs index 846e326d35b2..85c0d87cf0f1 100644 --- a/crates/polars-core/src/series/implementations/floats.rs +++ 
b/crates/polars-core/src/series/implementations/floats.rs @@ -110,48 +110,18 @@ macro_rules! impl_dyn_series { } fn subtract(&self, rhs: &Series) -> PolarsResult { - polars_ensure!( - self.dtype() == rhs.dtype(), - opq = sub, - self.dtype(), - rhs.dtype() - ); NumOpsDispatch::subtract(&self.0, rhs) } fn add_to(&self, rhs: &Series) -> PolarsResult { - polars_ensure!( - self.dtype() == rhs.dtype(), - opq = add, - self.dtype(), - rhs.dtype() - ); NumOpsDispatch::add_to(&self.0, rhs) } fn multiply(&self, rhs: &Series) -> PolarsResult { - polars_ensure!( - self.dtype() == rhs.dtype(), - opq = mul, - self.dtype(), - rhs.dtype() - ); NumOpsDispatch::multiply(&self.0, rhs) } fn divide(&self, rhs: &Series) -> PolarsResult { - polars_ensure!( - self.dtype() == rhs.dtype(), - opq = div, - self.dtype(), - rhs.dtype() - ); NumOpsDispatch::divide(&self.0, rhs) } fn remainder(&self, rhs: &Series) -> PolarsResult { - polars_ensure!( - self.dtype() == rhs.dtype(), - opq = rem, - self.dtype(), - rhs.dtype() - ); NumOpsDispatch::remainder(&self.0, rhs) } #[cfg(feature = "algorithm_group_by")] diff --git a/crates/polars-core/src/series/implementations/mod.rs b/crates/polars-core/src/series/implementations/mod.rs index b2cb97e39b69..d4b9626d2bfc 100644 --- a/crates/polars-core/src/series/implementations/mod.rs +++ b/crates/polars-core/src/series/implementations/mod.rs @@ -26,7 +26,6 @@ mod time; use std::any::Any; use std::borrow::Cow; -use std::ops::{BitAnd, BitOr, BitXor}; use std::sync::RwLockReadGuard; use super::*; @@ -183,48 +182,18 @@ macro_rules! 
impl_dyn_series { } fn subtract(&self, rhs: &Series) -> PolarsResult { - polars_ensure!( - self.dtype() == rhs.dtype(), - opq = sub, - self.dtype(), - rhs.dtype() - ); NumOpsDispatch::subtract(&self.0, rhs) } fn add_to(&self, rhs: &Series) -> PolarsResult { - polars_ensure!( - self.dtype() == rhs.dtype(), - opq = add, - self.dtype(), - rhs.dtype() - ); NumOpsDispatch::add_to(&self.0, rhs) } fn multiply(&self, rhs: &Series) -> PolarsResult { - polars_ensure!( - self.dtype() == rhs.dtype(), - opq = mul, - self.dtype(), - rhs.dtype() - ); NumOpsDispatch::multiply(&self.0, rhs) } fn divide(&self, rhs: &Series) -> PolarsResult { - polars_ensure!( - self.dtype() == rhs.dtype(), - opq = div, - self.dtype(), - rhs.dtype() - ); NumOpsDispatch::divide(&self.0, rhs) } fn remainder(&self, rhs: &Series) -> PolarsResult { - polars_ensure!( - self.dtype() == rhs.dtype(), - opq = rem, - self.dtype(), - rhs.dtype() - ); NumOpsDispatch::remainder(&self.0, rhs) } #[cfg(feature = "algorithm_group_by")] @@ -259,36 +228,6 @@ macro_rules! impl_dyn_series { Some(self.0.boxed_metadata_dyn()) } - fn bitand(&self, other: &Series) -> PolarsResult { - let other = if other.len() == 1 { - Cow::Owned(other.cast(self.dtype())?) - } else { - Cow::Borrowed(other) - }; - let other = self.0.unpack_series_matching_type(&other)?; - Ok(self.0.bitand(&other).into_series()) - } - - fn bitor(&self, other: &Series) -> PolarsResult { - let other = if other.len() == 1 { - Cow::Owned(other.cast(self.dtype())?) - } else { - Cow::Borrowed(other) - }; - let other = self.0.unpack_series_matching_type(&other)?; - Ok(self.0.bitor(&other).into_series()) - } - - fn bitxor(&self, other: &Series) -> PolarsResult { - let other = if other.len() == 1 { - Cow::Owned(other.cast(self.dtype())?) 
- } else { - Cow::Borrowed(other) - }; - let other = self.0.unpack_series_matching_type(&other)?; - Ok(self.0.bitxor(&other).into_series()) - } - fn rename(&mut self, name: PlSmallStr) { self.0.rename(name); } diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 82d50b99827b..81754abafa19 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -372,7 +372,7 @@ impl Series { self.cast_with_options(dtype, CastOptions::NonStrict) } - /// Cast `[Series]` to another `[DataType]`. + /// Cast [`Series`] to another [`DataType`]. pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult { use DataType as D; @@ -723,14 +723,6 @@ impl Series { } } - /// Take by index if ChunkedArray contains a single chunk. - /// - /// # Safety - /// This doesn't check any bounds. Null validity is checked. - pub unsafe fn take_unchecked_from_slice(&self, idx: &[IdxSize]) -> Series { - self.take_slice_unchecked(idx) - } - /// Traverse and collect every nth element in a new array. pub fn gather_every(&self, n: usize, offset: usize) -> Series { let idx = ((offset as IdxSize)..self.len() as IdxSize) diff --git a/crates/polars-core/src/series/ops/downcast.rs b/crates/polars-core/src/series/ops/downcast.rs index 2189fc319b5e..55450ee9aa2b 100644 --- a/crates/polars-core/src/series/ops/downcast.rs +++ b/crates/polars-core/src/series/ops/downcast.rs @@ -27,12 +27,12 @@ macro_rules! 
try_unpack_chunked { } impl Series { - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Int8]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Int8`] pub fn try_i8(&self) -> Option<&Int8Chunked> { try_unpack_chunked!(self, DataType::Int8 => Int8Chunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Int16]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Int16`] pub fn try_i16(&self) -> Option<&Int16Chunked> { try_unpack_chunked!(self, DataType::Int16 => Int16Chunked) } @@ -51,91 +51,91 @@ impl Series { /// } /// }).collect(); /// ``` - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Int32]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Int32`] pub fn try_i32(&self) -> Option<&Int32Chunked> { try_unpack_chunked!(self, DataType::Int32 => Int32Chunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Int64]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Int64`] pub fn try_i64(&self) -> Option<&Int64Chunked> { try_unpack_chunked!(self, DataType::Int64 => Int64Chunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Float32]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Float32`] pub fn try_f32(&self) -> Option<&Float32Chunked> { try_unpack_chunked!(self, DataType::Float32 => Float32Chunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Float64]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Float64`] pub fn try_f64(&self) -> Option<&Float64Chunked> { try_unpack_chunked!(self, DataType::Float64 => Float64Chunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::UInt8]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::UInt8`] pub fn try_u8(&self) -> Option<&UInt8Chunked> { try_unpack_chunked!(self, DataType::UInt8 => UInt8Chunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::UInt16]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::UInt16`] pub fn try_u16(&self) -> Option<&UInt16Chunked> { try_unpack_chunked!(self, DataType::UInt16 => UInt16Chunked) } - /// 
Unpack to [`ChunkedArray`] of dtype `[DataType::UInt32]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::UInt32`] pub fn try_u32(&self) -> Option<&UInt32Chunked> { try_unpack_chunked!(self, DataType::UInt32 => UInt32Chunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::UInt64]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::UInt64`] pub fn try_u64(&self) -> Option<&UInt64Chunked> { try_unpack_chunked!(self, DataType::UInt64 => UInt64Chunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Boolean]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Boolean`] pub fn try_bool(&self) -> Option<&BooleanChunked> { try_unpack_chunked!(self, DataType::Boolean => BooleanChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::String]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::String`] pub fn try_str(&self) -> Option<&StringChunked> { try_unpack_chunked!(self, DataType::String => StringChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Binary]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Binary`] pub fn try_binary(&self) -> Option<&BinaryChunked> { try_unpack_chunked!(self, DataType::Binary => BinaryChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Binary]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Binary`] pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> { try_unpack_chunked!(self, DataType::BinaryOffset => BinaryOffsetChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Time]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Time`] #[cfg(feature = "dtype-time")] pub fn try_time(&self) -> Option<&TimeChunked> { try_unpack_chunked!(self, DataType::Time => TimeChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Date]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Date`] #[cfg(feature = "dtype-date")] pub fn try_date(&self) -> Option<&DateChunked> { try_unpack_chunked!(self, DataType::Date => DateChunked) } - /// Unpack 
to [`ChunkedArray`] of dtype `[DataType::Datetime]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Datetime`] #[cfg(feature = "dtype-datetime")] pub fn try_datetime(&self) -> Option<&DatetimeChunked> { try_unpack_chunked!(self, DataType::Datetime(_, _) => DatetimeChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Duration]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Duration`] #[cfg(feature = "dtype-duration")] pub fn try_duration(&self) -> Option<&DurationChunked> { try_unpack_chunked!(self, DataType::Duration(_) => DurationChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Decimal]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Decimal`] #[cfg(feature = "dtype-decimal")] pub fn try_decimal(&self) -> Option<&DecimalChunked> { try_unpack_chunked!(self, DataType::Decimal(_, _) => DecimalChunked) @@ -146,19 +146,19 @@ impl Series { try_unpack_chunked!(self, DataType::List(_) => ListChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Array]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Array`] #[cfg(feature = "dtype-array")] pub fn try_array(&self) -> Option<&ArrayChunked> { try_unpack_chunked!(self, DataType::Array(_, _) => ArrayChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Categorical]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Categorical`] #[cfg(feature = "dtype-categorical")] pub fn try_categorical(&self) -> Option<&CategoricalChunked> { try_unpack_chunked!(self, DataType::Categorical(_, _) | DataType::Enum(_, _) => CategoricalChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Struct]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Struct`] #[cfg(feature = "dtype-struct")] pub fn try_struct(&self) -> Option<&StructChunked> { #[cfg(debug_assertions)] @@ -171,17 +171,17 @@ impl Series { try_unpack_chunked!(self, DataType::Struct(_) => StructChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Null]` + /// Unpack to 
[`ChunkedArray`] of dtype [`DataType::Null`] pub fn try_null(&self) -> Option<&NullChunked> { try_unpack_chunked!(self, DataType::Null => NullChunked) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Int8]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Int8`] pub fn i8(&self) -> PolarsResult<&Int8Chunked> { self.try_i8() .ok_or_else(|| unpack_chunked_err!(self => "Int8")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Int16]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Int16`] pub fn i16(&self) -> PolarsResult<&Int16Chunked> { self.try_i16() .ok_or_else(|| unpack_chunked_err!(self => "Int16")) @@ -201,107 +201,107 @@ impl Series { /// } /// }).collect(); /// ``` - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Int32]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Int32`] pub fn i32(&self) -> PolarsResult<&Int32Chunked> { self.try_i32() .ok_or_else(|| unpack_chunked_err!(self => "Int32")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Int64]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Int64`] pub fn i64(&self) -> PolarsResult<&Int64Chunked> { self.try_i64() .ok_or_else(|| unpack_chunked_err!(self => "Int64")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Float32]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Float32`] pub fn f32(&self) -> PolarsResult<&Float32Chunked> { self.try_f32() .ok_or_else(|| unpack_chunked_err!(self => "Float32")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Float64]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Float64`] pub fn f64(&self) -> PolarsResult<&Float64Chunked> { self.try_f64() .ok_or_else(|| unpack_chunked_err!(self => "Float64")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::UInt8]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::UInt8`] pub fn u8(&self) -> PolarsResult<&UInt8Chunked> { self.try_u8() .ok_or_else(|| unpack_chunked_err!(self => "UInt8")) } - /// Unpack to [`ChunkedArray`] of dtype 
`[DataType::UInt16]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::UInt16`] pub fn u16(&self) -> PolarsResult<&UInt16Chunked> { self.try_u16() .ok_or_else(|| unpack_chunked_err!(self => "UInt16")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::UInt32]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::UInt32`] pub fn u32(&self) -> PolarsResult<&UInt32Chunked> { self.try_u32() .ok_or_else(|| unpack_chunked_err!(self => "UInt32")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::UInt64]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::UInt64`] pub fn u64(&self) -> PolarsResult<&UInt64Chunked> { self.try_u64() .ok_or_else(|| unpack_chunked_err!(self => "UInt64")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Boolean]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Boolean`] pub fn bool(&self) -> PolarsResult<&BooleanChunked> { self.try_bool() .ok_or_else(|| unpack_chunked_err!(self => "Boolean")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::String]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::String`] pub fn str(&self) -> PolarsResult<&StringChunked> { self.try_str() .ok_or_else(|| unpack_chunked_err!(self => "String")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Binary]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Binary`] pub fn binary(&self) -> PolarsResult<&BinaryChunked> { self.try_binary() .ok_or_else(|| unpack_chunked_err!(self => "Binary")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Binary]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Binary`] pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> { self.try_binary_offset() .ok_or_else(|| unpack_chunked_err!(self => "BinaryOffset")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Time]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Time`] #[cfg(feature = "dtype-time")] pub fn time(&self) -> PolarsResult<&TimeChunked> { self.try_time() .ok_or_else(|| 
unpack_chunked_err!(self => "Time")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Date]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Date`] #[cfg(feature = "dtype-date")] pub fn date(&self) -> PolarsResult<&DateChunked> { self.try_date() .ok_or_else(|| unpack_chunked_err!(self => "Date")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Datetime]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Datetime`] #[cfg(feature = "dtype-datetime")] pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> { self.try_datetime() .ok_or_else(|| unpack_chunked_err!(self => "Datetime")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Duration]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Duration`] #[cfg(feature = "dtype-duration")] pub fn duration(&self) -> PolarsResult<&DurationChunked> { self.try_duration() .ok_or_else(|| unpack_chunked_err!(self => "Duration")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Decimal]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Decimal`] #[cfg(feature = "dtype-decimal")] pub fn decimal(&self) -> PolarsResult<&DecimalChunked> { self.try_decimal() @@ -314,21 +314,21 @@ impl Series { .ok_or_else(|| unpack_chunked_err!(self => "List")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Array]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Array`] #[cfg(feature = "dtype-array")] pub fn array(&self) -> PolarsResult<&ArrayChunked> { self.try_array() .ok_or_else(|| unpack_chunked_err!(self => "FixedSizeList")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Categorical]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Categorical`] #[cfg(feature = "dtype-categorical")] pub fn categorical(&self) -> PolarsResult<&CategoricalChunked> { self.try_categorical() .ok_or_else(|| unpack_chunked_err!(self => "Enum | Categorical")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Struct]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Struct`] 
#[cfg(feature = "dtype-struct")] pub fn struct_(&self) -> PolarsResult<&StructChunked> { #[cfg(debug_assertions)] @@ -343,7 +343,7 @@ impl Series { .ok_or_else(|| unpack_chunked_err!(self => "Struct")) } - /// Unpack to [`ChunkedArray`] of dtype `[DataType::Null]` + /// Unpack to [`ChunkedArray`] of dtype [`DataType::Null`] pub fn null(&self) -> PolarsResult<&NullChunked> { self.try_null() .ok_or_else(|| unpack_chunked_err!(self => "Null")) diff --git a/crates/polars-core/src/series/series_trait.rs b/crates/polars-core/src/series/series_trait.rs index 0352343baa82..c77a9de0f7ad 100644 --- a/crates/polars-core/src/series/series_trait.rs +++ b/crates/polars-core/src/series/series_trait.rs @@ -218,18 +218,6 @@ pub trait SeriesTrait: /// Rename the Series. fn rename(&mut self, name: PlSmallStr); - fn bitand(&self, _other: &Series) -> PolarsResult { - polars_bail!(opq = bitand, self._dtype()); - } - - fn bitor(&self, _other: &Series) -> PolarsResult { - polars_bail!(opq = bitor, self._dtype()); - } - - fn bitxor(&self, _other: &Series) -> PolarsResult { - polars_bail!(opq = bitxor, self._dtype()); - } - fn get_metadata(&self) -> Option> { None } @@ -299,19 +287,27 @@ pub trait SeriesTrait: /// Filter by boolean mask. This operation clones data. fn filter(&self, _filter: &BooleanChunked) -> PolarsResult; - /// Take by index. This operation is clone. + /// Take from `self` at the indexes given by `idx`. + /// + /// Null values in `idx` because null values in the output array. + /// + /// This operation is clone. fn take(&self, _indices: &IdxCa) -> PolarsResult; - /// Take by index. + /// Take from `self` at the indexes given by `idx`. + /// + /// Null values in `idx` because null values in the output array. /// /// # Safety /// This doesn't check any bounds. unsafe fn take_unchecked(&self, _idx: &IdxCa) -> Series; - /// Take by index. This operation is clone. + /// Take from `self` at the indexes given by `idx`. + /// + /// This operation is clone. 
fn take_slice(&self, _indices: &[IdxSize]) -> PolarsResult; - /// Take by index. + /// Take from `self` at the indexes given by `idx`. /// /// # Safety /// This doesn't check any bounds. @@ -428,7 +424,7 @@ pub trait SeriesTrait: /// Count the null values. fn null_count(&self) -> usize; - /// Return if any the chunks in this `[ChunkedArray]` have nulls. + /// Return whether any of the chunks in this [`ChunkedArray`] have nulls. fn has_nulls(&self) -> bool; /// Get unique values in the Series. diff --git a/crates/polars-core/src/testing.rs b/crates/polars-core/src/testing.rs index f227f2bfe861..ed7c3d4fbd3e 100644 --- a/crates/polars-core/src/testing.rs +++ b/crates/polars-core/src/testing.rs @@ -1,5 +1,4 @@ //! Testing utilities. -use std::ops::Deref; use crate::prelude::*; @@ -36,21 +35,6 @@ impl Series { } } } - - /// Get a pointer to the underlying data of this [`Series`]. - /// Can be useful for fast comparisons. - pub fn get_data_ptr(&self) -> usize { - let object = self.0.deref(); - - // SAFETY: - // A fat pointer consists of a data ptr and a ptr to the vtable. - // we specifically check that we only transmute &dyn SeriesTrait e.g. - // a trait object, therefore this is sound. 
- #[allow(clippy::transmute_undefined_repr)] - let (data_ptr, _vtable_ptr) = - unsafe { std::mem::transmute::<&dyn SeriesTrait, (usize, usize)>(object) }; - data_ptr - } } impl PartialEq for Series { @@ -128,26 +112,6 @@ impl DataFrame { } true } - - /// Checks if the Arc ptrs of the [`Series`] are equal - /// - /// # Example - /// - /// ```rust - /// # use polars_core::prelude::*; - /// let df1: DataFrame = df!("Atomic number" => &[1, 51, 300], - /// "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?; - /// let df2: &DataFrame = &df1; - /// - /// assert!(df1.ptr_equal(df2)); - /// # Ok::<(), PolarsError>(()) - /// ``` - pub fn ptr_equal(&self, other: &DataFrame) -> bool { - self.columns - .iter() - .zip(other.columns.iter()) - .all(|(a, b)| a.get_data_ptr() == b.get_data_ptr()) - } } impl PartialEq for DataFrame { diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index f1cfa5251899..fb691d746715 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -50,7 +50,7 @@ impl PhysicalExpr for AggregationExpr { None } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let s = self.input.evaluate(df, state)?; let AggregationType { @@ -69,28 +69,29 @@ impl PhysicalExpr for AggregationExpr { GroupByMethod::Min => { if MetadataEnv::experimental_enabled() { if let Some(sc) = s.get_metadata().and_then(|v| v.min_value()) { - return Ok(sc.into_series(s.name().clone())); + return Ok(sc.into_column(s.name().clone())); } } match s.is_sorted_flag() { IsSorted::Ascending | IsSorted::Descending => { - s.min_reduce().map(|sc| sc.into_series(s.name().clone())) + s.min_reduce().map(|sc| sc.into_column(s.name().clone())) }, - IsSorted::Not => parallel_op_series( - |s| s.min_reduce().map(|sc| sc.into_series(s.name().clone())), + IsSorted::Not => 
parallel_op_columns( + |s| s.min_reduce().map(|sc| sc.into_column(s.name().clone())), s, allow_threading, ), } }, #[cfg(feature = "propagate_nans")] - GroupByMethod::NanMin => parallel_op_series( + GroupByMethod::NanMin => parallel_op_columns( |s| { Ok(polars_ops::prelude::nan_propagating_aggregate::nan_min_s( - &s, + s.as_materialized_series(), s.name().clone(), - )) + ) + .into_column()) }, s, allow_threading, @@ -102,28 +103,29 @@ impl PhysicalExpr for AggregationExpr { GroupByMethod::Max => { if MetadataEnv::experimental_enabled() { if let Some(sc) = s.get_metadata().and_then(|v| v.max_value()) { - return Ok(sc.into_series(s.name().clone())); + return Ok(sc.into_column(s.name().clone())); } } match s.is_sorted_flag() { IsSorted::Ascending | IsSorted::Descending => { - s.max_reduce().map(|sc| sc.into_series(s.name().clone())) + s.max_reduce().map(|sc| sc.into_column(s.name().clone())) }, - IsSorted::Not => parallel_op_series( - |s| s.max_reduce().map(|sc| sc.into_series(s.name().clone())), + IsSorted::Not => parallel_op_columns( + |s| s.max_reduce().map(|sc| sc.into_column(s.name().clone())), s, allow_threading, ), } }, #[cfg(feature = "propagate_nans")] - GroupByMethod::NanMax => parallel_op_series( + GroupByMethod::NanMax => parallel_op_columns( |s| { Ok(polars_ops::prelude::nan_propagating_aggregate::nan_max_s( - &s, + s.as_materialized_series(), s.name().clone(), - )) + ) + .into_column()) }, s, allow_threading, @@ -132,20 +134,20 @@ impl PhysicalExpr for AggregationExpr { GroupByMethod::NanMax => { panic!("activate 'propagate_nans' feature") }, - GroupByMethod::Median => s.median_reduce().map(|sc| sc.into_series(s.name().clone())), - GroupByMethod::Mean => Ok(s.mean_reduce().into_series(s.name().clone())), + GroupByMethod::Median => s.median_reduce().map(|sc| sc.into_column(s.name().clone())), + GroupByMethod::Mean => Ok(s.mean_reduce().into_column(s.name().clone())), GroupByMethod::First => Ok(if s.is_empty() { - Series::full_null(s.name().clone(), 1, 
s.dtype()) + Column::full_null(s.name().clone(), 1, s.dtype()) } else { s.head(Some(1)) }), GroupByMethod::Last => Ok(if s.is_empty() { - Series::full_null(s.name().clone(), 1, s.dtype()) + Column::full_null(s.name().clone(), 1, s.dtype()) } else { s.tail(Some(1)) }), - GroupByMethod::Sum => parallel_op_series( - |s| s.sum_reduce().map(|sc| sc.into_series(s.name().clone())), + GroupByMethod::Sum => parallel_op_columns( + |s| s.sum_reduce().map(|sc| sc.into_column(s.name().clone())), s, allow_threading, ), @@ -154,41 +156,41 @@ impl PhysicalExpr for AggregationExpr { if MetadataEnv::experimental_enabled() { if let Some(count) = s.get_metadata().and_then(|v| v.distinct_count()) { let count = count + IdxSize::from(s.null_count() > 0); - return Ok(IdxCa::from_slice(s.name().clone(), &[count]).into_series()); + return Ok(IdxCa::from_slice(s.name().clone(), &[count]).into_column()); } } s.n_unique().map(|count| { - IdxCa::from_slice(s.name().clone(), &[count as IdxSize]).into_series() + IdxCa::from_slice(s.name().clone(), &[count as IdxSize]).into_column() }) }, GroupByMethod::Count { include_nulls } => { let count = s.len() - s.null_count() * !include_nulls as usize; - Ok(IdxCa::from_slice(s.name().clone(), &[count as IdxSize]).into_series()) + Ok(IdxCa::from_slice(s.name().clone(), &[count as IdxSize]).into_column()) }, - GroupByMethod::Implode => s.implode().map(|ca| ca.into_series()), + GroupByMethod::Implode => s.implode().map(|ca| ca.into_column()), GroupByMethod::Std(ddof) => s .std_reduce(ddof) - .map(|sc| sc.into_series(s.name().clone())), + .map(|sc| sc.into_column(s.name().clone())), GroupByMethod::Var(ddof) => s .var_reduce(ddof) - .map(|sc| sc.into_series(s.name().clone())), + .map(|sc| sc.into_column(s.name().clone())), GroupByMethod::Quantile(_, _) => unimplemented!(), #[cfg(feature = "bitwise")] GroupByMethod::Bitwise(f) => match f { - GroupByBitwiseMethod::And => parallel_op_series( - |s| s.and_reduce().map(|sc| sc.into_series(s.name().clone())), + 
GroupByBitwiseMethod::And => parallel_op_columns( + |s| s.and_reduce().map(|sc| sc.into_column(s.name().clone())), s, allow_threading, ), - GroupByBitwiseMethod::Or => parallel_op_series( - |s| s.or_reduce().map(|sc| sc.into_series(s.name().clone())), + GroupByBitwiseMethod::Or => parallel_op_columns( + |s| s.or_reduce().map(|sc| sc.into_column(s.name().clone())), s, allow_threading, ), - GroupByBitwiseMethod::Xor => parallel_op_series( - |s| s.xor_reduce().map(|sc| sc.into_series(s.name().clone())), + GroupByBitwiseMethod::Xor => parallel_op_columns( + |s| s.xor_reduce().map(|sc| sc.into_column(s.name().clone())), s, allow_threading, ), @@ -223,27 +225,27 @@ impl PhysicalExpr for AggregationExpr { GroupByMethod::Min => { let (s, groups) = ac.get_final_aggregation(); let agg_s = s.agg_min(&groups); - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) }, GroupByMethod::Max => { let (s, groups) = ac.get_final_aggregation(); let agg_s = s.agg_max(&groups); - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) }, GroupByMethod::Median => { let (s, groups) = ac.get_final_aggregation(); let agg_s = s.agg_median(&groups); - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) }, GroupByMethod::Mean => { let (s, groups) = ac.get_final_aggregation(); let agg_s = s.agg_mean(&groups); - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) }, GroupByMethod::Sum => { let (s, groups) = ac.get_final_aggregation(); let agg_s = s.agg_sum(&groups); - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) }, GroupByMethod::Count { include_nulls } => { if include_nulls || ac.series().null_count() == 0 { @@ -321,7 +323,7 @@ impl PhysicalExpr for AggregationExpr { .map(|s| s.len() as IdxSize - s.null_count() as IdxSize) }) .collect(); - 
AggregatedScalar(rename_series(out.into_series(), keep_name)) + AggregatedScalar(out.into_series().with_name(keep_name)) }, AggState::NotAggregated(s) => { let s = s.clone(); @@ -371,17 +373,17 @@ impl PhysicalExpr for AggregationExpr { GroupByMethod::First => { let (s, groups) = ac.get_final_aggregation(); let agg_s = s.agg_first(&groups); - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) }, GroupByMethod::Last => { let (s, groups) = ac.get_final_aggregation(); let agg_s = s.agg_last(&groups); - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) }, GroupByMethod::NUnique => { let (s, groups) = ac.get_final_aggregation(); let agg_s = s.agg_n_unique(&groups); - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) }, GroupByMethod::Implode => { // if the aggregation is already @@ -404,7 +406,7 @@ impl PhysicalExpr for AggregationExpr { agg.as_list().into_series() }, }; - AggregatedList(rename_series(s, keep_name)) + AggregatedList(s.with_name(keep_name)) }, GroupByMethod::Groups => { let mut column: ListChunked = ac.groups().as_list_chunked(); @@ -414,12 +416,12 @@ impl PhysicalExpr for AggregationExpr { GroupByMethod::Std(ddof) => { let (s, groups) = ac.get_final_aggregation(); let agg_s = s.agg_std(&groups, ddof); - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) }, GroupByMethod::Var(ddof) => { let (s, groups) = ac.get_final_aggregation(); let agg_s = s.agg_var(&groups, ddof); - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) }, GroupByMethod::Quantile(_, _) => { // implemented explicitly in AggQuantile struct @@ -433,7 +435,7 @@ impl PhysicalExpr for AggregationExpr { GroupByBitwiseMethod::Or => s.agg_or(&groups), GroupByBitwiseMethod::Xor => s.agg_xor(&groups), }; - AggregatedScalar(rename_series(agg_s, keep_name)) + 
AggregatedScalar(agg_s.with_name(keep_name)) }, GroupByMethod::NanMin => { #[cfg(feature = "propagate_nans")] @@ -444,7 +446,7 @@ impl PhysicalExpr for AggregationExpr { } else { s.agg_min(&groups) }; - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) } #[cfg(not(feature = "propagate_nans"))] { @@ -460,7 +462,7 @@ impl PhysicalExpr for AggregationExpr { } else { s.agg_max(&groups) }; - AggregatedScalar(rename_series(agg_s, keep_name)) + AggregatedScalar(agg_s.with_name(keep_name)) } #[cfg(not(feature = "propagate_nans"))] { @@ -493,20 +495,15 @@ impl PhysicalExpr for AggregationExpr { } } -fn rename_series(mut s: Series, name: PlSmallStr) -> Series { - s.rename(name); - s -} - impl PartitionedAggregation for AggregationExpr { fn evaluate_partitioned( &self, df: &DataFrame, groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { let expr = self.input.as_partitioned_aggregator().unwrap(); - let series = expr.evaluate_partitioned(df, groups, state)?; + let column = expr.evaluate_partitioned(df, groups, state)?; // SAFETY: // groups are in bounds @@ -514,15 +511,15 @@ impl PartitionedAggregation for AggregationExpr { match self.agg_type.groupby { #[cfg(feature = "dtype-struct")] GroupByMethod::Mean => { - let new_name = series.name().clone(); + let new_name = column.name().clone(); // ensure we don't overflow // the all 8 and 16 bits integers are already upcasted to int16 on `agg_sum` - let mut agg_s = if matches!(series.dtype(), DataType::Int32 | DataType::UInt32) + let mut agg_s = if matches!(column.dtype(), DataType::Int32 | DataType::UInt32) { - series.cast(&DataType::Int64).unwrap().agg_sum(groups) + column.cast(&DataType::Int64).unwrap().agg_sum(groups) } else { - series.agg_sum(groups) + column.agg_sum(groups) }; agg_s.rename(new_name.clone()); @@ -533,54 +530,52 @@ impl PartitionedAggregation for AggregationExpr { DataType::Float32 => agg_s, _ => 
agg_s.cast(&DataType::Float64).unwrap(), }; - let mut count_s = series.agg_valid_count(groups); + let mut count_s = column.agg_valid_count(groups); count_s.rename(PlSmallStr::from_static("__POLARS_COUNT")); - Ok(StructChunked::from_series( - new_name, - agg_s.len(), - [agg_s, count_s].iter(), + Ok( + StructChunked::from_columns(new_name, agg_s.len(), &[agg_s, count_s]) + .unwrap() + .into_column(), ) - .unwrap() - .into_series()) } }, GroupByMethod::Implode => { - let new_name = series.name().clone(); - let mut agg = series.agg_list(groups); + let new_name = column.name().clone(); + let mut agg = column.agg_list(groups); agg.rename(new_name); Ok(agg) }, GroupByMethod::First => { - let mut agg = series.agg_first(groups); - agg.rename(series.name().clone()); + let mut agg = column.agg_first(groups); + agg.rename(column.name().clone()); Ok(agg) }, GroupByMethod::Last => { - let mut agg = series.agg_last(groups); - agg.rename(series.name().clone()); + let mut agg = column.agg_last(groups); + agg.rename(column.name().clone()); Ok(agg) }, GroupByMethod::Max => { - let mut agg = series.agg_max(groups); - agg.rename(series.name().clone()); + let mut agg = column.agg_max(groups); + agg.rename(column.name().clone()); Ok(agg) }, GroupByMethod::Min => { - let mut agg = series.agg_min(groups); - agg.rename(series.name().clone()); + let mut agg = column.agg_min(groups); + agg.rename(column.name().clone()); Ok(agg) }, GroupByMethod::Sum => { - let mut agg = series.agg_sum(groups); - agg.rename(series.name().clone()); + let mut agg = column.agg_sum(groups); + agg.rename(column.name().clone()); Ok(agg) }, GroupByMethod::Count { include_nulls: true, } => { let mut ca = groups.group_count(); - ca.rename(series.name().clone()); - Ok(ca.into_series()) + ca.rename(column.name().clone()); + Ok(ca.into_column()) }, _ => { unimplemented!() @@ -591,10 +586,10 @@ impl PartitionedAggregation for AggregationExpr { fn finalize( &self, - partitioned: Series, + partitioned: Column, groups: 
&GroupsProxy, _state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { match self.agg_type.groupby { GroupByMethod::Count { include_nulls: true, @@ -616,9 +611,9 @@ impl PartitionedAggregation for AggregationExpr { let (agg_count, agg_s) = unsafe { POOL.join(|| count.agg_sum(groups), || sum.agg_sum(groups)) }; let agg_s = &agg_s / &agg_count; - Ok(rename_series(agg_s?, new_name)) + Ok(agg_s?.with_name(new_name).into_column()) }, - _ => Ok(Series::full_null( + _ => Ok(Column::full_null( new_name, groups.len(), partitioned.dtype(), @@ -685,7 +680,7 @@ impl PartitionedAggregation for AggregationExpr { if can_fast_explode { ca.set_fast_explode() } - Ok(ca.into_series().as_list().into_series()) + Ok(ca.into_series().as_list().into_column()) }, GroupByMethod::First => { let mut agg = unsafe { partitioned.agg_first(groups) }; @@ -746,12 +741,12 @@ impl PhysicalExpr for AggQuantileExpr { None } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let input = self.input.evaluate(df, state)?; let quantile = self.get_quantile(df, state)?; input .quantile_reduce(quantile, self.method) - .map(|sc| sc.into_series(input.name().clone())) + .map(|sc| sc.into_column(input.name().clone())) } #[allow(clippy::ptr_arg)] fn evaluate_on_groups<'a>( @@ -791,9 +786,9 @@ impl PhysicalExpr for AggQuantileExpr { /// Simple wrapper to parallelize functions that can be divided over threads aggregated and /// finally aggregated in the main thread. This can be done for sum, min, max, etc. 
-fn parallel_op_series(f: F, s: Series, allow_threading: bool) -> PolarsResult +fn parallel_op_columns(f: F, s: Column, allow_threading: bool) -> PolarsResult where - F: Fn(Series) -> PolarsResult + Send + Sync, + F: Fn(Column) -> PolarsResult + Send + Sync, { // set during debug low so // we mimic production size data behavior @@ -826,7 +821,7 @@ where let mut iter = chunks.into_iter(); let first = iter.next().unwrap(); let dtype = first.dtype(); - let out = iter.fold(first.to_physical_repr().into_owned(), |mut acc, s| { + let out = iter.fold(first.to_physical_repr(), |mut acc, s| { acc.append(&s.to_physical_repr()).unwrap(); acc }); diff --git a/crates/polars-expr/src/expressions/alias.rs b/crates/polars-expr/src/expressions/alias.rs index 8d321263a3f5..6144a1418de2 100644 --- a/crates/polars-expr/src/expressions/alias.rs +++ b/crates/polars-expr/src/expressions/alias.rs @@ -18,7 +18,7 @@ impl AliasExpr { } } - fn finish(&self, input: Series) -> Series { + fn finish(&self, input: Column) -> Column { input.with_name(self.name.clone()) } } @@ -28,7 +28,7 @@ impl PhysicalExpr for AliasExpr { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let series = self.physical_expr.evaluate(df, state)?; Ok(self.finish(series)) } @@ -42,12 +42,16 @@ impl PhysicalExpr for AliasExpr { ) -> PolarsResult> { let mut ac = self.physical_expr.evaluate_on_groups(df, groups, state)?; let s = ac.take(); - let s = self.finish(s); + let s = self.finish(s.into()); if ac.is_literal() { - ac.with_literal(s); + ac.with_literal(s.take_materialized_series()); } else { - ac.with_series(s, ac.is_aggregated(), Some(&self.expr))?; + ac.with_series( + s.take_materialized_series(), + ac.is_aggregated(), + Some(&self.expr), + )?; } Ok(ac) } @@ -78,7 +82,7 @@ impl PartitionedAggregation for AliasExpr { df: &DataFrame, groups: &GroupsProxy, state: &ExecutionState, - ) -> 
PolarsResult { + ) -> PolarsResult { let agg = self.physical_expr.as_partitioned_aggregator().unwrap(); let s = agg.evaluate_partitioned(df, groups, state)?; Ok(s.with_name(self.name.clone())) @@ -86,10 +90,10 @@ impl PartitionedAggregation for AliasExpr { fn finalize( &self, - partitioned: Series, + partitioned: Column, groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { let agg = self.physical_expr.as_partitioned_aggregator().unwrap(); let s = agg.finalize(partitioned, groups, state)?; Ok(s.with_name(self.name.clone())) diff --git a/crates/polars-expr/src/expressions/apply.rs b/crates/polars-expr/src/expressions/apply.rs index d6c37a5a004f..ddb4c37fac5d 100644 --- a/crates/polars-expr/src/expressions/apply.rs +++ b/crates/polars-expr/src/expressions/apply.rs @@ -321,7 +321,7 @@ impl PhysicalExpr for ApplyExpr { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let f = |e: &Arc| e.evaluate(df, state); let mut inputs = if self.allow_threading && self.inputs.len() > 1 { POOL.install(|| { @@ -341,14 +341,9 @@ impl PhysicalExpr for ApplyExpr { if self.allow_rename { self.eval_and_flatten(&mut inputs) - .map(|c| c.as_materialized_series().clone()) } else { let in_name = inputs[0].name().clone(); - Ok(self - .eval_and_flatten(&mut inputs)? 
- .as_materialized_series() - .clone() - .with_name(in_name)) + Ok(self.eval_and_flatten(&mut inputs)?.with_name(in_name)) } } @@ -681,29 +676,24 @@ impl PartitionedAggregation for ApplyExpr { df: &DataFrame, groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { let a = self.inputs[0].as_partitioned_aggregator().unwrap(); - let s = a.evaluate_partitioned(df, groups, state)?.into(); + let s = a.evaluate_partitioned(df, groups, state)?; if self.allow_rename { self.eval_and_flatten(&mut [s]) - .map(|c| c.as_materialized_series().clone()) } else { let in_name = s.name().clone(); - Ok(self - .eval_and_flatten(&mut [s])? - .as_materialized_series() - .clone() - .with_name(in_name)) + Ok(self.eval_and_flatten(&mut [s])?.with_name(in_name)) } } fn finalize( &self, - partitioned: Series, + partitioned: Column, _groups: &GroupsProxy, _state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { Ok(partitioned) } } diff --git a/crates/polars-expr/src/expressions/binary.rs b/crates/polars-expr/src/expressions/binary.rs index 23f50af45273..7754c2b6633e 100644 --- a/crates/polars-expr/src/expressions/binary.rs +++ b/crates/polars-expr/src/expressions/binary.rs @@ -41,7 +41,7 @@ impl BinaryExpr { } /// Can partially do operations in place. 
-fn apply_operator_owned(left: Series, right: Series, op: Operator) -> PolarsResult { +fn apply_operator_owned(left: Column, right: Column, op: Operator) -> PolarsResult { match op { Operator::Plus => left.try_add_owned(right), Operator::Minus => left.try_sub_owned(right), @@ -52,15 +52,15 @@ fn apply_operator_owned(left: Series, right: Series, op: Operator) -> PolarsResu } } -pub fn apply_operator(left: &Series, right: &Series, op: Operator) -> PolarsResult { +pub fn apply_operator(left: &Column, right: &Column, op: Operator) -> PolarsResult { use DataType::*; match op { - Operator::Gt => ChunkCompareIneq::gt(left, right).map(|ca| ca.into_series()), - Operator::GtEq => ChunkCompareIneq::gt_eq(left, right).map(|ca| ca.into_series()), - Operator::Lt => ChunkCompareIneq::lt(left, right).map(|ca| ca.into_series()), - Operator::LtEq => ChunkCompareIneq::lt_eq(left, right).map(|ca| ca.into_series()), - Operator::Eq => ChunkCompareEq::equal(left, right).map(|ca| ca.into_series()), - Operator::NotEq => ChunkCompareEq::not_equal(left, right).map(|ca| ca.into_series()), + Operator::Gt => ChunkCompareIneq::gt(left, right).map(|ca| ca.into_column()), + Operator::GtEq => ChunkCompareIneq::gt_eq(left, right).map(|ca| ca.into_column()), + Operator::Lt => ChunkCompareIneq::lt(left, right).map(|ca| ca.into_column()), + Operator::LtEq => ChunkCompareIneq::lt_eq(left, right).map(|ca| ca.into_column()), + Operator::Eq => ChunkCompareEq::equal(left, right).map(|ca| ca.into_column()), + Operator::NotEq => ChunkCompareEq::not_equal(left, right).map(|ca| ca.into_column()), Operator::Plus => left + right, Operator::Minus => left - right, Operator::Multiply => left * right, @@ -87,7 +87,11 @@ pub fn apply_operator(left: &Series, right: &Series, op: Operator) -> PolarsResu Operator::FloorDivide => { #[cfg(feature = "round_series")] { - floor_div_series(left, right) + floor_div_series( + left.as_materialized_series(), + right.as_materialized_series(), + ) + .map(Column::from) } 
#[cfg(not(feature = "round_series"))] { @@ -104,8 +108,8 @@ pub fn apply_operator(left: &Series, right: &Series, op: Operator) -> PolarsResu .bitand(&right.cast(&DataType::Boolean)?), Operator::Xor => left.bitxor(right), Operator::Modulus => left % right, - Operator::EqValidity => left.equal_missing(right).map(|ca| ca.into_series()), - Operator::NotEqValidity => left.not_equal_missing(right).map(|ca| ca.into_series()), + Operator::EqValidity => left.equal_missing(right).map(|ca| ca.into_column()), + Operator::NotEqValidity => left.not_equal_missing(right).map(|ca| ca.into_column()), } } @@ -123,8 +127,8 @@ impl BinaryExpr { // Drop lhs so that we might operate in place. drop(ac_l.take()); - let out = apply_operator_owned(lhs, rhs, self.op)?; - ac_l.with_series(out, aggregated, Some(&self.expr))?; + let out = apply_operator_owned(lhs.into_column(), rhs.into_column(), self.op)?; + ac_l.with_series(out.take_materialized_series(), aggregated, Some(&self.expr))?; Ok(ac_l) } @@ -137,16 +141,16 @@ impl BinaryExpr { ac_l.groups(); ac_r.groups(); polars_ensure!(ac_l.groups.len() == ac_r.groups.len(), ComputeError: "lhs and rhs should have same group length"); - let left_s = ac_l.series().rechunk(); - let right_s = ac_r.series().rechunk(); + let left_s = ac_l.series().rechunk().into_column(); + let right_s = ac_r.series().rechunk().into_column(); let res_s = apply_operator(&left_s, &right_s, self.op)?; ac_l.with_update_groups(UpdateGroups::WithSeriesLen); let res_s = if res_s.len() == 1 { res_s.new_from_index(0, ac_l.groups.len()) } else { - ListChunked::full(name, &res_s, ac_l.groups.len()).into_series() + ListChunked::full(name, res_s.as_materialized_series(), ac_l.groups.len()).into_column() }; - ac_l.with_series(res_s, true, Some(&self.expr))?; + ac_l.with_series(res_s.take_materialized_series(), true, Some(&self.expr))?; Ok(ac_l) } @@ -159,7 +163,13 @@ impl BinaryExpr { let ca = ac_l .iter_groups(false) .zip(ac_r.iter_groups(false)) - .map(|(l, r)| 
Some(apply_operator(l?.as_ref(), r?.as_ref(), self.op))) + .map(|(l, r)| { + Some(apply_operator( + &l?.as_ref().clone().into_column(), + &r?.as_ref().clone().into_column(), + self.op, + )) + }) .map(|opt_res| opt_res.transpose()) .collect::>()? .with_name(name); @@ -175,7 +185,7 @@ impl PhysicalExpr for BinaryExpr { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { // Window functions may set a global state that determine their output // state, so we don't let them run in parallel as they race // they also saturate the thread pool by themselves, so that's fine. @@ -246,8 +256,10 @@ impl PhysicalExpr for BinaryExpr { (AggState::AggregatedList(lhs), AggState::AggregatedList(rhs)) => { let lhs = lhs.list().unwrap(); let rhs = rhs.list().unwrap(); - let out = - lhs.apply_to_inner(&|lhs| apply_operator(&lhs, &rhs.get_inner(), self.op))?; + let out = lhs.apply_to_inner(&|lhs| { + apply_operator(&lhs.into_column(), &rhs.get_inner().into_column(), self.op) + .map(|c| c.take_materialized_series()) + })?; ac_l.with_series(out.into_series(), true, Some(&self.expr))?; Ok(ac_l) }, @@ -279,7 +291,7 @@ mod stats { use super::*; - fn apply_operator_stats_eq(min_max: &Series, literal: &Series) -> bool { + fn apply_operator_stats_eq(min_max: &Column, literal: &Column) -> bool { use ChunkCompareIneq as C; // Literal is greater than max, don't need to read. 
if C::gt(literal, min_max).map(|s| s.all()).unwrap_or(false) { @@ -294,7 +306,7 @@ mod stats { true } - fn apply_operator_stats_neq(min_max: &Series, literal: &Series) -> bool { + fn apply_operator_stats_neq(min_max: &Column, literal: &Column) -> bool { if min_max.len() < 2 || min_max.null_count() > 0 { return true; } @@ -311,7 +323,7 @@ mod stats { true } - fn apply_operator_stats_rhs_lit(min_max: &Series, literal: &Series, op: Operator) -> bool { + fn apply_operator_stats_rhs_lit(min_max: &Column, literal: &Column, op: Operator) -> bool { use ChunkCompareIneq as C; match op { Operator::Eq => apply_operator_stats_eq(min_max, literal), @@ -347,7 +359,7 @@ mod stats { } } - fn apply_operator_stats_lhs_lit(literal: &Series, min_max: &Series, op: Operator) -> bool { + fn apply_operator_stats_lhs_lit(literal: &Column, min_max: &Column, op: Operator) -> bool { use ChunkCompareIneq as C; match op { Operator::Eq => apply_operator_stats_eq(min_max, literal), @@ -423,7 +435,11 @@ mod stats { // will be incorrect if not debug_assert_eq!(min_max_s.null_count(), 0); let lit_s = self.right.evaluate(&dummy, &state).unwrap(); - Ok(apply_operator_stats_rhs_lit(&min_max_s, &lit_s, self.op)) + Ok(apply_operator_stats_rhs_lit( + &min_max_s.into_column(), + &lit_s, + self.op, + )) }, } }, @@ -435,7 +451,11 @@ mod stats { // will be incorrect if not debug_assert_eq!(min_max_s.null_count(), 0); let lit_s = self.left.evaluate(&dummy, &state).unwrap(); - Ok(apply_operator_stats_lhs_lit(&lit_s, &min_max_s, self.op)) + Ok(apply_operator_stats_lhs_lit( + &lit_s, + &min_max_s.into_column(), + self.op, + )) }, } }, @@ -476,7 +496,7 @@ impl PartitionedAggregation for BinaryExpr { df: &DataFrame, groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { let left = self.left.as_partitioned_aggregator().unwrap(); let right = self.right.as_partitioned_aggregator().unwrap(); let left = left.evaluate_partitioned(df, groups, state)?; @@ -486,10 +506,10 @@ impl 
PartitionedAggregation for BinaryExpr { fn finalize( &self, - partitioned: Series, + partitioned: Column, _groups: &GroupsProxy, _state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { Ok(partitioned) } } diff --git a/crates/polars-expr/src/expressions/cast.rs b/crates/polars-expr/src/expressions/cast.rs index ebfd50311918..dcbd67d36a7e 100644 --- a/crates/polars-expr/src/expressions/cast.rs +++ b/crates/polars-expr/src/expressions/cast.rs @@ -12,7 +12,7 @@ pub struct CastExpr { } impl CastExpr { - fn finish(&self, input: &Series) -> PolarsResult { + fn finish(&self, input: &Column) -> PolarsResult { input.cast_with_options(&self.dtype, self.options) } } @@ -22,9 +22,9 @@ impl PhysicalExpr for CastExpr { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { - let series = self.input.evaluate(df, state)?; - self.finish(&series) + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + let column = self.input.evaluate(df, state)?; + self.finish(&column) } #[allow(clippy::ptr_arg)] @@ -40,15 +40,18 @@ impl PhysicalExpr for CastExpr { // this will not explode and potentially increase memory due to overlapping groups AggState::AggregatedList(s) => { let ca = s.list().unwrap(); - let casted = ca.apply_to_inner(&|s| self.finish(&s))?; + let casted = ca.apply_to_inner(&|s| { + self.finish(&s.into_column()) + .map(|c| c.take_materialized_series()) + })?; ac.with_series(casted.into_series(), true, None)?; }, AggState::AggregatedScalar(s) => { - let s = self.finish(s)?; + let s = self.finish(&s.clone().into_column())?; if ac.is_literal() { - ac.with_literal(s); + ac.with_literal(s.take_materialized_series()); } else { - ac.with_series(s, true, None)?; + ac.with_series(s.take_materialized_series(), true, None)?; } }, _ => { @@ -56,12 +59,12 @@ impl PhysicalExpr for CastExpr { ac.groups(); let s = ac.flat_naive(); - let s = self.finish(s.as_ref())?; + let s = 
self.finish(&s.as_ref().clone().into_column())?; if ac.is_literal() { - ac.with_literal(s); + ac.with_literal(s.take_materialized_series()); } else { - ac.with_series(s, false, None)?; + ac.with_series(s.take_materialized_series(), false, None)?; } }, } @@ -91,17 +94,17 @@ impl PartitionedAggregation for CastExpr { df: &DataFrame, groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { let e = self.input.as_partitioned_aggregator().unwrap(); self.finish(&e.evaluate_partitioned(df, groups, state)?) } fn finalize( &self, - partitioned: Series, + partitioned: Column, groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { let agg = self.input.as_partitioned_aggregator().unwrap(); agg.finalize(partitioned, groups, state) } diff --git a/crates/polars-expr/src/expressions/column.rs b/crates/polars-expr/src/expressions/column.rs index 8a59d6c25ddb..2142d22df6d9 100644 --- a/crates/polars-expr/src/expressions/column.rs +++ b/crates/polars-expr/src/expressions/column.rs @@ -140,7 +140,7 @@ impl PhysicalExpr for ColumnExpr { fn as_expression(&self) -> Option<&Expr> { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let out = match self.schema.get_full(&self.name) { Some((idx, _, _)) => { // check if the schema was correct @@ -168,12 +168,12 @@ impl PhysicalExpr for ColumnExpr { // in debug builds we panic so that it can be fixed when occurring None => { if self.name.starts_with(CSE_REPLACED) { - return self.process_cse(df, &self.schema); + return self.process_cse(df, &self.schema).map(Column::from); } self.process_by_linear_search(df, state, true) }, }; - self.check_external_context(out, state) + self.check_external_context(out, state).map(Column::from) } #[allow(clippy::ptr_arg)] @@ -184,7 +184,11 @@ impl PhysicalExpr for ColumnExpr { state: &ExecutionState, ) -> PolarsResult> { 
let s = self.evaluate(df, state)?; - Ok(AggregationContext::new(s, Cow::Borrowed(groups), false)) + Ok(AggregationContext::new( + s.take_materialized_series(), + Cow::Borrowed(groups), + false, + )) } fn as_partitioned_aggregator(&self) -> Option<&dyn PartitionedAggregation> { @@ -209,16 +213,16 @@ impl PartitionedAggregation for ColumnExpr { df: &DataFrame, _groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { self.evaluate(df, state) } fn finalize( &self, - partitioned: Series, + partitioned: Column, _groups: &GroupsProxy, _state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { Ok(partitioned) } } diff --git a/crates/polars-expr/src/expressions/count.rs b/crates/polars-expr/src/expressions/count.rs index 5e8b4c75e376..6102caf5a354 100644 --- a/crates/polars-expr/src/expressions/count.rs +++ b/crates/polars-expr/src/expressions/count.rs @@ -21,11 +21,8 @@ impl PhysicalExpr for CountExpr { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, _state: &ExecutionState) -> PolarsResult { - Ok(Series::new( - PlSmallStr::from_static("len"), - [df.height() as IdxSize], - )) + fn evaluate(&self, df: &DataFrame, _state: &ExecutionState) -> PolarsResult { + Ok(Series::new(PlSmallStr::from_static("len"), [df.height() as IdxSize]).into_column()) } fn evaluate_on_groups<'a>( @@ -59,19 +56,19 @@ impl PartitionedAggregation for CountExpr { df: &DataFrame, groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { self.evaluate_on_groups(df, groups, state) - .map(|mut ac| ac.aggregated()) + .map(|mut ac| ac.aggregated().into_column()) } /// Called to merge all the partitioned results in a final aggregate. #[allow(clippy::ptr_arg)] fn finalize( &self, - partitioned: Series, + partitioned: Column, groups: &GroupsProxy, _state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { // SAFETY: groups are in bounds. 
let agg = unsafe { partitioned.agg_sum(groups) }; Ok(agg.with_name(PlSmallStr::from_static(LEN))) diff --git a/crates/polars-expr/src/expressions/filter.rs b/crates/polars-expr/src/expressions/filter.rs index b11d0dda6129..6f847a7fa8ed 100644 --- a/crates/polars-expr/src/expressions/filter.rs +++ b/crates/polars-expr/src/expressions/filter.rs @@ -24,7 +24,7 @@ impl PhysicalExpr for FilterExpr { fn as_expression(&self) -> Option<&Expr> { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let s_f = || self.input.evaluate(df, state); let predicate_f = || self.by.evaluate(df, state); diff --git a/crates/polars-expr/src/expressions/gather.rs b/crates/polars-expr/src/expressions/gather.rs index a6450bcb531b..19a0e35ff315 100644 --- a/crates/polars-expr/src/expressions/gather.rs +++ b/crates/polars-expr/src/expressions/gather.rs @@ -18,7 +18,7 @@ impl PhysicalExpr for GatherExpr { fn as_expression(&self) -> Option<&Expr> { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let series = self.phys_expr.evaluate(df, state)?; self.finish(df, state, series) } @@ -102,10 +102,10 @@ impl GatherExpr { &self, df: &DataFrame, state: &ExecutionState, - series: Series, - ) -> PolarsResult { + series: Column, + ) -> PolarsResult { let idx = self.idx.evaluate(df, state)?; - let idx = convert_to_unsigned_index(&idx, series.len())?; + let idx = convert_to_unsigned_index(idx.as_materialized_series(), series.len())?; series.take(&idx) } diff --git a/crates/polars-expr/src/expressions/literal.rs b/crates/polars-expr/src/expressions/literal.rs index 2089e4cf5bb4..0c6900d4356b 100644 --- a/crates/polars-expr/src/expressions/literal.rs +++ b/crates/polars-expr/src/expressions/literal.rs @@ -21,29 +21,31 @@ impl PhysicalExpr for LiteralExpr { fn 
as_expression(&self) -> Option<&Expr> { Some(&self.1) } - fn evaluate(&self, _df: &DataFrame, _state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, _df: &DataFrame, _state: &ExecutionState) -> PolarsResult { use LiteralValue::*; let s = match &self.0 { #[cfg(feature = "dtype-i8")] - Int8(v) => Int8Chunked::full(get_literal_name().clone(), *v, 1).into_series(), + Int8(v) => Int8Chunked::full(get_literal_name().clone(), *v, 1).into_column(), #[cfg(feature = "dtype-i16")] - Int16(v) => Int16Chunked::full(get_literal_name().clone(), *v, 1).into_series(), - Int32(v) => Int32Chunked::full(get_literal_name().clone(), *v, 1).into_series(), - Int64(v) => Int64Chunked::full(get_literal_name().clone(), *v, 1).into_series(), + Int16(v) => Int16Chunked::full(get_literal_name().clone(), *v, 1).into_column(), + Int32(v) => Int32Chunked::full(get_literal_name().clone(), *v, 1).into_column(), + Int64(v) => Int64Chunked::full(get_literal_name().clone(), *v, 1).into_column(), #[cfg(feature = "dtype-u8")] - UInt8(v) => UInt8Chunked::full(get_literal_name().clone(), *v, 1).into_series(), + UInt8(v) => UInt8Chunked::full(get_literal_name().clone(), *v, 1).into_column(), #[cfg(feature = "dtype-u16")] - UInt16(v) => UInt16Chunked::full(get_literal_name().clone(), *v, 1).into_series(), - UInt32(v) => UInt32Chunked::full(get_literal_name().clone(), *v, 1).into_series(), - UInt64(v) => UInt64Chunked::full(get_literal_name().clone(), *v, 1).into_series(), - Float32(v) => Float32Chunked::full(get_literal_name().clone(), *v, 1).into_series(), - Float64(v) => Float64Chunked::full(get_literal_name().clone(), *v, 1).into_series(), + UInt16(v) => UInt16Chunked::full(get_literal_name().clone(), *v, 1).into_column(), + UInt32(v) => UInt32Chunked::full(get_literal_name().clone(), *v, 1).into_column(), + UInt64(v) => UInt64Chunked::full(get_literal_name().clone(), *v, 1).into_column(), + Float32(v) => Float32Chunked::full(get_literal_name().clone(), *v, 1).into_column(), + Float64(v) => 
Float64Chunked::full(get_literal_name().clone(), *v, 1).into_column(), #[cfg(feature = "dtype-decimal")] Decimal(v, scale) => Int128Chunked::full(get_literal_name().clone(), *v, 1) .into_decimal_unchecked(None, *scale) - .into_series(), - Boolean(v) => BooleanChunked::full(get_literal_name().clone(), *v, 1).into_series(), - Null => polars_core::prelude::Series::new_null(get_literal_name().clone(), 1), + .into_column(), + Boolean(v) => BooleanChunked::full(get_literal_name().clone(), *v, 1).into_column(), + Null => { + polars_core::prelude::Series::new_null(get_literal_name().clone(), 1).into_column() + }, Range { low, high, dtype } => match dtype { DataType::Int32 => { polars_ensure!( @@ -53,13 +55,13 @@ impl PhysicalExpr for LiteralExpr { let low = *low as i32; let high = *high as i32; let ca: NoNull = (low..high).collect(); - ca.into_inner().into_series() + ca.into_inner().into_column() }, DataType::Int64 => { let low = *low; let high = *high; let ca: NoNull = (low..high).collect(); - ca.into_inner().into_series() + ca.into_inner().into_column() }, DataType::UInt32 => { polars_ensure!( @@ -69,28 +71,28 @@ impl PhysicalExpr for LiteralExpr { let low = *low as u32; let high = *high as u32; let ca: NoNull = (low..high).collect(); - ca.into_inner().into_series() + ca.into_inner().into_column() }, dt => polars_bail!( InvalidOperation: "datatype `{}` is not supported as range", dt ), }, - String(v) => StringChunked::full(get_literal_name().clone(), v, 1).into_series(), - Binary(v) => BinaryChunked::full(get_literal_name().clone(), v, 1).into_series(), + String(v) => StringChunked::full(get_literal_name().clone(), v, 1).into_column(), + Binary(v) => BinaryChunked::full(get_literal_name().clone(), v, 1).into_column(), #[cfg(feature = "dtype-datetime")] DateTime(timestamp, tu, tz) => { Int64Chunked::full(get_literal_name().clone(), *timestamp, 1) .into_datetime(*tu, tz.clone()) - .into_series() + .into_column() }, #[cfg(feature = "dtype-duration")] Duration(v, tu) => 
Int64Chunked::full(get_literal_name().clone(), *v, 1) .into_duration(*tu) - .into_series(), + .into_column(), #[cfg(feature = "dtype-date")] Date(v) => Int32Chunked::full(get_literal_name().clone(), *v, 1) .into_date() - .into_series(), + .into_column(), #[cfg(feature = "dtype-time")] Time(v) => { if !(0..NANOSECONDS_IN_DAY).contains(v) { @@ -102,16 +104,17 @@ impl PhysicalExpr for LiteralExpr { Int64Chunked::full(get_literal_name().clone(), *v, 1) .into_time() - .into_series() + .into_column() }, - Series(series) => series.deref().clone(), - OtherScalar(s) => s.clone().into_series(get_literal_name().clone()), + Series(series) => series.deref().clone().into_column(), + OtherScalar(s) => s.clone().into_column(get_literal_name().clone()), lv @ (Int(_) | Float(_) | StrCat(_)) => polars_core::prelude::Series::from_any_values( get_literal_name().clone(), &[lv.to_any_value().unwrap()], false, ) - .unwrap(), + .unwrap() + .into_column(), }; Ok(s) } @@ -124,7 +127,10 @@ impl PhysicalExpr for LiteralExpr { state: &ExecutionState, ) -> PolarsResult> { let s = self.evaluate(df, state)?; - Ok(AggregationContext::from_literal(s, Cow::Borrowed(groups))) + Ok(AggregationContext::from_literal( + s.take_materialized_series(), + Cow::Borrowed(groups), + )) } fn as_partitioned_aggregator(&self) -> Option<&dyn PartitionedAggregation> { @@ -150,16 +156,16 @@ impl PartitionedAggregation for LiteralExpr { df: &DataFrame, _groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { self.evaluate(df, state) } fn finalize( &self, - partitioned: Series, + partitioned: Column, _groups: &GroupsProxy, _state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { Ok(partitioned) } } diff --git a/crates/polars-expr/src/expressions/mod.rs b/crates/polars-expr/src/expressions/mod.rs index 15550c517fe7..8ccc5349b733 100644 --- a/crates/polars-expr/src/expressions/mod.rs +++ b/crates/polars-expr/src/expressions/mod.rs @@ -536,7 +536,7 @@ pub trait PhysicalExpr: 
Send + Sync { } /// Take a DataFrame and evaluate the expression. - fn evaluate(&self, df: &DataFrame, _state: &ExecutionState) -> PolarsResult; + fn evaluate(&self, df: &DataFrame, _state: &ExecutionState) -> PolarsResult; /// Some expression that are not aggregations can be done per group /// Think of sort, slice, filter, shift, etc. @@ -611,7 +611,9 @@ impl PhysicalIoExpr for PhysicalIoHelper { if self.has_window_function { state.insert_has_window_function_flag(); } - self.expr.evaluate(df, &state) + self.expr + .evaluate(df, &state) + .map(|c| c.take_materialized_series()) } fn live_variables(&self) -> Option> { @@ -651,14 +653,14 @@ pub trait PartitionedAggregation: Send + Sync + PhysicalExpr { df: &DataFrame, groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult; + ) -> PolarsResult; /// Called to merge all the partitioned results in a final aggregate. #[allow(clippy::ptr_arg)] fn finalize( &self, - partitioned: Series, + partitioned: Column, groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult; + ) -> PolarsResult; } diff --git a/crates/polars-expr/src/expressions/rolling.rs b/crates/polars-expr/src/expressions/rolling.rs index 806e3d5b0398..7e9897d7328c 100644 --- a/crates/polars-expr/src/expressions/rolling.rs +++ b/crates/polars-expr/src/expressions/rolling.rs @@ -19,7 +19,7 @@ pub(crate) struct RollingExpr { } impl PhysicalExpr for RollingExpr { - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let groups_key = format!("{:?}", &self.options); let groups_map = state.group_tuples.read().unwrap(); @@ -47,7 +47,7 @@ impl PhysicalExpr for RollingExpr { if let Some(name) = &self.out_name { out.rename(name.clone()); } - Ok(out) + Ok(out.into_column()) } fn evaluate_on_groups<'a>( diff --git a/crates/polars-expr/src/expressions/slice.rs b/crates/polars-expr/src/expressions/slice.rs index d0e187120939..2b805edd1bb0 100644 --- 
a/crates/polars-expr/src/expressions/slice.rs +++ b/crates/polars-expr/src/expressions/slice.rs @@ -82,7 +82,7 @@ impl PhysicalExpr for SliceExpr { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let results = POOL.install(|| { [&self.offset, &self.length, &self.input] .par_iter() @@ -92,7 +92,11 @@ impl PhysicalExpr for SliceExpr { let offset = &results[0]; let length = &results[1]; let series = &results[2]; - let (offset, length) = extract_args(offset, length, &self.expr)?; + let (offset, length) = extract_args( + offset.as_materialized_series(), + length.as_materialized_series(), + &self.expr, + )?; Ok(series.slice(offset, length)) } diff --git a/crates/polars-expr/src/expressions/sort.rs b/crates/polars-expr/src/expressions/sort.rs index 751b09b07475..be9fe57e29ce 100644 --- a/crates/polars-expr/src/expressions/sort.rs +++ b/crates/polars-expr/src/expressions/sort.rs @@ -46,7 +46,7 @@ impl PhysicalExpr for SortExpr { fn as_expression(&self) -> Option<&Expr> { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let series = self.physical_expr.evaluate(df, state)?; series.sort_with(self.options) } diff --git a/crates/polars-expr/src/expressions/sortby.rs b/crates/polars-expr/src/expressions/sortby.rs index f966e4cbb544..1624d7c9bcd6 100644 --- a/crates/polars-expr/src/expressions/sortby.rs +++ b/crates/polars-expr/src/expressions/sortby.rs @@ -199,7 +199,7 @@ impl PhysicalExpr for SortByExpr { fn as_expression(&self) -> Option<&Expr> { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let series_f = || self.input.evaluate(df, state); if self.by.is_empty() { // Sorting by 0 columns returns 
input unchanged. @@ -220,13 +220,11 @@ impl PhysicalExpr for SortByExpr { .by .iter() .map(|e| { - e.evaluate(df, state) - .map(|s| match s.dtype() { - #[cfg(feature = "dtype-categorical")] - DataType::Categorical(_, _) | DataType::Enum(_, _) => s, - _ => s.to_physical_repr().into_owned(), - }) - .map(Column::from) + e.evaluate(df, state).map(|s| match s.dtype() { + #[cfg(feature = "dtype-categorical")] + DataType::Categorical(_, _) | DataType::Enum(_, _) => s, + _ => s.to_physical_repr(), + }) }) .collect::>>()?; diff --git a/crates/polars-expr/src/expressions/ternary.rs b/crates/polars-expr/src/expressions/ternary.rs index 37600c71f06a..2d1035c22eb7 100644 --- a/crates/polars-expr/src/expressions/ternary.rs +++ b/crates/polars-expr/src/expressions/ternary.rs @@ -79,7 +79,7 @@ impl PhysicalExpr for TernaryExpr { Some(&self.expr) } - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let mut state = state.split(); // Don't cache window functions as they run in parallel. 
state.remove_cache_window_flag(); @@ -337,7 +337,7 @@ impl PartitionedAggregation for TernaryExpr { df: &DataFrame, groups: &GroupsProxy, state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { let truthy = self.truthy.as_partitioned_aggregator().unwrap(); let falsy = self.falsy.as_partitioned_aggregator().unwrap(); let mask = self.predicate.as_partitioned_aggregator().unwrap(); @@ -352,10 +352,10 @@ impl PartitionedAggregation for TernaryExpr { fn finalize( &self, - partitioned: Series, + partitioned: Column, _groups: &GroupsProxy, _state: &ExecutionState, - ) -> PolarsResult { + ) -> PolarsResult { Ok(partitioned) } } diff --git a/crates/polars-expr/src/expressions/window.rs b/crates/polars-expr/src/expressions/window.rs index f843c0e83d95..e15a301f68b4 100644 --- a/crates/polars-expr/src/expressions/window.rs +++ b/crates/polars-expr/src/expressions/window.rs @@ -371,7 +371,7 @@ impl PhysicalExpr for WindowExpr { // This first cached the group_by and the join tuples, but rayon under a mutex leads to deadlocks: // https://github.com/rayon-rs/rayon/issues/592 - fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { // This method does the following: // 1. determine group_by tuples based on the group_column // 2. 
apply an aggregation function @@ -400,7 +400,7 @@ impl PhysicalExpr for WindowExpr { if df.is_empty() { let field = self.phys_function.to_field(&df.schema())?; - return Ok(Series::full_null(field.name().clone(), 0, field.dtype())); + return Ok(Column::full_null(field.name().clone(), 0, field.dtype())); } let group_by_columns = self @@ -443,7 +443,7 @@ impl PhysicalExpr for WindowExpr { if let Some((order_by, options)) = &self.order_by { let order_by = order_by.evaluate(df, state)?; polars_ensure!(order_by.len() == df.height(), ShapeMismatch: "the order by expression evaluated to a length: {} that doesn't match the input DataFrame: {}", order_by.len(), df.height()); - groups = update_groups_sort_by(&groups, &order_by, options)? + groups = update_groups_sort_by(&groups, order_by.as_materialized_series(), options)? } let out: PolarsResult = Ok(groups); @@ -521,7 +521,7 @@ impl PhysicalExpr for WindowExpr { if let Some(name) = &self.out_name { out.rename(name.clone()); } - Ok(out) + Ok(out.into_column()) }, Explode => { let mut out = ac.aggregated().explode()?; @@ -529,7 +529,7 @@ impl PhysicalExpr for WindowExpr { if let Some(name) = &self.out_name { out.rename(name.clone()); } - Ok(out) + Ok(out.into_column()) }, Map => { // TODO! 
@@ -551,6 +551,7 @@ impl PhysicalExpr for WindowExpr { state, &cache_key, ) + .map(Column::from) }, Join => { let out_column = ac.aggregated(); @@ -566,7 +567,7 @@ impl PhysicalExpr for WindowExpr { // we take the group locations to directly map them to the right place (UpdateGroups::No, Some(out)) => { cache_gb(gb, state, &cache_key); - Ok(out) + Ok(out.into_column()) }, (_, _) => { let keys = gb.keys(); @@ -625,7 +626,7 @@ impl PhysicalExpr for WindowExpr { jt_map.insert(cache_key, join_opt_ids); } - Ok(out) + Ok(out.into_column()) }, } }, diff --git a/crates/polars-io/src/csv/read/options.rs b/crates/polars-io/src/csv/read/options.rs index 8c840e137d7a..cbbf796d45d7 100644 --- a/crates/polars-io/src/csv/read/options.rs +++ b/crates/polars-io/src/csv/read/options.rs @@ -298,7 +298,7 @@ impl CsvParseOptions { } /// Automatically try to parse dates/datetimes and time. If parsing fails, - /// columns remain of dtype `[DataType::String]`. + /// columns remain of dtype [`DataType::String`]. 
pub fn with_try_parse_dates(mut self, try_parse_dates: bool) -> Self { self.try_parse_dates = try_parse_dates; self diff --git a/crates/polars-io/src/shared.rs b/crates/polars-io/src/shared.rs index 4babd4f65bd5..1eea338f4788 100644 --- a/crates/polars-io/src/shared.rs +++ b/crates/polars-io/src/shared.rs @@ -13,7 +13,7 @@ pub trait SerReader where R: Read, { - /// Create a new instance of the `[SerReader]` + /// Create a new instance of the [`SerReader`] fn new(reader: R) -> Self; /// Make sure that all columns are contiguous in memory by diff --git a/crates/polars-lazy/Cargo.toml b/crates/polars-lazy/Cargo.toml index 78f8274fb079..3f8c64dd1970 100644 --- a/crates/polars-lazy/Cargo.toml +++ b/crates/polars-lazy/Cargo.toml @@ -47,6 +47,7 @@ parquet = [ "polars-pipe?/parquet", "polars-expr/parquet", "polars-mem-engine/parquet", + "polars-stream?/parquet", ] async = [ "polars-plan/async", @@ -54,11 +55,26 @@ async = [ "polars-pipe?/async", "polars-mem-engine/async", ] -cloud = ["async", "polars-pipe?/cloud", "polars-plan/cloud", "tokio", "futures", "polars-mem-engine/cloud"] +cloud = [ + "async", + "polars-pipe?/cloud", + "polars-plan/cloud", + "tokio", + "futures", + "polars-mem-engine/cloud", + "polars-stream?/cloud", +] cloud_write = ["cloud"] -ipc = ["polars-io/ipc", "polars-plan/ipc", "polars-pipe?/ipc", "polars-mem-engine/ipc"] -json = ["polars-io/json", "polars-plan/json", "polars-json", "polars-pipe?/json", "polars-mem-engine/json"] -csv = ["polars-io/csv", "polars-plan/csv", "polars-pipe?/csv", "polars-mem-engine/csv"] +ipc = ["polars-io/ipc", "polars-plan/ipc", "polars-pipe?/ipc", "polars-mem-engine/ipc", "polars-stream?/ipc"] +json = [ + "polars-io/json", + "polars-plan/json", + "polars-json", + "polars-pipe?/json", + "polars-mem-engine/json", + "polars-stream?/json", +] +csv = ["polars-io/csv", "polars-plan/csv", "polars-pipe?/csv", "polars-mem-engine/csv", "polars-stream?/csv"] temporal = [ "dtype-datetime", "dtype-date", @@ -223,7 +239,7 @@ 
string_reverse = ["polars-plan/string_reverse"] string_to_integer = ["polars-plan/string_to_integer"] arg_where = ["polars-plan/arg_where"] search_sorted = ["polars-plan/search_sorted"] -merge_sorted = ["polars-plan/merge_sorted"] +merge_sorted = ["polars-plan/merge_sorted", "polars-stream?/merge_sorted"] meta = ["polars-plan/meta"] pivot = ["polars-core/rows", "polars-ops/pivot", "polars-plan/pivot"] top_k = ["polars-plan/top_k"] diff --git a/crates/polars-lazy/src/dsl/list.rs b/crates/polars-lazy/src/dsl/list.rs index d73e4be5d13e..c706ee9b6957 100644 --- a/crates/polars-lazy/src/dsl/list.rs +++ b/crates/polars-lazy/src/dsl/list.rs @@ -69,7 +69,7 @@ fn run_per_sublist( let df = s.into_frame(); let out = phys_expr.evaluate(&df, &state); match out { - Ok(s) => Some(s), + Ok(s) => Some(s.take_materialized_series()), Err(e) => { *m_err.lock().unwrap() = Some(e); None @@ -90,7 +90,7 @@ fn run_per_sublist( let out = phys_expr.evaluate(&df_container, &state); df_container.clear_columns(); match out { - Ok(s) => Some(s), + Ok(s) => Some(s.take_materialized_series()), Err(e) => { err = Some(e); None diff --git a/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs b/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs index ad4d8cd1fb48..0700f5f767e7 100644 --- a/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs +++ b/crates/polars-lazy/src/physical_plan/streaming/construct_pipeline.rs @@ -36,7 +36,9 @@ impl PhysicalIoExpr for Wrap { } impl PhysicalPipedExpr for Wrap { fn evaluate(&self, chunk: &DataChunk, state: &ExecutionState) -> PolarsResult { - self.0.evaluate(&chunk.data, state) + self.0 + .evaluate(&chunk.data, state) + .map(|c| c.take_materialized_series()) } fn field(&self, input_schema: &Schema) -> PolarsResult { self.0.to_field(input_schema) diff --git a/crates/polars-lazy/src/scan/csv.rs b/crates/polars-lazy/src/scan/csv.rs index 7127d64e87cb..dbfa989b5d05 100644 --- a/crates/polars-lazy/src/scan/csv.rs +++ 
b/crates/polars-lazy/src/scan/csv.rs @@ -137,7 +137,7 @@ impl LazyCsvReader { }) } - /// Set the `char` used as quote char. The default is `b'"'`. If set to `[None]` quoting is disabled. + /// Set the `char` used as quote char. The default is `b'"'`. If set to [`None`] quoting is disabled. #[must_use] pub fn with_quote_char(self, quote_char: Option) -> Self { self.map_parse_options(|opts| opts.with_quote_char(quote_char)) @@ -181,7 +181,7 @@ impl LazyCsvReader { } /// Automatically try to parse dates/datetimes and time. - /// If parsing fails, columns remain of dtype `[DataType::String]`. + /// If parsing fails, columns remain of dtype [`DataType::String`]. #[cfg(feature = "temporal")] pub fn with_try_parse_dates(self, try_parse_dates: bool) -> Self { self.map_parse_options(|opts| opts.with_try_parse_dates(try_parse_dates)) diff --git a/crates/polars-mem-engine/src/executors/filter.rs b/crates/polars-mem-engine/src/executors/filter.rs index 689674345760..417a7ecf766e 100644 --- a/crates/polars-mem-engine/src/executors/filter.rs +++ b/crates/polars-mem-engine/src/executors/filter.rs @@ -45,7 +45,7 @@ impl FilterExec { if self.has_window { state.clear_window_expr_cache() } - df.filter(series_to_mask(&s)?) + df.filter(series_to_mask(s.as_materialized_series())?) } fn execute_chunks( @@ -55,7 +55,7 @@ impl FilterExec { ) -> PolarsResult { let iter = chunks.into_par_iter().map(|df| { let s = self.predicate.evaluate(&df, state)?; - df.filter(series_to_mask(&s)?) + df.filter(series_to_mask(s.as_materialized_series())?) 
}); let df = POOL.install(|| iter.collect::>>())?; Ok(accumulate_dataframes_vertical_unchecked(df)) diff --git a/crates/polars-mem-engine/src/executors/group_by_partitioned.rs b/crates/polars-mem-engine/src/executors/group_by_partitioned.rs index ad41378b3086..61cb9b10bc52 100644 --- a/crates/polars-mem-engine/src/executors/group_by_partitioned.rs +++ b/crates/polars-mem-engine/src/executors/group_by_partitioned.rs @@ -332,11 +332,7 @@ impl PartitionGroupByExec { .map(|(expr, partitioned_s)| { let agg_expr = expr.as_partitioned_aggregator().unwrap(); agg_expr - .finalize( - partitioned_s.as_materialized_series().clone(), - groups, - state, - ) + .finalize(partitioned_s.clone(), groups, state) .map(Column::from) }) .collect(); diff --git a/crates/polars-mem-engine/src/executors/join.rs b/crates/polars-mem-engine/src/executors/join.rs index 5edab8551ece..4fed3cb7a3ff 100644 --- a/crates/polars-mem-engine/src/executors/join.rs +++ b/crates/polars-mem-engine/src/executors/join.rs @@ -139,8 +139,8 @@ impl Executor for JoinExec { let df = df_left._join_impl( &df_right, - left_on_series, - right_on_series, + left_on_series.into_iter().map(|c| c.take_materialized_series()).collect(), + right_on_series.into_iter().map(|c| c.take_materialized_series()).collect(), self.args.clone(), true, state.verbose(), diff --git a/crates/polars-mem-engine/src/executors/projection_utils.rs b/crates/polars-mem-engine/src/executors/projection_utils.rs index 47464849582e..01dc5f362fd9 100644 --- a/crates/polars-mem-engine/src/executors/projection_utils.rs +++ b/crates/polars-mem-engine/src/executors/projection_utils.rs @@ -20,7 +20,7 @@ fn rolling_evaluate( df: &DataFrame, state: &ExecutionState, rolling: PlHashMap<&RollingGroupOptions, Vec>, -) -> PolarsResult>> { +) -> PolarsResult>> { POOL.install(|| { rolling .par_iter() @@ -51,7 +51,7 @@ fn window_evaluate( df: &DataFrame, state: &ExecutionState, window: PlHashMap>, -) -> PolarsResult>> { +) -> PolarsResult>> { POOL.install(|| { window 
.par_iter() @@ -99,7 +99,7 @@ fn execute_projection_cached_window_fns( df: &DataFrame, exprs: &[Arc], state: &ExecutionState, -) -> PolarsResult> { +) -> PolarsResult> { // We partition by normal expression and window expression // - the normal expressions can run in parallel // - the window expression take more memory and often use the same group_by keys and join tuples @@ -202,7 +202,7 @@ fn run_exprs_par( df: &DataFrame, exprs: &[Arc], state: &ExecutionState, -) -> PolarsResult> { +) -> PolarsResult> { POOL.install(|| { exprs .par_iter() @@ -215,7 +215,7 @@ fn run_exprs_seq( df: &DataFrame, exprs: &[Arc], state: &ExecutionState, -) -> PolarsResult> { +) -> PolarsResult> { exprs.iter().map(|expr| expr.evaluate(df, state)).collect() } @@ -225,7 +225,7 @@ pub(super) fn evaluate_physical_expressions( state: &ExecutionState, has_windows: bool, run_parallel: bool, -) -> PolarsResult> { +) -> PolarsResult> { let expr_runner = if has_windows { execute_projection_cached_window_fns } else if run_parallel && exprs.len() > 1 { @@ -246,7 +246,7 @@ pub(super) fn evaluate_physical_expressions( pub(super) fn check_expand_literals( df: &DataFrame, phys_expr: &[Arc], - mut selected_columns: Vec, + mut selected_columns: Vec, zero_length: bool, options: ProjectionOptions, ) -> PolarsResult { diff --git a/crates/polars-mem-engine/src/executors/stack.rs b/crates/polars-mem-engine/src/executors/stack.rs index ddeed0e8996b..ba6fa8111402 100644 --- a/crates/polars-mem-engine/src/executors/stack.rs +++ b/crates/polars-mem-engine/src/executors/stack.rs @@ -37,7 +37,12 @@ impl StackExec { self.options.run_parallel, )?; // We don't have to do a broadcast check as cse is not allowed to hit this. 
- df._add_series(res, schema)?; + df._add_series( + res.into_iter() + .map(|c| c.take_materialized_series()) + .collect(), + schema, + )?; Ok(df) }); @@ -94,7 +99,12 @@ impl StackExec { } } } - df._add_series(res, schema)?; + df._add_series( + res.into_iter() + .map(|v| v.take_materialized_series()) + .collect(), + schema, + )?; } df }; diff --git a/crates/polars-ops/src/chunked_array/gather/chunked.rs b/crates/polars-ops/src/chunked_array/gather/chunked.rs index 391837d52947..249e8dc1730a 100644 --- a/crates/polars-ops/src/chunked_array/gather/chunked.rs +++ b/crates/polars-ops/src/chunked_array/gather/chunked.rs @@ -88,6 +88,22 @@ fn prepare_series(s: &Series) -> Cow { phys } +impl TakeChunked for Column { + unsafe fn take_chunked_unchecked(&self, by: &[ChunkId], sorted: IsSorted) -> Self { + // @scalar-opt + let s = self.as_materialized_series(); + let s = unsafe { s.take_chunked_unchecked(by, sorted) }; + s.into_column() + } + + unsafe fn take_opt_chunked_unchecked(&self, by: &[ChunkId]) -> Self { + // @scalar-opt + let s = self.as_materialized_series(); + let s = unsafe { s.take_opt_chunked_unchecked(by) }; + s.into_column() + } +} + impl TakeChunked for Series { unsafe fn take_chunked_unchecked(&self, by: &[ChunkId], sorted: IsSorted) -> Self { let phys = prepare_series(self); diff --git a/crates/polars-ops/src/series/ops/idx.rs b/crates/polars-ops/src/series/ops/idx.rs deleted file mode 100644 index 07d6381435e2..000000000000 --- a/crates/polars-ops/src/series/ops/idx.rs +++ /dev/null @@ -1,22 +0,0 @@ -use num_traits::{FromPrimitive, Zero}; -use polars_core::prelude::*; -use polars_utils::index::ToIdx; - -fn prepare_gather_index_impl(ca: &ChunkedArray, length: usize) -> IdxCa -where T: PolarsNumericType, -T::Native: ToIdx -{ - T::Native::from_usize() - - ca.apply_generic(|v| { - v.and_then(|v|{ - if v < T::Native::zero() { - - } - - v.to_idx_size() - }) - }) -} - -pub fn convert_to_index(s: &Series, length: usize) diff --git 
a/crates/polars-ops/src/series/ops/replace.rs b/crates/polars-ops/src/series/ops/replace.rs index 7c5697429372..4aa84910239c 100644 --- a/crates/polars-ops/src/series/ops/replace.rs +++ b/crates/polars-ops/src/series/ops/replace.rs @@ -239,9 +239,11 @@ fn create_replacer(mut old: Series, mut new: Series, add_mask: bool) -> PolarsRe let len = old.len(); let cols = if add_mask { - // @scalar-opt - let mask = Column::new(PlSmallStr::from_static("__POLARS_REPLACE_MASK"), &[true]) - .new_from_index(0, new.len()); + let mask = Column::new_scalar( + PlSmallStr::from_static("__POLARS_REPLACE_MASK"), + true.into(), + new.len(), + ); vec![old.into(), new.into(), mask] } else { vec![old.into(), new.into()] diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index a88ff858e6ee..3d6b92aeba67 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -1822,6 +1822,7 @@ impl Expr { #[cfg(feature = "dtype-struct")] /// Count all unique values and create a struct mapping value to count. /// (Note that it is better to turn parallel off in the aggregation context). + /// The name of the struct field with the counts is given by the parameter `name`. pub fn value_counts(self, sort: bool, parallel: bool, name: &str, normalize: bool) -> Self { self.apply_private(FunctionExpr::ValueCounts { sort, @@ -1837,7 +1838,7 @@ impl Expr { #[cfg(feature = "unique_counts")] /// Returns a count of the unique values in the order of appearance. - /// This method differs from [`Expr::value_counts]` in that it does not return the + /// This method differs from [`Expr::value_counts`] in that it does not return the /// values, only the counts and might be faster. pub fn unique_counts(self) -> Self { self.apply_private(FunctionExpr::UniqueCounts) @@ -1967,10 +1968,10 @@ impl Expr { /// Apply a function/closure over multiple columns once the logical plan get executed. 
/// -/// This function is very similar to `[apply_mul]`, but differs in how it handles aggregations. +/// This function is very similar to [`apply_multiple`], but differs in how it handles aggregations. /// -/// * `map_mul` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power` -/// * `apply_mul` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc. +/// * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power` +/// * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc. /// /// It is the responsibility of the caller that the schema is correct by giving /// the correct output_type. If None given the output type of the input expr is used. @@ -1995,11 +1996,11 @@ where /// Apply a function/closure over multiple columns once the logical plan get executed. /// -/// This function is very similar to `[apply_mul]`, but differs in how it handles aggregations. +/// This function is very similar to [`apply_multiple`], but differs in how it handles aggregations. /// -/// * `map_mul` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power` -/// * `apply_mul` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc. -/// * `map_list_mul` should be used when the function expects a list aggregated series. +/// * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power` +/// * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc. +/// * [`map_list_multiple`] should be used when the function expects a list aggregated series. 
pub fn map_list_multiple(function: F, expr: E, output_type: GetOutput) -> Expr where F: Fn(&mut [Column]) -> PolarsResult> + 'static + Send + Sync, @@ -2025,10 +2026,10 @@ where /// It is the responsibility of the caller that the schema is correct by giving /// the correct output_type. If None given the output type of the input expr is used. /// -/// This difference with `[map_mul]` is that `[apply_mul]` will create a separate `[Series]` per group. +/// This difference with [`map_multiple`] is that [`apply_multiple`] will create a separate [`Series`] per group. /// -/// * `[map_mul]` should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power` -/// * `[apply_mul]` should be used for operations that work on a group of data. e.g. `sum`, `count`, etc. +/// * [`map_multiple`] should be used for operations that are independent of groups, e.g. `multiply * 2`, or `raise to the power` +/// * [`apply_multiple`] should be used for operations that work on a group of data. e.g. `sum`, `count`, etc. pub fn apply_multiple( function: F, expr: E, diff --git a/crates/polars-plan/src/plans/aexpr/mod.rs b/crates/polars-plan/src/plans/aexpr/mod.rs index 286ea86ac968..e53dc50dc6d9 100644 --- a/crates/polars-plan/src/plans/aexpr/mod.rs +++ b/crates/polars-plan/src/plans/aexpr/mod.rs @@ -192,7 +192,7 @@ pub enum AExpr { /// Function arguments /// Some functions rely on aliases, /// for instance assignment of struct fields. - /// Therefor we need `[ExprIr]`. + /// Therefor we need [`ExprIr`]. 
input: Vec, /// function to apply function: FunctionExpr, diff --git a/crates/polars-plan/src/plans/functions/explode.rs b/crates/polars-plan/src/plans/functions/explode.rs deleted file mode 100644 index a5140d81103b..000000000000 --- a/crates/polars-plan/src/plans/functions/explode.rs +++ /dev/null @@ -1,5 +0,0 @@ -use super::*; - -pub(super) fn explode_impl(df: DataFrame, columns: &[PlSmallStr]) -> PolarsResult { - df.explode(columns) -} diff --git a/crates/polars-python/Cargo.toml b/crates/polars-python/Cargo.toml index 16af7a3071df..1f1624fa7b0f 100644 --- a/crates/polars-python/Cargo.toml +++ b/crates/polars-python/Cargo.toml @@ -22,7 +22,7 @@ polars-time = { workspace = true } polars-utils = { workspace = true } # TODO! remove this once truly activated. This is required to make sdist building work -polars-stream = { workspace = true } +# polars-stream = { workspace = true } ahash = { workspace = true } arboard = { workspace = true, optional = true } diff --git a/crates/polars-python/src/functions/lazy.rs b/crates/polars-python/src/functions/lazy.rs index 24db48144508..d3ebb376d10f 100644 --- a/crates/polars-python/src/functions/lazy.rs +++ b/crates/polars-python/src/functions/lazy.rs @@ -469,6 +469,7 @@ pub fn lit(value: &Bound<'_, PyAny>, allow_object: bool, is_scalar: bool) -> PyR ) })?; match av { + #[cfg(feature = "object")] AnyValue::ObjectOwned(_) => { let s = Python::with_gil(|py| { PySeries::new_object(py, "", vec![ObjectValue::from(value.into_py(py))], false) diff --git a/crates/polars-python/src/functions/misc.rs b/crates/polars-python/src/functions/misc.rs index 2ade770d728e..1df25a222b16 100644 --- a/crates/polars-python/src/functions/misc.rs +++ b/crates/polars-python/src/functions/misc.rs @@ -66,5 +66,6 @@ pub fn register_plugin_function( #[pyfunction] pub fn __register_startup_deps() { + #[cfg(feature = "object")] crate::on_startup::register_startup_deps() } diff --git a/crates/polars-python/src/lazyframe/general.rs 
b/crates/polars-python/src/lazyframe/general.rs index 13529cfd9d1f..fd89884ece82 100644 --- a/crates/polars-python/src/lazyframe/general.rs +++ b/crates/polars-python/src/lazyframe/general.rs @@ -1130,6 +1130,7 @@ impl PyLazyFrame { ldf.tail(n).into() } + #[cfg(feature = "pivot")] #[pyo3(signature = (on, index, value_name, variable_name))] fn unpivot( &self, diff --git a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs index 06a98e3fe970..e3425b52ccd9 100644 --- a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs @@ -973,6 +973,7 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { StringFunction::ExtractMany { .. } => { return Err(PyNotImplementedError::new_err("extract_many")) }, + #[cfg(feature = "regex")] StringFunction::EscapeRegex => { (PyStringFunction::EscapeRegex.into_py(py),).to_object(py) }, @@ -1221,7 +1222,6 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { FunctionExpr::Mode => ("mode",).to_object(py), FunctionExpr::Skew(bias) => ("skew", bias).to_object(py), FunctionExpr::Kurtosis(fisher, bias) => ("kurtosis", fisher, bias).to_object(py), - #[cfg(feature = "dtype-array")] FunctionExpr::Reshape(_) => return Err(PyNotImplementedError::new_err("reshape")), #[cfg(feature = "repeat_by")] FunctionExpr::RepeatBy => ("repeat_by",).to_object(py), diff --git a/crates/polars-python/src/lazyframe/visitor/nodes.rs b/crates/polars-python/src/lazyframe/visitor/nodes.rs index 28c5e459b1e5..05a56d920719 100644 --- a/crates/polars-python/src/lazyframe/visitor/nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/nodes.rs @@ -584,6 +584,7 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult { columns.iter().map(|s| s.to_string()).collect::>(), ) .to_object(py), + #[cfg(feature = "pivot")] FunctionIR::Unpivot { args, schema: _ } => ( "unpivot", args.index.iter().map(|s| 
s.as_str()).collect::>(), diff --git a/crates/polars-python/src/series/general.rs b/crates/polars-python/src/series/general.rs index f65822146d2c..b14285e77aa0 100644 --- a/crates/polars-python/src/series/general.rs +++ b/crates/polars-python/src/series/general.rs @@ -168,25 +168,15 @@ impl PySeries { } fn bitand(&self, other: &PySeries) -> PyResult { - let out = self - .series - .bitand(&other.series) - .map_err(PyPolarsErr::from)?; + let out = (&self.series & &other.series).map_err(PyPolarsErr::from)?; Ok(out.into()) } - fn bitor(&self, other: &PySeries) -> PyResult { - let out = self - .series - .bitor(&other.series) - .map_err(PyPolarsErr::from)?; + let out = (&self.series | &other.series).map_err(PyPolarsErr::from)?; Ok(out.into()) } fn bitxor(&self, other: &PySeries) -> PyResult { - let out = self - .series - .bitxor(&other.series) - .map_err(PyPolarsErr::from)?; + let out = (&self.series ^ &other.series).map_err(PyPolarsErr::from)?; Ok(out.into()) } diff --git a/crates/polars-stream/Cargo.toml b/crates/polars-stream/Cargo.toml index fc130a035140..c40f477ff741 100644 --- a/crates/polars-stream/Cargo.toml +++ b/crates/polars-stream/Cargo.toml @@ -16,7 +16,7 @@ futures = { workspace = true } memmap = { workspace = true } parking_lot = { workspace = true } pin-project-lite = { workspace = true } -polars-io = { workspace = true, features = ["async", "cloud", "aws"] } +polars-io = { workspace = true } polars-utils = { workspace = true } rand = { workspace = true } rayon = { workspace = true } @@ -26,11 +26,10 @@ tokio = { workspace = true } polars-core = { workspace = true } polars-error = { workspace = true } -polars-expr = { workspace = true, features = ["dtype-full"] } -# TODO: feature gate -polars-mem-engine = { workspace = true, features = ["parquet", "csv", "json", "ipc", "cloud", "python", "dtype-categorical", "dtype-i8", "dtype-i16", "dtype-u8", "dtype-u16", "dtype-decimal", "dtype-struct", "object"] } +polars-expr = { workspace = true } +polars-mem-engine 
= { workspace = true } polars-parquet = { workspace = true } -polars-plan = { workspace = true, features = ["parquet", "csv", "json", "ipc", "cloud", "python", "serde", "dtype-categorical", "dtype-i8", "dtype-i16", "dtype-u8", "dtype-u16", "dtype-decimal", "dtype-struct", "object"] } +polars-plan = { workspace = true } [build-dependencies] version_check = { workspace = true } @@ -41,6 +40,11 @@ bitwise = ["polars-core/bitwise", "polars-plan/bitwise", "polars-expr/bitwise"] merge_sorted = ["polars-plan/merge_sorted"] dynamic_group_by = [] strings = [] +ipc = ["polars-mem-engine/ipc", "polars-plan/ipc"] +parquet = ["polars-mem-engine/parquet", "polars-plan/parquet"] +csv = ["polars-mem-engine/csv", "polars-plan/csv"] +json = ["polars-mem-engine/json", "polars-plan/json"] +cloud = ["polars-mem-engine/cloud", "polars-plan/cloud", "polars-io/cloud"] # We need to specify default features here to match workspace defaults. # Otherwise we get warnings with cargo check/clippy. diff --git a/crates/polars-stream/src/expression.rs b/crates/polars-stream/src/expression.rs index 3c1b9445997c..197a28e265cc 100644 --- a/crates/polars-stream/src/expression.rs +++ b/crates/polars-stream/src/expression.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use polars_core::frame::DataFrame; -use polars_core::prelude::Series; +use polars_core::prelude::Column; use polars_error::PolarsResult; use polars_expr::prelude::{ExecutionState, PhysicalExpr}; @@ -21,7 +21,7 @@ impl StreamExpr { } } - pub async fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { + pub async fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { if self.reentrant { let state = state.clone(); let phys_expr = self.inner.clone(); diff --git a/crates/polars-stream/src/nodes/group_by.rs b/crates/polars-stream/src/nodes/group_by.rs index a2a2ae0d4d96..fb91a2965ac5 100644 --- a/crates/polars-stream/src/nodes/group_by.rs +++ b/crates/polars-stream/src/nodes/group_by.rs @@ -85,7 +85,10 @@ impl 
GroupBySinkState { // SAFETY: we resize the reduction to the number of groups beforehand. reduction.resize(local.grouper.num_groups()); reduction.update_groups( - &selector.evaluate(&df, state).await?, + selector + .evaluate(&df, state) + .await? + .as_materialized_series(), &group_idxs, )?; } diff --git a/crates/polars-stream/src/nodes/io_sinks/mod.rs b/crates/polars-stream/src/nodes/io_sinks/mod.rs index ce14ad3b0f7a..cc1682199a2a 100644 --- a/crates/polars-stream/src/nodes/io_sinks/mod.rs +++ b/crates/polars-stream/src/nodes/io_sinks/mod.rs @@ -1 +1,2 @@ +#[cfg(feature = "ipc")] pub mod ipc; diff --git a/crates/polars-stream/src/nodes/mod.rs b/crates/polars-stream/src/nodes/mod.rs index 559e4717c4e9..4fb42daddd6b 100644 --- a/crates/polars-stream/src/nodes/mod.rs +++ b/crates/polars-stream/src/nodes/mod.rs @@ -8,6 +8,7 @@ pub mod io_sinks; pub mod map; pub mod multiplexer; pub mod ordered_union; +#[cfg(feature = "parquet")] pub mod parquet_source; pub mod reduce; pub mod select; diff --git a/crates/polars-stream/src/nodes/reduce.rs b/crates/polars-stream/src/nodes/reduce.rs index 565854e97b81..8a863050be9b 100644 --- a/crates/polars-stream/src/nodes/reduce.rs +++ b/crates/polars-stream/src/nodes/reduce.rs @@ -64,7 +64,7 @@ impl ReduceNode { while let Ok(morsel) = recv.recv().await { for (reducer, selector) in local_reducers.iter_mut().zip(selectors) { let input = selector.evaluate(morsel.df(), state).await?; - reducer.update_group(&input, 0)?; + reducer.update_group(input.as_materialized_series(), 0)?; } } diff --git a/crates/polars-stream/src/physical_plan/fmt.rs b/crates/polars-stream/src/physical_plan/fmt.rs index ed0f08a0d48f..e0735144da79 100644 --- a/crates/polars-stream/src/physical_plan/fmt.rs +++ b/crates/polars-stream/src/physical_plan/fmt.rs @@ -99,9 +99,13 @@ fn visualize_plan_rec( PhysNodeKind::FileSink { input, file_type, .. 
} => match file_type { + #[cfg(feature = "parquet")] FileType::Parquet(_) => ("parquet-sink".to_string(), from_ref(input)), + #[cfg(feature = "ipc")] FileType::Ipc(_) => ("ipc-sink".to_string(), from_ref(input)), + #[cfg(feature = "csv")] FileType::Csv(_) => ("csv-sink".to_string(), from_ref(input)), + #[cfg(feature = "json")] FileType::Json(_) => ("json-sink".to_string(), from_ref(input)), }, PhysNodeKind::InMemoryMap { input, map: _ } => { @@ -140,9 +144,13 @@ fn visualize_plan_rec( file_options, } => { let name = match scan_type { + #[cfg(feature = "parquet")] FileScan::Parquet { .. } => "parquet-source", + #[cfg(feature = "csv")] FileScan::Csv { .. } => "csv-source", + #[cfg(feature = "ipc")] FileScan::Ipc { .. } => "ipc-source", + #[cfg(feature = "json")] FileScan::NDJson { .. } => "ndjson-source", FileScan::Anonymous { .. } => "anonymous-source", }; diff --git a/crates/polars-stream/src/physical_plan/lower_ir.rs b/crates/polars-stream/src/physical_plan/lower_ir.rs index 485bbf03a7fe..d57a8667c479 100644 --- a/crates/polars-stream/src/physical_plan/lower_ir.rs +++ b/crates/polars-stream/src/physical_plan/lower_ir.rs @@ -212,6 +212,7 @@ pub fn lower_ir( let file_type = file_type.clone(); match file_type { + #[cfg(feature = "ipc")] FileType::Ipc(_) => { let phys_input = lower_ir!(*input)?; PhysNodeKind::FileSink { @@ -223,6 +224,7 @@ pub fn lower_ir( _ => todo!(), } }, + #[cfg(feature = "cloud")] SinkType::Cloud { .. 
} => todo!(), }, diff --git a/crates/polars-stream/src/physical_plan/to_graph.rs b/crates/polars-stream/src/physical_plan/to_graph.rs index d9253e48dfa5..472cf982a253 100644 --- a/crates/polars-stream/src/physical_plan/to_graph.rs +++ b/crates/polars-stream/src/physical_plan/to_graph.rs @@ -213,6 +213,7 @@ fn to_graph_rec<'a>( let input_key = to_graph_rec(*input, ctx)?; match file_type { + #[cfg(feature = "ipc")] FileType::Ipc(ipc_writer_options) => ctx.graph.add_node( nodes::io_sinks::ipc::IpcSinkNode::new(input_schema, path, ipc_writer_options)?, [input_key], @@ -341,6 +342,7 @@ fn to_graph_rec<'a>( use polars_plan::prelude::FileScan; match scan_type { + #[cfg(feature = "parquet")] FileScan::Parquet { options, cloud_options, diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index 685ed71d8306..9ff45610a3c7 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -311,6 +311,7 @@ dtype-array = [ "polars-core/dtype-array", "polars-lazy?/dtype-array", "polars-ops/dtype-array", + "polars-plan?/dtype-array", ] dtype-i8 = [ "polars-core/dtype-i8", @@ -414,6 +415,8 @@ docs-selection = [ "dynamic_group_by", "extract_groups", "replace", + "approx_unique", + "unique_counts", ] bench = [ diff --git a/py-polars/polars/_utils/udfs.py b/py-polars/polars/_utils/udfs.py index 0ff968ed59ec..ed91c1920cc9 100644 --- a/py-polars/polars/_utils/udfs.py +++ b/py-polars/polars/_utils/udfs.py @@ -183,11 +183,15 @@ class OpNames: "endswith": "str.ends_with", "lower": "str.to_lowercase", "lstrip": "str.strip_chars_start", + "removeprefix": "str.strip_prefix", + "removesuffix": "str.strip_suffix", + "replace": "str.replace", "rstrip": "str.strip_chars_end", "startswith": "str.starts_with", "strip": "str.strip_chars", "title": "str.to_titlecase", "upper": "str.to_uppercase", + "zfill": "str.zfill", # temporal "date": "dt.date", "isoweekday": "dt.weekday", @@ -983,7 +987,7 @@ def _rewrite_methods( """Replace python method calls with synthetic POLARS_EXPRESSION 
op.""" LOAD_METHOD = OpNames.LOAD_ATTR if _MIN_PY312 else {"LOAD_METHOD"} if matching_instructions := ( - # method call with one basic arg, eg: "s.endswith('!')" + # method call with one arg, eg: "s.endswith('!')" self._matches( idx, opnames=[LOAD_METHOD, {"LOAD_CONST"}, OpNames.CALL], @@ -1016,6 +1020,47 @@ def _rewrite_methods( px = inst._replace(opname="POLARS_EXPRESSION", argval=expr, argrepr=expr) updated_instructions.append(px) + elif matching_instructions := ( + # method call with three args, eg: "s.replace('!','?',count=2)" + self._matches( + idx, + opnames=[ + LOAD_METHOD, + {"LOAD_CONST"}, + {"LOAD_CONST"}, + {"LOAD_CONST"}, + OpNames.CALL, + ], + argvals=[_PYTHON_METHODS_MAP], + ) + or + # method call with two args, eg: "s.replace('!','?')" + self._matches( + idx, + opnames=[LOAD_METHOD, {"LOAD_CONST"}, {"LOAD_CONST"}, OpNames.CALL], + argvals=[_PYTHON_METHODS_MAP], + ) + ): + inst = matching_instructions[0] + expr = _PYTHON_METHODS_MAP[inst.argval] + + param_values = [ + i.argval + for i in matching_instructions[1 : len(matching_instructions) - 1] + ] + if expr == "str.replace": + if len(param_values) == 3: + old, new, count = param_values + expr += f"({old!r},{new!r},n={count},literal=True)" + else: + old, new = param_values + expr = f"str.replace_all({old!r},{new!r},literal=True)" + else: + expr += f"({','.join(repr(v) for v in param_values)})" + + px = inst._replace(opname="POLARS_EXPRESSION", argval=expr, argrepr=expr) + updated_instructions.append(px) + return len(matching_instructions) @staticmethod diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 3403f8c12dac..4ff2752fdfb5 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -162,6 +162,7 @@ ParquetCompression, PivotAgg, PolarsDataType, + PythonDataType, RollingInterpolationMethod, RowTotalsDefinition, SchemaDefinition, @@ -7620,7 +7621,9 @@ def drop_in_place(self, name: str) -> Series: def cast( self, dtypes: ( 
- Mapping[ColumnNameOrSelector | PolarsDataType, PolarsDataType] + Mapping[ + ColumnNameOrSelector | PolarsDataType, PolarsDataType | PythonDataType + ] | PolarsDataType ), *, diff --git a/py-polars/polars/io/csv/functions.py b/py-polars/polars/io/csv/functions.py index daebcf452c1e..0483d058180b 100644 --- a/py-polars/polars/io/csv/functions.py +++ b/py-polars/polars/io/csv/functions.py @@ -1036,7 +1036,7 @@ def scan_csv( decimal_comma: bool = False, glob: bool = True, storage_options: dict[str, Any] | None = None, - credential_provider: CredentialProviderFunction | Literal["auto"] | None = None, + credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto", retries: int = 2, file_cache_ttl: int | None = None, include_file_paths: str | None = None, diff --git a/py-polars/polars/io/ipc/functions.py b/py-polars/polars/io/ipc/functions.py index 1348134fc0d6..b8af12ae8806 100644 --- a/py-polars/polars/io/ipc/functions.py +++ b/py-polars/polars/io/ipc/functions.py @@ -364,7 +364,7 @@ def scan_ipc( row_index_name: str | None = None, row_index_offset: int = 0, storage_options: dict[str, Any] | None = None, - credential_provider: CredentialProviderFunction | Literal["auto"] | None = None, + credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto", memory_map: bool = True, retries: int = 2, file_cache_ttl: int | None = None, diff --git a/py-polars/polars/io/ndjson.py b/py-polars/polars/io/ndjson.py index 983b8cddcfe1..7da4635408d1 100644 --- a/py-polars/polars/io/ndjson.py +++ b/py-polars/polars/io/ndjson.py @@ -38,7 +38,7 @@ def read_ndjson( row_index_offset: int = 0, ignore_errors: bool = False, storage_options: dict[str, Any] | None = None, - credential_provider: CredentialProviderFunction | Literal["auto"] | None = None, + credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto", retries: int = 2, file_cache_ttl: int | None = None, include_file_paths: str | None = None, @@ -206,7 +206,7 @@ def 
scan_ndjson( row_index_offset: int = 0, ignore_errors: bool = False, storage_options: dict[str, Any] | None = None, - credential_provider: CredentialProviderFunction | Literal["auto"] | None = None, + credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto", retries: int = 2, file_cache_ttl: int | None = None, include_file_paths: str | None = None, diff --git a/py-polars/polars/io/parquet/functions.py b/py-polars/polars/io/parquet/functions.py index 16ff7f614349..0cc91a94693f 100644 --- a/py-polars/polars/io/parquet/functions.py +++ b/py-polars/polars/io/parquet/functions.py @@ -54,7 +54,7 @@ def read_parquet( rechunk: bool = False, low_memory: bool = False, storage_options: dict[str, Any] | None = None, - credential_provider: CredentialProviderFunction | Literal["auto"] | None = None, + credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto", retries: int = 2, use_pyarrow: bool = False, pyarrow_options: dict[str, Any] | None = None, @@ -338,7 +338,7 @@ def scan_parquet( low_memory: bool = False, cache: bool = True, storage_options: dict[str, Any] | None = None, - credential_provider: CredentialProviderFunction | Literal["auto"] | None = None, + credential_provider: CredentialProviderFunction | Literal["auto"] | None = "auto", retries: int = 2, include_file_paths: str | None = None, allow_missing_columns: bool = False, diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 9b438ae8dbfa..3cc0cb85fb8c 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -111,6 +111,7 @@ Label, Orientation, PolarsDataType, + PythonDataType, RollingInterpolationMethod, SchemaDefinition, SchemaDict, @@ -2899,7 +2900,9 @@ def cache(self) -> LazyFrame: def cast( self, dtypes: ( - Mapping[ColumnNameOrSelector | PolarsDataType, PolarsDataType] + Mapping[ + ColumnNameOrSelector | PolarsDataType, PolarsDataType | PythonDataType + ] | PolarsDataType ), *, @@ -2979,6 
+2982,7 @@ def cast( 'ham': ['2020-01-02', '2021-03-04', '2022-05-06']} """ if not isinstance(dtypes, Mapping): + dtypes = parse_into_dtype(dtypes) return self._from_pyldf(self._ldf.cast_all(dtypes, strict)) cast_map = {} diff --git a/py-polars/polars/schema.py b/py-polars/polars/schema.py index 81ade5a6b206..fb1b8268bf2f 100644 --- a/py-polars/polars/schema.py +++ b/py-polars/polars/schema.py @@ -56,31 +56,37 @@ class Schema(BaseSchema): Parameters ---------- schema - The schema definition given by column names and their associated *instantiated* + The schema definition given by column names and their associated Polars data type. Accepts a mapping or an iterable of tuples. Examples -------- - Define a schema by passing *instantiated* data types. - - >>> schema = pl.Schema({"foo": pl.Int8(), "bar": pl.String()}) + Define a schema by passing instantiated data types. + + >>> schema = pl.Schema( + ... { + ... "foo": pl.String(), + ... "bar": pl.Duration("us"), + ... "baz": pl.Array(pl.Int8, 4), + ... } + ... ) >>> schema - Schema({'foo': Int8, 'bar': String}) + Schema({'foo': String, 'bar': Duration(time_unit='us'), 'baz': Array(Int8, shape=(4,))}) Access the data type associated with a specific column name. - >>> schema["foo"] - Int8 + >>> schema["baz"] + Array(Int8, shape=(4,)) Access various schema properties using the `names`, `dtypes`, and `len` methods. >>> schema.names() - ['foo', 'bar'] + ['foo', 'bar', 'baz'] >>> schema.dtypes() - [Int8, String] + [String, Duration(time_unit='us'), Array(Int8, shape=(4,))] >>> schema.len() - 2 - """ + 3 + """ # noqa: W505 def __init__( self, @@ -123,15 +129,41 @@ def __setitem__( super().__setitem__(name, dtype) def names(self) -> list[str]: - """Get the column names of the schema.""" + """ + Get the column names of the schema. 
+ + Examples + -------- + >>> s = pl.Schema({"x": pl.Float64(), "y": pl.Datetime(time_zone="UTC")}) + >>> s.names() + ['x', 'y'] + """ return list(self.keys()) def dtypes(self) -> list[DataType]: - """Get the data types of the schema.""" + """ + Get the data types of the schema. + + Examples + -------- + >>> s = pl.Schema({"x": pl.UInt8(), "y": pl.List(pl.UInt8)}) + >>> s.dtypes() + [UInt8, List(UInt8)] + """ return list(self.values()) def len(self) -> int: - """Get the number of columns in the schema.""" + """ + Get the number of schema entries. + + Examples + -------- + >>> s = pl.Schema({"x": pl.Int32(), "y": pl.List(pl.String)}) + >>> s.len() + 2 + >>> len(s) + 2 + """ return len(self) def to_python(self) -> dict[str, type]: @@ -140,7 +172,13 @@ def to_python(self) -> dict[str, type]: Examples -------- - >>> s = pl.Schema({"x": pl.Int8(), "y": pl.String(), "z": pl.Duration("ms")}) + >>> s = pl.Schema( + ... { + ... "x": pl.Int8(), + ... "y": pl.String(), + ... "z": pl.Duration("us"), + ... } + ... 
) >>> s.to_python() {'x': , 'y': , 'z': } """ diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 934cad33d1d7..d86c9d29cd0f 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4967,14 +4967,14 @@ def round_sig_figs(self, digits: int) -> Series: Examples -------- - >>> s = pl.Series([0.01234, 3.333, 1234.0]) + >>> s = pl.Series([0.01234, 3.333, 3450.0]) >>> s.round_sig_figs(2) shape: (3,) Series: '' [f64] [ 0.012 3.3 - 1200.0 + 3500.0 ] """ diff --git a/py-polars/requirements-dev.txt b/py-polars/requirements-dev.txt index e89a8e19c0b6..9225aa8a690e 100644 --- a/py-polars/requirements-dev.txt +++ b/py-polars/requirements-dev.txt @@ -62,8 +62,8 @@ hypothesis # ------- pytest==8.3.2 -pytest-codspeed==2.2.1 -pytest-cov==5.0.0 +pytest-codspeed==3.0.0 +pytest-cov==6.0.0 pytest-xdist==3.6.1 # Need moto.server to mock s3fs - see: https://github.com/aio-libs/aiobotocore/issues/755 diff --git a/py-polars/requirements-lint.txt b/py-polars/requirements-lint.txt index b6c173bf8320..9c216431c84c 100644 --- a/py-polars/requirements-lint.txt +++ b/py-polars/requirements-lint.txt @@ -1,3 +1,3 @@ mypy[faster-cache]==1.13.0 ruff==0.7.1 -typos==1.26.8 +typos==1.27.2 diff --git a/py-polars/tests/unit/constructors/test_dataframe.py b/py-polars/tests/unit/constructors/test_dataframe.py index 251ec5e7bce2..e885919294d1 100644 --- a/py-polars/tests/unit/constructors/test_dataframe.py +++ b/py-polars/tests/unit/constructors/test_dataframe.py @@ -59,7 +59,7 @@ def test_df_init_from_generator_dict_view() -> None: data = { "keys": d.keys(), "vals": d.values(), - "itms": d.items(), + "items": d.items(), } with pytest.raises(TypeError, match="unexpected value"): pl.DataFrame(data, strict=True) @@ -68,12 +68,12 @@ def test_df_init_from_generator_dict_view() -> None: assert df.schema == { "keys": pl.Int64, "vals": pl.String, - "itms": pl.List(pl.String), + "items": pl.List(pl.String), } assert df.to_dict(as_series=False) 
== { "keys": [0, 1, 2], "vals": ["x", "y", "z"], - "itms": [["0", "x"], ["1", "y"], ["2", "z"]], + "items": [["0", "x"], ["1", "y"], ["2", "z"]], } @@ -86,19 +86,19 @@ def test_df_init_from_generator_reversed_dict_view() -> None: data = { "rev_keys": reversed(d.keys()), "rev_vals": reversed(d.values()), - "rev_itms": reversed(d.items()), + "rev_items": reversed(d.items()), } - df = pl.DataFrame(data, schema_overrides={"rev_itms": pl.Object}) + df = pl.DataFrame(data, schema_overrides={"rev_items": pl.Object}) assert df.schema == { "rev_keys": pl.Int64, "rev_vals": pl.String, - "rev_itms": pl.Object, + "rev_items": pl.Object, } assert df.to_dict(as_series=False) == { "rev_keys": [2, 1, 0], "rev_vals": ["z", "y", "x"], - "rev_itms": [(2, "z"), (1, "y"), (0, "x")], + "rev_items": [(2, "z"), (1, "y"), (0, "x")], } diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index d8910cda4fb2..c375e1952347 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -736,7 +736,10 @@ def test_concat() -> None: def test_arg_where() -> None: s = pl.Series([True, False, True, False]) - assert_series_equal(pl.arg_where(s, eager=True).cast(int), pl.Series([0, 2])) + assert_series_equal( + pl.arg_where(s, eager=True).cast(int), + pl.Series([0, 2]), + ) def test_to_dummies() -> None: @@ -1060,7 +1063,7 @@ def test_cast_frame() -> None: # cast via col:dtype map assert df.cast( - dtypes={"b": pl.Float32, "c": pl.String, "d": pl.Datetime("ms")} + dtypes={"b": pl.Float32, "c": pl.String, "d": pl.Datetime("ms")}, ).schema == { "a": pl.Float64, "b": pl.Float32, @@ -1068,6 +1071,16 @@ def test_cast_frame() -> None: "d": pl.Datetime("ms"), } + # cast via col:pytype map + assert df.cast( + dtypes={"b": float, "c": str, "d": datetime}, + ).schema == { + "a": pl.Float64, + "b": pl.Float64, + "c": pl.String, + "d": pl.Datetime("us"), + } + # cast via selector:dtype map assert df.cast( { diff --git 
a/py-polars/tests/unit/datatypes/test_array.py b/py-polars/tests/unit/datatypes/test_array.py index b578266b0c6f..df7a5c5cc4ec 100644 --- a/py-polars/tests/unit/datatypes/test_array.py +++ b/py-polars/tests/unit/datatypes/test_array.py @@ -383,3 +383,16 @@ def test_zero_width_array(fn: str) -> None: df = pl.concat([a.to_frame(), b.to_frame()], how="horizontal") df.select(c=expr_f(pl.col.a, pl.col.b)) + + +def test_elementwise_arithmetic_19682() -> None: + dt = pl.Array(pl.Int64, (2, 3)) + + a = pl.Series("a", [[[1, 2, 3], [4, 5, 6]]], dt) + sc = pl.Series("a", [1]) + zfa = pl.Series("a", [[]], pl.Array(pl.Int64, 0)) + + assert_series_equal(a + a, pl.Series("a", [[[2, 4, 6], [8, 10, 12]]], dt)) + assert_series_equal(a + sc, pl.Series("a", [[[2, 3, 4], [5, 6, 7]]], dt)) + assert_series_equal(sc + a, pl.Series("a", [[[2, 3, 4], [5, 6, 7]]], dt)) + assert_series_equal(zfa + zfa, pl.Series("a", [[]], pl.Array(pl.Int64, 0))) diff --git a/py-polars/tests/unit/io/cloud/test_cloud.py b/py-polars/tests/unit/io/cloud/test_cloud.py index f943ab5e2c26..54d1b5ccd6a6 100644 --- a/py-polars/tests/unit/io/cloud/test_cloud.py +++ b/py-polars/tests/unit/io/cloud/test_cloud.py @@ -1,3 +1,5 @@ +from functools import partial + import pytest import polars as pl @@ -11,6 +13,11 @@ def test_scan_nonexistent_cloud_path_17444(format: str) -> None: path_str = f"s3://my-nonexistent-bucket/data.{format}" scan_function = getattr(pl, f"scan_{format}") + # Prevent automatic credential provideder instantiation, otherwise CI may fail with + # * pytest.PytestUnraisableExceptionWarning: + # * Exception ignored: + # * ResourceWarning: unclosed socket + scan_function = partial(scan_function, credential_provider=None) # Just calling the scan function should not raise any errors if format == "ndjson": diff --git a/py-polars/tests/unit/io/test_parquet.py b/py-polars/tests/unit/io/test_parquet.py index 8ea7d2152bc0..4fc9e0321a66 100644 --- a/py-polars/tests/unit/io/test_parquet.py +++ 
b/py-polars/tests/unit/io/test_parquet.py @@ -2356,6 +2356,7 @@ def test_nested_dicts(content: list[float | None]) -> None: [i if i % 7 < 3 and i % 5 > 3 else None for i in range(57)], ], ) +@pytest.mark.slow def test_dict_slices( leading_nulls: list[None], trailing_nulls: list[None], diff --git a/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py b/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py index 74946f084d51..61416054755d 100644 --- a/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py +++ b/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py @@ -176,6 +176,26 @@ """lambda x: x.lstrip().startswith(('!','#','?',"'"))""", """pl.col("b").str.strip_chars_start().str.contains(r"^(!|\\#|\\?|')")""", ), + ( + "b", + "lambda x: x.replace(':','')", + """pl.col("b").str.replace_all(':','',literal=True)""", + ), + ( + "b", + "lambda x: x.replace(':','',2)", + """pl.col("b").str.replace(':','',n=2,literal=True)""", + ), + ( + "b", + "lambda x: x.removeprefix('A').removesuffix('F')", + """pl.col("b").str.strip_prefix('A').str.strip_suffix('F')""", + ), + ( + "b", + "lambda x: x.zfill(8)", + """pl.col("b").str.zfill(8)""", + ), # --------------------------------------------- # json expr: load/extract # --------------------------------------------- diff --git a/py-polars/tests/unit/operations/test_cast.py b/py-polars/tests/unit/operations/test_cast.py index dca9eeb3e767..4e8dae9b2d38 100644 --- a/py-polars/tests/unit/operations/test_cast.py +++ b/py-polars/tests/unit/operations/test_cast.py @@ -13,12 +13,13 @@ from polars.testing.asserts.series import assert_series_equal if TYPE_CHECKING: - from polars._typing import PolarsDataType + from polars._typing import PolarsDataType, PythonDataType -def test_string_date() -> None: +@pytest.mark.parametrize("dtype", [pl.Date(), pl.Date, date]) +def test_string_date(dtype: PolarsDataType | PythonDataType) -> None: df = pl.DataFrame({"x1": 
["2021-01-01"]}).with_columns( - **{"x1-date": pl.col("x1").cast(pl.Date)} + **{"x1-date": pl.col("x1").cast(dtype)} ) expected = pl.DataFrame({"x1-date": [date(2021, 1, 1)]}) out = df.select(pl.col("x1-date")) @@ -668,9 +669,10 @@ def test_bool_numeric_supertype(dtype: PolarsDataType) -> None: assert result.item() - 0.3333333 <= 0.00001 -def test_cast_consistency() -> None: +@pytest.mark.parametrize("dtype", [pl.String(), pl.String, str]) +def test_cast_consistency(dtype: PolarsDataType | PythonDataType) -> None: assert pl.DataFrame().with_columns(a=pl.lit(0.0)).with_columns( - b=pl.col("a").cast(pl.String), c=pl.lit(0.0).cast(pl.String) + b=pl.col("a").cast(dtype), c=pl.lit(0.0).cast(dtype) ).to_dict(as_series=False) == {"a": [0.0], "b": ["0.0"], "c": ["0.0"]}