diff --git a/crates/polars-core/src/chunked_array/ops/aggregate/mod.rs b/crates/polars-core/src/chunked_array/ops/aggregate/mod.rs index 3d3369f9ab50..c13ddebb7f27 100644 --- a/crates/polars-core/src/chunked_array/ops/aggregate/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/aggregate/mod.rs @@ -8,7 +8,7 @@ use std::ops::Add; use arrow::compute; use arrow::types::simd::Simd; use arrow::types::NativeType; -use num_traits::{Float, ToPrimitive, Zero}; +use num_traits::{Float, One, ToPrimitive, Zero}; use polars_arrow::kernels::rolling::{compare_fn_nan_max, compare_fn_nan_min}; pub use quantile::*; pub use var::*; @@ -276,7 +276,7 @@ impl BooleanChunked { } } -// Needs the same trait bounds as the implementation of ChunkedArray of dyn Series +// Needs the same trait bounds as the implementation of ChunkedArray of dyn Series. impl ChunkAggSeries for ChunkedArray where T: PolarsNumericType, @@ -291,12 +291,14 @@ where ca.rename(self.name()); ca.into_series() } + fn max_as_series(&self) -> Series { let v = ChunkAgg::max(self); let mut ca: ChunkedArray = [v].iter().copied().collect(); ca.rename(self.name()); ca.into_series() } + fn min_as_series(&self) -> Series { let v = ChunkAgg::min(self); let mut ca: ChunkedArray = [v].iter().copied().collect(); @@ -305,15 +307,11 @@ where } fn prod_as_series(&self) -> Series { - let mut prod = None; - for opt_v in self.into_iter() { - match (prod, opt_v) { - (_, None) => return Self::full_null(self.name(), 1).into_series(), - (None, Some(v)) => prod = Some(v), - (Some(p), Some(v)) => prod = Some(p * v), - } + let mut prod = T::Native::one(); + for opt_v in self.into_iter().flatten() { + prod = prod * opt_v; } - Self::from_slice_options(self.name(), &[prod]).into_series() + Self::from_slice_options(self.name(), &[Some(prod)]).into_series() } } @@ -509,15 +507,13 @@ impl BinaryChunked { match self.is_sorted_flag() { IsSorted::Ascending => { self.last_non_null().and_then(|idx| { - // Safety: - // last_non_null returns in bound index + // SAFETY: last_non_null returns in bound index. unsafe { self.get_unchecked(idx) } }) }, IsSorted::Descending => { self.first_non_null().and_then(|idx| { - // Safety: - // first_non_null returns in bound index + // SAFETY: first_non_null returns in bound index. unsafe { self.get_unchecked(idx) } }) }, @@ -535,15 +531,13 @@ impl BinaryChunked { match self.is_sorted_flag() { IsSorted::Ascending => { self.first_non_null().and_then(|idx| { - // Safety: - // first_non_null returns in bound index + // SAFETY: first_non_null returns in bound index. unsafe { self.get_unchecked(idx) } }) }, IsSorted::Descending => { self.last_non_null().and_then(|idx| { - // Safety: - // last_non_null returns in bound index + // SAFETY: last_non_null returns in bound index. unsafe { self.get_unchecked(idx) } }) }, @@ -606,9 +600,9 @@ mod test { #[test] fn test_var() { - // validated with numpy - // Note that numpy as an argument ddof which influences results. The default is ddof=0 - // we chose ddof=1, which is standard in statistics + // Validated with numpy. Note that numpy uses ddof as an argument which + // influences results. The default ddof=0, we chose ddof=1, which is + // standard in statistics. let ca1 = Int32Chunked::new("", &[5, 8, 9, 5, 0]); let ca2 = Int32Chunked::new( "", diff --git a/crates/polars-core/src/datatypes/mod.rs b/crates/polars-core/src/datatypes/mod.rs index 120820365207..e4d592b4e3c5 100644 --- a/crates/polars-core/src/datatypes/mod.rs +++ b/crates/polars-core/src/datatypes/mod.rs @@ -33,7 +33,7 @@ use arrow::types::NativeType; pub use dtype::*; pub use field::*; pub use from_values::ArrayFromElementIter; -use num_traits::{Bounded, FromPrimitive, Num, NumCast, Zero}; +use num_traits::{Bounded, FromPrimitive, Num, NumCast, One, Zero}; use polars_arrow::data_types::IsFloat; #[cfg(feature = "serde")] use serde::de::{EnumAccess, Error, Unexpected, VariantAccess, Visitor}; @@ -190,6 +190,7 @@ pub trait NumericNative: + Num + NumCast + Zero + + One + Simd + Simd8 + std::iter::Sum diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index d8de5cd028ac..ed2ca8c5ae82 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -154,14 +154,13 @@ impl Hash for Wrap { } impl Series { - /// Create a new empty Series + /// Create a new empty Series. pub fn new_empty(name: &str, dtype: &DataType) -> Series { Series::full_null(name, 0, dtype) } pub fn clear(&self) -> Series { - // only the inner of objects know their type - // so use this hack + // Only the inner of objects know their type, so use this hack. #[cfg(feature = "object")] if matches!(self.dtype(), DataType::Object(_)) { return if self.is_empty() { @@ -262,23 +261,18 @@ impl Series { self._get_inner_mut().as_single_ptr() } - /// Cast `[Series]` to another `[DataType]` + /// Cast `[Series]` to another `[DataType]`. pub fn cast(&self, dtype: &DataType) -> PolarsResult { - // best leave as is. + // Best leave as is. if matches!(dtype, DataType::Unknown) { return Ok(self.clone()); } - match self.0.cast(dtype) { - Ok(out) => Ok(out), - Err(err) => { - let len = self.len(); - if self.null_count() == len { - Ok(Series::full_null(self.name(), len, dtype)) - } else { - Err(err) - } - }, + let ret = self.0.cast(dtype); + let len = self.len(); + if ret.is_err() && self.null_count() == len { + return Ok(Series::full_null(self.name(), len, dtype)); } + ret } /// Cast from physical to logical types without any checks on the validity of the cast. @@ -288,24 +282,15 @@ impl Series { pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult { match self.dtype() { #[cfg(feature = "dtype-struct")] - DataType::Struct(_) => { - let ca = self.struct_().unwrap(); - ca.cast_unchecked(dtype) - }, - DataType::List(_) => { - let ca = self.list().unwrap(); - ca.cast_unchecked(dtype) - }, + DataType::Struct(_) => self.struct_().unwrap().cast_unchecked(dtype), + DataType::List(_) => self.list().unwrap().cast_unchecked(dtype), dt if dt.is_numeric() => { with_match_physical_numeric_polars_type!(dt, |$T| { let ca: &ChunkedArray<$T> = self.as_ref().as_ref().as_ref(); ca.cast_unchecked(dtype) }) }, - DataType::Binary => { - let ca = self.binary().unwrap(); - ca.cast_unchecked(dtype) - }, + DataType::Binary => self.binary().unwrap().cast_unchecked(dtype), _ => self.cast(dtype), } } @@ -326,10 +311,8 @@ impl Series { where T: NumCast, { - self.sum_as_series() - .cast(&DataType::Float64) - .ok() - .and_then(|s| s.f64().unwrap().get(0).and_then(T::from)) + let sum = self.sum_as_series().cast(&DataType::Float64).ok()?; + T::from(sum.f64().unwrap().get(0)?) } /// Returns the minimum value in the array, according to the natural order. @@ -343,10 +326,8 @@ impl Series { where T: NumCast, { - self.min_as_series() - .cast(&DataType::Float64) - .ok() - .and_then(|s| s.f64().unwrap().get(0).and_then(T::from)) + let min = self.min_as_series().cast(&DataType::Float64).ok()?; + T::from(min.f64().unwrap().get(0)?) } /// Returns the maximum value in the array, according to the natural order. @@ -360,10 +341,8 @@ impl Series { where T: NumCast, { - self.max_as_series() - .cast(&DataType::Float64) - .ok() - .and_then(|s| s.f64().unwrap().get(0).and_then(T::from)) + let max = self.max_as_series().cast(&DataType::Float64).ok()?; + T::from(max.f64().unwrap().get(0)?) } /// Explode a list Series. This expands every item to a new row.. @@ -444,16 +423,13 @@ impl Series { #[cfg(feature = "dtype-struct")] Struct(_) => { let arr = self.struct_().unwrap(); - let fields = arr + let fields: Vec<_> = arr .fields() .iter() .map(|s| s.to_physical_repr().into_owned()) - .collect::>(); - Cow::Owned( - StructChunked::new(self.name(), &fields) - .unwrap() - .into_series(), - ) + .collect(); + let ca = StructChunked::new(self.name(), &fields).unwrap(); + Cow::Owned(ca.into_series()) }, _ => Cow::Borrowed(self), } @@ -474,7 +450,7 @@ impl Series { } } - // take a function pointer to reduce bloat + // Take a function pointer to reduce bloat. fn threaded_op( &self, rechunk: bool, @@ -562,9 +538,9 @@ impl Series { /// Filter by boolean mask. This operation clones data. pub fn filter_threaded(&self, filter: &BooleanChunked, rechunk: bool) -> PolarsResult { - // this would fail if there is a broadcasting filter. - // because we cannot split that filter over threads - // besides they are a no-op, so we do the standard filter. + // This would fail if there is a broadcasting filter, because we cannot + // split that filter over threads besides they are a no-op, so we do the + // standard filter. if filter.len() == 1 { return self.filter(filter); } @@ -598,10 +574,8 @@ impl Series { if self.is_empty() && (self.dtype().is_numeric() || matches!(self.dtype(), DataType::Boolean)) { - return Series::new(self.name(), [0]) - .cast(self.dtype()) - .unwrap() - .sum_as_series(); + let zero = Series::new(self.name(), [0]); + return zero.cast(self.dtype()).unwrap().sum_as_series(); } match self.dtype() { Int8 | UInt8 | Int16 | UInt16 => self.cast(&Int64).unwrap().sum_as_series(), @@ -609,7 +583,7 @@ impl Series { } } - /// Get an array with the cumulative max computed at every element + /// Get an array with the cumulative max computed at every element. pub fn cummax(&self, _reverse: bool) -> Series { #[cfg(feature = "cum_agg")] { @@ -621,7 +595,7 @@ impl Series { } } - /// Get an array with the cumulative min computed at every element + /// Get an array with the cumulative min computed at every element. pub fn cummin(&self, _reverse: bool) -> Series { #[cfg(feature = "cum_agg")] { @@ -648,30 +622,12 @@ impl Series { let s = self.cast(&Int64).unwrap(); s.cumsum(reverse) }, - Int32 => { - let ca = self.i32().unwrap(); - ca.cumsum(reverse).into_series() - }, - UInt32 => { - let ca = self.u32().unwrap(); - ca.cumsum(reverse).into_series() - }, - UInt64 => { - let ca = self.u64().unwrap(); - ca.cumsum(reverse).into_series() - }, - Int64 => { - let ca = self.i64().unwrap(); - ca.cumsum(reverse).into_series() - }, - Float32 => { - let ca = self.f32().unwrap(); - ca.cumsum(reverse).into_series() - }, - Float64 => { - let ca = self.f64().unwrap(); - ca.cumsum(reverse).into_series() - }, + Int32 => self.i32().unwrap().cumsum(reverse).into_series(), + UInt32 => self.u32().unwrap().cumsum(reverse).into_series(), + UInt64 => self.u64().unwrap().cumsum(reverse).into_series(), + Int64 => self.i64().unwrap().cumsum(reverse).into_series(), + Float32 => self.f32().unwrap().cumsum(reverse).into_series(), + Float64 => self.f64().unwrap().cumsum(reverse).into_series(), #[cfg(feature = "dtype-duration")] Duration(tu) => { let ca = self.to_physical_repr(); @@ -687,7 +643,7 @@ impl Series { } } - /// Get an array with the cumulative product computed at every element + /// Get an array with the cumulative product computed at every element. /// /// If the [`DataType`] is one of `{Int8, UInt8, Int16, UInt16, Int32, UInt32}` the `Series` is /// first cast to `Int64` to prevent overflow issues. @@ -702,22 +658,10 @@ impl Series { let s = self.cast(&Int64).unwrap(); s.cumprod(reverse) }, - Int64 => { - let ca = self.i64().unwrap(); - ca.cumprod(reverse).into_series() - }, - UInt64 => { - let ca = self.u64().unwrap(); - ca.cumprod(reverse).into_series() - }, - Float32 => { - let ca = self.f32().unwrap(); - ca.cumprod(reverse).into_series() - }, - Float64 => { - let ca = self.f64().unwrap(); - ca.cumprod(reverse).into_series() - }, + Int64 => self.i64().unwrap().cumprod(reverse).into_series(), + UInt64 => self.u64().unwrap().cumprod(reverse).into_series(), + Float32 => self.f32().unwrap().cumprod(reverse).into_series(), + Float64 => self.f64().unwrap().cumprod(reverse).into_series(), dt => panic!("cumprod not supported for dtype: {dt:?}"), } } @@ -741,23 +685,11 @@ impl Series { let s = self.cast(&Int64).unwrap(); s.product() }, - Int64 => { - let ca = self.i64().unwrap(); - ca.prod_as_series() - }, - UInt64 => { - let ca = self.u64().unwrap(); - ca.prod_as_series() - }, - Float32 => { - let ca = self.f32().unwrap(); - ca.prod_as_series() - }, - Float64 => { - let ca = self.f64().unwrap(); - ca.prod_as_series() - }, - dt => panic!("cumprod not supported for dtype: {dt:?}"), + Int64 => self.i64().unwrap().prod_as_series(), + UInt64 => self.u64().unwrap().prod_as_series(), + Float32 => self.f32().unwrap().prod_as_series(), + Float64 => self.f64().unwrap().prod_as_series(), + dt => panic!("product not supported for dtype: {dt:?}"), } } #[cfg(not(feature = "product"))] @@ -962,8 +894,7 @@ impl Series { /// than a naive [`Series::unique`](SeriesTrait::unique). pub fn unique_stable(&self) -> PolarsResult { let idx = self.arg_unique()?; - // Safety: - // Indices are in bounds. + // SAFETY: Indices are in bounds. unsafe { self.take_unchecked(&idx) } } @@ -1059,7 +990,7 @@ where DataType::Decimal(None, None) => panic!("impl error"), _ => { if &T::get_dtype() == self.dtype() || - // needed because we want to get ref of List no matter what the inner type is. + // Needed because we want to get ref of List no matter what the inner type is. (matches!(T::get_dtype(), DataType::List(_)) && matches!(self.dtype(), DataType::List(_))) { unsafe { &*(self as *const dyn SeriesTrait as *const ChunkedArray) } @@ -1081,7 +1012,7 @@ where { fn as_mut(&mut self) -> &mut ChunkedArray { if &T::get_dtype() == self.dtype() || - // needed because we want to get ref of List no matter what the inner type is. + // Needed because we want to get ref of List no matter what the inner type is. (matches!(T::get_dtype(), DataType::List(_)) && matches!(self.dtype(), DataType::List(_))) { unsafe { &mut *(self as *mut dyn SeriesTrait as *mut ChunkedArray) } diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index ed74c85e4d09..09af1e97f48b 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -2296,10 +2296,19 @@ def test_product() -> None: assert out == 6 a = pl.Series("a", [1, 2, None]) out = a.product() - assert out is None + assert out == 2 a = pl.Series("a", [None, 2, 3]) out = a.product() - assert out is None + assert out == 6 + a = pl.Series("a", []) + out = a.product() + assert out == 1 + a = pl.Series("a", [None, None]) + out = a.product() + assert out == 1 + a = pl.Series("a", [3.0, None, float("nan")]) + out = a.product() + assert math.isnan(out) def test_ceil() -> None: