From 320d5cd70b4246fd5e94d6f58614a04673d5127c Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Wed, 6 Nov 2024 21:01:37 +0100 Subject: [PATCH] remove some more @scalar-opts --- crates/polars-core/src/frame/column/mod.rs | 182 ++++++++++-------- crates/polars-core/src/frame/column/scalar.rs | 15 +- crates/polars-core/src/scalar/from.rs | 1 + crates/polars-core/src/testing.rs | 36 ---- 4 files changed, 115 insertions(+), 119 deletions(-) diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index 16eb940022b3..e33c50ea9a41 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -681,14 +681,11 @@ impl Column { } pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self { - Series::full_null(name, size, dtype).into() - // @TODO: This causes failures - // Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size) + Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size) } pub fn is_empty(&self) -> bool { - // @scalar-opt - self.as_materialized_series().is_empty() + self.len() == 0 } pub fn reverse(&self) -> Column { @@ -699,16 +696,16 @@ impl Column { } } - pub fn equals(&self, right: &Column) -> bool { + pub fn equals(&self, other: &Column) -> bool { // @scalar-opt self.as_materialized_series() - .equals(right.as_materialized_series()) + .equals(other.as_materialized_series()) } - pub fn equals_missing(&self, right: &Column) -> bool { + pub fn equals_missing(&self, other: &Column) -> bool { // @scalar-opt self.as_materialized_series() - .equals_missing(right.as_materialized_series()) + .equals_missing(other.as_materialized_series()) } pub fn set_sorted_flag(&mut self, sorted: IsSorted) { @@ -740,11 +737,6 @@ impl Column { } } - pub fn get_data_ptr(&self) -> usize { - // @scalar-opt - self.as_materialized_series().get_data_ptr() - } - pub fn vec_hash(&self, build_hasher: PlRandomState, buf: &mut Vec) -> PolarsResult<()> { // @scalar-opt? self.as_materialized_series().vec_hash(build_hasher, buf) @@ -782,13 +774,6 @@ impl Column { unsafe { DataFrame::new_no_checks(self.len(), vec![self]) } } - pub fn unique_stable(&self) -> PolarsResult { - // @scalar-opt? - self.as_materialized_series() - .unique_stable() - .map(Column::from) - } - pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> { // @scalar-opt self.into_materialized_series() @@ -805,9 +790,11 @@ impl Column { } pub fn explode(&self) -> PolarsResult { - // @scalar-opt self.as_materialized_series().explode().map(Column::from) } + pub fn implode(&self) -> PolarsResult { + self.as_materialized_series().implode() + } pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult { // @scalar-opt @@ -849,8 +836,12 @@ impl Column { } pub fn drop_nulls(&self) -> Column { - // @scalar-opt - self.as_materialized_series().drop_nulls().into() + match self { + Column::Series(s) => s.drop_nulls().into_column(), + // @partition-opt + Column::Partitioned(s) => s.as_materialized_series().drop_nulls().into_column(), + Column::Scalar(s) => s.drop_nulls().into_column(), + } } pub fn is_sorted_flag(&self) -> IsSorted { @@ -859,8 +850,34 @@ impl Column { } pub fn unique(&self) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().unique().map(Column::from) + match self { + Column::Series(s) => s.unique().map(Column::from), + // @partition-opt + Column::Partitioned(s) => s.as_materialized_series().unique().map(Column::from), + Column::Scalar(s) => { + _ = s.as_single_value_series().unique()?; + if s.is_empty() { + return Ok(s.clone().into_column()); + } + + Ok(s.resize(1).into_column()) + }, + } + } + pub fn unique_stable(&self) -> PolarsResult { + match self { + Column::Series(s) => s.unique_stable().map(Column::from), + // @partition-opt + Column::Partitioned(s) => s.as_materialized_series().unique_stable().map(Column::from), + Column::Scalar(s) => { + _ = s.as_single_value_series().unique_stable()?; + if s.is_empty() { + return Ok(s.clone().into_column()); + } + + Ok(s.resize(1).into_column()) + }, + } } pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult { @@ -885,9 +902,26 @@ impl Column { .map(Self::from) } - pub fn filter(&self, filter: &ChunkedArray) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().filter(filter).map(Self::from) + pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult { + match self { + Column::Series(s) => s.filter(filter).map(Column::from), + Column::Partitioned(s) => s.as_materialized_series().filter(filter).map(Column::from), + Column::Scalar(s) => { + if s.is_empty() { + return Ok(s.clone().into_column()); + } + + // Broadcasting + if filter.len() == 1 { + return match filter.get(0) { + Some(true) => Ok(s.clone().into_column()), + _ => Ok(s.resize(0).into_column()), + }; + } + + Ok(s.resize(filter.sum().unwrap() as usize).into_column()) + }, + } } #[cfg(feature = "random")] @@ -959,23 +993,16 @@ impl Column { } pub fn is_finite(&self) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().is_finite() + self.try_map_unary_elementwise_to_bool(|s| s.is_finite()) } - pub fn is_infinite(&self) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().is_infinite() + self.try_map_unary_elementwise_to_bool(|s| s.is_infinite()) } - pub fn is_nan(&self) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().is_nan() + self.try_map_unary_elementwise_to_bool(|s| s.is_nan()) } - pub fn is_not_nan(&self) -> PolarsResult { - // @scalar-opt - self.as_materialized_series().is_not_nan() + self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan()) } pub fn wrapping_trunc_div_scalar(&self, rhs: T) -> Self @@ -1050,25 +1077,22 @@ impl Column { } pub fn try_add_owned(self, other: Self) -> PolarsResult { - // @partition-opt - // @scalar-opt - self.take_materialized_series() - .try_add_owned(other.take_materialized_series()) - .map(Column::from) + match (self, other) { + (Column::Series(lhs), Column::Series(rhs)) => lhs.try_add_owned(rhs).map(Column::from), + (lhs, rhs) => lhs + rhs, + } } pub fn try_sub_owned(self, other: Self) -> PolarsResult { - // @partition-opt - // @scalar-opt - self.take_materialized_series() - .try_sub_owned(other.take_materialized_series()) - .map(Column::from) + match (self, other) { + (Column::Series(lhs), Column::Series(rhs)) => lhs.try_sub_owned(rhs).map(Column::from), + (lhs, rhs) => lhs - rhs, + } } pub fn try_mul_owned(self, other: Self) -> PolarsResult { - // @partition-opt - // @scalar-opt - self.take_materialized_series() - .try_mul_owned(other.take_materialized_series()) - .map(Column::from) + match (self, other) { + (Column::Series(lhs), Column::Series(rhs)) => lhs.try_mul_owned(rhs).map(Column::from), + (lhs, rhs) => lhs * rhs, + } } pub(crate) fn str_value(&self, index: usize) -> PolarsResult> { @@ -1193,12 +1217,6 @@ impl Column { .quantile_reduce(quantile, method) } - pub fn implode(&self) -> PolarsResult { - // @partition-opt - // @scalar-opt - self.as_materialized_series().implode() - } - pub(crate) fn estimated_size(&self) -> usize { // @scalar-opt self.as_materialized_series().estimated_size() @@ -1221,16 +1239,27 @@ impl Column { } } - pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column { + pub fn map_unary_elementwise_to_bool( + &self, + f: impl Fn(&Series) -> BooleanChunked, + ) -> BooleanChunked { + self.try_map_unary_elementwise_to_bool(|s| Ok(f(s))) + .unwrap() + } + pub fn try_map_unary_elementwise_to_bool( + &self, + f: impl Fn(&Series) -> PolarsResult, + ) -> PolarsResult { match self { - Column::Series(s) => f(s).into(), - Column::Partitioned(s) => s.apply_unary_elementwise(f).into(), - Column::Scalar(s) => { - ScalarColumn::from_single_value_series(f(&s.as_single_value_series()), s.len()) - .into() - }, + Column::Series(s) => f(s), + Column::Partitioned(s) => f(s.as_materialized_series()), + Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())), } } + + pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column { + self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap() + } pub fn try_apply_unary_elementwise( &self, f: impl Fn(&Series) -> PolarsResult, @@ -1285,12 +1314,7 @@ impl Column { let lhs = lhs.as_single_value_series(); let rhs = rhs.as_single_value_series(); - let result = op(&lhs, &rhs)?; - if result.is_empty() { - Ok(result.into_column()) - } else { - Ok(ScalarColumn::from_single_value_series(result, length).into_column()) - } + Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column()) }, // @partition-opt (lhs, rhs) => { @@ -1331,12 +1355,10 @@ impl Column { let lhs = lhs.as_single_value_series(); let rhs = rhs.as_single_value_series(); - let result = f(&lhs, &rhs)?; - if result.is_empty() { - Ok(result.into_column()) - } else { - Ok(ScalarColumn::from_single_value_series(result, self.len()).into_column()) - } + Ok( + ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len()) + .into_column(), + ) }, // @partition-opt (lhs, rhs) => { diff --git a/crates/polars-core/src/frame/column/scalar.rs b/crates/polars-core/src/frame/column/scalar.rs index 18e53c469960..037053d5ce3e 100644 --- a/crates/polars-core/src/frame/column/scalar.rs +++ b/crates/polars-core/src/frame/column/scalar.rs @@ -137,9 +137,10 @@ impl ScalarColumn { /// /// This will panic if the value cannot be made static or if the series has length `0`. pub fn from_single_value_series(series: Series, length: usize) -> Self { - debug_assert_eq!(series.len(), 1); - let value = series.get(0).unwrap(); - let value = value.into_static(); + debug_assert!(series.len() <= 1); + debug_assert!(length > 0 || series.is_empty()); + + let value = series.get(0).map_or(AnyValue::Null, |av| av.into_static()); let value = Scalar::new(series.dtype().clone(), value); ScalarColumn::new(series.name().clone(), value, length) } @@ -270,6 +271,14 @@ impl ScalarColumn { pub fn has_nulls(&self) -> bool { self.length != 0 && self.scalar.is_null() } + + pub fn drop_nulls(&self) -> Self { + if self.scalar.is_null() { + self.resize(0) + } else { + self.clone() + } + } } impl IntoColumn for ScalarColumn { diff --git a/crates/polars-core/src/scalar/from.rs b/crates/polars-core/src/scalar/from.rs index 35345b2a6527..3af8671dadd1 100644 --- a/crates/polars-core/src/scalar/from.rs +++ b/crates/polars-core/src/scalar/from.rs @@ -14,6 +14,7 @@ macro_rules! impl_from { } impl_from! { + (bool, Boolean, Boolean) (i8, Int8, Int8) (i16, Int16, Int16) (i32, Int32, Int32) diff --git a/crates/polars-core/src/testing.rs b/crates/polars-core/src/testing.rs index f227f2bfe861..ed7c3d4fbd3e 100644 --- a/crates/polars-core/src/testing.rs +++ b/crates/polars-core/src/testing.rs @@ -1,5 +1,4 @@ //! Testing utilities. -use std::ops::Deref; use crate::prelude::*; @@ -36,21 +35,6 @@ impl Series { } } } - - /// Get a pointer to the underlying data of this [`Series`]. - /// Can be useful for fast comparisons. - pub fn get_data_ptr(&self) -> usize { - let object = self.0.deref(); - - // SAFETY: - // A fat pointer consists of a data ptr and a ptr to the vtable. - // we specifically check that we only transmute &dyn SeriesTrait e.g. - // a trait object, therefore this is sound. - #[allow(clippy::transmute_undefined_repr)] - let (data_ptr, _vtable_ptr) = - unsafe { std::mem::transmute::<&dyn SeriesTrait, (usize, usize)>(object) }; - data_ptr - } } impl PartialEq for Series { @@ -128,26 +112,6 @@ impl DataFrame { } true } - - /// Checks if the Arc ptrs of the [`Series`] are equal - /// - /// # Example - /// - /// ```rust - /// # use polars_core::prelude::*; - /// let df1: DataFrame = df!("Atomic number" => &[1, 51, 300], - /// "Element" => &[Some("Hydrogen"), Some("Antimony"), None])?; - /// let df2: &DataFrame = &df1; - /// - /// assert!(df1.ptr_equal(df2)); - /// # Ok::<(), PolarsError>(()) - /// ``` - pub fn ptr_equal(&self, other: &DataFrame) -> bool { - self.columns - .iter() - .zip(other.columns.iter()) - .all(|(a, b)| a.get_data_ptr() == b.get_data_ptr()) - } } impl PartialEq for DataFrame {