diff --git a/crates/polars-ops/src/series/ops/rle.rs b/crates/polars-ops/src/series/ops/rle.rs index 15605a901772..31417de8fd0d 100644 --- a/crates/polars-ops/src/series/ops/rle.rs +++ b/crates/polars-ops/src/series/ops/rle.rs @@ -1,5 +1,6 @@ use polars_core::prelude::*; +/// Get the lengths of runs of identical values. pub fn rle(s: &Series) -> PolarsResult { let (s1, s2) = (s.slice(0, s.len() - 1), s.slice(1, s.len())); let s_neq = s1.not_equal_missing(&s2)?; @@ -22,6 +23,7 @@ pub fn rle(s: &Series) -> PolarsResult { Ok(StructChunked::new("rle", &outvals)?.into_series()) } +/// Similar to `rle`, but maps values to run IDs. pub fn rle_id(s: &Series) -> PolarsResult { if s.len() == 0 { return Ok(Series::new_empty("id", &DataType::UInt32)); diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index 866a7b4dbb7b..b4bd69f24735 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -105,8 +105,8 @@ impl Expr { } } - /// Overwrite the function name used for formatting - /// this is not intended to be used + /// Overwrite the function name used for formatting. + /// (this is not intended to be used). #[doc(hidden)] pub fn with_fmt(self, name: &'static str) -> Expr { self.with_function_options(|mut options| { @@ -115,47 +115,47 @@ impl Expr { }) } - /// Compare `Expr` with other `Expr` on equality + /// Compare `Expr` with other `Expr` on equality. pub fn eq>(self, other: E) -> Expr { binary_expr(self, Operator::Eq, other.into()) } - /// Compare `Expr` with other `Expr` on equality where `None == None` + /// Compare `Expr` with other `Expr` on equality where `None == None`. pub fn eq_missing>(self, other: E) -> Expr { binary_expr(self, Operator::EqValidity, other.into()) } - /// Compare `Expr` with other `Expr` on non-equality + /// Compare `Expr` with other `Expr` on non-equality. 
pub fn neq>(self, other: E) -> Expr { binary_expr(self, Operator::NotEq, other.into()) } - /// Compare `Expr` with other `Expr` on non-equality where `None == None` + /// Compare `Expr` with other `Expr` on non-equality where `None == None`. pub fn neq_missing>(self, other: E) -> Expr { binary_expr(self, Operator::NotEqValidity, other.into()) } - /// Check if `Expr` < `Expr` + /// Check if `Expr` < `Expr`. pub fn lt>(self, other: E) -> Expr { binary_expr(self, Operator::Lt, other.into()) } - /// Check if `Expr` > `Expr` + /// Check if `Expr` > `Expr`. pub fn gt>(self, other: E) -> Expr { binary_expr(self, Operator::Gt, other.into()) } - /// Check if `Expr` >= `Expr` + /// Check if `Expr` >= `Expr`. pub fn gt_eq>(self, other: E) -> Expr { binary_expr(self, Operator::GtEq, other.into()) } - /// Check if `Expr` <= `Expr` + /// Check if `Expr` <= `Expr`. pub fn lt_eq>(self, other: E) -> Expr { binary_expr(self, Operator::LtEq, other.into()) } - /// Negate `Expr` + /// Negate `Expr`. #[allow(clippy::should_implement_trait)] pub fn not(self) -> Expr { self.map_private(BooleanFunction::Not.into()) @@ -178,12 +178,12 @@ impl Expr { self.map_private(BooleanFunction::IsNotNull.into()) } - /// Drop null values + /// Drop null values. pub fn drop_nulls(self) -> Self { self.apply(|s| Ok(Some(s.drop_nulls())), GetOutput::same_type()) } - /// Drop NaN values + /// Drop NaN values. pub fn drop_nans(self) -> Self { self.apply_private(FunctionExpr::DropNans) } @@ -254,7 +254,7 @@ impl Expr { AggExpr::Last(Box::new(self)).into() } - /// Aggregate the group to a Series + /// Aggregate the group to a Series. pub fn implode(self) -> Self { AggExpr::Implode(Box::new(self)).into() } @@ -274,12 +274,12 @@ impl Expr { AggExpr::AggGroups(Box::new(self)).into() } - /// Alias for explode + /// Alias for `explode`. pub fn flatten(self) -> Self { self.explode() } - /// Explode the utf8/ list column + /// Explode the utf8/ list column. 
pub fn explode(self) -> Self { Expr::Explode(Box::new(self)) } @@ -318,12 +318,12 @@ impl Expr { ) } - /// Get the first `n` elements of the Expr result + /// Get the first `n` elements of the Expr result. pub fn head(self, length: Option) -> Self { self.slice(lit(0), lit(length.unwrap_or(10) as u64)) } - /// Get the last `n` elements of the Expr result + /// Get the last `n` elements of the Expr result. pub fn tail(self, length: Option) -> Self { let len = length.unwrap_or(10); self.slice(lit(-(len as i64)), lit(len as u64)) @@ -349,7 +349,7 @@ impl Expr { .with_fmt("arg_unique") } - /// Get the index value that has the minimum value + /// Get the index value that has the minimum value. pub fn arg_min(self) -> Self { let options = FunctionOptions { collect_groups: ApplyOptions::ApplyGroups, @@ -370,7 +370,7 @@ impl Expr { ) } - /// Get the index value that has the maximum value + /// Get the index value that has the maximum value. pub fn arg_max(self) -> Self { let options = FunctionOptions { collect_groups: ApplyOptions::ApplyGroups, @@ -424,7 +424,7 @@ impl Expr { } /// Cast expression to another data type. - /// Throws an error if conversion had overflows + /// Throws an error if conversion had overflows. pub fn strict_cast(self, data_type: DataType) -> Self { Expr::Cast { expr: Box::new(self), @@ -534,7 +534,7 @@ impl Expr { } } - /// Apply a function/closure once the logical plan get executed with many arguments + /// Apply a function/closure once the logical plan gets executed with many arguments. /// /// See the [`Expr::map`] function for the differences between [`map`](Expr::map) and [`apply`](Expr::apply). pub fn map_many(self, function: F, arguments: &[Expr], output_type: GetOutput) -> Self @@ -707,24 +707,24 @@ impl Expr { } } - /// Get mask of finite values if dtype is Float + /// Get mask of finite values if dtype is Float. 
#[allow(clippy::wrong_self_convention)] pub fn is_finite(self) -> Self { self.map_private(BooleanFunction::IsFinite.into()) } - /// Get mask of infinite values if dtype is Float + /// Get mask of infinite values if dtype is Float. #[allow(clippy::wrong_self_convention)] pub fn is_infinite(self) -> Self { self.map_private(BooleanFunction::IsInfinite.into()) } - /// Get mask of NaN values if dtype is Float + /// Get mask of NaN values if dtype is Float. pub fn is_nan(self) -> Self { self.map_private(BooleanFunction::IsNan.into()) } - /// Get inverse mask of NaN values if dtype is Float + /// Get inverse mask of NaN values if dtype is Float. pub fn is_not_nan(self) -> Self { self.map_private(BooleanFunction::IsNotNan.into()) } @@ -749,27 +749,27 @@ impl Expr { self.apply_private(FunctionExpr::Cumcount { reverse }) } - /// Get an array with the cumulative sum computed at every element + /// Get an array with the cumulative sum computed at every element. pub fn cumsum(self, reverse: bool) -> Self { self.apply_private(FunctionExpr::Cumsum { reverse }) } - /// Get an array with the cumulative product computed at every element + /// Get an array with the cumulative product computed at every element. pub fn cumprod(self, reverse: bool) -> Self { self.apply_private(FunctionExpr::Cumprod { reverse }) } - /// Get an array with the cumulative min computed at every element + /// Get an array with the cumulative min computed at every element. pub fn cummin(self, reverse: bool) -> Self { self.apply_private(FunctionExpr::Cummin { reverse }) } - /// Get an array with the cumulative max computed at every element + /// Get an array with the cumulative max computed at every element. pub fn cummax(self, reverse: bool) -> Self { self.apply_private(FunctionExpr::Cummax { reverse }) } - /// Get the product aggregation of an expression + /// Get the product aggregation of an expression. 
pub fn product(self) -> Self { let options = FunctionOptions { collect_groups: ApplyOptions::ApplyGroups, @@ -979,24 +979,24 @@ impl Expr { AggExpr::Count(Box::new(self)).into() } - /// Standard deviation of the values of the Series + /// Standard deviation of the values of the Series. pub fn std(self, ddof: u8) -> Self { AggExpr::Std(Box::new(self), ddof).into() } - /// Variance of the values of the Series + /// Variance of the values of the Series. pub fn var(self, ddof: u8) -> Self { AggExpr::Var(Box::new(self), ddof).into() } - /// Get a mask of duplicated values + /// Get a mask of duplicated values. #[allow(clippy::wrong_self_convention)] #[cfg(feature = "is_unique")] pub fn is_duplicated(self) -> Self { self.apply_private(BooleanFunction::IsDuplicated.into()) } - /// Get a mask of unique values + /// Get a mask of unique values. #[allow(clippy::wrong_self_convention)] #[cfg(feature = "is_unique")] pub fn is_unique(self) -> Self { @@ -1013,17 +1013,17 @@ impl Expr { }) } - /// and operation + /// "and" operation. pub fn and>(self, expr: E) -> Self { binary_expr(self, Operator::And, expr.into()) } - // xor operation + /// "xor" operation. pub fn xor>(self, expr: E) -> Self { binary_expr(self, Operator::Xor, expr.into()) } - /// or operation + /// "or" operation. pub fn or>(self, expr: E) -> Self { binary_expr(self, Operator::Or, expr.into()) } @@ -1093,7 +1093,7 @@ impl Expr { #[cfg(feature = "repeat_by")] /// Repeat the column `n` times, where `n` is determined by the values in `by`. - /// This yields an `Expr` of dtype `List` + /// This yields an `Expr` of dtype `List`. pub fn repeat_by>(self, by: E) -> Expr { self.repeat_by_impl(by.into()) } @@ -1116,6 +1116,7 @@ impl Expr { (self * other).sum() } + /// Compute the dot/inner product between two expressions. 
pub fn dot>(self, other: E) -> Expr { self.dot_impl(other.into()) } @@ -1206,8 +1207,8 @@ impl Expr { Expr::Exclude(Box::new(self), v) } - // Interpolate None values #[cfg(feature = "interpolate")] + /// Fill null values using interpolation. pub fn interpolate(self, method: InterpolationMethod) -> Expr { self.apply_private(FunctionExpr::Interpolate(method)) } @@ -1366,7 +1367,7 @@ impl Expr { ) } - /// Apply a rolling variance + /// Apply a rolling variance. #[cfg(feature = "rolling_window")] pub fn rolling_var(self, options: RollingOptions) -> Expr { self.finish_rolling( @@ -1378,7 +1379,7 @@ impl Expr { ) } - /// Apply a rolling std-dev + /// Apply a rolling std-dev. #[cfg(feature = "rolling_window")] pub fn rolling_std(self, options: RollingOptions) -> Expr { self.finish_rolling( @@ -1390,7 +1391,7 @@ impl Expr { ) } - /// Apply a rolling skew + /// Apply a rolling skew. #[cfg(feature = "rolling_window")] #[cfg(feature = "moment")] pub fn rolling_skew(self, window_size: usize, bias: bool) -> Expr { @@ -1452,6 +1453,7 @@ impl Expr { } #[cfg(feature = "rank")] + /// Assign ranks to data, dealing with ties appropriately. pub fn rank(self, options: RankOptions, seed: Option) -> Expr { self.apply( move |s| Ok(Some(s.rank(options, seed))), @@ -1464,6 +1466,7 @@ impl Expr { } #[cfg(feature = "cutqcut")] + /// Bin continuous values into discrete categories. pub fn cut( self, breaks: Vec, @@ -1480,6 +1483,7 @@ impl Expr { } #[cfg(feature = "cutqcut")] + /// Bin continuous values into discrete categories based on their quantiles. pub fn qcut( self, probs: Vec, @@ -1498,6 +1502,7 @@ impl Expr { } #[cfg(feature = "cutqcut")] + /// Bin continuous values into discrete categories using uniform quantile probabilities. pub fn qcut_uniform( self, n_bins: usize, @@ -1517,20 +1522,25 @@ impl Expr { } #[cfg(feature = "rle")] + /// Get the lengths of runs of identical values. 
pub fn rle(self) -> Expr { self.apply_private(FunctionExpr::RLE) } + #[cfg(feature = "rle")] + /// Similar to `rle`, but maps values to run IDs. pub fn rle_id(self) -> Expr { self.apply_private(FunctionExpr::RLEID) } #[cfg(feature = "diff")] + /// Calculate the n-th discrete difference between values. pub fn diff(self, n: i64, null_behavior: NullBehavior) -> Expr { self.apply_private(FunctionExpr::Diff(n, null_behavior)) } #[cfg(feature = "pct_change")] + /// Compute the percentage change between values. pub fn pct_change(self, n: i64) -> Expr { use DataType::*; self.apply( @@ -1570,6 +1580,13 @@ impl Expr { } #[cfg(feature = "moment")] + /// Compute the kurtosis (Fisher or Pearson). + /// + /// Kurtosis is the fourth central moment divided by the square of the + /// variance. If Fisher's definition is used, then 3.0 is subtracted from + /// the result to give 0.0 for a normal distribution. + /// If `bias` is `false` then the kurtosis is calculated using k statistics to + /// eliminate bias coming from biased moment estimators. pub fn kurtosis(self, fisher: bool, bias: bool) -> Expr { self.apply( move |s| { @@ -1623,6 +1640,7 @@ impl Expr { } #[cfg(feature = "ewma")] + /// Calculate the exponentially-weighted moving average. pub fn ewm_mean(self, options: EWMOptions) -> Self { use DataType::*; self.apply( @@ -1636,6 +1654,7 @@ impl Expr { } #[cfg(feature = "ewma")] + /// Calculate the exponentially-weighted moving standard deviation. pub fn ewm_std(self, options: EWMOptions) -> Self { use DataType::*; self.apply( @@ -1649,6 +1668,7 @@ impl Expr { } #[cfg(feature = "ewma")] + /// Calculate the exponentially-weighted moving variance. 
pub fn ewm_var(self, options: EWMOptions) -> Self { use DataType::*; self.apply( @@ -1699,8 +1719,8 @@ impl Expr { } #[cfg(feature = "dtype-struct")] - /// Count all unique values and create a struct mapping value to count - /// Note that it is better to turn parallel off in the aggregation context + /// Count all unique values and create a struct mapping value to count. + /// (Note that it is better to turn parallel off in the aggregation context). pub fn value_counts(self, sort: bool, parallel: bool) -> Self { self.apply( move |s| { @@ -1724,7 +1744,7 @@ impl Expr { #[cfg(feature = "unique_counts")] /// Returns a count of the unique values in the order of appearance. /// This method differs from [`Expr::value_counts]` in that it does not return the - /// values, only the counts and might be faster + /// values, only the counts and might be faster. pub fn unique_counts(self) -> Self { self.apply( |s| Ok(Some(s.unique_counts().into_series())), @@ -1734,19 +1754,19 @@ impl Expr { } #[cfg(feature = "log")] - /// Compute the logarithm to a given base + /// Compute the logarithm to a given base. pub fn log(self, base: f64) -> Self { self.map_private(FunctionExpr::Log { base }) } #[cfg(feature = "log")] - /// Compute the natural logarithm of all elements plus one in the input array + /// Compute the natural logarithm of all elements plus one in the input array. pub fn log1p(self) -> Self { self.map_private(FunctionExpr::Log1p) } #[cfg(feature = "log")] - /// Calculate the exponential of all elements in the input array + /// Calculate the exponential of all elements in the input array. pub fn exp(self) -> Self { self.map_private(FunctionExpr::Exp) } @@ -1761,7 +1781,7 @@ impl Expr { options }) } - /// Get the null count of the column/group + /// Get the null count of the column/group. 
pub fn null_count(self) -> Expr { self.apply_private(FunctionExpr::NullCount) .with_function_options(|mut options| { @@ -1780,7 +1800,7 @@ impl Expr { } #[cfg(feature = "row_hash")] - /// Compute the hash of every element + /// Compute the hash of every element. pub fn hash(self, k0: u64, k1: u64, k2: u64, k3: u64) -> Expr { self.map_private(FunctionExpr::Hash(k0, k1, k2, k3)) } @@ -1807,19 +1827,19 @@ impl Expr { list::ListNameSpace(self) } - /// Get the [`array::ArrayNameSpace`] + /// Get the [`array::ArrayNameSpace`]. #[cfg(feature = "dtype-array")] pub fn arr(self) -> array::ArrayNameSpace { array::ArrayNameSpace(self) } - /// Get the [`CategoricalNameSpace`] + /// Get the [`CategoricalNameSpace`]. #[cfg(feature = "dtype-categorical")] pub fn cat(self) -> cat::CategoricalNameSpace { cat::CategoricalNameSpace(self) } - /// Get the [`struct_::StructNameSpace`] + /// Get the [`struct_::StructNameSpace`]. #[cfg(feature = "dtype-struct")] pub fn struct_(self) -> struct_::StructNameSpace { struct_::StructNameSpace(self) @@ -1923,17 +1943,17 @@ where } } -/// Count expression +/// Count expression. pub fn count() -> Expr { Expr::Count } -/// First column in DataFrame +/// First column in DataFrame. pub fn first() -> Expr { Expr::Nth(0) } -/// Last column in DataFrame +/// Last column in DataFrame. pub fn last() -> Expr { Expr::Nth(-1) } diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index f42cf5fff2ef..ec5dce459308 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -7396,7 +7396,7 @@ def kurtosis(self, *, fisher: bool = True, bias: bool = True) -> Self: variance. If Fisher's definition is used, then 3.0 is subtracted from the result to give 0.0 for a normal distribution. If bias is False then the kurtosis is calculated using k statistics to - eliminate bias coming from biased moment estimators + eliminate bias coming from biased moment estimators. See scipy.stats for more information