Skip to content

Commit

Permalink
feat(rust, python): {any/all}_horizontal to expression architecture (p…
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Aug 10, 2023
1 parent 547feef commit 577fa72
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 24 deletions.
56 changes: 51 additions & 5 deletions crates/polars-plan/src/dsl/function_expr/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use std::ops::Not;
use std::ops::{BitAnd, BitOr, Not};

use polars_core::POOL;
use rayon::prelude::*;

use super::*;
use crate::map;
#[cfg(feature = "is_in")]
use crate::wrap;
use crate::{map, wrap};

#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq, Debug, Eq, Hash)]
Expand All @@ -29,11 +30,18 @@ pub enum BooleanFunction {
IsDuplicated,
#[cfg(feature = "is_in")]
IsIn,
AllHorizontal,
AnyHorizontal,
}

impl BooleanFunction {
// Resolves the output field of a boolean function.
// Every arm yields `DataType::Boolean`; the horizontal variants additionally fix the
// field name to "all" / "any", while all other variants go through
// `mapper.with_dtype`.
pub(super) fn get_field(&self, mapper: FieldsMapper) -> PolarsResult<Field> {
// NOTE(review): this text is a diff rendered without +/- markers; the bare
// `mapper.with_dtype(...)` line directly below appears to be the removed
// pre-change body, superseded by the `match` that follows — confirm.
mapper.with_dtype(DataType::Boolean)
use BooleanFunction::*;
match self {
// Horizontal reductions construct their field directly with a fixed name.
AllHorizontal => Ok(Field::new("all", DataType::Boolean)),
AnyHorizontal => Ok(Field::new("any", DataType::Boolean)),
// Remaining variants only set the dtype via the mapper.
_ => mapper.with_dtype(DataType::Boolean),
}
}
}

Expand All @@ -58,6 +66,8 @@ impl Display for BooleanFunction {
IsDuplicated => "is_duplicated",
#[cfg(feature = "is_in")]
IsIn => "is_in",
// Horizontal reductions: the displayed name must match the variant.
AnyHorizontal => "any_horizontal",
AllHorizontal => "all_horizontal",
};
write!(f, "{s}")
}
Expand All @@ -84,6 +94,8 @@ impl From<BooleanFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
IsDuplicated => map!(is_duplicated),
#[cfg(feature = "is_in")]
IsIn => wrap!(is_in),
AllHorizontal => wrap!(all_horizontal),
AnyHorizontal => wrap!(any_horizontal),
}
}
}
Expand Down Expand Up @@ -153,3 +165,37 @@ fn is_in(s: &mut [Series]) -> PolarsResult<Option<Series>> {
let other = &s[1];
left.is_in(other).map(|ca| Some(ca.into_series()))
}

fn any_horizontal(s: &mut [Series]) -> PolarsResult<Option<Series>> {
let mut out = POOL.install(|| {
s.par_iter()
.try_fold(
|| BooleanChunked::new("", &[false]),
|acc, b| {
let b = b.cast(&DataType::Boolean)?;
let b = b.bool()?;
PolarsResult::Ok((&acc).bitor(b))
},
)
.try_reduce(|| BooleanChunked::new("", [false]), |a, b| Ok(a.bitor(b)))
})?;
out.rename("any");
Ok(Some(out.into_series()))
}

fn all_horizontal(s: &mut [Series]) -> PolarsResult<Option<Series>> {
let mut out = POOL.install(|| {
s.par_iter()
.try_fold(
|| BooleanChunked::new("", &[true]),
|acc, b| {
let b = b.cast(&DataType::Boolean)?;
let b = b.bool()?;
PolarsResult::Ok((&acc).bitand(b))
},
)
.try_reduce(|| BooleanChunked::new("", [true]), |a, b| Ok(a.bitand(b)))
})?;
out.rename("all");
Ok(Some(out.into_series()))
}
40 changes: 24 additions & 16 deletions crates/polars-plan/src/dsl/functions/horizontal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,29 +195,37 @@ where
/// The name of the resulting column will be "all"; use [`alias`](Expr::alias) to choose a different name.
pub fn all_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
// Copy the caller's expressions into an owned `Vec`.
let exprs = exprs.as_ref().to_vec();
// NOTE(review): this text is a diff rendered without +/- markers. The closure and
// the `fold_exprs(...)` call below appear to be the removed implementation
// (pairwise AND starting from `lit(true)`, aliased to "all") — confirm.
let func = |s1: Series, s2: Series| {
Ok(Some(
s1.bool()?
.bitand(s2.cast(&DataType::Boolean)?.bool()?)
.into_series(),
))
};
fold_exprs(lit(true), func, exprs).alias("all")
// Apparent replacement: a single function node that takes all expressions as
// input and dispatches to `BooleanFunction::AllHorizontal`.
Expr::Function {
input: exprs,
function: FunctionExpr::Boolean(BooleanFunction::AllHorizontal),
options: FunctionOptions {
collect_groups: ApplyOptions::ApplyFlat,
input_wildcard_expansion: true,
auto_explode: true,
cast_to_supertypes: false,
// Lets the physical expression rename the output.
allow_rename: true,
..Default::default()
},
}
}

/// Create a new column with the bitwise-or of the elements in each row.
///
/// The name of the resulting column will be "any"; use [`alias`](Expr::alias) to choose a different name.
pub fn any_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
// Copy the caller's expressions into an owned `Vec`.
let exprs = exprs.as_ref().to_vec();
// NOTE(review): this text is a diff rendered without +/- markers. The closure and
// the `fold_exprs(...)` call below appear to be the removed implementation
// (pairwise OR starting from `lit(false)`, aliased to "any") — confirm.
let func = |s1: Series, s2: Series| {
Ok(Some(
s1.bool()?
.bitor(s2.cast(&DataType::Boolean)?.bool()?)
.into_series(),
))
};
fold_exprs(lit(false), func, exprs).alias("any")
// Apparent replacement: a single function node that takes all expressions as
// input and dispatches to `BooleanFunction::AnyHorizontal`.
Expr::Function {
input: exprs,
function: FunctionExpr::Boolean(BooleanFunction::AnyHorizontal),
options: FunctionOptions {
collect_groups: ApplyOptions::ApplyFlat,
input_wildcard_expansion: true,
auto_explode: true,
cast_to_supertypes: false,
// Lets the physical expression rename the output.
allow_rename: true,
..Default::default()
},
}
}

/// Create a new column with the maximum value per row.
Expand Down
2 changes: 0 additions & 2 deletions crates/polars-plan/src/dsl/functions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ mod selectors;
mod syntactic_sugar;
mod temporal;

use std::ops::{BitAnd, BitOr};

pub use arity::*;
pub use coerce::*;
pub use concat::*;
Expand Down
4 changes: 3 additions & 1 deletion crates/polars-plan/src/logical_plan/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,9 @@ pub struct FunctionOptions {
pub auto_explode: bool,
// if the expression and its inputs should be cast to supertypes
pub cast_to_supertypes: bool,
// apply physical expression may rename the output of this function
// The physical expression may rename the output of this function.
// If set to `false`, the physical engine will ensure the output is
// named after the left input expression.
pub allow_rename: bool,
// if set, then the `Series` passed to the function in the groupby operation
// will ensure the name is set. This is an extra heap allocation per group.
Expand Down

0 comments on commit 577fa72

Please sign in to comment.