Skip to content

Commit

Permalink
feat(rust, python): Optional three-valued logic for any/all (#9848)
Browse files Browse the repository at this point in the history
  • Loading branch information
magarick authored Jul 16, 2023
1 parent 4a12df1 commit bb36e4c
Show file tree
Hide file tree
Showing 9 changed files with 104 additions and 40 deletions.
17 changes: 17 additions & 0 deletions polars/polars-core/src/chunked_array/comparison/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,23 @@ impl BooleanChunked {
pub fn any(&self) -> bool {
self.downcast_iter().any(compute::boolean::any)
}

// Three-valued versions which can return None
/// Three-valued variant of `all` that may yield `None`.
///
/// * `drop_nulls == true`, or the array contains no nulls: returns
///   `Some(self.all())`.
/// * `drop_nulls == false` and at least one null is present: returns `None`.
pub fn all_3val(&self, drop_nulls: bool) -> Option<bool> {
    // Nulls only influence the outcome when the caller asked to keep them
    // and there actually are some.
    let nulls_matter = !drop_nulls && self.null_count() != 0;
    if nulls_matter {
        return None;
    }
    Some(self.all())
}
/// Three-valued variant of `any` that may yield `None`.
///
/// * `drop_nulls == true`: returns `Some(self.any())`.
/// * `drop_nulls == false`:
///   * `Some(true)` when `self.any()` is `true`;
///   * `Some(false)` when `self.any()` is `false` and the array has no nulls;
///   * `None` when `self.any()` is `false` and at least one null is present.
pub fn any_3val(&self, drop_nulls: bool) -> Option<bool> {
    let res = self.any();
    // A `false` result is only indeterminate when nulls are present and must
    // be respected; without the null-count check an all-`false`, null-free
    // array would wrongly produce `None`.
    if drop_nulls || res || self.null_count() == 0 {
        Some(res)
    } else {
        None
    }
}
}

// private
Expand Down
32 changes: 14 additions & 18 deletions polars/polars-lazy/polars-plan/src/dsl/function_expr/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@ use crate::wrap;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq, Debug, Eq, Hash)]
pub enum BooleanFunction {
All,
Any,
All {
drop_nulls: bool,
},
Any {
drop_nulls: bool,
},
IsNot,
IsNull,
IsNotNull,
Expand Down Expand Up @@ -37,8 +41,8 @@ impl Display for BooleanFunction {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
use BooleanFunction::*;
let s = match self {
All => "all",
Any => "any",
All { .. } => "all",
Any { .. } => "any",
IsNot => "is_not",
IsNull => "is_null",
IsNotNull => "is_not_null",
Expand All @@ -63,8 +67,8 @@ impl From<BooleanFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
fn from(func: BooleanFunction) -> Self {
use BooleanFunction::*;
match func {
All => map!(all),
Any => map!(any),
All { drop_nulls } => map!(all, drop_nulls),
Any { drop_nulls } => map!(any, drop_nulls),
IsNot => map!(is_not),
IsNull => map!(is_null),
IsNotNull => map!(is_not_null),
Expand All @@ -90,22 +94,14 @@ impl From<BooleanFunction> for FunctionExpr {
}
}

fn all(s: &Series) -> PolarsResult<Series> {
fn all(s: &Series, drop_nulls: bool) -> PolarsResult<Series> {
let boolean = s.bool()?;
if boolean.all() {
Ok(Series::new(s.name(), [true]))
} else {
Ok(Series::new(s.name(), [false]))
}
Ok(Series::new(s.name(), [boolean.all_3val(drop_nulls)]))
}

fn any(s: &Series) -> PolarsResult<Series> {
fn any(s: &Series, drop_nulls: bool) -> PolarsResult<Series> {
let boolean = s.bool()?;
if boolean.any() {
Ok(Series::new(s.name(), [true]))
} else {
Ok(Series::new(s.name(), [false]))
}
Ok(Series::new(s.name(), [boolean.any_3val(drop_nulls)]))
}

fn is_not(s: &Series) -> PolarsResult<Series> {
Expand Down
8 changes: 4 additions & 4 deletions polars/polars-lazy/polars-plan/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1639,8 +1639,8 @@ impl Expr {
}

/// Check if any boolean value is `true`
pub fn any(self) -> Self {
self.apply_private(BooleanFunction::Any.into())
pub fn any(self, drop_nulls: bool) -> Self {
self.apply_private(BooleanFunction::Any { drop_nulls }.into())
.with_function_options(|mut opt| {
opt.auto_explode = true;
opt
Expand All @@ -1655,8 +1655,8 @@ impl Expr {
}

/// Check if all boolean values are `true`
pub fn all(self) -> Self {
self.apply_private(BooleanFunction::All.into())
pub fn all(self, drop_nulls: bool) -> Self {
self.apply_private(BooleanFunction::All { drop_nulls }.into())
.with_function_options(|mut opt| {
opt.auto_explode = true;
opt
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-sql/src/sql_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ pub(crate) struct SqlExprVisitor<'a> {
impl SqlExprVisitor<'_> {
fn visit_expr(&self, expr: &SqlExpr) -> PolarsResult<Expr> {
match expr {
SqlExpr::AllOp(_) => Ok(self.visit_expr(expr)?.all()),
SqlExpr::AnyOp(expr) => Ok(self.visit_expr(expr)?.any()),
SqlExpr::AllOp(_) => Ok(self.visit_expr(expr)?.all(true)),
SqlExpr::AnyOp(expr) => Ok(self.visit_expr(expr)?.any(true)),
SqlExpr::ArrayAgg(expr) => self.visit_arr_agg(expr),
SqlExpr::Between {
expr,
Expand Down
4 changes: 2 additions & 2 deletions polars/tests/it/lazy/expressions/window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,8 @@ fn test_window_exprs_any_all() -> PolarsResult<()> {
]?
.lazy()
.select([
col("var2").any().over([col("var1")]).alias("any"),
col("var2").all().over([col("var1")]).alias("all"),
col("var2").any(true).over([col("var1")]).alias("any"),
col("var2").all(true).over([col("var1")]).alias("all"),
])
.collect()?;

Expand Down
57 changes: 53 additions & 4 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,10 +311,15 @@ def to_physical(self) -> Self:
"""
return self._from_pyexpr(self._pyexpr.to_physical())

def any(self) -> Self:
def any(self, drop_nulls: bool = True) -> Self:
"""
Check if any boolean value in a Boolean column is `True`.
Parameters
----------
drop_nulls
If False, return None if there are nulls but no Trues.
Returns
-------
Boolean literal
Expand All @@ -331,17 +336,42 @@ def any(self) -> Self:
╞══════╪═══════╡
│ true ┆ false │
└──────┴───────┘
>>> df = pl.DataFrame(dict(x=[None, False], y=[None, True]))
>>> df.select(pl.col("x").any(True), pl.col("y").any(True))
shape: (1, 2)
┌───────┬──────┐
│ x ┆ y │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪══════╡
│ false ┆ true │
└───────┴──────┘
>>> df.select(pl.col("x").any(False), pl.col("y").any(False))
shape: (1, 2)
┌──────┬──────┐
│ x ┆ y │
│ --- ┆ --- │
│ bool ┆ bool │
╞══════╪══════╡
│ null ┆ true │
└──────┴──────┘
"""
return self._from_pyexpr(self._pyexpr.any())
return self._from_pyexpr(self._pyexpr.any(drop_nulls))

def all(self) -> Self:
def all(self, drop_nulls: bool = True) -> Self:
"""
Check if all boolean values in a Boolean column are `True`.
This method is an expression - not to be confused with
:func:`polars.all` which is a function to select all columns.
Parameters
----------
drop_nulls
If False, return None if there are any nulls.
Returns
-------
Boolean literal
Expand All @@ -360,9 +390,28 @@ def all(self) -> Self:
╞══════╪═══════╪═══════╡
│ true ┆ false ┆ false │
└──────┴───────┴───────┘
>>> df = pl.DataFrame(dict(x=[None, False], y=[None, True]))
>>> df.select(pl.col("x").all(True), pl.col("y").all(True))
shape: (1, 2)
┌───────┬───────┐
│ x ┆ y │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ false ┆ false │
└───────┴───────┘
>>> df.select(pl.col("x").all(False), pl.col("y").all(False))
shape: (1, 2)
┌──────┬──────┐
│ x ┆ y │
│ --- ┆ --- │
│ bool ┆ bool │
╞══════╪══════╡
│ null ┆ null │
└──────┴──────┘
"""
return self._from_pyexpr(self._pyexpr.all())
return self._from_pyexpr(self._pyexpr.all(drop_nulls))

def arg_true(self) -> Self:
"""
Expand Down
6 changes: 4 additions & 2 deletions py-polars/polars/functions/aggregation/vertical.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def all(
@deprecated_alias(columns="exprs")
def all(
exprs: IntoExpr | Iterable[IntoExpr] | None = None, *more_exprs: IntoExpr
) -> Expr | bool:
) -> Expr | bool | None:
"""
Either return an expression representing all columns, or evaluate a bitwise AND operation.
Expand Down Expand Up @@ -115,7 +115,9 @@ def any(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:


@deprecated_alias(columns="exprs")
def any(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr | bool:
def any(
exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr
) -> Expr | bool | None:
"""
Evaluate a bitwise OR operation.
Expand Down
8 changes: 4 additions & 4 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1196,7 +1196,7 @@ def sqrt(self) -> Series:
"""

def any(self) -> bool:
def any(self, drop_nulls: bool = True) -> bool | None:
"""
Check if any boolean value in the column is `True`.
Expand All @@ -1205,9 +1205,9 @@ def any(self) -> bool:
Boolean literal
"""
return self.to_frame().select(F.col(self.name).any()).to_series()[0]
return self.to_frame().select(F.col(self.name).any(drop_nulls)).to_series()[0]

def all(self) -> bool:
def all(self, drop_nulls: bool = True) -> bool | None:
"""
Check if all boolean values in the column are `True`.
Expand All @@ -1216,7 +1216,7 @@ def all(self) -> bool:
Boolean literal
"""
return self.to_frame().select(F.col(self.name).all()).to_series()[0]
return self.to_frame().select(F.col(self.name).all(drop_nulls)).to_series()[0]

def log(self, base: float = math.e) -> Series:
"""Compute the logarithm to a given base."""
Expand Down
8 changes: 4 additions & 4 deletions py-polars/src/expr/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1112,12 +1112,12 @@ impl PyExpr {
.with_fmt("extend")
.into()
}
fn any(&self) -> Self {
self.inner.clone().any().into()
fn any(&self, drop_nulls: bool) -> Self {
self.inner.clone().any(drop_nulls).into()
}

fn all(&self) -> Self {
self.inner.clone().all().into()
fn all(&self, drop_nulls: bool) -> Self {
self.inner.clone().all(drop_nulls).into()
}

fn log(&self, base: f64) -> Self {
Expand Down

0 comments on commit bb36e4c

Please sign in to comment.