Skip to content

Commit

Permalink
fix(rust): Raise if *_horizontal without inputs (#12106)
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa authored Nov 1, 2023
1 parent 1b9c169 commit def47f8
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 46 deletions.
16 changes: 3 additions & 13 deletions crates/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ pub use parquet::*;
use polars_core::frame::explode::MeltArgs;
use polars_core::prelude::*;
use polars_io::RowCount;
use polars_plan::dsl::all_horizontal;
pub use polars_plan::frame::{AllowedOptimizations, OptState};
use polars_plan::global::FETCH_ROWS;
#[cfg(any(feature = "ipc", feature = "parquet", feature = "csv"))]
Expand Down Expand Up @@ -1411,18 +1410,9 @@ impl LazyFrame {
/// `subset` is an optional `Vec` of column names to consider for nulls; if None, all
/// columns are considered.
pub fn drop_nulls(self, subset: Option<Vec<Expr>>) -> LazyFrame {
match subset {
None => self.filter(all_horizontal([col("*").is_not_null()])),
Some(subset) => {
let predicate = all_horizontal(
subset
.into_iter()
.map(|e| e.is_not_null())
.collect::<Vec<_>>(),
);
self.filter(predicate)
},
}
let opt_state = self.get_opt_state();
let lp = self.get_plan_builder().drop_nulls(subset).build();
Self::from_logical_plan(lp, opt_state)
}

/// Slice the DataFrame using an offset (starting row) and a length.
Expand Down
43 changes: 22 additions & 21 deletions crates/polars-plan/src/dsl/functions/horizontal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,11 @@ where
/// Create a new column with the the bitwise-and of the elements in each row.
///
/// The name of the resulting column will be "all"; use [`alias`](Expr::alias) to choose a different name.
pub fn all_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
pub fn all_horizontal<E: AsRef<[Expr]>>(exprs: E) -> PolarsResult<Expr> {
let exprs = exprs.as_ref().to_vec();
Expr::Function {
polars_ensure!(!exprs.is_empty(), ComputeError: "cannot return empty fold because the number of output rows is unknown");

Ok(Expr::Function {
input: exprs,
function: FunctionExpr::Boolean(BooleanFunction::AllHorizontal),
options: FunctionOptions {
Expand All @@ -206,15 +208,17 @@ pub fn all_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
allow_rename: true,
..Default::default()
},
}
})
}

/// Create a new column with the the bitwise-or of the elements in each row.
///
/// The name of the resulting column will be "any"; use [`alias`](Expr::alias) to choose a different name.
pub fn any_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
pub fn any_horizontal<E: AsRef<[Expr]>>(exprs: E) -> PolarsResult<Expr> {
let exprs = exprs.as_ref().to_vec();
Expr::Function {
polars_ensure!(!exprs.is_empty(), ComputeError: "cannot return empty fold because the number of output rows is unknown");

Ok(Expr::Function {
input: exprs,
function: FunctionExpr::Boolean(BooleanFunction::AnyHorizontal),
options: FunctionOptions {
Expand All @@ -225,19 +229,17 @@ pub fn any_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
allow_rename: true,
..Default::default()
},
}
})
}

/// Create a new column with the the maximum value per row.
///
/// The name of the resulting column will be `"max"`; use [`alias`](Expr::alias) to choose a different name.
pub fn max_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
pub fn max_horizontal<E: AsRef<[Expr]>>(exprs: E) -> PolarsResult<Expr> {
let exprs = exprs.as_ref().to_vec();
if exprs.is_empty() {
return Expr::Columns(Vec::new());
}
polars_ensure!(!exprs.is_empty(), ComputeError: "cannot return empty fold because the number of output rows is unknown");

Expr::Function {
Ok(Expr::Function {
input: exprs,
function: FunctionExpr::MaxHorizontal,
options: FunctionOptions {
Expand All @@ -247,19 +249,17 @@ pub fn max_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
allow_rename: true,
..Default::default()
},
}
})
}

/// Create a new column with the the minimum value per row.
///
/// The name of the resulting column will be `"min"`; use [`alias`](Expr::alias) to choose a different name.
pub fn min_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
pub fn min_horizontal<E: AsRef<[Expr]>>(exprs: E) -> PolarsResult<Expr> {
let exprs = exprs.as_ref().to_vec();
if exprs.is_empty() {
return Expr::Columns(Vec::new());
}
polars_ensure!(!exprs.is_empty(), ComputeError: "cannot return empty fold because the number of output rows is unknown");

Expr::Function {
Ok(Expr::Function {
input: exprs,
function: FunctionExpr::MinHorizontal,
options: FunctionOptions {
Expand All @@ -269,16 +269,17 @@ pub fn min_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
allow_rename: true,
..Default::default()
},
}
})
}

/// Create a new column with the the sum of the values in each row.
///
/// The name of the resulting column will be `"sum"`; use [`alias`](Expr::alias) to choose a different name.
pub fn sum_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
pub fn sum_horizontal<E: AsRef<[Expr]>>(exprs: E) -> PolarsResult<Expr> {
let exprs = exprs.as_ref().to_vec();
polars_ensure!(!exprs.is_empty(), ComputeError: "cannot return empty fold because the number of output rows is unknown");

Expr::Function {
Ok(Expr::Function {
input: exprs,
function: FunctionExpr::SumHorizontal,
options: FunctionOptions {
Expand All @@ -289,7 +290,7 @@ pub fn sum_horizontal<E: AsRef<[Expr]>>(exprs: E) -> Expr {
allow_rename: true,
..Default::default()
},
}
})
}

/// Folds the expressions from left to right keeping the first non-null values.
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/functions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mod arity;
mod coerce;
mod concat;
mod correlation;
mod horizontal;
pub(crate) mod horizontal;
mod index;
#[cfg(feature = "range")]
mod range;
Expand Down
24 changes: 24 additions & 0 deletions crates/polars-plan/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ use polars_io::{
};

use super::builder_functions::*;
use crate::dsl::functions::horizontal::all_horizontal;
use crate::logical_plan::functions::FunctionNode;
use crate::logical_plan::projection::{is_regex_projection, rewrite_projections};
use crate::logical_plan::schema::{det_join_schema, FileInfo};
Expand Down Expand Up @@ -478,6 +479,29 @@ impl LogicalPlanBuilder {
self.project(exprs, Default::default())
}

pub fn drop_nulls(self, subset: Option<Vec<Expr>>) -> Self {
match subset {
None => {
let predicate =
try_delayed!(all_horizontal([col("*").is_not_null()]), &self.0, into);
self.filter(predicate)
},
Some(subset) => {
let predicate = try_delayed!(
all_horizontal(
subset
.into_iter()
.map(|e| e.is_not_null())
.collect::<Vec<_>>(),
),
&self.0,
into
);
self.filter(predicate)
},
}
}

pub fn fill_nan(self, fill_value: Expr) -> Self {
let schema = try_delayed!(self.0.schema(), &self.0, into);

Expand Down
2 changes: 1 addition & 1 deletion crates/polars/tests/it/lazy/folds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ fn test_fold_wildcard() -> PolarsResult<()> {
// test if we don't panic due to wildcard
let _out = df1
.lazy()
.select([polars_lazy::dsl::all_horizontal([col("*").is_not_null()])])
.select([polars_lazy::dsl::all_horizontal([col("*").is_not_null()])?])
.collect()?;
Ok(())
}
26 changes: 16 additions & 10 deletions py-polars/src/functions/aggregation.rs
Original file line number Diff line number Diff line change
@@ -1,35 +1,41 @@
use polars::lazy::dsl;
use pyo3::prelude::*;

use crate::error::PyPolarsErr;
use crate::expr::ToExprs;
use crate::PyExpr;

#[pyfunction]
pub fn all_horizontal(exprs: Vec<PyExpr>) -> PyExpr {
pub fn all_horizontal(exprs: Vec<PyExpr>) -> PyResult<PyExpr> {
let exprs = exprs.to_exprs();
dsl::all_horizontal(exprs).into()
let e = dsl::all_horizontal(exprs).map_err(PyPolarsErr::from)?;
Ok(e.into())
}

#[pyfunction]
pub fn any_horizontal(exprs: Vec<PyExpr>) -> PyExpr {
pub fn any_horizontal(exprs: Vec<PyExpr>) -> PyResult<PyExpr> {
let exprs = exprs.to_exprs();
dsl::any_horizontal(exprs).into()
let e = dsl::any_horizontal(exprs).map_err(PyPolarsErr::from)?;
Ok(e.into())
}

#[pyfunction]
pub fn max_horizontal(exprs: Vec<PyExpr>) -> PyExpr {
pub fn max_horizontal(exprs: Vec<PyExpr>) -> PyResult<PyExpr> {
let exprs = exprs.to_exprs();
dsl::max_horizontal(exprs).into()
let e = dsl::max_horizontal(exprs).map_err(PyPolarsErr::from)?;
Ok(e.into())
}

#[pyfunction]
pub fn min_horizontal(exprs: Vec<PyExpr>) -> PyExpr {
pub fn min_horizontal(exprs: Vec<PyExpr>) -> PyResult<PyExpr> {
let exprs = exprs.to_exprs();
dsl::min_horizontal(exprs).into()
let e = dsl::min_horizontal(exprs).map_err(PyPolarsErr::from)?;
Ok(e.into())
}

#[pyfunction]
pub fn sum_horizontal(exprs: Vec<PyExpr>) -> PyExpr {
pub fn sum_horizontal(exprs: Vec<PyExpr>) -> PyResult<PyExpr> {
let exprs = exprs.to_exprs();
dsl::sum_horizontal(exprs).into()
let e = dsl::sum_horizontal(exprs).map_err(PyPolarsErr::from)?;
Ok(e.into())
}
14 changes: 14 additions & 0 deletions py-polars/tests/unit/functions/aggregation/test_horizontal.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,20 @@ def test_nested_min_max() -> None:
assert_frame_equal(result, expected)


def test_empty_inputs_raise() -> None:
with pytest.raises(
pl.ComputeError,
match="cannot return empty fold because the number of output rows is unknown",
):
pl.select(pl.any_horizontal())

with pytest.raises(
pl.ComputeError,
match="cannot return empty fold because the number of output rows is unknown",
):
pl.select(pl.all_horizontal())


def test_max_min_wildcard_columns(fruits_cars: pl.DataFrame) -> None:
result = fruits_cars.select(pl.col(pl.datatypes.Int64)).select(
pl.min_horizontal("*")
Expand Down

0 comments on commit def47f8

Please sign in to comment.