diff --git a/crates/polars-lazy/src/tests/err_msg.rs b/crates/polars-lazy/src/tests/err_msg.rs new file mode 100644 index 000000000000..5f73f1c30c9a --- /dev/null +++ b/crates/polars-lazy/src/tests/err_msg.rs @@ -0,0 +1,83 @@ +use polars_core::error::ErrString; + +use super::*; + +const INITIAL_PROJECTION_STR: &str = r#"DF ["c1"]; PROJECT */1 COLUMNS; SELECTION: "None""#; + +fn make_df() -> LazyFrame { + df! [ "c1" => [0, 1] ].unwrap().lazy() +} + +fn assert_errors_eq(e1: &PolarsError, e2: &PolarsError) { + use PolarsError::*; + match (e1, e2) { + (ColumnNotFound(s1), ColumnNotFound(s2)) => { + assert_eq!(s1.as_ref(), s2.as_ref()); + }, + (ComputeError(s1), ComputeError(s2)) => { + assert_eq!(s1.as_ref(), s2.as_ref()); + }, + _ => panic!("{e1:?} != {e2:?}"), + } +} + +#[test] +fn col_not_found_error_messages() { + fn get_err_msg(err_msg: &str, n: usize) -> String { + let plural_s; + let was_were; + + if n == 1 { + plural_s = ""; + was_were = "was" + } else { + plural_s = "s"; + was_were = "were"; + }; + format!( + "{err_msg}\n\nLogicalPlan had already failed with the above error; \ + after failure, {n} additional operation{plural_s} \ + {was_were} attempted on the LazyFrame" + ) + } + fn test_col_not_found(df: LazyFrame, n: usize) { + let err_msg = format!( + "xyz\n\nError originated just after this \ + operation:\n{INITIAL_PROJECTION_STR}" + ); + + let plan_err_str = + format!("ErrorState {{ n_times: {n}, err: ColumnNotFound(ErrString({err_msg:?})) }}"); + + let collect_err = if n == 0 { + PolarsError::ColumnNotFound(ErrString::from(err_msg.to_owned())) + } else { + PolarsError::ColumnNotFound(ErrString::from(get_err_msg(&err_msg, n))) + }; + + assert_eq!(df.describe_plan(), plan_err_str); + assert_errors_eq(&df.collect().unwrap_err(), &collect_err); + } + + let df = make_df(); + + assert_eq!(df.describe_plan(), INITIAL_PROJECTION_STR); + + test_col_not_found(df.clone().select([col("xyz")]), 0); + test_col_not_found(df.clone().select([col("xyz")]).select([col("c1")]), 1); + test_col_not_found( + df.clone() + .select([col("xyz")]) + .select([col("c1")]) + .select([col("c2")]), + 2, + ); + test_col_not_found( + df.clone() + .select([col("xyz")]) + .select([col("c1")]) + .select([col("c2")]) + .select([col("c3")]), + 3, + ); +} diff --git a/crates/polars-lazy/src/tests/mod.rs b/crates/polars-lazy/src/tests/mod.rs index 9a84f972abae..058a40a9b38e 100644 --- a/crates/polars-lazy/src/tests/mod.rs +++ b/crates/polars-lazy/src/tests/mod.rs @@ -2,6 +2,7 @@ mod aggregations; mod arity; #[cfg(all(feature = "strings", feature = "cse"))] mod cse; +mod err_msg; #[cfg(feature = "parquet")] mod io; mod logical; diff --git a/crates/polars-plan/src/dot.rs b/crates/polars-plan/src/dot.rs index 22e1272c4247..4f80503ef88d 100644 --- a/crates/polars-plan/src/dot.rs +++ b/crates/polars-plan/src/dot.rs @@ -406,7 +406,7 @@ impl LogicalPlan { input.dot(acc_str, (branch, id + 1), current_node, id_map) }, Error { err, .. } => { - let fmt = format!("{:?}", &**err); + let fmt = format!("{:?}", &err.0); let current_node = DotNode { branch, id, diff --git a/crates/polars-plan/src/logical_plan/builder.rs b/crates/polars-plan/src/logical_plan/builder.rs index 34a34ed84898..2a97ead9a5db 100644 --- a/crates/polars-plan/src/logical_plan/builder.rs +++ b/crates/polars-plan/src/logical_plan/builder.rs @@ -59,18 +59,23 @@ fn format_err(msg: &str, input: &LogicalPlan) -> String { format!("{msg}\n\nError originated just after this operation:\n{input:?}") } -/// Returns every error or msg: &str as `ComputeError`. -/// It also shows the logical plan node where the error -/// originated. +/// Returns every error or msg: &str as `ComputeError`. It also shows the logical plan node where the error originated. +/// If `input` is already a `LogicalPlan::Error`, then return it as is; errors already keep track of their previous +/// inputs, so we don't have to do it again here. macro_rules! raise_err { ($err:expr, $input:expr, $convert:ident) => {{ - let format_err_outer = |msg: &str| format_err(msg, &$input); - - let err = $err.wrap_msg(&format_err_outer); - - LogicalPlan::Error { - input: Box::new($input.clone()), - err: err.into(), + let input: LogicalPlan = $input.clone(); + match &input { + LogicalPlan::Error { .. } => input, + _ => { + let format_err_outer = |msg: &str| format_err(msg, &input); + let err = $err.wrap_msg(&format_err_outer); + + LogicalPlan::Error { + input: Box::new(input), + err: err.into(), // PolarsError -> ErrorState + } + }, } .$convert() }}; diff --git a/crates/polars-plan/src/logical_plan/format.rs b/crates/polars-plan/src/logical_plan/format.rs index 9cfdf825a617..e784af228ca4 100644 --- a/crates/polars-plan/src/logical_plan/format.rs +++ b/crates/polars-plan/src/logical_plan/format.rs @@ -216,7 +216,7 @@ impl LogicalPlan { write!(f, "{:indent$}{function_fmt}", "")?; input._format(f, sub_indent) }, - Error { input, err } => write!(f, "{err:?}\n{input:?}"), + Error { err, .. } => write!(f, "{err:?}"), ExtContext { input, .. } => { write!(f, "{:indent$}EXTERNAL_CONTEXT", "")?; input._format(f, sub_indent) diff --git a/crates/polars-plan/src/logical_plan/mod.rs b/crates/polars-plan/src/logical_plan/mod.rs index 08e9816b900a..f60d3b5c5da3 100644 --- a/crates/polars-plan/src/logical_plan/mod.rs +++ b/crates/polars-plan/src/logical_plan/mod.rs @@ -75,72 +75,64 @@ pub enum Context { } #[derive(Debug)] -pub enum ErrorState { - NotYetEncountered { err: PolarsError }, - AlreadyEncountered { prev_err_msg: String }, -} - -impl std::fmt::Display for ErrorState { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ErrorState::NotYetEncountered { err } => write!(f, "NotYetEncountered({err})")?, - ErrorState::AlreadyEncountered { prev_err_msg } => { - write!(f, "AlreadyEncountered({prev_err_msg})")? - }, - }; - - Ok(()) - } +pub(crate) struct ErrorStateUnsync { + n_times: usize, + err: PolarsError, } #[derive(Clone)] -pub struct ErrorStateSync(Arc>); - -impl std::ops::Deref for ErrorStateSync { - type Target = Arc>; +pub struct ErrorState(pub(crate) Arc>); - fn deref(&self) -> &Self::Target { - &self.0 +impl std::fmt::Debug for ErrorState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let this = self.0.lock().unwrap(); + // Skip over the Arc> and just print the fields we care + // about. Technically this is misleading, but the insides of ErrorState are not + // public, so this only affects authors of polars, not users (and the odds that + // this affects authors is slim) + f.debug_struct("ErrorState") + .field("n_times", &this.n_times) + .field("err", &this.err) + .finish() } } -impl std::fmt::Debug for ErrorStateSync { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "ErrorStateSync({})", &*self.0.lock().unwrap()) +impl From for ErrorState { + fn from(err: PolarsError) -> Self { + Self(Arc::new(Mutex::new(ErrorStateUnsync { n_times: 0, err }))) } } -impl ErrorStateSync { +impl ErrorState { fn take(&self) -> PolarsError { - let mut curr_err = self.0.lock().unwrap(); + let mut this = self.0.lock().unwrap(); + + let ret_err = if this.n_times == 0 { + this.err.wrap_msg(&|msg| msg.to_owned()) + } else { + this.err.wrap_msg(&|msg| { + let n_times = this.n_times; - match &*curr_err { - ErrorState::NotYetEncountered { err: polars_err } => { - // Need to finish using `polars_err` here so that NLL considers `err` dropped - let prev_err_msg = polars_err.to_string(); - // Place AlreadyEncountered in `self` for future users of `self` - let prev_err = std::mem::replace( - &mut *curr_err, - ErrorState::AlreadyEncountered { prev_err_msg }, - ); - // Since we're in this branch, we know err was a NotYetEncountered - match prev_err { - ErrorState::NotYetEncountered { err } => err, - ErrorState::AlreadyEncountered { .. } => unreachable!(), - } - }, - ErrorState::AlreadyEncountered { prev_err_msg } => { - polars_err!( - ComputeError: "LogicalPlan already failed with error: '{}'", prev_err_msg, + let plural_s; + let was_were; + + if n_times == 1 { + plural_s = ""; + was_were = "was" + } else { + plural_s = "s"; + was_were = "were"; + }; + format!( + "{msg}\n\nLogicalPlan had already failed with the above error; \ + after failure, {n_times} additional operation{plural_s} \ + {was_were} attempted on the LazyFrame", ) - }, - } - } -} + }) + }; + this.n_times += 1; -impl From for ErrorStateSync { - fn from(err: PolarsError) -> Self { - Self(Arc::new(Mutex::new(ErrorState::NotYetEncountered { err }))) + ret_err } } @@ -248,7 +240,7 @@ pub enum LogicalPlan { #[cfg_attr(feature = "serde", serde(skip))] Error { input: Box, - err: ErrorStateSync, + err: ErrorState, }, /// This allows expressions to access other tables ExtContext { diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py index f6b5b2df8110..9a54e07bfbd0 100644 --- a/py-polars/tests/unit/test_errors.py +++ b/py-polars/tests/unit/test_errors.py @@ -689,3 +689,13 @@ def test_error_list_to_array() -> None: pl.DataFrame( data={"a": [[1, 2], [3, 4, 5]]}, schema={"a": pl.List(pl.Int8)} ).with_columns(array=pl.col("a").list.to_array(2)) + + +# https://github.com/pola-rs/polars/issues/8079 +def test_error_lazyframe_not_repeating() -> None: + lf = pl.LazyFrame({"a": 1, "b": range(2)}) + with pytest.raises(pl.ColumnNotFoundError) as exc_info: + lf.select("c").select("d").select("e").collect() + + match = "Error originated just after this operation:" + assert str(exc_info).count(match) == 1