Skip to content

Commit

Permalink
fix: Remove duplicated content in error messages (#8107)
Browse files Browse the repository at this point in the history
Co-authored-by: Stijn de Gooijer <[email protected]>
  • Loading branch information
rben01 and stinodego authored Feb 7, 2024
1 parent 279540f commit 9dfc9fd
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 65 deletions.
83 changes: 83 additions & 0 deletions crates/polars-lazy/src/tests/err_msg.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use polars_core::error::ErrString;

use super::*;

const INITIAL_PROJECTION_STR: &str = r#"DF ["c1"]; PROJECT */1 COLUMNS; SELECTION: "None""#;

fn make_df() -> LazyFrame {
df! [ "c1" => [0, 1] ].unwrap().lazy()
}

fn assert_errors_eq(e1: &PolarsError, e2: &PolarsError) {
use PolarsError::*;
match (e1, e2) {
(ColumnNotFound(s1), ColumnNotFound(s2)) => {
assert_eq!(s1.as_ref(), s2.as_ref());
},
(ComputeError(s1), ComputeError(s2)) => {
assert_eq!(s1.as_ref(), s2.as_ref());
},
_ => panic!("{e1:?} != {e2:?}"),
}
}

#[test]
fn col_not_found_error_messages() {
fn get_err_msg(err_msg: &str, n: usize) -> String {
let plural_s;
let was_were;

if n == 1 {
plural_s = "";
was_were = "was"
} else {
plural_s = "s";
was_were = "were";
};
format!(
"{err_msg}\n\nLogicalPlan had already failed with the above error; \
after failure, {n} additional operation{plural_s} \
{was_were} attempted on the LazyFrame"
)
}
fn test_col_not_found(df: LazyFrame, n: usize) {
let err_msg = format!(
"xyz\n\nError originated just after this \
operation:\n{INITIAL_PROJECTION_STR}"
);

let plan_err_str =
format!("ErrorState {{ n_times: {n}, err: ColumnNotFound(ErrString({err_msg:?})) }}");

let collect_err = if n == 0 {
PolarsError::ColumnNotFound(ErrString::from(err_msg.to_owned()))
} else {
PolarsError::ColumnNotFound(ErrString::from(get_err_msg(&err_msg, n)))
};

assert_eq!(df.describe_plan(), plan_err_str);
assert_errors_eq(&df.collect().unwrap_err(), &collect_err);
}

let df = make_df();

assert_eq!(df.describe_plan(), INITIAL_PROJECTION_STR);

test_col_not_found(df.clone().select([col("xyz")]), 0);
test_col_not_found(df.clone().select([col("xyz")]).select([col("c1")]), 1);
test_col_not_found(
df.clone()
.select([col("xyz")])
.select([col("c1")])
.select([col("c2")]),
2,
);
test_col_not_found(
df.clone()
.select([col("xyz")])
.select([col("c1")])
.select([col("c2")])
.select([col("c3")]),
3,
);
}
1 change: 1 addition & 0 deletions crates/polars-lazy/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod aggregations;
mod arity;
#[cfg(all(feature = "strings", feature = "cse"))]
mod cse;
mod err_msg;
#[cfg(feature = "parquet")]
mod io;
mod logical;
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ impl LogicalPlan {
input.dot(acc_str, (branch, id + 1), current_node, id_map)
},
Error { err, .. } => {
let fmt = format!("{:?}", &**err);
let fmt = format!("{:?}", &err.0);
let current_node = DotNode {
branch,
id,
Expand Down
25 changes: 15 additions & 10 deletions crates/polars-plan/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,23 @@ fn format_err(msg: &str, input: &LogicalPlan) -> String {
format!("{msg}\n\nError originated just after this operation:\n{input:?}")
}

/// Returns every error or msg: &str as `ComputeError`.
/// It also shows the logical plan node where the error
/// originated.
/// Returns every error or msg: &str as `ComputeError`. It also shows the logical plan node where the error originated.
/// If `input` is already a `LogicalPlan::Error`, then return it as is; errors already keep track of their previous
/// inputs, so we don't have to do it again here.
macro_rules! raise_err {
($err:expr, $input:expr, $convert:ident) => {{
let format_err_outer = |msg: &str| format_err(msg, &$input);

let err = $err.wrap_msg(&format_err_outer);

LogicalPlan::Error {
input: Box::new($input.clone()),
err: err.into(),
let input: LogicalPlan = $input.clone();
match &input {
LogicalPlan::Error { .. } => input,
_ => {
let format_err_outer = |msg: &str| format_err(msg, &input);
let err = $err.wrap_msg(&format_err_outer);

LogicalPlan::Error {
input: Box::new(input),
err: err.into(), // PolarsError -> ErrorState
}
},
}
.$convert()
}};
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/logical_plan/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ impl LogicalPlan {
write!(f, "{:indent$}{function_fmt}", "")?;
input._format(f, sub_indent)
},
Error { input, err } => write!(f, "{err:?}\n{input:?}"),
Error { err, .. } => write!(f, "{err:?}"),
ExtContext { input, .. } => {
write!(f, "{:indent$}EXTERNAL_CONTEXT", "")?;
input._format(f, sub_indent)
Expand Down
98 changes: 45 additions & 53 deletions crates/polars-plan/src/logical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,72 +75,64 @@ pub enum Context {
}

#[derive(Debug)]
pub enum ErrorState {
NotYetEncountered { err: PolarsError },
AlreadyEncountered { prev_err_msg: String },
}

impl std::fmt::Display for ErrorState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ErrorState::NotYetEncountered { err } => write!(f, "NotYetEncountered({err})")?,
ErrorState::AlreadyEncountered { prev_err_msg } => {
write!(f, "AlreadyEncountered({prev_err_msg})")?
},
};

Ok(())
}
pub(crate) struct ErrorStateUnsync {
n_times: usize,
err: PolarsError,
}

#[derive(Clone)]
pub struct ErrorStateSync(Arc<Mutex<ErrorState>>);

impl std::ops::Deref for ErrorStateSync {
type Target = Arc<Mutex<ErrorState>>;
pub struct ErrorState(pub(crate) Arc<Mutex<ErrorStateUnsync>>);

fn deref(&self) -> &Self::Target {
&self.0
impl std::fmt::Debug for ErrorState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let this = self.0.lock().unwrap();
// Skip over the Arc<Mutex<ErrorStateUnsync>> and just print the fields we care
// about. Technically this is misleading, but the insides of ErrorState are not
// public, so this only affects authors of polars, not users (and the odds that
// this affects authors is slim)
f.debug_struct("ErrorState")
.field("n_times", &this.n_times)
.field("err", &this.err)
.finish()
}
}

impl std::fmt::Debug for ErrorStateSync {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "ErrorStateSync({})", &*self.0.lock().unwrap())
impl From<PolarsError> for ErrorState {
fn from(err: PolarsError) -> Self {
Self(Arc::new(Mutex::new(ErrorStateUnsync { n_times: 0, err })))
}
}

impl ErrorStateSync {
impl ErrorState {
fn take(&self) -> PolarsError {
let mut curr_err = self.0.lock().unwrap();
let mut this = self.0.lock().unwrap();

let ret_err = if this.n_times == 0 {
this.err.wrap_msg(&|msg| msg.to_owned())
} else {
this.err.wrap_msg(&|msg| {
let n_times = this.n_times;

match &*curr_err {
ErrorState::NotYetEncountered { err: polars_err } => {
// Need to finish using `polars_err` here so that NLL considers `err` dropped
let prev_err_msg = polars_err.to_string();
// Place AlreadyEncountered in `self` for future users of `self`
let prev_err = std::mem::replace(
&mut *curr_err,
ErrorState::AlreadyEncountered { prev_err_msg },
);
// Since we're in this branch, we know err was a NotYetEncountered
match prev_err {
ErrorState::NotYetEncountered { err } => err,
ErrorState::AlreadyEncountered { .. } => unreachable!(),
}
},
ErrorState::AlreadyEncountered { prev_err_msg } => {
polars_err!(
ComputeError: "LogicalPlan already failed with error: '{}'", prev_err_msg,
let plural_s;
let was_were;

if n_times == 1 {
plural_s = "";
was_were = "was"
} else {
plural_s = "s";
was_were = "were";
};
format!(
"{msg}\n\nLogicalPlan had already failed with the above error; \
after failure, {n_times} additional operation{plural_s} \
{was_were} attempted on the LazyFrame",
)
},
}
}
}
})
};
this.n_times += 1;

impl From<PolarsError> for ErrorStateSync {
fn from(err: PolarsError) -> Self {
Self(Arc::new(Mutex::new(ErrorState::NotYetEncountered { err })))
ret_err
}
}

Expand Down Expand Up @@ -248,7 +240,7 @@ pub enum LogicalPlan {
#[cfg_attr(feature = "serde", serde(skip))]
Error {
input: Box<LogicalPlan>,
err: ErrorStateSync,
err: ErrorState,
},
/// This allows expressions to access other tables
ExtContext {
Expand Down
10 changes: 10 additions & 0 deletions py-polars/tests/unit/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,3 +689,13 @@ def test_error_list_to_array() -> None:
pl.DataFrame(
data={"a": [[1, 2], [3, 4, 5]]}, schema={"a": pl.List(pl.Int8)}
).with_columns(array=pl.col("a").list.to_array(2))


# https://github.com/pola-rs/polars/issues/8079
def test_error_lazyframe_not_repeating() -> None:
lf = pl.LazyFrame({"a": 1, "b": range(2)})
with pytest.raises(pl.ColumnNotFoundError) as exc_info:
lf.select("c").select("d").select("e").collect()

match = "Error originated just after this operation:"
assert str(exc_info).count(match) == 1

0 comments on commit 9dfc9fd

Please sign in to comment.