diff --git a/crates/polars-core/src/frame/horizontal.rs b/crates/polars-core/src/frame/horizontal.rs index af790f407b1e..6ed4e8bbb356 100644 --- a/crates/polars-core/src/frame/horizontal.rs +++ b/crates/polars-core/src/frame/horizontal.rs @@ -66,7 +66,7 @@ impl DataFrame { } /// Concat [`DataFrame`]s horizontally. /// Concat horizontally and extend with null values if lengths don't match -pub fn concat_df_horizontal(dfs: &[DataFrame]) -> PolarsResult { +pub fn concat_df_horizontal(dfs: &[DataFrame], check_duplicates: bool) -> PolarsResult { let max_len = dfs .iter() .map(|df| df.height()) @@ -99,18 +99,26 @@ pub fn concat_df_horizontal(dfs: &[DataFrame]) -> PolarsResult { let height = first_df.height(); let is_empty = first_df.is_empty(); - let columns = first_df - .columns - .iter() - .map(|s| SmartString::from(s.name())) - .collect::>(); + let columns; + let mut names = if check_duplicates { + columns = first_df + .columns + .iter() + .map(|s| SmartString::from(s.name())) + .collect::>(); - let mut names = columns.iter().map(|n| n.as_str()).collect::>(); + columns.iter().map(|n| n.as_str()).collect::>() + } else { + Default::default() + }; for df in &dfs[1..] { let cols = df.get_columns(); - for col in cols { - check_hstack(col, &mut names, height, is_empty)?; + + if check_duplicates { + for col in cols { + check_hstack(col, &mut names, height, is_empty)?; + } } unsafe { first_df.hstack_mut_unchecked(cols) }; diff --git a/crates/polars-mem-engine/src/executors/hconcat.rs b/crates/polars-mem-engine/src/executors/hconcat.rs index 0a755f694cfe..2d9c543dd3c5 100644 --- a/crates/polars-mem-engine/src/executors/hconcat.rs +++ b/crates/polars-mem-engine/src/executors/hconcat.rs @@ -58,6 +58,7 @@ impl Executor for HConcatExec { out?.into_iter().flatten().collect() }; - concat_df_horizontal(&dfs) + // Invariant of IR. Schema is already checked to contain no duplicates. + concat_df_horizontal(&dfs, false) } } diff --git a/crates/polars-plan/src/plans/ir/mod.rs b/crates/polars-plan/src/plans/ir/mod.rs index e4e09954255e..5440fdb80686 100644 --- a/crates/polars-plan/src/plans/ir/mod.rs +++ b/crates/polars-plan/src/plans/ir/mod.rs @@ -136,6 +136,8 @@ pub enum IR { inputs: Vec, options: UnionOptions, }, + /// Horizontal concatenation + /// - Invariant: the names will be unique HConcat { inputs: Vec, schema: SchemaRef, diff --git a/crates/polars-plan/src/plans/optimizer/simplify_functions.rs b/crates/polars-plan/src/plans/optimizer/simplify_functions.rs index 396af32afef3..504af2e517f9 100644 --- a/crates/polars-plan/src/plans/optimizer/simplify_functions.rs +++ b/crates/polars-plan/src/plans/optimizer/simplify_functions.rs @@ -1,5 +1,3 @@ -use polars_core::chunked_array::cast::CastOptions; - use super::*; pub(super) fn optimize_functions( @@ -67,17 +65,6 @@ pub(super) fn optimize_functions( None } }, - FunctionExpr::Boolean(BooleanFunction::AllHorizontal | BooleanFunction::AnyHorizontal) => { - if input.len() == 1 { - Some(AExpr::Cast { - expr: input[0].node(), - data_type: DataType::Boolean, - options: CastOptions::NonStrict, - }) - } else { - None - } - }, FunctionExpr::Boolean(BooleanFunction::Not) => { let y = expr_arena.get(input[0].node()); diff --git a/crates/polars-stream/src/nodes/zip.rs b/crates/polars-stream/src/nodes/zip.rs index 8816bfbb0640..3a55e7e1eed3 100644 --- a/crates/polars-stream/src/nodes/zip.rs +++ b/crates/polars-stream/src/nodes/zip.rs @@ -136,7 +136,7 @@ impl ComputeNode for ZipNode { } } - let out_df = concat_df_horizontal(&out)?; + let out_df = concat_df_horizontal(&out, false)?; out.clear(); let morsel = Morsel::new(out_df, self.out_seq, source_token.clone()); diff --git a/docs/src/rust/user-guide/transformations/concatenation.rs b/docs/src/rust/user-guide/transformations/concatenation.rs index 240ff8e9f59b..4b7d183316c9 100644 --- a/docs/src/rust/user-guide/transformations/concatenation.rs +++ b/docs/src/rust/user-guide/transformations/concatenation.rs @@ -30,7 +30,7 @@ fn main() -> Result<(), Box> { "r2"=> &[7, 8], "r3"=> &[9, 10], )?; - let df_horizontal_concat = polars::functions::concat_df_horizontal(&[df_h1, df_h2])?; + let df_horizontal_concat = polars::functions::concat_df_horizontal(&[df_h1, df_h2], true)?; println!("{}", &df_horizontal_concat); // --8<-- [end:horizontal] // @@ -43,7 +43,7 @@ fn main() -> Result<(), Box> { "r1"=> &[5, 6, 7], "r2"=> &[8, 9, 10], )?; - let df_horizontal_concat = polars::functions::concat_df_horizontal(&[df_h1, df_h2])?; + let df_horizontal_concat = polars::functions::concat_df_horizontal(&[df_h1, df_h2], true)?; println!("{}", &df_horizontal_concat); // --8<-- [end:horizontal_different_lengths] diff --git a/py-polars/src/functions/eager.rs b/py-polars/src/functions/eager.rs index 6e423fc90065..f271b818c8ce 100644 --- a/py-polars/src/functions/eager.rs +++ b/py-polars/src/functions/eager.rs @@ -88,6 +88,6 @@ pub fn concat_df_horizontal(dfs: &Bound<'_, PyAny>) -> PyResult { }) .collect::>>()?; - let df = functions::concat_df_horizontal(&dfs).map_err(PyPolarsErr::from)?; + let df = functions::concat_df_horizontal(&dfs, true).map_err(PyPolarsErr::from)?; Ok(df.into()) }