From 369f49e11b96a0b1706f87b3e01e8726d822c196 Mon Sep 17 00:00:00 2001 From: ritchie Date: Thu, 27 Jul 2023 17:22:38 +0200 Subject: [PATCH] fix(rust, python): always sort projection by schema --- .../polars-plan/src/logical_plan/file_scan.rs | 13 ------------- .../optimizer/projection_pushdown/mod.rs | 10 ++-------- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/polars/polars-lazy/polars-plan/src/logical_plan/file_scan.rs b/polars/polars-lazy/polars-plan/src/logical_plan/file_scan.rs index f3d52c115f930..6aa19b79c6287 100644 --- a/polars/polars-lazy/polars-plan/src/logical_plan/file_scan.rs +++ b/polars/polars-lazy/polars-plan/src/logical_plan/file_scan.rs @@ -24,19 +24,6 @@ impl FileScan { } } - pub(crate) fn sort_projection(&self, _file_options: &FileScanOptions) -> bool { - match self { - #[cfg(feature = "csv")] - Self::Csv { .. } => true, - #[cfg(feature = "ipc")] - Self::Ipc { .. } => _file_options.row_count.is_some(), - #[cfg(feature = "parquet")] - Self::Parquet { .. } => _file_options.row_count.is_some(), - #[allow(unreachable_patterns)] - _ => false, - } - } - pub fn streamable(&self) -> bool { match self { #[cfg(feature = "csv")] diff --git a/polars/polars-lazy/polars-plan/src/logical_plan/optimizer/projection_pushdown/mod.rs b/polars/polars-lazy/polars-plan/src/logical_plan/optimizer/projection_pushdown/mod.rs index 91c812b29309b..eed4df0011433 100644 --- a/polars/polars-lazy/polars-plan/src/logical_plan/optimizer/projection_pushdown/mod.rs +++ b/polars/polars-lazy/polars-plan/src/logical_plan/optimizer/projection_pushdown/mod.rs @@ -127,7 +127,6 @@ fn update_scan_schema( acc_projections: &[Node], expr_arena: &Arena, schema: &Schema, - sort_projections: bool, ) -> PolarsResult { let mut new_schema = Schema::with_capacity(acc_projections.len()); let mut new_cols = Vec::with_capacity(acc_projections.len()); @@ -140,9 +139,8 @@ fn update_scan_schema( } } // make sure that the projections are sorted by the schema. - if sort_projections { - new_cols.sort_unstable_by_key(|item| item.0); - } + // otherwise the upstream nodes will have an incorrect schema. + new_cols.sort_unstable_by_key(|item| item.0); for item in new_cols { new_schema.with_column(item.1.clone(), item.2.clone()); } @@ -375,7 +373,6 @@ impl ProjectionPushDown { &acc_projections, expr_arena, &file_info.schema, - true, )?)) }; mut_options.output_schema = output_schema.clone(); @@ -412,7 +409,6 @@ impl ProjectionPushDown { &acc_projections, expr_arena, &schema, - false, )?)); projection = get_scan_columns(&mut acc_projections, expr_arena, None); } @@ -439,7 +435,6 @@ impl ProjectionPushDown { &acc_projections, expr_arena, &options.schema, - true, )?)) }; Ok(PythonScan { options, predicate }) @@ -465,7 +460,6 @@ impl ProjectionPushDown { &acc_projections, expr_arena, &file_info.schema, - scan_type.sort_projection(&file_options), )?)) };