Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: AggregateMonotonicity #14271

Merged
merged 34 commits into main from feature/monotonic-sets
Jan 31, 2025
Merged
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a2919b6
add monotonic function definitions for aggregate expressions
mertak-synnada Jan 16, 2025
14109e6
fix benchmark results
mertak-synnada Jan 16, 2025
b3d75ba
set prefer_existing_sort to true in sqllogictests
mertak-synnada Jan 17, 2025
549502e
set prefer_existing_sort to true in sqllogictests
mertak-synnada Jan 17, 2025
623e0c5
fix typo
mertak-synnada Jan 17, 2025
6a9d24e
Merge branch 'refs/heads/apache_main' into feature/monotonic-sets
mertak-synnada Jan 20, 2025
53ee3de
re-add test_utils.rs changes to the new file
mertak-synnada Jan 20, 2025
97d8951
clone input with Arc
mertak-synnada Jan 20, 2025
cc33031
Merge branch 'refs/heads/apache_main' into feature/monotonic-sets
mertak-synnada Jan 22, 2025
41d9430
Merge branch 'refs/heads/apache_main' into feature/monotonic-sets
mertak-synnada Jan 23, 2025
e988dcf
inject aggr expr indices
mertak-synnada Jan 23, 2025
906245e
remove redundant file
mertak-synnada Jan 23, 2025
475fe2d
add Sum monotonicity
mertak-synnada Jan 24, 2025
57e000e
fix sql logic tests
mertak-synnada Jan 24, 2025
ca57f46
fix sql logic tests
mertak-synnada Jan 24, 2025
6cf9644
Merge branch 'refs/heads/apache_main' into feature/monotonic-sets
mertak-synnada Jan 24, 2025
072e6ef
update docs
mertak-synnada Jan 24, 2025
7d62cb0
Merge branch 'apache_main' into feature/monotonic-sets
berkaysynnada Jan 28, 2025
491aabe
review part 1
berkaysynnada Jan 28, 2025
972c56f
fix the tests
berkaysynnada Jan 29, 2025
4b946b3
revert slt's
berkaysynnada Jan 29, 2025
481b5b4
simplify terms
berkaysynnada Jan 29, 2025
29af731
Update mod.rs
berkaysynnada Jan 29, 2025
1f02953
remove unnecessary computations
berkaysynnada Jan 29, 2025
79dd942
remove index calc
berkaysynnada Jan 29, 2025
247d5fe
Update mod.rs
berkaysynnada Jan 29, 2025
16bdac4
Apply suggestions from code review
ozankabak Jan 29, 2025
1875336
add slt
berkaysynnada Jan 29, 2025
ba7b94f
remove aggregate changes, tests already give expected results
berkaysynnada Jan 30, 2025
2152b7f
fix clippy
berkaysynnada Jan 30, 2025
7822613
remove one row sorts
berkaysynnada Jan 30, 2025
5e9b2db
Improve comments
ozankabak Jan 30, 2025
54d62d6
Use a short name for set monotonicity
ozankabak Jan 30, 2025
1146811
Merge branch 'main' into feature/monotonic-sets
ozankabak Jan 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
remove index calc
berkaysynnada committed Jan 29, 2025

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit 79dd9422e5163e78c7e3e1941c6660f1213a2421
31 changes: 11 additions & 20 deletions datafusion/physical-plan/src/aggregates/mod.rs
Original file line number Diff line number Diff line change
@@ -375,7 +375,6 @@ pub struct AggregateExec {
/// Describes how the input is ordered relative to the group by columns
input_order_mode: InputOrderMode,
cache: PlanProperties,
aggr_expr_indices: Vec<usize>,
}

impl AggregateExec {
@@ -400,7 +399,6 @@ impl AggregateExec {
input: Arc::clone(&self.input),
schema: Arc::clone(&self.schema),
input_schema: Arc::clone(&self.input_schema),
aggr_expr_indices: self.aggr_expr_indices.clone(),
}
}

@@ -417,8 +415,7 @@ impl AggregateExec {
input: Arc<dyn ExecutionPlan>,
input_schema: SchemaRef,
) -> Result<Self> {
let (schema, aggr_exprs_indices) =
create_schema(&input.schema(), &group_by, &aggr_expr, mode)?;
let schema = create_schema(&input.schema(), &group_by, &aggr_expr, mode)?;

let schema = Arc::new(schema);
AggregateExec::try_new_with_schema(
@@ -429,7 +426,6 @@ impl AggregateExec {
input,
input_schema,
schema,
aggr_exprs_indices,
)
}

@@ -450,7 +446,6 @@ impl AggregateExec {
input: Arc<dyn ExecutionPlan>,
input_schema: SchemaRef,
schema: SchemaRef,
aggr_expr_indices: Vec<usize>,
) -> Result<Self> {
// Make sure arguments are consistent in size
if aggr_expr.len() != filter_expr.len() {
@@ -518,7 +513,6 @@ impl AggregateExec {
&mode,
&input_order_mode,
aggr_expr.clone(),
aggr_expr_indices.clone(),
);

Ok(AggregateExec {
@@ -534,7 +528,6 @@ impl AggregateExec {
limit: None,
input_order_mode,
cache,
aggr_expr_indices,
})
}

@@ -657,13 +650,15 @@ impl AggregateExec {
mode: &AggregateMode,
input_order_mode: &InputOrderMode,
aggr_exprs: Vec<Arc<AggregateFunctionExpr>>,
ozankabak marked this conversation as resolved.
Show resolved Hide resolved
aggr_expr_indices: Vec<usize>,
) -> PlanProperties {
// Construct equivalence properties:
let mut eq_properties = input
.equivalence_properties()
.project(group_expr_mapping, schema);
.project(group_expr_mapping, Arc::clone(&schema));

let schema_len = schema.fields.len();
let aggr_expr_indices =
((schema_len - aggr_exprs.len())..schema_len).collect::<Vec<_>>();
// if the aggregate function is set monotonic, add it into equivalence properties
for (i, aggr_expr) in aggr_exprs.iter().enumerate() {
let aggr_expr_index = aggr_expr_indices[i];
@@ -870,7 +865,6 @@ impl ExecutionPlan for AggregateExec {
Arc::clone(&children[0]),
Arc::clone(&self.input_schema),
Arc::clone(&self.schema),
self.aggr_expr_indices.clone(),
)?;
me.limit = self.limit;

@@ -947,8 +941,7 @@ fn create_schema(
group_by: &PhysicalGroupBy,
aggr_expr: &[Arc<AggregateFunctionExpr>],
mode: AggregateMode,
) -> Result<(Schema, Vec<usize>)> {
let mut aggr_exprs_indices = vec![];
) -> Result<Schema> {
let mut fields = Vec::with_capacity(group_by.num_output_exprs() + aggr_expr.len());
fields.extend(group_by.output_fields(input_schema)?);

@@ -957,7 +950,6 @@ fn create_schema(
// in partial mode, the fields of the accumulator's state
for expr in aggr_expr {
fields.extend(expr.state_fields()?.iter().cloned());
aggr_exprs_indices.push(fields.len() - 1);
}
}
AggregateMode::Final
@@ -966,15 +958,14 @@ fn create_schema(
| AggregateMode::SinglePartitioned => {
// in final mode, the field with the final result of the accumulator
for expr in aggr_expr {
fields.push(expr.field());
aggr_exprs_indices.push(fields.len() - 1);
fields.extend(expr.state_fields()?.iter().cloned())
}
}
}

Ok((
Schema::new_with_metadata(fields, input_schema.metadata().clone()),
aggr_exprs_indices,
Ok(Schema::new_with_metadata(
fields,
input_schema.metadata().clone(),
))
}

@@ -2810,7 +2801,7 @@ mod tests {
vec![false, false], // (a,b)
],
);
let (aggr_schema, _) = create_schema(
let aggr_schema = create_schema(
&input_schema,
&grouping_set,
&aggr_expr,
2 changes: 1 addition & 1 deletion datafusion/physical-plan/src/aggregates/row_hash.rs
Original file line number Diff line number Diff line change
@@ -506,7 +506,7 @@ impl GroupedHashAggregateStream {
// Therefore, when we spill these intermediate states or pass them to another
// aggregation operator, we must use a schema that includes both the group
// columns **and** the partial-state columns.
let (partial_agg_schema, _) = create_schema(
let partial_agg_schema = create_schema(
&agg.input().schema(),
&agg_group_by,
&aggregate_exprs,
6 changes: 2 additions & 4 deletions datafusion/physical-plan/src/windows/mod.rs
Original file line number Diff line number Diff line change
@@ -344,10 +344,8 @@ pub(crate) fn window_equivalence_properties(
.extend(input.equivalence_properties().clone());

let schema_len = schema.fields.len();
let window_expr_indices = (schema_len..(schema_len - window_exprs.len()))
.rev()
.collect::<Vec<_>>();

let window_expr_indices =
((schema_len - window_exprs.len())..schema_len).collect::<Vec<_>>();
for (i, expr) in window_exprs.iter().enumerate() {
if let Some(udf_window_expr) = expr.as_any().downcast_ref::<StandardWindowExpr>()
{
5 changes: 0 additions & 5 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
@@ -4963,9 +4963,6 @@ false
true
NULL

statement ok
set datafusion.optimizer.prefer_existing_sort = true;

#
# Add valid distinct case as aggregation plan test
#
@@ -4992,8 +4989,6 @@ physical_plan
11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
12)----------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true

statement ok
set datafusion.optimizer.prefer_existing_sort = false;

#
# Push limit into distinct group-by aggregation tests
8 changes: 1 addition & 7 deletions datafusion/sqllogictest/test_files/tpch/plans/q13.slt.part
Original file line number Diff line number Diff line change
@@ -16,9 +16,6 @@
# specific language governing permissions and limitations
# under the License.

statement ok
set datafusion.optimizer.prefer_existing_sort = true;

query TT
explain select
c_count,
@@ -75,7 +72,4 @@ physical_plan
17)------------------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4
18)--------------------------CoalesceBatchesExec: target_batch_size=8192
19)----------------------------FilterExec: o_comment@2 NOT LIKE %special%requests%, projection=[o_orderkey@0, o_custkey@1]
20)------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_comment], has_header=false

statement ok
set datafusion.optimizer.prefer_existing_sort = false;
20)------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_comment], has_header=false