From 745ae38554571b6890be7db5b1e1b5dc4c51324b Mon Sep 17 00:00:00 2001 From: Pavel Tiunov Date: Tue, 24 Oct 2023 13:07:57 -0700 Subject: [PATCH] feat(cubesql): Aggregation over dimensions support (#7290) --- rust/cubesql/cubesql/src/compile/mod.rs | 27 +++++++++++++-- .../cubesql/src/compile/rewrite/converter.rs | 1 + .../cubesql/src/compile/rewrite/cost.rs | 34 +++++++++++++++++-- .../cubesql/src/compile/rewrite/rewriter.rs | 3 +- 4 files changed, 58 insertions(+), 7 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 5f44438d81a1a..48ff558cee498 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -18520,7 +18520,7 @@ ORDER BY \"COUNT(count)\" DESC" init_logger(); let query_plan = convert_select_to_query_plan( - "SELECT COALESCE(customer_gender, 'N/A'), MIN(avgPrice) mp FROM (SELECT AVG(avgPrice) avgPrice, customer_gender FROM KibanaSampleDataEcommerce GROUP BY 2 LIMIT 1) a GROUP BY 1" + "SELECT COALESCE(customer_gender, 'N/A'), AVG(avgPrice) mp FROM KibanaSampleDataEcommerce a GROUP BY 1" .to_string(), DatabaseProtocol::PostgreSQL, ) @@ -18549,7 +18549,7 @@ ORDER BY \"COUNT(count)\" DESC" init_logger(); let query_plan = convert_select_to_query_plan( - "SELECT CASE WHEN customer_gender = 'female' THEN 'f' ELSE 'm' END, MIN(avgPrice) mp FROM (SELECT AVG(avgPrice) avgPrice, customer_gender FROM KibanaSampleDataEcommerce GROUP BY 2 LIMIT 1) a GROUP BY 1" + "SELECT CASE WHEN customer_gender = 'female' THEN 'f' ELSE 'm' END, AVG(avgPrice) mp FROM KibanaSampleDataEcommerce a GROUP BY 1" .to_string(), DatabaseProtocol::PostgreSQL, ) @@ -18666,7 +18666,7 @@ ORDER BY \"COUNT(count)\" DESC" init_logger(); let query_plan = convert_select_to_query_plan( - "SELECT * FROM (SELECT CASE WHEN customer_gender = 'female' THEN 'f' ELSE 'm' END, MIN(avgPrice) mp FROM (SELECT AVG(avgPrice) avgPrice, customer_gender FROM KibanaSampleDataEcommerce GROUP BY 2 LIMIT 1) a GROUP BY 1) q LIMIT 1123" + "SELECT * FROM (SELECT CASE WHEN customer_gender = 'female' THEN 'f' ELSE 'm' END, AVG(avgPrice) mp FROM KibanaSampleDataEcommerce a GROUP BY 1) q LIMIT 1123" .to_string(), DatabaseProtocol::PostgreSQL, ) @@ -18694,6 +18694,27 @@ ORDER BY \"COUNT(count)\" DESC" ); } + #[tokio::test] + async fn test_case_wrapper_ungrouped_on_dimension() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_logger(); + + let query_plan = convert_select_to_query_plan( + "SELECT CASE WHEN SUM(taxful_total_price) > 0 THEN SUM(taxful_total_price) ELSE 0 END FROM KibanaSampleDataEcommerce a" + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + } + #[tokio::test] async fn test_wrapper_tableau_sunday_week() { if !Rewriter::sql_push_down_enabled() { diff --git a/rust/cubesql/cubesql/src/compile/rewrite/converter.rs b/rust/cubesql/cubesql/src/compile/rewrite/converter.rs index 82c9815c93272..a08d6862e84ca 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/converter.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/converter.rs @@ -1773,6 +1773,7 @@ impl LanguageToLogicalPlanConverter { LogicalPlanLanguage::CubeScan(_) => return true, _ => (), }, + LogicalPlanLanguage::CubeScanWrapper(_) => return true, _ => (), } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/cost.rs b/rust/cubesql/cubesql/src/compile/rewrite/cost.rs index 10d6cb7d2bfe0..ee351ba2d17ab 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/cost.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/cost.rs @@ -12,6 +12,7 @@ pub struct BestCubePlan; /// - `filters` > `filter_members` - optimize for push down of filters /// - `filter_members` > `cube_members` - optimize for `inDateRange` filter push down to time dimension /// - `member_errors` > `cube_members` - extra cube members may be required (e.g. CASE) +/// - `member_errors` > `wrapper_nodes` - use SQL push down where possible if cube scan can't be detected /// - match errors by priority - optimize for more specific errors #[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)] pub struct CubePlanCost { @@ -21,13 +22,15 @@ pub struct CubePlanCost { filters: i64, structure_points: i64, filter_members: i64, + empty_wrappers: i64, member_errors: i64, + wrapper_nodes: i64, + ast_size_outside_wrapper: usize, cube_members: i64, errors: i64, - ast_size_outside_wrapper: usize, - wrapper_nodes: i64, cube_scan_nodes: i64, ast_size: usize, + ast_size_inside_wrapper: usize, } #[derive(Debug, Clone, Eq, PartialEq)] @@ -99,11 +102,13 @@ impl CubePlanCost { cube_members: self.cube_members + other.cube_members, errors: self.errors + other.errors, structure_points: self.structure_points + other.structure_points, + empty_wrappers: self.empty_wrappers + other.empty_wrappers, ast_size_outside_wrapper: self.ast_size_outside_wrapper + other.ast_size_outside_wrapper, wrapper_nodes: self.wrapper_nodes + other.wrapper_nodes, cube_scan_nodes: self.cube_scan_nodes + other.cube_scan_nodes, ast_size: self.ast_size + other.ast_size, + ast_size_inside_wrapper: self.ast_size_inside_wrapper + other.ast_size_inside_wrapper, } } @@ -112,7 +117,11 @@ impl CubePlanCost { replacers: self.replacers, table_scans: self.table_scans, filters: self.filters, - non_detected_cube_scans: self.non_detected_cube_scans, + non_detected_cube_scans: match state { + CubePlanState::Wrapped => 0, + CubePlanState::Unwrapped(_) => self.non_detected_cube_scans, + CubePlanState::Wrapper => 0, + }, filter_members: self.filter_members, member_errors: self.member_errors, cube_members: self.cube_members, @@ -123,9 +132,21 @@ impl CubePlanCost { CubePlanState::Unwrapped(size) => *size, CubePlanState::Wrapper => 0, } + self.ast_size_outside_wrapper, + empty_wrappers: match state { + CubePlanState::Wrapped => 0, + CubePlanState::Unwrapped(_) => 0, + CubePlanState::Wrapper => { + if self.ast_size_inside_wrapper == 0 { + 1 + } else { + 0 + } + } + } + self.empty_wrappers, wrapper_nodes: self.wrapper_nodes, cube_scan_nodes: self.cube_scan_nodes, ast_size: self.ast_size, + ast_size_inside_wrapper: self.ast_size_inside_wrapper, } } } @@ -165,6 +186,11 @@ impl CostFunction for BestCubePlan { _ => 0, }; + let ast_size_inside_wrapper = match enode { + LogicalPlanLanguage::WrappedSelect(_) => 1, + _ => 0, + }; + let wrapper_nodes = match enode { LogicalPlanLanguage::CubeScanWrapper(_) => 1, _ => 0, @@ -242,7 +268,9 @@ impl CostFunction for BestCubePlan { errors: this_errors, structure_points, wrapper_nodes, + empty_wrappers: 0, ast_size_outside_wrapper: 0, + ast_size_inside_wrapper, cube_scan_nodes, ast_size: 1, }, diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs b/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs index bb7dab412634a..85ee828380c58 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs @@ -339,7 +339,7 @@ impl Rewriter { vec![] }; let extractor = Extractor::new(&runner.egraph, BestCubePlan); - let (_, best) = extractor.find_best(root); + let (best_cost, best) = extractor.find_best(root); let qtrace_best_graph = if Qtrace::is_enabled() { best.as_ref().iter().cloned().collect() } else { @@ -354,6 +354,7 @@ impl Rewriter { .map(|(i, n)| format!("{}: {:?}", i, n)) .join(", ") ); + log::debug!("Best cost: {:?}", best_cost); let converter = LanguageToLogicalPlanConverter::new(best, cube_context.clone(), auth_context); Ok((