From d4f0053c4149b313d531867964dc82e91745ae87 Mon Sep 17 00:00:00 2001 From: kasakrisz Date: Mon, 27 Jan 2025 14:32:01 +0100 Subject: [PATCH] HIVE-28725: Sorting is performed when order by position is disabled when CBO is enabled --- .../hadoop/hive/ql/parse/CalcitePlanner.java | 38 ++- .../cbo_distribute_sort_cluster_by_pos.q | 21 ++ ...bo_order_distribute_sort_cluster_by_cnst.q | 36 +++ .../cbo_distribute_sort_cluster_by_pos.q.out | 235 +++++++++++++++ ...rder_distribute_sort_cluster_by_cnst.q.out | 276 ++++++++++++++++++ 5 files changed, 595 insertions(+), 11 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/cbo_distribute_sort_cluster_by_pos.q create mode 100644 ql/src/test/queries/clientpositive/cbo_order_distribute_sort_cluster_by_cnst.q create mode 100644 ql/src/test/results/clientpositive/llap/cbo_distribute_sort_cluster_by_pos.q.out create mode 100644 ql/src/test/results/clientpositive/llap/cbo_order_distribute_sort_cluster_by_cnst.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 143f85769f30..3cc314c8806f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -5405,6 +5405,9 @@ OrderByRelBuilder addSortByKeys(ASTNode obAST) throws SemanticException { ASTNode ref = (ASTNode) nullObASTExpr.getChild(0); int fieldIndex = genSortByKey(ref); + if (fieldIndex < 0) { + continue; + } // 2.4 Determine the Direction of order by RelFieldCollation.Direction order = RelFieldCollation.Direction.DESCENDING; @@ -5448,6 +5451,7 @@ private int genSortByKey(ASTNode ref) throws SemanticException { LOG.warn("Using constant number {}" + " in order by. If you try to use position alias when hive.orderby.position.alias is false, " + "the position alias will be ignored.", ref.getText()); + return -1; } } else { // 2.2 Convert ExprNode to RexNode @@ -5465,8 +5469,6 @@ private int genSortByKey(ASTNode ref) throws SemanticException { return fieldIndex; } } - - return 0; } private RexNode getOrderByExpression( @@ -5520,16 +5522,21 @@ OrderByRelBuilder addRelDistribution(ASTNode distributeByAST) throws SemanticExc for (int i = 0; i < distributeByAST.getChildCount(); ++i) { ASTNode keyAST = (ASTNode) distributeByAST.getChild(i); int fieldIndex = genSortByKey(keyAST); - keys.add(fieldIndex); + if (fieldIndex >= 0) { + keys.add(fieldIndex); + } + } + ImmutableList keyList = keys.build(); + if (!keyList.isEmpty()) { + hiveRelDistribution = new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, keyList); + return this; } - hiveRelDistribution = new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, keys.build()); - } else { - // In case of SORT BY we do not need Distribution - // but the instance RelDistributions.ANY can not be used here because - // org.apache.calcite.rel.core.Exchange has - // assert distribution != RelDistributions.ANY; - hiveRelDistribution = new HiveRelDistribution(RelDistribution.Type.ANY, RelDistributions.ANY.getKeys()); } + // In case of SORT BY we do not need Distribution + // but the instance RelDistributions.ANY can not be used here because + // org.apache.calcite.rel.core.Exchange has + // assert distribution != RelDistributions.ANY; + hiveRelDistribution = new HiveRelDistribution(RelDistribution.Type.ANY, RelDistributions.ANY.getKeys()); return this; } @@ -5599,6 +5606,10 @@ private void genOBProject() throws SemanticException { RelNode sortLimit(RexNode offsetRN, RexNode fetchRN) throws SemanticException { genOBProject(); + if (fieldCollations.isEmpty()) { + return endGenOBLogicalPlan(obInputRel); + } + // 4. Construct SortRel RelOptCluster cluster = calcitePlannerAction.cluster; RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION); @@ -5610,13 +5621,18 @@ RelNode sortLimit(RexNode offsetRN, RexNode fetchRN) throws SemanticException { RelNode sortExchange() throws SemanticException { genOBProject(); + if (fieldCollations.isEmpty() && hiveRelDistribution.getKeys().isEmpty()) { + return endGenOBLogicalPlan(obInputRel); + } + RelCollation canonizedCollation = RelCollations.of(fieldCollations); ImmutableList.Builder builder = ImmutableList.builder(); for (RelFieldCollation relFieldCollation : canonizedCollation.getFieldCollations()) { int index = relFieldCollation.getFieldIndex(); builder.add(calcitePlannerAction.cluster.getRexBuilder().makeInputRef(obInputRel, index)); } - RelNode sortRel = HiveSortExchange.create(obInputRel, hiveRelDistribution, canonizedCollation, builder.build()); + ImmutableList keys = builder.build(); + RelNode sortRel = HiveSortExchange.create(obInputRel, hiveRelDistribution, canonizedCollation, keys); return endGenOBLogicalPlan(sortRel); } diff --git a/ql/src/test/queries/clientpositive/cbo_distribute_sort_cluster_by_pos.q b/ql/src/test/queries/clientpositive/cbo_distribute_sort_cluster_by_pos.q new file mode 100644 index 000000000000..94db179740b6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_distribute_sort_cluster_by_pos.q @@ -0,0 +1,21 @@ +create table t1 (a string, b int, c int); + +-- distribute by +explain cbo +select * from t1 distribute by 2; +explain +select * from t1 distribute by 2; + +-- distribute by and sort by +explain cbo +select * from t1 distribute by 1, b sort by 2; + +explain +select * from t1 distribute by 1, b sort by 2; + +-- cluster by +explain cbo +select * from t1 cluster by 1, b; + +explain +select * from t1 cluster by 1, b; diff --git a/ql/src/test/queries/clientpositive/cbo_order_distribute_sort_cluster_by_cnst.q b/ql/src/test/queries/clientpositive/cbo_order_distribute_sort_cluster_by_cnst.q new file mode 100644 index 000000000000..15073b1d2c0d --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_order_distribute_sort_cluster_by_cnst.q @@ -0,0 +1,36 @@ +-- When orderby.position.alias is disabled, we expect no operation to occur if a constant integer is specified as a key. +set hive.orderby.position.alias=false; + +create table t1 (a string, b int, c int); + +-- order by +explain cbo +select * from t1 order by 2, 3; +explain +select * from t1 order by 2, 3; + +-- distribute by +explain cbo +select * from t1 distribute by 2; +explain +select * from t1 distribute by 2; + +-- distribute by and sort by +explain cbo +select * from t1 distribute by 1, b sort by 2; + +explain +select * from t1 distribute by 1, b sort by 2; + +-- cluster by +explain cbo +select * from t1 cluster by 1; + +explain +select * from t1 cluster by 1; + +explain cbo +select * from t1 cluster by 1, b; + +explain +select * from t1 cluster by 1, b; diff --git a/ql/src/test/results/clientpositive/llap/cbo_distribute_sort_cluster_by_pos.q.out b/ql/src/test/results/clientpositive/llap/cbo_distribute_sort_cluster_by_pos.q.out new file mode 100644 index 000000000000..bf6afbb0af94 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/cbo_distribute_sort_cluster_by_pos.q.out @@ -0,0 +1,235 @@ +PREHOOK: query: create table t1 (a string, b int, c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (a string, b int, c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: explain cbo +select * from t1 distribute by 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from t1 distribute by 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +CBO PLAN: +HiveSortExchange(distribution=[hash[1]], collation=[[]]) + HiveProject(a=[$0], b=[$1], c=[$2]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + +PREHOOK: query: explain +select * from t1 distribute by 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from t1 distribute by 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: int), c (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain cbo +select * from t1 distribute by 1, b sort by 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from t1 distribute by 1, b sort by 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +CBO PLAN: +HiveSortExchange(distribution=[hash[0, 1]], collation=[[1]]) + HiveProject(a=[$0], b=[$1], c=[$2]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + +PREHOOK: query: explain +select * from t1 distribute by 1, b sort by 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from t1 distribute by 1, b sort by 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: int), c (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain cbo +select * from t1 cluster by 1, b +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from t1 cluster by 1, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +CBO PLAN: +HiveSortExchange(distribution=[hash[0, 1]], collation=[[0 ASC-nulls-first, 1 ASC-nulls-first]]) + HiveProject(a=[$0], b=[$1], c=[$2]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + +PREHOOK: query: explain +select * from t1 cluster by 1, b +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from t1 cluster by 1, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: int), c (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/cbo_order_distribute_sort_cluster_by_cnst.q.out b/ql/src/test/results/clientpositive/llap/cbo_order_distribute_sort_cluster_by_cnst.q.out new file mode 100644 index 000000000000..905d72a91805 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/cbo_order_distribute_sort_cluster_by_cnst.q.out @@ -0,0 +1,276 @@ +PREHOOK: query: create table t1 (a string, b int, c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (a string, b int, c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: explain cbo +select * from t1 order by 2, 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from t1 order by 2, 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(a=[$0], b=[$1], c=[$2]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + +PREHOOK: query: explain +select * from t1 order by 2, 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from t1 order by 2, 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: t1 + Select Operator + expressions: a (type: string), b (type: int), c (type: int) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: explain cbo +select * from t1 distribute by 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from t1 distribute by 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(a=[$0], b=[$1], c=[$2]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + +PREHOOK: query: explain +select * from t1 distribute by 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from t1 distribute by 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: t1 + Select Operator + expressions: a (type: string), b (type: int), c (type: int) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: explain cbo +select * from t1 distribute by 1, b sort by 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from t1 distribute by 1, b sort by 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +CBO PLAN: +HiveSortExchange(distribution=[hash[1]], collation=[[]]) + HiveProject(a=[$0], b=[$1], c=[$2]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + +PREHOOK: query: explain +select * from t1 distribute by 1, b sort by 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from t1 distribute by 1, b sort by 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: int), c (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain cbo +select * from t1 cluster by 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from t1 cluster by 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +CBO PLAN: +HiveProject(a=[$0], b=[$1], c=[$2]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + +PREHOOK: query: explain +select * from t1 cluster by 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from t1 cluster by 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: t1 + Select Operator + expressions: a (type: string), b (type: int), c (type: int) + outputColumnNames: _col0, _col1, _col2 + ListSink + +PREHOOK: query: explain cbo +select * from t1 cluster by 1, b +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select * from t1 cluster by 1, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +CBO PLAN: +HiveSortExchange(distribution=[hash[1]], collation=[[1 ASC-nulls-first]]) + HiveProject(a=[$0], b=[$1], c=[$2]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + +PREHOOK: query: explain +select * from t1 cluster by 1, b +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain +select * from t1 cluster by 1, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: int), c (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +