Commit: add tests

kasakrisz committed Jan 27, 2025
1 parent ed98fe3 commit f561591
Showing 5 changed files with 545 additions and 9 deletions.
ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java (23 changes: 14 additions & 9 deletions)
@@ -5522,16 +5522,21 @@ OrderByRelBuilder addRelDistribution(ASTNode distributeByAST) throws SemanticException {
       for (int i = 0; i < distributeByAST.getChildCount(); ++i) {
         ASTNode keyAST = (ASTNode) distributeByAST.getChild(i);
         int fieldIndex = genSortByKey(keyAST);
-        keys.add(fieldIndex);
+        if (fieldIndex >= 0) {
+          keys.add(fieldIndex);
+        }
       }
+      ImmutableList<Integer> keyList = keys.build();
+      if (!keyList.isEmpty()) {
+        hiveRelDistribution = new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, keyList);
+        return this;
+      }
-      hiveRelDistribution = new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, keys.build());
-    } else {
-      // In case of SORT BY we do not need Distribution
-      // but the instance RelDistributions.ANY can not be used here because
-      // org.apache.calcite.rel.core.Exchange has
-      // assert distribution != RelDistributions.ANY;
-      hiveRelDistribution = new HiveRelDistribution(RelDistribution.Type.ANY, RelDistributions.ANY.getKeys());
     }
+    // In case of SORT BY we do not need Distribution
+    // but the instance RelDistributions.ANY can not be used here because
+    // org.apache.calcite.rel.core.Exchange has
+    // assert distribution != RelDistributions.ANY;
+    hiveRelDistribution = new HiveRelDistribution(RelDistribution.Type.ANY, RelDistributions.ANY.getKeys());
     return this;
   }
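The change to addRelDistribution means that key expressions for which genSortByKey returns a negative index (i.e. ones that do not resolve to an output column) are skipped when building the hash distribution, and if no valid keys remain the builder falls through to the ANY-type distribution that was previously reserved for plain SORT BY. Below is a minimal, self-contained sketch of that guard; the class and method names are illustrative stand-ins, not the real Hive or Calcite types.

import java.util.ArrayList;
import java.util.List;

// Sketch of the guard introduced above: unresolved key indexes (< 0) are
// dropped, and an empty result signals "no hash distribution needed".
public class DistributionKeyGuard {

  // Mirrors the patched loop: keep only non-negative field indexes.
  static List<Integer> validKeys(int... resolvedFieldIndexes) {
    List<Integer> keys = new ArrayList<>();
    for (int fieldIndex : resolvedFieldIndexes) {
      if (fieldIndex >= 0) {
        keys.add(fieldIndex);
      }
    }
    return keys;
  }

  public static void main(String[] args) {
    System.out.println(validKeys(0, 1)); // [0, 1] -> HASH_DISTRIBUTED on those fields
    System.out.println(validKeys(-1));   // []     -> fall through to the ANY distribution
  }
}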

@@ -5616,7 +5621,7 @@ RelNode sortLimit(RexNode offsetRN, RexNode fetchRN) throws SemanticException {
   RelNode sortExchange() throws SemanticException {
     genOBProject();
 
-    if (fieldCollations.isEmpty()) {
+    if (fieldCollations.isEmpty() && hiveRelDistribution.getKeys().isEmpty()) {
       return endGenOBLogicalPlan(obInputRel);
     }
 
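The sortExchange change widens the early exit: before the patch the method returned the unchanged plan whenever there were no sort keys, even if DISTRIBUTE BY had produced distribution keys; now it skips the exchange only when both the collation and the distribution keys are empty. A stand-alone restatement of the new condition (sortKeys and distributionKeys are illustrative stand-ins for fieldCollations and hiveRelDistribution.getKeys()):

import java.util.Collections;
import java.util.List;

public class SortExchangeCondition {

  // An exchange is skipped only when there is neither an ordering
  // nor a hash distribution to enforce.
  static boolean needsExchange(List<?> sortKeys, List<Integer> distributionKeys) {
    return !(sortKeys.isEmpty() && distributionKeys.isEmpty());
  }

  public static void main(String[] args) {
    System.out.println(needsExchange(List.of("+_col1"), List.of()));       // SORT BY only: true
    System.out.println(needsExchange(List.of(), List.of(0, 1)));           // DISTRIBUTE BY only: true
    System.out.println(needsExchange(Collections.emptyList(), List.of())); // neither: false
  }
}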
@@ -0,0 +1,21 @@
create table t1 (a string, b int, c int);

-- distribute by
explain cbo
select * from t1 distribute by 2;
explain
select * from t1 distribute by 2;

-- distribute by and sort by
explain cbo
select * from t1 distribute by 1, b sort by 2;

explain
select * from t1 distribute by 1, b sort by 2;

-- cluster by
explain cbo
select * from t1 cluster by 1, b;

explain
select * from t1 cluster by 1, b;
@@ -0,0 +1,35 @@
set hive.orderby.position.alias=false;

create table t1 (a string, b int, c int);

-- order by
explain cbo
select * from t1 order by 2, 3;
explain
select * from t1 order by 2, 3;

-- distribute by
explain cbo
select * from t1 distribute by 2;
explain
select * from t1 distribute by 2;

-- distribute by and sort by
explain cbo
select * from t1 distribute by 1, b sort by 2;

explain
select * from t1 distribute by 1, b sort by 2;

-- cluster by
explain cbo
select * from t1 cluster by 1;

explain
select * from t1 cluster by 1;

explain cbo
select * from t1 cluster by 1, b;

explain
select * from t1 cluster by 1, b;
@@ -0,0 +1,199 @@
PREHOOK: query: create table t1 (a string, b int, c int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t1
POSTHOOK: query: create table t1 (a string, b int, c int)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t1
PREHOOK: query: explain cbo
select * from t1 distribute by 2
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
select * from t1 distribute by 2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
CBO PLAN:
HiveProject(a=[$0], b=[$1], c=[$2])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: explain
select * from t1 distribute by 2
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
POSTHOOK: query: explain
select * from t1 distribute by 2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-0 is a root stage

STAGE PLANS:
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
TableScan
alias: t1
Select Operator
expressions: a (type: string), b (type: int), c (type: int)
outputColumnNames: _col0, _col1, _col2
ListSink

PREHOOK: query: explain cbo
select * from t1 distribute by 1, b sort by 2
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
select * from t1 distribute by 1, b sort by 2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
CBO PLAN:
HiveSortExchange(distribution=[hash[0, 1]], collation=[[1]])
HiveProject(a=[$0], b=[$1], c=[$2])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: explain
select * from t1 distribute by 1, b sort by 2
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
POSTHOOK: query: explain
select * from t1 distribute by 1, b sort by 2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: string), b (type: int), c (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col2 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink

PREHOOK: query: explain cbo
select * from t1 cluster by 1, b
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
select * from t1 cluster by 1, b
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
CBO PLAN:
HiveSortExchange(distribution=[hash[0, 1]], collation=[[0 ASC-nulls-first, 1 ASC-nulls-first]])
HiveProject(a=[$0], b=[$1], c=[$2])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: explain
select * from t1 cluster by 1, b
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
POSTHOOK: query: explain
select * from t1 cluster by 1, b
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: a (type: string), b (type: int), c (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink
