Skip to content

Commit

Permalink
fix](nereids)exists subquery should handle top level scalar agg corre…
Browse files Browse the repository at this point in the history
…ctly
  • Loading branch information
starocean999 committed Dec 29, 2023
1 parent c60eb77 commit 015b26d
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,17 @@ public List<Rule> buildRules() {
);
}

private static boolean canConvertToSupply(TreeNode<Expression> expression) {
// The subquery except ListQuery can be converted to Supply
return expression instanceof SubqueryExpr && !(expression instanceof ListQuery);
}

private static boolean isValidSubqueryConjunct(Expression expression) {
// only support 1 subquery expr in the expression
// don't support expression like subquery1 or subquery2
return expression.collectToList(SubqueryToApply::canConvertToSupply).size() == 1;
return expression
.collectToList(SubqueryToApply::canConvertToSupply)
.size() == 1;
}

private enum RelatedInfo {
Expand Down Expand Up @@ -311,19 +318,17 @@ private ImmutableList<RelatedInfo> collectRelatedInfo(List<Expression> subqueryC
return correlatedInfoList.build();
}

private static boolean canConvertToSupply(TreeNode<Expression> expression) {
// The subquery except ListQuery can be converted to Supply
return expression instanceof SubqueryExpr && !(expression instanceof ListQuery);
}

private LogicalPlan subqueryToApply(List<SubqueryExpr> subqueryExprs, LogicalPlan childPlan,
Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot,
CascadesContext ctx,
Optional<Expression> conjunct, boolean isProject) {
LogicalPlan tmpPlan = childPlan;
for (int i = 0; i < subqueryExprs.size(); ++i) {
SubqueryExpr subqueryExpr = subqueryExprs.get(i);
if (nonMarkJoinExistsWithAgg(subqueryExpr, subqueryToMarkJoinSlot)) {
if (subqueryExpr instanceof Exists && hasTopLevelScalarAgg(subqueryExpr.getQueryPlan())) {
// because top level scalar agg always returns a value or null(for empty input)
// so Exists and Not Exists conjunct are always evaluated to True and false literals respectively
// we don't create apply node for it
continue;
}

Expand All @@ -336,18 +341,11 @@ private LogicalPlan subqueryToApply(List<SubqueryExpr> subqueryExprs, LogicalPla
return tmpPlan;
}

private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists,
Map<SubqueryExpr, Optional<MarkJoinSlotReference>> subqueryToMarkJoinSlot) {
return exists instanceof Exists
&& !subqueryToMarkJoinSlot.get(exists).isPresent()
&& hasTopLevelAggWithoutGroupBy(exists.getQueryPlan());
}

private static boolean hasTopLevelAggWithoutGroupBy(Plan plan) {
private static boolean hasTopLevelScalarAgg(Plan plan) {
if (plan instanceof LogicalAggregate) {
return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty();
} else if (plan instanceof LogicalProject || plan instanceof LogicalSort) {
return hasTopLevelAggWithoutGroupBy(plan.child(0));
return hasTopLevelScalarAgg(plan.child(0));
}
return false;
}
Expand Down Expand Up @@ -425,19 +423,30 @@ public Expression visitExistsSubquery(Exists exists, SubqueryContext context) {
// The result set when NULL is specified in the subquery and still evaluates to TRUE by using EXISTS
// When the number of rows returned is empty, agg will return null, so if there is more agg,
// it will always consider the returned result to be true
boolean needCreateMarkJoinSlot = isMarkJoin || shouldOutputMarkJoinSlot;
MarkJoinSlotReference markJoinSlotReference = null;
if (hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()) && needCreateMarkJoinSlot) {
markJoinSlotReference =
new MarkJoinSlotReference(statementContext.generateColumnName(), true);
} else if (needCreateMarkJoinSlot) {
markJoinSlotReference =
new MarkJoinSlotReference(statementContext.generateColumnName());
}
if (needCreateMarkJoinSlot) {
context.setSubqueryToMarkJoinSlot(exists, Optional.of(markJoinSlotReference));
if (hasTopLevelScalarAgg(exists.getQueryPlan())) {
/*
top level scalar agg and always return a value or null for empty input
so Exists and Not Exists conjunct are always evaluated to True and False literals respectively
SELECT *
FROM t1
WHERE EXISTS (
SELECT SUM(a)
FROM t2
WHERE t1.a = t2.b and t1.a = 1;
);
*/
return exists.isNot() ? BooleanLiteral.FALSE : BooleanLiteral.TRUE;
} else {
boolean needCreateMarkJoinSlot = isMarkJoin || shouldOutputMarkJoinSlot;
if (needCreateMarkJoinSlot) {
MarkJoinSlotReference markJoinSlotReference =
new MarkJoinSlotReference(statementContext.generateColumnName());
context.setSubqueryToMarkJoinSlot(exists, Optional.of(markJoinSlotReference));
return markJoinSlotReference;
} else {
return BooleanLiteral.TRUE;
}
}
return needCreateMarkJoinSlot ? markJoinSlotReference : BooleanLiteral.TRUE;
}

@Override
Expand Down
81 changes: 81 additions & 0 deletions regression-test/data/nereids_syntax_p0/sub_query_correlated.out
Original file line number Diff line number Diff line change
Expand Up @@ -489,3 +489,84 @@ true
-- !doris_7643 --
3 3

-- !select_exists1 --
\N \N 0
\N \N 100

-- !select_exists2 --

-- !select_exists3 --
\N \N 0
\N \N 100

-- !select_exists4 --
\N \N 100

-- !select_exists5 --
true
true

-- !select_exists6 --
false
false

-- !select_exists7 --
true
true

-- !select_exists8 --
false
true

-- !select_exists9 --
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100

-- !select_exists10 --

-- !select_exists11 --
\N \N 0
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100
\N \N 100

-- !select_exists12 --
\N \N 0

Original file line number Diff line number Diff line change
Expand Up @@ -679,4 +679,87 @@ suite ("sub_query_correlated") {
// order_qt_doris_6937_2 """
// select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 not in (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 < 10 order by k1, k2;
// """
sql """drop table if exists table_21_undef_partitions2_keys3;"""
sql """drop table if exists table_1_undef_partitions2_keys3;"""
sql """create table table_21_undef_partitions2_keys3 (
`col_int_undef_signed` int ,
`col_varchar_10__undef_signed` varchar(10) ,
`pk` int
) engine=olap
distributed by hash(pk) buckets 10
properties('replication_num' = '1');"""
sql """create table table_1_undef_partitions2_keys3 (
`col_int_undef_signed` int ,
`col_varchar_10__undef_signed` varchar(10) ,
`pk` int
) engine=olap
distributed by hash(pk) buckets 10
properties('replication_num' = '1');"""
sql """insert into table_21_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed) values (0,null,null),(1,6,'c'),(2,7,'m'),(3,null,null),(4,null,'b'),(5,4,null),(6,3,null),(7,0,'z'),(8,null,"me"),(9,6,null),(10,0,""),(11,null,'e'),(12,null,"up"),(13,null,""),(14,7,'s'),(15,null,""),(16,3,null),(17,null,""),(18,null,""),(19,4,""),(20,7,null);"""
sql """insert into table_1_undef_partitions2_keys3(pk,col_int_undef_signed,col_varchar_10__undef_signed) values (0,null,null),(100,null,null);"""

qt_select_exists1 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) order by t1.pk;"""
qt_select_exists2 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE not EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) order by t1.pk;"""
qt_select_exists3 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = 9
) or t1.pk = 100 order by t1.pk;"""
qt_select_exists4 """SELECT *
FROM table_1_undef_partitions2_keys3 AS t1
WHERE not EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = 9
) or t1.pk = 100 order by t1.pk;"""
qt_select_exists5 """select EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) from table_1_undef_partitions2_keys3 AS t1;"""
qt_select_exists6 """select not EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
qt_select_exists7 """select EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) or t1.pk = 100 from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
qt_select_exists8 """select EXISTS (
SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9
) and t1.pk = 100 from table_1_undef_partitions2_keys3 AS t1 order by t1.pk;"""
qt_select_exists9 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and not exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk = 100 order by t1.pk;"""
qt_select_exists10 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and not exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk > 100 order by t1.pk;"""
qt_select_exists11 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk = 100 order by t1.pk;"""
qt_select_exists12 """select t1.* from table_1_undef_partitions2_keys3 AS t1 join table_21_undef_partitions2_keys3 AS t2
on t1.pk = t2.pk and exists ( SELECT SUM(`pk`)
FROM table_21_undef_partitions2_keys3 AS t2
WHERE t1.pk = t2.pk and t1.pk = 9 ) or t1.pk > 100 order by t1.pk;"""
}

0 comments on commit 015b26d

Please sign in to comment.