Skip to content

Commit

Permalink
[nereids] consider numNulls in filter estimation
Browse files Browse the repository at this point in the history
  • Loading branch information
zhongjian.xzj committed Dec 28, 2023
1 parent 0cc4ee5 commit 5e004d0
Showing 1 changed file with 52 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ public Statistics visitCompoundPredicate(CompoundPredicate predicate, Estimation
Set<Slot> rightInputSlots = rightExpr.getInputSlots();
for (Slot slot : context.keyColumns) {
if (leftInputSlots.contains(slot) && rightInputSlots.contains(slot)) {
ColumnStatistic origColStats = context.statistics.findColumnStatistics(slot);
ColumnStatistic leftColStats = leftStats.findColumnStatistics(slot);
ColumnStatistic rightColStats = rightStats.findColumnStatistics(slot);
StatisticRange leftRange = StatisticRange.from(leftColStats, slot.getDataType());
Expand All @@ -117,6 +118,11 @@ public Statistics visitCompoundPredicate(CompoundPredicate predicate, Estimation
colBuilder.setMinValue(union.getLow()).setMinExpr(union.getLowExpr())
.setMaxValue(union.getHigh()).setMaxExpr(union.getHighExpr())
.setNdv(union.getDistinctValues());
if (!(leftExpr instanceof IsNull || rightExpr instanceof IsNull)) {
colBuilder.setNumNulls(0);
} else {
colBuilder.setNumNulls(origColStats.numNulls);
}
orStats.addColumnStats(slot, colBuilder.build());
}
}
Expand Down Expand Up @@ -175,7 +181,7 @@ public Statistics visitComparisonPredicate(ComparisonPredicate cp, EstimationCon
}

private Statistics updateLessThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft,
ColumnStatistic statsForRight, EstimationContext context, boolean contains) {
ColumnStatistic statsForRight, EstimationContext context) {
StatisticRange rightRange = new StatisticRange(statsForLeft.minValue, statsForLeft.minExpr,
statsForRight.maxValue, statsForRight.maxExpr,
statsForLeft.ndv, leftExpr.getDataType());
Expand All @@ -185,7 +191,7 @@ private Statistics updateLessThanLiteral(Expression leftExpr, ColumnStatistic st
}

private Statistics updateGreaterThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft,
ColumnStatistic statsForRight, EstimationContext context, boolean contains) {
ColumnStatistic statsForRight, EstimationContext context) {
StatisticRange rightRange = new StatisticRange(statsForRight.minValue, statsForRight.minExpr,
statsForLeft.maxValue, statsForLeft.maxExpr,
statsForLeft.ndv, leftExpr.getDataType());
Expand All @@ -202,12 +208,9 @@ private Statistics calculateWhenLiteralRight(ComparisonPredicate cp,
return estimateEqualTo(cp, statsForLeft, statsForRight, context);
} else {
if (cp instanceof LessThan || cp instanceof LessThanEqual) {
return updateLessThanLiteral(cp.left(), statsForLeft, statsForRight,
context, cp instanceof LessThanEqual);
return updateLessThanLiteral(cp.left(), statsForLeft, statsForRight, context);
} else if (cp instanceof GreaterThan || cp instanceof GreaterThanEqual) {

return updateGreaterThanLiteral(cp.left(), statsForLeft, statsForRight, context,
cp instanceof GreaterThanEqual);
return updateGreaterThanLiteral(cp.left(), statsForLeft, statsForRight, context);
} else {
throw new RuntimeException(String.format("Unexpected expression : %s", cp.toSql()));
}
Expand All @@ -222,9 +225,12 @@ private Statistics estimateEqualTo(ComparisonPredicate cp, ColumnStatistic stats
double val = statsForRight.maxValue;
if (val > statsForLeft.maxValue || val < statsForLeft.minValue) {
selectivity = 0.0;
} else if (statsForRight.isUnKnown) {
selectivity = 0.0;
} else {
selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv);
}
selectivity = getNotNullSelectivity(statsForLeft, selectivity);
Statistics equalStats = context.statistics.withSel(selectivity);
Expression left = cp.left();
equalStats.addColumnStats(left, statsForRight);
Expand Down Expand Up @@ -331,10 +337,12 @@ A not in (1, 2, 3, 100):
selectivity = 1.0;
}
}
compareExprStatsBuilder.setNumNulls(0);
Statistics estimated = new Statistics(context.statistics);
ColumnStatistic stats = compareExprStatsBuilder.build();
selectivity = getNotNullSelectivity(stats, selectivity);
estimated = estimated.withSel(selectivity);
estimated.addColumnStats(compareExpr,
compareExprStatsBuilder.build());
estimated.addColumnStats(compareExpr, stats);
context.addKeyIfSlot(compareExpr);
return estimated;
}
Expand Down Expand Up @@ -394,6 +402,11 @@ public Statistics visitNot(Not not, EstimationContext context) {
.setMaxValue(originColStats.maxValue)
.setMaxExpr(originColStats.maxExpr);
}
if (not.child().getInputSlots().size() == 1 && !(child instanceof IsNull)) {
// only consider the single column numNull, otherwise, ignore
rowCount = Math.max(rowCount - originColStats.numNulls, 1);
statisticsBuilder.setRowCount(rowCount);
}
statisticsBuilder.putColumnStatistics(slot, colBuilder.build());
}
}
Expand Down Expand Up @@ -460,15 +473,18 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, ColumnSta
.setMaxValue(Double.POSITIVE_INFINITY)
.setMaxExpr(null)
.setNdv(0)
.setCount(0);
.setCount(0)
.setNumNulls(0);
} else {
leftColumnStatisticBuilder = new ColumnStatisticBuilder(leftStats)
.setMinValue(intersectRange.getLow())
.setMinExpr(intersectRange.getLowExpr())
.setMaxValue(intersectRange.getHigh())
.setMaxExpr(intersectRange.getHighExpr())
.setNdv(intersectRange.getDistinctValues());
.setNdv(intersectRange.getDistinctValues())
.setNumNulls(0);
double sel = leftRange.overlapPercentWith(rightRange);
sel = getNotNullSelectivity(leftStats, sel);
updatedStatistics = context.statistics.withSel(sel);
leftColumnStatisticBuilder.setCount(updatedStatistics.getRowCount());
}
Expand All @@ -488,6 +504,7 @@ private Statistics estimateColumnEqualToColumn(Expression leftExpr, ColumnStatis
intersectBuilder.setNdv(intersect.getDistinctValues());
intersectBuilder.setMinValue(intersect.getLow());
intersectBuilder.setMaxValue(intersect.getHigh());
intersectBuilder.setNumNulls(0);
double sel = 1 / StatsMathUtil.nonZeroDivisor(Math.max(leftStats.ndv, rightStats.ndv));
Statistics updatedStatistics = context.statistics.withSel(sel);
updatedStatistics.addColumnStats(leftExpr, intersectBuilder.build());
Expand Down Expand Up @@ -568,10 +585,33 @@ public Statistics visitLike(Like like, EstimationContext context) {
"col stats not found. slot=%s in %s",
like.left().toSql(), like.toSql());
ColumnStatisticBuilder colBuilder = new ColumnStatisticBuilder(origin);
colBuilder.setNdv(origin.ndv * DEFAULT_LIKE_COMPARISON_SELECTIVITY).setNumNulls(0);
double selectivity = origin.ndv * DEFAULT_LIKE_COMPARISON_SELECTIVITY;
double notNullSel = getNotNullSelectivity(origin, selectivity);
colBuilder.setNdv(selectivity).setCount(notNullSel * context.statistics.getRowCount()).setNumNulls(0);
statsBuilder.putColumnStatistics(like.left(), colBuilder.build());
context.addKeyIfSlot(like.left());
}
return statsBuilder.build();
}

/**
 * Scales a selectivity by the fraction of rows whose value in the given column is not null.
 *
 * <p>The not-null fraction is {@code 1 - clamp(numNulls / count)}; when the column's
 * {@code count} is at most 1 the fraction is taken as 1 (no adjustment). The product of
 * {@code origSel} and that fraction is then passed through {@link #getValidSelectivity(double)}.
 *
 * @param stats   column statistics supplying {@code count} and {@code numNulls}
 * @param origSel selectivity computed without taking nulls into account
 * @return {@code origSel} multiplied by the not-null fraction, limited by
 *         {@code getValidSelectivity}
 */
private double getNotNullSelectivity(ColumnStatistic stats, double origSel) {
    double totalRows = stats.count;
    double nullRows = stats.numNulls;

    // numNulls is intentionally NOT capped at (rowCount - ndv) here. The row count and
    // the ndv may be inconsistent with each other: e.g. the row count may already have
    // been reduced by one filter while another filter column's ndv and numNulls still
    // hold their original values, which would unexpectedly trigger such a normalization.

    double notNullFraction;
    if (totalRows <= 1.0) {
        // Too few rows for a meaningful null ratio: leave the selectivity untouched.
        notNullFraction = 1.0;
    } else {
        double nullFraction = getValidSelectivity(nullRows / totalRows);
        notNullFraction = 1 - nullFraction;
    }
    return getValidSelectivity(origSel * notNullFraction);
}

/**
 * Limits a selectivity-like ratio to the closed interval [0, 1].
 *
 * @param nullSel the ratio to limit
 * @return 0 when {@code nullSel} is below 0, 1 when it is above 1, otherwise
 *         {@code nullSel} unchanged (a NaN input fails both comparisons and is
 *         therefore returned as-is)
 */
private static double getValidSelectivity(double nullSel) {
    if (nullSel < 0) {
        return 0;
    }
    if (nullSel > 1) {
        return 1;
    }
    return nullSel;
}
}

0 comments on commit 5e004d0

Please sign in to comment.