Skip to content

Commit

Permalink
SQL: Support GROUP BY and ORDER BY for NULL types. (apache#16252)
Browse files Browse the repository at this point in the history
* SQL: Support GROUP BY and ORDER BY for NULL types.

Treats SQL NULL types as strings at the native layer.

* Update tests.

* Fixes.

* Fix tests.

* Fix tests.

* Fix test.

* Add back NOT_ENOUGH_RULES.

* Fix test.

* Fix test.

* Update test case.

* Update quidem test.
  • Loading branch information
gianm authored Feb 4, 2025
1 parent 1db2fb7 commit f694255
Show file tree
Hide file tree
Showing 12 changed files with 271 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -399,16 +399,16 @@ public void testNullInputs()
ImmutableList.of(
new ArrayOfDoublesSketchToMetricsSumEstimatePostAggregator(
"p1",
expressionPostAgg("p0", "null", null)
expressionPostAgg("p0", "null", ColumnType.STRING)
),
new ArrayOfDoublesSketchSetOpPostAggregator(
"p4",
ArrayOfDoublesSketchOperations.Operation.UNION.name(),
null,
null,
ImmutableList.of(
expressionPostAgg("p2", "null", null),
expressionPostAgg("p3", "null", null)
expressionPostAgg("p2", "null", ColumnType.STRING),
expressionPostAgg("p3", "null", ColumnType.STRING)
)
),
new ArrayOfDoublesSketchSetOpPostAggregator(
Expand All @@ -417,7 +417,7 @@ public void testNullInputs()
null,
null,
ImmutableList.of(
expressionPostAgg("p5", "null", null),
expressionPostAgg("p5", "null", ColumnType.STRING),
new FieldAccessPostAggregator("p6", "a1")
)
),
Expand All @@ -428,7 +428,7 @@ public void testNullInputs()
null,
ImmutableList.of(
new FieldAccessPostAggregator("p8", "a1"),
expressionPostAgg("p9", "null", null)
expressionPostAgg("p9", "null", ColumnType.STRING)
)
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,11 @@ private static DimFilter toSimpleLeafFilter(

// Numeric lhs needs a numeric comparison.
final StringComparator comparator = Calcites.getStringComparatorForRelDataType(lhs.getType());
if (comparator == null) {
// Type is not comparable.
return null;
}

final BoundRefKey boundRefKey = new BoundRefKey(column, extractionFn, comparator);
final DimFilter filter;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ public static ColumnType getValueTypeForRelDataTypeFull(final RelDataType type)
return ColumnType.DOUBLE;
} else if (isLongType(sqlTypeName)) {
return ColumnType.LONG;
} else if (isStringType(sqlTypeName)) {
} else if (isStringType(sqlTypeName) || sqlTypeName == SqlTypeName.NULL) {
return ColumnType.STRING;
} else if (SqlTypeName.OTHER == sqlTypeName) {
if (type instanceof RowSignatures.ComplexSqlType) {
Expand Down Expand Up @@ -247,12 +247,22 @@ public static boolean isLongType(SqlTypeName sqlTypeName)
}

/**
* Returns the natural StringComparator associated with the RelDataType
* Returns the natural StringComparator associated with the RelDataType, or null if the type is not convertible to
* {@link ColumnType} by {@link #getColumnTypeForRelDataType(RelDataType)}.
*/
@Nullable
public static StringComparator getStringComparatorForRelDataType(RelDataType dataType)
{
final ColumnType valueType = getColumnTypeForRelDataType(dataType);
return getStringComparatorForValueType(valueType);
if (dataType.getSqlTypeName() == SqlTypeName.NULL) {
return StringComparators.NATURAL;
} else {
final ColumnType valueType = getColumnTypeForRelDataType(dataType);
if (valueType == null) {
return null;
}

return getStringComparatorForValueType(valueType);
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7331,7 +7331,13 @@ public void testNullArray()
newScanQueryBuilder()
.dataSource(CalciteTests.ARRAYS_DATASOURCE)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(expressionVirtualColumn("v0", "(\"arrayLongNulls\" == array(null,null))", ColumnType.LONG))
.virtualColumns(
expressionVirtualColumn(
"v0",
"(\"arrayLongNulls\" == CAST(array(null,null), 'ARRAY<LONG>'))",
ColumnType.LONG
)
)
.columns("v0")
.columnTypes(ColumnType.LONG)
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
Expand Down
108 changes: 108 additions & 0 deletions sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -6198,6 +6198,114 @@ public void testCountStarWithTimeInIntervalFilterLosAngeles()
);
}

@Test
public void testGroupByNullType()
{
// Cannot vectorize due to null constant expression.
cannotVectorize();
testQuery(
"SELECT NULL as nullcol, COUNT(*) FROM druid.foo GROUP BY 1",
ImmutableList.of(
new GroupByQuery.Builder()
.setDataSource(CalciteTests.DATASOURCE1)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setVirtualColumns(expressionVirtualColumn("v0", "null", ColumnType.STRING))
.setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)))
.setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{null, 6L}
)
);
}

@Test
public void testOrderByNullType()
{
testQuery(
// Order on subquery, since the native engine doesn't currently support ordering when selecting directly
// from a table.
"SELECT dim1, NULL as nullcol FROM (SELECT DISTINCT dim1 FROM druid.foo LIMIT 1) ORDER BY 2",
ImmutableList.of(
WindowOperatorQueryBuilder
.builder()
.setDataSource(
new TopNQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.dimension(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING))
.threshold(1)
.metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC))
.postAggregators(expressionPostAgg("s0", "null", ColumnType.STRING))
.context(QUERY_CONTEXT_DEFAULT)
.build()
)
.setSignature(
RowSignature.builder()
.add("d0", ColumnType.STRING)
.add("s0", ColumnType.STRING)
.build()
)
.setOperators(
OperatorFactoryBuilders.naiveSortOperator("s0", ColumnWithDirection.Direction.ASC)
)
.setLeafOperators(
OperatorFactoryBuilders
.scanOperatorFactoryBuilder()
.setOffsetLimit(0, Long.MAX_VALUE)
.setProjectedColumns("d0", "s0")
.build()
)
.build()
),
ImmutableList.of(
new Object[]{"", null}
)
);
}

@Test
public void testGroupByOrderByNullType()
{
// Cannot vectorize due to null constant expression.
cannotVectorize();

testQuery(
"SELECT NULL as nullcol, COUNT(*) FROM druid.foo GROUP BY 1 ORDER BY 1",
ImmutableList.of(
new GroupByQuery.Builder()
.setDataSource(CalciteTests.DATASOURCE1)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setVirtualColumns(expressionVirtualColumn("v0", "null", ColumnType.STRING))
.setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)))
.setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
.setLimitSpec(
queryFramework().engine().featureAvailable(EngineFeature.GROUPBY_IMPLICITLY_SORTS)
? NoopLimitSpec.instance()
: new DefaultLimitSpec(
ImmutableList.of(
new OrderByColumnSpec(
"d0",
Direction.ASCENDING,
StringComparators.NATURAL
)
),
Integer.MAX_VALUE
)
)
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{null, 6L}
)
);
}

@Test
public void testCountStarWithTimeInIntervalFilterInvalidInterval()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ public void testValuesContainingNull()
ImmutableList.of(new Object[]{null, "United States"}),
RowSignature
.builder()
.add("EXPR$0", null)
.add("EXPR$0", ColumnType.STRING)
.add("EXPR$1", ColumnType.STRING)
.build()
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ public void testUnion()
.columnTypes(ColumnType.STRING, ColumnType.STRING)
.context(QUERY_CONTEXT_DEFAULT)
.resultFormat(ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.virtualColumns(
expressionVirtualColumn("v0", "null", null)
)
.virtualColumns(expressionVirtualColumn("v0", "null", ColumnType.STRING))
.build(),
Druids.newScanQueryBuilder()
.dataSource(CalciteTests.DATASOURCE3)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.apache.druid.sql.calcite;

import org.apache.druid.sql.calcite.NotYetSupported.NotYetSupportedProcessor;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.extension.RegisterExtension;

Expand All @@ -34,4 +35,12 @@ protected QueryTestBuilder testBuilder()
{
return decoupledExtension.testBuilder();
}

@Override
@Test
@NotYetSupported(NotYetSupported.Modes.NOT_ENOUGH_RULES)
public void testOrderByNullType()
{
super.testOrderByNullType();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
enum Modes
{
// @formatter:off
NOT_ENOUGH_RULES(DruidException.class, "There are not enough rules to produce a node"),
DISTINCT_AGGREGATE_NOT_SUPPORTED(DruidException.class, "DISTINCT is not supported"),
EXPRESSION_NOT_GROUPED(DruidException.class, "Expression '[a-z]+' is not being grouped"),
NULLS_FIRST_LAST(DruidException.class, "NULLS (FIRST|LAST)"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2800,7 +2800,7 @@ public void testCalciteLiteralToDruidLiteral()
);

assertDruidLiteral(
new DruidLiteral(null, null),
new DruidLiteral(ExpressionType.STRING, null),
Expressions.calciteLiteralToDruidLiteral(
plannerContext,
rexBuilder.makeNullLiteral(rexBuilder.getTypeFactory().createSqlType(SqlTypeName.NULL))
Expand Down
Loading

0 comments on commit f694255

Please sign in to comment.