Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for query that have aggregation but missing group by #124

Merged
merged 9 commits into from
Aug 30, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,39 @@ public void testQueryQ1AggregationFilterWithStringInFilterAlongWithNonAliasField
"mongo/test_string_in_filter_aggr_alias_distinct_count_response.json");
}

/*
 * Aggregation query that mixes one DISTINCT_COUNT selection with two plain identifier
 * selections and a filter, but declares no group-by expression. Runs once per data store
 * supplied by "databaseContextBoth" and compares the result against the fixture file,
 * passing 1 as the expected size.
 */
@ParameterizedTest
@MethodSource("databaseContextBoth")
void testQueryQ1DistinctCountAggregationWithOnlyFilter(String dataStoreName) throws IOException {
  final Collection collection = datastoreMap.get(dataStoreName).getCollection(COLLECTION_NAME);

  // price <= 10
  final RelationalExpression priceAtMostTen =
      RelationalExpression.of(IdentifierExpression.of("price"), LTE, ConstantExpression.of(10));

  // item IN ("Mirror", "Comb", "Shampoo", "Bottle")
  final RelationalExpression itemIsListed =
      RelationalExpression.of(
          IdentifierExpression.of("item"),
          IN,
          ConstantExpression.ofStrings(List.of("Mirror", "Comb", "Shampoo", "Bottle")));

  final LogicalExpression priceAndItemFilter =
      LogicalExpression.builder()
          .operator(AND)
          .operand(priceAtMostTen)
          .operand(itemIsListed)
          .build();

  final org.hypertrace.core.documentstore.query.Query query =
      org.hypertrace.core.documentstore.query.Query.builder()
          .addSelection(
              AggregateExpression.of(DISTINCT_COUNT, IdentifierExpression.of("quantity")),
              "qty_count")
          .addSelection(IdentifierExpression.of("item"))
          .addSelection(IdentifierExpression.of("price"))
          .setFilter(priceAndItemFilter)
          .build();

  // The iterator is closed automatically once the assertion has consumed it.
  try (CloseableIterator<Document> resultDocs = collection.aggregate(query)) {
    Utils.assertDocsAndSizeEqualWithoutOrder(
        dataStoreName, resultDocs, 1, "mongo/test_aggr_only_with_fliter_response.json");
  }
}

@ParameterizedTest
@MethodSource("databaseContextBoth")
public void testQueryV1ForSimpleWhereClause(String dataStoreName) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
{
"qty_count":3
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.hypertrace.core.documentstore.UpdateResult;
import org.hypertrace.core.documentstore.commons.DocStoreConstants;
import org.hypertrace.core.documentstore.postgres.internal.BulkUpdateSubDocsInternalResult;
import org.hypertrace.core.documentstore.postgres.query.v1.transformer.PostgresQueryTransformer;
import org.hypertrace.core.documentstore.postgres.utils.PostgresUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -782,7 +783,7 @@ private CloseableIterator<Document> executeQueryV1(
final org.hypertrace.core.documentstore.query.Query query) {
org.hypertrace.core.documentstore.postgres.query.v1.PostgresQueryParser queryParser =
new org.hypertrace.core.documentstore.postgres.query.v1.PostgresQueryParser(
collectionName, query);
collectionName, transformAndLog(query));
String sqlQuery = queryParser.parse();
try {
PreparedStatement preparedStatement =
Expand All @@ -801,6 +802,14 @@ private CloseableIterator<Document> executeQueryV1(
}
}

/**
 * Passes the query through {@link PostgresQueryTransformer#transform} and debug-logs it both
 * before and after the transformation.
 *
 * @param query the query as supplied by the caller
 * @return the query returned by the transformer
 */
private org.hypertrace.core.documentstore.query.Query transformAndLog(
    org.hypertrace.core.documentstore.query.Query query) {
  LOGGER.debug("Original query before transformation: {}", query);

  final org.hypertrace.core.documentstore.query.Query transformedQuery =
      PostgresQueryTransformer.transform(query);

  LOGGER.debug("Query after transformation: {}", transformedQuery);
  return transformedQuery;
}

private boolean isValidType(Object v) {
Set<Class<?>> validClassez =
new HashSet<>() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import lombok.Setter;
import org.hypertrace.core.documentstore.postgres.Params;
import org.hypertrace.core.documentstore.postgres.Params.Builder;
import org.hypertrace.core.documentstore.postgres.PostgresCollection;
import org.hypertrace.core.documentstore.postgres.query.v1.transformer.FieldToPgColumnTransformer;
import org.hypertrace.core.documentstore.postgres.query.v1.vistors.PostgresAggregationFilterTypeExpressionVisitor;
import org.hypertrace.core.documentstore.postgres.query.v1.vistors.PostgresFilterTypeExpressionVisitor;
Expand All @@ -17,8 +18,12 @@
import org.hypertrace.core.documentstore.postgres.query.v1.vistors.PostgresUnnestFilterTypeExpressionVisitor;
import org.hypertrace.core.documentstore.query.Pagination;
import org.hypertrace.core.documentstore.query.Query;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class PostgresQueryParser {
// Logger is named after this class so that log output is attributed to the query parser.
private static final Logger LOGGER = LoggerFactory.getLogger(PostgresQueryParser.class);

@Getter private final String collection;
@Getter private final Query query;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package org.hypertrace.core.documentstore.postgres.query.v1.transformer;

import com.google.common.collect.ImmutableList;
import java.util.List;
import org.hypertrace.core.documentstore.query.Query;
import org.hypertrace.core.documentstore.query.transform.QueryTransformer;

/** Applies the registered Postgres-specific {@link QueryTransformer}s to a query, in order. */
public class PostgresQueryTransformer {

  // Transformers run sequentially in the order they are listed here.
  private static final List<QueryTransformer> TRANSFORMERS =
      ImmutableList.<QueryTransformer>of(new PostgresSelectionQueryTransformer());

  /**
   * Feeds the query through every registered transformer, each one receiving the output of the
   * previous one.
   *
   * @param query the query to transform
   * @return the result returned by the last transformer in the chain
   */
  public static Query transform(final Query query) {
    Query current = query;

    final Iterator<QueryTransformer> steps = TRANSFORMERS.iterator();
    while (steps.hasNext()) {
      current = steps.next().transform(current);
    }

    return current;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package org.hypertrace.core.documentstore.postgres.query.v1.transformer;

import java.util.List;
import java.util.stream.Collectors;
import org.hypertrace.core.documentstore.expression.impl.AggregateExpression;
import org.hypertrace.core.documentstore.expression.impl.ConstantExpression;
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
import org.hypertrace.core.documentstore.parser.SelectTypeExpressionVisitor;
import org.hypertrace.core.documentstore.query.Query;
import org.hypertrace.core.documentstore.query.SelectionSpec;
import org.hypertrace.core.documentstore.query.transform.QueryTransformer;
import org.hypertrace.core.documentstore.query.transform.TransformedQueryBuilder;

/*
* Postgres doesn't support selecting plain attributes together with aggregations without a group by expression.
* e.g.
* SELECT COUNT(DISTINCT document->>'quantity' ) AS QTY, document->'price' AS price
* FROM testCollection
* WHERE (CAST (document->>'price' AS NUMERIC) <= 10)
*
* So, if the group by clause is missing and the selection contains any aggregation expression,
* this transformer removes all the non-aggregated expressions. The above query will therefore be
* transformed to:
*
* SELECT COUNT(DISTINCT document->>'quantity' ) AS QTY
Copy link
Contributor

@suresh-prakash suresh-prakash Aug 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we should go this route. We have never transformed the user-given query to a different non-equivalent query just because some database does not support it. In fact, this transformation alters the input selections and removes some of them. This might result in unexpected/undesired effects in the clients because they are asking for 2 selections, but, we only return 1 silently ignoring the other.

The query transformer in Mongo only builds equivalent queries by modifying the given expressions to other equivalent forms or adds some expressions to support the modification. But, the overall query is transformed to another equivalent query supported by the database. We neither remove anything nor transform to a non-equivalent query. Ideally, for such scenarios, we should fail (even in Mongo if that's not the case today). I suspect, the DB itself is not returning the rows in case of Mongo.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For example, in Mongo we convert "DISTINCT_COUNT" into "$addToSet" in the "$group" stage and add "$size" in the "$project" stage. But, the query is still equivalent.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mongo internally does that and discards them. I did think of the above path, but we have to do this for backward compatibility, as currently, in the Query API layer, we are not discarding those queries.

e.g Mongo Query for the above sample query

[{"$match": {"$and": [{"price": {"$lte": 10}}, {"item": {"$in": ["Mirror", "Comb", "Shampoo", "Bottle"]}}]}}, 
{"$group": {"qty_count": {"$addToSet": "$quantity"}, "_id": null}}, 
{"$project": {"item": 1, "qty_count": {"$size": "$qty_count"}, "price": 1}}]

Response to the above query from mongo (selections are discarded):

[{"qty_count":3}]

* FROM testCollection
* WHERE (CAST (document->>'price' AS NUMERIC) <= 10)
*
* This is similar to the behavior of our other document store implementation (e.g. Mongo)
* */
/*
 * Query transformer that, for a query without group-by expressions, keeps only the aggregated
 * selections. It doubles as the visitor that classifies each selection expression:
 * visit(...) returns true for expressions to keep and false for expressions to drop.
 */
public class PostgresSelectionQueryTransformer
    implements QueryTransformer, SelectTypeExpressionVisitor {

  /**
   * Removes non-aggregated selections from a query that has no group-by expressions.
   *
   * @param query the query to inspect
   * @return the same query instance when {@code query.getAggregations()} is non-empty or when
   *     none of its selections is an aggregate; otherwise a rebuilt query whose selections are
   *     only the aggregated ones
   */
  @Override
  public Query transform(Query query) {
    // getAggregations() is treated as the group-by clause here; nothing to do when it is set.
    final boolean hasGroupBy = !query.getAggregations().isEmpty();
    if (hasGroupBy) {
      return query;
    }

    // Keep only the selections whose expression this visitor accepts (i.e. aggregates).
    final List<SelectionSpec> aggregatedSelections =
        query.getSelections().stream()
            .filter(spec -> spec.getExpression().accept(this))
            .collect(Collectors.toUnmodifiableList());

    // No aggregate selected at all: leave the query untouched rather than emptying it.
    if (aggregatedSelections.isEmpty()) {
      return query;
    }

    return new TransformedQueryBuilder(query).setSelections(aggregatedSelections).build();
  }

  /** Aggregate expressions are retained. */
  @Override
  public Boolean visit(AggregateExpression expression) {
    return Boolean.TRUE;
  }

  /** Constant expressions are dropped. */
  @Override
  public Boolean visit(ConstantExpression expression) {
    return Boolean.FALSE;
  }

  /** Function expressions are dropped. */
  @Override
  public Boolean visit(FunctionExpression expression) {
    return Boolean.FALSE;
  }

  /** Identifier expressions are dropped. */
  @Override
  public Boolean visit(IdentifierExpression expression) {
    return Boolean.FALSE;
  }
}
Loading