Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ASK queries #1562

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 103 additions & 20 deletions src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Copyright 2022, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>
// Copyright 2022 - 2024, University of Freiburg
// Chair of Algorithms and Data Structures
// Author: Johannes Kalmbach <[email protected]>

#include "ExportQueryExecutionTrees.h"

Expand All @@ -12,6 +12,62 @@
#include "util/ConstexprUtils.h"
#include "util/http/MediaTypes.h"

// Compute the boolean answer of an ASK query: `true` iff the `result` contains
// at least one row. Both fully materialized and non-materialized results are
// supported.
bool getResultForAsk(const std::shared_ptr<const Result>& result) {
  if (!result->isFullyMaterialized()) {
    // The result is delivered as a sequence of tables: a single nonempty
    // table suffices for a positive answer.
    return std::ranges::any_of(
        result->idTables(),
        [](const IdTable& table) { return !table.empty(); });
  }
  return !result->idTable().empty();
}

// _____________________________________________________________________________
// Export the boolean result of an ASK query in the format requested by
// `mediaType`. The answer is obtained by passing `qet.getResult(true)` to
// `getResultForAsk`. Only `sparqlXml` and `sparqlJson` are supported; for every
// other media type a `std::runtime_error` is thrown. The parameters
// `parsedQuery` and `requestTimer` are not used by this function.
// NOTE(review): this function is a coroutine (it uses `co_yield`), so the
// computation and the exception presumably happen when the returned generator
// is first advanced, not at call time — confirm against `stream_generator`.
ad_utility::streams::stream_generator computeResultForAsk(
[[maybe_unused]] const ParsedQuery& parsedQuery,
const QueryExecutionTree& qet, ad_utility::MediaType mediaType,
[[maybe_unused]] const ad_utility::Timer& requestTimer) {
// Compute the result of the ASK query.
bool result = getResultForAsk(qet.getResult(true));

// Lambda that returns the result bool in XML format.
auto getXmlResult = [result]() {
// The template encodes the answer `true`. The whitespace inside the raw
// string is part of the exported document.
std::string xmlTemplate = R"(<?xml version="1.0"?>
<sparql xmlns="http://www.w3.org/2005/sparql-results#">
<head/>
<boolean>true</boolean>
</sparql>)";

if (result) {
return xmlTemplate;
} else {
// The text "true" occurs exactly once in the template (inside the
// `<boolean>` element), so replacing all occurrences only flips the answer.
return absl::StrReplaceAll(xmlTemplate, {{"true", "false"}});
}
};

// Lambda that returns the result bool in SPARQL JSON format.
auto getSparqlJsonResult = [result]() {
nlohmann::json j;
// The `head` member is an empty JSON object.
j["head"] = nlohmann::json::object_t{};
j["boolean"] = result;
return j.dump();
};

// Return the result in the requested format.
using enum ad_utility::MediaType;
switch (mediaType) {
case sparqlXml:
co_yield getXmlResult();
break;
case sparqlJson:
co_yield getSparqlJsonResult();
break;
default:
// All remaining media types end up here.
throw std::runtime_error{
"ASK queries are not supported for TSV or CSV or binary format."};
}
}

// __________________________________________________________________________
cppcoro::generator<const IdTable&> ExportQueryExecutionTrees::getIdTables(
const Result& result) {
Expand Down Expand Up @@ -360,6 +416,20 @@ static nlohmann::json stringAndTypeToBinding(std::string_view entitystr,
return b;
}

// _____________________________________________________________________________
// Yield the single entry of the `res` array of the QLever JSON export for an
// ASK query: a JSON array that contains exactly one string, namely the literal
// `"true"^^<http://www.w3.org/2001/XMLSchema#boolean>` if `result` is nonempty
// and the corresponding `"false"` literal otherwise.
//
// The answer is computed from the `result` that is passed in (which must not
// be `nullptr`). The parameter `qet` is deliberately unused; it is only kept so
// that the signature stays compatible with existing callers. Previously the
// function validated `result`, but then ignored it and requested a second
// result from `qet`.
cppcoro::generator<std::string> askQueryResultToQLeverJSON(
    [[maybe_unused]] const QueryExecutionTree& qet,
    std::shared_ptr<const Result> result) {
  AD_CORRECTNESS_CHECK(result != nullptr);
  // TODO joka921: Call function for converting this to JSON (which also avoids
  // spelling out the XSD type).
  // The raw string literals already contain the JSON escaping of the inner
  // quotes, so they can be yielded verbatim.
  std::string s =
      getResultForAsk(result)
          ? R"(["\"true\"^^<http://www.w3.org/2001/XMLSchema#boolean>"])"
          : R"(["\"false\"^^<http://www.w3.org/2001/XMLSchema#boolean>"])";
  co_yield s;
}

// _____________________________________________________________________________
cppcoro::generator<std::string>
ExportQueryExecutionTrees::selectQueryResultBindingsToQLeverJSON(
Expand Down Expand Up @@ -725,15 +795,19 @@ cppcoro::generator<std::string> ExportQueryExecutionTrees::computeResult(
if constexpr (format == MediaType::qleverJson) {
return computeResultAsQLeverJSON(parsedQuery, qet, requestTimer,
std::move(cancellationHandle));
} else {
if (parsedQuery.hasAskClause()) {
return computeResultForAsk(parsedQuery, qet, mediaType, requestTimer);
}
return parsedQuery.hasSelectClause()
? selectQueryResultToStream<format>(
qet, parsedQuery.selectClause(),
parsedQuery._limitOffset, std::move(cancellationHandle))
: constructQueryResultToStream<format>(
qet, parsedQuery.constructClause().triples_,
parsedQuery._limitOffset, qet.getResult(true),
std::move(cancellationHandle));
}
return parsedQuery.hasSelectClause()
? selectQueryResultToStream<format>(
qet, parsedQuery.selectClause(), parsedQuery._limitOffset,
std::move(cancellationHandle))
: constructQueryResultToStream<format>(
qet, parsedQuery.constructClause().triples_,
parsedQuery._limitOffset, qet.getResult(true),
std::move(cancellationHandle));
};

using enum MediaType;
Expand Down Expand Up @@ -766,23 +840,32 @@ ExportQueryExecutionTrees::computeResultAsQLeverJSON(
if (query.hasSelectClause()) {
jsonPrefix["selected"] =
query.selectClause().getSelectedVariablesAsStrings();
} else {
} else if (query.hasConstructClause()) {
jsonPrefix["selected"] =
std::vector<std::string>{"?subject", "?predicate", "?object"};
} else {
AD_CORRECTNESS_CHECK(query.hasAskClause());
jsonPrefix["selected"] = std::vector<std::string>{"?result"};
}

std::string prefixStr = jsonPrefix.dump();
co_yield absl::StrCat(prefixStr.substr(0, prefixStr.size() - 1),
R"(,"res":[)");

auto bindings =
query.hasSelectClause()
? selectQueryResultBindingsToQLeverJSON(
qet, query.selectClause(), query._limitOffset,
std::move(result), std::move(cancellationHandle))
: constructQueryResultBindingsToQLeverJSON(
qet, query.constructClause().triples_, query._limitOffset,
std::move(result), std::move(cancellationHandle));
auto bindings = [&]() {
if (query.hasSelectClause()) {
return selectQueryResultBindingsToQLeverJSON(
qet, query.selectClause(), query._limitOffset, std::move(result),
std::move(cancellationHandle));
} else if (query.hasConstructClause()) {
return constructQueryResultBindingsToQLeverJSON(
qet, query.constructClause().triples_, query._limitOffset,
std::move(result), std::move(cancellationHandle));
} else {
// TODO<joka921>: Refactor this to use std::visit.
return askQueryResultToQLeverJSON(qet, std::move(result));
}
}();

size_t resultSize = 0;
for (const std::string& b : bindings) {
Expand Down
6 changes: 4 additions & 2 deletions src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,7 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
// expressions
selectClause.deleteAliasesButKeepVariables();
}
} else {
AD_CORRECTNESS_CHECK(hasConstructClause());
} else if (hasConstructClause()) {
if (_groupByVariables.empty()) {
return;
}
Expand All @@ -232,6 +231,9 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) {
noteForGroupByError);
}
}
} else {
// TODO<joka921> refactor this to use `std::visit`. It is much safer.
AD_CORRECTNESS_CHECK(hasAskClause());
}
}

Expand Down
17 changes: 13 additions & 4 deletions src/parser/ParsedQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ class ParsedQuery {

using DatasetClauses = parsedQuery::DatasetClauses;

// The header of an ASK query carries no information beyond the fact that it is
// an ASK query, hence this clause is an empty struct.
struct AskClause : parsedQuery::ClauseBase {};

ParsedQuery() = default;

GraphPattern _rootGraphPattern;
Expand All @@ -99,10 +103,11 @@ class ParsedQuery {
LimitOffsetClause _limitOffset{};
string _originalString;

// explicit default initialisation because the constructor
// of SelectClause is private
std::variant<SelectClause, ConstructClause, UpdateClause> _clause{
SelectClause{}};
using HeaderClause =
std::variant<SelectClause, ConstructClause, UpdateClause, AskClause>;
// Use explicit default initialization for `SelectClause` because its
// constructor is private.
HeaderClause _clause{SelectClause{}};

// The IRIs from the FROM and FROM NAMED clauses.
DatasetClauses datasetClauses_;
Expand All @@ -119,6 +124,10 @@ class ParsedQuery {
return std::holds_alternative<UpdateClause>(_clause);
}

// Return true iff `_clause` currently holds an `AskClause`.
bool hasAskClause() const {
  return std::get_if<AskClause>(&_clause) != nullptr;
}

// Return the `SelectClause` that is stored in `_clause`. Because of the
// `decltype(auto)` return type the result of `std::get` is passed on as is,
// i.e. as a const reference into `_clause`. `std::get` throws
// `std::bad_variant_access` if `_clause` holds a different alternative.
[[nodiscard]] decltype(auto) selectClause() const {
return std::get<SelectClause>(_clause);
}
Expand Down
24 changes: 22 additions & 2 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,28 @@ ParsedQuery Visitor::visit(const Parser::DescribeQueryContext* ctx) {
}

// ____________________________________________________________________________________
ParsedQuery Visitor::visit(const Parser::AskQueryContext* ctx) {
reportNotSupported(ctx, "ASK queries are");
// Parse an ASK query: store an `AskClause`, the dataset clauses, the WHERE
// clause and the solution modifiers in `parsedQuery_` and return it. LIMIT,
// OFFSET and TEXTLIMIT are reported as errors; internally the limit is then
// set to 1.
ParsedQuery Visitor::visit(Parser::AskQueryContext* ctx) {
  parsedQuery_._clause = ParsedQuery::AskClause{};
  parsedQuery_.datasetClauses_ = parsedQuery::DatasetClauses::fromClauses(
      visitVector(ctx->datasetClause()));

  auto [graphPattern, variablesInBody] = visit(ctx->whereClause());
  parsedQuery_._rootGraphPattern = std::move(graphPattern);
  parsedQuery_.registerVariablesVisibleInQueryBody(variablesInBody);

  // NOTE: It can make sense to have solution modifiers with an ASK query, for
  // example, a GROUP BY with a HAVING.
  auto modifiers = visit(ctx->solutionModifier());
  const auto& limitOffset = modifiers.limitOffset_;
  const bool hasExplicitLimit =
      !limitOffset.isUnconstrained() || limitOffset.textLimit_.has_value();
  if (hasExplicitLimit) {
    reportError(
        ctx->solutionModifier(),
        "ASK queries may not contain LIMIT, OFFSET, or TEXTLIMIT clauses");
  }
  // The limit of an ASK query is always set to 1 here.
  modifiers.limitOffset_._limit = 1;
  parsedQuery_.addSolutionModifiers(std::move(modifiers));
  return parsedQuery_;
}

// ____________________________________________________________________________________
Expand Down
2 changes: 1 addition & 1 deletion src/parser/sparqlParser/SparqlQleverVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ class SparqlQleverVisitor {
[[noreturn]] static ParsedQuery visit(
const Parser::DescribeQueryContext* ctx);

[[noreturn]] static ParsedQuery visit(const Parser::AskQueryContext* ctx);
ParsedQuery visit(Parser::AskQueryContext* ctx);

DatasetClause visit(Parser::DatasetClauseContext* ctx);

Expand Down
87 changes: 87 additions & 0 deletions test/ExportQueryExecutionTreesTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,19 @@ struct TestCaseSelectQuery {
std::string resultXml;
};

// Bundles the input (knowledge graph and query) of an ASK query together with
// the expected exports in the formats that are checked by the tests.
struct TestCaseAskQuery {
  // The knowledge graph (TURTLE).
  std::string kg;
  // The query (SPARQL).
  std::string query;
  // The expected result in QLeverJSON format. Note: this member only contains
  // the inner result array with the bindings and NOT the metadata.
  nlohmann::json resultQLeverJSON;
  // The expected result in SparqlJSON format.
  nlohmann::json resultSparqlJSON;
  // The expected result in XML format.
  std::string resultXml;
};

struct TestCaseConstructQuery {
std::string kg; // The knowledge graph (TURTLE)
std::string query; // The query (SPARQL)
Expand Down Expand Up @@ -152,6 +165,34 @@ void runConstructQueryTestCase(
testCase.resultTurtle);
}

// Run a single test case for an ASK query.
void runAskQueryTestCase(
const TestCaseAskQuery& testCase,
ad_utility::source_location l = ad_utility::source_location::current()) {
auto trace = generateLocationTrace(l, "runAskQueryTestCase");
using enum ad_utility::MediaType;
// TODO<joka921> match the exception
EXPECT_ANY_THROW(runQueryStreamableResult(testCase.kg, testCase.query, tsv));
EXPECT_ANY_THROW(runQueryStreamableResult(testCase.kg, testCase.query, csv));
EXPECT_ANY_THROW(
runQueryStreamableResult(testCase.kg, testCase.query, octetStream));
EXPECT_ANY_THROW(
runQueryStreamableResult(testCase.kg, testCase.query, turtle));
auto qleverJSONStreamResult = nlohmann::json::parse(
runQueryStreamableResult(testCase.kg, testCase.query, qleverJson));
ASSERT_EQ(qleverJSONStreamResult["query"], testCase.query);
ASSERT_EQ(qleverJSONStreamResult["resultsize"], 1u);
EXPECT_EQ(qleverJSONStreamResult["res"], testCase.resultQLeverJSON);

EXPECT_EQ(nlohmann::json::parse(runQueryStreamableResult(
testCase.kg, testCase.query, sparqlJson)),
testCase.resultSparqlJSON);

auto xmlAsString =
runQueryStreamableResult(testCase.kg, testCase.query, sparqlXml);
EXPECT_EQ(testCase.resultXml, xmlAsString);
}

// Create a `json` that can be used as the `resultQLeverJSON` of a
// `TestCaseSelectQuery`. This function can only be used when there is a single
// variable in the result. The `values` then become the bindings of that
Expand Down Expand Up @@ -1174,6 +1215,52 @@ TEST(ExportQueryExecutionTrees, CornerCases) {
::testing::ContainsRegex("should be unreachable"));
}

// Test the correct exporting of ASK queries.
TEST(ExportQueryExecutionTrees, AskQuery) {
  // Create the test case for an ASK query whose expected answer is `expected`.
  // With `lazy == true` the query is a triple pattern on a nonempty knowledge
  // graph, otherwise it is a BIND + FILTER on an empty knowledge graph.
  // NOTE(review): presumably the first variant yields a lazy and the second one
  // a fully materialized result — confirm.
  auto makeTestCase = [](bool expected, bool lazy) {
    const std::string boolean = expected ? "true" : "false";
    TestCaseAskQuery testCase;
    if (lazy) {
      testCase.kg = "<x> <y> <z>";
      testCase.query = expected ? "ASK { <x> ?p ?o}" : "ASK { <y> ?p ?o}";
    } else {
      testCase.query = expected ? "ASK { BIND (3 as ?x) FILTER (?x > 0)}"
                                : "ASK { BIND (3 as ?x) FILTER (?x < 0)}";
    }
    testCase.resultQLeverJSON = nlohmann::json{std::vector<std::string>{
        "\"" + boolean + "\"^^<http://www.w3.org/2001/XMLSchema#boolean>"}};
    testCase.resultSparqlJSON = nlohmann::json::parse(
        R"({"head":{ }, "boolean" : )" + boolean + "}");
    testCase.resultXml =
        "<?xml version=\"1.0\"?>\n<sparql "
        "xmlns=\"http://www.w3.org/2005/sparql-results#\">\n <head/>\n "
        "<boolean>" +
        boolean + "</boolean>\n</sparql>";
    return testCase;
  };

  // Same order as before: first the positive, then the negative answers.
  for (bool expected : {true, false}) {
    for (bool lazy : {true, false}) {
      runAskQueryTestCase(makeTestCase(expected, lazy));
    }
  }
}

using enum ad_utility::MediaType;

// ____________________________________________________________________________
Expand Down
Loading
Loading