From 99d7ba398700efaf60f17934956d916e91bb1f10 Mon Sep 17 00:00:00 2001 From: Andreas Schwarte Date: Tue, 14 Jan 2025 12:10:52 +0100 Subject: [PATCH] GH-5227: fix binding assigner optimizer in FedX The federation optimizer failed to execute the binding assigner (which injects external bindings into the statement pattern). The consequence was potentially incorrect results (due to source selection with partial knowledge) as well as sub-optimal source selection. The issue is covered by a unit test, which fails in two places prior to this change. --- .../evaluation/FederationEvalStrategy.java | 16 ++++- .../FederationEvalStrategyTest.java | 60 +++++++++++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java b/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java index 5dafe137315..2fec83ab3b3 100644 --- a/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java +++ b/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java @@ -107,6 +107,7 @@ import org.eclipse.rdf4j.query.algebra.ValueExpr; import org.eclipse.rdf4j.query.algebra.Var; import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer; import org.eclipse.rdf4j.query.algebra.evaluation.QueryValueEvaluationStep; import org.eclipse.rdf4j.query.algebra.evaluation.ValueExprEvaluationException; import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedService; @@ -118,6 +119,7 @@ import org.eclipse.rdf4j.query.algebra.evaluation.iterator.HashJoinIteration; import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.ConstantOptimizer; import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.DisjunctiveConstraintOptimizer; +import 
org.eclipse.rdf4j.query.algebra.evaluation.optimizer.StandardQueryOptimizerPipeline; import org.eclipse.rdf4j.query.algebra.evaluation.util.QueryEvaluationUtil; import org.eclipse.rdf4j.query.algebra.helpers.TupleExprs; import org.eclipse.rdf4j.query.algebra.helpers.collectors.VarNameCollector; @@ -147,6 +149,14 @@ public abstract class FederationEvalStrategy extends StrictEvaluationStrategy { protected FederationContext federationContext; + /** + * List of standard {@link QueryOptimizer}s applicable to federation, executed in list order during optimization + */ + private static final List<QueryOptimizer> standardOptimizers = List.of( + StandardQueryOptimizerPipeline.BINDING_ASSIGNER, + StandardQueryOptimizerPipeline.BINDING_SET_ASSIGNMENT_INLINER, + StandardQueryOptimizerPipeline.DISJUNCTIVE_CONSTRAINT_OPTIMIZER); + public FederationEvalStrategy(FederationContext federationContext) { super(new org.eclipse.rdf4j.query.algebra.evaluation.TripleSource() { @@ -209,9 +219,11 @@ public TupleExpr optimize(TupleExpr expr, EvaluationStatistics evaluationStatist } /* original RDF4J optimizers */ - new ConstantOptimizer(this).optimize(query, dataset, bindings); // maybe remove this optimizer later + for (QueryOptimizer optimizer : standardOptimizers) { + optimizer.optimize(query, dataset, bindings); + } - new DisjunctiveConstraintOptimizer().optimize(query, dataset, bindings); + new ConstantOptimizer(this).optimize(query, dataset, bindings); // maybe remove this optimizer later /* * TODO add some generic optimizers: - FILTER ?s=1 && ?s=2 => EmptyResult - Remove variables that are not diff --git a/tools/federation/src/test/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategyTest.java b/tools/federation/src/test/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategyTest.java index 7277b82f3ec..edf43cf5c54 100644 --- a/tools/federation/src/test/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategyTest.java +++ 
b/tools/federation/src/test/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategyTest.java @@ -10,9 +10,23 @@ *******************************************************************************/ package org.eclipse.rdf4j.federated.evaluation; +import java.util.Arrays; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; import org.eclipse.rdf4j.federated.SPARQLBaseTest; +import org.eclipse.rdf4j.federated.cache.SourceSelectionCache; +import org.eclipse.rdf4j.federated.cache.SourceSelectionCache.StatementSourceAssurance; +import org.eclipse.rdf4j.federated.endpoint.Endpoint; +import org.eclipse.rdf4j.federated.structures.SubQuery; +import org.eclipse.rdf4j.model.util.Values; +import org.eclipse.rdf4j.model.vocabulary.FOAF; +import org.eclipse.rdf4j.model.vocabulary.OWL; +import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.query.TupleQuery; +import org.eclipse.rdf4j.repository.Repository; +import org.eclipse.rdf4j.repository.RepositoryConnection; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -46,4 +60,50 @@ public void testOptimize_SingleMember_Service() throws Exception { Assertions.assertTrue(queryPlan.startsWith("QueryRoot")); } + + @Test + public void testSourceSelectionCache_setBindings() throws Exception { + + var bob = Values.iri("http://example.com/bob"); + + List<Endpoint> endpoints = prepareTest( + Arrays.asList("/tests/basic/data_emptyStore.ttl", "/tests/basic/data_emptyStore.ttl")); + + Repository repo1 = getRepository(1); + Repository repo2 = getRepository(2); + + String repo1Id = endpoints.get(0).getId(); + + try (RepositoryConnection con = repo1.getConnection()) { + con.add(bob, RDF.TYPE, FOAF.PERSON); + } + + try (RepositoryConnection con = repo2.getConnection()) { + con.add(FOAF.PERSON, RDF.TYPE, OWL.CLASS); + } + + Repository fedxRepo = fedxRule.getRepository(); + + fedxRule.enableDebug(); + + try (var conn = fedxRepo.getConnection()) { + + TupleQuery tq = 
conn.prepareTupleQuery("SELECT * WHERE { ?s a ?type }"); + tq.setBinding("s", bob); + + try (var tqr = tq.evaluate()) { + // with ?s bound to bob, foaf:Person (added to repo1 above) is the only expected ?type value + Assertions.assertEquals(Set.of(FOAF.PERSON), + tqr.stream().map(bs -> bs.getValue("type")).collect(Collectors.toSet())); + } + } + + SourceSelectionCache cache = federationContext().getSourceSelectionCache(); + + var assurance = cache.getAssurance(new SubQuery(bob, RDF.TYPE, null), + federationContext().getEndpointManager().getEndpoint(repo1Id)); + + // we expect the source selection cache to report remote statements in repo1 for (bob, rdf:type, ?) + Assertions.assertEquals(StatementSourceAssurance.HAS_REMOTE_STATEMENTS, assurance); + } }