From 29097d1492b4f67dd14c0d96b83dd49d6b4055af Mon Sep 17 00:00:00 2001 From: Andreas Schwarte Date: Wed, 15 Jan 2025 13:12:43 +0100 Subject: [PATCH 1/2] GH-5231: fix poor query performance for hasStatements() in FedX The previous implementation of the FedXConnection was delegating "hasStatements()" to the implementation of "getStatements()", where the latter was actually fetching data from the federation members. For hasStatements() checks like {null, rdf:type, null} or even {null, null, null} the implementation is problematic as it would fetch all data matching the pattern from the federation members, only to answer whether any matching statement exists. We now make use of an "existence" check on the federation members, and can actually rely on the source selection cache for this. Unit test coverage has been added. --- .../rdf4j/federated/FedXConnection.java | 21 +++++++ .../evaluation/FederationEvalStrategy.java | 43 ++++++++++++++- .../eclipse/rdf4j/federated/BasicTests.java | 55 +++++++++++++++++++ 3 files changed, 118 insertions(+), 1 deletion(-) diff --git a/tools/federation/src/main/java/org/eclipse/rdf4j/federated/FedXConnection.java b/tools/federation/src/main/java/org/eclipse/rdf4j/federated/FedXConnection.java index e1df6b6ca8..e57e647ae2 100644 --- a/tools/federation/src/main/java/org/eclipse/rdf4j/federated/FedXConnection.java +++ b/tools/federation/src/main/java/org/eclipse/rdf4j/federated/FedXConnection.java @@ -337,6 +337,27 @@ protected SailException convert(RuntimeException e) { } } + @Override + protected boolean hasStatementInternal(Resource subj, IRI pred, Value obj, boolean includeInferred, + Resource[] contexts) { + try { + Dataset dataset = new SimpleDataset(); + FederationEvalStrategy strategy = federationContext.createStrategy(dataset); + QueryInfo queryInfo = new QueryInfo(subj, pred, obj, 0, includeInferred, federationContext, strategy, + dataset); + federationContext.getMonitoringService().monitorQuery(queryInfo); + return 
strategy.hasStatements(queryInfo, subj, pred, obj, contexts); + + } catch (RuntimeException e) { + throw e; + } catch (Exception e) { + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } + throw new SailException(e); + } + } + @Override protected void addStatementInternal(Resource subj, IRI pred, Value obj, Resource... contexts) throws SailException { try { diff --git a/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java b/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java index 5dafe13731..23d4214cab 100644 --- a/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java +++ b/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java @@ -562,7 +562,7 @@ public CloseableIteration getStatements(QueryInfo queryInfo, Resource IRI pred, Value obj, Resource... contexts) throws RepositoryException, MalformedQueryException, QueryEvaluationException { - List members = federationContext.getFederation().getMembers(); + List members = getAccessibleFederationMembers(queryInfo); // a bound query: if at least one fed member provides results // return the statement, otherwise empty result @@ -605,6 +605,47 @@ public CloseableIteration getStatements(QueryInfo queryInfo, Resource return union; } + /** + * Returns true if the federation has statements matching the given pattern + * + * @param queryInfo information about the query + * @param subj the subject or null + * @param pred the predicate or null + * @param obj the object or null + * @param contexts optional list of contexts + * @return true if at least one federation member is a relevant source for the pattern, false otherwise + * + * @throws RepositoryException + * @throws MalformedQueryException + * @throws QueryEvaluationException + */ + public boolean hasStatements(QueryInfo queryInfo, Resource subj, + IRI pred, Value obj, Resource... 
contexts) + throws RepositoryException, MalformedQueryException, QueryEvaluationException { + + List members = getAccessibleFederationMembers(queryInfo); + + // form the union of results from relevant endpoints + List sources = CacheUtils.checkCacheForStatementSourcesUpdateCache(cache, members, subj, pred, + obj, queryInfo, contexts); + + if (sources.isEmpty()) { + return false; + } + + return true; + } + + /** + * Returns the accessible federation members in the context of the query. By default this is all federation members. + * + * @param queryInfo + * @return + */ + protected List getAccessibleFederationMembers(QueryInfo queryInfo) { + return federationContext.getFederation().getMembers(); + } + public CloseableIteration evaluateService(FedXService service, BindingSet bindings) throws QueryEvaluationException { diff --git a/tools/federation/src/test/java/org/eclipse/rdf4j/federated/BasicTests.java b/tools/federation/src/test/java/org/eclipse/rdf4j/federated/BasicTests.java index 962ded2783..51566c1cb4 100644 --- a/tools/federation/src/test/java/org/eclipse/rdf4j/federated/BasicTests.java +++ b/tools/federation/src/test/java/org/eclipse/rdf4j/federated/BasicTests.java @@ -139,6 +139,61 @@ public void testBindClause() throws Exception { execute("/tests/basic/query_bind.rq", "/tests/basic/query_bind.srx", false, true); } + @Test + public void testRepositoryConnectionApi() throws Exception { + + prepareTest( + Arrays.asList("/tests/basic/data_emptyStore.ttl", "/tests/basic/data_emptyStore.ttl")); + + Repository repo1 = getRepository(1); + Repository repo2 = getRepository(2); + + IRI bob = Values.iri("http://example.org/bob"); + IRI alice = Values.iri("http://example.org/alice"); + IRI graph1 = Values.iri("http://example.org/graph1"); + IRI graph2 = Values.iri("http://example.org/graph2"); + + try (RepositoryConnection conn = repo1.getConnection()) { + conn.add(bob, RDF.TYPE, FOAF.PERSON, graph1); + conn.add(bob, FOAF.NAME, Values.literal("Bob"), graph1); + } + + 
try (RepositoryConnection conn = repo2.getConnection()) { + conn.add(alice, RDF.TYPE, FOAF.PERSON, graph2); + conn.add(alice, FOAF.NAME, Values.literal("Alice"), graph2); + } + + var fedxRepo = fedxRule.getRepository(); + + try (var conn = fedxRepo.getConnection()) { + + // hasStatement which exist + Assertions.assertTrue(conn.hasStatement(bob, RDF.TYPE, FOAF.PERSON, false)); + Assertions.assertTrue(conn.hasStatement(bob, RDF.TYPE, FOAF.PERSON, false, graph1)); + Assertions.assertTrue(conn.hasStatement(null, RDF.TYPE, FOAF.PERSON, false)); + Assertions.assertTrue(conn.hasStatement(null, RDF.TYPE, FOAF.PERSON, false, graph1)); + Assertions.assertTrue(conn.hasStatement(null, RDF.TYPE, null, false)); + Assertions.assertTrue(conn.hasStatement(null, RDF.TYPE, null, false, graph1)); + Assertions.assertTrue(conn.hasStatement(null, RDF.TYPE, null, false, graph2)); + Assertions.assertTrue(conn.hasStatement(null, null, null, false)); + Assertions.assertTrue(conn.hasStatement(null, null, null, false, graph1)); + + // hasStatement which do not exist + Assertions.assertFalse(conn.hasStatement(bob, RDF.TYPE, FOAF.ORGANIZATION, false)); + Assertions.assertFalse(conn.hasStatement(bob, RDF.TYPE, FOAF.PERSON, false, graph2)); + + // getStatements + Assertions.assertEquals(Set.of(bob, alice), + QueryResults.asModel(conn.getStatements(null, RDF.TYPE, FOAF.PERSON, false)).subjects()); + Assertions.assertEquals(Set.of(bob), + QueryResults.asModel(conn.getStatements(null, RDF.TYPE, FOAF.PERSON, false, graph1)).subjects()); + Assertions.assertEquals(Set.of(bob, alice), + QueryResults.asModel(conn.getStatements(null, null, null, false)).subjects()); + Assertions.assertEquals(Set.of(bob), + QueryResults.asModel(conn.getStatements(null, null, null, false, graph1)).subjects()); + } + } + @Test public void testFederationSubSetQuery() throws Exception { String ns1 = "http://namespace1.org/"; From 1cc4ab81f6ae14e22fe39de70475206c0a201ba8 Mon Sep 17 00:00:00 2001 From: Andreas Schwarte Date: Thu, 
23 Jan 2025 07:25:30 +0100 Subject: [PATCH 2/2] GH-5231: refine javadoc, add Experimental annotation --- .../rdf4j/federated/evaluation/FederationEvalStrategy.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java b/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java index 23d4214cab..27e15607d6 100644 --- a/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java +++ b/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java @@ -17,6 +17,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.common.iteration.EmptyIteration; import org.eclipse.rdf4j.common.iteration.SingletonIteration; @@ -638,10 +639,16 @@ public boolean hasStatements(QueryInfo queryInfo, Resource subj, /** * Returns the accessible federation members in the context of the query. By default this is all federation members. + *

<p> + * Specialized implementations of the {@link FederationEvalStrategy} may override and define custom behavior (e.g., + * to support resilience). + * </p>

+ * * * @param queryInfo * @return */ + @Experimental protected List getAccessibleFederationMembers(QueryInfo queryInfo) { return federationContext.getFederation().getMembers(); }