Skip to content

Commit

Permalink
GH-1720: Filter out literals-as-subjects from --rdfs
Browse files Browse the repository at this point in the history
  • Loading branch information
afs committed Jan 20, 2025
1 parent 949533d commit ab887fd
Show file tree
Hide file tree
Showing 10 changed files with 261 additions and 61 deletions.
6 changes: 6 additions & 0 deletions jena-arq/src/main/java/org/apache/jena/rdfs/RDFSFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.jena.query.DatasetFactory;
import org.apache.jena.riot.system.StreamRDF;
import org.apache.jena.sparql.core.DatasetGraph;
import org.apache.jena.sparql.util.NodeUtils;

/** Factory for data+RDFS inference. */
public class RDFSFactory {
Expand Down Expand Up @@ -79,4 +80,9 @@ public static StreamRDF streamRDFS(StreamRDF data, Graph vocab) {
public static StreamRDF streamRDFS(StreamRDF data, SetupRDFS setup) {
return new InfStreamRDFS(data, setup);
}

/**
 * Wrap a {@link StreamRDF} so that "generalized RDF" is filtered out before it reaches {@code data}.
 * <p>
 * RDFS can generate "symmetric RDF", "generalized RDF" - e.g. triples with literals as subjects.
 * This is a convenience entry point that delegates to {@link NodeUtils#removeGeneralizedRDF(StreamRDF)}.
 * </p>
 * @param data the destination stream
 * @return the stream returned by {@link NodeUtils#removeGeneralizedRDF(StreamRDF)} for {@code data}
 */
public static StreamRDF removeGeneralizedRDF(StreamRDF data) {
return NodeUtils.removeGeneralizedRDF(data);
}
}
8 changes: 0 additions & 8 deletions jena-arq/src/main/java/org/apache/jena/rdfs/SetupRDFS.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,6 @@
/** RDFS setup in Node space */
public class SetupRDFS extends BaseSetupRDFS<Node> {

/**
* {@code incDerivedDataRDFS} causes the engine to look for RDFS relationships in the data
* as if TBox (rules) and ABox (ground data) are one unit.
* <p>
* Set true if abox == tbox.
* <p>
* Can choose false or true if abox != tbox.
*/
public SetupRDFS(Graph vocab) {
super(vocab);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
import org.apache.jena.graph.Triple ;
import org.apache.jena.sparql.core.Quad ;

/** Send to two stream */
public class StreamRDF2 implements StreamRDF
/** Send to two streams */
public class StreamRDF2 implements StreamRDF
{
protected final StreamRDF sink1 ;
protected final StreamRDF sink2 ;
Expand All @@ -31,7 +31,7 @@ public StreamRDF2(StreamRDF sink1, StreamRDF sink2) {
this.sink1 = sink1 ;
this.sink2 = sink2 ;
}

@Override
public void start() {
sink1.start() ;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import org.apache.jena.sparql.core.Substitute;
import org.apache.jena.sparql.core.Var;
import org.apache.jena.sparql.engine.binding.Binding;
import org.apache.jena.sparql.util.ModelUtils;
import org.apache.jena.sparql.util.NodeUtils;

public class TemplateLib {
// See also Substitute -- combine?
Expand Down Expand Up @@ -79,7 +79,7 @@ public Iterator<Triple> apply(final Binding b) {
List<Triple> tripleList = new ArrayList<>(triples.size());
for ( Triple triple : triples ) {
Triple q = subst(triple, b, bNodeMap);
if ( !q.isConcrete() || !ModelUtils.isValidAsStatement(q.getSubject(), q.getPredicate(), q.getObject()) ) {
if ( !q.isConcrete() || ! NodeUtils.isValidAsRDF(q.getSubject(), q.getPredicate(), q.getObject()) ) {
// Log.warn(TemplateLib.class, "Unbound quad:
// "+FmtUtils.stringForQuad(quad)) ;
continue;
Expand Down
72 changes: 32 additions & 40 deletions jena-arq/src/main/java/org/apache/jena/sparql/util/ModelUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,22 @@

package org.apache.jena.sparql.util;

import java.util.Iterator ;

import org.apache.jena.atlas.iterator.Iter ;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.Triple ;
import org.apache.jena.query.QueryException ;
import org.apache.jena.rdf.model.Model ;
import org.apache.jena.rdf.model.RDFNode ;
import org.apache.jena.rdf.model.Statement ;
import org.apache.jena.rdf.model.StmtIterator ;
import org.apache.jena.rdf.model.impl.LiteralImpl ;
import org.apache.jena.rdf.model.impl.ResourceImpl ;
import org.apache.jena.rdf.model.impl.StmtIteratorImpl ;
import org.apache.jena.sparql.ARQInternalErrorException ;
import java.util.Iterator;

import org.apache.jena.atlas.iterator.Iter;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.Triple;
import org.apache.jena.query.QueryException;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.rdf.model.impl.LiteralImpl;
import org.apache.jena.rdf.model.impl.ResourceImpl;
import org.apache.jena.rdf.model.impl.StmtIteratorImpl;
import org.apache.jena.sparql.ARQInternalErrorException;
import org.apache.jena.util.ModelCollector;
import org.apache.jena.util.iterator.ClosableIterator ;
import org.apache.jena.util.iterator.ClosableIterator;

public class ModelUtils
{
Expand All @@ -46,22 +46,22 @@ public class ModelUtils

public static RDFNode convertGraphNodeToRDFNode(Node node, Model model) {
if ( node.isVariable() )
throw new QueryException("Variable: "+node) ;
throw new QueryException("Variable: "+node);

// Best way.
if ( model != null )
return model.asRDFNode(node) ;
return model.asRDFNode(node);

if ( node.isLiteral() )
return new LiteralImpl(node, null) ;
return new LiteralImpl(node, null);

if ( node.isURI() || node.isBlank() )
return new ResourceImpl(node, null) ;
return new ResourceImpl(node, null);

if ( node.isNodeTriple() )
return new ResourceImpl(node, null) ;
return new ResourceImpl(node, null);

throw new ARQInternalErrorException("Unknown node type for node: "+node) ;
throw new ARQInternalErrorException("Unknown node type for node: "+node);
}

/** Convert a {@link Node} (graph SPI) to an RDFNode (model API)
Expand All @@ -73,18 +73,17 @@ public static RDFNode convertGraphNodeToRDFNode(Node node) {
return convertGraphNodeToRDFNode(node, null);
}

public static Statement tripleToStatement(Model model, Triple t)
{
public static Statement tripleToStatement(Model model, Triple t) {
if ( model == null )
throw new ARQInternalErrorException("Attempt to create statement with null model") ;
throw new ARQInternalErrorException("Attempt to create statement with null model");

Node sNode = t.getSubject() ;
Node pNode = t.getPredicate() ;
Node oNode = t.getObject() ;
Node sNode = t.getSubject();
Node pNode = t.getPredicate();
Node oNode = t.getObject();

if (!isValidAsStatement(sNode, pNode, oNode)) return null;

return model.asStatement(t) ;
return model.asStatement(t);
}

/**
Expand All @@ -97,19 +96,12 @@ public static Statement tripleToStatement(Model model, Triple t)
* @param p Predicate
* @param o Object
* @return True if a valid Statement can be formed
*
* @deprecated Use {@link NodeUtils#isValidAsRDF(Node, Node, Node)}.
*/
public static boolean isValidAsStatement(Node s, Node p, Node o)
{
if ( s.isLiteral() || s.isVariable() )
return false ;

if ( ! p.isURI() ) // Not variable, literal or blank.
return false ;

if ( o.isVariable() )
return false ;

return true;
@Deprecated
public static boolean isValidAsStatement(Node s, Node p, Node o) {
// Deprecated shim: the whole check is delegated to NodeUtils.isValidAsRDF.
// NOTE(review): NodeUtils.isValidAsRDF requires the subject to be a URI or a blank node
// and rejects null, whereas the check this replaces only excluded literal and variable
// subjects - confirm no caller relies on the looser rule.
return NodeUtils.isValidAsRDF(s, p, o);
}

public static StmtIterator triplesToStatements(final Iterator<Triple> it, final Model refModel) {
Expand Down
66 changes: 66 additions & 0 deletions jena-arq/src/main/java/org/apache/jena/sparql/util/NodeUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@
import org.apache.jena.datatypes.xsd.XSDDatatype ;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.NodeFactory ;
import org.apache.jena.graph.Triple;
import org.apache.jena.rdf.model.impl.Util;
import org.apache.jena.riot.system.StreamRDF;
import org.apache.jena.riot.system.StreamRDFWrapper;
import org.apache.jena.sparql.core.Quad;
import org.apache.jena.sparql.expr.ExprEvalException ;
import org.apache.jena.sparql.expr.NodeValue ;
import org.apache.jena.sparql.expr.nodevalue.NodeFunctions ;
Expand Down Expand Up @@ -184,4 +188,66 @@ public static boolean isXSDNumeric(Node node) {
*/
public static boolean isLangString(Node n) { return Util.isLangString(n) ; }

/**
 * Tests whether three nodes, taken as subject/predicate/object, form a valid RDF triple.
 * <p>
 * The {@link Triple} API accepts any kind of {@link Node} in any position
 * (including non-RDF node types such as variables), so a triple is not
 * necessarily legal RDF. The positions are checked as follows:
 * </p>
 * <ul>
 * <li>subject: a blank node or a URI;</li>
 * <li>predicate: a URI;</li>
 * <li>object: a blank node, a URI, a literal or a triple term ({@code isNodeTriple()}).</li>
 * </ul>
 * <p>
 * A {@code null} in any position makes the triple invalid; no exception is thrown for it.
 * </p>
 * @param s Subject
 * @param p Predicate
 * @param o Object
 * @return {@code true} if s/p/o is valid as an RDF triple, otherwise {@code false}
 */
public static boolean isValidAsRDF(Node s, Node p, Node o) {
    // Evaluated left to right with short-circuiting: subject, then predicate, then object.
    return s != null && ( s.isBlank() || s.isURI() )
        && p != null && p.isURI()
        && o != null && ( o.isBlank() || o.isURI() || o.isLiteral() || o.isNodeTriple() );
}

/**
 * Tests whether four nodes, taken as graph/subject/predicate/object, form a valid RDF quad.
 * <p>
 * The graph name must be a URI or a blank node. The subject, predicate and
 * object are then checked by {@link #isValidAsRDF(Node, Node, Node)}.
 * A {@code null} graph name makes the quad invalid.
 * </p>
 * @param g Graph name
 * @param s Subject
 * @param p Predicate
 * @param o Object
 * @return {@code true} if g/s/p/o is valid as an RDF quad, otherwise {@code false}
 */
public static boolean isValidAsRDF(Node g, Node s, Node p, Node o) {
    boolean graphNameOK = g != null && ( g.isURI() || g.isBlank() );
    // The triple part is only examined once the graph name has been accepted.
    return graphNameOK && isValidAsRDF(s, p, o);
}

/**
 * Wrap a {@link StreamRDF} in a filter that drops "generalized" RDF.
 * <p>
 * The returned {@link StreamRDFWrapper} overrides only {@code triple} and
 * {@code quad}: an item is handed on to the wrapper's superclass
 * implementation when it passes the matching {@code isValidAsRDF} check,
 * and is silently discarded otherwise.
 * </p>
 * @param data the stream being wrapped
 * @return a filtering wrapper around {@code data}
 */
public static StreamRDF removeGeneralizedRDF(StreamRDF data) {
    return new StreamRDFWrapper(data) {
        @Override
        public void triple(Triple triple) {
            // Forward only when subject/predicate/object are valid RDF terms for their positions.
            if ( isValidAsRDF(triple.getSubject(), triple.getPredicate(), triple.getObject()) )
                super.triple(triple);
        }

        @Override
        public void quad(Quad quad) {
            // As for triples, with the graph name checked as well.
            if ( isValidAsRDF(quad.getGraph(), quad.getSubject(), quad.getPredicate(), quad.getObject()) )
                super.quad(quad);
        }
    };
}
}
12 changes: 8 additions & 4 deletions jena-arq/src/test/java/org/apache/jena/arq/ARQTestSuite.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@

package org.apache.jena.arq;

import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.runner.RunWith;
import org.junit.runners.Suite;

import org.apache.jena.atlas.TC_Atlas_ARQ;
import org.apache.jena.atlas.legacy.BaseTest2;
import org.apache.jena.external.Scripts_LangSuiteCG;
Expand All @@ -34,10 +39,7 @@
import org.apache.jena.sys.JenaSystem;
import org.apache.jena.system.TS_System;
import org.apache.jena.system.buffering.TS_Buffering;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.runner.RunWith;
import org.junit.runners.Suite;
import org.apache.jena.util.TS_UtilsARQ;

/**
* All the ARQ tests
Expand All @@ -47,6 +49,8 @@
@Suite.SuiteClasses( {
TC_Atlas_ARQ.class,

TS_UtilsARQ.class,

TC_Riot.class,

TS_System.class,
Expand Down
29 changes: 29 additions & 0 deletions jena-arq/src/test/java/org/apache/jena/util/TS_UtilsARQ.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.jena.util;

import org.junit.runner.RunWith ;
import org.junit.runners.Suite ;

/** JUnit suite that runs {@link TestNodeUtils}. */
@RunWith(Suite.class)
@Suite.SuiteClasses({ TestNodeUtils.class })
public class TS_UtilsARQ {}
Loading

0 comments on commit ab887fd

Please sign in to comment.