Skip to content

Commit

Permalink
apacheGH-3026: spatial index per graph and kryo serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
Lorenz Buehmann authored and Aklakan committed Feb 27, 2025
1 parent a3983b2 commit ae4ba13
Show file tree
Hide file tree
Showing 46 changed files with 3,266 additions and 155 deletions.
91 changes: 91 additions & 0 deletions jena-arq/src/main/java/org/apache/jena/system/TxnCtl.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.system;

import java.util.Objects;

import org.apache.jena.query.ReadWrite;
import org.apache.jena.query.TxnType;
import org.apache.jena.sparql.core.Transactional;

/**
 * Txn variant for use with try-with-resources. Allows raising
 * checked exceptions in an idiomatic way. Closing the TxnCtl
 * instance will abort the transaction unless it
 * has been manually committed.
 * <p>
 * If the {@link Transactional} is already in a transaction when
 * {@link #begin(Transactional, TxnType)} is called, the TxnCtl joins that
 * transaction instead of starting a new one. In that case the TxnCtl does not
 * own the transaction: both {@link #commit()} and {@link #close()} leave it
 * untouched, so that whoever started it remains responsible for finishing it.
 * <p>
 *
 * Usage example:
 * <pre>
 * public void myMethod() throws IOException {
 *   try (TxnCtl txn = TxnCtl.begin(dataset, TxnType.WRITE)) {
 *     // Do work
 *     if (someError) {
 *         throw new IOException();
 *     }
 *     // Must manually call commit on success.
 *     txn.commit();
 *   }
 * }
 * </pre>
 */
public class TxnCtl
    implements AutoCloseable
{
    /** The transactional object being controlled. */
    private final Transactional txn;

    /**
     * True if a transaction was already active when this controller was
     * created, i.e. this controller did not call {@code txn.begin} itself.
     */
    private final boolean joinedExisting;

    private TxnCtl(Transactional txn, boolean joinedExisting) {
        this.txn = txn;
        this.joinedExisting = joinedExisting;
    }

    /**
     * Same as {@link #begin(Transactional, TxnType)}, with the transaction type
     * obtained from {@code TxnType.convert(readWrite)}.
     *
     * @param txn the transactional object; must not be null
     * @param readWrite the read/write mode to convert into a transaction type
     * @return a controller to be used in a try-with-resources statement
     */
    public static TxnCtl begin(Transactional txn, ReadWrite readWrite) {
        return begin(txn, TxnType.convert(readWrite));
    }

    /**
     * Start a transaction of the given type, or join the one already in progress.
     * When a transaction is already active, the requested type is passed to
     * {@code TxnOp.compatibleWithPromote} together with the transactional object
     * and no new transaction is started.
     *
     * @param txn the transactional object; must not be null
     * @param txnType the requested transaction type; must not be null
     * @return a controller to be used in a try-with-resources statement
     * @throws NullPointerException if {@code txn} or {@code txnType} is null
     */
    public static TxnCtl begin(Transactional txn, TxnType txnType) {
        Objects.requireNonNull(txn, "txn");
        Objects.requireNonNull(txnType, "txnType");
        boolean alreadyActive = txn.isInTransaction();
        if ( alreadyActive )
            TxnOp.compatibleWithPromote(txnType, txn);
        else
            txn.begin(txnType);
        return new TxnCtl(txn, alreadyActive);
    }

    /**
     * Commit the transaction started by this controller.
     * <p>
     * This is a no-op if this controller joined a transaction that was already
     * in progress (the owner of that transaction decides its outcome), or if the
     * transaction is no longer active because of an explicit commit or abort.
     */
    public void commit() {
        // Mirror the ownership rule applied in close(): never finish a
        // transaction that this controller did not start.
        if ( joinedExisting )
            return;
        // May have been explicit commit or abort.
        if ( txn.isInTransaction() )
            txn.commit();
    }

    /**
     * Abort the transaction if it is still active and end it. Does nothing if
     * this controller joined a transaction that was already in progress.
     */
    @Override
    public void close() {
        if ( joinedExisting )
            return;
        // May have been explicit commit or abort.
        if ( txn.isInTransaction() )
            txn.abort();
        txn.end();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,10 @@ private static void prepareSpatialExtension(Dataset dataset, ArgsConfig argsConf
if (!isEmpty) {
if (argsConfig.getSpatialIndexFile() != null) {
File spatialIndexFile = argsConfig.getSpatialIndexFile();
GeoSPARQLConfig.setupSpatialIndex(dataset, spatialIndexFile);
GeoSPARQLConfig.setupSpatialIndex(dataset, spatialIndexFile.toPath());
} else if (argsConfig.isTDBFileSetup()) {
File spatialIndexFile = new File(argsConfig.getTdbFile(), SPATIAL_INDEX_FILE);
GeoSPARQLConfig.setupSpatialIndex(dataset, spatialIndexFile);
GeoSPARQLConfig.setupSpatialIndex(dataset, spatialIndexFile.toPath());
} else {
GeoSPARQLConfig.setupSpatialIndex(dataset);
}
Expand Down
117 changes: 101 additions & 16 deletions jena-geosparql/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>jena-geosparql</artifactId>
<packaging>jar</packaging>
Expand All @@ -34,6 +36,16 @@

<dependencies>

<dependency>
<groupId>io.github.galbiston</groupId>
<artifactId>expiring-map</artifactId>
</dependency>

<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
</dependency>

<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>jena-arq</artifactId>
Expand All @@ -44,18 +56,6 @@
<groupId>org.apache.sis.core</groupId>
<artifactId>sis-referencing</artifactId>
</dependency>

<!-- Eventually this will be:
<dependency>
<groupId>jakarta.xml.bind</groupId>
<artifactId>jakarta.xml.bind-api</artifactId>
</dependency>
-->

<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
Expand All @@ -82,6 +82,90 @@
<artifactId>commons-collections4</artifactId>
</dependency>

<!-- Kryo serialization via Apache Sedona -->

<!--
<dependency>
<groupId>org.apache.sedona</groupId>
<artifactId>sedona-spark-common-3.5_2.13</artifactId>
<version>1.6.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.geotools</groupId>
<artifactId>gt-geotiff</artifactId>
</exclusion>
<exclusion>
<groupId>org.geotools</groupId>
<artifactId>gt-coverage</artifactId>
</exclusion>
</exclusions>
</dependency>
-->

<dependency>
<groupId>org.apache.sedona</groupId>
<artifactId>sedona-common</artifactId>
<version>1.7.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
</exclusion>
<exclusion>
<groupId>org.wololo</groupId>
<artifactId>jts2geojson</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.geometry</groupId>
<artifactId>s2-geometry</artifactId>
</exclusion>
<exclusion>
<groupId>com.uber</groupId>
<artifactId>h3</artifactId>
</exclusion>
<exclusion>
<groupId>net.sf.geographiclib</groupId>
<artifactId>GeographicLib-Java</artifactId>
</exclusion>
</exclusions>
</dependency>

<!-- H3Utils only exists in newer sedona-common versions -->
<!--
<dependency>
<groupId>org.apache.sedona</groupId>
<artifactId>sedona-common</artifactId>
<version>1.6.0</version>
</dependency>
-->

<dependency>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo</artifactId>
<version>4.0.2</version>
</dependency>

<!-- Resolve conflicts between jenax/tika/nashorn/hadoop/kryo-->
<!--
<dependency>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
<version>8.0.1</version>
</dependency>
-->

<!-- Non-free; testing only -->
<dependency>
<groupId>org.apache.sis.non-free</groupId>
Expand All @@ -102,6 +186,7 @@
<scope>test</scope>
</dependency>


</dependencies>

<build>
Expand Down Expand Up @@ -133,11 +218,11 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<executions>
<execution>
<id>attach-sources-test</id>
<id>attach-sources-test</id>
<goals>
<goal>test-jar-no-fork</goal>
<goal>test-jar-no-fork</goal>
</goals>
</execution>
</executions>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public static void init() {
if ( initialized )
return ;
synchronized (initLock) {
if ( initialized ) {
if ( initialized || System.getProperty("jena.geosparql.skip", "false").equalsIgnoreCase("true") ) {
JenaSystem.logLifecycle("InitGeoSPARQL - skip") ;
return ;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,16 @@

package org.apache.jena.geosparql.assembler;

import static org.apache.jena.geosparql.assembler.VocabGeoSPARQL.*;
import static org.apache.jena.geosparql.assembler.VocabGeoSPARQL.pApplyDefaultGeometry;
import static org.apache.jena.geosparql.assembler.VocabGeoSPARQL.pDataset;
import static org.apache.jena.geosparql.assembler.VocabGeoSPARQL.pIndexEnabled;
import static org.apache.jena.geosparql.assembler.VocabGeoSPARQL.pIndexExpiries;
import static org.apache.jena.geosparql.assembler.VocabGeoSPARQL.pIndexSizes;
import static org.apache.jena.geosparql.assembler.VocabGeoSPARQL.pInference;
import static org.apache.jena.geosparql.assembler.VocabGeoSPARQL.pQueryRewrite;
import static org.apache.jena.geosparql.assembler.VocabGeoSPARQL.pSpatialIndexFile;
import static org.apache.jena.geosparql.assembler.VocabGeoSPARQL.pSpatialIndexPerGraph;
import static org.apache.jena.geosparql.assembler.VocabGeoSPARQL.pSrsUri;
import static org.apache.jena.sparql.util.graph.GraphUtils.getBooleanValue;

import java.io.IOException;
Expand All @@ -34,6 +43,7 @@
import org.apache.jena.geosparql.configuration.GeoSPARQLOperations;
import org.apache.jena.geosparql.configuration.SrsException;
import org.apache.jena.geosparql.spatial.SpatialIndexException;
import org.apache.jena.geosparql.spatial.index.v2.SpatialIndexUtils;
import org.apache.jena.graph.Graph;
import org.apache.jena.graph.Node;
import org.apache.jena.query.Dataset;
Expand Down Expand Up @@ -117,9 +127,21 @@ public DatasetGraph createDataset(Assembler a, Resource root) {
if (root.hasProperty(pSpatialIndexFile) )
spatialIndexFilename = GraphUtils.getStringValue(root, pSpatialIndexFile);

// ---- Build
// spatial index per named graph option
boolean spatialIndexPerGraph = false;
if (root.hasProperty(pSpatialIndexPerGraph) )
spatialIndexPerGraph = getBooleanValue(root, pSpatialIndexPerGraph);

// SRS URI
String srsURI = null;
if (root.hasProperty(pSrsUri) )
srsURI = GraphUtils.getStringValue(root, pSrsUri);


// ---- Build
Dataset dataset = DatasetFactory.wrap(base);
dataset.getContext().set(SpatialIndexUtils.symSpatialIndexPerGraph, spatialIndexPerGraph);
dataset.getContext().set(SpatialIndexUtils.symSrsUri, srsURI);

// Conversion of data. Startup-only.
// needed for w3c:geo/wgs84_pos#lat/long.
Expand All @@ -142,13 +164,13 @@ public DatasetGraph createDataset(Assembler a, Resource root) {
//Setup GeoSPARQL
if (indexEnabled) {
GeoSPARQLConfig.setupMemoryIndex(indexSizes.get(0), indexSizes.get(1), indexSizes.get(2),
(long)indexExpiries.get(0), (long)indexExpiries.get(1), (long)indexExpiries.get(2),
queryRewrite);
(long)indexExpiries.get(0), (long)indexExpiries.get(1), (long)indexExpiries.get(2),
queryRewrite);
} else {
GeoSPARQLConfig.setupNoIndex(queryRewrite);
}

prepareSpatialExtension(dataset, spatialIndexFilename);
prepareSpatialExtension(dataset, spatialIndexFilename, spatialIndexPerGraph);
return base;
}

Expand All @@ -165,8 +187,8 @@ private static List<Integer> getListInteger(Resource r, Property p, int len) {
return integerList;
}

private static void prepareSpatialExtension(Dataset dataset, String spatialIndex){
boolean isEmpty = dataset.calculateRead(()->dataset.isEmpty());
private static void prepareSpatialExtension(Dataset dataset, String spatialIndex, boolean spatialIndexPerGraph){
boolean isEmpty = dataset.calculateRead(dataset::isEmpty);
if ( isEmpty && spatialIndex != null ) {
LOG.warn("Dataset empty. Spatial Index not constructed. Server will require restarting after adding data and any updates to build Spatial Index.");
return;
Expand All @@ -185,12 +207,12 @@ private static void prepareSpatialExtension(Dataset dataset, String spatialIndex
// file given but empty -> compute and serialize index
Path spatialIndexPath = Path.of(spatialIndex);
if ( ! Files.exists(spatialIndexPath) || Files.size(spatialIndexPath) == 0 ) {
GeoSPARQLConfig.setupSpatialIndex(dataset, spatialIndexPath.toFile());
GeoSPARQLConfig.setupSpatialIndex(dataset, spatialIndexPath, spatialIndexPerGraph);
return;
}

// load and setup the precomputed index
GeoSPARQLConfig.setupPrecomputedSpatialIndex(dataset, spatialIndexPath.toFile());
GeoSPARQLConfig.setupPrecomputedSpatialIndex(dataset, spatialIndexPath);
}
catch (SrsException ex) {
// Data but no spatial data.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ private static Property property(String shortName) {
// "File to load or store the spatial index. Default to " + SPATIAL_INDEX_FILE + " in TDB folder if using TDB and not set. Otherwise spatial index is not stored.
public static final Property pSpatialIndexFile = property("spatialIndexFile");

public static final Property pSpatialIndexPerGraph = property("spatialIndexPerGraph");

public static final Property pSrsUri = property("srsUri");

// Dataset
public static final Property pDataset = property("dataset");
}
Loading

0 comments on commit ae4ba13

Please sign in to comment.