Skip to content

Commit

Permalink
David/shacl specified queryables (#268)
Browse files Browse the repository at this point in the history
* Queryables MVP

* Add documentation. All tests passing.
  • Loading branch information
recalcitrantsupplant authored Oct 1, 2024
1 parent e55e594 commit 4ce71b7
Show file tree
Hide file tree
Showing 43 changed files with 885 additions and 535 deletions.
89 changes: 89 additions & 0 deletions README-OGC-Features.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
Prez provides an OGC Features compliant API

The API is mounted as a sub application at `"/catalogs/{catalogId}/collections/{recordsCollectionId}/features"` by default.
It can be mounted at a different path via the `ogc_features_mount_path` configuration setting (or the corresponding upper-cased environment variable).

Queryables are part of the OGC Features specification and provide a listing of the parameters that can be queried.
The queryables are a flat set of properties on features.

Because Prez consumes an RDF Knowledge Graph, it is desirable to query more than top level properties.
To achieve this, Prez provides a mechanism to declare paths through the graph as queryables.
To declare these paths, you can use SHACL.

An example is provided below:
```
@prefix cql: <http://www.opengis.net/doc/IS/cql2/1.0/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix dwc: <http://rs.tdwg.org/dwc/terms/> .
@prefix ex: <http://example.com/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix sname: <https://fake-scientific-name-id.com/name/afd/> .
@prefix sosa: <http://www.w3.org/ns/sosa/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
ex:BDRScientificNameQueryableShape
a sh:PropertyShape ;
a cql:Queryable ;
sh:path (
[ sh:inversePath sosa:hasFeatureOfInterest ]
sosa:hasMember
sosa:hasResult
dwc:scientificNameID
) ;
sh:name "Scientific Name" ;
dcterms:identifier "scientificname" ;
sh:datatype xsd:string ;
sh:in (
sname:001
sname:002
) ;
.
```
It is recommended that templated SPARQL queries are used to periodically update the `sh:in` values, which correspond to enumerations.
TODO: other SHACL predicates can be reused to specify min/max values, etc., where the range is numeric and enumerations are not appropriate.

When Prez starts, it will query the remote repository (typically a triplestore) for all Queryables.
It queries for them using a CONSTRUCT query, serializes this as JSON-LD, and does a minimal transformation to produce the OGC Features compliant response.
The query is:
```
"""
PREFIX cql: <http://www.opengis.net/doc/IS/cql2/1.0/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX sh: <http://www.w3.org/ns/shacl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
CONSTRUCT {
?queryable cql:id ?id ;
cql:name ?title ;
cql:datatype ?type ;
cql:enum ?enums .
}
WHERE {?queryable a cql:Queryable ;
dcterms:identifier ?id ;
sh:name ?title ;
sh:datatype ?type ;
sh:in/rdf:rest*/rdf:first ?enums ;
}
"""
```
And the output after transformation is of the form (which is the format required for OGC Features):
```
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"$id": "http://localhost:8000/catalogs/dtst:bdr/collections/syn:68a782a8-d7fe-4b3e-8377-c76c9cc245cc/features/queryables",
"type": "object",
"title": "Global Queryables",
"description": "Global queryable properties for all collections in the OGC Features API.",
"properties": {
"scientificname": {
"title": "Scientific Name",
"type": "string",
"enum": [
"https://fake-scientific-name-id.com/name/afd/001",
"https://fake-scientific-name-id.com/name/afd/002"
]
}
}
}
```

Separately, Prez internally translates the declared SHACL Property Path expression into SPARQL and injects it into queries whenever a queryable (e.g. `scientificname` in the example above) is requested.
484 changes: 183 additions & 301 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions prez/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
load_system_data_to_oxigraph,
load_annotations_data_to_oxigraph,
get_annotations_store,
get_queryable_props,
)
from prez.exceptions.model_exceptions import (
ClassNotFoundException,
Expand All @@ -40,6 +41,7 @@
populate_api_info,
prefix_initialisation,
retrieve_remote_template_queries,
retrieve_remote_queryable_definitions,
)
from prez.services.exception_catchers import (
catch_400,
Expand Down Expand Up @@ -115,8 +117,10 @@ async def lifespan(app: FastAPI):
await count_objects(app.state.repo)
await populate_api_info()

app.state.queryable_props = get_queryable_props()
app.state.pyoxi_system_store = get_system_store()
app.state.annotations_store = get_annotations_store()
await retrieve_remote_queryable_definitions(app.state, app.state.pyoxi_system_store)
await load_system_data_to_oxigraph(app.state.pyoxi_system_store)
await load_annotations_data_to_oxigraph(app.state.annotations_store)

Expand Down
2 changes: 2 additions & 0 deletions prez/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

annotations_store = Store()

queryable_props = {}

oxrdflib_store = Graph(store="Oxigraph")

caches.set_config(
Expand Down
23 changes: 18 additions & 5 deletions prez/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
endpoints_graph_cache,
annotations_store,
prez_system_graph,
queryable_props,
)
from prez.config import settings
from prez.enums import (
Expand All @@ -24,7 +25,7 @@
GeoJSONMediaType,
)
from prez.models.query_params import QueryParams
from prez.reference_data.prez_ns import ALTREXT, ONT, EP, OGCE, OGCFEAT, PREZ
from prez.reference_data.prez_ns import ALTREXT, ONT, EP, OGCE, OGCFEAT
from prez.repositories import PyoxigraphRepo, RemoteSparqlRepo, OxrdflibRepo, Repo
from prez.services.classes import get_classes_single
from prez.services.connegp_service import NegotiatedPMTs
Expand Down Expand Up @@ -63,6 +64,10 @@ def get_oxrdflib_store():
return oxrdflib_store


def get_queryable_props():
    """Return the shared module-level ``queryable_props`` mapping.

    Exposed as a function so it can be injected with ``Depends(...)`` into the
    CQL parser dependencies and stored on the application state at startup.
    """
    return queryable_props


async def get_data_repo(
request: Request,
http_async_client: httpx.AsyncClient = Depends(get_async_http_client),
Expand Down Expand Up @@ -133,13 +138,18 @@ async def load_annotations_data_to_oxigraph(store: Store):
store.load(file_bytes, "application/n-triples")


async def cql_post_parser_dependency(request: Request) -> CQLParser:
async def cql_post_parser_dependency(
request: Request,
queryable_props: list = Depends(get_queryable_props),
) -> CQLParser:
try:
body = await request.json()
context = json.load(
(Path(__file__).parent / "reference_data/cql/default_context.json").open()
)
cql_parser = CQLParser(cql=body, context=context)
cql_parser = CQLParser(
cql=body, context=context, queryable_props=queryable_props
)
cql_parser.generate_jsonld()
cql_parser.parse()
return cql_parser
Expand All @@ -153,6 +163,7 @@ async def cql_post_parser_dependency(request: Request) -> CQLParser:

async def cql_get_parser_dependency(
query_params: QueryParams = Depends(),
queryable_props: list = Depends(get_queryable_props),
) -> CQLParser:
if query_params.filter:
try:
Expand All @@ -163,13 +174,15 @@ async def cql_get_parser_dependency(
Path(__file__).parent / "reference_data/cql/default_context.json"
).open()
)
cql_parser = CQLParser(cql=query, context=context, crs=crs)
cql_parser = CQLParser(
cql=query, context=context, crs=crs, queryable_props=queryable_props
)
cql_parser.generate_jsonld()
cql_parser.parse()
return cql_parser
except json.JSONDecodeError:
raise HTTPException(status_code=400, detail="Invalid JSON format.")
except Exception as e: # Replace with your specific parsing exception
except Exception as e:
raise HTTPException(
status_code=400, detail="Invalid CQL format: Parsing failed."
)
Expand Down
1 change: 0 additions & 1 deletion prez/reference_data/cql/default_context.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{
"@version": 1.1,
"@base": "http://example.com/",
"@vocab": "http://example.com/vocab/",
"cql": "http://www.opengis.net/doc/IS/cql2/1.0/",
"sf": "http://www.opengis.net/ont/sf#",
Expand Down
4 changes: 1 addition & 3 deletions prez/renderers/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
from prez.renderers.csv_renderer import render_csv_dropdown
from prez.renderers.json_renderer import render_json_dropdown, NotFoundError
from prez.repositories import Repo
from prez.services.annotations import (
get_annotation_properties,
)
from prez.services.annotations import get_annotation_properties
from prez.services.connegp_service import RDF_MEDIATYPES, RDF_SERIALIZER_TYPES_MAP
from prez.services.curie_functions import get_curie_id_for_uri

Expand Down
16 changes: 8 additions & 8 deletions prez/routers/ogc_features_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
get_system_repo,
get_endpoint_nodeshapes,
get_profile_nodeshape,
get_endpoint_uri_type,
get_ogc_features_path_params,
get_template_query,
check_unknown_params,
get_endpoint_uri_type,
)
from prez.exceptions.model_exceptions import (
ClassNotFoundException,
Expand Down Expand Up @@ -118,6 +118,12 @@ async def ogc_features_api(
methods=ALLOWED_METHODS,
name=OGCFEAT["queryables-global"],
)
@features_subapi.api_route(
"/collections/{collectionId}/queryables",
methods=ALLOWED_METHODS,
name=OGCFEAT["queryables-local"],
openapi_extra=ogc_features_openapi_extras.get("feature-collection"),
)
@features_subapi.api_route(
"/collections",
methods=ALLOWED_METHODS,
Expand All @@ -129,15 +135,9 @@ async def ogc_features_api(
name=OGCFEAT["features"],
openapi_extra=ogc_features_openapi_extras.get("feature-collection"),
)
@features_subapi.api_route(
"/collections/{collectionId}/queryables",
methods=ALLOWED_METHODS,
name=OGCFEAT["queryables-local"],
openapi_extra=ogc_features_openapi_extras.get("feature-collection"),
)
async def listings_with_feature_collection(
validate_unknown_params: bool = Depends(check_unknown_params),
endpoint_uri_type: tuple = Depends(get_endpoint_uri_type),
endpoint_uri_type: str = Depends(get_endpoint_uri_type),
endpoint_nodeshape: NodeShape = Depends(get_endpoint_nodeshapes),
profile_nodeshape: NodeShape = Depends(get_profile_nodeshape),
url: str = Depends(get_url),
Expand Down
24 changes: 23 additions & 1 deletion prez/services/app_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pathlib import Path

import httpx
from rdflib import URIRef, Literal, Graph, RDF, BNode
from rdflib import URIRef, Literal, Graph, RDF, BNode, DCTERMS

from prez.cache import (
prez_system_graph,
Expand Down Expand Up @@ -194,3 +194,25 @@ async def get_remote_endpoint_definitions(repo):
log.info(f"Remote endpoint definition(s) found and added")
else:
log.info("No remote endpoint definitions found")


async def retrieve_remote_queryable_definitions(app_state, system_store):
    """Fetch ``cql:Queryable`` definitions from the data repo and register them.

    A DESCRIBE query is sent through ``app_state.repo``. If any triples come
    back they are added to ``prez_system_graph``, loaded into ``system_store``
    as N-Triples, and each ``dcterms:identifier`` value of a queryable is
    recorded in ``app_state.queryable_props`` as ``{identifier: queryable IRI}``
    (both as plain strings).

    Args:
        app_state: application state exposing ``repo`` (with an awaitable
            ``send_queries``) and a mutable ``queryable_props`` mapping.
        system_store: store whose ``load(data, mime_type)`` receives the
            serialized queryable triples.

    Returns:
        None. The function works purely through side effects and logging.
    """
    queryable_class = URIRef("http://www.opengis.net/doc/IS/cql2/1.0/Queryable")
    query = "DESCRIBE ?queryable { ?queryable a <http://www.opengis.net/doc/IS/cql2/1.0/Queryable> }"
    g, _ = await app_state.repo.send_queries([query], [])
    if len(g) == 0:
        log.info("No remote queryable definitions found")
        return

    # Call __iadd__ directly: writing `prez_system_graph += g` here would
    # rebind the name and turn the module-level graph into a function local.
    prez_system_graph.__iadd__(g)  # use for generating property shapes
    queryable_bytes = g.serialize(format="nt", encoding="utf-8")
    system_store.load(queryable_bytes, "application/n-triples")  # use for generating JSON

    # Only subjects explicitly typed as cql:Queryable count; matching on the
    # object alone would also pick up subjects linked by any other predicate.
    queryables = list(g.subjects(predicate=RDF.type, object=queryable_class))

    # Look identifiers up per queryable so that an empty `queryables` list
    # registers nothing, rather than depending on how a multi-subject lookup
    # treats an empty list of choices.
    for queryable in queryables:
        for identifier in g.objects(queryable, DCTERMS.identifier):
            app_state.queryable_props[str(identifier)] = str(queryable)

    log.info(
        "Remote queryable definition(s) found and added: %s", len(queryables)
    )
Loading

0 comments on commit 4ce71b7

Please sign in to comment.