Skip to content

Commit

Permalink
Merge pull request #14 from sentier-dev/simplified-entry
Browse files Browse the repository at this point in the history
Add simplified entry function
  • Loading branch information
tngTUDOR authored Oct 10, 2024
2 parents 2a772ed + 137018c commit e5e9262
Show file tree
Hide file tree
Showing 6 changed files with 320 additions and 211 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ dependencies = [
"platformdirs",
"rdflib",
"requests",
"rfc3987",
"skosify",
"tqdm",
]
Expand Down
140 changes: 140 additions & 0 deletions sentier_vocab/add_terms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
from .ordered_serialization import OrderedTurtleSerializer
from pathlib import Path
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import DCTERMS, RDF, RDFS, SKOS
import skosify

# Namespaces for vocabularies that rdflib does not ship as built-in constants.
# NOTE(review): unlike the two QUDT namespaces below, this IRI has no trailing
# "#" or "/"; rdflib `Namespace` attribute access concatenates the term name
# directly onto this string — confirm that is intended.
VAEM = Namespace("http://www.linkedmodel.org/schema/vaem")
# QUDT schema namespace; source of the `has*` / `conversionMultiplier*`
# predicates used in the mappings below.
QUDTS = Namespace("http://qudt.org/schema/qudt/")
# QUDT vocabulary namespace.
QUDTV = Namespace("http://qudt.org/vocab/")
# NOTE(review): this is a single term under QUDTV (no trailing "/"), not a
# `Namespace` — presumably meant as the quantity-kind prefix; verify usage.
QK = QUDTV.quantitykind


# Short string aliases accepted by `add_custom_terms` in the predicate
# position, mapped to the full predicate IRI. Each alias appears exactly once
# (a repeated key in a dict literal is silently overwritten).
COMMON_PREDICATES = {
    # SKOS semantic relations
    'broader': SKOS.broader,
    'narrower': SKOS.narrower,
    'related': SKOS.related,
    # SKOS labels and documentation
    'prefLabel': SKOS.prefLabel,
    'altLabel': SKOS.altLabel,
    'hiddenLabel': SKOS.hiddenLabel,
    'notation': SKOS.notation,
    'definition': SKOS.definition,
    # SKOS mapping and scheme membership
    'exactMatch': SKOS.exactMatch,
    'closeMatch': SKOS.closeMatch,
    'inScheme': SKOS.inScheme,
    # RDF / RDFS / Dublin Core
    'isDefinedBy': RDFS.isDefinedBy,
    'isReplacedBy': DCTERMS.isReplacedBy,
    'type': RDF.type,
    # QUDT
    'hasQuantityKind': QUDTS.hasQuantityKind,
    'hasDimensionVector': QUDTS.hasDimensionVector,
    'conversionMultiplier': QUDTS.conversionMultiplier,
    'conversionMultiplierSN': QUDTS.conversionMultiplierSN,
}
# rdflib term class expected in the object position for each known predicate.
# `add_custom_terms` uses this both to coerce plain-string objects and to
# validate objects that were passed in as ready-made rdflib terms.
OBJECT_TYPES_FOR_PREDICATES = {
    # Hierarchical relations link one concept to another, so the object is an
    # IRI. Mapping these to `Literal` would store the target concept's IRI as a
    # plain string instead of a resource.
    SKOS.broader: URIRef,
    SKOS.narrower: URIRef,
    # Labels, notations and definitions are text.
    SKOS.prefLabel: Literal,
    SKOS.altLabel: Literal,
    SKOS.hiddenLabel: Literal,
    SKOS.notation: Literal,
    SKOS.definition: Literal,
    # Associative / mapping relations and scheme membership point at resources.
    SKOS.related: URIRef,
    SKOS.exactMatch: URIRef,
    SKOS.closeMatch: URIRef,
    SKOS.inScheme: URIRef,
    RDFS.isDefinedBy: URIRef,
    DCTERMS.isReplacedBy: URIRef,
    RDF.type: URIRef,
    QUDTS.hasQuantityKind: URIRef,
    QUDTS.hasDimensionVector: URIRef,
    # Conversion multipliers are numeric values, not resources. A plain string
    # is wrapped as an untyped literal; pass an explicit `Literal` with a
    # datatype when a typed number is required.
    QUDTS.conversionMultiplier: Literal,
    QUDTS.conversionMultiplierSN: Literal,
}
# String aliases that may be used as the object of an `RDF.type` triple,
# mapped to the SKOS class they stand for.
COMMON_OBJECTS = dict(
    Concept=SKOS.Concept,
    ConceptScheme=SKOS.ConceptScheme,
)


def add_custom_terms(data: list[tuple], namespace: str, filename: str) -> Path:
    """Build a graph from simple triples, run SKOS inference, and write Turtle.

    Each element of `data` is a tuple with three or four items:

    ```python
    (subject, predicate, object)
    (subject, predicate, object, language)
    ```

    * `subject` (str): appended to `namespace` and turned into a `URIRef`.
    * `predicate` (str | URIRef): a `URIRef` is used as-is; a string must be a
      key of `COMMON_PREDICATES`.
    * `object` (str | URIRef | Literal): `URIRef` and `Literal` values are used
      as-is. A string is looked up in `COMMON_OBJECTS` when the predicate is
      `RDF.type`; otherwise it is converted to the class that
      `OBJECT_TYPES_FOR_PREDICATES` gives for the predicate.
    * `language` (str, optional): language tag, applied only when a string
      object is converted to a `Literal`; ignored in every other case.

    After all triples are added, `skosify.infer.skos_topConcept`,
    `skos_hierarchical` and `skos_transitive` are run on the graph, and the
    result is serialized with `OrderedTurtleSerializer`.

    Args:
        data: The triples to add, in the tuple form described above.
        namespace: Non-empty IRI prefix for every subject.
        filename: Non-empty output file name; its suffix is replaced by `.ttl`.

    Returns:
        Path of the written file, inside the `output` directory that sits next
        to this module.

    Raises:
        ValueError: If `namespace` or `filename` is empty or not a string, a
            data line does not have 3 or 4 items, the predicate is neither a
            string nor a `URIRef`, a string object cannot be converted because
            the predicate has no known object type, or the object's type does
            not match the type expected for the predicate.
        KeyError: If a string predicate is not in `COMMON_PREDICATES`.
    """
    if not namespace or not isinstance(namespace, str):
        raise ValueError("namespace must be a string")
    if not filename or not isinstance(filename, str):
        raise ValueError("filename must be a string")

    graph = Graph()
    for line in data:
        if len(line) == 3:
            s, p, o = line
            lang = None
        elif len(line) == 4:
            s, p, o, lang = line
        else:
            raise ValueError(f"Data line {line} has wrong number of elements")

        subject = URIRef(namespace + s)

        # `URIRef` subclasses `str`, so it has to be tested before `str`.
        if isinstance(p, URIRef):
            predicate = p
        elif isinstance(p, str):
            try:
                predicate = COMMON_PREDICATES[p]
            except KeyError:
                # `from None`: the bare lookup failure adds nothing to the message.
                raise KeyError(
                    f"Predicate {p} not in common predicates; pass a `URIRef` instead"
                ) from None
        else:
            raise ValueError(f"Predicate {p} has incorrect type for this function")

        # None means the predicate is unknown and no coercion/validation applies.
        object_type = OBJECT_TYPES_FOR_PREDICATES.get(predicate)

        if isinstance(o, (Literal, URIRef)):
            object_ = o
        # Compare by value: a caller-built `URIRef` for rdf:type is equal to,
        # but not necessarily the same object as, `RDF.type`.
        elif predicate == RDF.type and o in COMMON_OBJECTS:
            object_ = COMMON_OBJECTS[o]
        elif object_type is Literal:
            # `lang=None` is the `Literal` default, i.e. no language tag.
            object_ = Literal(o, lang=lang)
        elif object_type is not None:
            object_ = URIRef(o)
        else:
            raise ValueError(f"Object {o} can't be translated into correct form")

        if object_type is not None and not isinstance(object_, object_type):
            raise ValueError(
                f"Object {object_} has incorrect type for this function; "
                f"should be {object_type.__name__} but got {type(object_).__name__}"
            )

        graph.add((subject, predicate, object_))

    skosify.infer.skos_topConcept(graph)
    skosify.infer.skos_hierarchical(graph, narrower=True)
    skosify.infer.skos_transitive(graph, narrower=True)

    output_path = (Path(__file__).parent / "output" / filename).with_suffix(".ttl")
    # Do not rely on the output directory already existing.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    serializer = OrderedTurtleSerializer(graph)
    with open(output_path, 'wb') as fp:
        serializer.serialize(fp)

    return output_path
22 changes: 4 additions & 18 deletions sentier_vocab/custom_products.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,10 @@
from rdflib import Graph, Namespace, URIRef
from sentier_vocab.graph_base import GraphBase
from sentier_vocab.add_terms import add_custom_terms
from sentier_vocab.input.custom_products import CUSTOM_PRODUCTS_DATA
from rdflib.namespace import RDFS, SKOS, RDF
import skosify
from loguru import logger

PRODUCTS = Namespace("http://vocab.sentier.dev/products")


class CustomProducts(GraphBase):
def __init__(self):
self.graph = Graph()
for triple in CUSTOM_PRODUCTS_DATA:
self.graph.add(triple)

skosify.infer.skos_topConcept(self.graph)
skosify.infer.skos_hierarchical(self.graph, narrower=True)
skosify.infer.skos_transitive(self.graph, narrower=True)
def add_custom_products():
    """Write the custom product terms to a Turtle file and return its path.

    Passes `CUSTOM_PRODUCTS_DATA` to `add_custom_terms` with the products
    namespace and the output name "custom-products", and hands back the value
    `add_custom_terms` returns so callers can report where the file was
    written instead of losing that information.
    """
    return add_custom_terms(
        CUSTOM_PRODUCTS_DATA,
        "https://vocab.sentier.dev/products/",
        "custom-products",
    )


if __name__ == "__main__":
fp = CustomProducts().write_graph("custom-products.ttl")
logger.info(f"Created custom graph at {fp}")
add_custom_products()
182 changes: 32 additions & 150 deletions sentier_vocab/input/custom_products.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,157 +4,39 @@
PRODUCTS = Namespace("https://vocab.sentier.dev/products/")

CUSTOM_PRODUCTS_DATA = [
(
URIRef(PRODUCTS + "electrolyzer"),
RDF.type,
SKOS.Concept
),
(
URIRef(PRODUCTS + "electrolyzer"),
SKOS.broader,
URIRef("http://data.europa.eu/xsp/cn2024/854330700080")
),
(
URIRef(PRODUCTS + "electrolyzer"),
SKOS.prefLabel,
Literal("Electrolyzer", lang="en")
),
(
URIRef(PRODUCTS + "electrolyzer"),
SKOS.definition,
Literal("An electrolyzer is a machine that uses electricity to drive a chemical reaction.", lang="en")
),
(
URIRef(PRODUCTS + "electrolyzer"),
SKOS.related,
URIRef("https://en.wikipedia.org/wiki/Electrolysis")
),
(
URIRef(PRODUCTS + "aec-electrolyzer"),
RDF.type,
SKOS.Concept
),
(
URIRef(PRODUCTS + "aec-electrolyzer"),
SKOS.broader,
URIRef(PRODUCTS + "electrolyzer")
),
(
URIRef(PRODUCTS + "aec-electrolyzer"),
SKOS.prefLabel,
Literal("Alkaline Electrolysis Cell Electrolyzer", lang="en")
),
(
URIRef(PRODUCTS + "aec-electrolyzer"),
SKOS.definition,
Literal("An electrolyzer with two electrodes operating in a liquid alkaline electrolyte.", lang="en")
),
(
URIRef(PRODUCTS + "aec-electrolyzer"),
SKOS.related,
URIRef("https://en.wikipedia.org/wiki/Alkaline_water_electrolysis")
),
(
URIRef(PRODUCTS + "pem-electrolyzer"),
RDF.type,
SKOS.Concept
),
(
URIRef(PRODUCTS + "pem-electrolyzer"),
SKOS.broader,
URIRef(PRODUCTS + "electrolyzer")
),
(
URIRef(PRODUCTS + "pem-electrolyzer"),
SKOS.prefLabel,
Literal("Proton Exchange Membrane Electrolyzer", lang="en")
),
(
URIRef(PRODUCTS + "pem-electrolyzer"),
SKOS.definition,
Literal("An electrolyzer with a solid polymer electrolyte and a proton exchange membrane.", lang="en")
),
(
URIRef(PRODUCTS + "pem-electrolyzer"),
SKOS.related,
URIRef("https://en.wikipedia.org/wiki/Proton_exchange_membrane_electrolysis")
),
(
URIRef(PRODUCTS + "soel-electrolyzer"),
RDF.type,
SKOS.Concept
),
(
URIRef(PRODUCTS + "soel-electrolyzer"),
SKOS.broader,
URIRef(PRODUCTS + "electrolyzer")
),
(
URIRef(PRODUCTS + "soel-electrolyzer"),
SKOS.prefLabel,
Literal("Solid Oxide Electrolyzer", lang="en")
),
(
URIRef(PRODUCTS + "soel-electrolyzer"),
SKOS.definition,
Literal("A solid oxide fuel cell that runs in regenerative mode to achieve the electrolysis of water.", lang="en")
),
(
URIRef(PRODUCTS + "soel-electrolyzer"),
SKOS.related,
URIRef("https://en.wikipedia.org/wiki/Solid_oxide_electrolyzer_cell")
),
("electrolyzer", "type", "Concept"),
("electrolyzer", "broader", "http://data.europa.eu/xsp/cn2024/854330700080"),
("electrolyzer", "prefLabel", "Electrolyzer", "en-US"),
("electrolyzer", "prefLabel", "Electrolyzer", "en-GB"),
("electrolyzer", "definition", "An electrolyzer is a machine that uses electricity to drive a chemical reaction.", "en"),
("electrolyzer", "related", "https://en.wikipedia.org/wiki/Electrolysis"),
("aec-electrolyzer", "type", "Concept"),
("aec-electrolyzer", "broader", PRODUCTS + "electrolyzer"),
("aec-electrolyzer", "prefLabel", "Alkaline Electrolysis Cell Electrolyzer", "en"),
("aec-electrolyzer", "definition", "An electrolyser with two electrodes operating in a liquid alkaline electrolyte.", "en"),
("aec-electrolyzer", "related", "https://en.wikipedia.org/wiki/Alkaline_water_electrolysis"),
("pem-electrolyzer", "type", "Concept"),
("pem-electrolyzer", "broader", PRODUCTS + "electrolyzer"),
("pem-electrolyzer", "prefLabel", "Proton Exchange Membrane Electrolyser", "en-GB"),
("pem-electrolyzer", "prefLabel", "Proton Exchange Membrane Electrolyzer", "en-US"),
("pem-electrolyzer", "definition", "An electrolyser with a solid polymer electrolyte and a proton exchange membrane.", "en"),
("pem-electrolyzer", "related", "https://en.wikipedia.org/wiki/Proton_exchange_membrane_electrolysis"),
("soel-electrolyzer", "type", "Concept"),
("soel-electrolyzer", "broader", PRODUCTS + "electrolyzer"),
("soel-electrolyzer", "prefLabel", "Solid Oxide Electrolyzer", "en"),
("soel-electrolyzer", "definition", "A solid oxide fuel cell that runs in regenerative mode to achieve the electrolysis of water.", "en"),
("soel-electrolyzer", "related", "https://en.wikipedia.org/wiki/Solid_oxide_electrolyzer_cell"),
# Missing from Combined Nomenclature
# tetrafluoroethylene, not poly-
(
URIRef(PRODUCTS + "tetrafluoroethylene"),
RDF.type,
SKOS.Concept
),
(
URIRef(PRODUCTS + "tetrafluoroethylene"),
SKOS.broader,
URIRef("http://data.europa.eu/xsp/cn2024/290349000080")
),
(
URIRef(PRODUCTS + "tetrafluoroethylene"),
SKOS.prefLabel,
Literal("Tetrafluoroethylene", lang="en")
),
(
URIRef(PRODUCTS + "tetrafluoroethylene"),
SKOS.related,
URIRef("https://en.wikipedia.org/wiki/Tetrafluoroethylene")
),
(
URIRef(PRODUCTS + "tetrafluoroethylene"),
SKOS.definition,
Literal("Tetrafluoroethylene (TFE) is a fluorocarbon with the chemical formula C2F4. It is the simplest perfluorinated alkene. This gaseous species is used primarily in the industrial preparation of fluoropolymers (from Wikipedia)", lang="en")
),
("tetrafluoroethylene", "type", "Concept"),
("tetrafluoroethylene", "broader", "http://data.europa.eu/xsp/cn2024/290349000080"),
("tetrafluoroethylene", "prefLabel", "Tetrafluoroethylene", "en"),
("tetrafluoroethylene", "related", "https://en.wikipedia.org/wiki/Tetrafluoroethylene"),
("tetrafluoroethylene", "definition", "Tetrafluoroethylene (TFE) is a fluorocarbon with the chemical formula C2F4. It is the simplest perfluorinated alkene. This gaseous species is used primarily in the industrial preparation of fluoropolymers (from Wikipedia)", "en"),
# Zeolite
(
URIRef(PRODUCTS + "zeolite"),
RDF.type,
SKOS.Concept
),
(
URIRef(PRODUCTS + "zeolite"),
SKOS.broader,
URIRef("http://data.europa.eu/xsp/cn2024/382400000080")
),
(
URIRef(PRODUCTS + "zeolite"),
SKOS.prefLabel,
Literal("Zeolite", lang="en")
),
(
URIRef(PRODUCTS + "zeolite"),
SKOS.related,
URIRef("https://en.wikipedia.org/wiki/Zeolite")
),
(
URIRef(PRODUCTS + "zeolite"),
SKOS.definition,
Literal("Zeolite is a family of several microporous, crystalline aluminosilicate materials commonly used as commercial adsorbents and catalysts (from Wikipedia)", lang="en")
),
("zeolite", "type", "Concept"),
("zeolite", "broader", "http://data.europa.eu/xsp/cn2024/382400000080"),
("zeolite", "prefLabel", "Zeolite", "en"),
("zeolite", "related", "https://en.wikipedia.org/wiki/Zeolite"),
("zeolite", "definition", "Zeolite is a family of several microporous, crystalline aluminosilicate materials commonly used as commercial adsorbents and catalysts (from Wikipedia)", "en"),
]
Loading

0 comments on commit e5e9262

Please sign in to comment.