from collections.abc import Iterable, Mapping, Sequence
from pathlib import Path
from typing import Any, Union

from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import DCTERMS, RDF, RDFS, SKOS
import skosify

from .ordered_serialization import OrderedTurtleSerializer

# NOTE(review): this namespace has no trailing '#' or '/', so `VAEM.term`
# concatenates directly onto "vaem" -- confirm that is intended.
VAEM = Namespace("http://www.linkedmodel.org/schema/vaem")
QUDTS = Namespace("http://qudt.org/schema/qudt/")
QUDTV = Namespace("http://qudt.org/vocab/")
QK = QUDTV.quantitykind


# Short string aliases accepted in place of a full predicate `URIRef`.
COMMON_PREDICATES = {
    "broader": SKOS.broader,
    "narrower": SKOS.narrower,
    "prefLabel": SKOS.prefLabel,
    "altLabel": SKOS.altLabel,
    "hiddenLabel": SKOS.hiddenLabel,
    "notation": SKOS.notation,
    "definition": SKOS.definition,
    "related": SKOS.related,
    "exactMatch": SKOS.exactMatch,
    "closeMatch": SKOS.closeMatch,
    "inScheme": SKOS.inScheme,
    "isDefinedBy": RDFS.isDefinedBy,
    "isReplacedBy": DCTERMS.isReplacedBy,
    "type": RDF.type,
    "hasQuantityKind": QUDTS.hasQuantityKind,
    "hasDimensionVector": QUDTS.hasDimensionVector,
    "conversionMultiplier": QUDTS.conversionMultiplier,
    "conversionMultiplierSN": QUDTS.conversionMultiplierSN,
}

# rdflib term class that the object of each known predicate must have.
# Plain (non-rdflib) object values are coerced to this class.
OBJECT_TYPES_FOR_PREDICATES = {
    # Hierarchy links point at other concepts, so they must be IRIs; as
    # literals they serialize as strings and skosify then infers inverse
    # triples whose *subject* is a literal.
    SKOS.broader: URIRef,
    SKOS.narrower: URIRef,
    SKOS.prefLabel: Literal,
    SKOS.altLabel: Literal,
    SKOS.hiddenLabel: Literal,
    SKOS.notation: Literal,
    SKOS.definition: Literal,
    SKOS.related: URIRef,
    SKOS.exactMatch: URIRef,
    SKOS.closeMatch: URIRef,
    SKOS.inScheme: URIRef,
    RDFS.isDefinedBy: URIRef,
    DCTERMS.isReplacedBy: URIRef,
    RDF.type: URIRef,
    QUDTS.hasQuantityKind: URIRef,
    QUDTS.hasDimensionVector: URIRef,
    # Conversion multipliers are numeric values, not resources.
    QUDTS.conversionMultiplier: Literal,
    QUDTS.conversionMultiplierSN: Literal,
}

# Short string aliases accepted as the object of an `RDF.type` triple.
COMMON_OBJECTS = {
    "Concept": SKOS.Concept,
    "ConceptScheme": SKOS.ConceptScheme,
}


def _unpack_row(row):
    """Return ``(subject, predicate, object, language)`` for one data row."""
    if isinstance(row, Mapping):
        try:
            return row["subject"], row["predicate"], row["object"], row.get("language")
        except KeyError as err:
            raise ValueError(f"Data line {row} is missing required key {err}") from None
    if len(row) == 3:
        s, p, o = row
        return s, p, o, None
    if len(row) == 4:
        s, p, o, lang = row
        return s, p, o, lang
    raise ValueError(f"Data line {row} has wrong number of elements")


def _resolve_predicate(p):
    """Turn a `URIRef` or a `COMMON_PREDICATES` alias into a predicate `URIRef`."""
    if isinstance(p, URIRef):
        return p
    if isinstance(p, str):
        try:
            return COMMON_PREDICATES[p]
        except KeyError:
            raise KeyError(
                f"Predicate {p} not in common predicates; pass a `URIRef` instead"
            ) from None
    raise ValueError(f"Predicate {p} has incorrect type for this function")


def _resolve_object(o, predicate, lang):
    """Coerce `o` to the rdflib term class required by `predicate`."""
    object_type = OBJECT_TYPES_FOR_PREDICATES.get(predicate)

    if isinstance(o, (Literal, URIRef)):
        object_ = o
    # Equality, not identity: a caller-built `URIRef` for rdf:type is a
    # different Python object from `RDF.type`.
    elif predicate == RDF.type and o in COMMON_OBJECTS:
        object_ = COMMON_OBJECTS[o]
    elif object_type is Literal:
        # `lang=None` is the same as omitting the language tag.
        object_ = Literal(o, lang=lang)
    elif object_type is not None:
        object_ = URIRef(o)
    else:
        raise ValueError(f"Object {o} can't be translated into correct form")

    if object_type is not None and not isinstance(object_, object_type):
        raise ValueError(
            f"Object {object_} has incorrect type for this function; "
            f"should be {object_type.__name__} but got {type(object_).__name__}"
        )
    return object_


def add_custom_terms(
    data: Iterable[Union[Mapping[str, Any], Sequence[Any]]],
    namespace: str,
    filename: str,
) -> Path:
    """Build a graph from `data`, run SKOS inference, and write it as Turtle.

    Each element of `data` defines one triple and may be either a tuple
    ``(subject, predicate, object)`` / ``(subject, predicate, object, language)``
    or a dict with the following structure:

    ```python
    {
        'subject': str,  # required; will be combined with `namespace` and turned into a `URIRef`
        'predicate': str | URIRef,  # required; see COMMON_PREDICATES for allowed strings
        'object': str | URIRef | Literal,  # required; type will be inferred from predicate if possible
        'language': str  # optional; only for literal `object` values.
    }
    ```

    If given a string, and the `predicate` is `RDF.type`, `object` will use
    the `COMMON_OBJECTS` mapping if possible.

    Args:
        data: Rows describing the triples to add.
        namespace: IRI prefix prepended to every subject.
        filename: Output file name; its suffix is replaced with ``.ttl``.

    Returns:
        Path of the written Turtle file, inside the ``output`` directory
        next to this module.

    Raises:
        ValueError: If `namespace` or `filename` is empty or not a string,
            a row has the wrong shape, or a predicate/object cannot be
            converted to the required rdflib term.
        KeyError: If a string predicate is not in `COMMON_PREDICATES`.
    """
    if not namespace or not isinstance(namespace, str):
        raise ValueError("namespace must be a non-empty string")
    if not filename or not isinstance(filename, str):
        raise ValueError("filename must be a non-empty string")

    graph = Graph()
    for row in data:
        s, p, o, lang = _unpack_row(row)
        predicate = _resolve_predicate(p)
        graph.add((URIRef(namespace + s), predicate, _resolve_object(o, predicate, lang)))

    skosify.infer.skos_topConcept(graph)
    skosify.infer.skos_hierarchical(graph, narrower=True)
    skosify.infer.skos_transitive(graph, narrower=True)

    output_dir = Path(__file__).parent / "output"
    # A fresh checkout or installed package may not have the directory yet.
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = (output_dir / filename).with_suffix(".ttl")

    serializer = OrderedTurtleSerializer(graph)
    with output_path.open("wb") as fp:
        serializer.serialize(fp)

    return output_path
PRODUCTS = Namespace("https://vocab.sentier.dev/products/")

# Rows passed to `add_custom_terms`: (subject, predicate, object[, language]).
# Subjects are local names; the caller supplies the namespace they are
# appended to. Predicates and the "Concept" object are string aliases.
CUSTOM_PRODUCTS_DATA = [
    ("electrolyzer", "type", "Concept"),
    ("electrolyzer", "broader", "http://data.europa.eu/xsp/cn2024/854330700080"),
    ("electrolyzer", "prefLabel", "Electrolyzer", "en-US"),
    # British spelling for the en-GB label, matching the pem-electrolyzer entry.
    ("electrolyzer", "prefLabel", "Electrolyser", "en-GB"),
    ("electrolyzer", "definition", "An electrolyzer is a machine that uses electricity to drive a chemical reaction.", "en"),
    ("electrolyzer", "related", "https://en.wikipedia.org/wiki/Electrolysis"),
    ("aec-electrolyzer", "type", "Concept"),
    ("aec-electrolyzer", "broader", PRODUCTS + "electrolyzer"),
    ("aec-electrolyzer", "prefLabel", "Alkaline Electrolysis Cell Electrolyzer", "en"),
    ("aec-electrolyzer", "definition", "An electrolyser with two electrodes operating in a liquid alkaline electrolyte.", "en"),
    ("aec-electrolyzer", "related", "https://en.wikipedia.org/wiki/Alkaline_water_electrolysis"),
    ("pem-electrolyzer", "type", "Concept"),
    ("pem-electrolyzer", "broader", PRODUCTS + "electrolyzer"),
    ("pem-electrolyzer", "prefLabel", "Proton Exchange Membrane Electrolyser", "en-GB"),
    ("pem-electrolyzer", "prefLabel", "Proton Exchange Membrane Electrolyzer", "en-US"),
    ("pem-electrolyzer", "definition", "An electrolyser with a solid polymer electrolyte and a proton exchange membrane.", "en"),
    ("pem-electrolyzer", "related", "https://en.wikipedia.org/wiki/Proton_exchange_membrane_electrolysis"),
    ("soel-electrolyzer", "type", "Concept"),
    ("soel-electrolyzer", "broader", PRODUCTS + "electrolyzer"),
    ("soel-electrolyzer", "prefLabel", "Solid Oxide Electrolyzer", "en"),
    ("soel-electrolyzer", "definition", "A solid oxide fuel cell that runs in regenerative mode to achieve the electrolysis of water.", "en"),
    ("soel-electrolyzer", "related", "https://en.wikipedia.org/wiki/Solid_oxide_electrolyzer_cell"),
    # Missing from Combined Nomenclature
    # tetrafluoroethylene, not poly-
    ("tetrafluoroethylene", "type", "Concept"),
    ("tetrafluoroethylene", "broader", "http://data.europa.eu/xsp/cn2024/290349000080"),
    ("tetrafluoroethylene", "prefLabel", "Tetrafluoroethylene", "en"),
    ("tetrafluoroethylene", "related", "https://en.wikipedia.org/wiki/Tetrafluoroethylene"),
    ("tetrafluoroethylene", "definition", "Tetrafluoroethylene (TFE) is a fluorocarbon with the chemical formula C2F4. It is the simplest perfluorinated alkene. This gaseous species is used primarily in the industrial preparation of fluoropolymers (from Wikipedia)", "en"),
    # Zeolite
    ("zeolite", "type", "Concept"),
    ("zeolite", "broader", "http://data.europa.eu/xsp/cn2024/382400000080"),
    ("zeolite", "prefLabel", "Zeolite", "en"),
    ("zeolite", "related", "https://en.wikipedia.org/wiki/Zeolite"),
    ("zeolite", "definition", "Zeolite is a family of several microporous, crystalline aluminosilicate materials commonly used as commercial adsorbents and catalysts (from Wikipedia)", "en"),
]
# The MIT License (MIT)

# Copyright (c) 2015 UBO : Scriptotek

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# From https://github.com/scriptotek/otsrdflib

import logging
import re

from rdflib import BNode
from rdflib.namespace import Namespace, RDF
from rdflib.plugins.serializers.turtle import TurtleSerializer

SD = Namespace('http://www.w3.org/ns/sparql-service-description#')
ISOTHES = Namespace('http://purl.org/iso25964/skos-thes#')

logger = logging.getLogger(__name__)


class OrderedTurtleSerializer(TurtleSerializer):
    """Turtle serializer whose subject order is grouped by class and sorted.

    Typed subjects are grouped by their `rdf:type` class and sorted inside
    each group; remaining subjects follow. Configure the order through the
    instance attributes set in `__init__`.
    """

    short_name = "ots"

    def __init__(self, store):
        """Create a serializer for `store` with the default ordering rules."""
        super().__init__(store)

        # Classes listed here are emitted first, in this order; all other
        # classes follow in sorted order.
        self.class_order = []

        # Per-class overrides of `self.sorters`, keyed by class URI.
        self.sorters_by_class = {}

        # Default sort key generators: (regex, key function) pairs. The key
        # function receives the regex match groups as a tuple.
        self.sorters = [
            ('^(.+)$', lambda x: str(x[0])),
        ]

    def getSorters(self, class_uri):
        """Return the sorters for `class_uri`, or the defaults if none are set."""
        return self.sorters_by_class.get(class_uri, self.sorters)

    def getSortKeyFunction(self, class_uri):
        """Return a `key=` function that orders the members of `class_uri`."""
        sorters = self.getSorters(class_uri)

        def sortKeyFn(x):
            # The first pattern that matches decides the key.
            for pattern, func in sorters:
                match = re.search(pattern, x)
                if match:
                    return func(match.groups())
            logger.warning('%s did not match any sorters', x)
            # Return a key rather than an implicit None, which `sorted()`
            # cannot compare with the string keys of matched members.
            return str(x)

        return sortKeyFn

    def orderSubjects(self):
        """Return all subjects: typed ones grouped by class, then the rest.

        Relies on `self._topLevels`, `self._references` and `self._subjects`,
        which are not defined in this file and are expected to come from the
        rdflib base serializer. Presumably the base class calls this method
        while serializing -- confirm against rdflib.
        """
        seen = set()
        subjects = []

        # Classes not listed in self.class_order are appended alphabetically.
        other_classes = sorted(
            class_uri
            for class_uri in set(self.store.objects(predicate=RDF.type))
            if class_uri not in self.class_order
        )

        for class_uri in self.class_order + other_classes:
            members = sorted(
                self.store.subjects(RDF.type, class_uri),
                key=self.getSortKeyFunction(class_uri),
            )
            for member in members:
                subjects.append(member)
                self._topLevels[member] = True
                seen.add(member)

        # Everything not yet emitted: non-blank nodes first, then by
        # reference count, then by the subject itself.
        recursable = [
            (isinstance(subject, BNode), self._references[subject], subject)
            for subject in self._subjects
            if subject not in seen
        ]
        recursable.sort()
        subjects.extend(subject for (_is_bnode, _refs, subject) in recursable)

        return subjects
+ + a skos:Concept ; + skos:broader "https://vocab.sentier.dev/products/electrolyzer" ; + skos:broaderTransitive "https://vocab.sentier.dev/products/electrolyzer" ; + skos:definition "An electrolyser with a solid polymer electrolyte and a proton exchange membrane."@en ; + skos:prefLabel "Proton Exchange Membrane Electrolyser"@en-GB, + "Proton Exchange Membrane Electrolyzer"@en-US ; + skos:related . + + a skos:Concept ; + skos:broader "https://vocab.sentier.dev/products/electrolyzer" ; + skos:broaderTransitive "https://vocab.sentier.dev/products/electrolyzer" ; + skos:definition "A solid oxide fuel cell that runs in regenerative mode to achieve the electrolysis of water."@en ; + skos:prefLabel "Solid Oxide Electrolyzer"@en ; + skos:related . a skos:Concept ; - skos:broader ; - skos:broaderTransitive ; + skos:broader "http://data.europa.eu/xsp/cn2024/290349000080" ; + skos:broaderTransitive "http://data.europa.eu/xsp/cn2024/290349000080" ; skos:definition "Tetrafluoroethylene (TFE) is a fluorocarbon with the chemical formula C2F4. It is the simplest perfluorinated alkene. This gaseous species is used primarily in the industrial preparation of fluoropolymers (from Wikipedia)"@en ; skos:prefLabel "Tetrafluoroethylene"@en ; skos:related . a skos:Concept ; - skos:broader ; - skos:broaderTransitive ; + skos:broader "http://data.europa.eu/xsp/cn2024/382400000080" ; + skos:broaderTransitive "http://data.europa.eu/xsp/cn2024/382400000080" ; skos:definition "Zeolite is a family of several microporous, crystalline aluminosilicate materials commonly used as commercial adsorbents and catalysts (from Wikipedia)"@en ; skos:prefLabel "Zeolite"@en ; skos:related . - a skos:Concept ; - skos:broader ; - skos:broaderTransitive , - ; - skos:definition "An electrolyzer with two electrodes operating in a liquid alkaline electrolyte."@en ; - skos:prefLabel "Alkaline Electrolysis Cell Electrolyzer"@en ; - skos:related . 
- - a skos:Concept ; - skos:broader ; - skos:broaderTransitive , - ; - skos:definition "An electrolyzer with a solid polymer electrolyte and a proton exchange membrane."@en ; - skos:prefLabel "Proton Exchange Membrane Electrolyzer"@en ; - skos:related . +"http://data.europa.eu/xsp/cn2024/290349000080" skos:narrower ; + skos:narrowerTransitive . - a skos:Concept ; - skos:broader ; - skos:broaderTransitive , - ; - skos:definition "A solid oxide fuel cell that runs in regenerative mode to achieve the electrolysis of water."@en ; - skos:prefLabel "Solid Oxide Electrolyzer"@en ; - skos:related . +"http://data.europa.eu/xsp/cn2024/382400000080" skos:narrower ; + skos:narrowerTransitive . - skos:narrower ; - skos:narrowerTransitive , - , - , - . +"http://data.europa.eu/xsp/cn2024/854330700080" skos:narrower ; + skos:narrowerTransitive . - a skos:Concept ; - skos:broader ; - skos:broaderTransitive ; - skos:definition "An electrolyzer is a machine that uses electricity to drive a chemical reaction."@en ; - skos:narrower , +"https://vocab.sentier.dev/products/electrolyzer" skos:narrower , , ; skos:narrowerTransitive , , - ; - skos:prefLabel "Electrolyzer"@en ; - skos:related . + .