diff --git a/sentier_vocab/custom_products.py b/sentier_vocab/custom_products.py index 5319725..0e1fd05 100644 --- a/sentier_vocab/custom_products.py +++ b/sentier_vocab/custom_products.py @@ -1,6 +1,6 @@ from rdflib import Graph, Namespace, URIRef from sentier_vocab.graph_base import GraphBase -from sentier_vocab.data.custom_products import CUSTOM_PRODUCTS_DATA +from sentier_vocab.data.input.custom_products import CUSTOM_PRODUCTS_DATA from rdflib.namespace import RDFS, SKOS, RDF import skosify from loguru import logger diff --git a/sentier_vocab/graph_base.py b/sentier_vocab/graph_base.py index 2d54f2c..105a2b8 100644 --- a/sentier_vocab/graph_base.py +++ b/sentier_vocab/graph_base.py @@ -5,7 +5,7 @@ from sentier_vocab.utils import DEFAULT_DATA_DIR, GithubZipfileRelease -vocab_data_dir = Path(__file__).parent / "data" +vocab_data_dir = Path(__file__).parent / "output" class GraphBase: diff --git a/sentier_vocab/input/__init__.py b/sentier_vocab/input/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sentier_vocab/data/custom_products.py b/sentier_vocab/input/custom_products.py similarity index 64% rename from sentier_vocab/data/custom_products.py rename to sentier_vocab/input/custom_products.py index 1996b8d..1608b1c 100644 --- a/sentier_vocab/data/custom_products.py +++ b/sentier_vocab/input/custom_products.py @@ -104,4 +104,57 @@ SKOS.related, URIRef("https://en.wikipedia.org/wiki/Solid_oxide_electrolyzer_cell") ), + # Missing from Combined Nomenclature + # tetrafluoroethylene, not poly- + ( + URIRef(PRODUCTS + "tetrafluoroethylene"), + RDF.type, + SKOS.Concept + ), + ( + URIRef(PRODUCTS + "tetrafluoroethylene"), + SKOS.broader, + URIRef("http://data.europa.eu/xsp/cn2024/290349000080") + ), + ( + URIRef(PRODUCTS + "tetrafluoroethylene"), + SKOS.prefLabel, + Literal("Tetrafluoroethylene", lang="en") + ), + ( + URIRef(PRODUCTS + "tetrafluoroethylene"), + SKOS.related, + URIRef("https://en.wikipedia.org/wiki/Tetrafluoroethylene") + ), + ( + 
URIRef(PRODUCTS + "tetrafluoroethylene"), + SKOS.definition, + Literal("Tetrafluoroethylene (TFE) is a fluorocarbon with the chemical formula C2F4. It is the simplest perfluorinated alkene. This gaseous species is used primarily in the industrial preparation of fluoropolymers (from Wikipedia)", lang="en") + ), + # Zeolite + ( + URIRef(PRODUCTS + "zeolite"), + RDF.type, + SKOS.Concept + ), + ( + URIRef(PRODUCTS + "zeolite"), + SKOS.broader, + URIRef("http://data.europa.eu/xsp/cn2024/382400000080") + ), + ( + URIRef(PRODUCTS + "zeolite"), + SKOS.prefLabel, + Literal("Zeolite", lang="en") + ), + ( + URIRef(PRODUCTS + "zeolite"), + SKOS.related, + URIRef("https://en.wikipedia.org/wiki/Zeolite") + ), + ( + URIRef(PRODUCTS + "zeolite"), + SKOS.definition, + Literal("Zeolite is a family of several microporous, crystalline aluminosilicate materials commonly used as commercial adsorbents and catalysts (from Wikipedia)", lang="en") + ), ] diff --git a/sentier_vocab/data/extra-data.ttl b/sentier_vocab/input/extra-data.ttl similarity index 100% rename from sentier_vocab/data/extra-data.ttl rename to sentier_vocab/input/extra-data.ttl diff --git a/sentier_vocab/data/model-terms.ttl b/sentier_vocab/input/model-terms.ttl similarity index 90% rename from sentier_vocab/data/model-terms.ttl rename to sentier_vocab/input/model-terms.ttl index ff8318a..282059a 100644 --- a/sentier_vocab/data/model-terms.ttl +++ b/sentier_vocab/input/model-terms.ttl @@ -64,6 +64,13 @@ qudt:hasQuantityKind ; skos:definition "Product lifetime is the time interval from when a product is sold to when it is discarded."@en . + a skos:Concept ; + skos:inScheme ; + skos:related ; + skos:prefLabel "Capacity factor"@en ; + qudt:hasQuantityKind ; + skos:definition "Ratio of actual output to potential output considering availability constraints."@en . 
+ # Energy production and consumption a skos:ConceptScheme ; @@ -171,6 +178,21 @@ qudt:hasQuantityKind ; skos:definition "The maximum allowed temperature in the immediate environment of the electrolyser for successful operation"@en . + a skos:Concept ; + skos:broader ; + skos:inScheme ; + skos:prefLabel "Stack (electrolyser)"@en-GB ; + skos:prefLabel "Stack (electrolyzer)"@en-US ; + qudt:hasQuantityKind ; + skos:definition "Multiple electrolysis cells connected together form the electrolyser stack"@en . + + a skos:Concept ; + skos:broader ; + skos:inScheme ; + skos:prefLabel "Balance of Plant"@en ; + qudt:hasQuantityKind ; + skos:definition "The other components of an electrolyser other than the stack. Usually less costly and has a longer lifetime."@en . + a skos:Concept ; skos:inScheme ; skos:prefLabel "Minimum stack temperature"@en ; diff --git a/sentier_vocab/data/qudt-patches.ttl b/sentier_vocab/input/qudt-patches.ttl similarity index 100% rename from sentier_vocab/data/qudt-patches.ttl rename to sentier_vocab/input/qudt-patches.ttl diff --git a/sentier_vocab/data/selected-quantity-kinds.json b/sentier_vocab/input/selected-quantity-kinds.json similarity index 100% rename from sentier_vocab/data/selected-quantity-kinds.json rename to sentier_vocab/input/selected-quantity-kinds.json diff --git a/sentier_vocab/data/simapro.ttl b/sentier_vocab/input/simapro.ttl similarity index 100% rename from sentier_vocab/data/simapro.ttl rename to sentier_vocab/input/simapro.ttl diff --git a/sentier_vocab/model_terms.py b/sentier_vocab/model_terms.py index fda67a0..dbfc38f 100644 --- a/sentier_vocab/model_terms.py +++ b/sentier_vocab/model_terms.py @@ -4,7 +4,7 @@ from loguru import logger from rdflib import Graph -filepath = Path(__file__).parent / "data" / "model-terms.ttl" +filepath = Path(__file__).parent / "input" / "model-terms.ttl" def ModelTerms(): @@ -15,7 +15,7 @@ def ModelTerms(): skosify.infer.skos_hierarchical(graph, narrower=True) 
skosify.infer.skos_transitive(graph, narrower=True) - output_path = filepath.with_suffix(".reciprocal.ttl") + output_path = Path(__file__).parent / "output" / "model-terms.reciprocal.ttl" logger.info("Writing output TTL file {}", output_path) graph.serialize(destination=output_path) return filepath.with_suffix(".ttl") diff --git a/sentier_vocab/data/custom-products.ttl b/sentier_vocab/output/custom-products.ttl similarity index 66% rename from sentier_vocab/data/custom-products.ttl rename to sentier_vocab/output/custom-products.ttl index c4f7cfb..75ef64e 100644 --- a/sentier_vocab/data/custom-products.ttl +++ b/sentier_vocab/output/custom-products.ttl @@ -1,5 +1,25 @@ @prefix skos: . + skos:narrower ; + skos:narrowerTransitive . + + skos:narrower ; + skos:narrowerTransitive . + + a skos:Concept ; + skos:broader ; + skos:broaderTransitive ; + skos:definition "Tetrafluoroethylene (TFE) is a fluorocarbon with the chemical formula C2F4. It is the simplest perfluorinated alkene. This gaseous species is used primarily in the industrial preparation of fluoropolymers (from Wikipedia)"@en ; + skos:prefLabel "Tetrafluoroethylene"@en ; + skos:related . + + a skos:Concept ; + skos:broader ; + skos:broaderTransitive ; + skos:definition "Zeolite is a family of several microporous, crystalline aluminosilicate materials commonly used as commercial adsorbents and catalysts (from Wikipedia)"@en ; + skos:prefLabel "Zeolite"@en ; + skos:related . + a skos:Concept ; skos:broader ; skos:broaderTransitive , diff --git a/sentier_vocab/data/model-terms.reciprocal.ttl b/sentier_vocab/output/model-terms.reciprocal.ttl similarity index 87% rename from sentier_vocab/data/model-terms.reciprocal.ttl rename to sentier_vocab/output/model-terms.reciprocal.ttl index fc57159..9b77974 100644 --- a/sentier_vocab/data/model-terms.reciprocal.ttl +++ b/sentier_vocab/output/model-terms.reciprocal.ttl @@ -3,6 +3,13 @@ @prefix skos: . @prefix xsd: . 
+ a skos:Concept ; + qudt:hasQuantityKind ; + skos:definition "Ratio of actual output to potential output considering availability constraints."@en ; + skos:inScheme ; + skos:prefLabel "Capacity factor"@en ; + skos:related . + a skos:Concept ; qudt:hasQuantityKind ; skos:definition "The pressure of the hydrogen produced by the electrolyser."@en ; @@ -141,6 +148,23 @@ skos:prefLabel "Electrolyser terms"@en-GB, "Electrolyzer terms"@en-US . + a skos:Concept ; + qudt:hasQuantityKind ; + skos:broader ; + skos:broaderTransitive ; + skos:definition "The other components of an electrolyser other than the stack. Usually less costly and has a longer lifetime."@en ; + skos:inScheme ; + skos:prefLabel "Balance of Plant"@en . + + a skos:Concept ; + qudt:hasQuantityKind ; + skos:broader ; + skos:broaderTransitive ; + skos:definition "Multiple electrolysis cells connected together form the electrolyser stack"@en ; + skos:inScheme ; + skos:prefLabel "Stack (electrolyser)"@en-GB, + "Stack (electrolyzer)"@en-US . + a skos:Concept ; skos:broader ; skos:broaderTransitive ; @@ -194,6 +218,11 @@ ; skos:prefLabel "energy service demand"@en . + skos:narrower , + ; + skos:narrowerTransitive , + . 
+ a skos:ConceptScheme ; dcterms:contributor "Chris Mutel" ; dcterms:created "2024-10-07"^^xsd:date ; diff --git a/sentier_vocab/data/oeo-vocab.ttl b/sentier_vocab/output/oeo-vocab.ttl similarity index 100% rename from sentier_vocab/data/oeo-vocab.ttl rename to sentier_vocab/output/oeo-vocab.ttl diff --git a/sentier_vocab/data/qudt-sentier-dev.ttl b/sentier_vocab/output/qudt-sentier-dev.ttl similarity index 100% rename from sentier_vocab/data/qudt-sentier-dev.ttl rename to sentier_vocab/output/qudt-sentier-dev.ttl diff --git a/sentier_vocab/data/simapro.supplemented.ttl b/sentier_vocab/output/simapro.supplemented.ttl similarity index 100% rename from sentier_vocab/data/simapro.supplemented.ttl rename to sentier_vocab/output/simapro.supplemented.ttl diff --git a/sentier_vocab/qudt.py b/sentier_vocab/qudt.py index ed7b9bb..a773afe 100644 --- a/sentier_vocab/qudt.py +++ b/sentier_vocab/qudt.py @@ -19,7 +19,7 @@ QK = QUDTV.quantitykind -vocab_data_dir = Path(__file__).parent / "data" +vocab_data_dir = Path(__file__).parent / "input" selected_fp = vocab_data_dir / "selected-quantity-kinds.json" extra_concepts_data = vocab_data_dir / "extra-data.ttl" qudt_patches_data = vocab_data_dir / "qudt-patches.ttl" @@ -79,7 +79,7 @@ def write_graph( if not filename.endswith(".ttl"): filename += ".ttl" if not dirpath: - dirpath = vocab_data_dir + dirpath = Path(__file__).parent / "output" output_fp = Path(dirpath) / filename self.graph.serialize(destination=output_fp) return output_fp @@ -338,5 +338,43 @@ def add_unit(self, uri: URIRef, unit_graph: Graph, cs: URIRef, qudt_uri: URIRef) pass +def add_quantity_kinds_to_graph( + input_ttl: Path, + qudt_ttl: Path, +) -> Path: + """ + The `input_ttl` concept scheme was written by hand (!), but we can make it more useful by + positioning each input concept in the QUDT quantity kind hierarchy. 
+ """ + output_ttl = input_ttl.with_suffix(".supplemented" + input_ttl.suffix) + input_graph = Graph().parse(input_ttl) + qudt = Graph().parse(qudt_ttl) + + qudt_qk_mapping = { + s: o + for s, v, o in qudt.triples((None, QUDTS.hasQuantityKind, None)) + if o.startswith("https://vocab.sentier.dev/units/quantity-kind/") + and s.startswith("https://vocab.sentier.dev/units/unit/") + } + qudt_d_mapping = { + s: o + for s, v, o in qudt.triples((None, QUDTS.hasDimensionVector, None)) + if o.startswith("http://qudt.org/vocab/dimensionvector/") + and s.startswith("https://vocab.sentier.dev/units/unit/") + } + + for s, v, o in input_graph.triples((None, SKOS.exactMatch, None)): + if o.startswith("https://vocab.sentier.dev/units/unit/"): + input_graph.add((s, QUDTS.hasQuantityKind, qudt_qk_mapping[o])) + input_graph.add((s, QUDTS.hasDimensionVector, qudt_d_mapping[o])) + + input_graph.serialize(destination=output_ttl) + return output_ttl + + if __name__ == "__main__": QUDT().write_graph() + add_quantity_kinds_to_graph( + Path(__file__).parent / "input" / "simapro.ttl", + Path(__file__).parent / "output" / "qudt-sentier-dev.ttl" + ) diff --git a/sentier_vocab/supplements.py b/sentier_vocab/supplements.py deleted file mode 100644 index f04dd29..0000000 --- a/sentier_vocab/supplements.py +++ /dev/null @@ -1,47 +0,0 @@ -from pathlib import Path - -from rdflib import Graph, Namespace -from rdflib.namespace import SKOS - -QUDTS = Namespace("http://qudt.org/schema/qudt/") - - -def add_quantity_kinds_to_graph( - input_ttl: Path, - qudt_ttl: Path, -) -> Path: - """ - The `input_ttl` concept scheme was written by hand (!), but we can make it more useful by - positioning each input concept in the QUDT quantity kind hierarchy. 
- """ - output_ttl = input_ttl.with_suffix(".supplemented" + input_ttl.suffix) - input_graph = Graph().parse(input_ttl) - qudt = Graph().parse(qudt_ttl) - - qudt_qk_mapping = { - s: o - for s, v, o in qudt.triples((None, QUDTS.hasQuantityKind, None)) - if o.startswith("https://vocab.sentier.dev/units/quantity-kind/") - and s.startswith("https://vocab.sentier.dev/units/unit/") - } - qudt_d_mapping = { - s: o - for s, v, o in qudt.triples((None, QUDTS.hasDimensionVector, None)) - if o.startswith("http://qudt.org/vocab/dimensionvector/") - and s.startswith("https://vocab.sentier.dev/units/unit/") - } - - for s, v, o in input_graph.triples((None, SKOS.exactMatch, None)): - if o.startswith("https://vocab.sentier.dev/units/unit/"): - input_graph.add((s, QUDTS.hasQuantityKind, qudt_qk_mapping[o])) - input_graph.add((s, QUDTS.hasDimensionVector, qudt_d_mapping[o])) - - input_graph.serialize(destination=output_ttl) - return output_ttl - - -if __name__ == "__main__": - vocab_data_dir = Path(__file__).parent / "data" - add_quantity_kinds_to_graph( - vocab_data_dir / "simapro.ttl", vocab_data_dir / "qudt-sentier-dev.ttl" - )