From f6a06d431ae62b22b7676cefafb1061a02bb578c Mon Sep 17 00:00:00 2001
From: Bartek Foltyn <62876443+bfoltyn@users.noreply.github.com>
Date: Thu, 5 Oct 2023 23:36:49 +0200
Subject: [PATCH] Additional EFO xref context from axioms

merges https://github.com/related-sciences/nxontology-data/pull/19

Co-authored-by: Bartek Foltyn
---
Review notes (free-form text after the three-dash line; not part of the
commit message and not part of any hunk):

* Line structure restored: one header / hunk line per physical line, so the
  hunk counts (45 + 17 + 18 = 80 insertions) are verifiable again.
* PREFIX declarations in both .rq files had lost their IRIs. "mondo:" and
  "skos:" are restored from the regexes used in mapping_properties.rq; "efo:",
  "rdf:", "owl:" and "oboInOwl:" use the published namespace IRIs.
  NOTE(review): "efo:" is declared but not referenced by the query - confirm
  it is still wanted.
* mapping_properties.rq: the second BIND used to re-assign
  ?mapping_property_id, which SPARQL 1.1 forbids for a variable that is
  already in scope. The first BIND now writes ?mapping_property_mondo_id and
  the second derives ?mapping_property_id from it; selected columns are
  unchanged.
* NOTE(review): the blob id on the "index" line of mapping_properties.rq
  predates the BIND change above; regenerate the patch if the id matters.
* NOTE(review): get_mapping_properties_df splits CURIEs with
  .str.split(":", expand=True) and reads only row[0] / row[1]; presumably no
  accession contains a second ":" - confirm, or pass n=1.

 nxontology_data/efo/efo.py                    | 45 +++++++++++++++++++
 .../efo/queries/mapping_properties.rq         | 17 +++++++
 nxontology_data/efo/queries/xref_sources.rq   | 18 ++++++++
 3 files changed, 80 insertions(+)
 create mode 100644 nxontology_data/efo/queries/mapping_properties.rq
 create mode 100644 nxontology_data/efo/queries/xref_sources.rq

diff --git a/nxontology_data/efo/efo.py b/nxontology_data/efo/efo.py
index 1b072bc..fb048c0 100644
--- a/nxontology_data/efo/efo.py
+++ b/nxontology_data/efo/efo.py
@@ -7,6 +7,7 @@
 from typing import Any
 
 import bioversions
+import curies
 import fsspec
 import networkx as nx
 import pandas as pd
@@ -142,6 +143,42 @@ def get_obsolete_df(self) -> pd.DataFrame:
     def get_alt_id_df(self) -> pd.DataFrame:
         return self.run_query("alt_id", cache=True)
 
+    def get_xref_sources_df(self) -> pd.DataFrame:
+        return self.run_query("xref_sources", cache=True)
+
+    def get_mapping_properties_df(self) -> pd.DataFrame:
+        converter = curies.get_bioregistry_converter()
+
+        converter.add_prefix(
+            "icd10cm-missing-prefix", "http://purl.bioontology.org/ontology/ICD10CM/"
+        )
+
+        df = (
+            self.run_query("mapping_properties", cache=True)
+            .assign(
+                xref_id=lambda df: df["xref_id"].apply(
+                    lambda xref: converter.compress(xref)
+                )
+            )
+            .dropna()
+            .assign(
+                xref_id=lambda df: df["xref_id"]
+                .str.replace("icd10cm-missing-prefix:", "icd10cm:")
+                .str.replace("obo:Orphanet_", "Orphanet:")
+                .str.split(":", expand=True)
+                .apply(
+                    lambda row: normalize_parsed_curie(
+                        xref_prefix=row[0],
+                        xref_accession=row[1],
+                        collapse_orphanet=True,
+                    ),
+                    axis="columns",
+                )
+            )
+        )
+
+        return df
+
     def get_synonyms(self) -> dict[str, dict[str, str]]:
         synonym_scopes = {
             "hasExactSynonym": "exact",
@@ -272,6 +309,14 @@ def write_outputs(self) -> None:
         write_dataframe(
             self.get_obsolete_df(), output_dir.joinpath(f"{self.name}_obsolete.json.gz")
         )
+        write_dataframe(
+            self.get_mapping_properties_df(),
+            output_dir.joinpath(f"{self.name}_mapping_properties.json.gz"),
+        )
+        write_dataframe(
+            self.get_xref_sources_df(),
+            output_dir.joinpath(f"{self.name}_xref_sources.json.gz"),
+        )
         if nxo.name == "efo_otar_profile":
             nxo_slim = self.create_slim_nxo(nxo)
             write_ontology(nxo_slim, output_dir, compression_threshold_mb=30.0)
diff --git a/nxontology_data/efo/queries/mapping_properties.rq b/nxontology_data/efo/queries/mapping_properties.rq
new file mode 100644
index 0000000..086f241
--- /dev/null
+++ b/nxontology_data/efo/queries/mapping_properties.rq
@@ -0,0 +1,17 @@
+PREFIX mondo: <http://purl.obolibrary.org/obo/mondo#>
+PREFIX efo: <http://www.ebi.ac.uk/efo/>
+PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
+
+SELECT ?efo_id (?xref_uri as ?xref_id) ?mapping_property_id ?efo_uri ?xref_uri ?mapping_property_uri
+WHERE {
+  VALUES ?mapping_property_uri {mondo:closeMatch mondo:exactMatch skos:mappingRelation skos:closeMatch skos:exactMatch skos:broadMatch skos:narrowMatch skos:relatedMatch}
+
+  ?efo_uri rdf:type owl:Class .
+  ?efo_uri ?mapping_property_uri ?xref_uri
+
+
+  BIND( REPLACE( STR(?efo_uri), "^http.+/([^:]+)_(.+)$", "$1:$2" ) AS ?efo_id )
+  BIND( REPLACE( STR(?mapping_property_uri), "^http://purl\\.obolibrary\\.org/obo/mondo#(.+)$", "mondo:$1" ) AS ?mapping_property_mondo_id )
+  BIND( REPLACE( STR(?mapping_property_mondo_id), "^http://www\\.w3\\.org/2004/02/skos/core#(.+)$", "skos:$1" ) AS ?mapping_property_id )
+}
+ORDER BY ?efo_id ?xref_id ?mapping_property_id
diff --git a/nxontology_data/efo/queries/xref_sources.rq b/nxontology_data/efo/queries/xref_sources.rq
new file mode 100644
index 0000000..8aa3b9e
--- /dev/null
+++ b/nxontology_data/efo/queries/xref_sources.rq
@@ -0,0 +1,18 @@
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX owl: <http://www.w3.org/2002/07/owl#>
+PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
+
+SELECT ?efo_id ?xref ?axiom_source
+WHERE {
+  ?axiom rdf:type owl:Axiom.
+  ?axiom owl:annotatedSource ?source.
+  ?axiom owl:annotatedProperty oboInOwl:hasDbXref.
+  ?axiom owl:annotatedTarget ?xref.
+
+  OPTIONAL { ?axiom oboInOwl:source ?axiom_source }.
+
+  BIND( REPLACE( STR(?source), "^http.+/([^:]+)_(.+)$", "$1:$2" ) AS ?efo_id )
+}
+
+GROUP BY ?efo_id ?xref ?axiom_source
+ORDER BY ?efo_id ?xref ?axiom_source