-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(ontologies): add ontology terms/resources models/types/instances
- Loading branch information
1 parent
2d75a9d
commit 05bff69
Showing
7 changed files
with
247 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
from .models import OntologyResource, VersionedOntologyResource | ||
|
||
# Public API of this module: each base ontology resource, followed by the pinned (versioned) releases defined for it
# further down in this file.
__all__ = [
    # EFO
    "EFO",
    "EFO_3_69_0",
    # MONDO
    "MONDO",
    "MONDO_2024_09_03",
    # NCBITaxon
    "NCBI_TAXON",
    "NCBI_TAXON_2024_07_03",
    # NCIT
    "NCIT",
    "NCIT_2024_05_07",
    # OBI
    "OBI",
    "OBI_2024_06_10",
    # SO
    "SO",
    "SO_2024_06_05",  # defined below alongside SO; exported like every other pinned release
    # UBERON
    "UBERON",
]
|
||
|
||
def _versioned(ont: OntologyResource, url: str, version: str) -> VersionedOntologyResource:
    """
    Build a version-pinned resource from a base ontology resource.

    The id, name, namespace prefix and IRI prefix are copied from `ont`; the URL is replaced by `url` and the
    release identifier is set to `version`.
    """
    # Only the fields shared between a base resource and its releases are carried over; `url` is deliberately left
    # out of the dump because it is supplied by the caller.
    shared_fields = ont.model_dump(include={"id", "name", "namespace_prefix", "iri_prefix"})
    return VersionedOntologyResource(url=url, version=version, **shared_fields)
|
||
|
||
# Each section below declares a base resource for one ontology and, where one is defined, a release pinned to a
# specific version via _versioned(...).

# === EFO ==============================================================================================================

EFO = OntologyResource(
    id="efo",
    name="Experimental Factor Ontology",
    url="http://www.ebi.ac.uk/efo/efo.owl",
    namespace_prefix="EFO",
    iri_prefix="http://www.ebi.ac.uk/efo/EFO_",
)
EFO_3_69_0 = _versioned(
    EFO,
    url="http://www.ebi.ac.uk/efo/releases/v3.69.0/efo.owl",
    version="3.69.0",
)

# === MONDO ============================================================================================================

MONDO = OntologyResource(
    id="mondo",
    name="Mondo Disease Ontology",
    url="http://purl.obolibrary.org/obo/mondo.owl",
    namespace_prefix="MONDO",
    iri_prefix="http://purl.obolibrary.org/obo/MONDO_",
)
MONDO_2024_09_03 = _versioned(
    MONDO,
    url="http://purl.obolibrary.org/obo/mondo/releases/2024-09-03/mondo.owl",
    version="2024-09-03",
)

# === NCBITaxon ========================================================================================================

NCBI_TAXON = OntologyResource(
    id="ncbitaxon",
    name="NCBI organismal classification",
    url="http://purl.obolibrary.org/obo/ncbitaxon.owl",
    namespace_prefix="NCBITaxon",
    iri_prefix="http://purl.obolibrary.org/obo/NCBITaxon_",
)
NCBI_TAXON_2024_07_03 = _versioned(
    NCBI_TAXON,
    url="http://purl.obolibrary.org/obo/ncbitaxon/2024-07-03/ncbitaxon.owl",
    version="2024-07-03",
)

# === NCIT =============================================================================================================

NCIT = OntologyResource(
    id="ncit",
    name="NCI Thesaurus OBO Edition",
    url="http://purl.obolibrary.org/obo/ncit.owl",
    namespace_prefix="NCIT",
    iri_prefix="http://purl.obolibrary.org/obo/NCIT_",
)
NCIT_2024_05_07 = _versioned(
    NCIT,
    url="http://purl.obolibrary.org/obo/ncit/releases/2024-05-07/ncit.owl",
    version="2024-05-07",
)

# === OBI ==============================================================================================================

OBI = OntologyResource(
    id="obi",
    name="Ontology for Biomedical Investigations",
    url="http://purl.obolibrary.org/obo/obi.owl",
    namespace_prefix="OBI",
    iri_prefix="http://purl.obolibrary.org/obo/OBI_",
)
OBI_2024_06_10 = _versioned(
    OBI,
    url="http://purl.obolibrary.org/obo/obi/2024-06-10/obi.owl",
    version="2024-06-10",
)

# === SO ===============================================================================================================

SO = OntologyResource(
    id="so",
    name="Sequence types and features ontology",
    url="http://purl.obolibrary.org/obo/so.owl",
    namespace_prefix="SO",
    iri_prefix="http://purl.obolibrary.org/obo/SO_",
)
SO_2024_06_05 = _versioned(
    SO,
    url="http://purl.obolibrary.org/obo/so/2024-06-05/so.owl",
    version="2024-06-05",
)

# === UBERON ===========================================================================================================

# No pinned release is declared for UBERON in this section; only the base resource exists.
UBERON = OntologyResource(
    id="uberon",
    name="Uberon multi-species anatomy ontology",
    url="http://purl.obolibrary.org/obo/uberon.owl",
    namespace_prefix="UBERON",
    iri_prefix="http://purl.obolibrary.org/obo/UBERON_",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from .common_resources import NCBI_TAXON, OBI, SO | ||
|
||
# Public API: commonly used ontology terms, grouped by the ontology resource they are created from.
__all__ = [
    # NCBITaxon
    "NCBI_TAXON_HOMO_SAPIENS",
    "NCBI_TAXON_MUS_MUSCULUS",
    # OBI
    "OBI_16S_RRNA_ASSAY",
    "OBI_RNA_SEQ_ASSAY",
    "OBI_PROTEOMIC_PROFILING_BY_ARRAY_ASSAY",
    "OBI_WHOLE_GENOME_SEQUENCING_ASSAY",
    # SO
    "SO_GENOMIC_DNA",
]

# Every term is created through the owning resource's make_term(id_, label), so it carries a reference to that
# resource in addition to its CURIE and human-readable label.

# === NCBITaxon ========================================================================================================

NCBI_TAXON_HOMO_SAPIENS = NCBI_TAXON.make_term(id_="NCBITaxon:9606", label="Homo sapiens")
NCBI_TAXON_MUS_MUSCULUS = NCBI_TAXON.make_term(id_="NCBITaxon:10090", label="Mus musculus")

# === OBI ==============================================================================================================

OBI_16S_RRNA_ASSAY = OBI.make_term(id_="OBI:0002763", label="16s ribosomal gene sequencing assay")
OBI_RNA_SEQ_ASSAY = OBI.make_term(id_="OBI:0001271", label="RNA-seq assay")
OBI_PROTEOMIC_PROFILING_BY_ARRAY_ASSAY = OBI.make_term(id_="OBI:0001318", label="proteomic profiling by array assay")
OBI_WHOLE_GENOME_SEQUENCING_ASSAY = OBI.make_term(id_="OBI:0002117", label="whole genome sequencing assay")

# === SO ===============================================================================================================

SO_GENOMIC_DNA = SO.make_term(id_="SO:0000991", label="genomic DNA")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
from pydantic import BaseModel, Field, HttpUrl | ||
from typing import Annotated | ||
|
||
from .types import PhenoV2Resource, PhenoV2OntologyClassDict | ||
|
||
# ASCII-only pattern for a namespace prefix: a letter or underscore, followed by any number of letters, digits,
# dots, hyphens or underscores.
NC_NAME_PATTERN = r"^[a-zA-Z_][a-zA-Z0-9.\-_]*$"
# CURIE pattern: a prefix of the same shape as above, a colon, then a non-empty local part made of letters, digits,
# dots, hyphens or underscores.
CURIE_PATTERN = r"^[a-zA-Z_][a-zA-Z0-9.\-_]*:[a-zA-Z0-9.\-_]+$"
|
||
|
||
class OntologyResource(BaseModel):
    """
    Description of an ontology, modelled on the Phenopackets v2 Resource element:
    https://phenopacket-schema.readthedocs.io/en/latest/resource.html

    Terms belonging to the ontology can be created with `make_term`.
    """

    # Resource identifier. Phenopackets v2 says: "For OBO ontologies, the value of this string MUST always be the
    # official OBO ID, which is always equivalent to the ID prefix in lower case. Examples: hp, go, mp, mondo Consult
    # http://obofoundry.org for a complete list. For other resources which do not use native CURIE identifiers
    # (e.g. SNOMED, UniProt, ClinVar), use the prefix in identifiers.org."
    id: str

    # Human-readable title. Phenopackets v2 says: "The name of the ontology referred to by the id element, for
    # example, The Human Phenotype Ontology. For OBO Ontologies, the value of this string SHOULD be the same as the
    # title field on http://obofoundry.org. Other resources should use the official title for that resource. Note
    # that this field is purely for information purposes and software should not encode any assumptions."
    name: str

    # NOTE(review): presumably the location the ontology itself can be fetched from - confirm against the
    # Phenopackets v2 Resource documentation.
    url: HttpUrl

    # CURIE prefix. Phenopackets v2 says: "The prefix used in the CURIE of an OntologyClass e.g. HP, MP, ECO for
    # example an HPO term will have a CURIE like this - HP:0012828 which should be used in combination with the
    # iri_prefix to form a fully-resolvable IRI."
    # Because the value is used as a CURIE prefix, it is validated against an NCName-style pattern:
    # https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-NCName
    namespace_prefix: Annotated[str, Field(pattern=NC_NAME_PATTERN)]

    # IRI prefix which, per the quote above, is combined with a term's CURIE to form a resolvable IRI.
    iri_prefix: HttpUrl

    def make_term(self, id_: str, label: str) -> "OntologyTerm":
        """
        Create a term that belongs to this ontology resource.

        :param id_: Identifier of the term; validated by OntologyTerm against its CURIE pattern.
        :param label: Human-readable label of the term.
        :return: A new OntologyTerm whose `ontology` is this resource.
        """
        return OntologyTerm(id=id_, label=label, ontology=self)
|
||
|
||
class VersionedOntologyResource(OntologyResource):
    """
    An ontology resource tied to one specific release, identified by `version`.
    """

    # Release identifier of the ontology, stored as a free-form string.
    version: str

    def to_phenopackets_repr(self) -> PhenoV2Resource:
        """
        Dump this resource in JSON mode, restricted to the keys declared by PhenoV2Resource.

        :return: A dictionary with id, name, url, version, namespace_prefix and iri_prefix.
        """
        # The explicit key set keeps the output limited to these six fields even if a subclass adds more.
        resource_keys = {"id", "version", "name", "url", "namespace_prefix", "iri_prefix"}
        return self.model_dump(mode="json", include=resource_keys)
|
||
|
||
class OntologyTerm(BaseModel):
    """
    A single term (class) of an ontology, modelled on the Phenopackets v2 OntologyClass element:
    https://phenopacket-schema.readthedocs.io/en/latest/ontologyclass.html
    """

    # The resource this term belongs to. Typed as the base OntologyResource rather than VersionedOntologyResource:
    # OntologyResource.make_term passes the resource it is called on, which may be an unversioned resource, and
    # pydantic does not accept a parent-class instance for a field annotated with the subclass. Versioned resources
    # are still accepted here because they are instances of OntologyResource.
    ontology: OntologyResource
    # Term identifier in CURIE form (prefix:local-part), validated against CURIE_PATTERN.
    id: Annotated[str, Field(pattern=CURIE_PATTERN)]
    # Human-readable label of the term.
    label: str

    def to_phenopackets_repr(self) -> PhenoV2OntologyClassDict:
        """
        Dump this term in JSON mode, restricted to the keys declared by PhenoV2OntologyClassDict.

        :return: A dictionary with only `id` and `label`; the owning ontology resource is not included.
        """
        return self.model_dump(mode="json", include={"id", "label"})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from typing import TypedDict | ||
|
||
# Public API: typed-dictionary shapes for Phenopackets v2 style representations.
__all__ = ["PhenoV2Resource", "PhenoV2OntologyClassDict"]
|
||
|
||
class PhenoV2Resource(TypedDict):
    """Dictionary shape of a Phenopackets v2 style resource; every key is required and holds a string."""

    # Resource identifier and human-readable title.
    id: str
    name: str
    # Resource URL and release identifier, both as plain strings.
    url: str
    version: str
    # Prefix used in term CURIEs and the matching IRI prefix.
    namespace_prefix: str
    iri_prefix: str
|
||
|
||
class PhenoV2OntologyClassDict(TypedDict):
    """Dictionary shape of a Phenopackets v2 style ontology class; both keys are required and hold strings."""

    # Term identifier.
    id: str
    # Human-readable label of the term.
    label: str