Skip to content

Commit

Permalink
feat: expose cool-seq-tool feature overlap endpoint (#523)
Browse files Browse the repository at this point in the history
For issue #521
  • Loading branch information
korikuzma authored Nov 14, 2023
1 parent dda730e commit da230a5
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ pydantic = "==1.*"
gene-normalizer = "~=0.1.36"
boto3 = "*"
"ga4gh.vrsatile.pydantic" = "~=0.0.13"
cool-seq-tool = "~=0.1.14.dev0"
cool-seq-tool = "~=0.1.14.dev3"
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ install_requires =
gene-normalizer ~= 0.1.36
boto3
ga4gh.vrsatile.pydantic ~= 0.0.13
cool-seq-tool ~= 0.1.14.dev0
cool-seq-tool ~= 0.1.14.dev3

tests_require =
pytest
Expand Down
74 changes: 74 additions & 0 deletions variation/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
import pkg_resources
import python_jsonschema_objects
from bioutils.exceptions import BioutilsError
from cool_seq_tool.data_sources.feature_overlap import (
FeatureOverlap,
FeatureOverlapError,
)
from cool_seq_tool.schemas import Assembly, ResidueMode
from fastapi import FastAPI, Query
from ga4gh.vrs import models
Expand Down Expand Up @@ -35,6 +39,7 @@
)
from variation.schemas.service_schema import (
ClinVarAssembly,
FeatureOverlapService,
ToCdnaService,
ToGenomicService,
)
Expand All @@ -59,9 +64,11 @@ class Tag(Enum):
VRS_PYTHON = "VRS-Python"
TO_COPY_NUMBER_VARIATION = "To Copy Number Variation"
ALIGNMENT_MAPPER = "Alignment Mapper"
FEATURE_OVERLAP = "Feature Overlap"


# Module-level instances, created once when this module is imported.
query_handler = QueryHandler()
# Built from the query handler's `seqrepo_access`; called by the
# `/variation/feature_overlap` endpoint.
feature_overlap = FeatureOverlap(query_handler.seqrepo_access)


app = FastAPI(
Expand Down Expand Up @@ -841,3 +848,70 @@ async def p_to_g(
warnings=[w] if w else [],
service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()),
)


@app.get(
    "/variation/feature_overlap",
    summary="Given GRCh38 genomic data, find the overlapping MANE features (gene and cds)",
    response_description="A response to a validly-formed query.",
    description="The genomic data is specified as a sequence location by `chromosome`, `start`, `end`. All CDS regions with which the input sequence location has nonzero base pair overlap will be returned.",
    response_model=FeatureOverlapService,
    tags=[Tag.FEATURE_OVERLAP],
)
def get_feature_overlap(
    start: int = Query(..., description="GRCh38 start position"),
    end: int = Query(..., description="GRCh38 end position"),
    chromosome: Optional[str] = Query(
        None,
        description="Chromosome. 1..22, X, or Y. If not provided, must provide `identifier`. If both `chromosome` and `identifier` are provided, `chromosome` will be used.",
    ),
    identifier: Optional[str] = Query(
        None,
        description="Genomic identifier on GRCh38 assembly. If not provided, must provide `chromosome`. If both `chromosome` and `identifier` are provided, `chromosome` will be used.",
    ),
    residue_mode: ResidueMode = Query(
        ResidueMode.RESIDUE, description="Residue mode for `start` and `end`"
    ),
) -> FeatureOverlapService:
    """Given GRCh38 genomic data, find the overlapping MANE features (gene and cds).

    The genomic data is specified as a sequence location by `chromosome`, `start`,
    `end`. All CDS regions with which the input sequence location has nonzero base
    pair overlap will be returned.

    Failures are never raised to the caller: a ``FeatureOverlapError`` is reported
    verbatim in the response's ``warnings``, and any other exception is logged
    (with traceback) and reported as a generic warning. In both cases
    ``feature_overlap`` is ``None``.

    :param start: GRCh38 start position
    :param end: GRCh38 end position
    :param chromosome: Chromosome. 1..22, X, or Y. If not provided, must provide
        `identifier`. If both `chromosome` and `identifier` are provided,
        `chromosome` will be used.
    :param identifier: Genomic identifier on GRCh38 assembly. If not provided, must
        provide `chromosome`. If both `chromosome` and `identifier` are provided,
        `chromosome` will be used.
    :param residue_mode: Residue mode for `start` and `end`
    :return: Service response whose ``feature_overlap`` holds the MANE feature
        (gene/cds) overlap data represented as a dict. The dictionary will be
        keyed by genes which overlap the input sequence location. Each gene
        contains a list of the overlapping CDS regions with the beginning and end
        of the input sequence location's overlap with each
    """
    # Defaults describe the failure case; only a successful lookup sets data.
    overlap_data = None
    errors = []
    try:
        overlap_data = feature_overlap.get_grch38_mane_gene_cds_overlap(
            start=start,
            end=end,
            chromosome=chromosome,
            identifier=identifier,
            residue_mode=residue_mode,
        )
    except FeatureOverlapError as e:
        # Expected, user-facing failure: surface the library's message as-is.
        errors = [str(e)]
    except Exception as e:
        # The response points the client at the logs, so keep the traceback
        # there (`logger.exception` logs at ERROR level with exc_info).
        logger.exception("Unhandled exception: %s", str(e))
        errors = ["Unhandled exception. See logs for more information."]
    return FeatureOverlapService(
        feature_overlap=overlap_data,
        warnings=errors,
        service_meta_=ServiceMeta(
            version=__version__, response_datetime=datetime.now()
        ),
    )
69 changes: 65 additions & 4 deletions variation/schemas/service_schema.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
"""Module containing schemas for services"""
from enum import Enum
from typing import Any, Dict, Type
from typing import Any, Dict, List, Optional, Type

from cool_seq_tool.schemas import ToCdnaService as ToCdna
from cool_seq_tool.schemas import ToGenomicService as ToGenomic
from cool_seq_tool.schemas import (
CdsOverlap,
)
from cool_seq_tool.schemas import (
ToCdnaService as ToCdna,
)
from cool_seq_tool.schemas import (
ToGenomicService as ToGenomic,
)

from variation.schemas.normalize_response_schema import ServiceMeta
from variation.schemas.normalize_response_schema import ServiceMeta, ServiceResponse


class ClinVarAssembly(str, Enum):
Expand Down Expand Up @@ -82,3 +89,57 @@ def schema_extra(schema: Dict[str, Any], model: Type["ToCdnaService"]) -> None:
"url": "https://github.com/cancervariants/variation-normalization",
},
}


class FeatureOverlapService(ServiceResponse):
    """Response model returned by the Feature Overlap service.

    ``feature_overlap`` maps a string key (a gene symbol in the bundled example)
    to a list of ``CdsOverlap`` entries; it defaults to ``None``.
    """

    feature_overlap: Optional[Dict[str, List[CdsOverlap]]] = None

    class Config:
        """Model configuration (OpenAPI schema customisation)."""

        @staticmethod
        def schema_extra(
            schema: Dict[str, Any], model: Type["FeatureOverlapService"]
        ) -> None:
            """Strip auto-generated titles and attach an example payload.

            :param schema: Generated schema dict; modified in place
            :param model: Model class the schema was generated for (unused)
            """
            # Drop the model-level title and every per-property title.
            schema.pop("title", None)
            properties = schema.get("properties", {})
            for field_schema in properties.values():
                field_schema.pop("title", None)

            # The example's CDS and overlap locations carry the same values
            # but are kept as two independent dicts.
            example_cds = {
                "_id": "ga4gh:VSL._H2ST69A4RkWCSRHOoMv-edt-R45fPdq",
                "type": "SequenceLocation",
                "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
                "interval": {
                    "type": "SequenceInterval",
                    "start": {"value": 140726493, "type": "Number"},
                    "end": {"value": 140726516, "type": "Number"},
                },
            }
            example_overlap = {
                "_id": "ga4gh:VSL._H2ST69A4RkWCSRHOoMv-edt-R45fPdq",
                "type": "SequenceLocation",
                "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
                "interval": {
                    "type": "SequenceInterval",
                    "start": {"value": 140726493, "type": "Number"},
                    "end": {"value": 140726516, "type": "Number"},
                },
            }
            # NOTE(review): the feature overlap endpoint builds this model with
            # `service_meta_=...`, while this example uses the key
            # "service_meta" -- confirm which name the serialized response uses.
            example_service_meta = {
                "version": "0.5.4",
                "response_datetime": "2022-09-29T15:08:18.696882",
                "name": "variation-normalizer",
                "url": "https://github.com/cancervariants/variation-normalization",
            }
            schema["example"] = {
                "feature_overlap": {
                    "BRAF": [{"cds": example_cds, "overlap": example_overlap}]
                },
                "warnings": [],
                "service_meta": example_service_meta,
            }

0 comments on commit da230a5

Please sign in to comment.