From b34fd8e492b84ac9805f81fd0f37abd618544583 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 19 Mar 2024 13:49:00 -0400 Subject: [PATCH] build!: udpate ga4gh.vrs version (#542) * ~=2.0.0a5 --- Pipfile | 2 +- pyproject.toml | 2 +- src/variation/main.py | 131 +++++++++--------- src/variation/schemas/copy_number_schema.py | 18 ++- .../schemas/hgvs_to_copy_number_schema.py | 12 +- .../schemas/normalize_response_schema.py | 6 +- .../schemas/to_vrs_response_schema.py | 24 ++-- .../schemas/vrs_python_translator_schema.py | 12 +- src/variation/version.py | 2 +- tests/conftest.py | 70 +++++++--- tests/test_hgvs_dup_del_mode.py | 9 +- tests/test_normalize.py | 37 ++--- tests/test_translator.py | 6 +- .../test_amplification_to_cx_var.py | 4 +- .../test_hgvs_to_copy_number.py | 4 +- .../test_parsed_to_copy_number.py | 22 ++- 16 files changed, 215 insertions(+), 146 deletions(-) diff --git a/Pipfile b/Pipfile index 5181fffb..ae4f085d 100644 --- a/Pipfile +++ b/Pipfile @@ -19,7 +19,7 @@ ruff = "==0.2.0" fastapi = "*" uvicorn = "*" pydantic = "==2.*" -"ga4gh.vrs" = {version = "==2.0.0a2", extras = ["extras"]} +"ga4gh.vrs" = {version = "~=2.0.0a5", extras = ["extras"]} gene-normalizer = "~=0.3.0.dev1" boto3 = "*" cool-seq-tool = "~=0.4.0.dev1" diff --git a/pyproject.toml b/pyproject.toml index 07b6dfb5..8022269a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "fastapi", "uvicorn", "pydantic ==2.*", - "ga4gh.vrs[extras] == 2.0.0a2", + "ga4gh.vrs[extras] ~= 2.0.0a5", "gene-normalizer ~=0.3.0.dev1", "boto3", "cool-seq-tool ~=0.4.0.dev1", diff --git a/src/variation/main.py b/src/variation/main.py index 927846de..bdb675d2 100644 --- a/src/variation/main.py +++ b/src/variation/main.py @@ -10,24 +10,19 @@ from cool_seq_tool.schemas import Assembly, ResidueMode from fastapi import FastAPI, Query from ga4gh.vrs import models +from ga4gh.vrs.extras.translator import ValidationError as VrsPythonValidationError from hgvs.exceptions import HGVSError from pydantic import ValidationError from variation import logger from variation.query import QueryHandler -from variation.schemas import NormalizeService, ServiceMeta, ToVRSService +from variation.schemas import ServiceMeta from variation.schemas.copy_number_schema import ( - AmplificationToCxVarService, ParsedToCnVarQuery, ParsedToCnVarService, ParsedToCxVarQuery, ParsedToCxVarService, ) -from variation.schemas.gnomad_vcf_to_protein_schema import GnomadVcfToProteinService -from variation.schemas.hgvs_to_copy_number_schema import ( - HgvsToCopyNumberChangeService, - HgvsToCopyNumberCountService, -) from variation.schemas.normalize_response_schema import ( HGVSDupDelModeOption, TranslateIdentifierService, @@ -98,14 +93,12 @@ class Tag(Enum): "/variation/to_vrs", summary=translate_summary, response_description=translate_response_description, - response_model=ToVRSService, - response_model_exclude_none=True, description=translate_description, tags=[Tag.MAIN], ) async def to_vrs( q: str = Query(..., description=q_description), -) -> ToVRSService: +) -> dict: """Translate a HGVS, gnomAD VCF and Free Text descriptions to VRS variation(s). Performs fully-justified allele normalization. Does not do any liftover operations or make any inferences about the query. @@ -113,7 +106,8 @@ async def to_vrs( :param q: HGVS, gnomAD VCF or Free Text description on GRCh37 or GRCh38 assembly :return: ToVRSService model for variation """ - return await query_handler.to_vrs_handler.to_vrs(unquote(q)) + resp = await query_handler.to_vrs_handler.to_vrs(unquote(q)) + return resp.model_dump(exclude_none=True) normalize_summary = ( @@ -136,8 +130,6 @@ async def to_vrs( "/variation/normalize", summary=normalize_summary, response_description=normalize_response_description, - response_model=NormalizeService, - response_model_exclude_none=True, description=normalize_description, tags=[Tag.MAIN], ) @@ -154,7 +146,7 @@ async def normalize( None, description="The copy change for HGVS duplications and deletions represented as Copy Number Change Variation.", ), -) -> NormalizeService: +) -> dict: """Normalize and translate a HGVS, gnomAD VCF or Free Text description on GRCh37 or GRCh38 assembly to a single VRS Variation. Performs fully-justified allele normalization. Will liftover to GRCh38 and aligns to a priority transcript. Will @@ -170,12 +162,13 @@ async def normalize( query. :return: NormalizeService for variation """ - return await query_handler.normalize_handler.normalize( + resp = await query_handler.normalize_handler.normalize( unquote(q), hgvs_dup_del_mode=hgvs_dup_del_mode, baseline_copies=baseline_copies, copy_change=copy_change, ) + return resp.model_dump(exclude_none=True) @app.get( @@ -223,49 +216,64 @@ def translate_identifier( ) -from_fmt_descr = ( - "Format of input variation to translate. Must be one of `beacon`, " - "`gnomad`, `hgvs`, or `spdi`" -) +from_fmt_descr = "Format of input variation to translate. Must be one of `beacon`, `gnomad`, `hgvs`, or `spdi`. If not provided, will assume the appropriate format." +require_validation_descr = "If `True` then validation checks must pass in order to return a VRS object. A `ValidationError` will be raised if validation checks fail. If `False` then VRS object will be returned even if validation checks fail. Defaults to `True`." +rle_seq_limit_descr = "If RLE is set as the new state after normalization, this sets the limit for the length of the `sequence`. To exclude `sequence` from the response, set to 0. For no limit, set to `None`." @app.get( "/variation/translate_from", - summary="Given variation as beacon, gnomad, hgvs or spdi representation, " - "return VRS Allele object using vrs-python's translator class", + summary="Given variation as beacon, gnomad, hgvs or spdi representation, return VRS Allele object using VRS-Python's AlleleTranslator class", response_description="A response to a validly-formed query.", - response_model_exclude_none=True, description="Return VRS Allele object", - response_model=TranslateFromService, tags=[Tag.VRS_PYTHON], ) def vrs_python_translate_from( variation: str = Query( ..., - description="Variation to translate to VRS object." - " Must be represented as either beacon, " - "gnomad, hgvs, or spdi.", + description="Variation to translate to VRS object. Must be represented as either beacon, gnomad, hgvs, or spdi.", ), fmt: Optional[TranslateFromFormat] = Query(None, description=from_fmt_descr), -) -> TranslateFromService: - """Given variation query, return VRS Allele object using vrs-python"s translator - class - - :param str variation: Variation to translate to VRS object. Must be represented - as either beacon, gnomad, hgvs, or spdi - :param Optional[TranslateFromFormat] fmt: Format of variation. If not supplied, - vrs-python will infer its format. + assembly_name: str = Query( + "GRCh38", + description="Assembly used for `variation`. Only used for beacon and gnomad.", + ), + require_validation: bool = Query(True, description=require_validation_descr), + rle_seq_limit: Optional[int] = Query(50, description=rle_seq_limit_descr), +) -> dict: + """Given variation query, return VRS Allele object. + This endpoint exposes vrs-python AlleleTranslator's translate_from method + + :param variation: Variation to translate to VRS object. Must be represented as + either beacon, gnomad, hgvs, or spdi. + :param fmt: Format of input variation to translate. Must be one of `beacon`, + `gnomad`, `hgvs`, or `spdi`. If not provided, will assume the appropriate format + :param assembly_name: Assembly used for `variation`. Only used for beacon and gnomad + :param require_validation: If `True` then validation checks must pass in order to + return a VRS object. A `ValidationError` will be raised if validation checks + fail. If `False` then VRS object will be returned even if validation checks + fail. Defaults to `True`. + :param rle_seq_limit: If RLE is set as the new state after normalization, this sets + the limit for the length of the `sequence`. To exclude `sequence` from the + response, set to 0. For no limit, set to `None`. :return: TranslateFromService containing VRS Allele object """ variation_query = unquote(variation.strip()) warnings = [] vrs_variation = None try: - resp = query_handler.vrs_python_tlr.translate_from(variation_query, fmt) + resp = query_handler.vrs_python_tlr.translate_from( + variation_query, + fmt, + assembly_name=assembly_name, + require_validation=require_validation, + rle_seq_limit=rle_seq_limit, + ) except ( KeyError, ValueError, ValidationError, + VrsPythonValidationError, ) as e: warnings.append(f"vrs-python translator raised {type(e).__name__}: {e}") except HGVSError as e: @@ -286,7 +294,7 @@ def vrs_python_translate_from( vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version ), - ) + ).model_dump(exclude_none=True) g_to_p_summary = ( @@ -305,21 +313,20 @@ def vrs_python_translate_from( "/variation/gnomad_vcf_to_protein", summary=g_to_p_summary, response_description=g_to_p_response_description, - response_model_exclude_none=True, description=g_to_p_description, - response_model=GnomadVcfToProteinService, tags=[Tag.TO_PROTEIN_VARIATION], ) async def gnomad_vcf_to_protein( q: str = Query(..., description=q_description), -) -> GnomadVcfToProteinService: +) -> dict: """Return VRS representation for variation on protein coordinate. :param q: gnomad VCF to normalize to protein variation. :return: GnomadVcfToProteinService for variation """ q = unquote(q.strip()) - return await query_handler.gnomad_vcf_to_protein_handler.gnomad_vcf_to_protein(q) + resp = await query_handler.gnomad_vcf_to_protein_handler.gnomad_vcf_to_protein(q) + return resp.model_dump(exclude_none=True) hgvs_dup_del_mode_decsr = ( @@ -355,11 +362,9 @@ def _get_allele( "Request body must contain `variation` and `fmt`. `variation` is" " a VRS Allele object represented as a dict. `fmt` must be either" " `spdi` or `hgvs`", - response_model=TranslateToService, - response_model_exclude_none=True, tags=[Tag.VRS_PYTHON], ) -async def vrs_python_translate_to(request_body: TranslateToQuery) -> TranslateToService: +async def vrs_python_translate_to(request_body: TranslateToQuery) -> dict: """Given VRS Allele object as a dict, return variation expressed as queried format using vrs-python's translator class @@ -394,7 +399,7 @@ async def vrs_python_translate_to(request_body: TranslateToQuery) -> TranslateTo vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version ), - ) + ).model_dump(exclude_none=True) to_hgvs_descr = ( @@ -411,11 +416,9 @@ async def vrs_python_translate_to(request_body: TranslateToQuery) -> TranslateTo summary="Given VRS Allele object as a dict, return HGVS expression(s)", response_description="A response to a validly-formed query.", description=to_hgvs_descr, - response_model=TranslateToService, - response_model_exclude_none=True, tags=[Tag.VRS_PYTHON], ) -async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> TranslateToService: +async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> dict: """Given VRS Allele object as a dict, return variation expressed as HGVS expression(s) @@ -452,7 +455,7 @@ async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> TranslateToS vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version ), - ) + ).model_dump(exclude_none=True) @app.get( @@ -460,8 +463,6 @@ async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> TranslateToS summary="Given HGVS expression, return VRS Copy Number Count Variation", response_description="A response to a validly-formed query.", description="Return VRS Copy Number Count Variation", - response_model=HgvsToCopyNumberCountService, - response_model_exclude_none=True, tags=[Tag.TO_COPY_NUMBER_VARIATION], ) async def hgvs_to_copy_number_count( @@ -472,7 +473,7 @@ async def hgvs_to_copy_number_count( do_liftover: bool = Query( False, description="Whether or not to liftover " "to GRCh38 assembly." ), -) -> HgvsToCopyNumberCountService: +) -> dict: """Given hgvs expression, return copy number count variation :param hgvs_expr: HGVS expression @@ -480,11 +481,12 @@ async def hgvs_to_copy_number_count( :param do_liftover: Whether or not to liftover to GRCh38 assembly :return: HgvsToCopyNumberCountService """ - return await query_handler.to_copy_number_handler.hgvs_to_copy_number_count( + resp = await query_handler.to_copy_number_handler.hgvs_to_copy_number_count( unquote(hgvs_expr.strip()), baseline_copies, do_liftover, ) + return resp.model_dump(exclude_none=True) @app.get( @@ -492,8 +494,6 @@ async def hgvs_to_copy_number_count( summary="Given HGVS expression, return VRS Copy Number Change Variation", response_description="A response to a validly-formed query.", description="Return VRS Copy Number Change Variation", - response_model=HgvsToCopyNumberChangeService, - response_model_exclude_none=True, tags=[Tag.TO_COPY_NUMBER_VARIATION], ) async def hgvs_to_copy_number_change( @@ -502,7 +502,7 @@ async def hgvs_to_copy_number_change( do_liftover: bool = Query( False, description="Whether or not to liftover " "to GRCh38 assembly." ), -) -> HgvsToCopyNumberChangeService: +) -> dict: """Given hgvs expression, return copy number change variation :param hgvs_expr: HGVS expression @@ -510,11 +510,12 @@ async def hgvs_to_copy_number_change( :param do_liftover: Whether or not to liftover to GRCh38 assembly :return: HgvsToCopyNumberChangeService """ - return await query_handler.to_copy_number_handler.hgvs_to_copy_number_change( + resp = await query_handler.to_copy_number_handler.hgvs_to_copy_number_change( unquote(hgvs_expr.strip()), copy_change, do_liftover, ) + return resp.model_dump(exclude_none=True) @app.post( @@ -523,11 +524,9 @@ async def hgvs_to_copy_number_change( "Variation", response_description="A response to a validly-formed query.", description="Return VRS Copy Number Count Variation", - response_model=ParsedToCnVarService, - response_model_exclude_none=True, tags=[Tag.TO_COPY_NUMBER_VARIATION], ) -def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> ParsedToCnVarService: +def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> dict: """Given parsed genomic components, return Copy Number Count Variation. :param request_body: Request body @@ -549,7 +548,7 @@ def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> ParsedToCnVarService: ), ) else: - return resp + return resp.model_dump(exclude_none=True) @app.post( @@ -558,11 +557,9 @@ def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> ParsedToCnVarService: "Variation", response_description="A response to a validly-formed query.", description="Return VRS Copy Number Change Variation", - response_model=ParsedToCxVarService, - response_model_exclude_none=True, tags=[Tag.TO_COPY_NUMBER_VARIATION], ) -def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> ParsedToCxVarService: +def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> dict: """Given parsed genomic components, return Copy Number Change Variation :param request_body: Request body @@ -584,7 +581,7 @@ def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> ParsedToCxVarService: ), ) else: - return resp + return resp.model_dump(exclude_none=True) amplification_to_cx_var_descr = ( @@ -601,8 +598,6 @@ def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> ParsedToCxVarService: summary="Given amplification query, return VRS Copy Number Change Variation", response_description="A response to a validly-formed query.", description=amplification_to_cx_var_descr, - response_model=AmplificationToCxVarService, - response_model_exclude_none=True, tags=[Tag.TO_COPY_NUMBER_VARIATION], ) def amplification_to_cx_var( @@ -612,7 +607,7 @@ def amplification_to_cx_var( None, description="Start position as residue coordinate" ), end: Optional[int] = Query(None, description="End position as residue coordinate"), -) -> AmplificationToCxVarService: +) -> dict: """Given amplification query, return Copy Number Change Variation Parameter priority: 1. sequence, start, end (must provide ALL) @@ -633,7 +628,7 @@ def amplification_to_cx_var( sequence_id=sequence_id, start=start, end=end, - ) + ).model_dump(exclude_none=True) @app.get( diff --git a/src/variation/schemas/copy_number_schema.py b/src/variation/schemas/copy_number_schema.py index 6dcbb8b0..cf53717f 100644 --- a/src/variation/schemas/copy_number_schema.py +++ b/src/variation/schemas/copy_number_schema.py @@ -257,10 +257,12 @@ class ParsedToCnVarService(ServiceResponse): json_schema_extra={ "example": { "copy_number_count": { - "id": "ga4gh:CN.Qrs0TaGCcJiibMvhcML6BTSCVtX95FBl", + "id": "ga4gh:CN.pbVk38-x5YGW7yhEtaBnWYjrzcb25L16", + "digest": "pbVk38-x5YGW7yhEtaBnWYjrzcb25L16", "type": "CopyNumberCount", "location": { - "id": "ga4gh:SL.g6xj5oKF99OysSxcfHyGYbh8NFNn2r61", + "id": "ga4gh:SL.6jZXELPqf5JDeN4CpOGde8foTUkHi1jy", + "digest": "6jZXELPqf5JDeN4CpOGde8foTUkHi1jy", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -322,10 +324,12 @@ class ParsedToCxVarService(ServiceResponse): "example": { "copy_number_change": { "type": "CopyNumberChange", - "id": "ga4gh:CX.BTNwndSs3RylLhtL9Y45GePsVX35eeTT", + "id": "ga4gh:CX.5kaJC-7Jj851bfJ6EipsHV413feg1T4T", + "digest": "5kaJC-7Jj851bfJ6EipsHV413feg1T4T", "location": { "type": "SequenceLocation", - "id": "ga4gh:SL.Pu3oAKHColJSZ3zY_Xu5MeezINaTFlNq", + "id": "ga4gh:SL.Iz_azSFTEulx7tCluLgGhE1n0hTLUocb", + "digest": "Iz_azSFTEulx7tCluLgGhE1n0hTLUocb", "sequenceReference": { "type": "SequenceReference", "refgetAccession": "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", @@ -375,10 +379,12 @@ class AmplificationToCxVarService(ServiceResponse): }, "amplification_label": "BRAF Amplification", "copy_number_change": { - "id": "ga4gh:CX.89PECTeQjhhXnNW9yg24DheWOQMgmKk2", + "id": "ga4gh:CX._UsXDMCLtPwsVKiNByhbwfS569K1wLWW", + "digest": "_UsXDMCLtPwsVKiNByhbwfS569K1wLWW", "type": "CopyNumberChange", "location": { - "id": "ga4gh:SL.uNBZoxhjhohl24VlIut-JxPJAGfJ7EQE", + "id": "ga4gh:SL.0nPwKHYNnTmJ06G-gSmz8BEhB_NTp-0B", + "digest": "0nPwKHYNnTmJ06G-gSmz8BEhB_NTp-0B", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", diff --git a/src/variation/schemas/hgvs_to_copy_number_schema.py b/src/variation/schemas/hgvs_to_copy_number_schema.py index 4ddbd19c..9ea9f60d 100644 --- a/src/variation/schemas/hgvs_to_copy_number_schema.py +++ b/src/variation/schemas/hgvs_to_copy_number_schema.py @@ -19,10 +19,12 @@ class HgvsToCopyNumberCountService(ServiceResponse): "example": { "hgvs_expr": "NC_000003.12:g.49531262dup", "copy_number_count": { - "id": "ga4gh:CN.07iM14yvZ80N_AiaM7G_V4f1pCkmFYz4", + "id": "ga4gh:CN.gF1l6Zh6aY3vy_TR7rrat6FTmwiwIukY", + "digest": "gF1l6Zh6aY3vy_TR7rrat6FTmwiwIukY", "type": "CopyNumberCount", "location": { - "id": "ga4gh:SL.y4-cVA2VxMCDxb9gV2oFrzC386yrEVqh", + "id": "ga4gh:SL.2vbgFGHGB0QGODwgZNi05fWbROkkjf04", + "digest": "2vbgFGHGB0QGODwgZNi05fWbROkkjf04", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -55,10 +57,12 @@ class HgvsToCopyNumberChangeService(ServiceResponse): "example": { "hgvs_expr": "NC_000003.12:g.49531262dup", "copy_number_change": { - "id": "ga4gh:CX.d8BWSLNKN0K4n8ySG0jWPCr4cJIqEf5g", + "id": "ga4gh:CX.Zzws_y4cnoooQ7WXjg2B3nKIyFWXzOg3", + "digest": "Zzws_y4cnoooQ7WXjg2B3nKIyFWXzOg3", "type": "CopyNumberChange", "location": { - "id": "ga4gh:SL.y4-cVA2VxMCDxb9gV2oFrzC386yrEVqh", + "id": "ga4gh:SL.2vbgFGHGB0QGODwgZNi05fWbROkkjf04", + "digest": "2vbgFGHGB0QGODwgZNi05fWbROkkjf04", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", diff --git a/src/variation/schemas/normalize_response_schema.py b/src/variation/schemas/normalize_response_schema.py index fa10ade0..3bdec411 100644 --- a/src/variation/schemas/normalize_response_schema.py +++ b/src/variation/schemas/normalize_response_schema.py @@ -82,9 +82,11 @@ class NormalizeService(ServiceResponse): "example": { "variation_query": "BRAF V600E", "variation": { - "id": "ga4gh:VA.4XBXAxSAk-WyAu5H0S1-plrk_SCTW1PO", + "id": "ga4gh:VA.j4XnsLZcdzDIYa5pvvXM7t1wn9OITr0L", + "digest": "j4XnsLZcdzDIYa5pvvXM7t1wn9OITr0L", "location": { - "id": "ga4gh:SL.ZA1XNKhCT_7m2UtmnYb8ZYOVS4eplMEK", + "id": "ga4gh:SL.t-3DrWALhgLdXHsupI-e-M00aL3HgK3y", + "digest": "t-3DrWALhgLdXHsupI-e-M00aL3HgK3y", "end": 600, "start": 599, "sequenceReference": { diff --git a/src/variation/schemas/to_vrs_response_schema.py b/src/variation/schemas/to_vrs_response_schema.py index ed899324..f5dcafd3 100644 --- a/src/variation/schemas/to_vrs_response_schema.py +++ b/src/variation/schemas/to_vrs_response_schema.py @@ -27,9 +27,11 @@ class ToVRSService(BaseModel): "warnings": [], "variations": [ { - "id": "ga4gh:VA.PJu8CCaVzEyqXMAEcMNegyDWyvT_jzNn", + "id": "ga4gh:VA.GGJOybg6mckctDlXxLY1kHZQ6dbB0U75", + "digest": "GGJOybg6mckctDlXxLY1kHZQ6dbB0U75", "location": { - "id": "ga4gh:SL.EpHaD2ygDuPMvyURI9L4yetEwF3W0G7G", + "id": "ga4gh:SL.0Y2ZW1zB9mf0qGesJx1kDYtwfB-67Gnf", + "digest": "0Y2ZW1zB9mf0qGesJx1kDYtwfB-67Gnf", "end": 600, "start": 599, "sequenceReference": { @@ -42,9 +44,11 @@ class ToVRSService(BaseModel): "type": "Allele", }, { - "id": "ga4gh:VA.4XBXAxSAk-WyAu5H0S1-plrk_SCTW1PO", + "id": "ga4gh:VA.j4XnsLZcdzDIYa5pvvXM7t1wn9OITr0L", + "digest": "j4XnsLZcdzDIYa5pvvXM7t1wn9OITr0L", "location": { - "id": "ga4gh:SL.ZA1XNKhCT_7m2UtmnYb8ZYOVS4eplMEK", + "id": "ga4gh:SL.t-3DrWALhgLdXHsupI-e-M00aL3HgK3y", + "digest": "t-3DrWALhgLdXHsupI-e-M00aL3HgK3y", "end": 600, "start": 599, "sequenceReference": { @@ -57,9 +61,11 @@ class ToVRSService(BaseModel): "type": "Allele", }, { - "id": "ga4gh:VA.c-oRhbu7nDrBrSW2fPbFlDM15V6jiaho", + "id": "ga4gh:VA.jSy0uhhLefGH3396djPYcJeSVyDvRYGc", + "digest": "jSy0uhhLefGH3396djPYcJeSVyDvRYGc", "location": { - "id": "ga4gh:SL.gkevJbLNOScKXhxhzOZXiG3hW8zeyo-q", + "id": "ga4gh:SL.CowRxWqyJfqVwlfs5YdswnrnZDQL5QCi", + "digest": "CowRxWqyJfqVwlfs5YdswnrnZDQL5QCi", "start": 599, "end": 600, "sequenceReference": { @@ -72,9 +78,11 @@ class ToVRSService(BaseModel): "type": "Allele", }, { - "id": "ga4gh:VA.3ex0cvKXjHbq8NLuitOAfVwSPzqZUFrR", + "id": "ga4gh:VA.T1wKHKsXNF6gm7BJJ9kkyCgYrdzeG5Eh", + "digest": "T1wKHKsXNF6gm7BJJ9kkyCgYrdzeG5Eh", "location": { - "id": "ga4gh:SL.Q4MXez2kHFPQqGJKLP8quVHAskuCrOAA", + "id": "ga4gh:SL.5_mc1jrXsfhj-Cp8iFEEtvgHWXRtPerb", + "digest": "5_mc1jrXsfhj-Cp8iFEEtvgHWXRtPerb", "start": 599, "end": 600, "sequenceReference": { diff --git a/src/variation/schemas/vrs_python_translator_schema.py b/src/variation/schemas/vrs_python_translator_schema.py index 6e36b001..844f4b6c 100644 --- a/src/variation/schemas/vrs_python_translator_schema.py +++ b/src/variation/schemas/vrs_python_translator_schema.py @@ -44,10 +44,12 @@ class TranslateToQuery(BaseModel): json_schema_extra={ "example": { "variation": { - "id": "ga4gh:VA.ztz4yxckrW1j7YFSprOz_T9gwLdMc6LB", + "id": "ga4gh:VA.nmp-bzYpO00NYIqr3CaVF0ZH2ZpSj1ly", + "digest": "nmp-bzYpO00NYIqr3CaVF0ZH2ZpSj1ly", "type": "Allele", "location": { - "id": "ga4gh:SL.txr-jqnTLuz_3RVrPamx9cYniAFJg977", + "id": "ga4gh:SL.hVna-JOV5bBTGdXexL--IQm135MG3bGT", + "digest": "hVna-JOV5bBTGdXexL--IQm135MG3bGT", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -74,10 +76,12 @@ class TranslateToHGVSQuery(BaseModel): json_schema_extra={ "example": { "variation": { - "id": "ga4gh:VA.ztz4yxckrW1j7YFSprOz_T9gwLdMc6LB", + "id": "ga4gh:VA.nmp-bzYpO00NYIqr3CaVF0ZH2ZpSj1ly", + "digest": "nmp-bzYpO00NYIqr3CaVF0ZH2ZpSj1ly", "type": "Allele", "location": { - "id": "ga4gh:SL.txr-jqnTLuz_3RVrPamx9cYniAFJg977", + "id": "ga4gh:SL.hVna-JOV5bBTGdXexL--IQm135MG3bGT", + "digest": "hVna-JOV5bBTGdXexL--IQm135MG3bGT", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", diff --git a/src/variation/version.py b/src/variation/version.py index 8e569a7e..2f615d6c 100644 --- a/src/variation/version.py +++ b/src/variation/version.py @@ -1,2 +1,2 @@ """Module for version of app""" -__version__ = "0.8.1" +__version__ = "0.8.2" diff --git a/tests/conftest.py b/tests/conftest.py index babeed4f..09865eed 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -72,6 +72,7 @@ def test_cnv_handler(test_query_handler): @pytest.fixture(scope="session") def braf_ncbi_seq_loc(): """Create test fixture for BRAF ncbi priority sequence location""" + digest = "0nPwKHYNnTmJ06G-gSmz8BEhB_NTp-0B" return { "sequenceReference": { "type": "SequenceReference", @@ -79,7 +80,8 @@ def braf_ncbi_seq_loc(): }, "start": 140713327, "end": 140924929, - "id": "ga4gh:SL.uNBZoxhjhohl24VlIut-JxPJAGfJ7EQE", + "id": f"ga4gh:SL.{digest}", + "digest": digest, "type": "SequenceLocation", } @@ -101,8 +103,10 @@ def prpf8_ncbi_seq_loc(): @pytest.fixture(scope="session") def braf_600loc(): """Create test fixture for BRAF 600 location""" + digest = "t-3DrWALhgLdXHsupI-e-M00aL3HgK3y" return { - "id": "ga4gh:SL.ZA1XNKhCT_7m2UtmnYb8ZYOVS4eplMEK", + "id": f"ga4gh:SL.{digest}", + "digest": digest, "end": 600, "start": 599, "sequenceReference": { @@ -116,8 +120,10 @@ def braf_600loc(): @pytest.fixture(scope="session") def braf_v600e(braf_600loc): """Create BRAF V600E protein test fixture.""" + digest = "j4XnsLZcdzDIYa5pvvXM7t1wn9OITr0L" params = { - "id": "ga4gh:VA.4XBXAxSAk-WyAu5H0S1-plrk_SCTW1PO", + "id": f"ga4gh:VA.{digest}", + "digest": digest, "location": braf_600loc, "state": {"sequence": "E", "type": "LiteralSequenceExpression"}, "type": "Allele", @@ -211,8 +217,10 @@ def braf_v600e_genomic_sub(): @pytest.fixture(scope="session") def genomic_dup1_seq_loc_normalized(): """Create test fixture containing genomic dup1 sequence location normalized""" + digest = "XyxdODl0lLloyj3EQgIzIOEukl2WiaHw" return { - "id": "ga4gh:SL.f0nAiaxOC3rPToQEYRRhbVBNO6HKutyc", + "id": f"ga4gh:SL.{digest}", + "digest": digest, "sequenceReference": { "type": "SequenceReference", "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", @@ -228,8 +236,10 @@ def genomic_dup1_seq_loc_not_normalized(): """Create test fixture containing genomic dup1 sequence location that was normalized """ + digest = "2vbgFGHGB0QGODwgZNi05fWbROkkjf04" return { - "id": "ga4gh:SL.y4-cVA2VxMCDxb9gV2oFrzC386yrEVqh", + "id": f"ga4gh:SL.{digest}", + "digest": digest, "sequenceReference": { "type": "SequenceReference", "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", @@ -243,9 +253,11 @@ def genomic_dup1_seq_loc_not_normalized(): @pytest.fixture(scope="session") def genomic_dup1_38_cn(genomic_dup1_seq_loc_not_normalized): """Create test fixture for copy number count dup1 on GRCh38""" + digest = "gF1l6Zh6aY3vy_TR7rrat6FTmwiwIukY" params = { "type": "CopyNumberCount", - "id": "ga4gh:CN.07iM14yvZ80N_AiaM7G_V4f1pCkmFYz4", + "id": f"ga4gh:CN.{digest}", + "digest": digest, "location": genomic_dup1_seq_loc_not_normalized, "copies": 3, } @@ -490,8 +502,10 @@ def grch38_genomic_insertion_variation(grch38_genomic_insertion_seq_loc): @pytest.fixture(scope="session") def braf_amplification(braf_ncbi_seq_loc): """Create test fixture for BRAF Amplification""" + digest = "_UsXDMCLtPwsVKiNByhbwfS569K1wLWW" params = { - "id": "ga4gh:CX.89PECTeQjhhXnNW9yg24DheWOQMgmKk2", + "id": f"ga4gh:CX.{digest}", + "digest": digest, "location": braf_ncbi_seq_loc, "copyChange": "efo:0030072", "type": "CopyNumberChange", @@ -521,24 +535,47 @@ def genomic_del3_dup3_cn_38(genomic_del3_dup3_loc_not_normalized): return models.CopyNumberCount(**params) -def _delete_id(vrs_obj_dict): - """Delete ID property from VRS object""" +def _delete_id_and_digest(vrs_obj_dict): + """Delete id and digest properties from VRS object""" with contextlib.suppress(KeyError): # Some fixtures have IDs for other tests del vrs_obj_dict["id"] + with contextlib.suppress(KeyError): + # Some fixtures have digests for other tests + del vrs_obj_dict["digest"] + + +def _vrs_id_and_digest_existence_checks(vrs_obj_dict, prefix=None): + """Check that VRS id and digest exists. + + Does not check actual values. + `vrs_obj_dict` will be mutated (id and digest fields removed). + """ + variation_vrs_digest = vrs_obj_dict.pop("digest") + variation_vrs_id = vrs_obj_dict.pop("id") + + if not prefix: + prefix = ("ga4gh:VA.", "ga4gh:CX", "ga4gh:CN.") + + assert variation_vrs_id.startswith(prefix) + assert variation_vrs_id.endswith(variation_vrs_digest) + + location_vrs_digest = vrs_obj_dict["location"].pop("digest") + location_vrs_id = vrs_obj_dict["location"].pop("id") + assert location_vrs_id == f"ga4gh:SL.{location_vrs_digest}" + def assertion_checks(normalize_response, test_variation, check_vrs_id=False): """Check that normalize_response and test_variation are equal.""" actual = normalize_response.variation.model_dump(exclude_none=True) if not check_vrs_id: - assert actual.pop("id").startswith(("ga4gh:VA.", "ga4gh:CX", "ga4gh:CN.")) - assert actual["location"].pop("id").startswith("ga4gh:SL.") + _vrs_id_and_digest_existence_checks(actual) expected = test_variation.copy().model_dump(exclude_none=True) if not check_vrs_id: - _delete_id(expected) - _delete_id(expected["location"]) + _delete_id_and_digest(expected) + _delete_id_and_digest(expected["location"]) assert actual == expected, "variation" @@ -555,13 +592,12 @@ def cnv_assertion_checks(resp, test_fixture, check_vrs_id=False): prefix = "ga4gh:CN." if not check_vrs_id: - assert actual.pop("id").startswith(prefix) - assert actual["location"].pop("id").startswith("ga4gh:SL.") + _vrs_id_and_digest_existence_checks(actual, prefix=prefix) expected = test_fixture.copy().model_dump(exclude_none=True) if not check_vrs_id: - _delete_id(expected) - _delete_id(expected["location"]) + _delete_id_and_digest(expected) + _delete_id_and_digest(expected["location"]) assert actual == expected assert resp.warnings == [] diff --git a/tests/test_hgvs_dup_del_mode.py b/tests/test_hgvs_dup_del_mode.py index f6f2eec0..14a48927 100644 --- a/tests/test_hgvs_dup_del_mode.py +++ b/tests/test_hgvs_dup_del_mode.py @@ -15,9 +15,11 @@ def test_handler(test_query_handler): @pytest.fixture(scope="module") def genomic_dup1_lse(genomic_dup1_seq_loc_normalized): """Create a test fixture for genomic dup LSE.""" + digest = "vfLfV0PTIdjGBINwgHKFBoVjPSkZ7s5-" params = { "type": "Allele", - "id": "ga4gh:VA.CHNQRjx52keAGF5WcbvKORtfLiitZKE4", + "id": f"ga4gh:VA.{digest}", + "digest": digest, "location": genomic_dup1_seq_loc_normalized, "state": { "type": "ReferenceLengthExpression", @@ -32,9 +34,11 @@ def genomic_dup1_lse(genomic_dup1_seq_loc_normalized): @pytest.fixture(scope="module") def genomic_dup1_cx(genomic_dup1_seq_loc_not_normalized): """Create a test fixture for genomic dup copy number change.""" + digest = "yHUIaSwa0aIRvhfiTUIHPTkjNdaQdN4P" params = { "type": "CopyNumberChange", - "id": "ga4gh:CX.7WKEz2E_jwZZdyRc2Gw-_LIbHDJyRXwr", + "id": f"ga4gh:CX.{digest}", + "digest": digest, "location": genomic_dup1_seq_loc_not_normalized, "copyChange": "efo:0030072", } @@ -336,7 +340,6 @@ def genomic_dup6_cn(genomic_dup6_loc): """Create a test fixture for genomic dup copy number count.""" params = { "type": "CopyNumberCount", - "id": "ga4gh:CN.KSFn5KQIPuPVJ6FjWaF0vzl7eRwwHbX9", "location": genomic_dup6_loc, "copies": 2, } diff --git a/tests/test_normalize.py b/tests/test_normalize.py index e719aa71..107bad6c 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -450,10 +450,8 @@ def gnomad_vcf_genomic_delins3(): "type": "SequenceLocation", }, "state": { - "length": 26, - "repeatSubunitLength": 24, "sequence": "GGCAGCGCATAAAGCGCATTCTCCGG", - "type": "ReferenceLengthExpression", + "type": "LiteralSequenceExpression", }, "type": "Allele", } @@ -930,28 +928,31 @@ async def test_no_matches(test_handler): async def test_service_meta(): """Test that service meta info populates correctly.""" response = await normalize_get_response("BRAF v600e", "default") - service_meta = response.service_meta_ - assert service_meta.name == "variation-normalizer" - assert service_meta.version - assert isinstance(service_meta.response_datetime, datetime) + service_meta = response["service_meta_"] + assert service_meta["name"] == "variation-normalizer" + assert service_meta["version"] + assert isinstance(service_meta["response_datetime"], datetime) assert ( - service_meta.url == "https://github.com/cancervariants/variation-normalization" + service_meta["url"] + == "https://github.com/cancervariants/variation-normalization" ) response = await normalize_get_response("this-wont-normalize", "default") - service_meta = response.service_meta_ - assert service_meta.name == "variation-normalizer" - assert service_meta.version - assert isinstance(service_meta.response_datetime, datetime) + service_meta = response["service_meta_"] + assert service_meta["name"] == "variation-normalizer" + assert service_meta["version"] + assert isinstance(service_meta["response_datetime"], datetime) assert ( - service_meta.url == "https://github.com/cancervariants/variation-normalization" + service_meta["url"] + == "https://github.com/cancervariants/variation-normalization" ) response = await to_vrs_get_response("this-wont-normalize") - service_meta = response.service_meta_ - assert service_meta.name == "variation-normalizer" - assert service_meta.version - assert isinstance(service_meta.response_datetime, datetime) + service_meta = response["service_meta_"] + assert service_meta["name"] == "variation-normalizer" + assert service_meta["version"] + assert isinstance(service_meta["response_datetime"], datetime) assert ( - service_meta.url == "https://github.com/cancervariants/variation-normalization" + service_meta["url"] + == "https://github.com/cancervariants/variation-normalization" ) diff --git a/tests/test_translator.py b/tests/test_translator.py index a87eaeca..1d6d0dcc 100644 --- a/tests/test_translator.py +++ b/tests/test_translator.py @@ -5,6 +5,7 @@ import yaml from tests import PROJECT_ROOT +from tests.conftest import _vrs_id_and_digest_existence_checks from variation.hgvs_dup_del_mode import HGVSDupDelMode from variation.translators import ( Amplification, @@ -87,10 +88,7 @@ async def translator_checks( vr, [] ) vrs_variation = translation_result.vrs_variation - assert vrs_variation.pop("id").startswith( - ("ga4gh:VA.", "ga4gh:CX", "ga4gh:CN.") - ) - assert vrs_variation["location"].pop("id").startswith("ga4gh:SL.") + _vrs_id_and_digest_existence_checks(vrs_variation) if vrs_variation and vrs_variation not in translations: assert vrs_variation in expected, query diff --git a/tests/to_copy_number_variation/test_amplification_to_cx_var.py b/tests/to_copy_number_variation/test_amplification_to_cx_var.py index f6253e47..e9e06e2b 100644 --- a/tests/to_copy_number_variation/test_amplification_to_cx_var.py +++ b/tests/to_copy_number_variation/test_amplification_to_cx_var.py @@ -9,11 +9,11 @@ def kit_amplification(): """Create test fixture for KIT amplification""" params = { "type": "CopyNumberChange", - "id": "ga4gh:CX.wQv1KnYyhMd1aKoXFrOVzT3rMNvo0OIS", + "id": "ga4gh:CX.8ENbdAlnf3hK6681-74YhcnfD-J6WQbN", "copyChange": "efo:0030072", "location": { "type": "SequenceLocation", - "id": "ga4gh:SL.5UgZnBz5pAVUWzNMyC1YJBeVnAA_DGUE", + "id": "ga4gh:SL.2xCHxtZBqOXxl4W4ACxq9Um4FqcqZKxL", "sequenceReference": { "type": "SequenceReference", "refgetAccession": "SQ.iy7Zfceb5_VGtTQzJ-v5JpPbpeifHD_V", diff --git a/tests/to_copy_number_variation/test_hgvs_to_copy_number.py b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py index 3d825a90..309a491a 100644 --- a/tests/to_copy_number_variation/test_hgvs_to_copy_number.py +++ b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py @@ -8,9 +8,11 @@ @pytest.fixture(scope="module") def genomic_dup1_cx_38(genomic_dup1_seq_loc_not_normalized): """Create test fixture copy number change variation""" + digest = "Zzws_y4cnoooQ7WXjg2B3nKIyFWXzOg3" params = { "type": "CopyNumberChange", - "id": "ga4gh:CX.d8BWSLNKN0K4n8ySG0jWPCr4cJIqEf5g", + "id": f"ga4gh:CX.{digest}", + "digest": digest, "location": genomic_dup1_seq_loc_not_normalized, "copyChange": "efo:0030069", } diff --git a/tests/to_copy_number_variation/test_parsed_to_copy_number.py b/tests/to_copy_number_variation/test_parsed_to_copy_number.py index c966e485..b4091475 100644 --- a/tests/to_copy_number_variation/test_parsed_to_copy_number.py +++ b/tests/to_copy_number_variation/test_parsed_to_copy_number.py @@ -21,12 +21,16 @@ def cn_gain1(): """Create test fixture for clinvar copy number gain. https://www.ncbi.nlm.nih.gov/clinvar/variation/145208/?new_evidence=true """ + cn_digest = "pbVk38-x5YGW7yhEtaBnWYjrzcb25L16" + loc_digest = "6jZXELPqf5JDeN4CpOGde8foTUkHi1jy" variation = { "type": "CopyNumberCount", - "id": "ga4gh:CN.Qrs0TaGCcJiibMvhcML6BTSCVtX95FBl", + "id": f"ga4gh:CN.{cn_digest}", + "digest": cn_digest, "location": { "type": "SequenceLocation", - "id": "ga4gh:SL.g6xj5oKF99OysSxcfHyGYbh8NFNn2r61", + "id": f"ga4gh:SL.{loc_digest}", + "digest": loc_digest, "sequenceReference": { "type": "SequenceReference", "refgetAccession": "SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU", @@ -147,12 +151,16 @@ def cn_definite_number(): @pytest.fixture(scope="module") def cx_numbers(): """Create test fixture for copy number change using numbers for start and end""" + cx_digest = "5kaJC-7Jj851bfJ6EipsHV413feg1T4T" + loc_digest = "Iz_azSFTEulx7tCluLgGhE1n0hTLUocb" variation = { "type": "CopyNumberChange", - "id": "ga4gh:CX.BTNwndSs3RylLhtL9Y45GePsVX35eeTT", + "id": f"ga4gh:CX.{cx_digest}", + "digest": cx_digest, "location": { "type": "SequenceLocation", - "id": "ga4gh:SL.Pu3oAKHColJSZ3zY_Xu5MeezINaTFlNq", + "id": f"ga4gh:SL.{loc_digest}", + "digest": loc_digest, "sequenceReference": { "type": "SequenceReference", "refgetAccession": "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", @@ -755,8 +763,10 @@ def test_to_parsed_cn_var(test_cnv_handler, cn_definite_number): ) resp = test_cnv_handler.parsed_to_copy_number(rb) cnc = resp.copy_number_count.model_dump(exclude_none=True) - assert cnc.pop("id").startswith("ga4gh:CN.") - assert cnc["location"].pop("id").startswith("ga4gh:SL.") + cn_digest = cnc.pop("digest") + assert cnc.pop("id") == f"ga4gh:CN.{cn_digest}" + loc_digest = cnc["location"].pop("digest") + assert cnc["location"].pop("id") == f"ga4gh:SL.{loc_digest}" assert cnc == { "type": "CopyNumberCount", "location": {