Skip to content

Commit

Permalink
Merge pull request #89 from cancervariants/issue-84
Browse files Browse the repository at this point in the history
Issue 84
  • Loading branch information
korikuzma authored Apr 28, 2021
2 parents 18fb631 + dfb5403 commit b8aca4a
Show file tree
Hide file tree
Showing 91 changed files with 5,231 additions and 557 deletions.
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ flake8-docstrings = "*"
pre-commit = "*"
variant-normalization = {editable = true, path = "."}
pyyaml = "*"
jupyter = "*"
ipykernel = "*"

[packages]
hgvs = "*"
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ Services and guidelines for normalizing variant terms
Variant Normalization relies on some local data caches which you will need to set up. It uses pipenv to manage its environment, which you will also need to install.

### Installation
Variant Normalization relies on [seqrepo](https://github.com/biocommons/biocommons.seqrepo), which you must download yourself.

From the _variant_ directory of the repository:
```
pipenv sync
Expand All @@ -18,8 +20,6 @@ sudo mv $seqrepo_date_dir latest
```

### Data
Variant Normalization relies on [seqrepo](https://github.com/biocommons/biocommons.seqrepo). We are currently using version `2021-01-29`.

Variant Normalization uses [Ensembl BioMart](http://www.ensembl.org/biomart/martview) to retrieve `variant/data/transcript_mappings.tsv`. We currently use the `Human Genes (GRCh38.p13)` dataset with the following attributes: Gene stable ID, Gene stable ID version, Transcript stable ID, Transcript stable ID version, Protein stable ID, Protein stable ID version, RefSeq match transcript (MANE Select), and Gene name.

![image](biomart.png)
Expand Down
16 changes: 16 additions & 0 deletions tests/classifiers/test_coding_dna_delins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Module for testing Coding DNA DelIns Classifier."""
import unittest
from variant.classifiers import CodingDNADelInsClassifier
from .classifier_base import ClassifierBase


class TestCodingDNADelInsClassifier(ClassifierBase, unittest.TestCase):
    """Test the Coding DNA DelIns Classifier via ClassifierBase."""

    def fixture_name(self):
        """Return the fixture name for the coding DNA delins test cases."""
        # Same spelling as the `coding_dna_delins` section of
        # tests/fixtures/classifiers.yml.
        return 'coding_dna_delins'

    def classifier_instance(self):
        """Return a new CodingDNADelInsClassifier to be tested."""
        return CodingDNADelInsClassifier()
16 changes: 16 additions & 0 deletions tests/classifiers/test_coding_dna_silent_mutation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Module for testing Coding DNA Silent Mutation Classifier."""
import unittest
from variant.classifiers import CodingDNASilentMutationClassifier
from .classifier_base import ClassifierBase


class TestCodingDNASilentMutationClassifier(ClassifierBase, unittest.TestCase):
    """Test the Coding DNA Silent Mutation Classifier via ClassifierBase."""

    def fixture_name(self):
        """Return the fixture name for coding DNA silent mutation cases."""
        # Same spelling as the `coding_dna_silent_mutation` section of
        # tests/fixtures/classifiers.yml.
        return 'coding_dna_silent_mutation'

    def classifier_instance(self):
        """Return a new CodingDNASilentMutationClassifier to be tested."""
        return CodingDNASilentMutationClassifier()
16 changes: 16 additions & 0 deletions tests/classifiers/test_coding_dna_substitution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Module for testing Coding DNA Substitution Classifier."""
import unittest
from variant.classifiers import CodingDNASubstitutionClassifier
from .classifier_base import ClassifierBase


class TestCodingDNASubstitutionClassifier(ClassifierBase, unittest.TestCase):
    """Test the Coding DNA Substitution Classifier via ClassifierBase."""

    def fixture_name(self):
        """Return the fixture name for coding DNA substitution cases."""
        # Same spelling as the `coding_dna_substitution` section of
        # tests/fixtures/classifiers.yml.
        return 'coding_dna_substitution'

    def classifier_instance(self):
        """Return a new CodingDNASubstitutionClassifier to be tested."""
        return CodingDNASubstitutionClassifier()
16 changes: 16 additions & 0 deletions tests/classifiers/test_genomic_delins.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Module for testing Genomic DelIns Classifier."""
import unittest
from variant.classifiers import GenomicDelInsClassifier
from .classifier_base import ClassifierBase


class TestGenomicDelInsClassifier(ClassifierBase, unittest.TestCase):
    """Test the Genomic DelIns Classifier via ClassifierBase."""

    def fixture_name(self):
        """Return the fixture name for the genomic delins test cases."""
        # Same spelling as the `genomic_delins` section of
        # tests/fixtures/classifiers.yml.
        return 'genomic_delins'

    def classifier_instance(self):
        """Return a new GenomicDelInsClassifier to be tested."""
        return GenomicDelInsClassifier()
16 changes: 16 additions & 0 deletions tests/classifiers/test_genomic_silent_mutation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Module for testing Genomic Silent Mutation Classifier."""
import unittest
from variant.classifiers import GenomicSilentMutationClassifier
from .classifier_base import ClassifierBase


class TestGenomicSilentMutationClassifier(ClassifierBase, unittest.TestCase):
    """Test the Genomic Silent Mutation Classifier via ClassifierBase."""

    def fixture_name(self):
        """Return the fixture name for genomic silent mutation cases."""
        # Same spelling as the `genomic_silent_mutation` section of
        # tests/fixtures/classifiers.yml.
        return 'genomic_silent_mutation'

    def classifier_instance(self):
        """Return a new GenomicSilentMutationClassifier to be tested."""
        return GenomicSilentMutationClassifier()
16 changes: 16 additions & 0 deletions tests/classifiers/test_genomic_substitution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Module for testing Genomic Substitution Classifier."""
import unittest
from variant.classifiers import GenomicSubstitutionClassifier
from .classifier_base import ClassifierBase


class TestGenomicSubstitutionClassifier(ClassifierBase, unittest.TestCase):
    """A class to test the Genomic Substitution Classifier."""

    def classifier_instance(self):
        """Return GenomicSubstitutionClassifier instance."""
        return GenomicSubstitutionClassifier()

    def fixture_name(self):
        """Return GenomicSubstitutionClassifier fixture name."""
        # Same spelling as the `genomic_substitution` section of
        # tests/fixtures/classifiers.yml.
        return 'genomic_substitution'
95 changes: 94 additions & 1 deletion tests/fixtures/classifiers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ fusion:
- query: fused
- query: fusio


amino_acid_substitution:
should_match:
- query: BRAF V600E
Expand Down Expand Up @@ -63,3 +62,97 @@ silent_mutation:
confidence: ConfidenceRating.INTERSECTION
should_not_match:
- query: Leu862==

coding_dna_substitution:
should_match:
- query: V170D (c.509T>A)
confidence: ConfidenceRating.SUPERSET
- query: NM_000551.3:c.292T>C
confidence: ConfidenceRating.EXACT
- query: NM_000551.3:c.292TC
confidence: ConfidenceRating.INTERSECTION
- query: foo Y98H (c.292T>C)
confidence: ConfidenceRating.SUPERSET
- query: BRAF V600E c.23T>A
confidence: ConfidenceRating.EXACT
- query: LRG_199t1:c.54G>H
confidence: ConfidenceRating.EXACT
should_not_match:
- query: V170 (c.509F>A)
- query: RX_:g.292TC

genomic_substitution:
should_match:
- query: V170D (g.509T>A)
confidence: ConfidenceRating.SUPERSET
- query: NC_000017.10:g.292T>C
confidence: ConfidenceRating.EXACT
- query: NC_000017.10:g.292TC
confidence: ConfidenceRating.INTERSECTION
- query: foo Y98H (g.292T>C)
confidence: ConfidenceRating.SUPERSET
- query: BRAF V600E g.23T>A
confidence: ConfidenceRating.EXACT
should_not_match:
- query: V170 (g.509F>A)
- query: RX_:c.292TC

coding_dna_silent_mutation:
should_match:
- query: NM_004006.2:c.123=
confidence: ConfidenceRating.EXACT
- query: foo VHL c.123=
confidence: ConfidenceRating.SUPERSET
should_not_match:
- query: CODING_DNA_:c.123=
- query: g.123=

genomic_silent_mutation:
should_match:
- query: NC_000017.10:g.123=
confidence: ConfidenceRating.EXACT
- query: foo VHL g.123=
confidence: ConfidenceRating.SUPERSET
should_not_match:
- query: GENOMIC_:g.123=
- query: c.123=

coding_dna_delins:
should_match:
- query: NM_005157.6:c.1423_1424delinsGT
confidence: ConfidenceRating.EXACT
- query: ENST00000277541.6:c.7330delinsACA
confidence: ConfidenceRating.EXACT
- query: NM_000797.3:c.812_829delins908_925
confidence: ConfidenceRating.INTERSECTION
- query: foo c.131_234delinsA
confidence: ConfidenceRating.SUPERSET
- query: foo NM_005157.6:c.1423_1424delinsGT
confidence: ConfidenceRating.INTERSECTION
- query: NM_000551.3:c.615delinsAA
confidence: ConfidenceRating.EXACT
- query: LRG_199t1:c.79_80delinsTT
confidence: ConfidenceRating.EXACT
- query: LRG_199:c.79_80delinsTT
confidence: ConfidenceRating.EXACT
should_not_match:
- query: N_005157.6:g.1423_1424delinsGT
- query: c.1423delinsX

genomic_delins:
should_match:
- query: NC_000017.10:g.1423_1424delinsGT
confidence: ConfidenceRating.EXACT
- query: NC_000017.10:g.7330delinsACA
confidence: ConfidenceRating.EXACT
- query: NC_000017.10:g.812_829delins908_925
confidence: ConfidenceRating.INTERSECTION
- query: foo g.131_234delinsA
confidence: ConfidenceRating.SUPERSET
- query: foo NC_000017.10:g.1423_1424delinsGT
confidence: ConfidenceRating.INTERSECTION
- query: NC_000003.12:g.10149938delinsAA
confidence: ConfidenceRating.EXACT
should_not_match:
- query: N_000017.10:c.1423_1424delinsGT
- query: g.1423delinsX
138 changes: 137 additions & 1 deletion tests/fixtures/tokenizers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,144 @@ silent_mutation:
hgvs:
should_match:
- token: NC_000007.13:g.36561662C>T
- token: LRG_199p1:p.Trp24Cys
- token: NM_01234.5:c.22+1A>T
- token: NP_000918.2:p.Ile1145=
should_not_match:
- token: NP004324.2
- token: ERBB2:c.2326_2327delinsCT

coding_dna_substitution:
should_match:
- token: (c.292T>C)
- token: c.292T>C
- token: (c.233A>G)
- token: c.509T>A
- token: c.54G>H
should_not_match:
- token: (c.292T>C
- token: g.292T>C
- token: c.292T<C
- token: c.292Z>C
- token: c.j324T<C
- token: 292T<C
- token: c.509T>
- token: c.509>A
- token: c.T>A

genomic_substitution:
should_match:
- token: (g.292T>C)
- token: g.292T>C
- token: (g.233A>G)
- token: g.509T>A
- token: g.54G>H
should_not_match:
- token: (g.292T>C
- token: c.292T>C
- token: g.292T<C
- token: g.292Z>C
- token: g.j324T<C
- token: 292T<C
- token: g.509T>
- token: g.509>A
- token: g.T>A

coding_dna_silent_mutation:
should_match:
- token: c.123=
- token: (c.123=)
should_not_match:
- token: c.292T>C
- token: g.292T>C
- token: g.123
- token: (c.123=
- token: c.123=)
- token: c.123
- token: c.123==

genomic_silent_mutation:
should_match:
- token: g.123=
- token: (g.123=)
should_not_match:
- token: c.292T>C
- token: g.292T>C
- token: c.123
- token: (g.123=
- token: g.123=)
- token: g.123
- token: g.123==


coding_dna_delins:
should_match:
- token: c.32386323delinsGA
- token: c.6775_6777delinsC
- token: c.145_147delinsTGG
- token: c.9002_9009delinsTTT
- token: c.850_901delinsTTCCTCGATGCCTG
# - token: c.42522624_42522669delins42536337_42536382
# - token: c.812_829delins908_925
- token: (c.301_302delinsGG)
- token: c.615delinsAA
should_not_match:
- token: c.150_147delinsTGG
- token: 32386323delinsGA
- token: c.145_147delinsTGGS
- token: c.145_147delTGG
- token: g.32386323delinsGA
- token: NM_000797.3:c.812_829delins908_
- token: c.42522624_42522669delins_42536382
- token: c.delinsGA
- token: c.32386323delins
- token: (c.301_302delinsGG
- token: c.delins
- token: delins
- token: c._147delinsTGG
- token: c.145_delinsTGG
- token: c.delinsTGG
- token: c.d_delinsTG


genomic_delins:
should_match:
- token: g.32386323delinsGA
- token: g.6775_6777delinsC
- token: g.145_147delinsTGG
- token: g.9002_9009delinsTTT
- token: g.850_901delinsTTCCTCGATGCCTG
# - token: g.42522624_42522669delins42536337_42536382
# - token: g.812_829delins908_925
- token: (g.301_302delinsGG)
- token: g.10149938delinsAA
should_not_match:
- token: g.150_147delinsTGG
- token: 32386323delinsGA
- token: g.145_147delinsTGGS
- token: g.145_147delTGG
- token: c.32386323delinsGA
- token: NM_000797.3:g.812_829delins908_
- token: g.42522624_42522669delins_42536382
- token: g.delinsGA
- token: g.32386323delins
- token: (g.301_302delinsGG
- token: g.delins
- token: delins
- token: g._147delinsTGG
- token: g.145_delinsTGG
- token: g.delinsTGG
- token: g.d_delinsTG

locus_reference_genomic:
should_match:
- token: LRG_199
- token: LRG_199t1
- token: LRG_199p1
should_not_match:
- token: LRG_199t1p1
- token: LRG_199p1t1
- token: LRG_
- token: LRG_t1
- token: LRG_p1
- token: LRGt1
- token: LRGp1
Loading

0 comments on commit b8aca4a

Please sign in to comment.