Skip to content

Commit

Permalink
Merge branch 'master' into qc_null_check
Browse files Browse the repository at this point in the history
  • Loading branch information
sierra-moxon committed Nov 21, 2023
2 parents 718c35d + 5bf37af commit d7730d3
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 13 deletions.
2 changes: 1 addition & 1 deletion ontobio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import absolute_import

__version__ = '2.8.17'
__version__ = '2.8.18'


from .ontol_factory import OntologyFactory
Expand Down
9 changes: 8 additions & 1 deletion ontobio/io/gafparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,15 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u
qualifiers = [association.Curie.from_str(curie_util.contract_uri(relations.lookup_label(q), strict=False)[0]) for q in qualifiers]

object = association.Term(association.Curie.from_str(gaf_line[4]), taxon)
if isinstance(object, association.Error):
if isinstance(object, association.Error) or isinstance(object.id, association.Error):
report.error(source_line, Report.INVALID_SYMBOL, gaf_line[4], "Problem parsing GO Term", taxon=gaf_line[TAXON_INDEX], rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

# Check GO Term namespace and identifier
go_term = object.id
if go_term.namespace != "GO" or go_term.identity.isnumeric == False:
report.error(source_line, Report.INVALID_SYMBOL, gaf_line[4], "Namespace should be \"GO\" and identity a numeric value greater than \"0\"", taxon=gaf_line[TAXON_INDEX], rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

# References
references = [association.Curie.from_str(e) for e in gaf_line[5].split("|") if e]
Expand Down
10 changes: 9 additions & 1 deletion ontobio/io/gpadparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,10 @@ def from_1_2(gpad_line: List[str], report=None, group="unknown", dataset="unknow
if go_term.is_error():
report.error(source_line, Report.INVALID_SYMBOL, gpad_line[3], "Problem parsing GO Term", taxon=str(taxon), rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

if go_term.namespace != "GO" or go_term.identity.isnumeric == False:
report.error(source_line, Report.INVALID_SYMBOL, gpad_line[3], "Namespace should be \"GO\" and identity a numeric value greater than \"0\"", taxon=str(taxon), rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

object = association.Term(go_term, taxon)

Expand Down Expand Up @@ -449,7 +453,11 @@ def from_2_0(gpad_line: List[str], report=None, group="unknown", dataset="unknow
if go_term.is_error():
report.error(source_line, Report.INVALID_SYMBOL, gpad_line[ONTOLOGY_CLASS_INDEX], "Problem parsing GO Term", taxon=str(taxon), rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)


if go_term.namespace != "GO" or go_term.identity.isnumeric == False:
report.error(source_line, Report.INVALID_SYMBOL, gpad_line[ONTOLOGY_CLASS_INDEX], "Namespace should be \"GO\" and identity a numeric value greater than \"0\"", taxon=str(taxon), rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

object = association.Term(go_term, taxon)

evidence_type = association.Curie.from_str(gpad_line[EVIDENCE_INDEX])
Expand Down
14 changes: 8 additions & 6 deletions ontobio/io/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def message(self, state: RepairState) -> str:
if state == RepairState.REPAIRED:
message = "Found violation of: `{}` but was repaired".format(self.title)
elif state == RepairState.FAILED:
message = "Found violatoin of: `{}` and could not be repaired".format(self.title)
message = "Found violation of: `{}` and could not be repaired".format(self.title)

return message

Expand Down Expand Up @@ -352,12 +352,13 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
class GoRule16(GoRule):

def __init__(self):
super().__init__("GORULE:0000016", "All IC annotations should include a GO ID in the \"With/From\" column", FailMode.SOFT)
super().__init__("GORULE:0000016", "All IC annotations should include a GO ID in the \"With/From\" column that is also different from the entry in the \"GO ID\" column", FailMode.SOFT)

def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult:
evidence = str(annotation.evidence.type)
withfrom = annotation.evidence.with_support_from


okay = True
if evidence == ic_eco:
if len(withfrom) == 0:
Expand Down Expand Up @@ -511,7 +512,7 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
class GoRule37(GoRule):

def __init__(self):
super().__init__("GORULE:0000037", "IBA annotations should ONLY be assigned_by GO_Central and have PMID:21873635 as a reference", FailMode.HARD)
super().__init__("GORULE:0000037", "IBA annotations should ONLY be assigned_by GO_Central and have GOREF:0000033 as a reference", FailMode.HARD)

def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult:
# If the evidence code is IBA, then (1) the assigned_by field must be GO_Central and (2) the reference field must be GOREF:0000033
Expand All @@ -521,7 +522,7 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP

result = self._result(True) # By default we pass
if evidence == iba_eco:
result = self._result(assigned_by == "GO_Central" and "PMID:21873635" in references)
result = self._result(assigned_by == "GO_Central" and "GOREF:0000033" in references)

return result

Expand Down Expand Up @@ -915,7 +916,8 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
return TestResult(repair_result(RepairState.OKAY, self.fail_mode), "{}: {}".format(self.message(repair_state), "GO term has no namespace"), annotation)

allowed_str = ", ".join([str(a) for a in allowed])
return TestResult(repair_result(repair_state, self.fail_mode), "{}: {} should be one of {}".format(self.message(repair_state), relation, allowed_str), repaired_annotation)
repaired_str = ", ".join([str(a) for a in repaired_annotation.qualifiers])
return TestResult(repair_result(repair_state, self.fail_mode), "{}: {} should be one of {}. Repaired to {}".format(self.message(repair_state), relation, allowed_str, repaired_str), repaired_annotation)


GoRules = enum.Enum("GoRules", {
Expand All @@ -938,7 +940,7 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
"GoRule43": GoRule43(),
"GoRule46": GoRule46(),
"GoRule50": GoRule50(),
"GoRule51": GoRule51(),
#"GoRule51": GoRule51(), Do not run test
"GoRule55": GoRule55(),
"GoRule57": GoRule57(),
"GoRule58": GoRule58(),
Expand Down
6 changes: 6 additions & 0 deletions tests/test_gafparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,12 @@ def test_bad_date():
assoc_result = p.parse_line("PomBase\tSPAC25B8.17\typf1\t\tGO:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\tTODAY\tPomBase\tfoo(X:1)")
assert assoc_result.skipped == True
assert assoc_result.associations == []

def test_bad_go_id():
    """A GAF line whose GO ID column holds a non-GO CURIE is skipped with no associations."""
    # Column 5 carries "INVALID:0000007" instead of a GO term.
    gaf_line = "PomBase\tSPAC25B8.17\typf1\t\tINVALID:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20231110\tPomBase\tfoo(X:1)"
    parse_result = GafParser().parse_line(gaf_line)

    assert parse_result.skipped == True
    assert parse_result.associations == []

def test_bad_taxon():
p = GafParser()
Expand Down
44 changes: 44 additions & 0 deletions tests/test_gpad_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,27 @@ def test_parse_interacting_taxon():
]
result = to_association(list(vals), report=report, version="1.2")
assert result.associations[0].interacting_taxon == Curie(namespace="NCBITaxon", identity="5678")

def test_parse_go_id_1_2():
    """GPAD 1.2: a line with a non-GO CURIE in the GO term column is skipped with one error."""
    gpad_columns = [
        "MGI",
        "MGI:1918911",
        "enables",
        "UBERON:1234",  # GO term column, deliberately not a GO CURIE
        "MGI:MGI:2156816|GO_REF:0000015",
        "ECO:0000307",
        "",
        "",
        "20100209",
        "MGI",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511"
    ]
    parse_report = assocparser.Report(group="unknown", dataset="unknown")

    result = to_association(list(gpad_columns), report=parse_report, version="1.2")
    error_messages = [m for m in result.report.messages if m["level"] == "ERROR"]

    assert result.skipped == 1
    assert len(error_messages) == 1
    assert len(result.associations) == 0


def test_duplicate_key_annot_properties():
Expand Down Expand Up @@ -189,6 +210,29 @@ def test_parse_2_0():
# Test annotation property retrieval
contributors = result.associations[0].annotation_property_values(property_key="contributor-id")
assert set(contributors) == {"http://orcid.org/0000-0003-2689-5511"}


def test_parse_go_id_2_0():
    """GPAD 2.0: a line carrying a non-GO CURIE (UBERON:5678) is skipped with one error."""
    gpad_columns = [
        "MGI:MGI:1918911",
        "",
        "RO:0002327",
        "UBERON:5678",  # presumably the ontology class column; deliberately not a GO CURIE
        "MGI:MGI:2156816|GO_REF:0000015",
        "ECO:0000307",
        "",
        "",
        "2020-09-17",
        "MGI",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511"
    ]
    gpad_version = "2.0"
    parse_report = assocparser.Report(group="unknown", dataset="unknown")

    result = to_association(list(gpad_columns), report=parse_report, version=gpad_version)
    error_messages = [m for m in result.report.messages if m["level"] == "ERROR"]

    assert result.skipped == 1
    assert len(error_messages) == 1
    assert len(result.associations) == 0


def test_aspect_fill_for_obsolete_terms():
Expand Down
19 changes: 15 additions & 4 deletions tests/test_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,9 +247,20 @@ def test_go_rules_15():
assert test_result.result_type == qc.ResultType.WARNING

def test_go_rule_16():
# GO term same as with/ID
assoc = make_annotation(goid="GO:0044419", evidence="IC", withfrom="GO:0044419").associations[0]

#GO term same as withfrom
test_result = qc.GoRule16().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.WARNING

#GO term same as one of the withfrom terms
assoc = make_annotation(goid="GO:0044419", evidence="IC", withfrom="GO:0044419|GO:0035821").associations[0]
test_result = qc.GoRule16().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.PASS

# No GO term w/ID
assoc = make_annotation(evidence="IC", withfrom="BLAH:12345").associations[0]

test_result = qc.GoRule16().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.WARNING

Expand Down Expand Up @@ -440,7 +451,7 @@ def test_gorule30():
assert test_result.result_type == qc.ResultType.PASS

def test_gorule37():
assoc = make_annotation(evidence="IBA", references="PMID:21873635", assigned_by="GO_Central").associations[0]
assoc = make_annotation(evidence="IBA", references="GOREF:0000033", assigned_by="GO_Central").associations[0]

test_result = qc.GoRule37().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.PASS
Expand All @@ -454,7 +465,7 @@ def test_gorule37():
test_result = qc.GoRule37().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.ERROR

assoc.evidence.has_supporting_reference = [Curie.from_str("PMID:21873635")]
assoc.evidence.has_supporting_reference = [Curie.from_str("GOREF:0000033")]
assoc.provided_by = "Pascale" # IBA, but wrong assigned_by
test_result = qc.GoRule37().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.ERROR
Expand Down Expand Up @@ -756,7 +767,7 @@ def test_all_rules():
assoc = gafparser.to_association(a).associations[0]

test_results = qc.test_go_rules(assoc, config).all_results
assert len(test_results.keys()) == 25
assert len(test_results.keys()) == 24
assert test_results[qc.GoRules.GoRule26.value].result_type == qc.ResultType.PASS
assert test_results[qc.GoRules.GoRule29.value].result_type == qc.ResultType.PASS

Expand Down

0 comments on commit d7730d3

Please sign in to comment.