Merge pull request #19 from lozuponelab/reaction_to_newfield

Parse REACTION as a separate field
lozuponelab · Feb 5, 2024 · 3204d6f
2 parents 2e3bf83 + 658e23c
commit 3204d6f
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 6 deletions.
diff --git a/KEGG_parser/parsers.py b/KEGG_parser/parsers.py
@@ -154,7 +154,8 @@ def split_module_reaction(current_dict, current_entry_name, current_entry_data):
 PARSE_KO_BY_FIELD = {
     'ENTRY': split_entry, 'NAME': split_name_by_comma, 'DEFINITION': return_self,
     'PATHWAY': split_and_append, 'MODULE': split_and_append, 'DISEASE': split_and_append,
-    'CLASS': add_class, 'DBLINKS': add_nested_dict, 'GENES': add_nested_dict
+    'CLASS': add_class, 'DBLINKS': add_nested_dict, 'GENES': add_nested_dict,
+    'REACTION': split_and_append
 }
 
 PARSE_RN_BY_FIELD = {
@@ -194,7 +195,7 @@ def split_module_reaction(current_dict, current_entry_name, current_entry_data):
     'COMPOUND': add_module_orthology, 'COMMENT': return_self, 'DBLINKS': add_nested_dict
 }
 
-NOT_CAPTURED_KO_FIELDS = ('REFERENCE', 'AUTHORS', 'TITLE', 'JOURNAL', 'SEQUENCE', 'BRITE', 'SYMBOL', 'REACTION',
+NOT_CAPTURED_KO_FIELDS = ('REFERENCE', 'AUTHORS', 'TITLE', 'JOURNAL', 'SEQUENCE', 'BRITE', 'SYMBOL',
                           'NETWORK', 'ELEMENT')
 
 NOT_CAPTURED_RN_FIELDS = ('REFERENCE', 'AUTHORS', 'TITLE', 'JOURNAL', 'BRITE')

diff --git a/tests/test_fixtures.py b/tests/test_fixtures.py
@@ -8,8 +8,10 @@ def ko_raw_record():
            "PATHWAY     ko00000 a fake pathway\n" \
            "DISEASE     H00000 A bad one\n" \
            "CLASS       Metabolism; Carbohydrate Metabolism; Glycolysis / Gluconeogenesis[PATH:ko00010]\n" \
-           "DBLINKS     RN: R00000\n" \
-           "            COG: COG0000\n" \
+           "REACTION    R00623  primary_alcohol:NAD+ oxidoreductase\n" \
+           "            R00754  ethanol:NAD+ oxidoreductase\n" \
+           "DBLINKS     COG: COG0000\n" \
+           "            GO: 0004022 0004023 0004024 0004025\n" \
            "GENES       HSA: hsa00000\n" \
            "REFERENCE\n" \
            "  AUTHORS   Fake G.\n" \

diff --git a/tests/test_parse_KEGG.py b/tests/test_parse_KEGG.py
@@ -29,9 +29,12 @@ def test_get_from_kegg_rxns(loop, list_of_rxns):
 
 def test_parse_ko(ko_raw_record):
     ko_record = parse_ko(ko_raw_record)
-    assert len(ko_record) == 8
-    assert tuple(ko_record['DBLINKS']['RN']) == tuple(['R00000'])
+    assert len(ko_record) == 9
+    assert tuple(ko_record['REACTION']) == tuple([('R00623', 'primary_alcohol:NAD+ oxidoreductase'),
+                                                   ('R00754', 'ethanol:NAD+ oxidoreductase')
+                                                   ])
     assert tuple(ko_record['DBLINKS']['COG']) == tuple(['COG0000'])
+    assert tuple(ko_record['DBLINKS']['GO']) == tuple(["0004022", "0004023", "0004024", "0004025"])
 
 
 @pytest.fixture()