Skip to content

Commit

Permalink
RFCT removed special casing for resfinder manual curation
Browse files Browse the repository at this point in the history
- Note: the code that integrates manual curation now removes duplicate ARO mappings. This corrected a MEGARes annotation (GMGC10.027_903_362.EMRE) that had a one-to-many ARO mapping. Better manual curation for MEGARes will be included in the version after v0.3.0, when MEGARes will be checked for CDSs, gene clusters, and reverse-complement (RC) genes.
  • Loading branch information
Vedanth-Ramji authored and luispedro committed Apr 25, 2024
1 parent d9ea531 commit de775cd
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 16 deletions.
24 changes: 9 additions & 15 deletions argnorm/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,18 @@ def is_number(num):
return True

def get_aro_mapping_table(database):
df = pd.read_csv(os.path.join(_ROOT, 'data', f'{database}_ARO_mapping.tsv'), sep='\t')
aro_mapping_table = pd.read_csv(os.path.join(_ROOT, 'data', f'{database}_ARO_mapping.tsv'), sep='\t')

manual_curation = pd.read_csv(os.path.join(_ROOT, 'data/manual_curation', f'{database}_curation.tsv'), sep='\t')
manual_curation['Database'] = df['Database']
aro_mapping_table = df
manual_curation['Database'] = aro_mapping_table['Database']

if database != 'resfinder':
aro_mapping_table = pd.concat([df, manual_curation])
else:
# Handle gene clusters and reverse complements
aro_mapping_table = aro_mapping_table.drop_duplicates(subset=['Original ID'], ignore_index=True).set_index('Original ID')

for i in manual_curation['Original ID']:
if i in aro_mapping_table.index:
aro_mapping_table.loc[i, 'ARO'] = manual_curation.set_index('Original ID').loc[i, 'ARO']
aro_mapping_table.loc[i, 'Gene Name in CARD'] = manual_curation.set_index('Original ID').loc[i, 'Gene Name in CARD']
else:
aro_mapping_table.loc[i] = manual_curation.set_index('Original ID').loc[i]
aro_mapping_table = aro_mapping_table.drop_duplicates(subset=['Original ID'], ignore_index=True).set_index('Original ID')
for i in manual_curation['Original ID']:
if i in aro_mapping_table.index:
aro_mapping_table.loc[i, 'ARO'] = manual_curation.set_index('Original ID').loc[i, 'ARO']
aro_mapping_table.loc[i, 'Gene Name in CARD'] = manual_curation.set_index('Original ID').loc[i, 'Gene Name in CARD']
else:
aro_mapping_table.loc[i] = manual_curation.set_index('Original ID').loc[i]

aro_mapping_table[TARGET_ARO_COL] = aro_mapping_table[TARGET_ARO_COL].map(lambda a: f'ARO:{int(a)}' if is_number(a) else a)
return aro_mapping_table.reset_index()
Expand Down
2 changes: 1 addition & 1 deletion outputs/hamronized/abricate.megares.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ GMGC10.027_121_620.RBPA GMGC10.95nr_block_0005 RBPA Drugs:Rifampin:RNA-polymeras
GMGC10.027_126_791.RBPA GMGC10.95nr_block_0005 RBPA Drugs:Rifampin:RNA-polymerase_binding_protein:RBPA megares 2021-Mar-27 MEG_6047 abricate abricate 1.0.1 91.39 1 302 + 87.54 ARO:3000245 ARO:3000169,ARO:3000517,ARO:3000530,ARO:3000534 ARO:3000157,ARO:3000157,ARO:3000157,ARO:3000157
GMGC10.027_135_808.MMR GMGC10.95nr_block_0005 MMR Multi-compound:Drug_and_biocide_resistance:Drug_and_biocide_SMR_efflux_pumps:MMR megares 2021-Mar-27 MEG_3996 abricate abricate 1.0.1 80.33 20 324 + 94.14 ARO:3005009 ARO:3005386 ARO:3005386
GMGC10.027_272_655.SOXS GMGC10.95nr_block_0005 SOXS Multi-compound:Drug_and_biocide_and_metal_resistance:Drug_and_biocide_and_metal_resistance_regulator:SOXS megares 2021-Mar-27 MEG_6551 abricate abricate 1.0.1 80.29 36 314 + 86.11 ARO:3003511 ARO:0000036,ARO:3000385,ARO:3007045 ARO:0000001,ARO:0000001,ARO:3000387
GMGC10.027_903_362.EMRE GMGC10.95nr_block_0005 QACH Multi-compound:Drug_and_biocide_resistance:Drug_and_biocide_SMR_efflux_pumps:QACH megares 2021-Mar-27 MEG_5847 abricate abricate 1.0.1 87.65 1 324 + 100.0 ARO:3006954 ARO:0000020 ARO:3000007
GMGC10.027_903_362.EMRE GMGC10.95nr_block_0005 QACH Multi-compound:Drug_and_biocide_resistance:Drug_and_biocide_SMR_efflux_pumps:QACH megares 2021-Mar-27 MEG_5847 abricate abricate 1.0.1 87.65 1 324 + 100.0 ARO:3003836 ARO:3005386 ARO:3005386
GMGC10.028_155_496.SOXS GMGC10.95nr_block_0005 SOXS Multi-compound:Drug_and_biocide_and_metal_resistance:Drug_and_biocide_and_metal_resistance_regulator:SOXS megares 2021-Mar-27 MEG_6551 abricate abricate 1.0.1 91.8 1 317 + 97.84 ARO:3003511 ARO:0000036,ARO:3000385,ARO:3007045 ARO:0000001,ARO:0000001,ARO:3000387
GMGC10.028_171_025.RBPA GMGC10.95nr_block_0005 RBPA Drugs:Rifampin:RNA-polymerase_binding_protein:RBPA megares 2021-Mar-27 MEG_6047 abricate abricate 1.0.1 85.54 1 332 + 96.23 ARO:3000245 ARO:3000169,ARO:3000517,ARO:3000530,ARO:3000534 ARO:3000157,ARO:3000157,ARO:3000157,ARO:3000157
GMGC10.030_070_454.RBPA GMGC10.95nr_block_0005 RBPA Drugs:Rifampin:RNA-polymerase_binding_protein:RBPA megares 2021-Mar-27 MEG_6047 abricate abricate 1.0.1 92.77 1 332 + 96.23 ARO:3000245 ARO:3000169,ARO:3000517,ARO:3000530,ARO:3000534 ARO:3000157,ARO:3000157,ARO:3000157,ARO:3000157
Expand Down

0 comments on commit de775cd

Please sign in to comment.