Skip to content

Commit

Permalink
Updated reconcile.py to support additional databases (sarg, deeparg, resfinder_fg, argannot, megares)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Vedanth-Ramji committed Feb 27, 2024
1 parent 45a450c commit eeaaf31
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions db_harmonisation/reconcile.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,19 @@ def get_aro_for_hits(fasta, rgi_output, database):
rgi_hits['Original ID'] = rgi_hits['Contig'].apply(lambda x: "_".join(x.split('_')[:-1]))
elif database == 'ncbi':
rgi_hits['Original ID'] = rgi_hits['ORF_ID']

elif database == 'sarg':
rgi_hits['Original ID'] = rgi_hits['ORF_ID'].apply(lambda x: x.split()[0])
elif database == 'deeparg':
rgi_hits['Original ID'] = rgi_hits['ORF_ID']
elif database == 'resfinder_fg':
rgi_hits['Original ID'] = rgi_hits['ORF_ID']
elif database == 'argannot':
rgi_hits['Original ID'] = rgi_hits['Contig'].apply(lambda x: '_'.join(x.split('_')[:-1]))
elif database == 'megares':
rgi_hits['Original ID'] = rgi_hits['Contig'].apply(lambda x: '_'.join(x.split('_')[:-1]))
# Keep only hits whose Model_type is "protein homolog model" for now; hits from other model types are dropped.
rgi_hits = rgi_hits[rgi_hits['Model_type'] == "protein homolog model"]


# tidy up "ORF ID"
mapping = rgi_hits[['Original ID', "Best_Hit_ARO", 'ARO']]
mapping = mapping.astype({'ARO': 'str'})
Expand Down Expand Up @@ -68,7 +76,8 @@ def get_aro_for_hits(fasta, rgi_output, database):
parser.add_argument("-r", "--rgi", required=True, type=check_file,
help="Corresponding rgi output tsv for the fasta file")
parser.add_argument("-d", "--database", required=True, type=str,
help="Name of the database", choices=['resfinder', 'ncbi'])
help="Name of the database",
choices=['resfinder', 'ncbi', 'sarg', 'deeparg', 'resfinder_fg', 'megares', 'argannot'])


args = parser.parse_args()
Expand All @@ -77,4 +86,4 @@ def get_aro_for_hits(fasta, rgi_output, database):

output_file = f"{args.database}_ARO_mapping.tsv"
print(f"Writing mapping to {output_file}")
mapping.to_csv(output_file, sep='\t')
mapping.to_csv(output_file, sep='\t')

0 comments on commit eeaaf31

Please sign in to comment.