From eeaaf31debcae6fc85cd9916a00199d4cecaa4ad Mon Sep 17 00:00:00 2001 From: Vedanth Date: Tue, 27 Feb 2024 07:15:22 +0530 Subject: [PATCH] updated reconcile.py to include support for other databases --- db_harmonisation/reconcile.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/db_harmonisation/reconcile.py b/db_harmonisation/reconcile.py index bae0841..15e2536 100644 --- a/db_harmonisation/reconcile.py +++ b/db_harmonisation/reconcile.py @@ -33,11 +33,19 @@ def get_aro_for_hits(fasta, rgi_output, database): rgi_hits['Original ID'] = rgi_hits['Contig'].apply(lambda x: "_".join(x.split('_')[:-1])) elif database == 'ncbi': rgi_hits['Original ID'] = rgi_hits['ORF_ID'] - + elif database == 'sarg': + rgi_hits['Original ID'] = rgi_hits['ORF_ID'].apply(lambda x: x.split()[0]) + elif database == 'deeparg': + rgi_hits['Original ID'] = rgi_hits['ORF_ID'] + elif database == 'resfinder_fg': + rgi_hits['Original ID'] = rgi_hits['ORF_ID'] + elif database == 'argannot': + rgi_hits['Original ID'] = rgi_hits['Contig'].apply(lambda x: '_'.join(x.split('_')[:-1])) + elif database == 'megares': + rgi_hits['Original ID'] = rgi_hits['Contig'].apply(lambda x: '_'.join(x.split('_')[:-1])) # homolog models only for now rgi_hits = rgi_hits[rgi_hits['Model_type'] == "protein homolog model"] - # tidy up "ORF ID" mapping = rgi_hits[['Original ID', "Best_Hit_ARO", 'ARO']] mapping = mapping.astype({'ARO': 'str'}) @@ -68,7 +76,8 @@ def get_aro_for_hits(fasta, rgi_output, database): parser.add_argument("-r", "--rgi", required=True, type=check_file, help="Corresponding rgi output tsv for the fasta file") parser.add_argument("-d", "--database", required=True, type=str, - help="Name of the database", choices=['resfinder', 'ncbi']) + help="Name of the database", + choices=['resfinder', 'ncbi', 'sarg', 'deeparg', 'resfinder_fg', 'megares', 'argannot']) args = parser.parse_args() @@ -77,4 +86,4 @@ def get_aro_for_hits(fasta, rgi_output, database): output_file = f"{args.database}_ARO_mapping.tsv" print(f"Writing mapping to {output_file}") - mapping.to_csv(output_file, sep='\t') + mapping.to_csv(output_file, sep='\t') \ No newline at end of file