-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patho-populate-datasets-from-VAMPS-download
executable file
·78 lines (62 loc) · 2.58 KB
/
o-populate-datasets-from-VAMPS-download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2010 - 2012, A. Murat Eren
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Please read the COPYING file.
import sys
import argparse
import Oligotyping.lib.fastalib as u
def main(input_file, output_file, taxon=None):
    """Expand a VAMPS FASTA download into one record per observed read.

    VAMPS stores each unique sequence once, with its abundance encoded in
    the defline.  This function rewrites the file so that a sequence with
    abundance N is written N times, each with its own numbered id.

    Two defline layouts are recognised, decided from the first record:

    * five '|'-separated fields (type 1), e.g.
      ``>GR7EWKD02FPDXN|MBJ_GOS_Bv6v4|C65_5|1.7% from GASTtaxonomy|Count``
    * six '|'-separated fields (type 2), e.g.
      ``>H3ZRT2102JJIZB|refhvr_ids=v4v6_CD343|GAST Dist=0.01000|Rank=genus|Abundance=1|GASTtaxonomy``

    Parameters:
        input_file:  path of the FASTA file downloaded from VAMPS.
        output_file: path of the populated FASTA file to write.
        taxon:       optional substring; when given, only records whose
                     defline contains it are written.

    Exits with a non-zero status (message on stderr) when the first defline
    has neither five nor six fields.  Every remaining record is assumed to
    follow the layout of the first one.
    """
    fasta = u.SequenceSource(input_file)
    output = None

    try:
        # Peek at the first record only to learn which layout is in use.
        next(fasta)
        field_count = len(fasta.id.split('|'))

        if field_count == 5:
            VAMPS_output_type = 1
        elif field_count == 6:
            VAMPS_output_type = 2
        else:
            # A string argument makes sys.exit() print to stderr and return
            # status 1, so shells and pipelines can detect the failure.
            sys.exit("The input FASTA doesn't seem to be a VAMPS output :/")

        # Rewind so the record used for detection is processed as well.
        fasta.reset()

        output = u.FastaOutput(output_file)

        while next(fasta):
            if taxon and taxon not in fasta.id:
                continue

            # Split the defline once per record instead of once per field.
            fields = fasta.id.split('|')
            acc = fields[0]

            if VAMPS_output_type == 2:
                # acc|refhvr_ids=<project_sample>|...|...|Abundance=<n>|<taxonomy>
                project_sample = fields[1].split('=')[1]
                gast = fields[5]
                new_id = project_sample + '_' + acc
                abundance = int(fields[4].split('=')[1])
            else:
                # acc|<project>|<sample>|<x>% from <taxonomy>|<count>
                project = fields[1]
                sample = fields[2]
                # Third whitespace-separated token of "<x>% from <taxonomy>".
                gast = fields[3].split()[2]
                new_id = project + '_' + sample + '_' + acc
                abundance = int(fields[4])

            # One output record per observed copy, numbered from zero.
            for i in range(abundance):
                output.write_id('%s-%s|%s' % (new_id, str(i), gast))
                output.write_seq(fasta.seq, split=False)
    finally:
        # Release both handles even if a malformed record aborts the run.
        fasta.close()
        if output is not None:
            output.close()
if __name__ == "__main__":
    # Command-line entry point: parse the options, settle on an output file
    # name, and hand everything over to main().
    parser = argparse.ArgumentParser(description='Populate VAMPS download')
    parser.add_argument(
        'input_file',
        metavar='FASTA',
        help='FASTA file downloaded from VAMPS',
    )
    parser.add_argument(
        '-t', '--taxon',
        default=None,
        help='Isolate a particular taxon',
    )
    parser.add_argument(
        '-o', '--output',
        default=None,
        help='Output file name',
    )
    args = parser.parse_args()

    output = args.output
    if not output:
        # No explicit output name: derive it from the taxon when one was
        # requested, otherwise from the input file name.
        if args.taxon:
            output = args.taxon + ".fa"
        else:
            output = args.input_file + "-POPULATED.fa"

    main(args.input_file, output, args.taxon)