-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patho-create-GG-alignment-template-from-taxon
executable file
·64 lines (51 loc) · 2.39 KB
/
o-create-GG-alignment-template-from-taxon
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2010 - 2012, A. Murat Eren
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Please read the COPYING file.
import argparse
import os
import Oligotyping.lib.fastalib as u
def get_ids(taxon, otu_id_to_greengenes):
    """Collect the ids of every entry whose taxonomy string mentions `taxon`.

    Each line of the file at `otu_id_to_greengenes` is expected to hold
    exactly two TAB-separated fields: an id followed by a taxonomy string.
    A line matches when `taxon` occurs anywhere in its taxonomy string,
    including at the very beginning. Note that an empty `taxon` therefore
    matches every well-formed line.

    Lines that do not split into exactly two fields (blank lines included)
    are printed to stdout and skipped.

    Returns the matching ids as a list of strings, in file order; duplicates
    are kept.
    """
    ids = []

    # The context manager guarantees the handle is released, and iterating
    # over the file object avoids loading the whole mapping into memory.
    with open(otu_id_to_greengenes) as otu:
        for line in otu:
            try:
                otu_id, tax = line.strip().split('\t')
            except ValueError:
                # Not an "id<TAB>taxonomy" line: report it and move on.
                print(line)
                continue

            # Plain substring test. The previous `tax.find(taxon) > 0` check
            # silently dropped entries whose taxonomy string *starts* with
            # the taxon, because find() returns 0 for a match at index 0.
            if taxon in tax:
                ids.append(otu_id)

    return ids
def gen_tmpl(taxon, otu_id_to_greengenes, greengenes_alignment, output_file_path = None):
    """Write the alignment entries that belong to `taxon` into a template file.

    The ids associated with `taxon` are looked up with `get_ids`, and every
    entry of `greengenes_alignment` whose id is among them is stored (once)
    in the output file.

    taxon                 -- taxon name to search for; also used to name the
                             output file (`<taxon>.tmpl`).
    otu_id_to_greengenes  -- path handed to `get_ids`.
    greengenes_alignment  -- path handed to `u.SequenceSource`.
    output_file_path      -- optional directory for the output file; when it
                             is not given the file is created relative to the
                             current working directory.

    Returns None. The number of distinct ids found is printed to stdout.
    """
    # Keep the ids in a set: membership tests and removals are O(1), whereas
    # the former list made both operations a linear scan for every single
    # sequence in the alignment.
    ids = set(get_ids(taxon, otu_id_to_greengenes))
    print('%d ids found for %s.' % (len(ids), taxon))

    o_path = '%s.tmpl' % taxon
    if output_file_path:
        o_path = os.path.join(output_file_path, (taxon + ".tmpl"))

    template = u.FastaOutput(o_path)
    try:
        fasta = u.SequenceSource(greengenes_alignment)
        try:
            # Stop as soon as every wanted id has been written; nothing
            # further could be stored, so reading the rest of the alignment
            # would be wasted work. The loop otherwise relies, as before, on
            # next(fasta) returning a falsy value once the input is exhausted.
            while ids and next(fasta):
                if fasta.id in ids:
                    template.store(fasta, split = False)
                    # Drop the id so a repeated entry is not stored twice.
                    ids.remove(fasta.id)
        finally:
            fasta.close()
    finally:
        # Close the output even when reading the alignment fails.
        template.close()
if __name__ == "__main__":
    # Command line entry point: three required positional arguments plus an
    # optional output folder, all forwarded to gen_tmpl().
    parser = argparse.ArgumentParser(description='Create GreenGenes Alignment Template')

    # (name, metavar, help text) for each positional argument, in the order
    # they must be given on the command line.
    positional_arguments = (
        ('taxon', 'TAXON',
         '"Taxon" name to be searched in GreenGenes'),
        ('otu_id_to_greengenes', 'OTU_ID_TO_GREENGENES',
         'Path to the "otu_id_to_greengenes" file. You can download it from '
         '"http://greengenes.lbl.gov/Download/OTUs/gg_otus_6oct2010/taxonomies/otu_id_to_greengenes.txt"'),
        ('greengenes_alignment', 'GREENGENES_ALIGNMENT',
         'Path to the GreenGenes alignment file. You can download it from '
         '"http://greengenes.lbl.gov/Download/OTUs/gg_otus_6oct2010/rep_set/gg_97_otus_6oct2010_aligned.fasta"'),
    )
    for arg_name, arg_metavar, arg_help in positional_arguments:
        parser.add_argument(arg_name, metavar = arg_metavar, help = arg_help)

    parser.add_argument('-o', '--output', default = None,
                        help = 'Path to the output folder')

    options = parser.parse_args()

    gen_tmpl(options.taxon,
             options.otu_id_to_greengenes,
             options.greengenes_alignment,
             options.output)