-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patho-keep-or-remove-samples-from-fasta
executable file
·78 lines (60 loc) · 2.95 KB
/
o-keep-or-remove-samples-from-fasta
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2010 - 2012, A. Murat Eren
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Please read the COPYING file.
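# Removes (default) or retains (--retain) the reads that belong to the listed
# samples in a FASTA file:
#
# ./me FASTA_FILE SAMPLES_LIST [--retain] [-o OUTPUT_FILE]
#
# SAMPLES_LIST file contains one sample name on each line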
#
import sys
import Oligotyping.lib.fastalib as u
from Oligotyping.utils.utils import pretty_print as pp
def main(fasta_file_path, samples_file_path, retain_samples = False, output_file_path = None):
    """Copy a FASTA file while removing, or retaining, the reads of listed samples.

    The sample name of a read is everything in its id before the last
    underscore; an id without an underscore yields an empty sample name.

    Args:
        fasta_file_path: Path of the FASTA file to read.
        samples_file_path: Path of a text file with one sample name per line.
            Surrounding whitespace is stripped and blank lines are ignored.
        retain_samples: When true, only reads whose sample name is listed are
            written. When false (the default), only reads whose sample name
            is NOT listed are written.
        output_file_path: Path of the FASTA file to write. When empty or
            None, it is derived from `fasta_file_path` by appending
            '-SAMPLES-RETAINED' or '-SAMPLES-REMOVED'.

    Returns:
        None. Progress and the output path are reported on stderr.
    """
    if not output_file_path:
        suffix = 'SAMPLES-RETAINED' if retain_samples else 'SAMPLES-REMOVED'
        output_file_path = "%s-%s" % (fasta_file_path, suffix)

    # Close the samples file deterministically instead of leaking the handle.
    with open(samples_file_path) as samples_file:
        stripped_lines = [line.strip() for line in samples_file]

    # A blank line is not a sample name: kept as '', it would match every read
    # whose id has no underscore and inflate the count reported below.
    samples_list = [name for name in stripped_lines if name]

    # The list keeps file order for the preview message; the set gives
    # constant-time membership tests inside the per-read loop.
    samples = set(samples_list)
    keep_listed = bool(retain_samples)

    fasta = u.SequenceSource(fasta_file_path)
    output = u.FastaOutput(output_file_path)

    sys.stderr.write('\n%d samples will be %s from "%s": %s (...)\n' % (len(samples_list),
                                                                       'retained' if retain_samples else 'removed',
                                                                       fasta_file_path,
                                                                       ', '.join(samples_list[0:3])))

    try:
        # NOTE(review): next(fasta) is presumed to advance the reader to the
        # next record and return a false value at end of input -- confirm
        # against Oligotyping.lib.fastalib.SequenceSource.
        while next(fasta):
            if fasta.pos % 1000 == 0:
                sys.stderr.write('\rreads processed so far: %s' % (pp(fasta.pos)))
                sys.stderr.flush()

            # Everything before the last '_' ('' when the id has no '_').
            sample_name = fasta.id.rpartition('_')[0]

            # Retain mode writes listed samples; remove mode writes the rest.
            if (sample_name in samples) == keep_listed:
                output.store(fasta, split=False)
    finally:
        # Release both files even when reading or writing fails mid-way.
        fasta.close()
        output.close()

    sys.stderr.write('\rNew FASTA file .............: %s\n' % output_file_path)
# Command-line entry point: parse the arguments and hand them to main().
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Remove or retain samples from a given FASTA file')

    parser.add_argument('fasta', metavar = 'FASTA_FILE_PATH',
                        help = 'FASTA file to remove or retain samples from')
    parser.add_argument('samples', metavar = 'SAMPLES_FILE_PATH',
                        help = 'File that contains a sample name for each line')
    # Adjacent string literals are used instead of a backslash continuation
    # inside one literal, so the help text no longer depends on how the
    # source lines happen to be indented.
    parser.add_argument('--retain', action = 'store_true', default = False,
                        help = ('If declared, resulting FASTA file would contain samples that "match" '
                                'sample names listed in the "samples" file. The default behavior '
                                'is the vice versa.'))
    parser.add_argument('-o', '--output', metavar = 'OUTPUT_FILE_PATH', default = None,
                        help = 'Output file name.')

    args = parser.parse_args()

    # main() returns None, so a normal run exits with status 0.
    sys.exit(main(args.fasta,
                  args.samples,
                  args.retain,
                  args.output))