-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathfetch_entrez_seq.py
35 lines (29 loc) · 992 Bytes
/
fetch_entrez_seq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
'''Read sequence IDs or gene names from a text file and retrieve the
corresponding sequences from the Entrez database.
'''
import sys
from Bio import Entrez, SeqIO
def fetch(queries, email, db="protein",
          rettype="fasta", retmode="text"):
    '''Fetch every query from Entrez and write each record to stdout as FASTA.

    queries -- iterable of identifiers; each one is passed as ``id`` to
               Entrez.efetch in its own request
    email   -- contact address; when non-empty it is assigned to
               Entrez.email so the argument actually takes effect
    db, rettype, retmode -- forwarded unchanged to Entrez.efetch; rettype is
               also used as the SeqIO format name when parsing the response

    The first 45 characters of each record's description are echoed to
    stderr as a progress indication.
    '''
    if email:
        Entrez.email = email
    for qid in queries:
        handle = Entrez.efetch(db=db, rettype=rettype,
                               retmode=retmode, id=qid)
        # Close the handle even when parsing fails, so a bad response does
        # not leak the underlying connection.
        try:
            seq_record = SeqIO.read(handle, rettype)
        finally:
            handle.close()
        SeqIO.write(seq_record, sys.stdout, 'fasta')
        # sys.stderr.write works on both Python 2 and 3, unlike the
        # ``print >> sys.stderr`` statement form.
        sys.stderr.write('%s...\n' % seq_record.description[:45])
def read_id(id_file):
    '''Return the list of queries stored one per line in *id_file*.

    id_file -- path of the text file to read

    Leading and trailing whitespace is stripped from every line, and lines
    that are empty after stripping are skipped so they do not become empty
    queries.
    '''
    # Read from the path that was passed in (not sys.argv) and let the
    # ``with`` block close the file.
    with open(id_file) as id_handle:
        stripped = (line.strip() for line in id_handle)
        return [query for query in stripped if query]
if __name__ == '__main__':
    # Two positional arguments are required: the ID file and a contact
    # email, which is stored on Entrez.email.
    try:
        id_file = sys.argv[1]
        Entrez.email = sys.argv[2]
    except IndexError:
        # sys.stderr.write works on both Python 2 and 3; exit non-zero so
        # callers can detect the usage error.
        sys.stderr.write(
            'Usage: python fetch_entrez_seq.py <id file> <email>\n')
        raise SystemExit(1)
    else:
        queries = read_id(id_file)
        fetch(queries, Entrez.email)