-
Notifications
You must be signed in to change notification settings - Fork 0
/
lab8.py
103 lines (82 loc) · 3.34 KB
/
lab8.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
## fewer than 100 genbanks at a time on their preferred server
##
##from Bio import Entrez
##Entrez.email = "[email protected]"
##
##handle = Entrez.einfo()
##
##record = Entrez.read(handle)
##record["DbList"]
##from Bio import SeqIO
##shortSeq = []
##for record in SeqIO.parse(open("cor6_6.gb","rU"),"genbank"): ## there is no such file currently
## if len(record.seq) < 300:
## shortSeq.append(record)
##print "Found {0} short sequences".format(len(shortSeq))
##
##outputHandle = open("shortSeqs.fasta","w")
##SeqIO.write(shortSeq,outputHandle,"fasta")
##outputHandle.close()
def translate(dna):
    """Translate a nucleotide sequence into a one-letter protein string.

    The sequence is read from its first base, three bases at a time, and
    each codon is looked up in the table below; stop codons are written
    as '_'. A trailing partial codon (one or two leftover bases) is
    ignored.

    The input is upper-cased and every 'U' is read as 'T', so lower-case
    and RNA sequences are accepted as well. A codon that is not in the
    table (for example one containing 'N') is reported as 'X' instead of
    failing with a TypeError on the missing lookup.

    dna     -- string of nucleotide letters
    returns -- string with one character per complete codon
    """
    gencode = {'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M', 'ACA':'T',
               'ACC':'T', 'ACG':'T', 'ACT':'T', 'AAC':'N', 'AAT':'N',
               'AAA':'K', 'AAG':'K', 'AGC':'S', 'AGT':'S', 'AGA':'R',
               'AGG':'R', 'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
               'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 'CAC':'H',
               'CAT':'H', 'CAA':'Q', 'CAG':'Q', 'CGA':'R', 'CGC':'R',
               'CGG':'R', 'CGT':'R', 'GTA':'V', 'GTC':'V', 'GTG':'V',
               'GTT':'V', 'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
               'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', 'GGA':'G',
               'GGC':'G', 'GGG':'G', 'GGT':'G', 'TCA':'S', 'TCC':'S',
               'TCG':'S', 'TCT':'S', 'TTC':'F', 'TTT':'F', 'TTA':'L',
               'TTG':'L', 'TAC':'Y', 'TAT':'Y', 'TAA':'_', 'TAG':'_',
               'TGC':'C', 'TGT':'C', 'TGA':'_', 'TGG':'W'}
    # Normalize once so the table only needs upper-case DNA codons.
    seq = dna.upper().replace('U', 'T')
    residues = []
    # Stopping at len(seq) - 2 skips any incomplete codon at the end.
    for start in range(0, len(seq) - 2, 3):
        # 'X' marks a codon the table cannot resolve.
        residues.append(gencode.get(seq[start:start + 3], 'X'))
    return "".join(residues)
def bpCounts(dna):
    """Count how many times each symbol occurs in a sequence.

    Nothing here is specific to DNA: every distinct item produced by
    iterating over `dna` gets its own entry, so RNA and protein strings
    work the same way.

    dna     -- sequence to scan (any iterable of hashable symbols)
    returns -- dict mapping each symbol to its number of occurrences
    """
    tally = {}
    for symbol in dna:
        # A symbol seen for the first time starts from zero.
        tally[symbol] = tally.get(symbol, 0) + 1
    return tally
if __name__ == '__main__':
    # Demo run: translate one DNA string, then count the symbols of a
    # DNA, a protein and an RNA string.
    # Each print takes a single pre-formatted string so the output is the
    # same under Python 2 and Python 3 (the print statement form is a
    # SyntaxError on Python 3).
    dna = "GATGGAACTTGACTACGTAAATT"
    print("translate {0}".format(translate(dna)))
    print("counts {0}".format(bpCounts(dna)))
    prot = "IMTNTHRVGSSL_WEAGFYLCSGGGQ_"
    print("protCount {0}".format(bpCounts(prot)))
    rna = "GAUUACAUCGAGCUCA"
    print("rnaCount {0}".format(bpCounts(rna)))
##def translateMany(data,dataType):
## gencode = {'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M', 'ACA':'T',
## 'ACC':'T', 'ACG':'T', 'ACT':'T', 'AAC':'N', 'AAT':'N',
## 'AAA':'K', 'AAG':'K', 'AGC':'S', 'AGT':'S', 'AGA':'R',
## 'AGG':'R', 'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
## 'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P', 'CAC':'H',
## 'CAT':'H', 'CAA':'Q', 'CAG':'Q', 'CGA':'R', 'CGC':'R',
## 'CGG':'R', 'CGT':'R', 'GTA':'V', 'GTC':'V', 'GTG':'V',
## 'GTT':'V', 'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
## 'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', 'GGA':'G',
## 'GGC':'G', 'GGG':'G', 'GGT':'G', 'TCA':'S', 'TCC':'S',
## 'TCG':'S', 'TCT':'S', 'TTC':'F', 'TTT':'F', 'TTA':'L',
## 'TTG':'L', 'TAC':'Y', 'TAT':'Y', 'TAA':'_', 'TAG':'_',
## 'TGC':'C', 'TGT':'C', 'TGA':'_', 'TGG':'W'}
##
## out = ""
##
## if dataType != "PROTEIN":
## if dataType == "RNA":
## rna = data.replace("T","U")
## out = translate(rna)
## else:
## out = translate(data)
## else:
##
##
## return out