-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathamino acid analysis (count).py
56 lines (53 loc) · 2.73 KB
/
amino acid analysis (count).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import fastaparser
from collections import Counter
# Reference table printed at the top of every report:
# (1-letter code, 3-letter code or '-', full name / meaning).
AMINO_ACID_TABLE = (
    ('G', 'Gly', 'Glycine'),
    ('A', 'Ala', 'Alanine'),
    ('L', 'Leu', 'Leucine'),
    ('M', 'Met', 'Methionine'),
    ('F', 'Phe', 'Phenylalanine'),
    ('W', 'Trp', 'Tryptophan'),
    ('K', 'Lys', 'Lysine'),
    ('Q', 'Gln', 'Glutamine'),
    ('E', 'Glu', 'Glutamic Acid'),
    ('S', 'Ser', 'Serine'),
    ('P', 'Pro', 'Proline'),
    ('V', 'Val', 'Valine'),
    ('I', 'Ile', 'Isoleucine'),
    ('C', 'Cys', 'Cysteine'),
    ('Y', 'Tyr', 'Tyrosine'),
    ('H', 'His', 'Histidine'),
    ('R', 'Arg', 'Arginine'),
    ('N', 'Asn', 'Asparagine'),
    ('D', 'Asp', 'Aspartic Acid'),
    ('T', 'Thr', 'Threonine'),
    ('B', '-', 'Aspartic Acid or Asparagine'),
    ('J', '-', 'Leucine or Isoleucine'),
    ('O', '-', 'Pyrrolysine'),
    ('U', '-', 'Selenocysteine'),
    ('Z', '-', 'Glutamic Acid or Glutamine'),
    ('X', '-', 'any'),
    ('*', '-', 'stop'),
)

# Horizontal rule used to frame the reference table in the report.
_RULE = "----------------------------------------------------------"


def write_code_table(report):
    """Write the amino acid code reference table to *report*.

    *report* is any writable text file object. The layout (rules, heading,
    one ``code \\t three-letter \\t name`` row per entry) is fixed.
    """
    print(_RULE, file=report)
    print("Table of amino acids including their 3- or 1-letter codes:", file=report)
    print(_RULE + "\n ", file=report)
    for one_letter, three_letter, name in AMINO_ACID_TABLE:
        # print() inserts a space around each tab; kept so the report
        # format stays exactly as before.
        print(one_letter, '\t', three_letter, '\t', name, file=report)
    print(_RULE + "\n " + _RULE, file=report)


def read_fasta_sequence(filename):
    """Return all sequences of the FASTA file *filename* concatenated.

    The concatenated sequence is also written to ``filename + ".txt"``,
    because earlier versions of this script produced that file and users
    may rely on it.
    """
    with open(filename) as fasta_file:
        parser = fastaparser.Reader(fasta_file, parse_method='quick')
        # Each record exposes ``header`` and ``sequence``; headers are dropped.
        # The join must run while the file is still open.
        sequence = "".join(record.sequence for record in parser)
    with open(filename + ".txt", 'w') as flat_file:
        flat_file.write(sequence)
    return sequence


def read_text_sequence(filename):
    """Return the raw content of the plain-text sequence file *filename*."""
    with open(filename) as sequence_file:
        return sequence_file.read()


# Maps the menu choice entered by the user to the matching file reader.
_READERS = {
    "1": read_fasta_sequence,
    "2": read_text_sequence,
}


def count_amino_acids(sequence):
    """Return ``(length, counts)`` for the residues in *sequence*.

    Whitespace (line breaks, spaces, tabs) is layout, not sequence data, so
    it is removed before counting. *counts* is a ``Counter`` keyed by the
    single-character residue code.
    """
    residues = "".join(sequence.split())
    return len(residues), Counter(residues)


def analyse(choice, filename):
    """Read the sequence file and write the analysis report.

    *choice* is ``"1"`` for a FASTA file or ``"2"`` for a plain-text file.
    The report is written to ``filename + "_analysis_full.txt"`` and its
    path is returned.

    Raises ``ValueError`` for any other *choice*, before any file is touched.
    """
    try:
        reader = _READERS[choice]
    except KeyError:
        raise ValueError(
            "Invalid choice %r: enter 1 for FASTA or 2 for TXT." % (choice,)
        ) from None

    # Read the input first so a missing or unreadable file does not leave a
    # half-written report behind.
    sequence = reader(filename)
    seq_length, amino_count = count_amino_acids(sequence)

    report_path = filename + "_analysis_full.txt"
    with open(report_path, "w") as report:
        write_code_table(report)
        print("absolute composition of ", seq_length, " amino acids: \n", amino_count, file=report)
    return report_path


def main():
    """Prompt for the file type and path, then run the analysis."""
    print("This tool will count amino acids in sequences according to FASTA nomenclature. Further BioSeqIO analysis is not supported.")
    choice = input("Do you want to submit a FASTA- or TXT-file ?\n (1) FASTA\n (2) TXT\n").strip()
    filename = input("Please enter the full path of the amino acid sequence file!: ")
    if choice not in _READERS:
        # Previously this case crashed with a NameError after the report
        # file had already been created.
        raise SystemExit("Invalid choice %r: enter 1 for FASTA or 2 for TXT." % (choice,))
    analyse(choice, filename)


if __name__ == "__main__":
    main()