-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcladecompare.py
executable file
·92 lines (80 loc) · 3.5 KB
/
cladecompare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python
"""Compare protein sequence alignments to identify contrasting sites."""
import logging
import sys
from cladecomparelib.core import process_args
if __name__ == '__main__':
import argparse
AP = argparse.ArgumentParser(description=__doc__,
epilog="""Examples:
# Basic
cladecompare.py fg.seq bg.seq > fg-v-bg.out
cladecompare.py fg.cma bg.cma > fg-v-bg.out
# Save per-site p-values and "pattern" file of significant sites
cladecompare.py fg.seq bg.seq -p fg-v-bg.pttrn -o fg-v-bg.out
# Align sequences on the fly by giving the MAPGAPS profile
cladecompare.py fg.fasta bg.fasta -s urn --mapgaps /share/data/PK \\
-p fg.pttrn -o fg.out
# HMMer/PDB style
cladecompare.py fg.fasta bg.fasta --hmm Pkinase.hmm --pdb 1ATP.pdb
See README or http://github.com/etal/cladecompare for full documentation.
""", formatter_class=argparse.RawDescriptionHelpFormatter)
# Input
AP.add_argument('foreground',
help="Foreground sequence (alignment) file.")
AP.add_argument('background',
nargs='*',
help="""Background sequence (alignment) file, or additional
foreground alignments for each-vs-all comparisons.""")
AP.add_argument('-f', '--format',
default='fasta',
help="Input alignment format (default: fasta).")
AP.add_argument('--hmm',
help="""Align the foreground and background sequences with this
HMMer 3.0 profile.""")
AP.add_argument('--mapgaps',
help="""Align the foreground and background sequences with this
MAPGAPS profile.""")
AP.add_argument('--pdb',
action='append',
help="""3D structure coordinates in Protein Data Bank format.""")
# Options
# add_argument_group?
AP.add_argument('-a', '--alpha',
default=0.005, type=float,
help="Significance threshold for pattern columns.")
AP.add_argument('-s', '--strategy',
default='gtest',
choices=('gtest', 'urn', 'phospho', 'jsd', 'hypg'),
help="""Strategy used to compare alignments:
'gtest' = G-test of all character frequencies;
'urn' = ball-in-urn model of consensus residue conservation;
'phospho' = urn model for phosphorylatable residues (S/T/Y);
'jsd' = Jensen-Shannon divergence.
'hypg' = hypergeometric distribution
""")
AP.add_argument('-w', '--weight',
action='store_true',
help="""Enable sequence weighting for statistical tests.""")
# Output
AP.add_argument('-o', '--output',
default=sys.stdout,
help="""Write per-column probabilities (standard output) to this
filename, for use with cladereport.py. (Single-foreground
comparison only.""")
AP.add_argument('-p', '--pattern',
help="""Write an mcBPPS-style pattern file to this filename.
(Single-foreground comparison only.""")
AP.add_argument('--pmlout',
help="Write a PyMOL script to this filename (if --pdb given).")
AP.add_argument('-q', '--quiet',
action='store_true',
help="Don't print status messages, only warnings and errors.")
args = AP.parse_args()
if args.quiet:
logging.basicConfig(level=logging.WARNING,
format="%(module)s: %(message)s")
else:
logging.basicConfig(level=logging.INFO,
format="%(module)s [@%(lineno)s]: %(message)s")
process_args(args)