-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsekhon.py
executable file
·87 lines (56 loc) · 2.61 KB
/
sekhon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python
from __future__ import division
from scipy.stats import beta
import time
import scipy
__version__ = "0.1.1"
def test(a, b, c, d, prob_diff=0.0):
"""Run the Sekhon Fisher test of P1 - P2 > prob_diff"""
def f(t):
return (1 - beta.cdf(t+prob_diff, a+1, c+1))*beta.pdf(t, b+1, d+1)
return scipy.integrate.quad(f, 0, 1)[0]
def simulation(a, b, c, d, samples=100000, prob_diff=0.0, significance_level=0.95,
ensure_convergence=False, ensure_samples=10000000, ensure_radius=0.005):
"""For what it's worth... Not as accurate or efficient as the integration,
but still fun to play with"""
difference = beta.rvs(a + 1, c + 1, size=samples) - beta.rvs(b + 1, d + 1, size=samples)
successes = (difference > prob_diff).sum()
result = successes / samples
if ensure_convergence and abs(result - significance_level) < ensure_radius:
result = test(a, b, c, d, samples=ensure_samples, prob_diff=prob_diff, ensure_convergence=False)
return result
def simulation_convergence_test(samples, trials):
"""Again, largely for fun at this point"""
t0 = time.time()
results = []
for i in xrange(0, trials):
results.append(simulation(8, 250, 3, 250, samples))
t1 = time.time()
print "Mean:", scipy.mean(results)
print "Std Dev:", scipy.std(results)
print "Range:", max(results) - min(results)
print "Time:", t1 - t0, "\n"
def cli():
import argparse
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
description="""
Sekhon (v {}) contingency table test for table
X1 X2
succ a b
fail c d
Evaluates posterior probability that P1 - P2 > prob_diff.
See "Making Inferences from 2x2 Tables: The Inadequacy of the Fisher Exact Test
for Observational Data and a Principled Bayesian Alternative" by Jasjeet S. Sekhon, 2005
(http://polmeth.wustl.edu/media/Paper/SekhonFisherTest.pdf)""".format(__version__)
)
parser.add_argument('a', type=int, help='# sucesses in X1')
parser.add_argument('b', type=int, help='# sucesses in X2')
parser.add_argument('c', type=int, help='# failures in X1')
parser.add_argument('d', type=int, help='# failures in X2')
parser.add_argument('--prob-diff', type=float, default=0.0, help='Test that P1 - P2 > prob-diff')
args = parser.parse_args()
a, b, c, d = [getattr(args, x) for x in ['a', 'b', 'c', 'd']]
result = test(a, b, c, d, prob_diff=args.prob_diff)
print "\nP ( P1 - P2 > prob-diff ) =", result, '\n'
if __name__ == '__main__':
cli()