forked from mathemajician/AIQ
-
Notifications
You must be signed in to change notification settings - Fork 1
/
ComputeFromLog.py
148 lines (112 loc) · 4.08 KB
/
ComputeFromLog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#
# Estimate AIQ from a file of log results
#
# Copyright Shane Legg 2011
# Copyright Ondřej Vadinský 2018, 2023
# Copyright Petr Zeman 2023
# Copyright Jan Štipl 2023
# Released under GNU GPLv3
from numpy import ones, zeros, floor, array, sqrt, cov
import getopt, sys
from os.path import basename
def estimate(file, detailed):
    """Estimate AIQ from an open log file and print the result.

    The first line of *file* is the strata distribution: whitespace-separated
    probabilities, one per active stratum.  Every following non-blank line is
    one log record whose whitespace-separated fields are read as
    ``stamp stratum perf1 perf2 ...``; only the stratum index and the two
    performance values (the positive and negative antithetic runs) are used.

    The summary ``<samples> <estimate> +/- <95% half interval> SD <weighted SD>``
    is printed followed by a single space and no newline, so the caller can
    append text to the same line.  When *detailed* is true a per-stratum table
    is printed first.

    Args:
        file: iterable text stream positioned at the start of the log.
        detailed: if true, also print per-stratum sample counts, means and
            half confidence intervals.

    Returns:
        None; all results are written to standard output.

    Raises:
        IndexError: if a non-blank record has fewer than four fields or names
            a stratum beyond the distribution.
        ValueError: if a numeric field cannot be parsed.
    """
    # Load the strata distribution.  Index 0 is the passive stratum and gets
    # probability zero, so active strata are numbered from 1.
    probs = array(["0.0"] + file.readline().split(), float)
    num_strata = len(probs)  # number of strata, including passive

    # Samples grouped by stratum; each sample is a (perf1, perf2) pair.
    samples = [[] for _ in range(num_strata)]
    samples[0] = [0]
    std_devs = ones(num_strata)  # estimated standard deviation per stratum

    # Read in the log file results.
    num_samples = 0
    for record in file:
        fields = record.split()
        if not fields:
            # Blank lines (e.g. a trailing newline) carry no data.
            continue
        stratum = int(fields[1])
        samples[stratum].append((float(fields[2]), float(fields[3])))
        num_samples += 2  # each record holds two antithetic runs

    # Compute the empirical standard deviation for each stratum.
    for i in range(1, num_strata):
        if probs[i] > 0.0 and len(samples[i]) > 2:
            pairs = array(samples[i])
            sample1 = pairs[:, 0]  # positive antithetic runs
            sample2 = pairs[:, 1]  # negative antithetic runs
            s1 = sample1.std(ddof=1)  # 1 degree of freedom
            s2 = sample2.std(ddof=1)  # 1 degree of freedom
            covariance = cov(sample1, sample2)[0, 1]  # default is 1 df
            # Sample variance of the pair means (perf1 + perf2) / 2.
            var = 0.25 * (s1 * s1 + s2 * s2 + 2.0 * covariance)
            # This quantity is a variance, so it cannot truly be negative;
            # any value at or below the tolerance (including negative
            # rounding residue of any size) is treated as zero so that sqrt
            # never yields NaN.
            if var <= 1e-10:
                var = 0.0
            std_devs[i] = sqrt(var)
        else:
            std_devs[i] = 1.0

    # Report current estimates by stratum.
    if detailed:
        for i in range(1, num_strata):
            stratum_samples = len(samples[i]) * 2.0
            print(" % 3d % 5d" % (i, stratum_samples), end=' ')
            if stratum_samples == 0:
                # no samples, so skip mean and half CI
                print()
            elif stratum_samples < 4:
                # don't report half CI with less than 4 samples
                print(" % 6.1f" % (array(samples[i]).mean()))
            else:
                # do a full report
                print(" % 6.1f +/- % 5.1f SD % 5.1f"
                      % (array(samples[i]).mean(),
                         1.96 * std_devs[i] / sqrt(stratum_samples),
                         std_devs[i]))
        print()

    # Compute the current estimate and 95% confidence interval.
    est = 0.0
    for i in range(1, num_strata):
        stratum_samples = len(samples[i]) * 2.0
        if probs[i] > 0.0 and stratum_samples > 2:
            est += probs[i] / stratum_samples * array(samples[i]).sum()

    ssd = sum(probs * std_devs)  # probability-weighted standard deviation
    delta = 1.96 * ssd / sqrt(num_samples)
    print(f"{num_samples:6d} {est: 5.1f} +/- {delta: 5.1f} SD {ssd: 5.1f}", end=' ')
# print basic usage
def usage():
    """Print the command-line synopsis for this script to standard output."""
    synopsis = "python ComputeFromLog [--full] log_file_name [log_file_name ...]"
    print(synopsis)
# main function: parses the command line, then runs the estimator on each log file
# Module-level state, initialised to unset/off.  NOTE(review): the names
# suggest optional logging support, but no visible code reads these values;
# `logging` also shares its name with the standard-library module.
log_file = None
logging = False
def main():
    """Command-line entry point: print an AIQ estimate for each log file.

    Reads the arguments from ``sys.argv``: an optional leading ``--full``
    flag requests the per-stratum report, and every remaining argument is
    treated as a log file name.  If no file name is given, the usage line is
    printed and the process exits via ``sys.exit()``.

    For each file the estimate is printed, followed on the same line by
    ``:<base name of the file>``; with ``--full`` a blank line is printed
    after each file.
    """
    print()
    print("Compute AIQ from log file results, version 1.0")
    print()

    # Work on a copy so the process-wide sys.argv is left untouched and
    # main() behaves the same if it is called more than once.
    args = sys.argv[1:]

    detailed = bool(args) and args[0] == "--full"
    if detailed:
        args = args[1:]

    if not args:
        usage()
        sys.exit()

    for file_name in args:
        # The context manager closes the file even if estimate() raises.
        with open(file_name, 'r') as file:
            estimate(file, detailed)
        print(":" + basename(file_name))
        if detailed:
            print()
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()