-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcase2.py
88 lines (67 loc) · 3.63 KB
/
case2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import sys
import numpy as np
from sharedDefs import tsprint, stimestamp, stimediff
from case2Defs import drawSample, sequential, parallel, statistical
ECO_SEED = 23
#-----------------------------------------------------------------------------------------------------------
# CASE 2 - computing the average of a large sample
#-----------------------------------------------------------------------------------------------------------
def main(nc, ss, sz):
    """Draw a sample and estimate the center of its distribution.

    Args:
        nc: number of cores assigned to the task. 0 selects the approximate
            (statistical) solver, 1 selects the sequential solver, and any
            value above 1 selects the parallel solver, which receives <nc>.
        ss: sample size, in millions of points.
        sz: number of points handed to the approximate solver; it is only
            used when <nc> is 0.

    Raises:
        ValueError: if <nc> is neither 0 nor at least 1.
    """
    # rejects an invalid number of cores up front, so that no time or memory is spent
    # drawing a (potentially huge) sample that would never be processed
    if not (nc == 0 or nc >= 1):
        raise ValueError('Number of cores was wrongly specified.')

    # draws a sample of size <ss>, as specified in the command line
    tsprint('Drawing a sample with {0} million individuals.'.format(ss))
    ss = int(ss * 1E6)  # from here on, <ss> holds the number of points (not millions)
    sample = drawSample(ss, ECO_SEED)
    # NOTE(review): sys.getsizeof is shallow; the figure below reflects the whole sample only if
    # the object returned by drawSample accounts for its own data buffer -- TODO confirm
    tsprint('-- The drawn sample uses up {0:4.1f} MB.'.format(sys.getsizeof(sample)/(2 ** 20)))

    if nc == 0:
        # obtains the estimate for the center of the distribution using a statistical scheme
        alpha = 0.01
        tsprint('Sequential execution started (using an approximate solver with {0} points).'.format(sz))
        startTs = stimestamp()
        (lb, point_estimate, ub) = statistical(sample, ss, sz, alpha)
        finishTs = stimestamp()
        tsprint('Sequential execution completed.')
    elif nc == 1:
        # obtains the estimate for the center of the distribution using a sequential execution scheme
        tsprint('Sequential execution started (process {0}).'.format(os.getpid()))
        startTs = stimestamp()
        point_estimate = sequential(sample)
        finishTs = stimestamp()
        tsprint('Sequential execution completed.')
    else:
        # obtains the estimate for the center of the distribution using a parallel execution scheme
        tsprint('Parallel execution with {0} processes spawned from {1}.'.format(nc, os.getpid()))
        startTs = stimestamp()
        point_estimate = parallel(sample, nc)
        finishTs = stimestamp()
        tsprint('Parallel execution completed.')

    # presents the obtained results (the confidence interval is only available from the statistical scheme)
    tsprint('-- the point estimate for the center of the distribution is {0:4.1f}.'.format(point_estimate))
    if nc == 0:
        tsprint('-- we are {2:4.1f}% confident that the real value is between {0:4.1f} and {1:4.1f}.'.format(lb, ub, 100 * (1 - alpha)))
    tsprint('-- the process took about {0} seconds to complete.'.format(stimediff(finishTs, startTs)))
if __name__ == "__main__":
    # command line:
    #
    # python case2.py <number of cores> <sample size> [<bootstrap sample size>]
    # -- <number of cores>: how many cores can be assigned to tasks;
    #    0 commands sequential execution of an approximate solver
    #    1 commands sequential execution of an "exact" solver
    #    >1 commands parallel execution of an "exact" solver
    # -- <sample size>: number of points in the sample, in millions (e.g. '1' means 1 million points)
    # -- <bootstrap sample size>: number of points in the bootstrap (re)sample;
    #    required only when <number of cores> is 0
    usage = 'usage: python case2.py <number of cores> <sample size> [<bootstrap sample size>]'

    # exits with a usage message (instead of an IndexError traceback) when arguments are missing
    if len(sys.argv) < 3:
        sys.exit(usage)

    nc = int(sys.argv[1])
    ss = int(sys.argv[2])  # sample size, in millions of points

    if nc == 0:
        # this is a call for a sequential execution using an approximated solver ...
        if len(sys.argv) < 4:
            sys.exit(usage + '\n<bootstrap sample size> is required when <number of cores> is 0')
        sz = int(sys.argv[3])  # ... so we need the (re)sample size for the approximation
    else:
        sz = None

    main(nc, ss, sz)