-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathomnifold.py
114 lines (83 loc) · 3.8 KB
/
omnifold.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from sklearn.model_selection import train_test_split
def reweight(events, model, batch_size=10000):
    """Convert the model's predictions for *events* into per-event weights.

    The model is evaluated with ``model.predict(events, batch_size=batch_size)``
    and every output ``f`` is mapped to ``f / (1 - f)``.

    Parameters
    ----------
    events : input forwarded unchanged to ``model.predict``.
    model : object exposing ``predict(events, batch_size=...)``.
    batch_size : batch size forwarded to ``model.predict``.

    Returns
    -------
    The ratios after ``np.nan_to_num`` (NaN / infinite entries are replaced
    by finite numbers) and ``np.squeeze`` (length-1 axes are dropped).
    """
    scores = model.predict(events, batch_size=batch_size)
    ratios = np.nan_to_num(scores / (1. - scores))
    return np.squeeze(ratios)
# Weighted binary cross-entropy for classifying two samples.
# The per-event weights travel "hidden" inside y_true, next to the labels:
# column 0 of y_true is the label, column 1 is the event weight.
def weighted_binary_crossentropy(y_true, y_pred):
    """Return the mean weighted binary cross-entropy over all entries.

    y_true -- tensor whose column 0 holds the labels and whose column 1 holds
              the event weights (both are extracted with ``tf.gather``).
    y_pred -- predictions; clipped to [epsilon, 1 - epsilon] before the logs
              are taken.
    """
    event_weights = tf.gather(y_true, [1], axis=1)  # hidden event weights
    labels = tf.gather(y_true, [0], axis=1)  # the real labels used by the loss

    # Keep predictions away from 0 and 1 so the logs cannot yield NaN/Inf
    eps = K.epsilon()
    clipped = K.clip(y_pred, eps, 1. - eps)

    label_one_term = labels * K.log(clipped)
    label_zero_term = (1 - labels) * K.log(1 - clipped)
    per_event_loss = -event_weights * (label_one_term + label_zero_term)
    return K.mean(per_event_loss)
def _fit_weighted_classifier(model, xvals, yvals, event_weights,
                             epochs, batch_size, verbose):
    """Split a weighted two-class sample, then compile and fit *model* on it."""
    X_train, X_test, Y_train, Y_test, w_train, w_test = train_test_split(
        xvals, yvals, event_weights)

    # zip ("hide") the weights with the labels so the loss can read them
    Y_train = np.stack((Y_train, w_train), axis=1)
    Y_test = np.stack((Y_test, w_test), axis=1)

    model.compile(loss=weighted_binary_crossentropy,
                  optimizer='Adam',
                  metrics=['accuracy'])
    model.fit(X_train,
              Y_train,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(X_test, Y_test),
              verbose=verbose)


def omnifold(theta0, theta_unknown_S, iterations, model, verbose=0,
             epochs=20, batch_size_step1=10000, batch_size_step2=2000):
    """Run the two-step iterative reweighting and return all weights.

    Each iteration performs two classifier fits on the same ``model`` object
    (it is compiled and fitted again at every step):

    * Step 1 classifies Sim. (label 0, weighted by the current push weights)
      against Data (label 1, unit weights).  The pull weights are the push
      weights multiplied by ``reweight(theta0_S, model)``.
    * Step 2 classifies Gen. (label 0, unit weights) against the same Gen.
      events weighted by the pull weights (label 1).  The new push weights
      are ``reweight(theta0_G, model)``.

    Parameters
    ----------
    theta0 : array indexed as ``theta0[:, 0]`` (Gen.) and ``theta0[:, 1]``
        (Sim.).
    theta_unknown_S : the Data sample that Sim. is reweighted towards.
    iterations : number of two-step iterations to run.
    model : object providing ``compile``, ``fit`` and ``predict``.
    verbose : forwarded to ``model.fit``; values > 0 also print progress.
    epochs : number of epochs for every fit (default 20).
    batch_size_step1 : fit batch size used in step 1 (default 10000).
    batch_size_step2 : fit batch size used in step 2 (default 2000).

    Returns
    -------
    numpy array of shape ``(iterations, 2, len(theta0))``; ``[i, 0]`` holds
    the step-1 (pull) weights and ``[i, 1]`` the step-2 (push) weights of
    iteration ``i``.
    """
    # shape = (iteration, step, event)
    weights = np.empty(shape=(iterations, 2, len(theta0)))

    theta0_G = theta0[:, 0]
    theta0_S = theta0[:, 1]

    labels0 = np.zeros(len(theta0))
    labels_unknown = np.ones(len(theta_unknown_S))
    labels_unknown_step2 = np.ones(len(theta0_G))

    xvals_1 = np.concatenate((theta0_S, theta_unknown_S))
    yvals_1 = np.concatenate((labels0, labels_unknown))

    xvals_2 = np.concatenate((theta0_G, theta0_G))
    yvals_2 = np.concatenate((labels0, labels_unknown_step2))

    # Loop-invariant unit weights: Data in step 1, plain Gen. in step 2
    unit_weights_data = np.ones(len(theta_unknown_S))
    unit_weights_gen = np.ones(len(theta0_G))

    # initial iterative weights are ones
    weights_push = np.ones(len(theta0_S))

    for i in range(iterations):
        if verbose > 0:
            print("\nITERATION: {}\n".format(i + 1))

        # STEP 1: classify Sim. (which is reweighted by weights_push) to Data
        # weights reweighted Sim. --> Data
        if verbose > 0:
            print("STEP 1\n")

        weights_1 = np.concatenate((weights_push, unit_weights_data))
        _fit_weighted_classifier(model, xvals_1, yvals_1, weights_1,
                                 epochs, batch_size_step1, verbose)

        weights_pull = weights_push * reweight(theta0_S, model)
        weights[i, 0, :] = weights_pull

        # STEP 2: classify Gen. to reweighted Gen. (which is reweighted by weights_pull)
        # weights Gen. --> reweighted Gen.
        if verbose > 0:
            print("\nSTEP 2\n")

        # ones for Gen. (not MC weights), actual weights for (reweighted) Gen.
        weights_2 = np.concatenate((unit_weights_gen, weights_pull))
        _fit_weighted_classifier(model, xvals_2, yvals_2, weights_2,
                                 epochs, batch_size_step2, verbose)

        weights_push = reweight(theta0_G, model)
        weights[i, 1, :] = weights_push

    return weights