# example_batch_bkb.py
# MIT License
#
# Copyright (c) 2020 Laboratory for Computational and Statistical Learning
#
# authors: Daniele Calandriello, Luigi Carratino
# email: [email protected]
# Website: http://lcsl.mit.edu
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import numpy as np
from bkb_lib import Batch_BKB
from sklearn.gaussian_process.kernels import PairwiseKernel
import matplotlib.pyplot as plt
# --- Synthetic linear bandit problem ---------------------------------------
# One seeded generator drives the weights, the arms and the feedback noise,
# so repeated runs draw the same random numbers.
r_state = np.random.RandomState(seed=42)

d = 3      # number of features per arm
k = 1000   # number of candidate arms
T = 2000   # number of arm pulls simulated by the script

# Linear pairwise kernel used to score arms against w_star.
dot_func = PairwiseKernel(metric='linear')

# Hidden weight vector, stored as a single row.
w_star = r_state.randn(d).reshape(1, -1)
# Random feature vectors, one arm per row.
arms = r_state.randn(k, d)

# Noise-free reward of every arm: the kernel between each arm and w_star.
arms_score = dot_func(arms, w_star)
best_arm = np.argmax(arms_score)

# The feedback noise variance is noise_ratio times the spread (max - min)
# of the noise-free rewards.
noise_ratio = 0.01
score_spread = arms_score.max() - arms_score.min()
noise_std = np.sqrt(score_spread * noise_ratio)


def f(x):
    """Return the noisy reward for the arm features ``x``.

    The reward is the kernel between ``x`` and ``w_star`` plus one
    standard-normal draw from ``r_state`` scaled by ``noise_std``.
    """
    clean_reward = dot_func(x, w_star)
    return clean_reward + r_state.randn() * noise_std
# Both `lam` and `noise_variance` are set to the variance of the simulated
# feedback noise.
noise_var = noise_std ** 2.
bbkb_alg = Batch_BKB(
    ratio_threshold=1.,
    lam=noise_var,
    dot=dot_func,
    noise_variance=noise_var,
    fnorm=1.0,
    delta=0.5,
    qbar=1,
    verbose=0,
)

# Regret bookkeeping, filled in while the simulation runs.
instant_regret = np.zeros(T)
cum_regret = 0

# Initialize the algorithm with noisy feedback for the first two arms; the
# feedback is flattened to 1-D before it is handed over.
init_arms_idx = [0, 1]
init_feedback = np.array([f(arms[idx, :]) for idx in init_arms_idx])
bbkb_alg.initialize(arms, init_arms_idx, init_feedback.reshape(-1))
# Run the simulation until exactly T arms have been pulled.
cumulative_regret_over_time = np.zeros((T,))
t = 0

# Flatten the scores once so every regret below is a plain scalar: assigning
# a size-1 array to a single array element is deprecated since NumPy 1.25.
flat_scores = np.ravel(arms_score)
best_score = flat_scores[best_arm]

while t < T:
    # predict() returns the next batch of arm indices together with UCB
    # values; the latter are not needed by this script.
    chosen_arms_idx, _ = bbkb_alg.predict()
    if len(chosen_arms_idx) == 0:
        # An empty batch would never advance t, so the loop could not end.
        raise RuntimeError('Batch_BKB.predict() returned an empty batch')
    # Truncate the final batch so that no more than T pulls are recorded.
    if t + len(chosen_arms_idx) > T:
        chosen_arms_idx = chosen_arms_idx[:T - t]

    # Pull every arm of the batch, recording its feedback and regret.
    feedback_list = []
    for arm_idx in chosen_arms_idx:
        feedback_list.append(f(arms[arm_idx, :]))
        instant_regret[t] = best_score - flat_scores[arm_idx]
        cum_regret += instant_regret[t]
        cumulative_regret_over_time[t] = cum_regret
        t += 1

    # The algorithm receives the feedback only after the whole batch has
    # been pulled.
    # NOTE(review): initialize() is given 1-D feedback while this array keeps
    # the per-pull shape returned by f -- confirm update() accepts both.
    bbkb_alg.update(chosen_arms_idx, np.array(feedback_list), r_state)
# Plot the cumulative regret against the pull index t.
ax = plt.gca()
ax.plot(np.arange(T), cumulative_regret_over_time)
ax.set_xlabel('t')
ax.set_ylabel('cumulative regret')
plt.show()