-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharchive.py
44 lines (30 loc) · 1.53 KB
/
archive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def calculate_gradient(alpha, gamma, iteration, objective, policy,
                       time_horizon, road_list, rush_hours, orange_time,
                       theta):
    """Estimate the gradient of the objective at ``theta``.

    Uses a two-sided simultaneous-perturbation finite difference: every
    component of ``theta`` is shifted at once along a random +/-1 direction,
    the objective is evaluated at both shifted points, and the difference is
    divided by ``2 * c_i * delta``.

    Args:
        alpha: Unused here; kept so existing callers keep working.
        gamma: Decay exponent of the perturbation size,
            ``c_i = 1 / (iteration + 1) ** gamma``.
        iteration: Zero-based iteration index of the outer optimisation loop.
        objective: Unused here; kept so existing callers keep working.
        policy, time_horizon, road_list, rush_hours, orange_time: Passed
            through unchanged to ``main``.
        theta: Current parameter vector; forwarded (perturbed) to ``main`` as
            ``light_times``. Must support ``len()`` and element-wise
            arithmetic with a NumPy array.

    Returns:
        NumPy array with one gradient-estimate entry per element of ``theta``.
    """
    # Perturbation size shrinks as the iteration count grows.
    c_i = 1 / ((iteration + 1) ** gamma)
    # Random direction with independent entries drawn from {-1, +1}.
    delta = 2 * np.random.randint(0, 2, size=len(theta)) - 1
    step = c_i * delta

    # NOTE(review): `main` is defined outside this block; it is assumed to
    # return a scalar objective value for the given light times -- confirm.
    objective_p = main(policy, time_horizon, road_list, rush_hours,
                       orange_time=orange_time, light_times=theta + step,
                       verbose=0)
    objective_n = main(policy, time_horizon, road_list, rush_hours,
                       orange_time=orange_time, light_times=theta - step,
                       verbose=0)

    # The whole product 2 * c_i * delta is the denominator. Writing
    # `/ 2 * c_i * delta` would multiply by c_i instead of dividing by it.
    return (objective_p - objective_n) / (2 * c_i * delta)
def gradient_decent(alpha, gamma, max_iter, initial_theta, policy,
                    time_horizon, road_list, rush_hours, orange_time):
    """Run ``max_iter`` stochastic gradient-descent steps on the light times.

    Each iteration estimates the gradient with ``calculate_gradient``, prints
    it, prints the objective at the current (pre-update) ``theta``, applies
    the step ``theta <- theta - a_i * gradient`` with
    ``a_i = 1 / (i + 1) ** alpha``, and prints the updated ``theta``.

    Args:
        alpha: Decay exponent of the step size ``a_i``.
        gamma: Decay exponent of the perturbation size, forwarded to
            ``calculate_gradient``.
        max_iter: Number of descent iterations to perform.
        initial_theta: Starting parameter vector; it is not modified in
            place, each update binds a new value.
        policy, time_horizon, road_list, rush_hours, orange_time: Passed
            through unchanged to ``main`` and ``calculate_gradient``.

    Returns:
        The parameter vector after the last update, or ``initial_theta``
        itself when ``max_iter`` is 0.
    """
    theta = initial_theta
    # NOTE(review): `main` is defined outside this block; it is assumed to
    # return a scalar objective value for the given light times -- confirm.
    current_obj = main(policy, time_horizon, road_list, rush_hours,
                       orange_time=orange_time, light_times=initial_theta,
                       verbose=0)
    for i in range(max_iter):
        # Step size shrinks as the iteration count grows.
        a_i = 1 / (i + 1) ** alpha
        gradient = calculate_gradient(alpha, gamma, i, current_obj, policy,
                                      time_horizon, road_list, rush_hours,
                                      orange_time, theta)
        print(gradient)
        # Extra evaluation used only to report progress; it is made before
        # the update, so it reflects the theta the gradient was taken at.
        obj = main(policy, time_horizon, road_list, rush_hours,
                   orange_time=orange_time, light_times=theta, verbose=0)
        print(obj)
        theta = theta - a_i * gradient
        print(theta)
    # Hand the optimised parameters back to the caller instead of dropping
    # them when the loop ends.
    return theta