run_DQN_static_agent.py
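
"""Run script: train and/or evaluate a DQN agent that removes bit-flip syndromes
on a d x d board of plaquettes and stars, and benchmark its success rates against
the MWPM baseline."""
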
from evaluation_functions_static import evaluate_error_rates, evaluate_fixed_errors
from plot_functions import plot_benchmark_MWPM
import numpy as np
from DQN_static_agent import DQN_agent
# SETTINGS FOR RUNNING THIS SCRIPT
train=True # if True, the agent is trained; if False, a trained agent is loaded using the settings specified below. Please specify the storing folder in 'DQN_static_agent.py'
curriculum=False # if False, curriculum learning on multiple sequential error values is disabled; specify either 'training_error_rates' or 'training_N' as a list containing a single value.
# If True, curriculum learning is enabled; specify 'training_error_rates' or 'training_N' as a list containing multiple values to train on sequentially.
log = True # if True, the learning curve during training is recorded and saved. Please specify the storing folder in 'DQN_static_agent.py'
correlated=False # if True, the agent is initialised on an environment that introduces correlated bit-flip errors; if False, the environment introduces uncorrelated bit-flip errors.
fixed=False # if True, the agent is trained on examples with a fixed number of N initial errors; if False, it is trained on examples in which each qubit is flipped with probability 'error_rate'.
evaluate_fixed=False # if True, the trained model is evaluated on examples with a fixed number of N initial errors; if False, it is evaluated on examples in which each qubit is flipped with probability 'error_rate'.
board_size=5 # board of d x d plaquettes and stars
error_rate=0.1 # bit-flip error rate each qubit on the board is subject to
exp_frac = 0.1 # exploration fraction: controls the schedule over which the exploration parameter is annealed from 'exp_init' to 'exp_fin'
exp_init = 1.0 # initial value of the exploration parameter
exp_fin = 0.05 # final value of the exploration parameter
buffer_size = 1000000 # size of the replay buffer
N=1 # the number of fixed initial flips N the agent model is trained on (or loaded with) when 'fixed' is set to True
logical_error_reward=5 # the reward the agent gets when it has removed all syndrome points, but the terminal board state contains a logical error.
success_reward=10 # the reward the agent gets when it has removed all syndrome points and the terminal board state contains no logical error, meaning the agent has successfully done its job.
continue_reward=-1 # the reward the agent gets for each action that does not result in a terminal board state. If negative, the agent is penalized for every move, giving it an incentive to remove syndromes in as few moves as possible.
illegal_action_reward=-2 # the reward (penalty) the agent gets for choosing an illegal action when 'mask_actions' is set to False.
total_timesteps=5000 # total number of times env.step() is called during training. Note that this is not equal to the number of training episodes!
learning_rate= 0.001 # learning rate during training
training_N=[N] # values of N initial flips the agent model is trained on
# training_error_rates=list(np.linspace(0.01,0.15,6))
training_error_rates=[error_rate] # values of error rates the agent model is trained on
evaluate=True # if True, the agent is evaluated; if False, evaluation is skipped.
check_fails=False # if True, during evaluation all cases in which the agent fails, but MWPM succeeds, will be rendered.
render=False # if True, the environment with the agent's actions will be rendered per timestep.
save_files=True # if True results will be saved. Please specify the storing folder in the file 'evaluation_functions_static.py'
number_evaluations=10 # the number of evaluation episodes the agent is evaluated on
max_moves=50 # the maximum number of moves the agent is allowed to make per evaluation episode
N_evaluates = [1,2,3,4,5] # the values of N (fixed initial flips) the agent is evaluated on if 'evaluate_fixed' is set to True.
error_rates_eval=list(np.linspace(0.01,0.15,10)) # the error rates the agent is evaluated on if 'evaluate_fixed' is set to False.
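
# Example: to train with curriculum learning on several error rates instead of a single
# value, one could set (the values below are illustrative):
#   curriculum = True
#   training_error_rates = list(np.linspace(0.01, 0.15, 6))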
# SETTINGS USED TO INITIALISE THE AGENT
initialisation_settings = {
    'board_size': board_size,
    'error_rate': error_rate,
    'l_reward': logical_error_reward,
    's_reward': success_reward,
    'c_reward': continue_reward,
    'i_reward': illegal_action_reward,
    'lr': learning_rate,
    'total_timesteps': total_timesteps,
    'mask_actions': False,
    'fixed': fixed,
    'N': N,
    'correlated': correlated,
    'exp_frac': exp_frac,
    'exp_init': exp_init,
    'exp_fin': exp_fin,
    'buff': buffer_size,
    'max_moves': max_moves
}
# SETTINGS USED TO LOAD THE TRAINED AGENT
loaded_model_settings = {
    'board_size': board_size,
    'error_rate': error_rate,
    'l_reward': logical_error_reward,
    's_reward': success_reward,
    'c_reward': continue_reward,
    'i_reward': illegal_action_reward,
    'lr': learning_rate,
    'total_timesteps': total_timesteps,
    'mask_actions': False,
    'fixed': fixed,
    'N': N,
    'correlated': correlated,
    'exp_frac': exp_frac,
    'exp_init': exp_init,
    'exp_fin': exp_fin,
    'buff': buffer_size,
    'max_moves': max_moves
}
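# SETTINGS USED TO EVALUATE THE AGENT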
evaluation_settings = {
    'board_size': board_size,
    'error_rate': error_rate,
    'l_reward': logical_error_reward,
    's_reward': success_reward,
    'c_reward': continue_reward,
    'i_reward': illegal_action_reward,
    'lr': learning_rate,
    'total_timesteps': total_timesteps,
    'mask_actions': False,
    'fixed': fixed,
    'N': N,
    'correlated': correlated,
    'exp_frac': exp_frac,
    'exp_init': exp_init,
    'exp_fin': exp_fin,
    'buff': buffer_size,
    'max_moves': max_moves
}
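
# success rates of the DQN agent and of the MWPM baseline, collected per training value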
success_rates_all=[]
success_rates_all_MWPM=[]
if fixed:
    training_values = training_N
else:
    training_values = training_error_rates
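
# Loop over the training values. When 'train' is True and 'curriculum' is False,
# the first value is trained from scratch; for the remaining values the flags are
# flipped below so that the previously saved model is loaded and training continues.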
for training_value in training_values:
    if (train == True) and (curriculum == False) and (training_values.index(training_value) > 0):
        train = False
        curriculum = True

    save_model_path = ''
    for key, value in initialisation_settings.items():
        save_model_path += f"{key}={value}"
    load_model_path = ''
    for key, value in loaded_model_settings.items():
        load_model_path += f"{key}={value}"
    # initialise DQN agent
    AgentDQN = DQN_agent(initialisation_settings, log)

    if train:
        AgentDQN.train_model(save_model_path=save_model_path)
    else:
        print(f"{loaded_model_settings['N']=}")
        print(f"{loaded_model_settings['error_rate']=}")
        AgentDQN.load_model(load_model_path=load_model_path)
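        # curriculum step: point the environment at the current training value and
        # continue training from the model that was just loaded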
        if curriculum:
            if fixed:
                print(f"{training_value=}")
                initialisation_settings['N'] = training_value
            else:
                print(f"{training_value=}")
                initialisation_settings['error_rate'] = training_value
            save_model_path = ''
            for key, value in initialisation_settings.items():
                save_model_path += f"{key}={value}"
            AgentDQN.change_environment_settings(initialisation_settings)
            AgentDQN.train_model(save_model_path=save_model_path)
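
    # update the settings that determine the load path for the next training value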
    if fixed:
        loaded_model_settings['N'] = training_value
    else:
        loaded_model_settings['error_rate'] = training_value
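
    # evaluate the agent (and the MWPM baseline) on either a fixed number of initial flips or a sweep of error rates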
    if evaluate:
        if evaluate_fixed:
            success_rates, success_rates_MWPM = evaluate_fixed_errors(AgentDQN, evaluate_fixed, fixed, evaluation_settings, loaded_model_settings, N_evaluates, render, number_evaluations, max_moves, check_fails, save_files)
        else:
            success_rates, success_rates_MWPM = evaluate_error_rates(AgentDQN, evaluate_fixed, fixed, evaluation_settings, loaded_model_settings, error_rates_eval, render, number_evaluations, max_moves, check_fails, save_files)
        success_rates_all.append(success_rates)
        success_rates_all_MWPM.append(success_rates_MWPM)
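
# aggregate the results over all training values and benchmark the agent against MWPM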
evaluation_path = ''
for key, value in evaluation_settings.items():
    evaluation_path += f"{key}={value}"

success_rates_all = np.array(success_rates_all)
success_rates_all_MWPM = np.array(success_rates_all_MWPM)

if fixed:
    path_plot = f"/Users/lindsayspoor/Library/Mobile Documents/com~apple~CloudDocs/Documents/Studiedocumenten/2023-2024/MSc Research Project/Results/Figure_results/Results_benchmarks/DQN_vs_MWPM_{evaluation_path}_{loaded_model_settings['N']}.pdf"
else:
    path_plot = f"/Users/lindsayspoor/Library/Mobile Documents/com~apple~CloudDocs/Documents/Studiedocumenten/2023-2024/MSc Research Project/Results/Figure_results/Results_benchmarks/DQN_vs_MWPM_{evaluation_path}_{loaded_model_settings['error_rate']}.pdf"

plot_benchmark_MWPM(success_rates_all, success_rates_all_MWPM, N_evaluates, error_rates_eval, board_size, path_plot, evaluate_fixed)