'''
Created on 04/08/2014
@author: Gabriel de O. Ramos <[email protected]>
'''
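# This file is the experiments entry point of the pyrl code: each test_* function
# below sets up one example (cliff walking, SUMO and SUMO route choice, SUMO
# traffic lights, a normal-form game, OPPORTUNE, and a few stand-alone tests of
# the joint-state/joint-action machinery), and the __main__ block at the bottom
# selects which one to run. Note that the code targets Python 2 (print
# statements, xrange).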
import sys#@UnusedImport
import environment as Env
import environment.NFG as NFG
from environment.cliffwalking import CliffWalking
from environment.sumo import SUMO
from environment.sumo import SUMORouteChoice
from environment.sumotl import SUMOTrafficLights
from environment.NFG.twoplayer_twoaction import TwoPlayerTwoAction
from learner.q_learning import QLearner
from learner.wpl import WPL
from learner.opportune import *#@UnusedWildImport
from exploration.epsilon_greedy import EpsilonGreedy
from exploration.boltzmann import Boltzmann
import tools.misc as misc#@UnusedImport
import external.KSP as KSP
from itertools import *#@UnusedWildImport #chain, combinations
import datetime
def test_cliff():
    # a cliff walking environment
    env = CliffWalking()
    # an exploration strategy
    #exp = EpsilonGreedy(1, 0.99)
    exp = Boltzmann(0.1)
    # a Q-learner
    learner = QLearner("Agent-1", env, env.get_starting_state(), env.get_goal_state(), 0.3, 0.9, exp)
    #learner = WPL("Agent-1", env, env.get_starting_state(), env.get_goal_state())
    #print learner
    # number of episodes
    n_episodes = 1000
    # for each episode
    for i in xrange(n_episodes):
        #print "===== Episode %i ==========================================" % (i)
        env.run_episode()
        # print episode number, steps taken, final state and accumulated reward
        print "%i\t%i\t%s\t%f" % (i+1, env._steps, learner._state, learner._accumulated_reward)
        #print learner._policy[env.get_starting_state()]

def test_SUMO():
    # a SUMO environment
    #env = SUMO('nets/simple/simple-traci.sumocfg', 8813, False)
    env = SUMO('nets/OW/OW-traci.sumocfg', 8813, False)
    # an exploration strategy
    exp = EpsilonGreedy(epsilon=1, min_epsilon=0.1, decay_rate=0.99)
    # for each vehicle in the route file
    for vehID in env.get_vehicles_ID_list():
        vehDic = env.get_vehicle_dict(vehID)
        # a reinforcement learner
        _ = QLearner(vehID, env, vehDic['origin'], vehDic['destination'], 0.3, 0.9, exp)
        #_ = WPL(vehID, env, vehDic['origin'], vehDic['destination'], 0.002, 0.1)
    # number of episodes
    n_episodes = 100
    #sys.stdout = open('out.txt', 'w')
    #sys.stderr = open('err.txt', 'w')
    print 'ep\tavg tt\truntime'
    # for each episode
    for _ in xrange(n_episodes):
        #print "===== Episode %i ==========================================" % (i)
        env.run_episode(50000)
        #print "%i\t%s\t%f" % (env._steps, learner._state, learner._accumulated_reward)

def test_SUMORouteChoice():
    # a SUMO environment
    env = SUMORouteChoice('nets/OW/OW-traci.sumocfg', 8813, False)
    # convert the SUMO net file to the one accepted by KSP
    #misc.convert_SUMO_to_KSP('nets/OW/OW-traci.sumocfg')
    # create a set of routes for each OD-pair (through the KSP algorithm) and
    # assign it to that OD-pair (these sets will correspond to the actions
    # available in each state)
    pairs = env.get_OD_pairs()
    for origin, destination in pairs:
        RKSP = KSP.getKRoutesNetFile('nets/OW/OW_for_KSP.net', origin, destination, 4)
        routes = [" ".join(r[0]) for r in RKSP]
        env.set_routes_OD_pair(origin, destination, routes)
    # an exploration strategy
    exp = EpsilonGreedy(epsilon=1, min_epsilon=0.1, decay_rate=0.99)
    # for each vehicle in the route file
    for vehID in env.get_vehicles_ID_list():
        vehDic = env.get_vehicle_dict(vehID)
        # in the SUMORouteChoice environment the origin is an encoding of the OD-pair
        origin = env.encode_OD(vehDic['origin'], vehDic['destination'])
        # create a learner
        _ = QLearner(vehID, env, origin, vehDic['destination'], 0.8, 0.9, exp)
        #_ = WPL(vehID, env, origin, vehDic['destination'], 0.002, 0.1)
        #print '%s (%s,%s) is in %s'%(Q.get_name(), vehDic['origin'], vehDic['destination'], Q.get_state())
    # number of episodes
    n_episodes = 100
    print 'ep\tavg tt\truntime'
    # for each episode
    for _ in xrange(n_episodes):
        env.run_episode(50000)
        #print env._learners['1.0']._QTable

def test_SUMOTrafficLights():
    print datetime.datetime.now().time()
    print 'SUMO traffic lights'
    # a SUMO environment
    env = SUMOTrafficLights('nets/3x3grid/3x3grid.sumocfg', 8813, False)
    # an exploration strategy
    exp = EpsilonGreedy(epsilon=1, min_epsilon=0.0, decay_rate=0.95, manual_decay=True)
    # for each traffic light in the net file
    for tlID in env.get_trafficlights_ID_list():
        # create a learner
        _ = QLearner(tlID, env, 0, 0, 0.1, 0.8, exp)
    # number of episodes
    n_episodes = 100
    # for each episode
    for i in xrange(n_episodes):
        # log the queue length of each traffic light to a per-episode file
        arq_avg_nome = 'tl_%d.txt' % (i)
        arq_tl = open(arq_avg_nome, 'w')  # to save the output to a file
        arq_tl.writelines('##%s## \n' % (datetime.datetime.now().time()))
        arq_tl.write('step,tl0,tl1,tl2,tl3,tl4,tl5,tl6,tl7,tl8,average,all\n')
        env.run_episode(28800, arq_tl, exp)
        arq_tl.close()
    print datetime.datetime.now().time()

def test_NFG():
    env = TwoPlayerTwoAction(NFG.GAME_MATCHING_PENNIES)
    _ = WPL("p1", env, Env.ENV_SINGLE_STATE, Env.ENV_SINGLE_STATE, 0.002, 0.1)
    _ = WPL("p2", env, Env.ENV_SINGLE_STATE, Env.ENV_SINGLE_STATE, 0.002, 0.1)
    for _ in xrange(10000):
        env.run_episode()

def test_OPPORTUNE():
    # a SUMO environment
    env = SUMO('nets/OW/OW-traci.sumocfg', 8813, False)
    # an exploration strategy
    exp = EpsilonGreedy(1, 0.925)
    #----------------------------------------------------------
    # create a dict (vehD) with the OD-pair of each vehicle (each entry is in the form <"O###D", O, D>),
    # and a dict (OD_grouping) of vehicles grouped by OD-pair (each entry is the list of vehicles with the same OD-pair);
    # vehicles with the same OD-pair are considered neighbours
    vehD = {}
    OD_grouping = {}
    for vehID in env.get_vehicles_ID_list():
        vehDic = env.get_vehicle_dict(vehID)
        ODpair = '%s###%s' % (vehDic['origin'], vehDic['destination'])
        vehD[vehID] = [ODpair, vehDic['origin'], vehDic['destination']]
        if ODpair not in OD_grouping:
            OD_grouping[ODpair] = []
        OD_grouping[ODpair].append(vehID)
    # sort the lists of neighbours
    for k in OD_grouping.keys():
        OD_grouping[k].sort()
    # create the communication layer among the learners
    OCL = OPPORTUNECommLayer()
    # create the learners
    for vehID in env.get_vehicles_ID_list():
        # create the list of neighbours of vehID (in this example, such a
        # list is composed of all vehicles from the same OD-pair as vehID)
        Ni = list(OD_grouping[vehD[vehID][0]])
        Ni.remove(vehID)
        # create the learner corresponding to vehID
        _ = OPPORTUNE(vehID, env, vehD[vehID][1], vehD[vehID][2], 0.3, 0.9, 0.001, exp, Ni, OCL)
        # vehDic = env.get_vehicle_dict(vehID)
        # # a reinforcement learner
        # _ = QLearner(vehID, env, vehDic['origin'], vehDic['destination'], 0.3, 0.9, exp)
    #----------------------------------------------------------
    # number of episodes
    n_episodes = 1000
    print 'ep\tavg tt\truntime'
    # for each episode
    for _ in xrange(n_episodes):
        env.run_episode(50000)

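# Illustration of the grouping above, with hypothetical vehicle IDs: if 'veh1'
# and 'veh2' share the OD-pair A1->L1 while 'veh3' travels B2->M1, the loop builds
#   vehD        = {'veh1': ['A1###L1', 'A1', 'L1'], 'veh2': ['A1###L1', 'A1', 'L1'], 'veh3': ['B2###M1', 'B2', 'M1']}
#   OD_grouping = {'A1###L1': ['veh1', 'veh2'], 'B2###M1': ['veh3']}
# so 'veh1' and 'veh2' become each other's only neighbours.
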
def test_OPPORTUNE_route_choice():
    # a SUMO environment
    env = SUMORouteChoice('nets/OW/OW-traci.sumocfg', 8813, False)
    # convert the SUMO net file to the one accepted by KSP
    #misc.convert_SUMO_to_KSP('nets/OW/OW-traci.sumocfg')
    # create a set of routes for each OD-pair (through the KSP algorithm) and
    # assign it to that OD-pair (these sets will correspond to the actions
    # available in each state)
    pairs = env.get_OD_pairs()
    for origin, destination in pairs:
        RKSP = KSP.getKRoutesNetFile('nets/OW/OW_for_KSP.net', origin, destination, 4)
        routes = [" ".join(r[0]) for r in RKSP]
        env.set_routes_OD_pair(origin, destination, routes)
    # an exploration strategy
    exp = EpsilonGreedy(0.05, 0)
    #----------------------------------------------------------
    # create a dict (vehD) with the OD-pair of each vehicle (each entry is in the form <encoded OD-pair, O, D>),
    # and a dict (OD_grouping) of vehicles grouped by OD-pair (each entry is the list of vehicles with the same OD-pair);
    # vehicles with the same OD-pair are considered neighbours
    vehD = {}
    OD_grouping = {}
    for vehID in env.get_vehicles_ID_list():
        vehDic = env.get_vehicle_dict(vehID)
        ODpair = env.encode_OD(vehDic['origin'], vehDic['destination'])
        vehD[vehID] = [ODpair, vehDic['origin'], vehDic['destination']]
        if ODpair not in OD_grouping:
            OD_grouping[ODpair] = []
        OD_grouping[ODpair].append(vehID)
    # sort the lists of neighbours
    for k in OD_grouping.keys():
        OD_grouping[k].sort()
    # create the communication layer among the learners
    OCL = OPPORTUNECommLayer()
    # create the learners
    for vehID in env.get_vehicles_ID_list():
        # create the list of neighbours of vehID (in this example, such a
        # list is composed of all vehicles from the same OD-pair as vehID)
        Ni = list(OD_grouping[vehD[vehID][0]])
        Ni.remove(vehID)
        # in the SUMORouteChoice environment the origin is an encoding of the OD-pair
        origin = vehD[vehID][0]
        # create the learner corresponding to vehID
        _ = OPPORTUNE(vehID, env, origin, vehD[vehID][2], 0.5, 0.9, 0.05, exp, Ni, OCL)
    #----------------------------------------------------------
    # number of episodes
    n_episodes = 10000
    print 'ep\tavg tt\truntime'
    # for each episode
    for _ in xrange(n_episodes):
        env.run_episode(50000)
        #print env._learners['1.0']._QTable

def test_combinations_update_QTable():
    _name = '1.1'
    _S = JointState({'1.1': 'A1###L1', '2.0': 'A1###M1', '2.1': 'A1###M1'})
    _A = JointAction({'1.1': 'A', '2.0': 'B', '2.1': 'B'})
    _QTable = {
        JointState({'1.1': 'A1###L1'}): {JointAction({'1.1': 'A'}): 1},
        JointState({'1.1': 'A1###L1', '2.0': 'A1###M1'}): {JointAction({'1.1': 'A', '2.0': 'A'}): 2, JointAction({'1.1': 'A', '2.0': 'B'}): 3},
        JointState({'1.1': 'A1###L1', '2.1': 'A1###M1'}): {JointAction({'1.1': 'A'}): 2, JointAction({'1.1': 'A', '2.1': 'B'}): 1, JointAction({'1.1': 'A', '2.1': 'C'}): 4},
        JointState({'1.1': 'A1###L1', '2.0': 'A1###M1', '2.1': 'A1###M1'}): {JointAction({'1.1': 'A', '2.0': 'B', '2.1': 'B'}): 2, JointAction({'1.1': 'A', '2.0': 'B', '2.1': 'C'}): 7}
    }
    # enumerate all non-empty subsets of learners (powerset) that include _name
    # and build the corresponding joint subactions
    Actions = []
    for z in chain.from_iterable(combinations(_A.get_learners(), r) for r in range(1, len(_A.get_learners())+1)):
        if _name in z:
            ac = JointAction({x: _A.get_action(x) for x in z})
            Actions.append(ac)
    #for a in Actions: print a
    # do the same for the joint states and print every (substate, subaction)
    # pair that is found in the Q-table
    for z in chain.from_iterable(combinations(_S.get_learners(), r) for r in range(1, len(_S.get_learners())+1)):
        if _name in z:
            st = JointState({x: _S.get_state(x) for x in z})
            if st in _QTable:
                for ac in Actions:
                    if ac in _QTable[st].keys():
                        print "Q(%s, %s) = %i" % (st.get_learners(), ac.get_learners(), _QTable[st][ac])

def test_combinations():
    _name = '1.1'
    _S = JointState({'1.1': 'A1###L1', '2.0': 'A1###M1', '2.1': 'A1###M1'})
    _QTable = {
        JointState({'1.1': 'A1###L1'}): {'a': 1, 'b': 2},
        JointState({'1.1': 'A1###L1', '2.0': 'A1###M1'}): {'a': 1, 'b': 3},
        JointState({'1.1': 'A1###L1', '2.1': 'A1###M1'}): {'a': 1, 'c': 4}
    }
    bestsubstate = None
    bestsubstatev = float('-inf')
    # check all combinations of substates of _S (of size 2 up to len(_S)-1)
    for z in chain.from_iterable(combinations(_S.get_learners(), r) for r in range(2, len(_S.get_learners()))):
        substate = JointState({x: _S.get_state(x) for x in z})
        # if the substate is in the Q-table
        if substate in _QTable:
            # get the maximum Q-value in the substate
            maxs = max(v for v in _QTable[substate].values())
            # keep the best substate
            if maxs > bestsubstatev:
                bestsubstatev = maxs
                bestsubstate = substate
    if bestsubstate:
        print "Best substate found is %s, whose maximum expected payoff is %f" % (bestsubstate, bestsubstatev)
    else:
        bestsubstate = JointState({_name: _S.get_state(_name)})
        print "None of the substates is valid! The single state %s was selected, whose maximum expected payoff is %f" % (bestsubstate, max(v for v in _QTable[bestsubstate].values()))

def test_combinations_old():
    _S = {'1.1': 'A1###L1', '2.0': 'A1###M1', '2.1': 'A1###M1'}
    _Qtable = {"{'1.1': 'A1###L1'}": {'A1A AC CG GJ JL LL1': 0, 'A1A AC CG GJ JI IL LL1': 0, 'A1A AC CD DG GJ JI IL LL1': 0, 'A1A AC CF FI IL LL1': 0}}
    for z in chain.from_iterable(combinations(_S, r) for r in range(2, len(_S))):
        print z

def test():
    a = {1: 'A', 2: 'B', 3: 'C', 4: 'D'}
    print {x: a[x] for x in a if x > 2}

def test_old():
    _A = {'A': 1, 'B': 2, 'C': 2}
    _replies = {'A': 'OK', 'B': 'OK', 'C': 'OK'}
    # handle the cases in which a neighbour has not replied (this scenario
    # only happens if the neighbour is not performing an act() right now)
    for a in _A.keys():
        if a not in _replies:
            _replies[a] = 'NOT'
    # count the number of neighbours that (i) did not reply or (ii) refused the bid
    print sum([1 for a in _A.keys() if a not in _replies or (a in _replies and _replies[a] == 'NOT')])

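# Usage note (assumption based on the calls above): uncomment exactly one of the
# test_* functions in the __main__ block below and run this script with a
# Python 2 interpreter; the SUMO-based experiments additionally expect the
# SUMO/TraCI tools and the net files under nets/ to be available, using port 8813.
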
if __name__ == '__main__':
    #misc.convert_SUMO_to_KSP('nets/OW/OW-traci.sumocfg')
    #test_cliff()
    #test_SUMO()
    #test_SUMORouteChoice()
    test_SUMOTrafficLights()
    #test_NFG()
    #test_OPPORTUNE()
    #test_OPPORTUNE_route_choice()
    #test_combinations()
    #test_combinations_update_QTable()
    #test()