forked from pblouw/nengo_rl
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtd_grid.py
584 lines (482 loc) · 17.4 KB
/
td_grid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
import math
import random
import sys
import numpy as np
# Attribute aliases under which Cell.__getattr__ exposes the tuple of
# adjacent cells (UK/US spellings, singular and plural).
neighbour_synonyms = ('neighbours', 'neighbors', 'neighbour', 'neighbor')
class Cell(object):
    """A single square of the grid world.

    Cells are open by default (``wall`` is a class-level default) and carry a
    scalar ``reward``.  The attributes ``x``, ``y``, ``world`` and ``agents``
    are attached externally by World._make_cell().
    """

    # Class-level default: a cell is passable unless flagged as a wall.
    wall = False

    def __init__(self):
        self.reward = 0

    def __getattr__(self, key):
        # Lazily build the tuple of adjacent cells the first time any of the
        # accepted spellings is requested, then cache it under every synonym
        # so this hook never fires again for them.
        if key not in neighbour_synonyms:
            raise AttributeError(key)
        points = [self.world.get_point_in_direction(self.x, self.y, d)
                  for d in range(self.world.directions)]
        adjacent = tuple(self.world.grid[py][px] for (px, py) in points)
        for alias in neighbour_synonyms:
            self.__dict__[alias] = adjacent
        return adjacent
class Agent(object):
    """An agent occupying a single Cell in a World, facing one of the
    world's discrete directions.

    ``world`` and ``cell`` are filled in by World.add(); ``dir`` is the
    current heading index in range(world.directions).
    """

    world = None
    cell = None

    def __init__(self):
        self.reward = 0

    def __setattr__(self, key, val):
        # Keep each cell's `agents` list in sync whenever the agent moves:
        # remove from the old cell, append to the new one.
        if key == 'cell':
            old = self.__dict__.get(key, None)
            if old is not None:
                old.agents.remove(self)
            if val is not None:
                val.agents.append(self)
        self.__dict__[key] = val

    def __getattr__(self, key):
        # Convenience aliases for the three cells the agent can "see".
        if key == 'left_cell':
            return self.get_cell_on_left()
        elif key == 'right_cell':
            return self.get_cell_on_right()
        elif key == 'ahead_cell':
            return self.get_cell_ahead()
        raise AttributeError(key)

    def turn(self, amount):
        """Rotate heading by `amount` direction steps (positive = clockwise)."""
        self.dir = (self.dir + amount) % self.world.directions

    def turn_left(self):
        self.turn(-1)

    def turn_right(self):
        self.turn(1)

    def turn_around(self):
        # BUG FIX: use floor division; under Python 3 `/` returns a float,
        # which would make self.dir a float and break neighbour-tuple indexing.
        self.turn(self.world.directions // 2)

    def go_in_direction(self, dir):
        """Step into the adjacent cell in direction `dir`.

        Returns False (without moving) if that cell is a wall, else True.
        """
        target = self.cell.neighbour[dir]
        if getattr(target, 'wall', False):
            return False
        self.cell = target
        return True

    def go_forward(self):
        if self.world is None:
            raise CellularException('Agent has not been put in a World')
        return self.go_in_direction(self.dir)

    def go_backward(self):
        # Step backwards by turning around, stepping, then restoring heading.
        self.turn_around()
        moved = self.go_forward()
        self.turn_around()
        return moved

    def get_cell_ahead(self):
        return self.cell.neighbour[self.dir]

    def get_cell_on_left(self):
        return self.cell.neighbour[(self.dir - 1) % self.world.directions]

    def get_cell_on_right(self):
        return self.cell.neighbour[(self.dir + 1) % self.world.directions]

    def go_towards(self, target, y=None):
        """Take one greedy step toward `target`.

        `target` may be a Cell, or an x coordinate (with `y` given
        separately).  Returns True after moving, False if the best step is
        blocked by a wall, or None if already at the target.
        """
        if not isinstance(target, Cell):
            target = self.world.grid[int(y)][int(target)]
        if self.world is None:
            raise CellularException('Agent has not been put in a World')
        if self.cell == target:
            return
        best = None
        best_dist = None
        best_dir = None
        for i, n in enumerate(self.cell.neighbours):
            if n == target:
                # Adjacent to the goal: take it regardless of distance.
                best = target
                best_dir = i
                break
            if getattr(n, 'wall', False):
                continue
            # Squared Euclidean distance is enough for an argmin.
            dist = (n.x - target.x) ** 2 + (n.y - target.y) ** 2
            if best is None or best_dist > dist:
                best = n
                best_dist = dist
                best_dir = i
        if best is not None:
            if getattr(best, 'wall', False):
                return False
            self.cell = best
            self.dir = best_dir
            return True

    def update(self):
        # Hook for subclasses; called once per World.update().
        pass
class World(object):
    """A toroidal grid of Cell objects plus the agents living on it.

    The grid wraps at its edges (see get_point_in_direction).  A world can
    be sized explicitly or inferred from a map file / map string, where each
    character is passed to the cell class's load() method.
    """

    def __init__(self, cell=None, width=None, height=None, directions=8,
                 filename=None, map=None):
        """
        cell: the cell class to instantiate for every grid square
              (defaults to Cell).
        width, height: grid dimensions; inferred from the map if omitted,
              otherwise default to 20.
        directions: number of discrete headings (4, 6 or 8 supported).
        filename / map: optional map source (path or literal string).
        """
        if cell is None:
            cell = Cell
        self.Cell = cell
        self.directions = directions
        if filename or map:
            if filename:
                # BUG FIX: Python 2's built-in file() no longer exists;
                # use open() and close the handle deterministically.
                with open(filename) as handle:
                    data = handle.readlines()
            else:
                data = map.splitlines()
            if len(data[0]) == 0:
                # splitlines() yields a leading '' for maps starting with '\n'.
                del data[0]
            if height is None:
                height = len(data)
            if width is None:
                width = max([len(x.rstrip()) for x in data])
        if width is None:
            width = 20
        if height is None:
            height = 20
        self.width = width
        self.height = height
        self.image = None
        self.reset()
        if filename or map:
            self.load(filename=filename, map=map)

    def get_cell(self, x, y):
        return self.grid[y][x]

    def find_cells(self, filter):
        """Yield every cell for which `filter(cell)` is truthy."""
        for row in self.grid:
            for cell in row:
                if filter(cell):
                    yield cell

    def reset(self):
        """Rebuild the grid with fresh cells and clear all agents."""
        self.grid = [[self._make_cell(i, j) for i in range(self.width)]
                     for j in range(self.height)]
        # Scratch dicts used by update() to double-buffer cell state.
        self.dictBackup = [[{} for i in range(self.width)]
                           for j in range(self.height)]
        self.agents = []
        self.age = 0

    def _make_cell(self, x, y):
        # Attach the bookkeeping attributes every cell is expected to have.
        c = self.Cell()
        c.x = x
        c.y = y
        c.world = self
        c.agents = []
        return c

    def randomize(self):
        """Call randomize() on every cell, if the cell class supports it."""
        if not hasattr(self.Cell, 'randomize'):
            return
        for row in self.grid:
            for cell in row:
                cell.randomize()

    def save(self, f=None):
        """Serialize the grid via each cell's save() method.

        With a filename or file object `f`, writes to it; with f=None,
        returns the map as a string.  No-op if the cell class has no save().
        """
        if not hasattr(self.Cell, 'save'):
            return
        if isinstance(f, str):
            # BUG FIX: file() -> open() for Python 3.
            f = open(f, 'w')
        total = ''
        for j in range(self.height):
            line = ''
            for i in range(self.width):
                line += self.grid[j][i].save()
            total += '%s\n' % line
        if f is not None:
            f.write(total)
            f.close()
        else:
            return total

    def load(self, filename=None, map=None):
        """Populate cells from a map file/string, centring a small map.

        No-op if the cell class has no load() method.  Resets the grid first.
        """
        if not hasattr(self.Cell, 'load'):
            return
        if filename:
            if isinstance(filename, str):
                # BUG FIX: file() -> open(); close files we open ourselves
                # (a caller-supplied file object is left open for the caller).
                with open(filename) as handle:
                    lines = handle.readlines()
            else:
                lines = filename.readlines()
        else:
            lines = map.splitlines()
        if len(lines[0]) == 0:
            del lines[0]
        lines = [x.rstrip() for x in lines]
        fh = len(lines)
        fw = max([len(x) for x in lines])
        # Clip maps larger than the world; centre maps smaller than it.
        if fh > self.height:
            fh = self.height
            starty = 0
        else:
            starty = int((self.height - fh) / 2)
        if fw > self.width:
            fw = self.width
            startx = 0
        else:
            startx = int((self.width - fw) / 2)
        self.reset()
        for j in range(fh):
            line = lines[j]
            for i in range(min(fw, len(line))):
                self.grid[starty + j][startx + i].load(line[i])

    def update(self):
        """Advance the world one tick: update cells (double-buffered), then agents."""
        if hasattr(self.Cell, 'update'):
            # First pass: snapshot each cell's state, let it update, then
            # swap so subsequent cells read the pre-update state.
            for j, row in enumerate(self.grid):
                for i, c in enumerate(row):
                    self.dictBackup[j][i].update(c.__dict__)
                    c.update()
                    c.__dict__, self.dictBackup[j][
                        i] = self.dictBackup[j][i], c.__dict__
            # Second pass: swap back so every cell now holds its new state.
            for j, row in enumerate(self.grid):
                for i, c in enumerate(row):
                    c.__dict__, self.dictBackup[j][
                        i] = self.dictBackup[j][i], c.__dict__
            for a in self.agents:
                a.update()
        else:
            for a in self.agents:
                a.update()
        self.age += 1

    def get_offset_in_direction(self, x, y, dir):
        """Return the (dx, dy) step for heading `dir`; hex grids (6 dirs)
        depend on row parity."""
        if self.directions == 8:
            dx, dy = [(0, -1), (1, -1), (
                1, 0), (1, 1), (0, 1), (-1, 1), (-1, 0), (-1, -1)][dir]
        elif self.directions == 4:
            dx, dy = [(0, -1), (1, 0), (0, 1), (-1, 0)][dir]
        elif self.directions == 6:
            if y % 2 == 0:
                dx, dy = [(1, 0), (0, 1), (-1, 1), (-1, 0),
                          (-1, -1), (0, -1)][dir]
            else:
                dx, dy = [(1, 0), (1, 1), (0, 1), (-1, 0),
                          (0, -1), (1, -1)][dir]
        return dx, dy

    def get_point_in_direction(self, x, y, dir):
        """Return the neighbouring grid point in direction `dir`, wrapping
        toroidally at the edges."""
        dx, dy = self.get_offset_in_direction(x, y, dir)
        x2 = x + dx
        y2 = y + dy
        if x2 < 0:
            x2 += self.width
        if y2 < 0:
            y2 += self.height
        if x2 >= self.width:
            x2 -= self.width
        if y2 >= self.height:
            y2 -= self.height
        return (x2, y2)

    def remove(self, agent):
        """Detach an agent from this world entirely."""
        self.agents.remove(agent)
        agent.world = None
        agent.cell = None

    def add(self, agent, x=None, y=None, cell=None, dir=None):
        """Place an agent in the world.

        Position comes from (x, y), from `cell`, or (for whichever
        coordinate is unspecified) a random non-wall location.  Heading
        `dir` defaults to a random direction.
        """
        self.agents.append(agent)
        if x is not None and y is not None:
            cell = self.grid[y][x]
        if cell is None:
            # Rejection-sample until we land on a non-wall cell.
            while True:
                xx = x
                yy = y
                if xx is None:
                    xx = random.randrange(self.width)
                if yy is None:
                    yy = random.randrange(self.height)
                if not getattr(self.grid[yy][xx], 'wall', False):
                    y = yy
                    x = xx
                    break
        else:
            x = cell.x
            y = cell.y
        if dir is None:
            dir = random.randrange(self.directions)
        agent.cell = self.grid[y][x]
        agent.dir = dir
        agent.world = self
        agent.x = x
        agent.y = y
class CellularException(Exception):
    """Raised when an operation needs a World but the agent has not been
    added to one (see Agent.go_forward / Agent.go_towards)."""
    pass
class ContinuousAgent(Agent):
    """Agent with a real-valued position (x, y) and a fractional heading.

    The agent still tracks the discrete grid cell nearest to its continuous
    position; movement is blocked when it would enter a wall cell.
    """

    def go_in_direction(self, dir, distance=1, return_obstacle=False):
        """Move `distance` along fractional heading `dir`.

        Returns True/False for moved/blocked, or (when return_obstacle is
        True) None for success and the blocking wall cell on failure.
        """
        # `dir` may be fractional: interpolate linearly between the two
        # adjacent discrete direction offsets by its fractional part.
        dir1 = int(dir)
        dir2 = (dir1 + 1) % self.world.directions
        dx1, dy1 = self.world.get_offset_in_direction(self.cell.x, self.cell.y, dir1)
        dx2, dy2 = self.world.get_offset_in_direction(self.cell.x, self.cell.y, dir2)
        scale = dir % 1
        x = self.x + distance * (dx2 * scale + dx1 * (1 - scale))
        y = self.y + distance * (dy2 * scale + dy1 * (1 - scale))
        # Candidate cells for the new position: the current cell and its
        # immediate neighbours (assumes steps of at most ~1 cell).
        closest = self.cell
        dist = (x - self.cell.x) ** 2 + (y - self.cell.y) ** 2
        for n in self.cell.neighbour:
            d = (x - n.x) ** 2 + (y - n.y) ** 2
            if d < dist:
                closest = n
                dist = d
        if closest is not self.cell:
            if closest.wall:
                # Blocked: report the obstacle (or failure) without moving.
                if return_obstacle:
                    return closest
                else:
                    return False
            else:
                self.cell = closest
        # NOTE(review): position is updated even when the nearest cell is
        # unchanged (sub-cell movement) — reconstructed indentation; confirm
        # against upstream pblouw/nengo_rl.
        self.x = x
        self.y = y
        if return_obstacle:
            return None
        else:
            return True

    def set_position(self, x, y):
        '''Update the position of the agent in the map'''
        # Moving into a wall is penalized and leaves the agent in place.
        if self.world.grid[y][x].wall:
            self.reward = -1
        else:
            self.x = x
            self.y = y
            self.cell = self.world.grid[y][x]
            self.reward = self.cell.reward

    def go_forward(self, distance=1):
        return self.go_in_direction(self.dir, distance=distance)

    def go_backward(self, distance=1):
        # Backward motion is forward motion with a negated distance.
        return self.go_in_direction(self.dir, distance=-distance)

    def detect(self, direction, max_distance=None):
        """Ray-march from the current position along `direction`.

        Returns (distance, obstacle): the distance travelled before hitting
        a wall (refined by halving the step down to 1/64) and the wall cell
        hit, or (max_distance-ish, None) if nothing was hit.  The agent's
        pose is restored afterwards.
        """
        start_x = self.x
        start_y = self.y
        cell = self.cell
        distance = 0.0
        delta = 1.0
        min_delta = 1.0 / 64
        obstacle = None
        if max_distance is None:
            max_distance = self.world.width + self.world.height
        while distance < max_distance:
            obstacle = self.go_in_direction(direction, delta, return_obstacle=True)
            if obstacle is None:
                distance += delta
            elif delta > min_delta:
                # Hit something: back off to a finer step and retry.
                delta = delta / 2
            else:
                # Converged: measure the exact distance travelled.
                distance = math.sqrt((start_x - self.x) ** 2 + (start_y - self.y) ** 2)
                break
        # Restore the saved pose (the march moved the agent).
        self.cell = cell
        self.x = start_x
        self.y = start_y
        return distance, obstacle

    def get_direction_to(self, cell):
        """Heading (possibly fractional, in direction units) toward `cell`.

        The +pi/2 shift aligns atan2's zero with direction 0 ("up").
        """
        dx = cell.x - self.x
        dy = cell.y - self.y
        theta = math.atan2(dy, dx) + math.pi / 2
        theta *= self.world.directions / (2 * math.pi)
        return theta

    def get_distance_to(self, cell):
        """Euclidean distance from the agent's position to `cell`."""
        dx = cell.x - self.x
        dy = cell.y - self.y
        return math.sqrt(dx ** 2 + dy ** 2)
import nengo
# GridNode sets up the pacman world for visualization
# GridNode sets up the pacman world for visualization
class GridNode(nengo.Node):
    """Nengo Node that renders the grid world as inline SVG.

    The node's output callable stores markup on its `_nengo_html_`
    attribute — presumably the convention nengo_gui uses to display HTML;
    verify against the nengo_gui version in use.
    """

    def __init__(self, world, dt=0.001):
        # The initializer sets up the html layout for display.
        def svg(t):
            # Throttle: only regenerate the SVG once at least `dt` seconds
            # of simulation time have passed (or time jumped backwards,
            # e.g. after a simulator reset).
            last_t = getattr(svg, '_nengo_html_t_', None)
            if last_t is None or t >= last_t + dt or t <= last_t:
                svg._nengo_html_ = self.generate_svg(world)
                svg._nengo_html_t_ = t
        super(GridNode, self).__init__(svg)

    # This function sets up an SVG (used to embed html code in the environment)
    def generate_svg(self, world):
        """Build the SVG string: one rect per coloured cell, one shape per agent."""
        cells = []
        # Runs through every cell in the world (walls & food)
        for i in range(world.width):
            for j in range(world.height):
                cell = world.get_cell(i, j)
                # `color` may be a value or a zero-argument callable
                # (GridCell defines it as a method); None means "draw nothing".
                color = cell.color
                if callable(color):
                    color = color()
                if color is not None:
                    cells.append('<rect x=%d y=%d width=1 height=1 style="fill:%s"/>' %
                                 (i, j, color))
        # Runs through every agent in the world
        agents = []
        for agent in world.agents:
            # sets variables like agent direction, color and size
            direction = agent.dir * 360.0 / world.directions
            color = getattr(agent, 'color', 'blue')
            if callable(color):
                color = color()
            # NOTE(review): a `shape` other than 'triangle'/'circle' leaves
            # agent_poly unbound and would raise NameError — assumed never
            # to occur; confirm with callers.
            shape = getattr(agent, 'shape', 'circle')
            if shape == 'triangle':
                agent_poly = ('<polygon points="0.25,0.25 -0.25,0.25 0,-0.5"'
                              ' style="fill:%s" transform="translate(%f,%f) rotate(%f)"/>'
                              % (color, agent.x + 0.5, agent.y + 0.5, direction))
            elif shape == 'circle':
                agent_poly = ('<circle '
                              ' style="fill:%s" cx="%f" cy="%f" r="0.4"/>'
                              % (color, agent.x + 0.5, agent.y + 0.5))
            agents.append(agent_poly)
        # Sets up the environment as a HTML SVG
        svg = '''<svg style="background: white" width="100%%" height="100%%" viewbox="0 0 %d %d">
        %s
        %s
        </svg>''' % (world.width, world.height,
                     ''.join(cells), ''.join(agents))
        return svg
class GridCell(Cell):
    """Cell that can draw itself (colour) and initialise itself from a
    map character ('#' = wall, 'G' = goal with reward 1, anything else
    is empty)."""

    def color(self):
        """Fill colour for rendering: black walls, green rewards, else None."""
        if self.wall:
            return 'black'
        if self.reward > 0:
            return 'green'
        return None

    def load(self, char):
        """Configure this cell from one map character."""
        if char == '#':
            self.wall = True
            return
        self.reward = 1 if char == 'G' else 0.0
class EnvironmentInterface(object):
    """Bridge between an agent on the grid and a TD(0) Q-learning loop.

    `step` is meant to be used as a nengo Node callback.  Its output vector
    has length 3 * n_actions, laid out as [current-Q masked to the chosen
    action, TD target masked to the chosen action, raw Q values], from which
    downstream connections can form the TD error.
    """

    def __init__(self, agent, n_actions, epsilon=0.1, stepsize=10, gamma=0.9):
        """
        agent: object exposing x, y, reward and set_position(x, y).
        n_actions: number of discrete actions (0=up, 1=right, 2=down, 3=left).
        epsilon: probability of choosing a random action.
        stepsize: number of millisecond ticks between environment steps.
        gamma: TD discount factor (new parameter; default matches the
               previously hard-coded 0.9, so existing callers are unaffected).
        """
        self.epsilon = epsilon
        self.agent = agent
        self.output = np.zeros(3 * n_actions)
        self.n_actions = n_actions
        self.current_action_index = 0
        self.terminal = False
        self.terminal_clock = stepsize
        self.stepsize = stepsize
        self.gamma = gamma

    def compute_position(self, action_idx):
        '''Adjust x, y coordinates based on a selected action index'''
        if action_idx == 0:
            # move up
            x_pos = self.agent.x
            y_pos = self.agent.y - 1
        elif action_idx == 1:
            # move right
            x_pos = self.agent.x + 1
            y_pos = self.agent.y
        elif action_idx == 2:
            # move down
            x_pos = self.agent.x
            y_pos = self.agent.y + 1
        else:
            # move left
            x_pos = self.agent.x - 1
            y_pos = self.agent.y
        return x_pos, y_pos

    def take_action(self, action_idx, epsilon=None):
        '''Perform `action_idx` in the environment, epsilon-greedily.

        BUG FIXES: the default epsilon now falls back to self.epsilon
        (it was hard-coded to 0.1, silently ignoring the constructor
        argument), and the random draw covers n_actions instead of a
        hard-coded 4.
        '''
        if epsilon is None:
            epsilon = self.epsilon
        do_random = np.random.choice([1, 0], p=[epsilon, 1 - epsilon])
        if do_random:
            action_idx = np.random.choice(np.arange(self.n_actions))
        x_pos, y_pos = self.compute_position(action_idx)
        self.agent.set_position(x_pos, y_pos)
        return action_idx

    def step(self, t, x):
        '''Prepare Q value info for computing error signal.

        On multiples of `stepsize` (in ms ticks) an action is taken greedily
        from the stored Q values; on the following tick the TD target is
        assembled from the fresh Q values in `x`.
        '''
        # While in a terminal cool-down, emit zeros until the clock expires.
        if self.terminal:
            self.terminal_clock -= 1
            if self.terminal_clock == 0:
                self.terminal = False
                self.terminal_clock = self.stepsize
            return np.zeros_like(self.output)
        if int(t * 1000) == 1:
            print('STARTING')
        if int(t * 1000) % self.stepsize == 0:
            if self.agent.reward > 0:
                # Goal reached: reset output, teleport the agent to a random
                # corner-ish start, and enter the terminal cool-down.
                self.output = np.zeros_like(self.output)
                x_pos = np.random.choice([1, 5])
                y_pos = np.random.choice([1, 5])
                self.agent.set_position(x_pos, y_pos)
                self.terminal = True
                return self.output
            # BUG FIX: the raw Q values live in the last third of the output;
            # this was hard-coded as output[8:], which is only correct for
            # n_actions == 4.
            qs = self.output[2 * self.n_actions:]
            idx = np.argmax(qs)
            self.current_action_index = self.take_action(idx)
        # then on next step store new qvalues
        elif int(t * 1000) % self.stepsize == 1:
            qvalues = x
            qmax = qvalues[np.argmax(qvalues)]
            # set output to be current state q, qmax, last state selection
            c_output = np.zeros(self.n_actions)
            c_output[self.current_action_index] = qvalues[self.current_action_index]
            f_output = np.zeros(self.n_actions)
            f_output[self.current_action_index] = self.gamma * qmax + self.agent.reward
            self.output = np.concatenate(
                (c_output, f_output, qvalues))
        return self.output