-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathold_gridworld.py
265 lines (216 loc) · 7.2 KB
/
old_gridworld.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
"""
This file contains all the functions related to the grid world game
"""
import numpy as np
from scipy.stats import bernoulli
def action_map(action):
    """Translate a (row_delta, col_delta) movement tuple into its action index.

    Encoding: 0=up, 1=down, 2=left, 3=right, 4=stay — the same order as the
    action list used when moving the player.
    """
    return {(-1, 0): 0, (1, 0): 1, (0, -1): 2, (0, 1): 3, (0, 0): 4}[action]
def rand_pair(h, w):
    """Return a uniformly random (row, col) pair inside an h x w grid."""
    row = np.random.randint(0, h)
    col = np.random.randint(0, w)
    return row, col
# Initialize stationary grid, all items are placed deterministically
def init_grid():
    """Build the fixed 3x5 grid.

    Each cell holds a 4-channel one-hot vector: [reward, pit, wall, player].
    """
    state = np.zeros((3, 5, 4))
    state[0, 1, 3] = 1  # player
    state[2, 2, 2] = 1  # wall
    state[1, 1, 1] = 1  # pit
    state[2, 3, 0] = 1  # reward
    return state
# Initialize player in random location, but keep wall, reward and pit stationary
def init_grid_player():
    """Build a 3x5 grid with fixed wall/pit/reward and a randomly placed player.

    If the random player cell collides with a fixed object, the later full-cell
    assignment wipes the player channel; in that case the grid is rebuilt.
    """
    state = np.zeros((3, 5, 4))
    state[rand_pair(3, 5)] = np.array([0, 0, 0, 1])  # player, random cell
    state[2, 2] = np.array([0, 0, 1, 0])  # wall
    state[1, 1] = np.array([0, 1, 0, 0])  # pit
    state[1, 2] = np.array([1, 0, 0, 0])  # reward
    # Verify every object is still on the board (player may have been wiped).
    found = [find_location(state, lvl) for lvl in (3, 2, 0, 1)]
    if not all(found):
        # print('Invalid grid. Rebuilding..')
        return init_grid_player()
    return state
# Initialize grid so that reward, pit, wall, player are all randomly placed
def init_grid_rand():
    """Build a 3x5 grid with player, wall, pit, and reward all in random cells.

    Placement order is player, wall, pit, reward; a collision overwrites the
    earlier object's channel, which is detected afterwards and triggers a
    recursive rebuild.
    """
    state = np.zeros((3, 5, 4))
    # One-hot cell vectors in channel order [reward, pit, wall, player].
    for vec in ([0, 0, 0, 1],   # player
                [0, 0, 1, 0],   # wall
                [0, 1, 0, 0],   # pit
                [1, 0, 0, 0]):  # reward
        state[rand_pair(3, 5)] = np.array(vec)
    # If any of the "objects" are superimposed, just call the function again to re-place
    if not all(find_location(state, lvl) for lvl in (3, 2, 0, 1)):
        # print('Invalid grid. Rebuilding..')
        return init_grid_rand()
    return state
# Initialize a grid of size height * 5: wall column in the middle, pit on the
# bottom row, fixed reward in the top-left corner, player in the bottom-right.
def init_grid_dynamic_size(height):
    """Build a height x 5 grid (channels [reward, pit, wall, player]).

    The middle column is walled except for the top and bottom rows, so the
    agent must route around it to reach the fixed reward at (0, 0).
    """
    state = np.zeros((height, 5, 4))
    state[height - 1, 4, 3] = 1      # player: bottom-right corner
    state[1:height - 1, 2, 2] = 1    # wall column, top/bottom rows left open
    state[height - 1, 2, 1] = 1      # pit: bottom row, middle column
    state[0, 0, 0] = 1               # fixed reward: top-left corner
    # Stochastic rewards are added separately (see place_rand_reward).
    return state
def check_optimal_policy(height, location, action):
    """Return 1 if `action` is optimal at `location` in a height x 5 grid, else 0.

    Action encoding: 0 = up, 1 = down, 2 = left, 3 = right, 4 = stay.
    The table steers the agent around the central wall column toward the
    fixed reward in the top-left corner; `action` is expected to be an int
    (the dead tuple-mapping path was removed — see note below).
    """
    # Default everywhere is "stay"; specific cells are overwritten below.
    optimal = {}
    for i in range(height):
        for j in range(5):
            optimal[(i, j)] = [4]
    # Left column climbs; column 1 and the right edge funnel left/up
    # around the wall column.
    for i in range(1, height - 1):
        optimal[(i, 0)] = [0]
        optimal[(i, 1)] = [2, 0]
        optimal[(i + 1, 4)] = [2, 0]
    # Top row heads left toward the reward at (0, 0).
    for j in range(1, 4):
        optimal[(0, j)] = [2]
    # Column 3 climbs to the top row.
    for i in range(1, height):
        optimal[(i, 3)] = [0]
    optimal[(height - 1, 1)] = [0]
    optimal[(1, 4)] = [2]
    # NOTE: a commented-out block that mapped tuple actions through action_map
    # was removed; it was dead code (a bare triple-quoted string, a no-op).
    return 1 if action in optimal[location] else 0
def place_player(state, new_loc):
    """Relocate the player channel to `new_loc`, clearing its previous cell.

    Mutates `state` in place and returns it.
    """
    previous = find_location(state, 3)
    state[previous][3] = 0  # clear old position
    state[new_loc][3] = 1   # mark new position
    return state
def check_availability(state, loc):
    """Truthy if `loc` already holds a reward, pit, or wall (channels 0-2)."""
    cell = state[loc]
    return cell[0] or cell[1] or cell[2]
def place_rand_reward(state):
    """Drop a new reward into a random cell not occupied by any object.

    Mutates `state` in place; returns None.
    """
    height = len(state)
    agent = find_location(state, 3)
    walls = find_location(state, 2)    # list of wall cells
    rewards = find_location(state, 0)  # list of existing reward cells
    pit = find_location(state, 1)
    # Resample until the candidate cell clashes with nothing on the board.
    candidate = rand_pair(height, 5)
    while (candidate == agent or candidate == pit
           or candidate in walls or candidate in rewards):
        candidate = rand_pair(height, 5)
    state[candidate[0], candidate[1]] = np.array([1, 0, 0, 0])
def make_move(state, action):
    """Apply `action` (0=up, 1=down, 2=left, 3=right) and return the new state.

    The board is rebuilt from scratch: the player is stamped into the target
    cell only if the move is legal (inside the grid and not into a wall);
    otherwise the player stays where it was. All static objects are then
    re-stamped onto the fresh grid, so a player sharing a cell with a reward
    or the pit keeps both channels set (get_reward relies on this).
    """
    # need to locate player in grid
    # need to determine what object (if any) is in the new grid spot the player is moving to
    height = len(state)
    player_loc = find_location(state, 3)
    wall = find_location(state, 2)    # list of wall cells
    reward = find_location(state, 0)  # list of reward cells
    pit = find_location(state, 1)
    # Fresh empty board; everything is re-placed below.
    state = np.zeros((height, 5, 4))
    actions = [[-1, 0], [1, 0], [0, -1], [0, 1]]
    # e.g. up => (player row - 1, player column + 0)
    new_loc = (player_loc[0] + actions[action][0], player_loc[1] + actions[action][1])
    if new_loc not in wall:
        # Bounds check: both coordinates within [0, height-1] x [0, 4].
        if (np.array(new_loc) <= (height - 1, 4)).all() and (np.array(new_loc) >= (0, 0)).all():
            state[new_loc][3] = 1
    new_player_loc = find_location(state, 3)
    if not new_player_loc:
        # Illegal move (wall or out of bounds): restore the player in place.
        state[player_loc] = np.array([0, 0, 0, 1])
    # re-place pit
    state[pit][1] = 1
    # re-place wall
    for w in wall:
        state[w][2] = 1
    # re-place reward
    for r in reward:
        state[r][0] = 1
    return state
def find_location(state, level):
    """Locate the object(s) stored in channel `level` of a height x 5 grid.

    Levels 0 (reward) and 2 (wall) may occur more than once, so they return a
    list of (row, col) tuples (possibly empty). Other levels (1 = pit,
    3 = player) return the first matching (row, col) tuple, or None if the
    channel is empty everywhere.
    """
    height = len(state)
    cells = ((i, j) for i in range(height) for j in range(5))
    if level in (0, 2):
        return [loc for loc in cells if state[loc][level] == 1]
    for loc in cells:
        if state[loc][level] == 1:
            return loc
    # Falls through to an implicit None when the object is absent.
def get_reward(state):
    """Return the reward for the player's current cell.

    -10 for the pit, +10 for the fixed reward at (0, 0), a random -10/+10
    (fair Bernoulli scaled) for any other reward cell, and -1 step cost
    otherwise.
    """
    player_loc = find_location(state, 3)
    pit = find_location(state, 1)
    reward = find_location(state, 0)
    if player_loc == pit:
        return -10
    elif player_loc in reward:
        if player_loc[0] == 0 and player_loc[1] == 0:
            return 10
        else:
            # Single fair Bernoulli draw scaled to -10/+10. Identical in
            # distribution to the old scheme, which sampled 10 Bernoulli
            # values and uniformly picked one of them.
            return bernoulli.rvs(0.5) * 20 - 10
    else:
        return -1
def display_grid(state):
    """Render the grid as a height x 5 array of single characters.

    'P' player, 'W' wall, '+' reward, '-' pit. When the player shares a cell
    with a reward or the pit, that cell shows 'V' (win) or 'L' (lose).
    """
    height = len(state)
    grid = np.full((height, 5), ' ', dtype=str)
    player_loc = find_location(state, 3)
    wall = find_location(state, 2)
    reward = find_location(state, 0)
    pit = find_location(state, 1)
    if player_loc:
        grid[player_loc] = 'P'  # player
    for cell in wall:
        grid[cell] = 'W'  # wall
    for cell in reward:
        grid[cell] = 'V' if player_loc == cell else '+'  # Win! / reward
    if pit:
        grid[pit] = 'L' if player_loc == pit else '-'  # Lose! / pit
    return grid