# aa228_bump.jl
# activate project environment
# include these lines of code in any future scripts/notebooks
#---
import Pkg
if !haskey(Pkg.installed(), "AA228FinalProject")
    jenv = joinpath(dirname(@__FILE__()), ".") # this assumes the script is in the same dir
    # as the Project.toml file, which should be in the top-level dir of the project.
    # Change accordingly if this is not the case.
    Pkg.activate(jenv)
end
#---
# import necessary packages
using AA228FinalProject
using POMDPs
using POMDPPolicies
using BeliefUpdaters
using ParticleFilters
using POMDPSimulators
using Cairo
using Gtk
using Random
using Printf
using Statistics # needed for mean() over the particle belief and for the reward statistics below
using POMDPModels, BasicPOMCP
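# discretized action space: candidate forward velocities and turn rates for the planner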
vlist = [0, 5, 10]
omlist = [0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
aspace = vec(collect(RoombaAct(v, om) for v in vlist, om in omlist))
sensor = Bumper() # or Lidar() for the lidar version of the environment
config = 3 # 1,2, or 3
m = RoombaPOMDP(sensor=sensor, mdp=RoombaMDP(config=config, aspace=aspace))
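# belief representation: a particle filter over RoombaState with num_particles particles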
num_particles = 2000
# resampler = LidarResampler(num_particles, LowVarianceResampler(num_particles))
# for the bumper environment
resampler = BumperResampler(num_particles)
spf = SimpleParticleFilter(m, resampler)
v_noise_coefficient = 2.0
om_noise_coefficient = 0.5
belief_updater = RoombaParticleFilter(spf, v_noise_coefficient, om_noise_coefficient);
# Define the policy to test
mutable struct ToEnd <: Policy
    ts::Int64 # to track the current time-step
end
# extract goal for heuristic controller
goal_xy = get_goal_xy(m)
# define a new function that takes in the policy struct and current belief,
# and returns the desired action
function POMDPs.action(p::ToEnd, b::ParticleCollection{RoombaState})
    # spin around to localize for the first 25 time-steps
    # if p.ts < 25
    #     p.ts += 1
    #     return RoombaAct(0., 1.0) # all actions are of type RoombaAct(v, om)
    # end
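    # heuristic startup: for the first 10 steps, drive straight at full speed, and
    # slow down and turn around whenever the leading belief particle reports wall contact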
    if p.ts < 10
        p.ts += 1
        if AA228FinalProject.wall_contact(m, particles(b)[1])
            # println("True")
            return RoombaAct(1.0, pi)
        else
            # println("False")
            return RoombaAct(10., 0.0)
        end
    end
    p.ts += 1
    # compute the mean state of the particle belief
    s = mean(b)
    # Need to figure out how to grab a single particle
    # @printf "%s \n" typeof(b)
    # compute the difference between our current heading and one that would
    # point to the goal
    goal_x, goal_y = goal_xy
    # @printf "Goal X: %d; Goal Y: %d \n" goal_x goal_y
    x, y, th = s[1:3]
    # @printf "Current X: %d; Current Y: %d; Current Theta: %d \n" x y th
    del_x = goal_x - x
    del_y = goal_y - y
    # if abs(del_x) < 1
    #     ang_to_goal = atan(del_y, 0)
    # elseif abs(del_y) < 1
    #     ang_to_goal = atan(0, del_x)
    # else
    #     ang_to_goal = atan(del_y, del_x)
    # end
    ang_to_goal = atan(del_y, del_x)
    # @printf "Delta X: %d; Delta Y: %d \n" del_x del_y
    if length(particles(b)) > 0
        if AA228FinalProject.wall_contact(m, particles(b)[1])
            ang_to_goal = ang_to_goal + rand(-2.0:2.0)
        end
    end
    del_angle = wrap_to_pi(ang_to_goal - th)
    # @printf "Belief Theta: %d \n" th
    # apply proportional control to compute the turn-rate
    Kprop = 1.0
    om = Kprop * del_angle
    # @printf "Angle to Goal: %d; Delta Angle: %d; Heading Change: %d \n" ang_to_goal del_angle om
    # velocity schedule based on turn rate (currently disabled in favor of a constant speed):
    # if abs(om) > 2
    #     v = 0.1
    # elseif 1 < abs(om) <= 2
    #     v = 1.0
    # else
    #     v = 2.0
    # end
    v = 5.0
    return RoombaAct(v, om)
end
# function evaluate(b::ParticleCollection{RoombaState})
#     goal_x, goal_y = goal_xy
#     s = mean(b)
#     x, y, th = s[1:3]
#     distance = (goal_x - x)^2 + (goal_y - y)^2 # squared distance to the goal
#     return 5 * (1 / distance)
# end
# first seed the environment
Random.seed!(3)
# reset the policy
p = ToEnd(0) # here, the argument sets the time-steps elapsed to 0
#p = RandomPolicy(m)
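# POMCP planner; the ToEnd heuristic is passed as default_action, the fallback BasicPOMCP uses if planning fails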
solver = POMCPSolver(default_action=p) # estimate_value=FORollout(p)
planner = solve(solver, m)
# run the simulation
c = @GtkCanvas()
win = GtkWindow(c, "Roomba Environment", 600, 600)
# for (s, a, o) in stepthrough(m, planner, "sao", max_steps=10)
#     println("State was $s,")
#     println("action $a was taken,")
#     println("and observation $o was received.\n")
#     sleep(0.1)
# end
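# visualize five episodes, each with a different random seed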
for exp = 1:5
    Random.seed!(exp)
    for (t, step) in enumerate(stepthrough(m, planner, belief_updater, max_steps=100))
        @guarded draw(c) do widget
            # @printf "Time Step: %d; step: %s \n" t step
            # the following lines render the room, the particles, and the roomba
            ctx = getgc(c)
            set_source_rgb(ctx, 1, 1, 1)
            paint(ctx)
            render(ctx, m, step)
            # render some information that can help with debugging
            # here, we render the time-step, the state, and the observation
            move_to(ctx, 300, 400)
            show_text(ctx, @sprintf("t=%d, state=%s, o=%.3f", t, string(step.s), step.o))
        end
        show(c)
        sleep(0.1) # to slow down the simulation
    end
end
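# evaluate the policy: accumulate total reward over five seeded episodes
# and report the mean and standard error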
total_rewards = Float64[]
for exp = 1:5
    println(string(exp))
    Random.seed!(exp)
    p = ToEnd(0)
    traj_rewards = sum([step.r for step in stepthrough(m, planner, belief_updater, max_steps=100)])
    push!(total_rewards, traj_rewards)
end
@printf("Mean Total Reward: %.3f, StdErr Total Reward: %.3f", mean(total_rewards), std(total_rewards)/sqrt(5))