forked from Sven-J-Steinert/Rocket-Hopper-DDPG
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhopperenv.py
210 lines (149 loc) · 5.45 KB
/
hopperenv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
#import CoolProp.CoolProp as CP
# some global variables
g0 = 9.80665
F_T = 0 # define global var
def F_Thrust_fast(p_valve):
coefficients = np.array([10.600789308865751,-9.503317777109121])
linear_fit = np.poly1d(coefficients)
return max(0,linear_fit(p_valve))
class HopperEnv(Env):
def __init__(self):
# Actions we can take: set pressure between 0 and 7 bar
self.action_space = Box(low=0.0, high=7.0, shape=(1,), dtype=np.float32) # pressure
# Altitude range
self.observation_space = Box(low=np.array([0.,5., -0.1,0.]), high=np.array([0.,5., 0.1,7.]), shape=(4,)) # [x_target,x,a,p_actual]
# Set simulation time
self.sim_time = 5 # [s] total sim time
self.tn = 1/60 # [s] step sim time
self.h = 1/120 # [s] stepsize ode
self.p_actual = 0 # [bar]
#self.counter = 0 # for constant penalty
#self.x_old = 0 # for constant penalty
self.v_old = 0
#self.action_old = 0
# test trajectory: select 3 random hover points and landing in the end
self.x_target = random.uniform(1, 3)
#self.x_target = 2
# Set start altitude and velocity
self.y = np.array([0.,0.])
self.state = np.array([self.x_target,0.,0.,0.])
def reset(self):
# Reset all variables
self.state = np.array([self.x_target,0.,0.,0.])
self.y = np.array([0.,0.])
self.p_actual = 0
self.x_target = random.uniform(1, 3)
#self.x_target = 2
self.sim_time = 5 # [s]
self.v_old = 0
#self.action_old = 0
return self.state
def step(self, action,raw=None):
#print('time',self.sim_time,'x_target',self.x_target)
#print('x_target',self.x_target)
# Apply action
p_set = action
self.p_actual = valve_behavior(p_set,self.p_actual)
y = self.sim_step(self.y,self.p_actual)
self.y = y[:-1]
self.state = np.array([self.x_target,y[0],y[2],self.p_actual]) # craft [x_target,x,a,p_actual]
# Reduce sim_time by a step
self.sim_time -= self.tn
# reward small error in gauss dist
error = self.state[1]-self.x_target
scale = 1
sigma = 0.5 # variance - gauss parameter
reward = scale*(1/(sigma*np.sqrt(2*np.pi))* np.exp(-0.5*((error)/(sigma))**2)) # small error gauss
# boundary penalty
#if y[0] >= 5 or y[0] <= 0:
# reward -= 0.5
# Penalty for valve actuation
#reward -= 0.1 * abs(action - self.last_action) / self.h
#self.last_action = action
# Penalty for high velocities
#self.penalty3[self.step2] = - 0.1 * abs(v)
# Penalty for high accelerations
#self.penalty4[self.step2] = 0
# penalize fast change in p_set
#if abs(self.p_set_old - p_set) > 0.5:
# reward += -10
# update for next loop
#self.x_old = y[0]
# Check if shower is done
if self.sim_time <= 0:
done = True
else:
done = False
# Apply sensor noise
#self.state += random.randint(-1,1)
# Set placeholder for info
info = [self.p_actual,self.y[1]]
# Return step information
return self.state, reward, done, info
def render(self):
# Implement viz
pass
def sim_step(self,y,p):
p = np.squeeze(p) # make sure its one dimensional
t0 = 0 # initial time in seconds
time = np.linspace(t0, self.tn, int((self.tn-t0)/self.h)+1)
# update Thrust
global F_T
F_T = F_Thrust_fast(p)
for t in time[1:]:
y = rk4_e(ode, y, self.h, t)
x = y[0]
v = y[1]
a = v-self.v_old
self.v_old = v
return np.array([x,v,a])
# resistance modelling - smoothed to stablize model
def F_R(v):
v_thr = 0.01
F_RR = 10 # N
if abs(v) > v_thr:
F_R = np.sign(v)*F_RR
else:
F_R = (abs(v)/v_thr)*np.sign(v)*F_RR
return F_R
def ode(t, y):
"""
Defines the system of ODEs for free fall.
Parameters:
t (float): Time.
y (array): Array containing the position and velocity [x, v].
Returns:
dydt (array): Array containing the derivatives [v, a].
"""
global F_T
x = y[0]
v = y[1]
k = 6 # N /m
m = 3.5+1 # kg
a = (1/m)*(F_T - (m*g0) - (k *x) - (F_R(v)))
# restrict movement to be not able to go below 0 in position
if (x == 0 or x < 0 ) and a < 0:
# would move into negative realm
a = 0
dydt = np.array([v, a])
return dydt
def rk4_e(f, y, h, t):
# runge kutta 4th order explicit
tk_05 = t + 0.5*h
yk_025 = y + 0.5 * h * f(t, y)
yk_05 = y + 0.5 * h * f(tk_05, yk_025)
yk_075 = y + h * f(tk_05, yk_05)
return y + h/6 * (f(t, y) + 2 * f(tk_05, yk_025) + 2 * f(tk_05, yk_05) + f(t+h, yk_075))
def valve_behavior(p_set,p_current):
# make both inputs scalar
p_set = np.squeeze(p_set)
p_current = np.squeeze(p_current)
freq=1/60 #[s^-1]
T=0.2
# explicit computation of value after freq time
p_actual = p_set*(1-np.exp(-freq/T))+p_current*np.exp(-freq/T)
return p_actual