From 87d8c267d7bbe5321c319096144e2bec1d032e04 Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 29 Nov 2023 13:15:29 +0000 Subject: [PATCH 01/29] added her goal observation --- flyer_env/envs/common/observation.py | 34 ++++++ flyer_env/envs/flyer_env.py | 69 ++++++++++-- flyer_env/envs/trajectory_env.py | 1 + scripts/her_flyer_plot.py | 159 +++++++++++++++++++++++++++ scripts/sac_control_plot.py | 1 - scripts/test_plot.py | 73 ++++++++++++ scripts/train.py | 31 ++++-- 7 files changed, 349 insertions(+), 19 deletions(-) create mode 100644 scripts/her_flyer_plot.py create mode 100644 scripts/test_plot.py diff --git a/flyer_env/envs/common/observation.py b/flyer_env/envs/common/observation.py index 66700fc..6bc3b19 100644 --- a/flyer_env/envs/common/observation.py +++ b/flyer_env/envs/common/observation.py @@ -163,6 +163,38 @@ def observe(self) -> np.ndarray: obs = df.values.copy() return obs.astype(self.space().dtype) +class DynamicGoalObservation(DynamicObservation): + + def __init__(self, + env: "AbstractEnv", + **kwargs: dict) -> None: + super().__init__(env, **kwargs) + if hasattr(env, "goal"): + self.goal = env.goal + + def space(self) -> spaces.Space: + try: + obs = self.observe() + return spaces.Dict(dict( + desired_goal=spaces.Box(-np.inf, np.inf, shape=obs["desired_goal"].shape, dtype=np.float64), + achieved_goal=spaces.Box(-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype=np.float64), + observation=spaces.Box(-np.inf, np.inf, shape=obs["observation"].shape, dtype=np.float64) + )) + except AttributeError: + return spaces.Space() + + def observe(self) -> Dict[str, np.ndarray]: + df = pd.DataFrame.from_records([self.observer_vehicle.dict])[self.features] + df = df[self.features] + obs = df.values.copy() + # obs = obs.astype(self.space().dtype) + obs = OrderedDict([ + ("observation", obs[0]), + ("achieved_goal", obs[0][0:3]), + ("desired_goal", self.goal) + ]) + return obs + def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType: if 
config["type"] == "Dynamics" or config["type"] == "dynamics": @@ -173,5 +205,7 @@ def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType: return ControlObservation(env, **config) elif config["type"] == "Longitudinal" or config["type"] == "longitudinal": return LongitudinalObservation(env, **config) + elif config["type"] == "Goal" or config["type"] == "goal" or config["type"] == "DynamicGoal": + return DynamicGoalObservation(env, **config) else: raise ValueError("Unknown observation type") diff --git a/flyer_env/envs/flyer_env.py b/flyer_env/envs/flyer_env.py index 4d168f7..79cbb9b 100644 --- a/flyer_env/envs/flyer_env.py +++ b/flyer_env/envs/flyer_env.py @@ -2,6 +2,8 @@ import sys import os import numpy as np +from gymnasium import Env +from abc import abstractmethod from flyer_env import utils from flyer_env.aircraft import ControlledAircraft @@ -10,7 +12,32 @@ from pyflyer import World, Aircraft -class FlyerEnv(AbstractEnv): +class GoalEnv(Env): + """ + Interface for a goal-based environment + + Similar to HighwayEnv https://github.com/Farama-Foundation/HighwayEnv/blob/master/highway_env/envs/parking_env.py. + This interface is needed by agents such as Stable Baselines3's Hindsight Experience Replay (HER) agent. + + As a goal-based environment it functions in the same way as any regular OpenAI Gym Environment, but imposes a required structure on the obs space. + More concretely, the observation space is required to contain at least 3 elements, namely `observation`, `desired_goal`, and `achieved_goal`. + """ + + @abstractmethod + def compute_reward(self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info: dict) -> float: + """ + Compute the step reward. This externalizes the reward function and makes it dependent on a desired goal and the one that was achieved. 
+ + :param achieved_goal: the goal that was achieved during execution + :param desired_goal: the desired goal that we asked the agent to attempt to achieve + :param info (dict): an info dictionary with additional information + :return: the reward that corresponds to the provided goal achieved w.r.t. the desired goal + + """ + raise NotImplementedError + + +class FlyerEnv(AbstractEnv, GoalEnv): """ A goal-oriented flying environment @@ -23,7 +50,7 @@ def default_config(cls) -> dict: config = super().default_config() config.update({ "observation": { - "type": "Dynamics" + "type": "Goal" }, "action": { "type": "ContinuousAction" @@ -31,9 +58,10 @@ def default_config(cls) -> dict: "area": (1024, 1024), # terrain map area [tiles] "vehicle_type": "Dynamic", # vehicle type, only dynamic available "duration": 10.0, # simulation duration [s] - "collision_reward": -200.0, # max -ve reward for crashing - "point_reward": 100.0, # max +ve reward for hitting the goal - "normalize_reward": True, # whether to normalize the reward [-1, +1] + "collision_reward": -100.0, # max -ve reward for crashing + "reward_type": "dense", # reward type + "point_reward": 1.0, # multiplier for distance from goal + "normalize_reward": False, # whether to normalize the reward [-1, +1], not working at the moment "goal_generation": { "heading_limits": [-np.pi, np.pi], "pitch_limits": [-10.0 * np.pi/180.0, 10.0 * np.pi/180.0], @@ -61,6 +89,7 @@ def _create_world(self, seed) -> None: def _create_vehicles(self) -> None: """Create an aircraft to fly around the world""" + self.controlled_vehicles = [] path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data/") start_pos = [0.0, 0.0, -1000.0] heading = 0.0 @@ -82,7 +111,6 @@ def _create_vehicles(self) -> None: dt = 1/self.config["simulation_frequency"], ) self.controlled_vehicles.append(vehicle) - def _create_goal(self, seed) -> None: """Create a random goal in 3D space to navigate to, based on the aircraft's initial starting position""" @@ 
-102,7 +130,25 @@ def get_goal(): g_pos = get_goal() self.goal = g_pos return - + + def compute_reward(self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info: dict) -> float: + """ + Proximity to goal is rewarded + Just use _point_reward for now, could be more explicit. + TODO: look at how the gripper point robots select the points + """ + + def _goal_distance(goal_a, goal_b): + assert goal_a.shape == goal_b.shape + return np.linalg.norm(goal_a - goal_b, axis=-1) + + dist_terminal = self.config["goal_generation"]["dist_terminal"] + d = _goal_distance(achieved_goal, desired_goal) + if self.config["reward_type"] == "sparse": + return -(d > dist_terminal).astype(np.float32) + else: + return -d * 100.0/(self.config["goal_generation"]["dist_limits"][1] * self.config["duration"] * self.config["simulation_frequency"]) + def _reward(self, action: Action) -> float: """ Reward vehicle if it makes progress towards the goal state @@ -136,9 +182,10 @@ def _point_reward(self): Reward for reaching the goal state """ distance = self.vehicle.goal_dist(self.goal) - point_reward = self.config["point_reward"] - dist_terminal = self.config["goal_generation"]["dist_terminal"] - reward = point_reward * dist_terminal / distance + # point_reward = self.config["point_reward"] + # dist_terminal = self.config["goal_generation"]["dist_terminal"] + # reward = point_reward * dist_terminal / distance + reward = -distance * 100.0/(self.config["goal_generation"]["dist_limits"][1] * self.config["duration"] * self.config["simulation_frequency"]) return reward def _crash_reward(self) -> float: @@ -146,7 +193,7 @@ def _crash_reward(self) -> float: Penalize if the aircraft crashes """ if self.vehicle.crashed: - return -200.0 + return 1.0 else: return 0.0 diff --git a/flyer_env/envs/trajectory_env.py b/flyer_env/envs/trajectory_env.py index 7b1391c..f123a47 100644 --- a/flyer_env/envs/trajectory_env.py +++ b/flyer_env/envs/trajectory_env.py @@ -10,6 +10,7 @@ from pyflyer import World, Aircraft + 
class TrajectoryEnv(AbstractEnv): """ diff --git a/scripts/her_flyer_plot.py b/scripts/her_flyer_plot.py new file mode 100644 index 0000000..f1eaefa --- /dev/null +++ b/scripts/her_flyer_plot.py @@ -0,0 +1,159 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import gymnasium as gym +from stable_baselines3 import SAC + +plt.rcParams.update({ + "text.usetex": True +}) + +COLOURS = [[0, 18, 25], [0, 95, 115], [10, 147, 150], [148, 210, 189], [233, 216, 166], [238, 155, 0], [202, 103, 2], [187, 62, 3], [174, 32, 18], [155, 34, 38]] +COLOURS = [[value/255 for value in rgb] for rgb in COLOURS] + +def main(): + + env_config = { + "observation": { + "type": "Goal" + }, + "action": { + "type": "ControlledAction" + }, + "duration": 100.0, + "simulation_frequency": 100.0, + "seed": 0 + } + + env = gym.make("flyer-v1", config=env_config) + policy = SAC.load("models/flyer_controlled_her-v1/best_model.zip", env=env) + + obs, info = env.reset() + done = False + + observations = [] + times = [] + # targets = [] + dt = 1/env_config["simulation_frequency"] + time = 0.0 + + while not done: + action, _states = policy.predict(obs, deterministic=True) + print(f'obs: {obs}') + print(f'action: {action}') + obs, reward, terminated, truncated, info = env.step(action) + + v_dict = env.unwrapped.vehicle.dict + controls = env.unwrapped.vehicle.controls + + obs_dict = { + 'elevator': controls[1], + 'aileron': controls[0], + 'rudder': 0.0, + 'x': v_dict['x'], + 'y': v_dict['y'], + 'z': v_dict['z'], + 'x_com': obs['desired_goal'][0], + 'y_com': obs['desired_goal'][1], + 'z_com': obs['desired_goal'][2], + 'pitch': v_dict['pitch'], + 'roll': v_dict['roll'], + 'yaw': v_dict['yaw'], + 'u': v_dict['u'], + 'reward': reward + } + + times.append(time) + # targets.append(info['t_pos']) + observations.append(obs_dict) + time += dt + + if terminated or truncated: + done = True + + env.close() + observations = pd.DataFrame.from_dict(observations) + plot_long(observations, times, 
env_config["duration"]) + plot_lat(observations, times, env_config["duration"]) + plot_track(observations) + plt.show() + +def plot_long(outputs, times, exp_len): + fig, ax = plt.subplots(5, 1, sharex=True) + [axis.grid() for axis in ax] + fig.subplots_adjust(hspace=0.0) + fig.set_figheight(10) + fig.set_figwidth(20) + + ax[0].set_title(r"\textbf{Longitudinal Tracking}") + ax[0].plot(times, outputs['elevator'], c=COLOURS[5], label=r'elevator') + ax[0].set_ylabel(r"$\delta [^{\circ}]$", fontsize=15) + ax[0].legend(title=r'\textbf{Control}') + + ax[1].plot(times, outputs['pitch'] * 180.0 / np.pi, c=COLOURS[1]) + ax[1].set_ylabel(r"$\theta [^{\circ}]$", fontsize=15) + + ax[2].plot(times, outputs['z'], c=COLOURS[1]) + ax[2].plot(times, outputs['z_com'], linestyle='dashed', c=COLOURS[2]) + ax[2].set_ylabel(r"$z [m]$", fontsize=15) + + ax[3].plot(times, outputs['u'], c=COLOURS[1]) + ax[3].set_ylabel(r"$u [\frac{m}{s}]$", fontsize=15) + + ax[4].plot(times, outputs['reward'], c=COLOURS[1]) + ax[4].set_ylabel(r'Reward', fontsize=15) + ax[4].set_xlabel(r'time [$s$]', fontsize=15) + + [axis.set_xlim(0.0, exp_len) for axis in ax] + [axis.xaxis.set_tick_params(labelsize=15) for axis in ax] + [axis.yaxis.set_tick_params(labelsize=15) for axis in ax] + fig.savefig("flyer_long_control_her.pdf") + +def plot_lat(outputs, times, exp_len): + + fig, ax = plt.subplots(4, 1, sharex=True) + [axis.grid() for axis in ax] + fig.subplots_adjust(hspace=0.0) + fig.set_figheight(10) + fig.set_figwidth(20) + + ax[0].set_title(r"\textbf{Lateral-Directional Tracking}") + ax[0].plot(times, outputs['aileron'], c=COLOURS[5], label=r'aileron') + ax[0].plot(times, outputs['rudder'], c=COLOURS[7], linestyle='dashed', label=r'rudder') + ax[0].set_ylabel(r"$\delta [^{\circ}]$", fontsize=15) + ax[0].legend(title=r'\textbf{Control}') + + ax[1].plot(times, outputs['roll'] * 180.0 / np.pi, c=COLOURS[1]) + ax[1].plot(times, outputs['yaw'] * 180.0 / np.pi, c=COLOURS[2], linestyle='dashed', label=r'$\psi$') + 
ax[1].set_ylabel(r"$\theta [^{\circ}]$", fontsize=15) + + ax[2].plot(times, outputs['u'], c=COLOURS[1]) + ax[2].set_ylabel(r"$u [\frac{m}{s}]$", fontsize=15) + + ax[3].plot(times, outputs['reward'], c=COLOURS[1]) + ax[3].set_ylabel(r'Reward', fontsize=15) + ax[3].set_xlabel(r'time [$s$]', fontsize=15) + + + [axis.set_xlim(0.0, exp_len) for axis in ax] + [axis.xaxis.set_tick_params(labelsize=15) for axis in ax] + [axis.yaxis.set_tick_params(labelsize=15) for axis in ax] + fig.savefig("flyer_lat_control_her.pdf") + +def plot_track(outputs): + + fig, ax = plt.subplots(1, 1) + fig.set_figheight(10) + fig.set_figwidth(10) + + ax.plot(outputs['x'], outputs['y'], c=COLOURS[1]) + ax.scatter(outputs['x_com'][1], outputs['y_com'][1], c=COLOURS[2]) + ax.set_ylabel(r"$y [m]$", fontsize=15) + ax.set_xlabel(r"$x [m]$", fontsize=15) + ax.set_aspect('equal') + ax.grid() + + fig.savefig("flyer_track_control_her.pdf") + +if __name__=="__main__": + main() \ No newline at end of file diff --git a/scripts/sac_control_plot.py b/scripts/sac_control_plot.py index 570170a..fd69fa7 100644 --- a/scripts/sac_control_plot.py +++ b/scripts/sac_control_plot.py @@ -184,7 +184,6 @@ def plot_lat(outputs, times, exp_len): ax[3].set_ylabel(r'Reward', fontsize=15) ax[3].set_xlabel(r'time [$s$]', fontsize=15) - [axis.set_xlim(0.0, exp_len) for axis in ax] [axis.xaxis.set_tick_params(labelsize=15) for axis in ax] [axis.yaxis.set_tick_params(labelsize=15) for axis in ax] diff --git a/scripts/test_plot.py b/scripts/test_plot.py new file mode 100644 index 0000000..189b22e --- /dev/null +++ b/scripts/test_plot.py @@ -0,0 +1,73 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import gymnasium as gym +from stable_baselines3 import SAC + +plt.rcParams.update({ + "text.usetex": True +}) + +COLOURS = [[0, 18, 25], [0, 95, 115], [10, 147, 150], [148, 210, 189], [233, 216, 166], [238, 155, 0], [202, 103, 2], [187, 62, 3], [174, 32, 18], [155, 34, 38]] +COLOURS = [[value/255 for value 
in rgb] for rgb in COLOURS] + +def main(): + + env = gym.make("Pendulum-v1") + policy = SAC.load("models/sac_pendulum-v1/best_model.zip") + + obs, info = env.reset() + done = False + observations = [] + dt = 0.05 + times = [] + time = 0.0 + + while not done: + action, _states = policy.predict(obs, deterministic=True) + obs, reward, terminated, truncated, info = env.step(action) + + obs_dict = { + 'x': obs[0], + 'y': obs[1], + 'theta_dot': obs[2], + 'action': action, + 'reward': reward + } + times.append(time) + observations.append(obs_dict) + time += dt + + if terminated or truncated: + done = True + env.close() + + observations = pd.DataFrame.from_dict(observations) + plot_pendulum(observations, times) + +def plot_pendulum(outputs, times): + fig, ax = plt.subplots(4, 1, sharex=True) + [axis.grid() for axis in ax] + fig.subplots_adjust(hspace=0.0) + fig.set_figheight(10) + fig.set_figwidth(20) + + ax[0].set_title(r"\textbf{Pendulum Swingup}") + ax[0].plot(times, outputs['x'], c=COLOURS[1], label=r'x') + ax[0].set_ylabel(r'$x [m]$') + + ax[1].plot(times, outputs['y'], c=COLOURS[1], label=r'y') + ax[1].set_ylabel(r'$y [m]$') + + ax[2].plot(times, outputs['action'], c=COLOURS[1], label=r'tau') + ax[2].set_ylabel(r'$\tau [Nm]$') + + ax[3].plot(times, outputs['reward'], c=COLOURS[1]) + ax[3].set_ylabel(r'$reward [-]$') + + [axis.xaxis.set_tick_params(labelsize=15) for axis in ax] + [axis.yaxis.set_tick_params(labelsize=15) for axis in ax] + fig.savefig("test_pendulum.pdf") + +if __name__=="__main__": + main() diff --git a/scripts/train.py b/scripts/train.py index 1426d8c..6d25c35 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -2,7 +2,8 @@ import hydra from pathlib import Path from omegaconf import DictConfig, OmegaConf -from stable_baselines3 import SAC, PPO, DDPG + +from stable_baselines3 import HerReplayBuffer, SAC, PPO, DDPG from stable_baselines3.common.env_util import make_vec_env from stable_baselines3.common.callbacks import EvalCallback from 
stable_baselines3.common.monitor import Monitor @@ -34,12 +35,28 @@ def __init__(self, cfg): eval_freq=cfg.eval_freq, deterministic=True, render=False) - self.model = SAC( - "MlpPolicy", - self.train_env, - verbose=1, - tensorboard_log=f".runs/sac" - ) + + if cfg.use_her: + self.model = SAC( + "MultiInputPolicy", + self.train_env, + verbose=1, + replay_buffer_class=HerReplayBuffer, + # Parameters for HER + replay_buffer_kwargs=dict( + n_sampled_goal=4, + goal_selection_strategy="future" + ), + learning_starts=self.cfg.learning_starts, + tensorboard_log=f".runs/sac" + ) + else: + self.model = SAC( + "MlpPolicy", + self.train_env, + verbose=1, + tensorboard_log=f".runs/sac" + ) return def train(self): From 4352213e33ec9d591b2cc1c7fc82313c934b189a Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 29 Nov 2023 13:15:54 +0000 Subject: [PATCH 02/29] added Goal --- scripts/conf/goal.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/conf/goal.yaml b/scripts/conf/goal.yaml index 9c2ffa9..097e64d 100644 --- a/scripts/conf/goal.yaml +++ b/scripts/conf/goal.yaml @@ -7,10 +7,10 @@ env_name: flyer-v1 env_config: observation: - type: Trajectory + type: Goal action: type: ContinuousAction - duration: 10.0 + duration: 100.0 area: - 256 - 256 @@ -18,9 +18,11 @@ env_config: # train settings agent_type: SAC +use_her: True total_timesteps: 10000000 n_envs: 32 log_interval: 4 +learning_starts: 100000 # how many steps to sample before beginning learning # eval settings eval_freq: 500 From bf95acf010f7feef4f0d2a5af2830ecc77d8815d Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 29 Nov 2023 13:16:57 +0000 Subject: [PATCH 03/29] added parking script --- scripts/conf/parking.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 scripts/conf/parking.yaml diff --git a/scripts/conf/parking.yaml b/scripts/conf/parking.yaml new file mode 100644 index 0000000..850d197 --- /dev/null +++ b/scripts/conf/parking.yaml @@ -0,0 +1,23 @@ 
+defaults: + - override hydra/launcher: submitit_local + +# env settings +policy_type: MlpPolicy +env_name: parking-v0 +env_config: False + +# train settings +agent_type: SAC +use_her: True +total_timesteps: 10000000 +n_envs: 32 +log_interval: 4 +learning_starts: 100000 + +# eval settings +eval_freq: 500 + +# misc +use_wandb: True +seed: 0 +render: False From 0a433ccf3edfc8da6710df48fac16709f6294acf Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 29 Nov 2023 13:18:26 +0000 Subject: [PATCH 04/29] added reach robotic environment --- scripts/conf/reach.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 scripts/conf/reach.yaml diff --git a/scripts/conf/reach.yaml b/scripts/conf/reach.yaml new file mode 100644 index 0000000..0a17866 --- /dev/null +++ b/scripts/conf/reach.yaml @@ -0,0 +1,23 @@ +defaults: + - override hydra/launcher: submitit_local + +# env settings +policy_type: MlpPolicy +env_name: 'FetchReachDense-v2' +env_config: False + +# train settings +agent_type: SAC +use_her: True +total_timesteps: 10000000 +n_envs: 32 +log_interval: 4 +learning_starts: 100000 + +# eval settings +eval_freq: 500 + +# misc +use_wandb: True +seed: 0 +render: False From 966d23a8183b31bdf4306c9256f7ffc3bb43c144 Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 29 Nov 2023 13:18:35 +0000 Subject: [PATCH 05/29] add use_her flag --- scripts/conf/test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/conf/test.yaml b/scripts/conf/test.yaml index 3cd7c74..1258dea 100644 --- a/scripts/conf/test.yaml +++ b/scripts/conf/test.yaml @@ -11,6 +11,7 @@ agent_type: SAC total_timesteps: 1000000 n_envs: 32 log_interval: 4 +use_her: False # eval settings eval_freq: 500 From 6a4d9864bc39d6f0127f22a5afe9d54538d27ace Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 29 Nov 2023 13:18:46 +0000 Subject: [PATCH 06/29] added use_her flag --- scripts/conf/trajectory.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/conf/trajectory.yaml 
b/scripts/conf/trajectory.yaml index 1b38d0f..8478661 100644 --- a/scripts/conf/trajectory.yaml +++ b/scripts/conf/trajectory.yaml @@ -24,6 +24,7 @@ agent_type: SAC total_timesteps: 50000000 n_envs: 32 log_interval: 4 +use_her: False # eval settings eval_freq: 500 From aa134879a1150d029b1cdd44970739ebd8afca03 Mon Sep 17 00:00:00 2001 From: quessy Date: Thu, 30 Nov 2023 15:08:48 +0000 Subject: [PATCH 07/29] simplify limits --- flyer_env/envs/flyer_env.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flyer_env/envs/flyer_env.py b/flyer_env/envs/flyer_env.py index 79cbb9b..a4dc1d0 100644 --- a/flyer_env/envs/flyer_env.py +++ b/flyer_env/envs/flyer_env.py @@ -63,8 +63,8 @@ def default_config(cls) -> dict: "point_reward": 1.0, # multiplier for distance from goal "normalize_reward": False, # whether to normalize the reward [-1, +1], not working at the moment "goal_generation": { - "heading_limits": [-np.pi, np.pi], - "pitch_limits": [-10.0 * np.pi/180.0, 10.0 * np.pi/180.0], + "heading_limits": [85.0 * np.pi/180.0, 95.0 * np.pi/180.0], + "pitch_limits": [-0.1 * np.pi/180.0, 0.1 * np.pi/180.0], "dist_limits": [1000.0, 10000.0], "dist_terminal": 20.0 } # goal generation details @@ -124,6 +124,7 @@ def get_goal(): pitch = np_random.uniform(gg["pitch_limits"][0], gg["pitch_limits"][1]) dist = np_random.uniform(gg["dist_limits"][0], gg["dist_limits"][1]) rel_pos = dist*np.array([np.cos(pitch)*np.sin(heading), np.cos(pitch)*np.cos(heading), np.sin(pitch)]) + print(f'rel_pos: {rel_pos}, pitch: {pitch}, heading: {heading}, dist: {dist}') pos = v_pos + rel_pos return pos From bc2c927722e394dfb9b619548847f8966fbdf62a Mon Sep 17 00:00:00 2001 From: quessy Date: Thu, 30 Nov 2023 15:08:55 +0000 Subject: [PATCH 08/29] add plot --- scripts/her_flyer_plot.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/scripts/her_flyer_plot.py b/scripts/her_flyer_plot.py index f1eaefa..5d496c3 100644 --- a/scripts/her_flyer_plot.py +++ 
b/scripts/her_flyer_plot.py @@ -18,7 +18,7 @@ def main(): "type": "Goal" }, "action": { - "type": "ControlledAction" + "type": "ContinuousAction" }, "duration": 100.0, "simulation_frequency": 100.0, @@ -26,9 +26,14 @@ def main(): } env = gym.make("flyer-v1", config=env_config) - policy = SAC.load("models/flyer_controlled_her-v1/best_model.zip", env=env) + print(f'reset_obs: {env.reset()}') + + policy = SAC.load("models/flyer_her_simple-v1/best_model.zip", env=env) obs, info = env.reset() + print(f'env: {env}') + print(f'env.config: {env.config}') + print(f'obs: {obs}') done = False observations = [] @@ -39,16 +44,17 @@ def main(): while not done: action, _states = policy.predict(obs, deterministic=True) - print(f'obs: {obs}') - print(f'action: {action}') + # print(f'obs: {obs}') + # print(f'action: {action}') obs, reward, terminated, truncated, info = env.step(action) v_dict = env.unwrapped.vehicle.dict controls = env.unwrapped.vehicle.controls + # print(f'controls: {controls}') obs_dict = { - 'elevator': controls[1], - 'aileron': controls[0], + 'elevator': controls['elevator'], + 'aileron': controls['aileron'], 'rudder': 0.0, 'x': v_dict['x'], 'y': v_dict['y'], From c8c761397f4f9914d14ab0ee0d554c82aefe1389 Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 6 Dec 2023 09:38:28 +0000 Subject: [PATCH 09/29] comment out print --- flyer_env/envs/flyer_env.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flyer_env/envs/flyer_env.py b/flyer_env/envs/flyer_env.py index a4dc1d0..da0f882 100644 --- a/flyer_env/envs/flyer_env.py +++ b/flyer_env/envs/flyer_env.py @@ -124,7 +124,7 @@ def get_goal(): pitch = np_random.uniform(gg["pitch_limits"][0], gg["pitch_limits"][1]) dist = np_random.uniform(gg["dist_limits"][0], gg["dist_limits"][1]) rel_pos = dist*np.array([np.cos(pitch)*np.sin(heading), np.cos(pitch)*np.cos(heading), np.sin(pitch)]) - print(f'rel_pos: {rel_pos}, pitch: {pitch}, heading: {heading}, dist: {dist}') + # print(f'rel_pos: {rel_pos}, pitch: 
{pitch}, heading: {heading}, dist: {dist}') pos = v_pos + rel_pos return pos @@ -186,6 +186,7 @@ def _point_reward(self): # point_reward = self.config["point_reward"] # dist_terminal = self.config["goal_generation"]["dist_terminal"] # reward = point_reward * dist_terminal / distance + # print(f'distance: {distance}, self.goal: {self.goal}, pos: {self.vehicle.dict}') reward = -distance * 100.0/(self.config["goal_generation"]["dist_limits"][1] * self.config["duration"] * self.config["simulation_frequency"]) return reward From 17564957802359a367146a64a2624ec63d100f8d Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 6 Dec 2023 09:38:42 +0000 Subject: [PATCH 10/29] add heading only action --- flyer_env/envs/common/action.py | 56 +++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/flyer_env/envs/common/action.py b/flyer_env/envs/common/action.py index 066c0ef..a010ed5 100644 --- a/flyer_env/envs/common/action.py +++ b/flyer_env/envs/common/action.py @@ -168,6 +168,60 @@ def act(self, action: np.ndarray) -> None: self.last_action = action +class HeadingAction(ActionType): + + HEADING_RANGE = (-np.pi, np.pi) + + """ + A continuous action space with a fixed altitude and speed. + Controls are only [aileron]. 
+ """ + + def __init__(self, + env: 'AbstractEnv', + heading_range: Optional[Tuple[float, float]] = None, + powered: bool = True, + clip: bool = True, + **kwargs) -> None: + """ + Create a continuous laterally constrained action space + """ + super().__init__(env) + + self.heading_range = heading_range if heading_range else self.HEADING_RANGE + + self.powered = powered + self.clip = clip + self.size = 2 if self.powered else 1 + + self.last_action = np.zeros(self.size) + + def space(self) -> spaces.Box: + return spaces.Box(-1.0, 1.0, shape=(self.size,), dtype=np.float32) + + @property + def vehicle_class(self) -> Callable: + return functools.partial(ControlledAircraft) + + def act(self, action: np.ndarray) -> None: + """ + Apply the action to the controlled vehicle + + :param action: action array with [sine, cosine] mapped between ranges + """ + + if self.clip: + action = np.clip(action, -1.0, 1.0) + + if self.powered: + self.controlled_vehicle.act({ + 'heading': np.arctan2(action[0], action[1]), + 'alt': -1000.0, + 'speed': 80.0 + }) + self.last_action = action + + class ControlledAction(ActionType): """ An action that controls the aircraft using a PID controller to track towards the target. 
@@ -328,6 +382,8 @@ def action_factory(env: 'AbstractEnv', config: dict) -> ActionType: return ContinuousAction(env, **config) elif config["type"] == "LongitudinalAction": return LongitudinalAction(env, **config) + elif config["type"] == "HeadingAction": + return HeadingAction(env, **config) elif config["type"] == "ControlledAction": return ControlledAction(env, **config) elif config["type"] == "PursuitAction": From ec15607c898f033d7ec4fd01add95670f5b2c631 Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 6 Dec 2023 09:38:54 +0000 Subject: [PATCH 11/29] add lateral goal observation --- flyer_env/envs/common/observation.py | 38 ++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/flyer_env/envs/common/observation.py b/flyer_env/envs/common/observation.py index 6bc3b19..5c4fbc3 100644 --- a/flyer_env/envs/common/observation.py +++ b/flyer_env/envs/common/observation.py @@ -196,6 +196,42 @@ def observe(self) -> Dict[str, np.ndarray]: return obs +class LateralGoalObservation(DynamicObservation): + + FEATURES: List[str] = ['x', 'y', 'u', 'v', 'yaw'] + + def __init__(self, + env: "AbstractEnv", + features: List[str] = None, + **kwargs: dict) -> None: + super().__init__(env, **kwargs) + self.features = features or self.FEATURES + if hasattr(env, "goal"): + self.goal = env.goal + + def space(self) -> spaces.Space: + try: + obs = self.observe() + return spaces.Dict(dict( + desired_goal=spaces.Box(-np.inf, np.inf, shape=obs["desired_goal"].shape, dtype=np.float64), + achieved_goal=spaces.Box(-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype=np.float64), + observation=spaces.Box(-np.inf, np.inf, shape=obs["observation"].shape, dtype=np.float64) + )) + except AttributeError: + return spaces.Space() + + def observe(self) -> Dict[str, np.ndarray]: + df = pd.DataFrame.from_records([self.observer_vehicle.dict])[self.features] + df = df[self.features] + obs = df.values.copy() + obs = OrderedDict([ + ("observation", obs[0]), + ("achieved_goal", 
obs[0][0:3]), + ("desired_goal", self.goal) + ]) + return obs + + def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType: if config["type"] == "Dynamics" or config["type"] == "dynamics": return DynamicObservation(env, **config) @@ -207,5 +243,7 @@ def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType: return LongitudinalObservation(env, **config) elif config["type"] == "Goal" or config["type"] == "goal" or config["type"] == "DynamicGoal": return DynamicGoalObservation(env, **config) + elif config["type"] == "LateralGoal" or config["type"] == "lateral_goal": + return LateralGoalObservation(env, **config) else: raise ValueError("Unknown observation type") From c591c7caa0c7a62e8a9bc910263351cc88f834ad Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 6 Dec 2023 09:39:11 +0000 Subject: [PATCH 12/29] change to use heading_action --- scripts/her_flyer_plot.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/her_flyer_plot.py b/scripts/her_flyer_plot.py index 5d496c3..07ffc55 100644 --- a/scripts/her_flyer_plot.py +++ b/scripts/her_flyer_plot.py @@ -18,7 +18,7 @@ def main(): "type": "Goal" }, "action": { - "type": "ContinuousAction" + "type": "HeadingAction" }, "duration": 100.0, "simulation_frequency": 100.0, @@ -28,7 +28,7 @@ def main(): env = gym.make("flyer-v1", config=env_config) print(f'reset_obs: {env.reset()}') - policy = SAC.load("models/flyer_her_simple-v1/best_model.zip", env=env) + policy = SAC.load("models/flyer_heading_her-v1/best_model.zip", env=env) obs, info = env.reset() print(f'env: {env}') @@ -45,12 +45,15 @@ def main(): while not done: action, _states = policy.predict(obs, deterministic=True) # print(f'obs: {obs}') + # action = np.array((0.0, 0.0)) # print(f'action: {action}') obs, reward, terminated, truncated, info = env.step(action) + print(f'reward: {reward}') v_dict = env.unwrapped.vehicle.dict - controls = env.unwrapped.vehicle.controls - # print(f'controls: 
{controls}') + controls = env.unwrapped.vehicle.aircraft.controls + # print(f'vehicle: {env.unwrapped.vehicle.aircraft.controls}') + # print(f'v_dict: {v_dict}') obs_dict = { 'elevator': controls['elevator'], From 71e8afa45eee8aa439bd92a03167cff208311042 Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 6 Dec 2023 09:39:35 +0000 Subject: [PATCH 13/29] change goal to training environment --- scripts/conf/goal.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/conf/goal.yaml b/scripts/conf/goal.yaml index 097e64d..8613f73 100644 --- a/scripts/conf/goal.yaml +++ b/scripts/conf/goal.yaml @@ -7,9 +7,12 @@ env_name: flyer-v1 env_config: observation: - type: Goal + type: LateralGoal action: - type: ContinuousAction + type: HeadingAction + heading_range: + - -0.1 + - 0.1 duration: 100.0 area: - 256 @@ -19,7 +22,7 @@ env_config: # train settings agent_type: SAC use_her: True -total_timesteps: 10000000 +total_timesteps: 100000000 n_envs: 32 log_interval: 4 learning_starts: 100000 # how many steps to sample before beginning learning From 1bba652c20a96b61c18b7ca9d9b143b8043f7019 Mon Sep 17 00:00:00 2001 From: quessy Date: Mon, 11 Dec 2023 16:20:42 +0000 Subject: [PATCH 14/29] add seeding --- flyer_env/envs/common/abstract.py | 2 +- flyer_env/envs/flyer_env.py | 50 ++++++++++++++++++++----------- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/flyer_env/envs/common/abstract.py b/flyer_env/envs/common/abstract.py index 1456b5e..470569e 100644 --- a/flyer_env/envs/common/abstract.py +++ b/flyer_env/envs/common/abstract.py @@ -180,7 +180,7 @@ def reset(self, self.time = 0.0 self.steps = 0 self.done = False - self._reset(seed) + self._reset() # Second, to link the obs and actions to the vehicles once the scene is created self.define_spaces() diff --git a/flyer_env/envs/flyer_env.py b/flyer_env/envs/flyer_env.py index da0f882..6c6a416 100644 --- a/flyer_env/envs/flyer_env.py +++ b/flyer_env/envs/flyer_env.py @@ -66,25 +66,33 @@ def 
default_config(cls) -> dict: "heading_limits": [85.0 * np.pi/180.0, 95.0 * np.pi/180.0], "pitch_limits": [-0.1 * np.pi/180.0, 0.1 * np.pi/180.0], "dist_limits": [1000.0, 10000.0], - "dist_terminal": 20.0 + "dist_terminal": 100.0 } # goal generation details }) return config - def _reset(self, seed) -> None: - if not seed: seed = 1 # set seed to 1 if None TODO: set to be random on None, look @ HighwayEnv - self._create_world(seed) + def _info(self, obs, action) -> dict: + info = super(FlyerEnv, self)._info(obs, action) + success = self._is_success() + info.update({"is_success": success}) + return info + + def _reset(self) -> None: + + self.np_random = np.random.RandomState() + self._create_world() self._create_vehicles() - self._create_goal(seed) + self._create_goal() - def _create_world(self, seed) -> None: + def _create_world(self) -> None: """Create the world map""" self.world = World() path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "assets") self.world.assets_dir = path path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "terrain_data") self.world.terrain_data_dir = path - self.world.create_map(seed, area=self.config["area"]) + world_seed = self.np_random.randint(100) # set 100 possible seeds by default + self.world.create_map(world_seed, area=self.config["area"]) return def _create_vehicles(self) -> None: @@ -112,17 +120,15 @@ def _create_vehicles(self) -> None: ) self.controlled_vehicles.append(vehicle) - def _create_goal(self, seed) -> None: + def _create_goal(self) -> None: """Create a random goal in 3D space to navigate to, based on the aircraft's initial starting position""" v_pos = self.world.vehicles[0].position gg = self.config["goal_generation"] - np_random = np.random.RandomState(seed) - def get_goal(): - heading = np_random.uniform(gg["heading_limits"][0], gg["heading_limits"][1]) - pitch = np_random.uniform(gg["pitch_limits"][0], gg["pitch_limits"][1]) - dist = np_random.uniform(gg["dist_limits"][0], 
gg["dist_limits"][1]) + heading = self.np_random.uniform(gg["heading_limits"][0], gg["heading_limits"][1]) + pitch = self.np_random.uniform(gg["pitch_limits"][0], gg["pitch_limits"][1]) + dist = self.np_random.uniform(gg["dist_limits"][0], gg["dist_limits"][1]) rel_pos = dist*np.array([np.cos(pitch)*np.sin(heading), np.cos(pitch)*np.cos(heading), np.sin(pitch)]) # print(f'rel_pos: {rel_pos}, pitch: {pitch}, heading: {heading}, dist: {dist}') pos = v_pos + rel_pos @@ -145,6 +151,7 @@ def _goal_distance(goal_a, goal_b): dist_terminal = self.config["goal_generation"]["dist_terminal"] d = _goal_distance(achieved_goal, desired_goal) + # print(f'distance: {d}, achieved_goal: {achieved_goal}, desired_goal: {desired_goal}') if self.config["reward_type"] == "sparse": return -(d > dist_terminal).astype(np.float32) else: @@ -199,15 +206,22 @@ def _crash_reward(self) -> float: else: return 0.0 + def _is_success(self) -> bool: + v_pos = self.vehicle.position + difference = np.subtract(v_pos, self.goal) + distance = np.linalg.norm(difference) + dist_terminal = self.config["goal_generation"]["dist_terminal"] + return distance < dist_terminal + def _is_terminated(self) -> bool: """ The episode is over if the the ego vehicle crashed, or it hits the ground """ - v_pos = self.vehicle.position - difference = np.subtract(v_pos, self.goal) - distance = np.linalg.norm(difference) - dist_terminal = self.config["goal_generation"]["dist_terminal"] + # v_pos = self.vehicle.position + # difference = np.subtract(v_pos, self.goal) + # distance = np.linalg.norm(difference) + # dist_terminal = self.config["goal_generation"]["dist_terminal"] # If crashed terminate if self.vehicle.crashed: @@ -216,7 +230,7 @@ def _is_terminated(self) -> bool: if self.vehicle.position[-1] >= 0.0: return True # If reached goal region - if distance < dist_terminal: + if self._is_success(): return True return False From 412ebf240101b871dc27957e1793288813cd3374 Mon Sep 17 00:00:00 2001 From: quessy Date: Mon, 11 Dec 
2023 16:21:07 +0000 Subject: [PATCH 15/29] heading action is one value now --- flyer_env/envs/common/action.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/flyer_env/envs/common/action.py b/flyer_env/envs/common/action.py index a010ed5..7cd6617 100644 --- a/flyer_env/envs/common/action.py +++ b/flyer_env/envs/common/action.py @@ -189,15 +189,13 @@ def __init__(self, super().__init__(env) self.heading_range = heading_range if heading_range else self.HEADING_RANGE - self.powered = powered self.clip = clip - self.size = 2 if self.powered else 1 - + self.size = 1 self.last_action = np.zeros(self.size) def space(self) -> spaces.Box: - return spaces.Box(-1.0, 1.0, shape=(self.size,), dtype=np.float32) + return spaces.Box(self.heading_range[0], self.heading_range[1], shape=(self.size,), dtype=np.float32) @property def vehicle_class(self) -> Callable: @@ -211,11 +209,11 @@ def act(self, action: np.ndarray) -> None: """ if self.clip: - action = np.clip(action, -1.0, 1.0) + action = np.clip(action, self.heading_range[0], self.heading_range[1]) if self.powered: self.controlled_vehicle.act({ - 'heading': np.arctan2(action[0], action[1]), + 'heading': action, 'alt': -1000.0, 'speed': 80.0 }) From 4e848a675cd6ecdcc185169aec546b46c20c1af5 Mon Sep 17 00:00:00 2001 From: quessy Date: Mon, 11 Dec 2023 16:21:16 +0000 Subject: [PATCH 16/29] fixed setup tool --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cd62890..2794dac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = [ - "setuptools>=42", + "setuptools==68.2.2", "setuptools-rust", "wheel" ] From 6772459ce6149fe5572beeab0dc1daca338ec137 Mon Sep 17 00:00:00 2001 From: quessy Date: Mon, 11 Dec 2023 16:21:32 +0000 Subject: [PATCH 17/29] Add Lateral Trajectory Observation --- flyer_env/envs/common/observation.py | 40 ++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 
deletions(-) diff --git a/flyer_env/envs/common/observation.py b/flyer_env/envs/common/observation.py index 5c4fbc3..b1f85de 100644 --- a/flyer_env/envs/common/observation.py +++ b/flyer_env/envs/common/observation.py @@ -101,6 +101,40 @@ def observe(self) -> np.ndarray: obs[0, 2] = self.goal[2] - obs[0, 2] return obs.astype(self.space().dtype) +class LateralTrajectoryObservation(ObservationType): + + """ + Observe dynamics of vehicle relative to goal position, restricted to horizontal plane + ONLY FOR USE WITH TRAJECTORY ENV + """ + + FEATURES: List[str] = ['x', 'y', 'u', 'v', 'yaw'] + + def __init__(self, + env: "AbstractEnv", + features: List[str] = None, + vehicles_count: int = 1, + features_range: Dict[str, List[float]] = None, + **kwargs: dict) -> None: + + super().__init__(env) + self.features = features or self.FEATURES + self.vehicles_count = vehicles_count + self.features_range = features_range + if hasattr(env, "goal"): + self.goal = env.goal + + def space(self) -> spaces.Space: + return spaces.Box(shape=(self.vehicles_count, len(self.features)), low=-np.inf, high=np.inf, dtype=np.float32) + + def observe(self) -> np.ndarray: + + df = pd.DataFrame.from_records([self.observer_vehicle.dict])[self.features] + df = df[self.features] + obs = df.values.copy() + obs[0, 0] = self.goal[0] - obs[0, 0] + obs[0, 1] = self.goal[1] - obs[0, 1] + return obs.astype(self.space().dtype) class ControlObservation(ObservationType): @@ -226,8 +260,8 @@ def observe(self) -> Dict[str, np.ndarray]: obs = df.values.copy() obs = OrderedDict([ ("observation", obs[0]), - ("achieved_goal", obs[0][0:3]), - ("desired_goal", self.goal) + ("achieved_goal", obs[0][0:2]), + ("desired_goal", self.goal[0:2]) ]) return obs @@ -237,6 +271,8 @@ def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType: return DynamicObservation(env, **config) elif config["type"] == "Trajectory" or config["type"] == "trajectory": return TrajectoryObservation(env, **config) + elif 
config["type"] == "LateralTrajectory" or config["lateral_trajectory"]: + return LateralTrajectoryObservation(env, **config) elif config["type"] == "Control" or config["type"] == "control": return ControlObservation(env, **config) elif config["type"] == "Longitudinal" or config["type"] == "longitudinal": From 04d81043bae44466e425a8d9f2c1b1a6ed66a262 Mon Sep 17 00:00:00 2001 From: quessy Date: Mon, 11 Dec 2023 16:21:44 +0000 Subject: [PATCH 18/29] Add LateralGoal Observation --- scripts/her_flyer_plot.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/scripts/her_flyer_plot.py b/scripts/her_flyer_plot.py index 07ffc55..76e2981 100644 --- a/scripts/her_flyer_plot.py +++ b/scripts/her_flyer_plot.py @@ -15,20 +15,27 @@ def main(): env_config = { "observation": { - "type": "Goal" + "type": "LateralGoal" }, "action": { - "type": "HeadingAction" + "type": "HeadingAction", + "heading_range": (-0.1, 0.1) }, "duration": 100.0, "simulation_frequency": 100.0, - "seed": 0 + "seed": 0, + "goal_generation": { + "heading_limits": [85.0 * np.pi/180.0, 95.0 * np.pi/180.0], + "pitch_limits": [-0.0001, 0.0001], + "dist_limits": [1000.0, 2000.0], + "dist_terminal": 100.0 + } } env = gym.make("flyer-v1", config=env_config) print(f'reset_obs: {env.reset()}') - policy = SAC.load("models/flyer_heading_her-v1/best_model.zip", env=env) + policy = SAC.load("models/flyer_heading_her_1000_2000-v1/best_model.zip", env=env) obs, info = env.reset() print(f'env: {env}') @@ -44,11 +51,11 @@ def main(): while not done: action, _states = policy.predict(obs, deterministic=True) - # print(f'obs: {obs}') - # action = np.array((0.0, 0.0)) - # print(f'action: {action}') + print(f'obs: {obs}') + action = np.arctan2(obs['desired_goal'][1], obs['desired_goal'][0]) + print(f'action: {action * (180.0/np.pi)}') obs, reward, terminated, truncated, info = env.step(action) - print(f'reward: {reward}') + # print(f'reward: {reward}') v_dict = env.unwrapped.vehicle.dict 
controls = env.unwrapped.vehicle.aircraft.controls @@ -64,7 +71,7 @@ def main(): 'z': v_dict['z'], 'x_com': obs['desired_goal'][0], 'y_com': obs['desired_goal'][1], - 'z_com': obs['desired_goal'][2], + 'z_com': -1000.0, 'pitch': v_dict['pitch'], 'roll': v_dict['roll'], 'yaw': v_dict['yaw'], @@ -85,6 +92,7 @@ def main(): plot_long(observations, times, env_config["duration"]) plot_lat(observations, times, env_config["duration"]) plot_track(observations) + print(f'return: {np.sum(observations["reward"])}') plt.show() def plot_long(outputs, times, exp_len): From d49eff2ace1dd058aeb9d7b09bedc33261160004 Mon Sep 17 00:00:00 2001 From: quessy Date: Mon, 11 Dec 2023 16:22:14 +0000 Subject: [PATCH 19/29] comment out EvalCallback --- scripts/train.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index 6d25c35..0f33023 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -30,11 +30,15 @@ def __init__(self, cfg): self.train_env = make_vec_env(cfg.env_name, n_envs=cfg.n_envs, seed=cfg.seed) self.eval_env = gym.make(cfg.env_name) self.eval_env = Monitor(self.eval_env) - self.eval_callback = EvalCallback(self.eval_env, - best_model_save_path=f"./logs/{exp_name}", - eval_freq=cfg.eval_freq, - deterministic=True, - render=False) + + # Supressed eval callback for now, seems to interfer with training + # print(f'self.eval_env: {self.eval_env.config}') + + # self.eval_callback = EvalCallback(self.eval_env, + # best_model_save_path=f"./logs/{exp_name}", + # eval_freq=cfg.eval_freq, + # deterministic=True, + # render=False) if cfg.use_her: self.model = SAC( @@ -65,7 +69,7 @@ def train(self): callback = [WandbCallback( model_save_path=f"{self.work_dir}/{self.run.id}", verbose=2 - ), self.eval_callback + ) #, self.eval_callback ] else: callback = [self.eval_callback] @@ -74,6 +78,8 @@ def train(self): log_interval=self.cfg.log_interval, progress_bar=True, callback=callback) + + self.model.save(".runs/model") if 
self.cfg.use_wandb: self.run.finish() From d090b3d3c531187ecbec086d72befc85878d3027 Mon Sep 17 00:00:00 2001 From: quessy Date: Mon, 11 Dec 2023 16:22:29 +0000 Subject: [PATCH 20/29] Fixed responses --- scripts/trim_disturbance.py | 42 ++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/scripts/trim_disturbance.py b/scripts/trim_disturbance.py index 10b0e67..1bea462 100644 --- a/scripts/trim_disturbance.py +++ b/scripts/trim_disturbance.py @@ -28,22 +28,34 @@ def simulate(): controls = [] states = [] times = [] + duration = 100.0 - for ids in range(int(30.0/dt)): + for ids in range(int(duration/dt)): time = ids*dt + # if 5.0 < time < 6.0: + # control_input = [0.0, -5.0 * np.pi/180.0, tla, 0.0] + # elif 6.0 < time < 7.0: + # control_input = [0.0, 5.0 * np.pi/180.0, tla, 0.0] + # if 12.0 < time < 13.0: + # control_input = [-5.0 * np.pi/180.0, elevator, tla, 0.0] + # elif 13.0 < time < 14.0: + # control_input = [5.0 * np.pi/180.0, elevator, tla, 0.0] + # elif 19.0 < time < 20.0: + # control_input = [0.0, elevator, tla, -5.0 * np.pi/180.0] + # elif 20.0 < time < 21.0: + # control_input = [0.0, elevator, tla, 5.0 * np.pi/180.0] + # else: + # control_input = [0.0, elevator, tla, 0.0] + if 5.0 < time < 6.0: - control_input = [0.0, -5.0 * np.pi/180.0, tla, 0.0] - elif 6.0 < time < 7.0: - control_input = [0.0, 5.0 * np.pi/180.0, tla, 0.0] - elif 12.0 < time < 13.0: control_input = [-5.0 * np.pi/180.0, elevator, tla, 0.0] - elif 13.0 < time < 14.0: + elif 6.0 < time < 7.0: control_input = [5.0 * np.pi/180.0, elevator, tla, 0.0] - elif 19.0 < time < 20.0: - control_input = [0.0, elevator, tla, -5.0 * np.pi/180.0] elif 20.0 < time < 21.0: + control_input = [0.0, elevator, tla, -5.0 * np.pi/180.0] + elif 21.0 < time < 22.0: control_input = [0.0, elevator, tla, 5.0 * np.pi/180.0] else: control_input = [0.0, elevator, tla, 0.0] @@ -72,7 +84,7 @@ def plot_long(inputs, outputs, times): fig.set_figwidth(20) 
ax[0].set_title(r"\textbf{Longitudinal Disturbance}", fontsize=30) - ax[0].plot(times, inputs['elevator'], c=COLOURS[7]) + ax[0].plot(times, inputs['elevator'] * (180.0/np.pi), c=COLOURS[7]) ax[0].set_ylabel(r"$\delta_{e} [^{\circ}]$", fontsize=15) ax[1].plot(times, outputs['q'] * 180.0 / np.pi, c=COLOURS[1]) @@ -85,10 +97,10 @@ def plot_long(inputs, outputs, times): ax[3].set_ylabel(r"$V_{\infty} [m/s]$", fontsize=15) ax[3].set_xlabel(r"time [$s$]", fontsize=15) - [axis.set_xlim(0.0, 30.0) for axis in ax] + [axis.set_xlim(0.0, 100.0) for axis in ax] [axis.xaxis.set_tick_params(labelsize=15) for axis in ax] [axis.yaxis.set_tick_params(labelsize=15) for axis in ax] - fig.show() + fig.savefig("long_trim_dist.pdf") def plot_lat(inputs, outputs, times): fig, ax = plt.subplots(4, 1, sharex=True) @@ -98,8 +110,8 @@ def plot_lat(inputs, outputs, times): fig.set_figwidth(20) ax[0].set_title(r"\textbf{Lateral-Directional Disturbance}", fontsize=30) - ax[0].plot(times, inputs['aileron'], c=COLOURS[5], label=r'aileron') - ax[0].plot(times, inputs['rudder'], c=COLOURS[7], linestyle='dashed', label=r'rudder') + ax[0].plot(times, inputs['aileron'] * (180.0/np.pi), c=COLOURS[5], label=r'aileron') + ax[0].plot(times, inputs['rudder'] * (180.0/np.pi), c=COLOURS[7], linestyle='dashed', label=r'rudder') ax[0].set_ylabel(r"$\delta [^{\circ}]$", fontsize=15) ax[0].legend(title=r'\textbf{Control}') @@ -115,10 +127,10 @@ def plot_lat(inputs, outputs, times): ax[3].set_xlabel(r"time [$s$]", fontsize=15) ax[3].legend(title=r'\textbf{Attitude}') - [axis.set_xlim(10.0, 30.0) for axis in ax] + [axis.set_xlim(0.0, 40.0) for axis in ax] [axis.xaxis.set_tick_params(labelsize=15) for axis in ax] [axis.yaxis.set_tick_params(labelsize=15) for axis in ax] - fig.show() + fig.savefig("lat_trim_dist.pdf") def main(): simulate() From 1a7b4a9d6da2bc4d087197bc4dacdb742cc14da6 Mon Sep 17 00:00:00 2001 From: quessy Date: Mon, 11 Dec 2023 16:22:48 +0000 Subject: [PATCH 21/29] Add SAC flyer solution --- 
scripts/conf/flyer.yaml | 48 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 scripts/conf/flyer.yaml diff --git a/scripts/conf/flyer.yaml b/scripts/conf/flyer.yaml new file mode 100644 index 0000000..f9f726c --- /dev/null +++ b/scripts/conf/flyer.yaml @@ -0,0 +1,48 @@ +defaults: + - override hydra/launcher: submitit_local + +# env settings +policy_type: MlpPolicy +env_name: flyer-v1 + +env_config: + observation: + type: LateralTrajectory + action: + type: HeadingAction + heading_range: + - -0.5 + - 0.5 + duration: 100.0 + area: + - 256 + - 256 + simulation_frequency: 100.0 + goal_generation: + heading_limits: + - 1.0707 + - 2.0708 + pitch_limits: + - -0.0001 + - 0.0001 + dist_limits: + - 1000.0 + - 1001.0 + dist_terminal: + - 100.0 + +# train settings +agent_type: SAC +use_her: False +total_timesteps: 40000000 +n_envs: 32 +log_interval: 4 +learning_starts: 100000 # how many steps to sample before beginning learning + +# eval settings +eval_freq: 10000 + +# misc +use_wandb: True +seed: 0 +render: False From d8a3bb5fe3fbf7460fcf36c05437192701014beb Mon Sep 17 00:00:00 2001 From: quessy Date: Mon, 11 Dec 2023 16:23:02 +0000 Subject: [PATCH 22/29] restrict goal_generation method --- scripts/conf/goal.yaml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/conf/goal.yaml b/scripts/conf/goal.yaml index 8613f73..e852250 100644 --- a/scripts/conf/goal.yaml +++ b/scripts/conf/goal.yaml @@ -18,6 +18,18 @@ env_config: - 256 - 256 simulation_frequency: 100.0 + goal_generation: + heading_limits: + - 1.4835 + - 1.6580 + pitch_limits: + - -0.0001 + - 0.0001 + dist_limits: + - 1000.0 + - 2000.0 + dist_terminal: + - 100.0 # train settings agent_type: SAC @@ -25,12 +37,12 @@ use_her: True total_timesteps: 100000000 n_envs: 32 log_interval: 4 -learning_starts: 100000 # how many steps to sample before beginning learning +learning_starts: 1000000 # how many steps to sample before beginning learning # 
eval settings eval_freq: 500 # misc -use_wandb: True +use_wandb: False seed: 0 render: False From b4c7f6de6ec811d11bdbc547a9edad492f0f4bad Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 6 Mar 2024 15:17:25 +0000 Subject: [PATCH 23/29] changed seed generation method --- flyer_env/envs/control_env.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/flyer_env/envs/control_env.py b/flyer_env/envs/control_env.py index 9687e08..2d831a9 100644 --- a/flyer_env/envs/control_env.py +++ b/flyer_env/envs/control_env.py @@ -40,19 +40,21 @@ def default_config(cls) -> dict: }) return config - def _reset(self, seed) -> None: - if not seed: seed = 1 - self._create_world(seed) + def _reset(self) -> None: + + self.np_random = np.random.RandomState() + self._create_world() self._create_vehicles() - def _create_world(self, seed) -> None: + def _create_world(self) -> None: """Create the world map""" self.world = World() path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "assets") self.world.assets_dir = path path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "terrain_data") self.world.terrain_data_dir = path - self.world.create_map(seed, area=self.config["area"]) + world_seed = self.np_random.randint(100) # set 100 possible seeds by default + self.world.create_map(world_seed, area=self.config["area"]) def _create_vehicles(self) -> None: """Create an aircraft to fly around the world""" From 8abeee5401ba9fefd31334ff759a8f9907f5e2d5 Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 6 Mar 2024 15:17:38 +0000 Subject: [PATCH 24/29] changed seed generation method --- flyer_env/envs/runway_env.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/flyer_env/envs/runway_env.py b/flyer_env/envs/runway_env.py index 375ebbc..aa89624 100644 --- a/flyer_env/envs/runway_env.py +++ b/flyer_env/envs/runway_env.py @@ -41,20 +41,21 @@ def default_config(cls) -> dict: }) return config - def _reset(self, seed) -> None: - 
if not seed: seed = 1 - self._create_world(seed) + def _reset(self) -> None: + self.np_random = np.random.RandomState() + self._create_world() self._create_runway() self._create_vehicles() - def _create_world(self, seed) -> None: + def _create_world(self) -> None: """Create the world map""" self.world = World() path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "assets") self.world.assets_dir = path path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "terrain_data") self.world.terrain_data_dir = path - self.world.create_map(seed, area=self.config["area"]) + world_seed = self.np_random.randint(100) # set 100 possible seeds by default + self.world.create_map(world_seed, area=self.config["area"]) return def _create_runway(self) -> None: From ec4665b7dc94b6c93209c396d6ff4dec65e88402 Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 6 Mar 2024 15:18:01 +0000 Subject: [PATCH 25/29] corrected naming error --- flyer_env/envs/common/observation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flyer_env/envs/common/observation.py b/flyer_env/envs/common/observation.py index b1f85de..dde458c 100644 --- a/flyer_env/envs/common/observation.py +++ b/flyer_env/envs/common/observation.py @@ -271,7 +271,7 @@ def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType: return DynamicObservation(env, **config) elif config["type"] == "Trajectory" or config["type"] == "trajectory": return TrajectoryObservation(env, **config) - elif config["type"] == "LateralTrajectory" or config["lateral_trajectory"]: + elif config["type"] == "LateralTrajectory" or config["type"] == "lateral_trajectory": return LateralTrajectoryObservation(env, **config) elif config["type"] == "Control" or config["type"] == "control": return ControlObservation(env, **config) From 5e0d5e0c0ca49320cdf6ecc886b5a025ba3478ca Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 6 Mar 2024 15:18:26 +0000 Subject: [PATCH 26/29] improved landing plot --- 
scripts/pid_landing.py | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/scripts/pid_landing.py b/scripts/pid_landing.py index ae3a881..42a6ae8 100644 --- a/scripts/pid_landing.py +++ b/scripts/pid_landing.py @@ -68,6 +68,8 @@ def main(): alt = -1000.0 observations = [] + dt = 1/env.unwrapped.config["simulation_frequency"] + time = 0.0 print(f'target_list: {target_list}') @@ -75,6 +77,15 @@ def main(): pos = env.unwrapped.vehicle.position heading = nav_track.arc_path(pos) + + if time > 550.0: + alt = 0.0 + else: + alt = -1000.0 + time += dt + + + # print(f'heading_com: {heading * 180.0/np.pi}, heading_act: {env.unwrapped.vehicle.dict["yaw"] * 180.0/np.pi}') action = [np.sin(heading), np.cos(heading), utils.lmap(alt, env.unwrapped.action_type.alt_range, [-1.0, 1.0]), @@ -86,13 +97,22 @@ def main(): controls = env.unwrapped.vehicle.aircraft.controls obs_dict = { 'x': v_dict['x'], - 'y': v_dict['y'] + 'y': v_dict['y'], + 'z': v_dict['z'] } + # print(f'z: {v_dict["z"]}') observations.append(obs_dict) env.close() observations = pd.DataFrame.from_dict(observations) + + # fig, ax = plt.subplots(subplot_kw=dict(projection='3d')) + # # ax.set_aspect('equal', adjustable='box') + # ax.plot(observations['x'], observations['y'], -1.0 * observations['z']) + # plt.show() + plot_position(observations, targets) + plot_3d(observations, targets) plt.show() def plot_position(outputs, targets): @@ -102,13 +122,26 @@ def plot_position(outputs, targets): fig.set_figwidth(10) ax.plot(outputs['x'], outputs['y'], c=COLOURS[1]) - ax.scatter(targets['x'], targets['y'], c=COLOURS[2]) + ax.scatter(targets['x'], targets['y'], c=COLOURS[5]) ax.set_ylabel(r"$y [m]$", fontsize=15) ax.set_xlabel(r"$x [m]$", fontsize=15) + ax.axes.set_ylim(top=2500) ax.set_aspect('equal') ax.grid() - fig.show() + fig.savefig("2d_landing.pdf") + +def plot_3d(outputs, targets): + + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + 
ax.plot(outputs['x'], outputs['y'], -1.0 * outputs['z'], c=COLOURS[1]) + ax.set_aspect('equal') + # ax.axes.set_xlim3d(left=-12005, right=0.5) + # ax.axes.set_ylim3d(bottom=-0.5, top=25500) + # ax.axes.set_zlim3d(bottom=-5.0, top=1005) + # ax.scatter(targets['x'], targets['y'], np.linspace(0, )) + fig.savefig("3d_landing.pdf") if __name__=="__main__": main() From 775cab327468e8ed27bb0065dc693eed3bb7dd02 Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 6 Mar 2024 15:19:47 +0000 Subject: [PATCH 27/29] added eval_callback --- scripts/train.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index 0f33023..1942618 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -34,11 +34,11 @@ def __init__(self, cfg): # Supressed eval callback for now, seems to interfer with training # print(f'self.eval_env: {self.eval_env.config}') - # self.eval_callback = EvalCallback(self.eval_env, - # best_model_save_path=f"./logs/{exp_name}", - # eval_freq=cfg.eval_freq, - # deterministic=True, - # render=False) + self.eval_callback = EvalCallback(self.eval_env, + best_model_save_path=f"./logs/{exp_name}", + eval_freq=cfg.eval_freq, + deterministic=True, + render=False) if cfg.use_her: self.model = SAC( @@ -69,7 +69,7 @@ def train(self): callback = [WandbCallback( model_save_path=f"{self.work_dir}/{self.run.id}", verbose=2 - ) #, self.eval_callback + ), self.eval_callback ] else: callback = [self.eval_callback] From 905e59351be4bf39034a65abce7a773b57acc630 Mon Sep 17 00:00:00 2001 From: quessy Date: Wed, 6 Mar 2024 15:20:01 +0000 Subject: [PATCH 28/29] changed seeds --- scripts/conf/control.yaml | 7 ++++--- scripts/conf/flyer.yaml | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/scripts/conf/control.yaml b/scripts/conf/control.yaml index bbf7f17..1e4a749 100644 --- a/scripts/conf/control.yaml +++ b/scripts/conf/control.yaml @@ -7,9 +7,9 @@ env_name: control-v1 env_config: observation: - type: 
Dynamics + type: Longitudinal action: - type: ContinuousAction + type: LongitudinalAction duration: 10.0 area: - 256 @@ -18,6 +18,7 @@ env_config: # train settings agent_type: SAC +use_her: False total_timesteps: 20000000 n_envs: 32 log_interval: 4 @@ -27,5 +28,5 @@ eval_freq: 500 # misc use_wandb: True -seed: 0 +seed: 5 render: False diff --git a/scripts/conf/flyer.yaml b/scripts/conf/flyer.yaml index f9f726c..7f73b64 100644 --- a/scripts/conf/flyer.yaml +++ b/scripts/conf/flyer.yaml @@ -11,17 +11,17 @@ env_config: action: type: HeadingAction heading_range: - - -0.5 - - 0.5 - duration: 100.0 + - -0.10 + - 0.10 + duration: 20.0 area: - 256 - 256 simulation_frequency: 100.0 goal_generation: heading_limits: - - 1.0707 - - 2.0708 + - 1.471 + - 1.671 pitch_limits: - -0.0001 - 0.0001 From 9438aca136ce27553da0f48620f00839b3da851c Mon Sep 17 00:00:00 2001 From: AOS55 Date: Thu, 14 Mar 2024 14:24:26 +0000 Subject: [PATCH 29/29] add pre-commit job --- .github/workflows/pre-commit.yml | 21 +++++++++++++++++++++ .pre-commit-config.yaml | 24 ++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 .github/workflows/pre-commit.yml create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..024c134 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,21 @@ +# https://pre-commit.com +# This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file +name: pre-commit +on: + pull_request: + push: + branches: [master] + +permissions: + contents: read # to fetch code (actions/checkout) + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + - run: python -m pip install pre-commit + - run: python -m pre_commit --version + - run: python -m pre_commit install + - run: python -m pre_commit run --all-files \ No newline at end of file diff --git a/.pre-commit-config.yaml 
b/.pre-commit-config.yaml new file mode 100644 index 0000000..cc95bb9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,24 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + args: + - '--per-file-ignores=**/__init__.py:F401,F403,E402' + - --ignore=E203,W503,E741,E731 + - --max-complexity=30 + - --max-line-length=456 + - --show-source + - --statistics + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + args: ["--profile", "black"] + exclude: "__init__.py" + - repo: https://github.com/python/black + rev: 23.3.0 + hooks: + - id: black