From 87d8c267d7bbe5321c319096144e2bec1d032e04 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 29 Nov 2023 13:15:29 +0000
Subject: [PATCH 01/29] added her goal observation

---
 flyer_env/envs/common/observation.py |  34 ++++++
 flyer_env/envs/flyer_env.py          |  69 ++++++++++--
 flyer_env/envs/trajectory_env.py     |   1 +
 scripts/her_flyer_plot.py            | 159 +++++++++++++++++++++++++++
 scripts/sac_control_plot.py          |   1 -
 scripts/test_plot.py                 |  73 ++++++++++++
 scripts/train.py                     |  31 ++++--
 7 files changed, 349 insertions(+), 19 deletions(-)
 create mode 100644 scripts/her_flyer_plot.py
 create mode 100644 scripts/test_plot.py

diff --git a/flyer_env/envs/common/observation.py b/flyer_env/envs/common/observation.py
index 66700fc..6bc3b19 100644
--- a/flyer_env/envs/common/observation.py
+++ b/flyer_env/envs/common/observation.py
@@ -163,6 +163,38 @@ def observe(self) -> np.ndarray:
         obs = df.values.copy()
         return obs.astype(self.space().dtype)
 
+class DynamicGoalObservation(DynamicObservation):
+    """Dynamics observation packaged as `observation`, `achieved_goal` and `desired_goal`."""
+
+    def __init__(self, env: "AbstractEnv", **kwargs: dict) -> None:
+        super().__init__(env, **kwargs)
+        # The env may not expose a goal yet; space() then falls back to an empty Space.
+        if hasattr(env, "goal"):
+            self.goal = env.goal
+
+    def space(self) -> spaces.Space:
+        try:
+            obs = self.observe()
+            return spaces.Dict(dict(
+                desired_goal=spaces.Box(-np.inf, np.inf, shape=obs["desired_goal"].shape, dtype=np.float64),
+                achieved_goal=spaces.Box(-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype=np.float64),
+                observation=spaces.Box(-np.inf, np.inf, shape=obs["observation"].shape, dtype=np.float64)
+            ))
+        except AttributeError:
+            return spaces.Space()
+
+    def observe(self) -> Dict[str, np.ndarray]:
+        # One-row frame holding the configured features, in configured order.
+        df = pd.DataFrame.from_records([self.observer_vehicle.dict])[self.features]
+        obs = df.values.copy()
+        # No dtype cast via self.space() here: space() calls observe(), so that would recurse.
+        obs = OrderedDict([
+            ("observation", obs[0]),
+            ("achieved_goal", obs[0][0:3]),
+            ("desired_goal", self.goal)
+        ])
+        return obs
+
 
 def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType:
     if config["type"] == "Dynamics" or config["type"] == "dynamics":
@@ -173,5 +205,7 @@ def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType:
         return ControlObservation(env, **config)
     elif config["type"] == "Longitudinal" or config["type"] == "longitudinal":
         return LongitudinalObservation(env, **config)
+    elif config["type"] == "Goal" or config["type"] == "goal" or config["type"] == "DynamicGoal":
+        return DynamicGoalObservation(env, **config)
     else:
         raise ValueError("Unknown observation type")
diff --git a/flyer_env/envs/flyer_env.py b/flyer_env/envs/flyer_env.py
index 4d168f7..79cbb9b 100644
--- a/flyer_env/envs/flyer_env.py
+++ b/flyer_env/envs/flyer_env.py
@@ -2,6 +2,8 @@
 import sys
 import os
 import numpy as np
+from gymnasium import Env
+from abc import abstractmethod
 
 from flyer_env import utils
 from flyer_env.aircraft import ControlledAircraft
@@ -10,7 +12,32 @@
 from pyflyer import World, Aircraft
 
 
-class FlyerEnv(AbstractEnv):
+class GoalEnv(Env):
+    """
+    Interface for a goal-based environment.
+
+    Similar to HighwayEnv https://github.com/Farama-Foundation/HighwayEnv/blob/master/highway_env/envs/parking_env.py.
+    This interface is needed for environments to interact with agents such as Stable-Baselines3's Hindsight Experience Replay (HER) agent.
+
+    A goal-based environment functions in the same way as any regular Gymnasium environment, but imposes a required structure on the observation space.
+    More concretely, the observation space is required to contain at least 3 elements, namely `observation`, `desired_goal`, and `achieved_goal`.
+    """
+
+    @abstractmethod
+    def compute_reward(self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info: dict) -> float:
+        """
+        Compute the step reward. This externalizes the reward function and makes it dependent on a desired goal and the one that was achieved.
+
+        :param achieved_goal: the goal that was achieved during execution
+        :param desired_goal: the desired goal that we asked the agent to attempt to achieve
+        :param info: an info dictionary with additional information
+        :return: the reward that corresponds to the provided goal achieved w.r.t. the desired goal
+        :raises NotImplementedError: always, unless a subclass overrides this method
+        """
+        raise NotImplementedError
+
+
+class FlyerEnv(AbstractEnv, GoalEnv):
     """
     A goal-oriented flying environment
 
@@ -23,7 +50,7 @@ def default_config(cls) -> dict:
         config = super().default_config()
         config.update({
             "observation": {
-                "type": "Dynamics"
+                "type": "Goal"
             }, 
             "action": {
                 "type": "ContinuousAction"
@@ -31,9 +58,10 @@ def default_config(cls) -> dict:
             "area": (1024, 1024),  # terrain map area [tiles]
             "vehicle_type": "Dynamic",  # vehicle type, only dynamic available
             "duration": 10.0,  # simulation duration [s]
-            "collision_reward": -200.0,  # max -ve reward for crashing
-            "point_reward": 100.0,  # max +ve reward for hitting the goal
-            "normalize_reward": True, # whether to normalize the reward [-1, +1]
+            "collision_reward": -100.0,  # max -ve reward for crashing
+            "reward_type": "dense",  # reward type
+            "point_reward": 1.0,  # multiplier for distance from goal
+            "normalize_reward": False,  # whether to normalize the reward [-1, +1], not working at the moment
             "goal_generation": {
                 "heading_limits": [-np.pi, np.pi],
                 "pitch_limits": [-10.0 * np.pi/180.0, 10.0 * np.pi/180.0],
@@ -61,6 +89,7 @@ def _create_world(self, seed) -> None:
     
     def _create_vehicles(self) -> None:
         """Create an aircraft to fly around the world"""
+        self.controlled_vehicles = []
         path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data/")
         start_pos = [0.0, 0.0, -1000.0]
         heading = 0.0
@@ -82,7 +111,6 @@ def _create_vehicles(self) -> None:
                 dt = 1/self.config["simulation_frequency"],
             )
         self.controlled_vehicles.append(vehicle)
-
     
     def _create_goal(self, seed) -> None:
         """Create a random goal in 3D space to navigate to, based on the aircraft's initial starting position"""
@@ -102,7 +130,25 @@ def get_goal():
         g_pos = get_goal()
         self.goal = g_pos
         return
-    
+
+    def compute_reward(self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info: dict) -> float:
+        """
+        Reward from the Euclidean distance between the goals (norm over the last axis, so batches give arrays).
+        "sparse" reward_type: -1 while farther than dist_terminal from the goal, otherwise 0.
+        Any other reward_type: -distance * 100 / (dist_limits[1] * duration * simulation_frequency), as in _point_reward.
+        TODO: look at how the gripper point robots select the points
+        """
+        def _goal_distance(goal_a, goal_b):
+            assert goal_a.shape == goal_b.shape  # reject mismatched shapes instead of broadcasting silently
+            return np.linalg.norm(goal_a - goal_b, axis=-1)
+
+        dist_terminal = self.config["goal_generation"]["dist_terminal"]
+        d = _goal_distance(achieved_goal, desired_goal)
+        if self.config["reward_type"] == "sparse":
+            return -(d > dist_terminal).astype(np.float32)
+        else:
+            return -d * 100.0/(self.config["goal_generation"]["dist_limits"][1] * self.config["duration"] * self.config["simulation_frequency"])
+
     def _reward(self, action: Action) -> float:
         """
         Reward vehicle if it makes progress towards the goal state
@@ -136,9 +182,10 @@ def _point_reward(self):
         Reward for reaching the goal state
         """
         distance = self.vehicle.goal_dist(self.goal)
-        point_reward = self.config["point_reward"]
-        dist_terminal = self.config["goal_generation"]["dist_terminal"]
-        reward = point_reward * dist_terminal / distance
+        # point_reward = self.config["point_reward"]
+        # dist_terminal = self.config["goal_generation"]["dist_terminal"]
+        # reward = point_reward * dist_terminal / distance
+        reward = -distance * 100.0/(self.config["goal_generation"]["dist_limits"][1] * self.config["duration"] * self.config["simulation_frequency"])
         return reward
 
     def _crash_reward(self) -> float:
@@ -146,7 +193,7 @@ def _crash_reward(self) -> float:
         Penalize if the aircraft crashes
         """
         if self.vehicle.crashed: 
-            return -200.0
+            return 1.0
         else: 
             return 0.0
 
diff --git a/flyer_env/envs/trajectory_env.py b/flyer_env/envs/trajectory_env.py
index 7b1391c..f123a47 100644
--- a/flyer_env/envs/trajectory_env.py
+++ b/flyer_env/envs/trajectory_env.py
@@ -10,6 +10,7 @@
 
 from pyflyer import World, Aircraft
 
+
 class TrajectoryEnv(AbstractEnv):
 
     """
diff --git a/scripts/her_flyer_plot.py b/scripts/her_flyer_plot.py
new file mode 100644
index 0000000..f1eaefa
--- /dev/null
+++ b/scripts/her_flyer_plot.py
@@ -0,0 +1,159 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import gymnasium as gym
+from stable_baselines3 import SAC
+
+plt.rcParams.update({
+    "text.usetex": True
+})
+
+COLOURS = [[0, 18, 25], [0, 95, 115], [10, 147, 150], [148, 210, 189], [233, 216, 166], [238, 155, 0], [202, 103, 2], [187, 62, 3], [174, 32, 18], [155, 34, 38]]
+COLOURS = [[value/255 for value in rgb] for rgb in COLOURS]
+
+def main():
+
+    env_config = {
+        "observation": {
+            "type": "Goal"
+        },
+        "action": {
+            "type": "ControlledAction"
+        },
+        "duration": 100.0,
+        "simulation_frequency": 100.0,
+        "seed": 0
+    }
+
+    env = gym.make("flyer-v1", config=env_config)
+    policy = SAC.load("models/flyer_controlled_her-v1/best_model.zip", env=env)
+
+    obs, info = env.reset()
+    done = False
+
+    observations = []
+    times = []
+    # targets = []
+    dt = 1/env_config["simulation_frequency"]
+    time = 0.0
+
+    while not done:
+        action, _states = policy.predict(obs, deterministic=True)
+        print(f'obs: {obs}')
+        print(f'action: {action}')
+        obs, reward, terminated, truncated, info = env.step(action)
+
+        v_dict = env.unwrapped.vehicle.dict
+        controls = env.unwrapped.vehicle.controls
+
+        obs_dict = {
+            'elevator': controls[1],
+            'aileron': controls[0],
+            'rudder': 0.0,
+            'x': v_dict['x'],
+            'y': v_dict['y'],
+            'z': v_dict['z'],
+            'x_com': obs['desired_goal'][0],
+            'y_com': obs['desired_goal'][1],
+            'z_com': obs['desired_goal'][2],
+            'pitch': v_dict['pitch'],
+            'roll': v_dict['roll'],
+            'yaw': v_dict['yaw'],
+            'u': v_dict['u'],
+            'reward': reward
+        }
+
+        times.append(time)
+        # targets.append(info['t_pos'])
+        observations.append(obs_dict)
+        time += dt
+
+        if terminated or truncated:
+            done = True
+
+    env.close()
+    observations = pd.DataFrame.from_dict(observations)
+    plot_long(observations, times, env_config["duration"])
+    plot_lat(observations, times, env_config["duration"])
+    plot_track(observations)
+    plt.show()
+
+def plot_long(outputs, times, exp_len):
+    """Draw the elevator, pitch, altitude, airspeed and reward traces against time and save them as a PDF."""
+    figure, panels = plt.subplots(5, 1, sharex=True)
+    figure.subplots_adjust(hspace=0.0)
+    figure.set_size_inches(20, 10)
+
+    panels[0].set_title(r"\textbf{Longitudinal Tracking}")
+    panels[0].plot(times, outputs['elevator'], c=COLOURS[5], label=r'elevator')
+    panels[0].set_ylabel(r"$\delta [^{\circ}]$", fontsize=15)
+    panels[0].legend(title=r'\textbf{Control}')
+
+    panels[1].plot(times, outputs['pitch'] * 180.0 / np.pi, c=COLOURS[1])
+    panels[1].set_ylabel(r"$\theta [^{\circ}]$", fontsize=15)
+
+    panels[2].plot(times, outputs['z'], c=COLOURS[1])
+    panels[2].plot(times, outputs['z_com'], linestyle='dashed', c=COLOURS[2])
+    panels[2].set_ylabel(r"$z [m]$", fontsize=15)
+
+    panels[3].plot(times, outputs['u'], c=COLOURS[1])
+    panels[3].set_ylabel(r"$u [\frac{m}{s}]$", fontsize=15)
+    panels[4].plot(times, outputs['reward'], c=COLOURS[1])
+    panels[4].set_ylabel(r'Reward', fontsize=15)
+    panels[4].set_xlabel(r'time [$s$]', fontsize=15)
+
+    for panel in panels:
+        panel.grid()
+        panel.set_xlim(0.0, exp_len)
+        panel.xaxis.set_tick_params(labelsize=15)
+        panel.yaxis.set_tick_params(labelsize=15)
+    figure.savefig("flyer_long_control_her.pdf")
+
+def plot_lat(outputs, times, exp_len):
+    """Draw the aileron/rudder, roll/yaw, airspeed and reward traces against time and save them as a PDF."""
+    fig, ax = plt.subplots(4, 1, sharex=True)
+    fig.subplots_adjust(hspace=0.0)
+    fig.set_size_inches(20, 10)
+
+    ax[0].set_title(r"\textbf{Lateral-Directional Tracking}")
+    ax[0].plot(times, outputs['aileron'], c=COLOURS[5], label=r'aileron')
+    ax[0].plot(times, outputs['rudder'], c=COLOURS[7], linestyle='dashed', label=r'rudder')
+    ax[0].set_ylabel(r"$\delta [^{\circ}]$", fontsize=15)
+    ax[0].legend(title=r'\textbf{Control}')
+
+    ax[1].plot(times, outputs['roll'] * 180.0 / np.pi, c=COLOURS[1], label=r'$\phi$')
+    ax[1].plot(times, outputs['yaw'] * 180.0 / np.pi, c=COLOURS[2], linestyle='dashed', label=r'$\psi$')
+    ax[1].set_ylabel(r"$\phi, \psi [^{\circ}]$", fontsize=15)
+    ax[1].legend()  # without this the roll/yaw labels above are never rendered
+
+    ax[2].plot(times, outputs['u'], c=COLOURS[1])
+    ax[2].set_ylabel(r"$u [\frac{m}{s}]$", fontsize=15)
+
+    ax[3].plot(times, outputs['reward'], c=COLOURS[1])
+    ax[3].set_ylabel(r'Reward', fontsize=15)
+    ax[3].set_xlabel(r'time [$s$]', fontsize=15)
+
+    for axis in ax:
+        axis.grid()
+        axis.set_xlim(0.0, exp_len)
+        axis.xaxis.set_tick_params(labelsize=15)
+        axis.yaxis.set_tick_params(labelsize=15)
+    fig.savefig("flyer_lat_control_her.pdf")
+
+def plot_track(outputs):
+    """Plot the x-y ground track with the commanded goal (assumed fixed for the episode) and save it as a PDF."""
+    fig, ax = plt.subplots(1, 1)
+    fig.set_figheight(10)
+    fig.set_figwidth(10)
+
+    ax.plot(outputs['x'], outputs['y'], c=COLOURS[1])
+    # Positional first row: the label lookup `[1]` raises KeyError on a one-step episode.
+    ax.scatter(outputs['x_com'].iloc[0], outputs['y_com'].iloc[0], c=COLOURS[2])
+    ax.set_ylabel(r"$y [m]$", fontsize=15)
+    ax.set_xlabel(r"$x [m]$", fontsize=15)
+    ax.set_aspect('equal')
+    ax.grid()
+    fig.savefig("flyer_track_control_her.pdf")
+
+if __name__=="__main__":
+    main()
\ No newline at end of file
diff --git a/scripts/sac_control_plot.py b/scripts/sac_control_plot.py
index 570170a..fd69fa7 100644
--- a/scripts/sac_control_plot.py
+++ b/scripts/sac_control_plot.py
@@ -184,7 +184,6 @@ def plot_lat(outputs, times, exp_len):
     ax[3].set_ylabel(r'Reward', fontsize=15)
     ax[3].set_xlabel(r'time [$s$]', fontsize=15)
 
-
     [axis.set_xlim(0.0, exp_len) for axis in ax]
     [axis.xaxis.set_tick_params(labelsize=15) for axis in ax]
     [axis.yaxis.set_tick_params(labelsize=15) for axis in ax]
diff --git a/scripts/test_plot.py b/scripts/test_plot.py
new file mode 100644
index 0000000..189b22e
--- /dev/null
+++ b/scripts/test_plot.py
@@ -0,0 +1,73 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import gymnasium as gym
+from stable_baselines3 import SAC
+
+plt.rcParams.update({
+    "text.usetex": True
+})
+
+COLOURS = [[0, 18, 25], [0, 95, 115], [10, 147, 150], [148, 210, 189], [233, 216, 166], [238, 155, 0], [202, 103, 2], [187, 62, 3], [174, 32, 18], [155, 34, 38]]
+COLOURS = [[value/255 for value in rgb] for rgb in COLOURS]
+
+def main():
+    """Roll out the saved SAC policy on Pendulum-v1 for one episode and plot the recorded trace."""
+    env = gym.make("Pendulum-v1")
+    policy = SAC.load("models/sac_pendulum-v1/best_model.zip")
+
+    obs, info = env.reset()
+    done = False
+    observations = []
+    dt = 0.05
+    times = []
+    time = 0.0
+
+    while not done:
+        action, _states = policy.predict(obs, deterministic=True)
+        obs, reward, terminated, truncated, info = env.step(action)
+
+        obs_dict = {
+            'x': obs[0],
+            'y': obs[1],
+            'theta_dot': obs[2],
+            # predict() returns an array; keep the scalar so the column plots as plain numbers
+            'action': float(np.ravel(action)[0]),
+            'reward': reward
+        }
+        times.append(time)
+        observations.append(obs_dict)
+        time += dt
+
+        done = terminated or truncated
+    env.close()
+
+    observations = pd.DataFrame.from_dict(observations)
+    plot_pendulum(observations, times)
+
+def plot_pendulum(outputs, times):
+    """Draw the x, y, action and reward traces against time on four stacked panels and save them as a PDF."""
+    figure, panels = plt.subplots(4, 1, sharex=True)
+    figure.subplots_adjust(hspace=0.0)
+    figure.set_size_inches(20, 10)
+
+    panels[0].set_title(r"\textbf{Pendulum Swingup}")
+    panels[0].plot(times, outputs['x'], c=COLOURS[1], label=r'x')
+    panels[0].set_ylabel(r'$x [m]$')
+    panels[1].plot(times, outputs['y'], c=COLOURS[1], label=r'y')
+    panels[1].set_ylabel(r'$y [m]$')
+
+    panels[2].plot(times, outputs['action'], c=COLOURS[1], label=r'tau')
+    panels[2].set_ylabel(r'$\tau [Nm]$')
+
+    panels[3].plot(times, outputs['reward'], c=COLOURS[1])
+    panels[3].set_ylabel(r'$reward [-]$')
+
+    for panel in panels:
+        panel.grid()
+        panel.xaxis.set_tick_params(labelsize=15)
+        panel.yaxis.set_tick_params(labelsize=15)
+    figure.savefig("test_pendulum.pdf")
+
+if __name__=="__main__":
+    main()
diff --git a/scripts/train.py b/scripts/train.py
index 1426d8c..6d25c35 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -2,7 +2,8 @@
 import hydra
 from pathlib import Path
 from omegaconf import DictConfig, OmegaConf
-from stable_baselines3 import SAC, PPO, DDPG
+
+from stable_baselines3 import HerReplayBuffer, SAC, PPO, DDPG
 from stable_baselines3.common.env_util import make_vec_env
 from stable_baselines3.common.callbacks import EvalCallback
 from stable_baselines3.common.monitor import Monitor
@@ -34,12 +35,28 @@ def __init__(self, cfg):
                                           eval_freq=cfg.eval_freq,
                                           deterministic=True,
                                           render=False)
-        self.model = SAC(
-            "MlpPolicy",
-            self.train_env,
-            verbose=1,
-            tensorboard_log=f".runs/sac"
-        )
+        
+        if cfg.use_her:
+            self.model = SAC(
+                "MultiInputPolicy",
+                self.train_env,
+                verbose=1,
+                replay_buffer_class=HerReplayBuffer,
+                # Parameters for HER
+                replay_buffer_kwargs=dict(
+                    n_sampled_goal=4,
+                    goal_selection_strategy="future"
+                ),
+                learning_starts=self.cfg.learning_starts,
+                tensorboard_log=f".runs/sac"
+            )
+        else:
+            self.model = SAC(
+                "MlpPolicy",
+                self.train_env,
+                verbose=1,
+                tensorboard_log=f".runs/sac"
+            )
         return
 
     def train(self):

From 4352213e33ec9d591b2cc1c7fc82313c934b189a Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 29 Nov 2023 13:15:54 +0000
Subject: [PATCH 02/29] added Goal

---
 scripts/conf/goal.yaml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/conf/goal.yaml b/scripts/conf/goal.yaml
index 9c2ffa9..097e64d 100644
--- a/scripts/conf/goal.yaml
+++ b/scripts/conf/goal.yaml
@@ -7,10 +7,10 @@ env_name: flyer-v1
 
 env_config:
   observation:
-    type: Trajectory
+    type: Goal
   action:
     type: ContinuousAction
-  duration: 10.0
+  duration: 100.0
   area:
     - 256
     - 256
@@ -18,9 +18,11 @@ env_config:
 
 # train settings
 agent_type: SAC
+use_her: True
 total_timesteps: 10000000
 n_envs: 32
 log_interval: 4
+learning_starts: 100000  # how many steps to sample before beginning learning
 
 # eval settings
 eval_freq: 500

From bf95acf010f7feef4f0d2a5af2830ecc77d8815d Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 29 Nov 2023 13:16:57 +0000
Subject: [PATCH 03/29] added parking script

---
 scripts/conf/parking.yaml | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 scripts/conf/parking.yaml

diff --git a/scripts/conf/parking.yaml b/scripts/conf/parking.yaml
new file mode 100644
index 0000000..850d197
--- /dev/null
+++ b/scripts/conf/parking.yaml
@@ -0,0 +1,23 @@
+defaults:
+  - override hydra/launcher: submitit_local
+
+# env settings
+policy_type: MlpPolicy
+env_name: parking-v0
+env_config: False
+
+# train settings 
+agent_type: SAC
+use_her: True
+total_timesteps: 10000000
+n_envs: 32
+log_interval: 4
+learning_starts: 100000
+
+# eval settings
+eval_freq: 500
+
+# misc
+use_wandb: True
+seed: 0
+render: False

From 0a433ccf3edfc8da6710df48fac16709f6294acf Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 29 Nov 2023 13:18:26 +0000
Subject: [PATCH 04/29] added reach robotic environment

---
 scripts/conf/reach.yaml | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 scripts/conf/reach.yaml

diff --git a/scripts/conf/reach.yaml b/scripts/conf/reach.yaml
new file mode 100644
index 0000000..0a17866
--- /dev/null
+++ b/scripts/conf/reach.yaml
@@ -0,0 +1,23 @@
+defaults:
+  - override hydra/launcher: submitit_local
+
+# env settings
+policy_type: MlpPolicy
+env_name: 'FetchReachDense-v2'
+env_config: False
+
+# train settings
+agent_type: SAC
+use_her: True
+total_timesteps: 10000000
+n_envs: 32
+log_interval: 4
+learning_starts: 100000
+
+# eval settings
+eval_freq: 500
+
+# misc
+use_wandb: True
+seed: 0
+render: False

From 966d23a8183b31bdf4306c9256f7ffc3bb43c144 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 29 Nov 2023 13:18:35 +0000
Subject: [PATCH 05/29] add use_her flag

---
 scripts/conf/test.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/conf/test.yaml b/scripts/conf/test.yaml
index 3cd7c74..1258dea 100644
--- a/scripts/conf/test.yaml
+++ b/scripts/conf/test.yaml
@@ -11,6 +11,7 @@ agent_type: SAC
 total_timesteps: 1000000
 n_envs: 32
 log_interval: 4
+use_her: False
 
 # eval settings
 eval_freq: 500

From 6a4d9864bc39d6f0127f22a5afe9d54538d27ace Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 29 Nov 2023 13:18:46 +0000
Subject: [PATCH 06/29] added use_her flag

---
 scripts/conf/trajectory.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/conf/trajectory.yaml b/scripts/conf/trajectory.yaml
index 1b38d0f..8478661 100644
--- a/scripts/conf/trajectory.yaml
+++ b/scripts/conf/trajectory.yaml
@@ -24,6 +24,7 @@ agent_type: SAC
 total_timesteps: 50000000
 n_envs: 32
 log_interval: 4
+use_her: False
 
 # eval settings
 eval_freq: 500

From aa134879a1150d029b1cdd44970739ebd8afca03 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Thu, 30 Nov 2023 15:08:48 +0000
Subject: [PATCH 07/29] simplify limits

---
 flyer_env/envs/flyer_env.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/flyer_env/envs/flyer_env.py b/flyer_env/envs/flyer_env.py
index 79cbb9b..a4dc1d0 100644
--- a/flyer_env/envs/flyer_env.py
+++ b/flyer_env/envs/flyer_env.py
@@ -63,8 +63,8 @@ def default_config(cls) -> dict:
             "point_reward": 1.0,  # multiplier for distance from goal
             "normalize_reward": False,  # whether to normalize the reward [-1, +1], not working at the moment
             "goal_generation": {
-                "heading_limits": [-np.pi, np.pi],
-                "pitch_limits": [-10.0 * np.pi/180.0, 10.0 * np.pi/180.0],
+                "heading_limits": [85.0 * np.pi/180.0, 95.0 * np.pi/180.0],
+                "pitch_limits": [-0.1 * np.pi/180.0, 0.1 * np.pi/180.0],
                 "dist_limits": [1000.0, 10000.0],
                 "dist_terminal": 20.0
             }  # goal generation details
@@ -124,6 +124,7 @@ def get_goal():
             pitch = np_random.uniform(gg["pitch_limits"][0], gg["pitch_limits"][1])
             dist = np_random.uniform(gg["dist_limits"][0], gg["dist_limits"][1])
             rel_pos = dist*np.array([np.cos(pitch)*np.sin(heading), np.cos(pitch)*np.cos(heading), np.sin(pitch)])
+            print(f'rel_pos: {rel_pos}, pitch: {pitch}, heading: {heading}, dist: {dist}')
             pos = v_pos + rel_pos
             return pos
 

From bc2c927722e394dfb9b619548847f8966fbdf62a Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Thu, 30 Nov 2023 15:08:55 +0000
Subject: [PATCH 08/29] add plot

---
 scripts/her_flyer_plot.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/scripts/her_flyer_plot.py b/scripts/her_flyer_plot.py
index f1eaefa..5d496c3 100644
--- a/scripts/her_flyer_plot.py
+++ b/scripts/her_flyer_plot.py
@@ -18,7 +18,7 @@ def main():
             "type": "Goal"
         },
         "action": {
-            "type": "ControlledAction"
+            "type": "ContinuousAction"
         },
         "duration": 100.0,
         "simulation_frequency": 100.0,
@@ -26,9 +26,14 @@ def main():
     }
 
     env = gym.make("flyer-v1", config=env_config)
-    policy = SAC.load("models/flyer_controlled_her-v1/best_model.zip", env=env)
+    print(f'reset_obs: {env.reset()}')
+
+    policy = SAC.load("models/flyer_her_simple-v1/best_model.zip", env=env)
 
     obs, info = env.reset()
+    print(f'env: {env}')
+    print(f'env.config: {env.config}')
+    print(f'obs: {obs}')
     done = False
 
     observations = []
@@ -39,16 +44,17 @@ def main():
 
     while not done:
         action, _states = policy.predict(obs, deterministic=True)
-        print(f'obs: {obs}')
-        print(f'action: {action}')
+        # print(f'obs: {obs}')
+        # print(f'action: {action}')
         obs, reward, terminated, truncated, info = env.step(action)
 
         v_dict = env.unwrapped.vehicle.dict
         controls = env.unwrapped.vehicle.controls
+        # print(f'controls: {controls}')
 
         obs_dict = {
-            'elevator': controls[1],
-            'aileron': controls[0],
+            'elevator': controls['elevator'],
+            'aileron': controls['aileron'],
             'rudder': 0.0,
             'x': v_dict['x'],
             'y': v_dict['y'],

From c8c761397f4f9914d14ab0ee0d554c82aefe1389 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 6 Dec 2023 09:38:28 +0000
Subject: [PATCH 09/29] comment out print

---
 flyer_env/envs/flyer_env.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flyer_env/envs/flyer_env.py b/flyer_env/envs/flyer_env.py
index a4dc1d0..da0f882 100644
--- a/flyer_env/envs/flyer_env.py
+++ b/flyer_env/envs/flyer_env.py
@@ -124,7 +124,7 @@ def get_goal():
             pitch = np_random.uniform(gg["pitch_limits"][0], gg["pitch_limits"][1])
             dist = np_random.uniform(gg["dist_limits"][0], gg["dist_limits"][1])
             rel_pos = dist*np.array([np.cos(pitch)*np.sin(heading), np.cos(pitch)*np.cos(heading), np.sin(pitch)])
-            print(f'rel_pos: {rel_pos}, pitch: {pitch}, heading: {heading}, dist: {dist}')
+            # print(f'rel_pos: {rel_pos}, pitch: {pitch}, heading: {heading}, dist: {dist}')
             pos = v_pos + rel_pos
             return pos
 
@@ -186,6 +186,7 @@ def _point_reward(self):
         # point_reward = self.config["point_reward"]
         # dist_terminal = self.config["goal_generation"]["dist_terminal"]
         # reward = point_reward * dist_terminal / distance
+        # print(f'distance: {distance}, self.goal: {self.goal}, pos: {self.vehicle.dict}')
         reward = -distance * 100.0/(self.config["goal_generation"]["dist_limits"][1] * self.config["duration"] * self.config["simulation_frequency"])
         return reward
 

From 17564957802359a367146a64a2624ec63d100f8d Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 6 Dec 2023 09:38:42 +0000
Subject: [PATCH 10/29] add heading only action

---
 flyer_env/envs/common/action.py | 56 +++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/flyer_env/envs/common/action.py b/flyer_env/envs/common/action.py
index 066c0ef..a010ed5 100644
--- a/flyer_env/envs/common/action.py
+++ b/flyer_env/envs/common/action.py
@@ -168,6 +168,60 @@ def act(self, action: np.ndarray) -> None:
         self.last_action = action 
 
 
+class HeadingAction(ActionType):
+    """
+    A continuous heading command flown at a fixed altitude and speed.
+    The action is [sine, cosine] of the commanded heading (see `act`).
+    `heading_range` is stored on the instance but is not used by `act`.
+    """
+
+    HEADING_RANGE = (-np.pi, np.pi)
+
+    def __init__(self,
+                 env: 'AbstractEnv',
+                 heading_range: Optional[Tuple[float, float]] = None,
+                 powered: bool = True,
+                 clip: bool = True,
+                 **kwargs) -> None:
+        """
+        Create a continuous laterally constrained action space
+        """
+        super().__init__(env)
+
+        self.heading_range = heading_range if heading_range else self.HEADING_RANGE
+
+        self.powered = powered
+        self.clip = clip
+        self.size = 2 if self.powered else 1
+
+        self.last_action = np.zeros(self.size)
+
+    def space(self) -> spaces.Box:
+        return spaces.Box(-1.0, 1.0, shape=(self.size,), dtype=np.float32)
+
+    @property
+    def vehicle_class(self) -> Callable:
+        return functools.partial(ControlledAircraft)
+
+    def act(self, action: np.ndarray) -> None:
+        """
+        Apply the action to the controlled vehicle
+
+        :param action: action array with [sine, cosine] of the commanded heading, clipped to [-1, 1] when `clip` is set
+        """
+
+        if self.clip:
+            action = np.clip(action, -1.0, 1.0)
+        # Only a powered action forwards a command; altitude and speed are fixed values.
+        if self.powered:
+            self.controlled_vehicle.act({
+                'heading': np.arctan2(action[0], action[1]),
+                'alt': -1000.0,
+                'speed': 80.0
+            })
+        self.last_action = action
+
+
 class ControlledAction(ActionType):
     """
     An action that controls the aircraft using a PID controller to track towards the target.
@@ -328,6 +382,8 @@ def action_factory(env: 'AbstractEnv', config: dict) -> ActionType:
         return ContinuousAction(env, **config)
     elif config["type"] == "LongitudinalAction":
         return LongitudinalAction(env, **config)
+    elif config["type"] == "HeadingAction":
+        return HeadingAction(env, **config)
     elif config["type"] == "ControlledAction":
         return ControlledAction(env, **config)
     elif config["type"] == "PursuitAction":

From ec15607c898f033d7ec4fd01add95670f5b2c631 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 6 Dec 2023 09:38:54 +0000
Subject: [PATCH 11/29] add lateral goal observation

---
 flyer_env/envs/common/observation.py | 38 ++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/flyer_env/envs/common/observation.py b/flyer_env/envs/common/observation.py
index 6bc3b19..5c4fbc3 100644
--- a/flyer_env/envs/common/observation.py
+++ b/flyer_env/envs/common/observation.py
@@ -196,6 +196,42 @@ def observe(self) -> Dict[str, np.ndarray]:
         return obs
 
 
+class LateralGoalObservation(DynamicObservation):
+    """Goal-conditioned observation over the lateral features (x, y, u, v, yaw by default)."""
+
+    FEATURES: List[str] = ['x', 'y', 'u', 'v', 'yaw']
+
+    def __init__(self, env: "AbstractEnv", features: List[str] = None, **kwargs: dict) -> None:
+        super().__init__(env, **kwargs)
+        self.features = features or self.FEATURES
+        # The env may not expose a goal yet; space() then falls back to an empty Space.
+        if hasattr(env, "goal"):
+            self.goal = env.goal
+
+    def space(self) -> spaces.Space:
+        try:
+            obs = self.observe()
+            return spaces.Dict(dict(
+                desired_goal=spaces.Box(-np.inf, np.inf, shape=obs["desired_goal"].shape, dtype=np.float64),
+                achieved_goal=spaces.Box(-np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype=np.float64),
+                observation=spaces.Box(-np.inf, np.inf, shape=obs["observation"].shape, dtype=np.float64)
+            ))
+        except AttributeError:
+            return spaces.Space()
+
+    def observe(self) -> Dict[str, np.ndarray]:
+        df = pd.DataFrame.from_records([self.observer_vehicle.dict])[self.features]
+        obs = df.values.copy()
+        # NOTE(review): with the default FEATURES the first three entries are x, y, u, so achieved_goal
+        # pairs forward speed with whatever the third component of env.goal is - confirm this is intended.
+        obs = OrderedDict([
+            ("observation", obs[0]),
+            ("achieved_goal", obs[0][0:3]),
+            ("desired_goal", self.goal)
+        ])
+        return obs
+
+
 def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType:
     if config["type"] == "Dynamics" or config["type"] == "dynamics":
         return DynamicObservation(env, **config)
@@ -207,5 +243,7 @@ def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType:
         return LongitudinalObservation(env, **config)
     elif config["type"] == "Goal" or config["type"] == "goal" or config["type"] == "DynamicGoal":
         return DynamicGoalObservation(env, **config)
+    elif config["type"] == "LateralGoal" or config["type"] == "lateral_goal":
+        return LateralGoalObservation(env, **config)
     else:
         raise ValueError("Unknown observation type")

From c591c7caa0c7a62e8a9bc910263351cc88f834ad Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 6 Dec 2023 09:39:11 +0000
Subject: [PATCH 12/29] change to use heading_action

---
 scripts/her_flyer_plot.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/scripts/her_flyer_plot.py b/scripts/her_flyer_plot.py
index 5d496c3..07ffc55 100644
--- a/scripts/her_flyer_plot.py
+++ b/scripts/her_flyer_plot.py
@@ -18,7 +18,7 @@ def main():
             "type": "Goal"
         },
         "action": {
-            "type": "ContinuousAction"
+            "type": "HeadingAction"
         },
         "duration": 100.0,
         "simulation_frequency": 100.0,
@@ -28,7 +28,7 @@ def main():
     env = gym.make("flyer-v1", config=env_config)
     print(f'reset_obs: {env.reset()}')
 
-    policy = SAC.load("models/flyer_her_simple-v1/best_model.zip", env=env)
+    policy = SAC.load("models/flyer_heading_her-v1/best_model.zip", env=env)
 
     obs, info = env.reset()
     print(f'env: {env}')
@@ -45,12 +45,15 @@ def main():
     while not done:
         action, _states = policy.predict(obs, deterministic=True)
         # print(f'obs: {obs}')
+        # action = np.array((0.0, 0.0))
         # print(f'action: {action}')
         obs, reward, terminated, truncated, info = env.step(action)
+        print(f'reward: {reward}')
 
         v_dict = env.unwrapped.vehicle.dict
-        controls = env.unwrapped.vehicle.controls
-        # print(f'controls: {controls}')
+        controls = env.unwrapped.vehicle.aircraft.controls
+        # print(f'vehicle: {env.unwrapped.vehicle.aircraft.controls}')
+        # print(f'v_dict: {v_dict}')
 
         obs_dict = {
             'elevator': controls['elevator'],

From 71e8afa45eee8aa439bd92a03167cff208311042 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 6 Dec 2023 09:39:35 +0000
Subject: [PATCH 13/29] change goal to training environment

---
 scripts/conf/goal.yaml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/scripts/conf/goal.yaml b/scripts/conf/goal.yaml
index 097e64d..8613f73 100644
--- a/scripts/conf/goal.yaml
+++ b/scripts/conf/goal.yaml
@@ -7,9 +7,12 @@ env_name: flyer-v1
 
 env_config:
   observation:
-    type: Goal
+    type: LateralGoal
   action:
-    type: ContinuousAction
+    type: HeadingAction
+    heading_range:
+      - -0.1
+      - 0.1
   duration: 100.0
   area:
     - 256
@@ -19,7 +22,7 @@ env_config:
 # train settings
 agent_type: SAC
 use_her: True
-total_timesteps: 10000000
+total_timesteps: 100000000
 n_envs: 32
 log_interval: 4
 learning_starts: 100000  # how many steps to sample before beginning learning

From 1bba652c20a96b61c18b7ca9d9b143b8043f7019 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Mon, 11 Dec 2023 16:20:42 +0000
Subject: [PATCH 14/29] draw world seed and goal from env RNG; add is_success info

---
 flyer_env/envs/common/abstract.py |  2 +-
 flyer_env/envs/flyer_env.py       | 50 ++++++++++++++++++++-----------
 2 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/flyer_env/envs/common/abstract.py b/flyer_env/envs/common/abstract.py
index 1456b5e..470569e 100644
--- a/flyer_env/envs/common/abstract.py
+++ b/flyer_env/envs/common/abstract.py
@@ -180,7 +180,7 @@ def reset(self,
         self.time = 0.0
         self.steps = 0
         self.done = False
-        self._reset(seed)
+        self._reset()
 
         # Second, to link the obs and actions to the vehicles once the scene is created
         self.define_spaces()
diff --git a/flyer_env/envs/flyer_env.py b/flyer_env/envs/flyer_env.py
index da0f882..6c6a416 100644
--- a/flyer_env/envs/flyer_env.py
+++ b/flyer_env/envs/flyer_env.py
@@ -66,25 +66,33 @@ def default_config(cls) -> dict:
                 "heading_limits": [85.0 * np.pi/180.0, 95.0 * np.pi/180.0],
                 "pitch_limits": [-0.1 * np.pi/180.0, 0.1 * np.pi/180.0],
                 "dist_limits": [1000.0, 10000.0],
-                "dist_terminal": 20.0
+                "dist_terminal": 100.0
             }  # goal generation details
         })
         return config
 
-    def _reset(self, seed) -> None:
-        if not seed: seed = 1  # set seed to 1 if None TODO: set to be random on None, look @ HighwayEnv
-        self._create_world(seed)
+    def _info(self, obs, action) -> dict:
+        info = super(FlyerEnv, self)._info(obs, action)
+        success = self._is_success()
+        info.update({"is_success": success})
+        return info
+
+    def _reset(self) -> None:
+
+        self.np_random = np.random.RandomState()
+        self._create_world()
         self._create_vehicles()
-        self._create_goal(seed)
+        self._create_goal()
 
-    def _create_world(self, seed) -> None:
+    def _create_world(self) -> None:
         """Create the world map"""
         self.world = World()
         path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "assets")
         self.world.assets_dir = path
         path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "terrain_data")
         self.world.terrain_data_dir = path
-        self.world.create_map(seed, area=self.config["area"])
+        world_seed = self.np_random.randint(100)  # set 100 possible seeds by default 
+        self.world.create_map(world_seed, area=self.config["area"])
         return
     
     def _create_vehicles(self) -> None:
@@ -112,17 +120,15 @@ def _create_vehicles(self) -> None:
             )
         self.controlled_vehicles.append(vehicle)
     
-    def _create_goal(self, seed) -> None:
+    def _create_goal(self) -> None:
         """Create a random goal in 3D space to navigate to, based on the aircraft's initial starting position"""
         v_pos = self.world.vehicles[0].position
         gg = self.config["goal_generation"]
 
-        np_random = np.random.RandomState(seed)
-
         def get_goal():
-            heading = np_random.uniform(gg["heading_limits"][0], gg["heading_limits"][1])
-            pitch = np_random.uniform(gg["pitch_limits"][0], gg["pitch_limits"][1])
-            dist = np_random.uniform(gg["dist_limits"][0], gg["dist_limits"][1])
+            heading = self.np_random.uniform(gg["heading_limits"][0], gg["heading_limits"][1])
+            pitch = self.np_random.uniform(gg["pitch_limits"][0], gg["pitch_limits"][1])
+            dist = self.np_random.uniform(gg["dist_limits"][0], gg["dist_limits"][1])
             rel_pos = dist*np.array([np.cos(pitch)*np.sin(heading), np.cos(pitch)*np.cos(heading), np.sin(pitch)])
             # print(f'rel_pos: {rel_pos}, pitch: {pitch}, heading: {heading}, dist: {dist}')
             pos = v_pos + rel_pos
@@ -145,6 +151,7 @@ def _goal_distance(goal_a, goal_b):
 
         dist_terminal = self.config["goal_generation"]["dist_terminal"]
         d = _goal_distance(achieved_goal, desired_goal)
+        # print(f'distance: {d}, achieved_goal: {achieved_goal}, desired_goal: {desired_goal}')
         if self.config["reward_type"] == "sparse":
             return -(d > dist_terminal).astype(np.float32)
         else:
@@ -199,15 +206,22 @@ def _crash_reward(self) -> float:
         else: 
             return 0.0
 
+    def _is_success(self) -> bool:
+        v_pos = self.vehicle.position
+        difference = np.subtract(v_pos, self.goal)
+        distance = np.linalg.norm(difference)
+        dist_terminal = self.config["goal_generation"]["dist_terminal"] 
+        return distance < dist_terminal 
+
     def _is_terminated(self) -> bool:
         """
         The episode is over if the the ego vehicle crashed, or it hits the ground
         """
 
-        v_pos = self.vehicle.position
-        difference = np.subtract(v_pos, self.goal)
-        distance = np.linalg.norm(difference)
-        dist_terminal = self.config["goal_generation"]["dist_terminal"]
+        # v_pos = self.vehicle.position
+        # difference = np.subtract(v_pos, self.goal)
+        # distance = np.linalg.norm(difference)
+        # dist_terminal = self.config["goal_generation"]["dist_terminal"]
 
         # If crashed terminate
         if self.vehicle.crashed:
@@ -216,7 +230,7 @@ def _is_terminated(self) -> bool:
         if self.vehicle.position[-1] >= 0.0:
             return True
         # If reached goal region
-        if distance < dist_terminal:
+        if self._is_success():
             return True
         return False
 

From 412ebf240101b871dc27957e1793288813cd3374 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Mon, 11 Dec 2023 16:21:07 +0000
Subject: [PATCH 15/29] heading action is one value now

---
 flyer_env/envs/common/action.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/flyer_env/envs/common/action.py b/flyer_env/envs/common/action.py
index a010ed5..7cd6617 100644
--- a/flyer_env/envs/common/action.py
+++ b/flyer_env/envs/common/action.py
@@ -189,15 +189,13 @@ def __init__(self,
         super().__init__(env)
 
         self.heading_range = heading_range if heading_range else self.HEADING_RANGE
-
         self.powered = powered
         self.clip = clip
-        self.size = 2 if self.powered else 1
-
+        self.size = 1
         self.last_action = np.zeros(self.size)
 
     def space(self) -> spaces.Box:
-        return spaces.Box(-1.0, 1.0, shape=(self.size,), dtype=np.float32)
+        return spaces.Box(self.heading_range[0], self.heading_range[1], shape=(self.size,), dtype=np.float32)
 
     @property
     def vehicle_class(self) -> Callable:
@@ -211,11 +209,11 @@ def act(self, action: np.ndarray) -> None:
         """
         
         if self.clip:
-            action = np.clip(action, -1.0, 1.0)
+            action = np.clip(action, self.heading_range[0], self.heading_range[1])
         
         if self.powered:
             self.controlled_vehicle.act({
-               'heading': np.arctan2(action[0], action[1]),
+               'heading': action,
                'alt': -1000.0,
                'speed': 80.0
             })

From 4e848a675cd6ecdcc185169aec546b46c20c1af5 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Mon, 11 Dec 2023 16:21:16 +0000
Subject: [PATCH 16/29] fixed setup tool

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index cd62890..2794dac 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [build-system]
 requires = [
-    "setuptools>=42",
+    "setuptools==68.2.2",
     "setuptools-rust",
     "wheel"
 ]

From 6772459ce6149fe5572beeab0dc1daca338ec137 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Mon, 11 Dec 2023 16:21:32 +0000
Subject: [PATCH 17/29] Add Lateral Trajectory Observation

---
 flyer_env/envs/common/observation.py | 40 ++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/flyer_env/envs/common/observation.py b/flyer_env/envs/common/observation.py
index 5c4fbc3..b1f85de 100644
--- a/flyer_env/envs/common/observation.py
+++ b/flyer_env/envs/common/observation.py
@@ -101,6 +101,40 @@ def observe(self) -> np.ndarray:
         obs[0, 2] = self.goal[2] - obs[0, 2]
         return obs.astype(self.space().dtype)
 
+class LateralTrajectoryObservation(ObservationType):
+
+    """
+    Observe dynamics of vehicle relative to goal position, restricted to horizontal plane
+    ONLY FOR USE WITH TRAJECTORY ENV
+    """
+
+    FEATURES: List[str] = ['x', 'y', 'u', 'v', 'yaw']
+
+    def __init__(self,
+                 env: "AbstractEnv",
+                 features: List[str] = None,
+                 vehicles_count: int = 1,
+                 features_range: Dict[str, List[float]] = None,
+                 **kwargs: dict) -> None:
+        
+        super().__init__(env)
+        self.features = features or self.FEATURES
+        self.vehicles_count = vehicles_count
+        self.features_range = features_range
+        if hasattr(env, "goal"):
+            self.goal = env.goal
+
+    def space(self) -> spaces.Space:
+        return spaces.Box(shape=(self.vehicles_count, len(self.features)), low=-np.inf, high=np.inf, dtype=np.float32)
+    
+    def observe(self) -> np.ndarray:
+
+        df = pd.DataFrame.from_records([self.observer_vehicle.dict])[self.features]
+        df = df[self.features]
+        obs = df.values.copy()
+        obs[0, 0] = self.goal[0] - obs[0, 0]
+        obs[0, 1] = self.goal[1] - obs[0, 1]
+        return obs.astype(self.space().dtype)
 
 class ControlObservation(ObservationType):
     
@@ -226,8 +260,8 @@ def observe(self) -> Dict[str, np.ndarray]:
         obs = df.values.copy()
         obs = OrderedDict([
             ("observation", obs[0]),
-            ("achieved_goal", obs[0][0:3]),
-            ("desired_goal", self.goal)
+            ("achieved_goal", obs[0][0:2]),
+            ("desired_goal", self.goal[0:2])
         ])
         return obs
 
@@ -237,6 +271,8 @@ def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType:
         return DynamicObservation(env, **config)
     elif config["type"] == "Trajectory" or config["type"] == "trajectory":
         return TrajectoryObservation(env, **config)
+    elif config["type"] == "LateralTrajectory" or config["lateral_trajectory"]:
+        return LateralTrajectoryObservation(env, **config)
     elif config["type"] == "Control" or config["type"] == "control":
         return ControlObservation(env, **config)
     elif config["type"] == "Longitudinal" or config["type"] == "longitudinal":

From 04d81043bae44466e425a8d9f2c1b1a6ed66a262 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Mon, 11 Dec 2023 16:21:44 +0000
Subject: [PATCH 18/29] Use LateralGoal observation in HER plot script

---
 scripts/her_flyer_plot.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/scripts/her_flyer_plot.py b/scripts/her_flyer_plot.py
index 07ffc55..76e2981 100644
--- a/scripts/her_flyer_plot.py
+++ b/scripts/her_flyer_plot.py
@@ -15,20 +15,27 @@ def main():
 
     env_config = {
         "observation": {
-            "type": "Goal"
+            "type": "LateralGoal"
         },
         "action": {
-            "type": "HeadingAction"
+            "type": "HeadingAction",
+            "heading_range": (-0.1, 0.1)
         },
         "duration": 100.0,
         "simulation_frequency": 100.0,
-        "seed": 0
+        "seed": 0,
+        "goal_generation": {
+            "heading_limits": [85.0 * np.pi/180.0, 95.0 * np.pi/180.0],
+            "pitch_limits": [-0.0001, 0.0001],
+            "dist_limits": [1000.0, 2000.0],
+            "dist_terminal": 100.0
+        }
     }
 
     env = gym.make("flyer-v1", config=env_config)
     print(f'reset_obs: {env.reset()}')
 
-    policy = SAC.load("models/flyer_heading_her-v1/best_model.zip", env=env)
+    policy = SAC.load("models/flyer_heading_her_1000_2000-v1/best_model.zip", env=env)
 
     obs, info = env.reset()
     print(f'env: {env}')
@@ -44,11 +51,11 @@ def main():
 
     while not done:
         action, _states = policy.predict(obs, deterministic=True)
-        # print(f'obs: {obs}')
-        # action = np.array((0.0, 0.0))
-        # print(f'action: {action}')
+        print(f'obs: {obs}')
+        action = np.arctan2(obs['desired_goal'][1], obs['desired_goal'][0])
+        print(f'action: {action * (180.0/np.pi)}')
         obs, reward, terminated, truncated, info = env.step(action)
-        print(f'reward: {reward}')
+        # print(f'reward: {reward}')
 
         v_dict = env.unwrapped.vehicle.dict
         controls = env.unwrapped.vehicle.aircraft.controls
@@ -64,7 +71,7 @@ def main():
             'z': v_dict['z'],
             'x_com': obs['desired_goal'][0],
             'y_com': obs['desired_goal'][1],
-            'z_com': obs['desired_goal'][2],
+            'z_com': -1000.0,
             'pitch': v_dict['pitch'],
             'roll': v_dict['roll'],
             'yaw': v_dict['yaw'],
@@ -85,6 +92,7 @@ def main():
     plot_long(observations, times, env_config["duration"])
     plot_lat(observations, times, env_config["duration"])
     plot_track(observations)
+    print(f'return: {np.sum(observations["reward"])}')
     plt.show()
 
 def plot_long(outputs, times, exp_len):

From d49eff2ace1dd058aeb9d7b09bedc33261160004 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Mon, 11 Dec 2023 16:22:14 +0000
Subject: [PATCH 19/29] comment out EvalCallback

---
 scripts/train.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/scripts/train.py b/scripts/train.py
index 6d25c35..0f33023 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -30,11 +30,15 @@ def __init__(self, cfg):
             self.train_env = make_vec_env(cfg.env_name, n_envs=cfg.n_envs, seed=cfg.seed)
             self.eval_env = gym.make(cfg.env_name)
         self.eval_env = Monitor(self.eval_env)
-        self.eval_callback = EvalCallback(self.eval_env,
-                                          best_model_save_path=f"./logs/{exp_name}",
-                                          eval_freq=cfg.eval_freq,
-                                          deterministic=True,
-                                          render=False)
+
+        # Supressed eval callback for now, seems to interfer with training
+        # print(f'self.eval_env: {self.eval_env.config}')
+
+        # self.eval_callback = EvalCallback(self.eval_env,
+        #                                   best_model_save_path=f"./logs/{exp_name}",
+        #                                   eval_freq=cfg.eval_freq,
+        #                                   deterministic=True,
+        #                                   render=False)
         
         if cfg.use_her:
             self.model = SAC(
@@ -65,7 +69,7 @@ def train(self):
             callback = [WandbCallback(
                             model_save_path=f"{self.work_dir}/{self.run.id}",
                                 verbose=2
-                            ), self.eval_callback
+                            ) #, self.eval_callback
                         ]
         else:
             callback = [self.eval_callback]
@@ -74,6 +78,8 @@ def train(self):
                          log_interval=self.cfg.log_interval,
                          progress_bar=True,
                          callback=callback)
+        
+        self.model.save(".runs/model")
 
         if self.cfg.use_wandb:
             self.run.finish()

From d090b3d3c531187ecbec086d72befc85878d3027 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Mon, 11 Dec 2023 16:22:29 +0000
Subject: [PATCH 20/29] Fixed responses

---
 scripts/trim_disturbance.py | 42 ++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/scripts/trim_disturbance.py b/scripts/trim_disturbance.py
index 10b0e67..1bea462 100644
--- a/scripts/trim_disturbance.py
+++ b/scripts/trim_disturbance.py
@@ -28,22 +28,34 @@ def simulate():
     controls = []
     states = []
     times = []
+    duration = 100.0
 
-    for ids in range(int(30.0/dt)):
+    for ids in range(int(duration/dt)):
         
         time = ids*dt
 
+        # if 5.0 < time < 6.0:
+        #     control_input = [0.0, -5.0 * np.pi/180.0, tla, 0.0]
+        # elif 6.0 < time < 7.0:
+        #     control_input = [0.0, 5.0 * np.pi/180.0, tla, 0.0]
+        # if 12.0 < time < 13.0:
+        #     control_input = [-5.0 * np.pi/180.0, elevator, tla, 0.0]
+        # elif 13.0 < time < 14.0:
+        #     control_input = [5.0 * np.pi/180.0, elevator, tla, 0.0]
+        # elif 19.0 < time < 20.0:
+        #     control_input = [0.0, elevator, tla, -5.0 * np.pi/180.0]
+        # elif 20.0 < time < 21.0:
+        #     control_input = [0.0, elevator, tla, 5.0 * np.pi/180.0]
+        # else:
+        #     control_input = [0.0, elevator, tla, 0.0]
+
         if 5.0 < time < 6.0:
-            control_input = [0.0, -5.0 * np.pi/180.0, tla, 0.0]
-        elif 6.0 < time < 7.0:
-            control_input = [0.0, 5.0 * np.pi/180.0, tla, 0.0]
-        elif 12.0 < time < 13.0:
             control_input = [-5.0 * np.pi/180.0, elevator, tla, 0.0]
-        elif 13.0 < time < 14.0:
+        elif 6.0 < time < 7.0:
             control_input = [5.0 * np.pi/180.0, elevator, tla, 0.0]
-        elif 19.0 < time < 20.0:
-            control_input = [0.0, elevator, tla, -5.0 * np.pi/180.0]
         elif 20.0 < time < 21.0:
+            control_input = [0.0, elevator, tla, -5.0 * np.pi/180.0]
+        elif 21.0 < time < 22.0:
             control_input = [0.0, elevator, tla, 5.0 * np.pi/180.0]
         else:
             control_input = [0.0, elevator, tla, 0.0]
@@ -72,7 +84,7 @@ def plot_long(inputs, outputs, times):
     fig.set_figwidth(20)
 
     ax[0].set_title(r"\textbf{Longitudinal Disturbance}", fontsize=30)
-    ax[0].plot(times, inputs['elevator'], c=COLOURS[7])
+    ax[0].plot(times, inputs['elevator'] * (180.0/np.pi), c=COLOURS[7])
     ax[0].set_ylabel(r"$\delta_{e} [^{\circ}]$", fontsize=15)
 
     ax[1].plot(times, outputs['q'] * 180.0 / np.pi, c=COLOURS[1])
@@ -85,10 +97,10 @@ def plot_long(inputs, outputs, times):
     ax[3].set_ylabel(r"$V_{\infty} [m/s]$", fontsize=15)
     ax[3].set_xlabel(r"time [$s$]", fontsize=15)
 
-    [axis.set_xlim(0.0, 30.0) for axis in ax]
+    [axis.set_xlim(0.0, 100.0) for axis in ax]
     [axis.xaxis.set_tick_params(labelsize=15) for axis in ax]
     [axis.yaxis.set_tick_params(labelsize=15) for axis in ax]
-    fig.show()
+    fig.savefig("long_trim_dist.pdf")
 
 def plot_lat(inputs, outputs, times):
     fig, ax = plt.subplots(4, 1, sharex=True)
@@ -98,8 +110,8 @@ def plot_lat(inputs, outputs, times):
     fig.set_figwidth(20)
 
     ax[0].set_title(r"\textbf{Lateral-Directional Disturbance}", fontsize=30)
-    ax[0].plot(times, inputs['aileron'], c=COLOURS[5], label=r'aileron')
-    ax[0].plot(times, inputs['rudder'], c=COLOURS[7], linestyle='dashed', label=r'rudder')
+    ax[0].plot(times, inputs['aileron'] * (180.0/np.pi), c=COLOURS[5], label=r'aileron')
+    ax[0].plot(times, inputs['rudder'] * (180.0/np.pi), c=COLOURS[7], linestyle='dashed', label=r'rudder')
     ax[0].set_ylabel(r"$\delta [^{\circ}]$", fontsize=15)
     ax[0].legend(title=r'\textbf{Control}')
 
@@ -115,10 +127,10 @@ def plot_lat(inputs, outputs, times):
     ax[3].set_xlabel(r"time [$s$]", fontsize=15)
     ax[3].legend(title=r'\textbf{Attitude}')
 
-    [axis.set_xlim(10.0, 30.0) for axis in ax]
+    [axis.set_xlim(0.0, 40.0) for axis in ax]
     [axis.xaxis.set_tick_params(labelsize=15) for axis in ax]
     [axis.yaxis.set_tick_params(labelsize=15) for axis in ax]
-    fig.show()
+    fig.savefig("lat_trim_dist.pdf")
 
 def main():
     simulate()

From 1a7b4a9d6da2bc4d087197bc4dacdb742cc14da6 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Mon, 11 Dec 2023 16:22:48 +0000
Subject: [PATCH 21/29] Add SAC flyer solution

---
 scripts/conf/flyer.yaml | 47 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 scripts/conf/flyer.yaml

diff --git a/scripts/conf/flyer.yaml b/scripts/conf/flyer.yaml
new file mode 100644
index 0000000..f9f726c
--- /dev/null
+++ b/scripts/conf/flyer.yaml
@@ -0,0 +1,47 @@
+defaults:
+  - override hydra/launcher: submitit_local
+
+# env settings
+policy_type: MlpPolicy
+env_name: flyer-v1
+
+env_config:
+  observation:
+    type: LateralTrajectory
+  action:
+    type: HeadingAction
+    heading_range:
+      - -0.5
+      - 0.5
+  duration: 100.0
+  area:
+    - 256
+    - 256
+  simulation_frequency: 100.0
+  goal_generation:
+    heading_limits:
+      - 1.0707
+      - 2.0708
+    pitch_limits:
+      - -0.0001
+      - 0.0001
+    dist_limits:
+      - 1000.0
+      - 1001.0
+    dist_terminal: 100.0  # scalar threshold: the env compares a distance against it directly
+
+# train settings
+agent_type: SAC
+use_her: False
+total_timesteps: 40000000
+n_envs: 32
+log_interval: 4
+learning_starts: 100000 # how many steps to sample before beginning learning
+
+# eval settings
+eval_freq: 10000
+
+# misc
+use_wandb: True
+seed: 0
+render: False

From d8a3bb5fe3fbf7460fcf36c05437192701014beb Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Mon, 11 Dec 2023 16:23:02 +0000
Subject: [PATCH 22/29] restrict goal_generation method

---
 scripts/conf/goal.yaml | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/scripts/conf/goal.yaml b/scripts/conf/goal.yaml
index 8613f73..e852250 100644
--- a/scripts/conf/goal.yaml
+++ b/scripts/conf/goal.yaml
@@ -18,6 +18,17 @@ env_config:
     - 256
     - 256
   simulation_frequency: 100.0
+  goal_generation:
+    heading_limits:
+      - 1.4835
+      - 1.6580
+    pitch_limits:
+      - -0.0001
+      - 0.0001
+    dist_limits:
+      - 1000.0
+      - 2000.0
+    dist_terminal: 100.0  # scalar threshold: the env compares a distance against it directly
 
 # train settings
 agent_type: SAC
@@ -25,12 +36,12 @@ use_her: True
 total_timesteps: 100000000
 n_envs: 32
 log_interval: 4
-learning_starts: 100000  # how many steps to sample before beginning learning
+learning_starts: 1000000  # how many steps to sample before beginning learning
 
 # eval settings
 eval_freq: 500
 
 # misc
-use_wandb: True
+use_wandb: False
 seed: 0
 render: False

From b4c7f6de6ec811d11bdbc547a9edad492f0f4bad Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 6 Mar 2024 15:17:25 +0000
Subject: [PATCH 23/29] changed seed generation method

---
 flyer_env/envs/control_env.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/flyer_env/envs/control_env.py b/flyer_env/envs/control_env.py
index 9687e08..2d831a9 100644
--- a/flyer_env/envs/control_env.py
+++ b/flyer_env/envs/control_env.py
@@ -40,19 +40,21 @@ def default_config(cls) -> dict:
         })
         return config
     
-    def _reset(self, seed) -> None:
-        if not seed: seed = 1
-        self._create_world(seed)
+    def _reset(self) -> None:
+        
+        self.np_random = np.random.RandomState()
+        self._create_world()
         self._create_vehicles()
     
-    def _create_world(self, seed) -> None:
+    def _create_world(self) -> None:
         """Create the world map"""
         self.world = World()
         path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "assets")
         self.world.assets_dir = path
         path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "terrain_data")
         self.world.terrain_data_dir = path
-        self.world.create_map(seed, area=self.config["area"])
+        world_seed = self.np_random.randint(100)  # set 100 possible seeds by default 
+        self.world.create_map(world_seed, area=self.config["area"])
 
     def _create_vehicles(self) -> None:
         """Create an aircraft to fly around the world"""

From 8abeee5401ba9fefd31334ff759a8f9907f5e2d5 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 6 Mar 2024 15:17:38 +0000
Subject: [PATCH 24/29] changed seed generation method

---
 flyer_env/envs/runway_env.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/flyer_env/envs/runway_env.py b/flyer_env/envs/runway_env.py
index 375ebbc..aa89624 100644
--- a/flyer_env/envs/runway_env.py
+++ b/flyer_env/envs/runway_env.py
@@ -41,20 +41,21 @@ def default_config(cls) -> dict:
         })
         return config
     
-    def _reset(self, seed) -> None:
-        if not seed: seed = 1
-        self._create_world(seed)
+    def _reset(self) -> None:
+        self.np_random = np.random.RandomState()
+        self._create_world()
         self._create_runway()
         self._create_vehicles()
 
-    def _create_world(self, seed) -> None:
+    def _create_world(self) -> None:
         """Create the world map"""
         self.world = World()
         path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "assets")
         self.world.assets_dir = path
         path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "terrain_data")
         self.world.terrain_data_dir = path
-        self.world.create_map(seed, area=self.config["area"])
+        world_seed = self.np_random.randint(100)  # set 100 possible seeds by default 
+        self.world.create_map(world_seed, area=self.config["area"])
         return
     
     def _create_runway(self) -> None:

From ec4665b7dc94b6c93209c396d6ff4dec65e88402 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 6 Mar 2024 15:18:01 +0000
Subject: [PATCH 25/29] corrected naming error

---
 flyer_env/envs/common/observation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flyer_env/envs/common/observation.py b/flyer_env/envs/common/observation.py
index b1f85de..dde458c 100644
--- a/flyer_env/envs/common/observation.py
+++ b/flyer_env/envs/common/observation.py
@@ -271,7 +271,7 @@ def observation_factory(env: "AbstractEnv", config: dict) -> ObservationType:
         return DynamicObservation(env, **config)
     elif config["type"] == "Trajectory" or config["type"] == "trajectory":
         return TrajectoryObservation(env, **config)
-    elif config["type"] == "LateralTrajectory" or config["lateral_trajectory"]:
+    elif config["type"] == "LateralTrajectory" or config["type"] == "lateral_trajectory":
         return LateralTrajectoryObservation(env, **config)
     elif config["type"] == "Control" or config["type"] == "control":
         return ControlObservation(env, **config)

From 5e0d5e0c0ca49320cdf6ecc886b5a025ba3478ca Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 6 Mar 2024 15:18:26 +0000
Subject: [PATCH 26/29] improved landing plot

---
 scripts/pid_landing.py | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/scripts/pid_landing.py b/scripts/pid_landing.py
index ae3a881..42a6ae8 100644
--- a/scripts/pid_landing.py
+++ b/scripts/pid_landing.py
@@ -68,6 +68,8 @@ def main():
     alt = -1000.0
     
     observations = []
+    dt = 1/env.unwrapped.config["simulation_frequency"]
+    time = 0.0
 
     print(f'target_list: {target_list}')
 
@@ -75,6 +77,15 @@ def main():
 
         pos = env.unwrapped.vehicle.position
         heading = nav_track.arc_path(pos)
+
+        if time > 550.0:
+            alt = 0.0
+        else:
+            alt = -1000.0
+        time += dt
+
+
+
         # print(f'heading_com: {heading * 180.0/np.pi}, heading_act: {env.unwrapped.vehicle.dict["yaw"] * 180.0/np.pi}')
         action = [np.sin(heading), np.cos(heading),
                   utils.lmap(alt, env.unwrapped.action_type.alt_range, [-1.0, 1.0]),
@@ -86,13 +97,22 @@ def main():
         controls = env.unwrapped.vehicle.aircraft.controls
         obs_dict = {
             'x': v_dict['x'],
-            'y': v_dict['y']
+            'y': v_dict['y'],
+            'z': v_dict['z']
         }
+        # print(f'z: {v_dict["z"]}')
         observations.append(obs_dict)     
     env.close()
 
     observations = pd.DataFrame.from_dict(observations)
+
+    # fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
+    # # ax.set_aspect('equal', adjustable='box')
+    # ax.plot(observations['x'], observations['y'], -1.0 * observations['z'])
+    # plt.show()
+
     plot_position(observations, targets)
+    plot_3d(observations, targets)
     plt.show()
 
 def plot_position(outputs, targets):
@@ -102,13 +122,26 @@ def plot_position(outputs, targets):
     fig.set_figwidth(10)
     
     ax.plot(outputs['x'], outputs['y'], c=COLOURS[1])
-    ax.scatter(targets['x'], targets['y'], c=COLOURS[2])
+    ax.scatter(targets['x'], targets['y'], c=COLOURS[5])
     ax.set_ylabel(r"$y [m]$", fontsize=15)
     ax.set_xlabel(r"$x [m]$", fontsize=15)
+    ax.axes.set_ylim(top=2500)
     ax.set_aspect('equal')
     ax.grid()
     
-    fig.show()
+    fig.savefig("2d_landing.pdf")
+
+def plot_3d(outputs, targets):
+
+    fig = plt.figure()
+    ax = fig.add_subplot(111, projection='3d')
+    ax.plot(outputs['x'], outputs['y'], -1.0 * outputs['z'], c=COLOURS[1])
+    ax.set_aspect('equal')
+    # ax.axes.set_xlim3d(left=-12005, right=0.5)
+    # ax.axes.set_ylim3d(bottom=-0.5, top=25500)
+    # ax.axes.set_zlim3d(bottom=-5.0, top=1005)
+    # ax.scatter(targets['x'], targets['y'], np.linspace(0, ))
+    fig.savefig("3d_landing.pdf")
 
 if __name__=="__main__":
     main()

From 775cab327468e8ed27bb0065dc693eed3bb7dd02 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 6 Mar 2024 15:19:47 +0000
Subject: [PATCH 27/29] added eval_callback

---
 scripts/train.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/scripts/train.py b/scripts/train.py
index 0f33023..1942618 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -34,11 +34,11 @@ def __init__(self, cfg):
         # Supressed eval callback for now, seems to interfer with training
         # print(f'self.eval_env: {self.eval_env.config}')
 
-        # self.eval_callback = EvalCallback(self.eval_env,
-        #                                   best_model_save_path=f"./logs/{exp_name}",
-        #                                   eval_freq=cfg.eval_freq,
-        #                                   deterministic=True,
-        #                                   render=False)
+        self.eval_callback = EvalCallback(self.eval_env,
+                                          best_model_save_path=f"./logs/{exp_name}",
+                                          eval_freq=cfg.eval_freq,
+                                          deterministic=True,
+                                          render=False)
         
         if cfg.use_her:
             self.model = SAC(
@@ -69,7 +69,7 @@ def train(self):
             callback = [WandbCallback(
                             model_save_path=f"{self.work_dir}/{self.run.id}",
                                 verbose=2
-                            ) #, self.eval_callback
+                            ), self.eval_callback
                         ]
         else:
             callback = [self.eval_callback]

From 905e59351be4bf39034a65abce7a773b57acc630 Mon Sep 17 00:00:00 2001
From: quessy <aq15777@bristol.ac.uk>
Date: Wed, 6 Mar 2024 15:20:01 +0000
Subject: [PATCH 28/29] changed seeds

---
 scripts/conf/control.yaml |  7 ++++---
 scripts/conf/flyer.yaml   | 10 +++++-----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/scripts/conf/control.yaml b/scripts/conf/control.yaml
index bbf7f17..1e4a749 100644
--- a/scripts/conf/control.yaml
+++ b/scripts/conf/control.yaml
@@ -7,9 +7,9 @@ env_name: control-v1
 
 env_config:
   observation:
-    type: Dynamics
+    type: Longitudinal
   action:
-    type: ContinuousAction
+    type: LongitudinalAction
   duration: 10.0
   area:
     - 256
@@ -18,6 +18,7 @@ env_config:
 
 # train settings
 agent_type: SAC
+use_her: False
 total_timesteps: 20000000
 n_envs: 32
 log_interval: 4
@@ -27,5 +28,5 @@ eval_freq: 500
 
 # misc
 use_wandb: True
-seed: 0
+seed: 5
 render: False
diff --git a/scripts/conf/flyer.yaml b/scripts/conf/flyer.yaml
index f9f726c..7f73b64 100644
--- a/scripts/conf/flyer.yaml
+++ b/scripts/conf/flyer.yaml
@@ -11,17 +11,17 @@ env_config:
   action:
     type: HeadingAction
     heading_range:
-      - -0.5
-      - 0.5
-  duration: 100.0
+      - -0.10
+      - 0.10
+  duration: 20.0
   area:
     - 256
     - 256
   simulation_frequency: 100.0
   goal_generation:
     heading_limits:
-      - 1.0707
-      - 2.0708
+      - 1.471
+      - 1.671
     pitch_limits:
       - -0.0001
       - 0.0001

From 9438aca136ce27553da0f48620f00839b3da851c Mon Sep 17 00:00:00 2001
From: AOS55 <aq15777@bristol.ac.uk>
Date: Thu, 14 Mar 2024 14:24:26 +0000
Subject: [PATCH 29/29] add pre-commit job

---
 .github/workflows/pre-commit.yml | 21 +++++++++++++++++++++
 .pre-commit-config.yaml          | 24 ++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 .github/workflows/pre-commit.yml
 create mode 100644 .pre-commit-config.yaml

diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
new file mode 100644
index 0000000..024c134
--- /dev/null
+++ b/.github/workflows/pre-commit.yml
@@ -0,0 +1,21 @@
+# https://pre-commit.com
+# This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file
+name: pre-commit
+on:
+  pull_request:
+  push:
+    branches: [master]
+
+permissions:
+  contents: read # to fetch code (actions/checkout)
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+      - run: python -m pip install pre-commit
+      - run: python -m pre_commit --version
+      - run: python -m pre_commit install
+      - run: python -m pre_commit run --all-files
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..cc95bb9
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,24 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+  - repo: https://github.com/PyCQA/flake8
+    rev: 6.0.0
+    hooks:
+      - id: flake8
+        args:
+          - '--per-file-ignores=**/__init__.py:F401,F403,E402'
+          - --ignore=E203,W503,E741,E731
+          - --max-complexity=30
+          - --max-line-length=456
+          - --show-source
+          - --statistics
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: ["--profile", "black"]
+        exclude: "__init__.py"
+  - repo: https://github.com/psf/black
+    rev: 23.3.0
+    hooks:
+      - id: black