Commit 2ef667b

final

sebastien-mart-in committed Sep 25, 2024
1 parent 9e4ddb0
Showing 11 changed files with 15 additions and 3,001 deletions.
533 changes: 0 additions & 533 deletions cleanrl/cleanrl_explo/PA_version/apt_sac.py

This file was deleted.

483 changes: 0 additions & 483 deletions cleanrl/cleanrl_explo/PA_version/aux_sac.py

This file was deleted.

476 changes: 0 additions & 476 deletions cleanrl/cleanrl_explo/PA_version/icm_sac.py

This file was deleted.

549 changes: 0 additions & 549 deletions cleanrl/cleanrl_explo/PA_version/ngu_sac.py

This file was deleted.

453 changes: 0 additions & 453 deletions cleanrl/cleanrl_explo/PA_version/rnd_sac.py

This file was deleted.

18 changes: 3 additions & 15 deletions cleanrl/cleanrl_explo/sac_apt.py
@@ -13,7 +13,6 @@
 import tyro
 from stable_baselines3.common.buffers import ReplayBuffer
 from torch.utils.tensorboard import SummaryWriter
-from lil_maze import LilMaze
 
 
 @dataclass
@@ -100,12 +99,10 @@ class Args:
 def make_env(env_id, seed, idx, capture_video, run_name):
     def thunk():
         if capture_video and idx == 0:
-            #env = gym.make(env_id, render_mode="rgb_array")
-            env = LilMaze(render_mode="rgb_array")
+            env = gym.make(env_id, render_mode="rgb_array")
             env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
         else:
-            #env = gym.make(env_id)
-            env = LilMaze()
+            env = gym.make(env_id)
         env = gym.wrappers.RecordEpisodeStatistics(env)
         env.action_space.seed(seed)
         return env
@@ -327,9 +324,6 @@ def main(seed=None, sweep=False):
 
     start_time = time.time()
 
-
-    pure_exploration_discrete_matrix = np.zeros((50,50))
-
     # TRY NOT TO MODIFY: start the game
     obs, _ = envs.reset(seed=args.seed)
     for global_step in range(args.total_timesteps):
@@ -343,21 +337,15 @@
         # TRY NOT TO MODIFY: execute the game and log data.
         next_obs, rewards, terminations, truncations, infos = envs.step(actions)
 
-        for aaa in range(len(obs)):
-            pure_exploration_discrete_matrix[min(int(obs[aaa][0]*50),49)][min(int(obs[aaa][1]*50),49)] = min(1, pure_exploration_discrete_matrix[min(int(obs[aaa][0]*50),49)][min(int(obs[aaa][1]*50),49)] +1)
-
-
         # TRY NOT TO MODIFY: record rewards for plotting purposes
         if "final_info" in infos:
             for info in infos["final_info"]:
                 if info is not None:
                     print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
                     if sweep:
-                        #episodic_returns_list.append(info["episode"]["r"])
-                        episodic_returns_list.append(np.array([np.mean(pure_exploration_discrete_matrix)]))
+                        episodic_returns_list.append(info["episode"]["r"])
                         corresponding_steps.append(global_step)
                     else:
-                        writer.add_scalar("charts/mean_exploration", np.mean(pure_exploration_discrete_matrix), global_step)
                         writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
                         writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
                     break
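Note on the removed instrumentation: the deleted lines implemented an ad hoc state-coverage metric. Each 2-D observation, apparently in [0, 1]^2 for the removed LilMaze environment, was bucketed into a 50x50 occupancy grid whose mean was logged as charts/mean_exploration. A minimal self-contained sketch of that logic, with names and the observation range inferred from the deleted code rather than confirmed by this commit:

import numpy as np

GRID_SIZE = 50  # the deleted code used a fixed 50x50 grid

def update_coverage(grid, obs_batch):
    # Mark the cell visited by each environment's (x, y) observation;
    # clamping to GRID_SIZE - 1 handles observations exactly equal to 1.0.
    for o in obs_batch:
        x = min(int(o[0] * GRID_SIZE), GRID_SIZE - 1)
        y = min(int(o[1] * GRID_SIZE), GRID_SIZE - 1)
        grid[x, y] = 1.0  # saturates at 1, equivalent to min(1, cell + 1)

coverage = np.zeros((GRID_SIZE, GRID_SIZE))
update_coverage(coverage, np.random.rand(4, 2))  # stand-in batch of observations
print(coverage.mean())  # the quantity that was logged as charts/mean_exploration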
16 changes: 3 additions & 13 deletions cleanrl/cleanrl_explo/sac_aux.py
@@ -13,7 +13,6 @@
 import tyro
 from stable_baselines3.common.buffers import ReplayBuffer
 from torch.utils.tensorboard import SummaryWriter
-from lil_maze import LilMaze
 
 @dataclass
 class Args:
@@ -92,12 +91,10 @@ class Args:
 def make_env(env_id, seed, idx, capture_video, run_name):
     def thunk():
         if capture_video and idx == 0:
-            #env = gym.make(env_id, render_mode="rgb_array")
-            env = LilMaze(render_mode="rgb_array")
+            env = gym.make(env_id, render_mode="rgb_array")
             env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
         else:
-            #env = gym.make(env_id)
-            env = LilMaze()
+            env = gym.make(env_id)
         env = gym.wrappers.RecordEpisodeStatistics(env)
         env.action_space.seed(seed)
         return env
@@ -315,8 +312,6 @@ def main(seed=None, sweep=False):
     )
     start_time = time.time()
 
-    pure_exploration_discrete_matrix = np.zeros((50,50))
-
     # TRY NOT TO MODIFY: start the game
     obs, _ = envs.reset(seed=args.seed)
     for global_step in range(args.total_timesteps):
@@ -330,20 +325,15 @@
         # TRY NOT TO MODIFY: execute the game and log data.
         next_obs, rewards, terminations, truncations, infos = envs.step(actions)
 
-        for aaa in range(len(obs)):
-            pure_exploration_discrete_matrix[min(int(obs[aaa][0]*50),49)][min(int(obs[aaa][1]*50),49)] = min(1, pure_exploration_discrete_matrix[min(int(obs[aaa][0]*50),49)][min(int(obs[aaa][1]*50),49)] +1)
-
         # TRY NOT TO MODIFY: record rewards for plotting purposes
         if "final_info" in infos:
             for info in infos["final_info"]:
                 if info is not None:
                     print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
                     if sweep:
-                        #episodic_returns_list.append(info["episode"]["r"])
-                        episodic_returns_list.append(np.array([np.mean(pure_exploration_discrete_matrix)]))
+                        episodic_returns_list.append(info["episode"]["r"])
                         corresponding_steps.append(global_step)
                     else:
-                        writer.add_scalar("charts/mean_exploration", np.mean(pure_exploration_discrete_matrix), global_step)
                         writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
                         writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
                     break
18 changes: 3 additions & 15 deletions cleanrl/cleanrl_explo/sac_icm.py
@@ -13,7 +13,6 @@
 import tyro
 from stable_baselines3.common.buffers import ReplayBuffer
 from torch.utils.tensorboard import SummaryWriter
-from lil_maze import LilMaze
 
 
 @dataclass
@@ -92,12 +91,10 @@ class Args:
 def make_env(env_id, seed, idx, capture_video, run_name):
     def thunk():
         if capture_video and idx == 0:
-            #env = gym.make(env_id, render_mode="rgb_array")
-            env = LilMaze(render_mode="rgb_array")
+            env = gym.make(env_id, render_mode="rgb_array")
             env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
         else:
-            #env = gym.make(env_id)
-            env = LilMaze()
+            env = gym.make(env_id)
         env = gym.wrappers.RecordEpisodeStatistics(env)
         env.action_space.seed(seed)
         return env
@@ -317,9 +314,6 @@ def main(seed=None, sweep=False):
     )
     start_time = time.time()
 
-
-    pure_exploration_discrete_matrix = np.zeros((50,50))
-
     # TRY NOT TO MODIFY: start the game
     obs, _ = envs.reset(seed=args.seed)
     for global_step in range(args.total_timesteps):
@@ -333,22 +327,16 @@
         # TRY NOT TO MODIFY: execute the game and log data.
         next_obs, rewards, terminations, truncations, infos = envs.step(actions)
 
-        for aaa in range(len(obs)):
-            pure_exploration_discrete_matrix[min(int(obs[aaa][0]*50),49)][min(int(obs[aaa][1]*50),49)] = min(1, pure_exploration_discrete_matrix[min(int(obs[aaa][0]*50),49)][min(int(obs[aaa][1]*50),49)] +1)
-
-
 
         # TRY NOT TO MODIFY: record rewards for plotting purposes
         if "final_info" in infos:
             for info in infos["final_info"]:
                 if info is not None:
                     print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
                     if sweep:
-                        #episodic_returns_list.append(info["episode"]["r"])
-                        episodic_returns_list.append(np.array([np.mean(pure_exploration_discrete_matrix)]))
+                        episodic_returns_list.append(info["episode"]["r"])
                         corresponding_steps.append(global_step)
                     else:
-                        writer.add_scalar("charts/mean_exploration", np.mean(pure_exploration_discrete_matrix), global_step)
                         writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
                         writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
                     break
18 changes: 3 additions & 15 deletions cleanrl/cleanrl_explo/sac_ngu.py
@@ -13,7 +13,6 @@
 import tyro
 from stable_baselines3.common.buffers import ReplayBuffer
 from torch.utils.tensorboard import SummaryWriter
-from lil_maze import LilMaze
 
 
 @dataclass
@@ -98,12 +97,10 @@ class Args:
 def make_env(env_id, seed, idx, capture_video, run_name):
     def thunk():
         if capture_video and idx == 0:
-            #env = gym.make(env_id, render_mode="rgb_array")
-            env = LilMaze(render_mode="rgb_array")
+            env = gym.make(env_id, render_mode="rgb_array")
             env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
         else:
-            #env = gym.make(env_id)
-            env = LilMaze()
+            env = gym.make(env_id)
         env = gym.wrappers.RecordEpisodeStatistics(env)
         env.action_space.seed(seed)
         return env
@@ -408,9 +405,6 @@ def main(seed=None, sweep=False):
     )
     start_time = time.time()
 
-
-    pure_exploration_discrete_matrix = np.zeros((50,50))
-
     # TRY NOT TO MODIFY: start the game
     obs, _ = envs.reset(seed=args.seed)
     for global_step in range(args.total_timesteps):
@@ -424,10 +418,6 @@
         # TRY NOT TO MODIFY: execute the game and log data.
         next_obs, rewards, terminations, truncations, infos = envs.step(actions)
 
-        for aaa in range(len(obs)):
-            pure_exploration_discrete_matrix[min(int(obs[aaa][0]*50),49)][min(int(obs[aaa][1]*50),49)] = min(1, pure_exploration_discrete_matrix[min(int(obs[aaa][0]*50),49)][min(int(obs[aaa][1]*50),49)] +1)
-
-
         # COMPUTE REWARD
         reward_ngu = torch.zeros(args.num_envs)
         for idx in range(args.num_envs):
@@ -441,11 +431,9 @@
                 if info is not None:
                     print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
                     if sweep:
-                        #episodic_returns_list.append(info["episode"]["r"])
-                        episodic_returns_list.append(np.array([np.mean(pure_exploration_discrete_matrix)]))
+                        episodic_returns_list.append(info["episode"]["r"])
                         corresponding_steps.append(global_step)
                     else:
-                        writer.add_scalar("charts/mean_exploration", np.mean(pure_exploration_discrete_matrix), global_step)
                         writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
                         writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
                     break
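After this revert, all four scripts build environments through the stock CleanRL make_env factory restored above. As a usage sketch, the thunks are typically consumed by a vectorized wrapper; the env id and the counts below are illustrative, not taken from this commit:

import gymnasium as gym

def make_env(env_id, seed, idx, capture_video, run_name):
    def thunk():
        if capture_video and idx == 0:
            env = gym.make(env_id, render_mode="rgb_array")
            env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
        else:
            env = gym.make(env_id)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        env.action_space.seed(seed)
        return env
    return thunk

# One thunk per parallel environment, as in the CleanRL SAC scripts.
envs = gym.vector.SyncVectorEnv(
    [make_env("Pendulum-v1", 1 + i, i, False, "demo") for i in range(4)]
)
obs, _ = envs.reset(seed=1)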
