Skip to content

Commit

Permalink
last experiments
Browse files Browse the repository at this point in the history
  • Loading branch information
BartekCupial committed Jan 27, 2025
1 parent b0e9226 commit 39133a5
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 1 deletion.
2 changes: 1 addition & 1 deletion mrunner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,4 @@ contexts:
singularity_container: --nv -H $PWD:/homeplaceholder --env WANDBPWD=$PWD -B /net:/net -B $TMPDIR:/tmp /net/pr2/projects/plgrid/plgggmum_crl/bcupial/ncw.sif
slurm_url: [email protected]
storage_dir: /net/tscratch/people/plgbartekcupial/
- time: 2880
+ time: 1000
84 changes: 84 additions & 0 deletions train_configs/simba2_separate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import os

from mrunner.helpers.specification_helper import create_experiments_helper

# Experiment name: the value of the `script` global with its last three
# characters dropped. `script` is not defined in this file.
# NOTE(review): presumably injected by mrunner and ending in ".py" — confirm.
name = globals()["script"][:-3]

# Sampling / batching constants referenced by `config` below.
num_minibatches = 1
num_epochs = 1
num_envs = 512
batch_size = 4096
num_steps = 32
num_workers = 16

# Base configuration shared by every experiment generated from this file.
# NOTE(review): keys that also appear in `params_grid` (the actor/critic
# hidden_dim and depth entries) are presumably overridden per run — confirm
# against mrunner's handling of base_config vs. params_grid.
config = {
# --- run identity and training entry point ---
"exp_tag": name,
"env": "nethack_score",
"run_script": "sf_examples.nethack.train_nethack",
"train_for_env_steps": 100_000_000,
# --- sampling: 512 envs divided over 16 workers -> 32 envs per worker ---
"num_workers": num_workers,
"num_envs_per_worker": num_envs // num_workers,
"worker_num_splits": 2,
"rollout": num_steps,
# --- batching ---
"batch_size": batch_size,
"num_batches_per_epoch": num_minibatches,
"num_epochs": num_epochs,
"penalty_step": 0.0,
"penalty_time": 0.0,
"async_rl": True,
"serial_mode": False,
# --- Weights & Biases logging ---
"wandb_user": "bartekcupial",
"wandb_project": "nle_simba",
"wandb_group": "ideas-ncbr",
"with_wandb": True,
# --- environment ---
"decorrelate_envs_on_one_worker": True,
"character": "mon-hum-neu-mal",
# --- loss / optimiser settings ---
"max_grad_norm": 40.0,
"learning_rate": 1e-4,
"exploration_loss_coeff": 0.001,
"gamma": 0.999,
"gae_lambda": 1.0,
"value_loss_coeff": 1.0,
# --- network: actor and critic do not share weights in this experiment ---
"actor_critic_share_weights": False,
# The four size entries below are also swept in `params_grid`.
"critic_hidden_dim": 32,
"critic_depth": 1,
"actor_hidden_dim": 512,
"actor_depth": 3,
"model": "simba",
# --- model input / encoder options ---
"add_image_observation": True,
"normalize_input": False,
"use_prev_action": True,
"use_learned_embeddings": True,
"use_max_pool": True,
"pixel_size": 1,
"expansion": 1,
}

# Per-experiment overrides: one grid entry for every (critic depth, actor
# depth) pair, critic depth varying slowest. Each network's hidden width is
# tied to its own depth as 128 * depth.
params_grid = [
    {
        "seed": [*range(1)],
        "actor_hidden_dim": [a_depth * 128],
        "actor_depth": [a_depth],
        "critic_hidden_dim": [c_depth * 128],
        "critic_depth": [c_depth],
    }
    for c_depth in (1, 2, 3)
    for a_depth in (1, 2, 3)
]

# Assemble `experiments_list` from the shared `config` and the sweep grid.
# NOTE(review): presumably yields one experiment per combination described by
# `params_grid`, layered on top of `config` — confirm against mrunner docs.
experiments_list = create_experiments_helper(
    # what gets launched
    script="python3 mrunner_run.py",
    python_path=".",
    mrunner_ignore=".mrunnerignore",
    # what gets swept
    base_config=config,
    params_grid=params_grid,
    # how the runs are labelled
    experiment_name=name,
    project_name="nle_simba",
    tags=[name],
    with_neptune=False,
    # The W&B key is read from the launching environment; if the variable is
    # unset this lookup raises KeyError.
    env={"WANDB_API_KEY": os.environ["WANDB_API_KEY"]},
)
78 changes: 78 additions & 0 deletions train_configs/vit2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import os

from mrunner.helpers.specification_helper import create_experiments_helper

# Experiment name: the value of the `script` global with its last three
# characters dropped. `script` is not defined in this file.
# NOTE(review): presumably injected by mrunner and ending in ".py" — confirm.
name = globals()["script"][:-3]

# Sampling / batching constants referenced by `config` below.
num_minibatches = 1
num_epochs = 1
num_envs = 512
batch_size = 4096
num_steps = 32
num_workers = 16

# Base configuration shared by every experiment generated from this file.
# NOTE(review): "actor_hidden_dim" and "actor_depth" are repeated with the same
# values in `params_grid`; presumably the grid values win — confirm against
# mrunner's handling of base_config vs. params_grid.
config = {
# --- run identity and training entry point ---
"exp_tag": name,
"env": "nethack_score",
"run_script": "sf_examples.nethack.train_nethack",
"train_for_env_steps": 100_000_000,
# --- sampling: 512 envs divided over 16 workers -> 32 envs per worker ---
"num_workers": num_workers,
"num_envs_per_worker": num_envs // num_workers,
"worker_num_splits": 2,
"rollout": num_steps,
# --- batching ---
"batch_size": batch_size,
"num_batches_per_epoch": num_minibatches,
"num_epochs": num_epochs,
"penalty_step": 0.0,
"penalty_time": 0.0,
"async_rl": True,
"serial_mode": False,
# --- Weights & Biases logging ---
"wandb_user": "bartekcupial",
"wandb_project": "nle_simba",
"wandb_group": "ideas-ncbr",
"with_wandb": True,
# --- environment ---
"decorrelate_envs_on_one_worker": True,
"character": "mon-hum-neu-mal",
# --- loss / optimiser settings ---
"max_grad_norm": 40.0,
"learning_rate": 1e-4,
"exploration_loss_coeff": 0.001,
"gamma": 0.999,
"gae_lambda": 1.0,
"value_loss_coeff": 1.0,
# --- network: actor and critic share weights in this experiment ---
"actor_critic_share_weights": True,
"critic_hidden_dim": 32,
"critic_depth": 1,
"actor_hidden_dim": 512,
"actor_depth": 3,
"model": "vit",
# --- model input / encoder options ---
"add_image_observation": True,
"normalize_input": False,
"use_prev_action": True,
"use_learned_embeddings": True,
"pixel_size": 1,
}

# Per-experiment overrides: a single grid entry (seed 0, actor width 512,
# actor depth 3). Values are wrapped in lists, one candidate each.
params_grid = [
    {
        "seed": [*range(1)],
        "actor_hidden_dim": [512],
        "actor_depth": [3],
    }
]

# Assemble `experiments_list` from the shared `config` and the sweep grid.
# NOTE(review): presumably yields one experiment per combination described by
# `params_grid`, layered on top of `config` — confirm against mrunner docs.
experiments_list = create_experiments_helper(
    # what gets launched
    script="python3 mrunner_run.py",
    python_path=".",
    mrunner_ignore=".mrunnerignore",
    # what gets swept
    base_config=config,
    params_grid=params_grid,
    # how the runs are labelled
    experiment_name=name,
    project_name="nle_simba",
    tags=[name],
    with_neptune=False,
    # The W&B key is read from the launching environment; if the variable is
    # unset this lookup raises KeyError.
    env={"WANDB_API_KEY": os.environ["WANDB_API_KEY"]},
)

0 comments on commit 39133a5

Please sign in to comment.