From 39133a59a91e3b6c5dc369a76d26a833e08fad39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bart=C5=82omiej=20Cupia=C5=82?=
Date: Mon, 27 Jan 2025 17:58:01 +0100
Subject: [PATCH] last experiments

---
Reviewer notes (text between the separator above and the first diff is
commentary; it is not part of the commit message or the applied change):

  * mrunner.yaml: lowers the `time` value of one entry under `contexts:`
    from 2880 to 1000. NOTE(review): presumably a job time limit in
    minutes -- confirm against the mrunner/Slurm backend.
    NOTE(review): the leading whitespace of the YAML context lines in this
    hunk was reconstructed; verify against the target file, or apply with
    `git apply --ignore-whitespace` if the context does not match.

  * train_configs/simba2_separate.py (new): base config with
    "model": "simba" and "actor_critic_share_weights": False.
    `params_grid` is a 3 x 3 comprehension over critic_depth and
    actor_depth in [1, 2, 3] (9 grid entries), each setting the matching
    hidden dim to 128 * depth, with a single seed (`list(range(1))`).
    The base config also carries actor/critic depth and hidden-dim values;
    presumably the grid entries override them -- confirm against
    `create_experiments_helper`.

  * train_configs/vit2.py (new): base config with "model": "vit" and
    "actor_critic_share_weights": True. `params_grid` has one entry
    (seed 0, actor_hidden_dim 512, actor_depth 3), which repeats the
    base-config values for those two keys.

  * Both new configs use 512 envs over 16 workers (32 per worker),
    rollout 32, batch_size 4096 and train_for_env_steps 100_000_000.

  * Both new configs read `globals()["script"]` and
    `os.environ["WANDB_API_KEY"]` at module level: loading either file
    raises KeyError if `script` has not been injected into the module
    globals (presumably done by the mrunner loader -- confirm) or if
    WANDB_API_KEY is not set in the environment.

 mrunner.yaml                     |  2 +-
 train_configs/simba2_separate.py | 84 ++++++++++++++++++++++++++++++++
 train_configs/vit2.py            | 78 +++++++++++++++++++++++++++++
 3 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 train_configs/simba2_separate.py
 create mode 100644 train_configs/vit2.py

diff --git a/mrunner.yaml b/mrunner.yaml
index cf05422d7..c859f0ca1 100644
--- a/mrunner.yaml
+++ b/mrunner.yaml
@@ -43,4 +43,4 @@ contexts:
     singularity_container: --nv -H $PWD:/homeplaceholder --env WANDBPWD=$PWD -B /net:/net -B $TMPDIR:/tmp /net/pr2/projects/plgrid/plgggmum_crl/bcupial/ncw.sif
     slurm_url: plgbartekcupial@athena.cyfronet.pl
     storage_dir: /net/tscratch/people/plgbartekcupial/
-    time: 2880
+    time: 1000
diff --git a/train_configs/simba2_separate.py b/train_configs/simba2_separate.py
new file mode 100644
index 000000000..7c06a6e6f
--- /dev/null
+++ b/train_configs/simba2_separate.py
@@ -0,0 +1,84 @@
+import os
+
+from mrunner.helpers.specification_helper import create_experiments_helper
+
+name = globals()["script"][:-3]
+
+num_minibatches = 1
+num_epochs = 1
+num_envs = 512
+batch_size = 4096
+num_steps = 32
+num_workers = 16
+
+# params for all exps
+config = {
+    "exp_tag": name,
+    "env": "nethack_score",
+    "run_script": "sf_examples.nethack.train_nethack",
+    "train_for_env_steps": 100_000_000,
+    "num_workers": num_workers,
+    "num_envs_per_worker": num_envs // num_workers,
+    "worker_num_splits": 2,
+    "rollout": num_steps,
+    "batch_size": batch_size,
+    "num_batches_per_epoch": num_minibatches,
+    "num_epochs": num_epochs,
+    "penalty_step": 0.0,
+    "penalty_time": 0.0,
+    "async_rl": True,
+    "serial_mode": False,
+    "wandb_user": "bartekcupial",
+    "wandb_project": "nle_simba",
+    "wandb_group": "ideas-ncbr",
+    "with_wandb": True,
+    "decorrelate_envs_on_one_worker": True,
+    "character": "mon-hum-neu-mal",
+    "max_grad_norm": 40.0,
+    "learning_rate": 1e-4,
+    "exploration_loss_coeff": 0.001,
+    "gamma": 0.999,
+    "gae_lambda": 1.0,
+    "value_loss_coeff": 1.0,
+    "actor_critic_share_weights": False,
+    "critic_hidden_dim": 32,
+    "critic_depth": 1,
+    "actor_hidden_dim": 512,
+    "actor_depth": 3,
+    "model": "simba",
+    "add_image_observation": True,
+    "normalize_input": False,
+    "use_prev_action": True,
+    "use_learned_embeddings": True,
+    "use_max_pool": True,
+    "pixel_size": 1,
+    "expansion": 1,
+}
+
+# params different between exps
+params_grid = [
+    {
+        "seed": list(range(1)),
+        "actor_hidden_dim": [128 * actor_depth],
+        "actor_depth": [actor_depth],
+        "critic_hidden_dim": [128 * critic_depth],
+        "critic_depth": [critic_depth],
+    }
+    for critic_depth in [1, 2, 3]
+    for actor_depth in [1, 2, 3]
+]
+
+experiments_list = create_experiments_helper(
+    experiment_name=name,
+    project_name="nle_simba",
+    with_neptune=False,
+    script="python3 mrunner_run.py",
+    python_path=".",
+    tags=[name],
+    env={
+        "WANDB_API_KEY": os.environ["WANDB_API_KEY"],
+    },
+    base_config=config,
+    params_grid=params_grid,
+    mrunner_ignore=".mrunnerignore",
+)
diff --git a/train_configs/vit2.py b/train_configs/vit2.py
new file mode 100644
index 000000000..fc00fdea2
--- /dev/null
+++ b/train_configs/vit2.py
@@ -0,0 +1,78 @@
+import os
+
+from mrunner.helpers.specification_helper import create_experiments_helper
+
+name = globals()["script"][:-3]
+
+num_minibatches = 1
+num_epochs = 1
+num_envs = 512
+batch_size = 4096
+num_steps = 32
+num_workers = 16
+
+# params for all exps
+config = {
+    "exp_tag": name,
+    "env": "nethack_score",
+    "run_script": "sf_examples.nethack.train_nethack",
+    "train_for_env_steps": 100_000_000,
+    "num_workers": num_workers,
+    "num_envs_per_worker": num_envs // num_workers,
+    "worker_num_splits": 2,
+    "rollout": num_steps,
+    "batch_size": batch_size,
+    "num_batches_per_epoch": num_minibatches,
+    "num_epochs": num_epochs,
+    "penalty_step": 0.0,
+    "penalty_time": 0.0,
+    "async_rl": True,
+    "serial_mode": False,
+    "wandb_user": "bartekcupial",
+    "wandb_project": "nle_simba",
+    "wandb_group": "ideas-ncbr",
+    "with_wandb": True,
+    "decorrelate_envs_on_one_worker": True,
+    "character": "mon-hum-neu-mal",
+    "max_grad_norm": 40.0,
+    "learning_rate": 1e-4,
+    "exploration_loss_coeff": 0.001,
+    "gamma": 0.999,
+    "gae_lambda": 1.0,
+    "value_loss_coeff": 1.0,
+    "actor_critic_share_weights": True,
+    "critic_hidden_dim": 32,
+    "critic_depth": 1,
+    "actor_hidden_dim": 512,
+    "actor_depth": 3,
+    "model": "vit",
+    "add_image_observation": True,
+    "normalize_input": False,
+    "use_prev_action": True,
+    "use_learned_embeddings": True,
+    "pixel_size": 1,
+}
+
+# params different between exps
+params_grid = [
+    {
+        "seed": list(range(1)),
+        "actor_hidden_dim": [512],
+        "actor_depth": [3],
+    },
+]
+
+experiments_list = create_experiments_helper(
+    experiment_name=name,
+    project_name="nle_simba",
+    with_neptune=False,
+    script="python3 mrunner_run.py",
+    python_path=".",
+    tags=[name],
+    env={
+        "WANDB_API_KEY": os.environ["WANDB_API_KEY"],
+    },
+    base_config=config,
+    params_grid=params_grid,
+    mrunner_ignore=".mrunnerignore",
+)