From 39133a59a91e3b6c5dc369a76d26a833e08fad39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bart=C5=82omiej=20Cupia=C5=82?=
 <bartlomiej.cupial@gmail.com>
Date: Mon, 27 Jan 2025 17:58:01 +0100
Subject: [PATCH] last experiments

---
 mrunner.yaml                     |  2 +-
 train_configs/simba2_separate.py | 84 ++++++++++++++++++++++++++++++++
 train_configs/vit2.py            | 78 +++++++++++++++++++++++++++++
 3 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 train_configs/simba2_separate.py
 create mode 100644 train_configs/vit2.py

diff --git a/mrunner.yaml b/mrunner.yaml
index cf05422d7..c859f0ca1 100644
--- a/mrunner.yaml
+++ b/mrunner.yaml
@@ -43,4 +43,4 @@ contexts:
     singularity_container: --nv -H $PWD:/homeplaceholder --env WANDBPWD=$PWD -B /net:/net -B $TMPDIR:/tmp /net/pr2/projects/plgrid/plgggmum_crl/bcupial/ncw.sif
     slurm_url: plgbartekcupial@athena.cyfronet.pl
     storage_dir: /net/tscratch/people/plgbartekcupial/
-    time: 2880
+    time: 1000
diff --git a/train_configs/simba2_separate.py b/train_configs/simba2_separate.py
new file mode 100644
index 000000000..7c06a6e6f
--- /dev/null
+++ b/train_configs/simba2_separate.py
@@ -0,0 +1,84 @@
+import os
+
+from mrunner.helpers.specification_helper import create_experiments_helper
+
+name = globals()["script"][:-3]  # `script` global minus its last 3 chars; presumably mrunner injects this file's name and this strips ".py" - confirm
+
+num_minibatches = 1  # forwarded as "num_batches_per_epoch"
+num_epochs = 1  # forwarded as "num_epochs"
+num_envs = 512  # total across workers; divided by num_workers in config below
+batch_size = 4096
+num_steps = 32  # forwarded as "rollout"
+num_workers = 16
+
+# Base parameters shared by every experiment generated from this file.
+config = {
+    "exp_tag": name,
+    "env": "nethack_score",
+    "run_script": "sf_examples.nethack.train_nethack",
+    "train_for_env_steps": 100_000_000,
+    "num_workers": num_workers,
+    "num_envs_per_worker": num_envs // num_workers,  # 512 // 16 = 32
+    "worker_num_splits": 2,
+    "rollout": num_steps,
+    "batch_size": batch_size,
+    "num_batches_per_epoch": num_minibatches,
+    "num_epochs": num_epochs,
+    "penalty_step": 0.0,
+    "penalty_time": 0.0,
+    "async_rl": True,
+    "serial_mode": False,
+    "wandb_user": "bartekcupial",
+    "wandb_project": "nle_simba",
+    "wandb_group": "ideas-ncbr",
+    "with_wandb": True,
+    "decorrelate_envs_on_one_worker": True,
+    "character": "mon-hum-neu-mal",
+    "max_grad_norm": 40.0,
+    "learning_rate": 1e-4,
+    "exploration_loss_coeff": 0.001,
+    "gamma": 0.999,
+    "gae_lambda": 1.0,
+    "value_loss_coeff": 1.0,
+    "actor_critic_share_weights": False,
+    "critic_hidden_dim": 32,  # this and the three size keys below also appear in params_grid; presumably the grid values take precedence - confirm
+    "critic_depth": 1,
+    "actor_hidden_dim": 512,
+    "actor_depth": 3,
+    "model": "simba",
+    "add_image_observation": True,
+    "normalize_input": False,
+    "use_prev_action": True,
+    "use_learned_embeddings": True,
+    "use_max_pool": True,
+    "pixel_size": 1,
+    "expansion": 1,
+}
+
+# Per-experiment parameters: one grid entry per (critic_depth, actor_depth) pair, 3 x 3 = 9 entries.
+params_grid = [
+    {
+        "seed": list(range(1)),  # [0], i.e. a single seed
+        "actor_hidden_dim": [128 * actor_depth],  # width grows with depth: 128, 256 or 384
+        "actor_depth": [actor_depth],
+        "critic_hidden_dim": [128 * critic_depth],  # same scaling rule as the actor
+        "critic_depth": [critic_depth],
+    }
+    for critic_depth in [1, 2, 3]  # outer loop: critic depth varies slowest
+    for actor_depth in [1, 2, 3]  # inner loop: actor depth varies fastest
+]
+
+experiments_list = create_experiments_helper(  # builds the experiment specs from config + params_grid
+    experiment_name=name,
+    project_name="nle_simba",
+    with_neptune=False,
+    script="python3 mrunner_run.py",
+    python_path=".",
+    tags=[name],
+    env={
+        "WANDB_API_KEY": os.environ["WANDB_API_KEY"],  # raises KeyError when the variable is not set
+    },
+    base_config=config,
+    params_grid=params_grid,
+    mrunner_ignore=".mrunnerignore",
+)
diff --git a/train_configs/vit2.py b/train_configs/vit2.py
new file mode 100644
index 000000000..fc00fdea2
--- /dev/null
+++ b/train_configs/vit2.py
@@ -0,0 +1,78 @@
+import os
+
+from mrunner.helpers.specification_helper import create_experiments_helper
+
+name = globals()["script"][:-3]  # `script` global minus its last 3 chars; presumably mrunner injects this file's name and this strips ".py" - confirm
+
+num_minibatches = 1  # forwarded as "num_batches_per_epoch"
+num_epochs = 1  # forwarded as "num_epochs"
+num_envs = 512  # total across workers; divided by num_workers in config below
+batch_size = 4096
+num_steps = 32  # forwarded as "rollout"
+num_workers = 16
+
+# Base parameters shared by every experiment generated from this file.
+config = {
+    "exp_tag": name,
+    "env": "nethack_score",
+    "run_script": "sf_examples.nethack.train_nethack",
+    "train_for_env_steps": 100_000_000,
+    "num_workers": num_workers,
+    "num_envs_per_worker": num_envs // num_workers,  # 512 // 16 = 32
+    "worker_num_splits": 2,
+    "rollout": num_steps,
+    "batch_size": batch_size,
+    "num_batches_per_epoch": num_minibatches,
+    "num_epochs": num_epochs,
+    "penalty_step": 0.0,
+    "penalty_time": 0.0,
+    "async_rl": True,
+    "serial_mode": False,
+    "wandb_user": "bartekcupial",
+    "wandb_project": "nle_simba",
+    "wandb_group": "ideas-ncbr",
+    "with_wandb": True,
+    "decorrelate_envs_on_one_worker": True,
+    "character": "mon-hum-neu-mal",
+    "max_grad_norm": 40.0,
+    "learning_rate": 1e-4,
+    "exploration_loss_coeff": 0.001,
+    "gamma": 0.999,
+    "gae_lambda": 1.0,
+    "value_loss_coeff": 1.0,
+    "actor_critic_share_weights": True,  # NOTE(review): with shared weights the critic_* sizes below may be unused - confirm
+    "critic_hidden_dim": 32,
+    "critic_depth": 1,
+    "actor_hidden_dim": 512,  # repeated with the same value in params_grid
+    "actor_depth": 3,  # repeated with the same value in params_grid
+    "model": "vit",
+    "add_image_observation": True,
+    "normalize_input": False,
+    "use_prev_action": True,
+    "use_learned_embeddings": True,
+    "pixel_size": 1,
+}
+
+# Per-experiment parameters: a single grid entry whose lists each hold one value.
+params_grid = [
+    {
+        "seed": list(range(1)),  # [0], i.e. a single seed
+        "actor_hidden_dim": [512],  # same value as in the base config
+        "actor_depth": [3],  # same value as in the base config
+    },
+]
+
+experiments_list = create_experiments_helper(  # builds the experiment specs from config + params_grid
+    experiment_name=name,
+    project_name="nle_simba",
+    with_neptune=False,
+    script="python3 mrunner_run.py",
+    python_path=".",
+    tags=[name],
+    env={
+        "WANDB_API_KEY": os.environ["WANDB_API_KEY"],  # raises KeyError when the variable is not set
+    },
+    base_config=config,
+    params_grid=params_grid,
+    mrunner_ignore=".mrunnerignore",
+)