cinderella drl

tomyaacov · Apr 9, 2024 · 63d57e4 · 63d57e4
1 parent 842aca7
commit 63d57e4
Show file tree

Hide file tree

Showing 5 changed files with 48 additions and 11 deletions.
diff --git a/.gitignore b/.gitignore
@@ -136,3 +136,4 @@ dmypy.json
 model_checking/output/
 llm/secrets/*
 Dockerfile
+drl/output/
diff --git a/drl/bp_callback_mask.py b/drl/bp_callback_mask.py
@@ -6,7 +6,7 @@ class BPCallbackMask(BaseCallback):
 
     :param verbose: Verbosity level: 0 for no output, 1 for info messages, 2 for debug messages
     """
-    def __init__(self, verbose=0, repeat=1):
+    def __init__(self, verbose=0, repeat=1, threshold=0):
         super(BPCallbackMask, self).__init__(verbose)
         # Those variables will be accessible in the callback
         # (they are defined in the base class)
@@ -27,8 +27,12 @@ def __init__(self, verbose=0, repeat=1):
         # self.parent = None  # type: Optional[BaseCallback]
         self.should_end = False
         self.repeat = repeat
+        self.threshold = threshold
 
-    def test(self, model, env, threshold=0):
+
+    def test(self, model, env, threshold=None):
+        if threshold is None:
+            threshold = self.threshold
         _env = env.envs[0]
         observation = env.reset()
         reward_sum = 0

diff --git a/drl/cinderella_single_trace_drl.py b/drl/cinderella_single_trace_drl.py
@@ -13,7 +13,7 @@
 import random
 
 parser = argparse.ArgumentParser()
-parser.add_argument("parameters", nargs="*", default=[4, 5, 3, 5, 1_000])
+parser.add_argument("parameters", nargs="*", default=[4, 8, 2, 5, 10_000])
 args = parser.parse_args()
 
 A = int(args.parameters[0])
@@ -44,16 +44,18 @@ def action_masks(self):
                 observation_space=CinderellaObservationSpace([B+1] * N),
                 reward_function=lambda rewards: sum(filter(None, rewards)))
 
+
+
 log_dir = "output/" + RUN + "/"
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
     env = Monitor(env, log_dir)
     os.makedirs(log_dir, exist_ok=True)
     model = MaskablePPO("MlpPolicy", env, verbose=0)
 
-    callback = BPCallbackMask()
+    callback = BPCallbackMask(threshold=-0.5)
     model.learn(total_timesteps=STEPS,
-            callback=callback)
+                callback=callback)
 
 
 if model.num_timesteps >= STEPS:

diff --git a/drl/scripts/run_cinderella_single_trace_drl.sh b/drl/scripts/run_cinderella_single_trace_drl.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+### sbatch config parameters must start with #SBATCH and must precede any other command. To ignore one, just add another # - like so: ##SBATCH
+#SBATCH --partition main ### specify partition name where to run a job
+#SBATCH --time 7-00:00:00 ### limit the time of job running. Format: D-H:MM:SS
+#SBATCH --job-name run_cinderella_single_trace_drl ### name of the job. replace my_job with your desired job name
+#SBATCH --output run_cinderella_single_trace_drl.out ### output log for running job - %J is the job number variable
+#SBATCH [email protected] ### user's email for sending job status notifications - replace with yours
+#SBATCH --mail-type=BEGIN,END,FAIL ### conditions when to send the email: ALL, BEGIN, END, FAIL, REQUEUE, NONE
+#SBATCH --mem=32G ### total amount of RAM // 500
+#SBATCH --ntasks=1
+#SBATCH --gpus=1
+
+### Start your code below ####
+module load anaconda ### load anaconda module
+source activate BPpyLiveness ### activating Conda environment. Environment must be configured before running the job
+cd ~/repos/BPpyEvaluation/drl || exit
+
+#options = ()
+#options=(
+#"4 8 2 5" "5 10 2 5" "6 12 2 5" "7 14 2 5"
+#"4 8 2 6" "5 10 2 6" "6 12 2 6" "7 14 2 6"
+#"4 8 2 7" "5 10 2 7" "6 12 2 7" "7 14 2 7"
+#)
+options=(
+"4 8 2 5"
+)
+
+echo "option,run,time,memory" > run_cinderella_single_trace_drl_output.csv
+for option in "${options[@]}"; do
+  echo "$option"
+#  for i in {1..30}
+  for i in {1..2}
+  do
+    timeout 120m /usr/bin/time -a -o run_cinderella_single_trace_drl_output.csv -f "$option,$i,%E,%M" ~/.conda/envs/BPpyLiveness/bin/python cinderella_single_trace_drl.py $option $i
+  done
+done
diff --git a/drl/scripts/run_cinderella_single_trace_search.sh b/drl/scripts/run_cinderella_single_trace_search.sh
@@ -15,12 +15,6 @@ source activate bppy-py39 ### activating Conda environment. Environment must be
 cd ~/repos/BPpyEvaluation/drl || exit
 
 #options = ()
-#options=(
-#"200 25" "200 50" "200 75" "200 100"
-#"300 25" "300 50" "300 75" "300 100"
-#"400 25" "400 50" "400 75" "400 100"
-#"500 25" "500 50" "500 75" "500 100"
-#)
 options=(
 "4 8 2 5" "5 10 2 5" "6 12 2 5" "7 14 2 5"
 "4 8 2 6" "5 10 2 6" "6 12 2 6" "7 14 2 6"