From 212423cc1679b6d3b2e272e408e25416da635d9e Mon Sep 17 00:00:00 2001
From: maturk
Date: Tue, 23 Jul 2024 15:42:54 +0300
Subject: [PATCH 1/6] benchmark

---
 examples/benchmark.py     | 146 ++++++++++++++++++++++++++++++++++++++
 examples/requirements.txt |   1 +
 2 files changed, 147 insertions(+)
 create mode 100644 examples/benchmark.py

diff --git a/examples/benchmark.py b/examples/benchmark.py
new file mode 100644
index 000000000..87b4fa853
--- /dev/null
+++ b/examples/benchmark.py
@@ -0,0 +1,146 @@
+# Benchmark script
+
+import glob
+import os
+import time
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass, field
+
+import GPUtil
+
+
+@dataclass
+class BenchmarkConfig:
+    """Baseline config"""
+
+    # trainer to run
+    trainer: str = "simple_trainer.py"
+    # path to data
+    data_dir: str = "data/360_v2"
+    # scenes to run
+    scenes: set[str] = (
+        "bicycle",
+        "bonsai",
+        "counter",
+        "garden",
+        "stump",
+        "kitchen",
+        "room",
+    )
+    # downscale factors
+    factors: set[str] = (4, 2, 2, 4, 4, 2, 2)
+    # exclude gpus
+    excluded_gpus: set = field(default_factory=set)
+    # result directory
+    result_dir: str = "results/baseline"
+    # dry run, useful for debugging
+    dry_run: bool = False
+    # extra model specific configs
+    model_configs: dict = field(default_factory=dict)
+
+
+# Configurations to run
+baseline_config = BenchmarkConfig(model_configs={"--max_steps": 1})
+baseline_config_absgrad = BenchmarkConfig(
+    result_dir="results/absgrad",
+    model_configs={"--absgrad": True, "--grow_grad2d": 0.0006},
+)
+baseline_config_antialiased = BenchmarkConfig(
+    result_dir="results/antialiased", model_configs={"--antialiased": True}
+)
+mcmc_config = BenchmarkConfig(
+    trainer="simple_trainer_mcmc.py",
+    result_dir="results/mcmc",
+    model_configs={"--max_steps": 30000},
+)
+
+configs_to_run = [
+    mcmc_config,
+    # baseline_config,
+    # baseline_config_absgrad,
+    # baseline_config_antialiased,
+]
+
+
+def train_scene(gpu, scene, factor, config):
+    # additional user set model configs
+    model_config_args = " ".join(f"{k} {v}" for k, v in config.model_configs.items())
+
+    # train without eval
+    cmd = f"OMP_NUM_THREADS=4 CUDA_VISIBLE_DEVICES={gpu} python {config.trainer} --eval_steps -1 --disable_viewer --data_factor {factor} --data_dir {config.data_dir}/{scene} --result_dir {config.result_dir}/{scene} {model_config_args}"
+
+    print(cmd)
+    if not config.dry_run:
+        os.system(cmd)
+
+    # eval and render for all the ckpts
+    ckpts = glob.glob(f"{config.result_dir}/{scene}/ckpts/*.pt")
+    for ckpt in ckpts:
+        cmd = f"OMP_NUM_THREADS=4 CUDA_VISIBLE_DEVICES={gpu} python {config.trainer} --disable_viewer --data_factor {factor} --data_dir {config.data_dir}//{scene} --result_dir {config.result_dir}/{scene} --ckpt {ckpt} {model_config_args}"
+        print(cmd)
+        if not config.dry_run:
+            os.system(cmd)
+
+    return True
+
+
+def worker(gpu, scene, factor, config):
+    print(f"Starting {config.trainer} job on GPU {gpu} with scene {scene}\n")
+    train_scene(gpu, scene, factor, config)
+    print(f"Finished {config.trainer} job on GPU {gpu} with scene {scene}\n")
+    # This worker function starts a job and returns when it's done.
+
+
+def dispatch_jobs(jobs, executor, config):
+    future_to_job = {}
+    reserved_gpus = set()  # GPUs that are slated for work but may not be active yet
+
+    while jobs or future_to_job:
+        # Get the list of available GPUs, not including those that are reserved.
+        all_available_gpus = set(
+            GPUtil.getAvailable(order="first", limit=10, maxMemory=0.1, maxLoad=0.1)
+        )
+        # all_available_gpus = set([0,1,2,3])
+        available_gpus = list(all_available_gpus - reserved_gpus - config.excluded_gpus)
+
+        # Launch new jobs on available GPUs
+        while available_gpus and jobs:
+            gpu = available_gpus.pop(0)
+            job = jobs.pop(0)
+            future = executor.submit(
+                worker, gpu, *job, config
+            )  # Unpacking job as arguments to worker
+            future_to_job[future] = (gpu, job)
+
+            reserved_gpus.add(gpu)  # Reserve this GPU until the job starts processing
+
+        # Check for completed jobs and remove them from the list of running jobs.
+        # Also, release the GPUs they were using.
+        done_futures = [future for future in future_to_job if future.done()]
+        for future in done_futures:
+            job = future_to_job.pop(
+                future
+            )  # Remove the job associated with the completed future
+            gpu = job[0]  # The GPU is the first element in each job tuple
+            reserved_gpus.discard(gpu)  # Release this GPU
+            print(f"Job {job} has finished., releasing GPU {gpu}")
+        # (Optional) You might want to introduce a small delay here to prevent this loop from spinning very fast
+        # when there are no GPUs available.
+        time.sleep(5)
+
+    print("All jobs have been processed.")
+
+
+def main():
+    """Launch batch_configs in serial"""
+    for config in configs_to_run:
+        # num jobs = num scenes to run for batch_config
+        jobs = list(zip(config.scenes, config.factors))
+        print(jobs)
+        # Using ThreadPoolExecutor to manage the thread pool
+        with ThreadPoolExecutor(max_workers=8) as executor:
+            dispatch_jobs(jobs, executor, config)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/requirements.txt b/examples/requirements.txt
index 73fa4f50a..462b6f71a 100644
--- a/examples/requirements.txt
+++ b/examples/requirements.txt
@@ -16,3 +16,4 @@ opencv-python
 tyro
 Pillow
 tensorboard
+GPUtil

From 01f95f9b149f5e238afd8752fb6e6471d702a586 Mon Sep 17 00:00:00 2001
From: maturk
Date: Tue, 23 Jul 2024 15:49:09 +0300
Subject: [PATCH 2/6] update req

---
 examples/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/requirements.txt b/examples/requirements.txt
index 462b6f71a..1ea3f6045 100644
--- a/examples/requirements.txt
+++ b/examples/requirements.txt
@@ -17,3 +17,4 @@ tyro
 Pillow
 tensorboard
 GPUtil
+tyro

From 141ebe6ad2552277523822c28404666fd9653686 Mon Sep 17 00:00:00 2001
From: maturk
Date: Tue, 23 Jul 2024 16:22:39 +0300
Subject: [PATCH 3/6] update

---
 examples/benchmark.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/benchmark.py b/examples/benchmark.py
index 87b4fa853..ccb5d65fb 100644
--- a/examples/benchmark.py
+++ b/examples/benchmark.py
@@ -18,7 +18,7 @@ class BenchmarkConfig:
     # path to data
     data_dir: str = "data/360_v2"
     # scenes to run
-    scenes: set[str] = (
+    scenes: set = (
         "bicycle",
         "bonsai",
         "counter",
@@ -28,7 +28,7 @@ class BenchmarkConfig:
         "room",
     )
     # downscale factors
-    factors: set[str] = (4, 2, 2, 4, 4, 2, 2)
+    factors: set = (4, 2, 2, 4, 4, 2, 2)
     # exclude gpus
     excluded_gpus: set = field(default_factory=set)
     # result directory
@@ -40,7 +40,7 @@ class BenchmarkConfig:
 
 
 # Configurations to run
-baseline_config = BenchmarkConfig(model_configs={"--max_steps": 1})
+baseline_config = BenchmarkConfig(model_configs={"--max_steps": 30000})
 baseline_config_absgrad = BenchmarkConfig(
     result_dir="results/absgrad",
     model_configs={"--absgrad": True, "--grow_grad2d": 0.0006},

From ac748c5bb487885ae208453b7eb17a6a949dba69 Mon Sep 17 00:00:00 2001
From: maturk
Date: Wed, 24 Jul 2024 01:07:33 +0300
Subject: [PATCH 4/6] cleanup benchmark.py

---
 examples/benchmark.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/examples/benchmark.py b/examples/benchmark.py
index ccb5d65fb..b73efae1d 100644
--- a/examples/benchmark.py
+++ b/examples/benchmark.py
@@ -11,7 +11,7 @@
 
 @dataclass
 class BenchmarkConfig:
-    """Baseline config"""
+    """Benchmark config"""
 
     # trainer to run
     trainer: str = "simple_trainer.py"
@@ -40,29 +40,29 @@ class BenchmarkConfig:
 
 
 # Configurations to run
-baseline_config = BenchmarkConfig(model_configs={"--max_steps": 30000})
-baseline_config_absgrad = BenchmarkConfig(
+baseline_config = BenchmarkConfig()
+absgrad_config = BenchmarkConfig(
     result_dir="results/absgrad",
     model_configs={"--absgrad": True, "--grow_grad2d": 0.0006},
 )
-baseline_config_antialiased = BenchmarkConfig(
+antialiased_config = BenchmarkConfig(
     result_dir="results/antialiased", model_configs={"--antialiased": True}
 )
 mcmc_config = BenchmarkConfig(
     trainer="simple_trainer_mcmc.py",
     result_dir="results/mcmc",
-    model_configs={"--max_steps": 30000},
 )
 
 configs_to_run = [
-    mcmc_config,
-    # baseline_config,
-    # baseline_config_absgrad,
-    # baseline_config_antialiased,
+    baseline_config,
+    # mcmc_config,
+    # absgrad_config,
+    # antialiased_config,
 ]
 
 
 def train_scene(gpu, scene, factor, config):
+    """Train a single scene with config on current gpu"""
     # additional user set model configs
     model_config_args = " ".join(f"{k} {v}" for k, v in config.model_configs.items())
 
@@ -85,10 +85,10 @@ def train_scene(gpu, scene, factor, config):
 
 
 def worker(gpu, scene, factor, config):
+    """This worker function starts a job and returns when it's done."""
     print(f"Starting {config.trainer} job on GPU {gpu} with scene {scene}\n")
     train_scene(gpu, scene, factor, config)
     print(f"Finished {config.trainer} job on GPU {gpu} with scene {scene}\n")
-    # This worker function starts a job and returns when it's done.
 
 
 def dispatch_jobs(jobs, executor, config):
@@ -100,7 +100,6 @@ def dispatch_jobs(jobs, executor, config):
         all_available_gpus = set(
             GPUtil.getAvailable(order="first", limit=10, maxMemory=0.1, maxLoad=0.1)
         )
-        # all_available_gpus = set([0,1,2,3])
         available_gpus = list(all_available_gpus - reserved_gpus - config.excluded_gpus)
 
         # Launch new jobs on available GPUs
@@ -111,7 +110,6 @@ def dispatch_jobs(jobs, executor, config):
                 worker, gpu, *job, config
             )  # Unpacking job as arguments to worker
             future_to_job[future] = (gpu, job)
-
             reserved_gpus.add(gpu)  # Reserve this GPU until the job starts processing
 
         # Check for completed jobs and remove them from the list of running jobs.
@@ -132,11 +130,13 @@ def dispatch_jobs(jobs, executor, config):
 
 
 def main():
-    """Launch batch_configs in serial"""
+    """Launch batch_configs in serial but process each config in parallel (multi gpu)"""
+
     for config in configs_to_run:
-        # num jobs = num scenes to run for batch_config
+        # num jobs = num scenes to run for current config
         jobs = list(zip(config.scenes, config.factors))
-        print(jobs)
+
+        # Run multiple gpu train scripts
         # Using ThreadPoolExecutor to manage the thread pool
         with ThreadPoolExecutor(max_workers=8) as executor:
             dispatch_jobs(jobs, executor, config)

From b81a58850f09d9d8202e3960749aa2befa138834 Mon Sep 17 00:00:00 2001
From: maturk
Date: Wed, 24 Jul 2024 13:00:15 +0300
Subject: [PATCH 5/6] fix absgrad antialaiased

---
 examples/benchmark.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/benchmark.py b/examples/benchmark.py
index b73efae1d..075b3de3d 100644
--- a/examples/benchmark.py
+++ b/examples/benchmark.py
@@ -43,10 +43,10 @@ class BenchmarkConfig:
 baseline_config = BenchmarkConfig()
 absgrad_config = BenchmarkConfig(
     result_dir="results/absgrad",
-    model_configs={"--absgrad": True, "--grow_grad2d": 0.0006},
+    model_configs={"--absgrad": "", "--grow_grad2d": 0.0006},
 )
 antialiased_config = BenchmarkConfig(
-    result_dir="results/antialiased", model_configs={"--antialiased": True}
+    result_dir="results/antialiased", model_configs={"--antialiased": ""}
 )
 mcmc_config = BenchmarkConfig(
     trainer="simple_trainer_mcmc.py",

From 9e3322501f8dacc10c3fe6a24ba564212c44665f Mon Sep 17 00:00:00 2001
From: maturk
Date: Thu, 8 Aug 2024 21:15:56 +0300
Subject: [PATCH 6/6] benchmark script

---
 examples/benchmark.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/benchmark.py b/examples/benchmark.py
index 075b3de3d..967746a32 100644
--- a/examples/benchmark.py
+++ b/examples/benchmark.py
@@ -11,7 +11,7 @@
 
 @dataclass
 class BenchmarkConfig:
-    """Benchmark config"""
+    """Baseline benchmark config"""
 
     # trainer to run
     trainer: str = "simple_trainer.py"
@@ -39,7 +39,7 @@ class BenchmarkConfig:
     model_configs: dict = field(default_factory=dict)
 
 
-# Configurations to run
+# Configurations of different GSPLAT options
 baseline_config = BenchmarkConfig()
 absgrad_config = BenchmarkConfig(
     result_dir="results/absgrad",
@@ -53,11 +53,12 @@ class BenchmarkConfig:
     result_dir="results/mcmc",
 )
 
+# Configs to run
 configs_to_run = [
     baseline_config,
-    # mcmc_config,
-    # absgrad_config,
-    # antialiased_config,
+    mcmc_config,
+    absgrad_config,
+    antialiased_config,
 ]
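
For readers skimming the series, the scheduling pattern that dispatch_jobs() introduces in PATCH 1 and tidies up in PATCH 4 reduces to the short sketch below. This is an illustration only and is not part of the patches: the run_job() stand-in, the hard-coded GPU ids, and the job list are invented for the example, whereas the real script discovers GPUs with GPUtil.getAvailable() and launches simple_trainer.py through os.system().

# Minimal, standalone sketch of the dispatch pattern: jobs wait in a queue,
# each free GPU takes one job, and the GPU returns to the pool when the
# corresponding future completes. GPU ids and run_job() are made up here.
import time
from concurrent.futures import ThreadPoolExecutor


def run_job(gpu, scene, factor):
    # Placeholder for train_scene(): pretend to train, then report the scene.
    print(f"GPU {gpu}: training {scene} (factor {factor})")
    time.sleep(1)
    return scene


jobs = [("bicycle", 4), ("bonsai", 2), ("counter", 2)]
free_gpus = [0, 1]  # assumed GPU ids for the sketch
running = {}  # future -> gpu

with ThreadPoolExecutor(max_workers=len(free_gpus)) as pool:
    while jobs or running:
        # Hand out jobs while both a GPU and a job are available.
        while free_gpus and jobs:
            gpu, job = free_gpus.pop(0), jobs.pop(0)
            running[pool.submit(run_job, gpu, *job)] = gpu
        # Reclaim GPUs from finished jobs.
        for fut in [f for f in running if f.done()]:
            free_gpus.append(running.pop(fut))
            print(f"finished {fut.result()}")
        time.sleep(0.1)

The same poll-and-release loop is what lets a single invocation of benchmark.py keep every visible GPU busy until all (scene, factor) pairs for the selected configs have been trained.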