auto detect which model to run (#36)
* auto detect which model to run

* fix model name

* remove max_samples

* fix bash script

* chore: max tokens

---------

Co-authored-by: Justus Mattern <[email protected]>
felix-red-panda and justusmattern27 authored Feb 4, 2025
1 parent 8a539ea commit 0fba3ac
Showing 5 changed files with 61 additions and 3 deletions.
7 changes: 5 additions & 2 deletions README.md
@@ -31,6 +31,11 @@ export GCP_CREDENTIALS_BASE64=$(base64 -w 0 /path/to/your/service-account-key.js
 uv run python src/genesys/generate.py @ configs/debug.toml --gcp_bucket checkpoints_pi/test_data
 ```
 
+to automatically detect the right model to run:
+```sh
+./script/entrypoint.sh
+```
+
 for dev setup:
 
 ```
@@ -59,5 +64,3 @@ run
 ```
 sudo docker run --gpus all -it primeintellect/genesys:latest uv run python src/genesys/generate.py @ configs/debug.toml
 ```
-
-
1 change: 0 additions & 1 deletion configs/r1.toml → configs/deepseek_r1.toml
@@ -6,6 +6,5 @@ top_p = 0.95 # 0.95 is the value from the R1 model card for evaluation
 max_tokens = 12288
 
 [data]
-max_samples = 2000000
 batch_size = 512
 path = "justus27/test-vcu,justus27/test-vfc,justus27/test-jdg,justus27/test-vfm"
9 changes: 9 additions & 0 deletions configs/llama70b.toml
@@ -0,0 +1,9 @@
name_model = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
num_gpus = 8
sample_per_file = 512
temperature = 0.6 # 0.6 is the value from the R1 model card for evaluation
top_p = 0.95 # 0.95 is the value from the R1 model card for evaluation

[data]
batch_size = 512
path = "justus27/test-vcu,justus27/test-vfc,justus27/test-jdg,justus27/test-vfm"
17 changes: 17 additions & 0 deletions script/entrypoint.sh
@@ -0,0 +1,17 @@
#!/bin/bash

# Save the original stdout on FD 3 so tee can still write to the terminal
# from inside the command substitution below
exec 3>&1

# Capture both stdout and stderr while also printing to the terminal.
# pipefail makes the substitution's exit status reflect the python script
# rather than tee (PIPESTATUS is not visible outside the subshell).
output=$(set -o pipefail; uv run python src/genesys/auto_detect_model_config.py 2>&1 | tee /dev/fd/3)
exit_code=$?

if [ $exit_code -ne 0 ]; then
    echo "Error: $output"
    exit $exit_code
fi

model_name=$output
echo "The model to run is: $model_name"
uv run python src/genesys/generate.py @ configs/"$model_name".toml
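
For readers who prefer Python, here is a minimal sketch of the same flow as the shell script above, assuming the same repository layout: detect the model, surface the detector's output, then launch generation. The shell script remains the actual entrypoint.

```python
import subprocess
import sys

# Run the detector, capturing its output while still echoing it,
# much like the tee /dev/fd/3 trick in the shell script.
result = subprocess.run(
    ["uv", "run", "python", "src/genesys/auto_detect_model_config.py"],
    capture_output=True,
    text=True,
)
print(result.stdout, end="")
if result.returncode != 0:
    print(f"Error: {result.stderr}", file=sys.stderr)
    sys.exit(result.returncode)

model_name = result.stdout.strip()
print(f"The model to run is: {model_name}")
subprocess.run(
    ["uv", "run", "python", "src/genesys/generate.py",
     "@", f"configs/{model_name}.toml"],
    check=True,
)
```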
30 changes: 30 additions & 0 deletions src/genesys/auto_detect_model_config.py
@@ -0,0 +1,30 @@
import sys

import torch


def get_model_name(device_name: str) -> str:
    """Pick the config name for the model this GPU can serve."""
    if "A100" in device_name:
        return "llama70b"
    elif "H100" in device_name:
        # All H100 variants (NVL, PCIe, SXM) get the same 70B config;
        # specs: https://www.nvidia.com/en-us/data-center/h100/
        return "llama70b"
    elif "H200" in device_name:
        return "deepseek_r1"
    else:  # for other GPU types, assume A100-class capacity
        return "llama70b"


if __name__ == "__main__":
    try:
        device_info_torch = torch.cuda.get_device_name(torch.device("cuda:0"))
        run_model_name = get_model_name(device_info_torch)
        print(run_model_name)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
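
A quick spot-check of the mapping in get_model_name. The device strings below are hypothetical approximations of what torch.cuda.get_device_name reports for these SKUs and may differ across drivers:

```python
assert get_model_name("NVIDIA A100-SXM4-80GB") == "llama70b"
assert get_model_name("NVIDIA H100 PCIe") == "llama70b"
assert get_model_name("NVIDIA H200") == "deepseek_r1"
assert get_model_name("NVIDIA GeForce RTX 4090") == "llama70b"  # fallback branch
```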
