diff --git a/README.md b/README.md
index 25e9ea5..24b7caa 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,11 @@ export GCP_CREDENTIALS_BASE64=$(base64 -w 0 /path/to/your/service-account-key.js
 uv run python src/genesys/generate.py @ configs/debug.toml --gcp_bucket checkpoints_pi/test_data
 ```
 
+to automatically detect the right model config for your GPU and run it:
+```sh
+./script/entrypoint.sh
+```
+
 for dev setup:
 
 ```
@@ -59,5 +64,3 @@ run
 ```
 sudo docker run --gpus all -it primeintellect/genesys:latest uv run python src/genesys/generate.py @ configs/debug.toml
 ```
-
-
diff --git a/configs/r1.toml b/configs/deepseek_r1.toml
similarity index 94%
rename from configs/r1.toml
rename to configs/deepseek_r1.toml
index 2a4e7a3..beda11b 100644
--- a/configs/r1.toml
+++ b/configs/deepseek_r1.toml
@@ -6,6 +6,5 @@ top_p = 0.95 # 0.95 is the value from the R1 model card for evaluation
 max_tokens = 12288
 
 [data]
-max_samples = 2000000
 batch_size = 512
 path = "justus27/test-vcu,justus27/test-vfc,justus27/test-jdg,justus27/test-vfm"
diff --git a/configs/llama70b.toml b/configs/llama70b.toml
new file mode 100644
index 0000000..69b2c5b
--- /dev/null
+++ b/configs/llama70b.toml
@@ -0,0 +1,9 @@
+name_model = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
+num_gpus = 8
+sample_per_file = 512
+temperature = 0.6 # 0.6 is the value from the R1 model card for evaluation
+top_p = 0.95 # 0.95 is the value from the R1 model card for evaluation
+
+[data]
+batch_size = 512
+path = "justus27/test-vcu,justus27/test-vfc,justus27/test-jdg,justus27/test-vfm"
\ No newline at end of file
diff --git a/script/entrypoint.sh b/script/entrypoint.sh
new file mode 100755
index 0000000..c669174
--- /dev/null
+++ b/script/entrypoint.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# Save the original stdout to FD 3 so tee can still mirror output to the terminal
+exec 3>&1
+
+# Capture stdout and stderr while printing to the terminal; pipefail propagates the detection script's exit status
+output=$(set -o pipefail; uv run python src/genesys/auto_detect_model_config.py 2>&1 | tee /dev/fd/3)
+exit_code=$?
+
+if [ $exit_code -ne 0 ]; then
+    echo "Error: $output"
+    exit $exit_code
+fi
+
+model_name=$output
+echo "The model to run is: $model_name"
+uv run python src/genesys/generate.py @ configs/"$model_name".toml
diff --git a/src/genesys/auto_detect_model_config.py b/src/genesys/auto_detect_model_config.py
new file mode 100644
index 0000000..052bff7
--- /dev/null
+++ b/src/genesys/auto_detect_model_config.py
@@ -0,0 +1,30 @@
+import torch
+import sys
+
+
+def get_model_name(device_name: str) -> str:
+    """Return the name of the model config to run on the detected GPU."""
+    if "A100" in device_name:
+        return "llama70b"
+    elif "H100" in device_name:
+        # data from https://www.nvidia.com/en-us/data-center/h100/
+        # NOTE: Specifications are one-half lower without sparsity.
+        if "NVL" in device_name:
+            return "llama70b"
+        elif "PCIe" in device_name:
+            return "llama70b"
+        else:  # for H100 SXM and other variants
+            return "llama70b"
+    elif "H200" in device_name:
+        return "deepseek_r1"
+    else:  # for other GPU types, assume A100
+        return "llama70b"
+
+if __name__ == "__main__":
+    try:
+        device_info_torch = torch.cuda.get_device_name(torch.device("cuda:0"))
+        run_model_name = get_model_name(device_info_torch)
+        print(run_model_name)
+    except Exception as e:
+        print(f"Error: {str(e)}", file=sys.stderr)
+        sys.exit(1)