diff --git a/.github/workflows/test_api_cuda.yaml b/.github/workflows/test_api_cuda.yaml index 3b1721c0..28d9b435 100644 --- a/.github/workflows/test_api_cuda.yaml +++ b/.github/workflows/test_api_cuda.yaml @@ -37,15 +37,20 @@ jobs: --tag opt-bench-cuda:${{ matrix.image.cuda_version }} . + - name: Get GPUs with most free memory + id: get_devices + run: | + echo "devices=$(nvidia-smi --query-gpu=memory.free,index --format=csv,noheader,nounits | sort -n -k1 | tail -n 2 | awk -F', ' '{print $2}' | xargs echo -n | sed 's/ /,/g' | awk '{print $0}')" >> $GITHUB_OUTPUT + - name: Run tests run: docker run --rm --pid host --shm-size 64G --env USE_CUDA="1" + --gpus '"device=${{ steps.get_devices.outputs.devices }}"' --volume $(pwd):/workspace/optimum-benchmark --workdir /workspace/optimum-benchmark - --gpus '"device=0,1"' --entrypoint /bin/bash opt-bench-cuda:${{ matrix.image.cuda_version }} -c "pip install -e .[testing,timm,diffusers] && pytest -k 'api and cuda' -x" diff --git a/.github/workflows/test_cli_cuda_onnxruntime.yaml b/.github/workflows/test_cli_cuda_onnxruntime.yaml index 0e915b91..adb31be3 100644 --- a/.github/workflows/test_cli_cuda_onnxruntime.yaml +++ b/.github/workflows/test_cli_cuda_onnxruntime.yaml @@ -28,6 +28,11 @@ jobs: --tag opt-bench-cuda:11.8.0 . 
+ - name: Get GPUs with most free memory + id: get_devices + run: | + echo "devices=$(nvidia-smi --query-gpu=memory.free,index --format=csv,noheader,nounits | sort -n -k1 | tail -n 2 | awk -F', ' '{print $2}' | xargs echo -n | sed 's/ /,/g' | awk '{print $0}')" >> $GITHUB_OUTPUT + - name: Run tests run: docker run --rm @@ -35,8 +40,8 @@ jobs: --shm-size 64G --env USE_CUDA="1" --entrypoint /bin/bash + --gpus '"device=${{ steps.get_devices.outputs.devices }}"' --volume $(pwd):/workspace/optimum-benchmark --workdir /workspace/optimum-benchmark - --gpus '"device=0,1"' opt-bench-cuda:11.8.0 -c "pip install -e .[testing,onnxruntime-gpu,diffusers,timm] && pytest -k 'cli and cuda and onnxruntime' -x" diff --git a/.github/workflows/test_cli_cuda_pytorch.yaml b/.github/workflows/test_cli_cuda_pytorch.yaml index ebf2bf6c..204722db 100644 --- a/.github/workflows/test_cli_cuda_pytorch.yaml +++ b/.github/workflows/test_cli_cuda_pytorch.yaml @@ -37,15 +37,20 @@ jobs: --tag opt-bench-cuda:${{ matrix.image.cuda_version }} . 
+ - name: Get GPUs with most free memory + id: get_devices + run: | + echo "devices=$(nvidia-smi --query-gpu=memory.free,index --format=csv,noheader,nounits | sort -n -k1 | tail -n 2 | awk -F', ' '{print $2}' | xargs echo -n | sed 's/ /,/g' | awk '{print $0}')" >> $GITHUB_OUTPUT + - name: Run tests run: docker run --rm --pid host --shm-size 64G --env USE_CUDA="1" + --gpus '"device=${{ steps.get_devices.outputs.devices }}"' --volume $(pwd):/workspace/optimum-benchmark --workdir /workspace/optimum-benchmark - --gpus '"device=0,1"' --entrypoint /bin/bash opt-bench-cuda:${{ matrix.image.cuda_version }} -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest -k 'cli and cuda and pytorch' -x" diff --git a/.github/workflows/test_cli_cuda_torch_ort.yaml b/.github/workflows/test_cli_cuda_torch_ort.yaml index 725d147c..680f3f0f 100644 --- a/.github/workflows/test_cli_cuda_torch_ort.yaml +++ b/.github/workflows/test_cli_cuda_torch_ort.yaml @@ -28,6 +28,11 @@ jobs: --tag opt-bench-cuda:11.8.0 . 
+ - name: Get GPUs with most free memory + id: get_devices + run: | + echo "devices=$(nvidia-smi --query-gpu=memory.free,index --format=csv,noheader,nounits | sort -n -k1 | tail -n 2 | awk -F', ' '{print $2}' | xargs echo -n | sed 's/ /,/g' | awk '{print $0}')" >> $GITHUB_OUTPUT + - name: Run tests run: docker run --rm @@ -35,8 +40,8 @@ jobs: --shm-size 64G --env USE_CUDA="1" --entrypoint /bin/bash + --gpus '"device=${{ steps.get_devices.outputs.devices }}"' --volume $(pwd):/workspace/optimum-benchmark --workdir /workspace/optimum-benchmark - --gpus '"device=0,1"' opt-bench-cuda:11.8.0 -c "pip install -e .[testing,torch-ort,peft] && python -m torch_ort.configure && pytest -k 'cli and cuda and torch_ort' -x" diff --git a/.github/workflows/test_cli_rocm_onnxruntime.yaml b/.github/workflows/test_cli_rocm_onnxruntime.yaml index 85ad9abb..8be58292 100644 --- a/.github/workflows/test_cli_rocm_onnxruntime.yaml +++ b/.github/workflows/test_cli_rocm_onnxruntime.yaml @@ -21,7 +21,7 @@ jobs: - name: Check if image exists id: check_image run: | - if [[ "$(docker images -q opt-bench-rocm-ort:5.7 2> /dev/null)" == "" ]]; then + if [[ "$(docker images -q opt-bench-rocm-ort:latest 2> /dev/null)" == "" ]]; then echo "::set-output name=exists::false" else echo "::set-output name=exists::true" @@ -33,8 +33,7 @@ jobs: --file docker/rocm-ort.dockerfile --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) - --build-arg ROCM_VERSION=5.7 - --tag opt-bench-rocm-ort:5.7 + --tag opt-bench-rocm-ort:latest . 
- name: Run tests @@ -49,5 +48,5 @@ jobs: --device /dev/dri/renderD128 --device /dev/dri/renderD129 --entrypoint /bin/bash - opt-bench-rocm-ort:5.7 + opt-bench-rocm-ort:latest -c "pip install -e .[testing,timm,diffusers] && pytest -k 'cli and rocm and onnxruntime' -x" diff --git a/.github/workflows/test_cli_tensorrt_llm.yaml b/.github/workflows/test_cli_tensorrt_llm.yaml index 4d8a1d0f..40438055 100644 --- a/.github/workflows/test_cli_tensorrt_llm.yaml +++ b/.github/workflows/test_cli_tensorrt_llm.yaml @@ -26,15 +26,20 @@ jobs: --tag opt-bench-tensorrt-llm:latest . + - name: Get GPUs with most free memory + id: get_devices + run: | + echo "devices=$(nvidia-smi --query-gpu=memory.free,index --format=csv,noheader,nounits | sort -n -k1 | tail -n 2 | awk -F', ' '{print $2}' | xargs echo -n | sed 's/ /,/g' | awk '{print $0}')" >> $GITHUB_OUTPUT + - name: Run tests run: docker run --rm --pid host --shm-size 64G --env USE_CUDA="1" + --gpus '"device=${{ steps.get_devices.outputs.devices }}"' --volume $(pwd):/workspace/optimum-benchmark --workdir /workspace/optimum-benchmark - --gpus '"device=0,1"' --entrypoint /bin/bash opt-bench-tensorrt-llm:latest -c "pip install -e .[testing] && pip uninstall -y nvidia-ml-py && pytest -k 'cli and tensorrt_llm' -x" diff --git a/.github/workflows/test_cli_tensorrt_onnxruntime.yaml b/.github/workflows/test_cli_tensorrt_onnxruntime.yaml index d8e914f7..a98bfc15 100644 --- a/.github/workflows/test_cli_tensorrt_onnxruntime.yaml +++ b/.github/workflows/test_cli_tensorrt_onnxruntime.yaml @@ -23,9 +23,7 @@ jobs: --file docker/tensorrt.dockerfile --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) - --build-arg TENSORRT_VERSION=22.12 - --build-arg TORCH_CUDA=cu118 - --tag opt-bench-tensorrt:22.12 + --tag opt-bench-tensorrt:latest . 
- name: Run tests @@ -38,5 +36,5 @@ jobs: --workdir /workspace/optimum-benchmark --gpus '"device=0,1"' --entrypoint /bin/bash - opt-bench-tensorrt:22.12 + opt-bench-tensorrt:latest -c "pip install -e .[testing,onnxruntime-gpu,diffusers,timm] && pytest -k 'cli and tensorrt and onnxruntime' -x"