diff --git a/.github/workflows/unit_test_4gpu.yaml b/.github/workflows/unit_test_4gpu.yaml
index cf077b0c..65116b05 100644
--- a/.github/workflows/unit_test_4gpu.yaml
+++ b/.github/workflows/unit_test_4gpu.yaml
@@ -16,14 +16,23 @@ jobs:
       runner: linux.g5.12xlarge.nvidia.gpu
       gpu-arch-type: cuda
       gpu-arch-version: "12.1"
-      # This image is faster to clone than the default, but it lacks CC needed by triton
-      # (1m25s vs 2m37s)
-      docker-image: "pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime"
+      # Trying how much faster the nvidia-cuda image is
+      docker-image: "nvidia/cuda:12.4.1-runtime-ubuntu22.04"
       repository: "pytorch/torchtitan"
       upload-artifact: "outputs"
       # conda create -n "test" python=3.10
       # conda activate test
       script: |
+        # Refresh the package index first; a fresh image usually has no apt lists to install from.
+        apt-get update
+        apt-get install -y wget
+        mkdir -p ~/miniconda3
+        wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
+        bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
+        rm -rf ~/miniconda3/miniconda.sh
+        ~/miniconda3/bin/conda init bash
+        # conda init only edits ~/.bashrc, which this running shell never re-reads; expose conda directly.
+        export PATH="$HOME/miniconda3/bin:$PATH"
         conda install -y -q git clang clangxx
         export CC=clang
         export CXX=clangxx