From 9094a1a14d3a35fcc430fc776128bc779c04d9ba Mon Sep 17 00:00:00 2001 From: Ganeshkumar Ashokavardhanan <35557827+ganeshkumarashok@users.noreply.github.com> Date: Wed, 7 Aug 2024 06:48:39 -0700 Subject: [PATCH] refactor: move GPU drivers to config and refactor pipelines for CUDA driver (#46) * Move driver config to justfile * Add set config step in main.yaml, remove cuda var in justfile * add load config step to ci.yaml * change relative path for GH actions * update main builds to use var from load config * testing * echo in load config * gs * change matrix.driver_version to steps.load_config.outputs.cuda_version * update to use steps.load_config.outputs.cuda_version in main.yaml * GH action directory debug for main.yaml * move the fetch up * remove debug statements --- .github/workflows/ci.yaml | 17 +++++++++++------ .github/workflows/main.yaml | 14 ++++++++++---- driver_config.yml | 6 ++++++ justfile | 7 +++---- 4 files changed, 30 insertions(+), 14 deletions(-) create mode 100644 driver_config.yml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c406c08..caf9afd 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -5,17 +5,22 @@ on: - main workflow_dispatch: {} -jobs: +jobs: cuda: runs-on: ubuntu-latest strategy: matrix: - driver_version: ["550.90.07"] driver_kind: ["cuda"] steps: - uses: actions/checkout@v2 with: fetch-depth: 0 + - name: Load CUDA config + id: load_config + run: | + cuda_version=$(yq e '.cuda.version' driver_config.yml) + echo "CUDA_VERSION=$cuda_version" + echo "cuda_version=$cuda_version" >> $GITHUB_OUTPUT - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 - name: Cache Docker layers @@ -28,7 +33,7 @@ jobs: - uses: paulhatch/semantic-version@v5.0.0-alpha2 with: bump_each_commit: false - version_format: "${{ matrix.driver_kind}}-${{ matrix.driver_version }}-sha-${GITHUB_SHA:0:6}" + version_format: "cuda-${{ steps.load_config.outputs.cuda_version }}-sha-${GITHUB_SHA:0:6}" id: semver - 
name: 'Check version' run: | @@ -39,7 +44,7 @@ jobs: set -x echo "tag is: " echo ${{ steps.semver.outputs.version }} - docker buildx build --build-arg DRIVER_KIND=${{ matrix.driver_kind }} --build-arg DRIVER_VERSION=${{ matrix.driver_version }} --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache-new --output=type=docker -t ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }} . + docker buildx build --build-arg DRIVER_KIND=${{ matrix.driver_kind }} --build-arg DRIVER_VERSION=${{ steps.load_config.outputs.cuda_version }} --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache-new --output=type=docker -t ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }} . docker images - name: Move cache run: | @@ -62,9 +67,9 @@ jobs: uses: actions/cache@v2 with: path: /tmp/.buildx-cache - key: ${{ runner.os }}-buildx-${{ matrix.driver_kind}}-${{ matrix.driver_version }}-${{ github.sha }} + key: ${{ runner.os }}-buildx-${{ matrix.driver_kind}}-${{ steps.load_config.outputs.cuda_version }}-${{ github.sha }} restore-keys: | - ${{ runner.os }}-buildx-${{ matrix.driver_kind}}-${{ matrix.driver_version }} + ${{ runner.os }}-buildx-${{ matrix.driver_kind}}-${{ steps.load_config.outputs.cuda_version }} - uses: paulhatch/semantic-version@v5.0.0-alpha2 with: bump_each_commit: false diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 0ea3196..8ed7ec8 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -8,17 +8,23 @@ permissions: id-token: write contents: read -jobs: +jobs: cuda: runs-on: ubuntu-latest strategy: matrix: - driver_version: ["550.90.07"] + driver_version: ["${{ needs.load_config.outputs.cuda_version }}"] driver_kind: ["cuda"] steps: - uses: actions/checkout@v2 with: fetch-depth: 0 + - name: Load CUDA config + id: load_config + run: | + cuda_version=$(yq e '.cuda.version' 
driver_config.yml) + echo "CUDA_VERSION=$cuda_version" + echo "cuda_version=$cuda_version" >> $GITHUB_OUTPUT - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 - name: Cache Docker layers @@ -31,7 +37,7 @@ jobs: - uses: paulhatch/semantic-version@v5.0.0-alpha2 with: bump_each_commit: false - version_format: "${{ matrix.driver_kind}}-${{ matrix.driver_version }}-sha-${GITHUB_SHA:0:6}" + version_format: "cuda-${{ steps.load_config.outputs.cuda_version }}-sha-${GITHUB_SHA:0:6}" id: semver - name: 'Check version' run: | @@ -48,7 +54,7 @@ jobs: set -x echo "tag is: " echo ${{ steps.semver.outputs.version }} - docker buildx build --build-arg DRIVER_KIND=${{ matrix.driver_kind }} --build-arg DRIVER_VERSION=${{ matrix.driver_version }} --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache-new --output=type=docker -t ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }} . + docker buildx build --build-arg DRIVER_KIND=${{ matrix.driver_kind }} --build-arg DRIVER_VERSION=${{ steps.load_config.outputs.cuda_version }} --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache-new --output=type=docker -t ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }} . 
docker images az acr login -n ${{ secrets.AZURE_REGISTRY_SERVER }} docker push ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }} diff --git a/driver_config.yml b/driver_config.yml new file mode 100644 index 0000000..1bcefbc --- /dev/null +++ b/driver_config.yml @@ -0,0 +1,6 @@ +cuda: + version: "550.90.07" + +grid: + version: "535.161.08" + url: "https://download.microsoft.com/download/8/d/a/8da4fb8e-3a9b-4e6a-bc9a-72ff64d7a13c/NVIDIA-Linux-x86_64-535.161.08-grid-azure.run" \ No newline at end of file diff --git a/justfile b/justfile index 7ed5781..00fef4e 100644 --- a/justfile +++ b/justfile @@ -2,17 +2,16 @@ grid_535_url := "https://download.microsoft.com/download/8/d/a/8da4fb8e-3a9b- grid_535_driver := "535.161.08" -cuda_550_driver := "550.90.07" registry := "docker.io/alexeldeib" default: -pushallcuda: (pushcuda cuda_550_driver) +pushallcuda: (pushcuda) pushallgrid: (pushgrid grid_535_driver) -pushcuda VERSION: (buildcuda VERSION) - docker push {{ registry }}/aks-gpu:{{VERSION}}-cuda +pushcuda: (buildcuda) + docker push {{ registry }}/aks-gpu:$(yq e '.cuda.version' driver_config.yml)-cuda pushgrid VERSION URL: (buildgrid VERSION URL) docker push {{ registry }}/aks-gpu:{{VERSION}}-grid