Skip to content

Commit

Permalink
refactor: move GPU drivers to config and refactor pipelines for CUDA …
Browse files Browse the repository at this point in the history
…driver (#46)

* Move driver config to justfile

* Add set config step in main.yaml, remove cuda var in justfile

* add load config step to ci.yaml

* change relative path for GH actions

* update main builds to use var from load config

* testing

* echo in load config

* gs

* change matrix.driver_version to steps.load_config.outputs.cuda_version

* update to use steps.load_config.outputs.cuda_version in main.yaml

* GH action directory debug for main.yaml

* move the fetch up

* remove debug statements
  • Loading branch information
ganeshkumarashok authored Aug 7, 2024
1 parent b40b851 commit 9094a1a
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 14 deletions.
17 changes: 11 additions & 6 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,22 @@ on:
- main
workflow_dispatch: {}

jobs:
jobs:
cuda:
runs-on: ubuntu-latest
strategy:
matrix:
driver_version: ["550.90.07"]
driver_kind: ["cuda"]
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Load CUDA config
id: load_config
run: |
cuda_version=$(yq e '.cuda.version' driver_config.yml)
echo "CUDA_VERSION=$cuda_version"
echo "cuda_version=$cuda_version" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Cache Docker layers
Expand All @@ -28,7 +33,7 @@ jobs:
- uses: paulhatch/[email protected]
with:
bump_each_commit: false
version_format: "${{ matrix.driver_kind}}-${{ matrix.driver_version }}-sha-${GITHUB_SHA:0:6}"
version_format: "cuda-${{ steps.load_config.outputs.cuda_version }}-sha-${GITHUB_SHA:0:6}"
id: semver
- name: 'Check version'
run: |
Expand All @@ -39,7 +44,7 @@ jobs:
set -x
echo "tag is: "
echo ${{ steps.semver.outputs.version }}
docker buildx build --build-arg DRIVER_KIND=${{ matrix.driver_kind }} --build-arg DRIVER_VERSION=${{ matrix.driver_version }} --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache-new --output=type=docker -t ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }} .
docker buildx build --build-arg DRIVER_KIND=${{ matrix.driver_kind }} --build-arg DRIVER_VERSION=${{ steps.load_config.outputs.cuda_version }} --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache-new --output=type=docker -t ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }} .
docker images
- name: Move cache
run: |
Expand All @@ -62,9 +67,9 @@ jobs:
uses: actions/cache@v2
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ matrix.driver_kind}}-${{ matrix.driver_version }}-${{ github.sha }}
key: ${{ runner.os }}-buildx-${{ matrix.driver_kind}}-${{ steps.load_config.outputs.cuda_version }}-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-${{ matrix.driver_kind}}-${{ matrix.driver_version }}
${{ runner.os }}-buildx-${{ matrix.driver_kind}}-${{ steps.load_config.outputs.cuda_version }}
- uses: paulhatch/[email protected]
with:
bump_each_commit: false
Expand Down
14 changes: 10 additions & 4 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,23 @@ permissions:
id-token: write
contents: read

jobs:
jobs:
cuda:
runs-on: ubuntu-latest
strategy:
matrix:
driver_version: ["550.90.07"]
driver_version: ["${{ needs.load_config.outputs.cuda_version }}"]
driver_kind: ["cuda"]
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Load CUDA config
id: load_config
run: |
cuda_version=$(yq e '.cuda.version' driver_config.yml)
echo "CUDA_VERSION=$cuda_version"
echo "cuda_version=$cuda_version" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Cache Docker layers
Expand All @@ -31,7 +37,7 @@ jobs:
- uses: paulhatch/[email protected]
with:
bump_each_commit: false
version_format: "${{ matrix.driver_kind}}-${{ matrix.driver_version }}-sha-${GITHUB_SHA:0:6}"
version_format: "cuda-${{ steps.load_config.outputs.cuda_version }}-sha-${GITHUB_SHA:0:6}"
id: semver
- name: 'Check version'
run: |
Expand All @@ -48,7 +54,7 @@ jobs:
set -x
echo "tag is: "
echo ${{ steps.semver.outputs.version }}
docker buildx build --build-arg DRIVER_KIND=${{ matrix.driver_kind }} --build-arg DRIVER_VERSION=${{ matrix.driver_version }} --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache-new --output=type=docker -t ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }} .
docker buildx build --build-arg DRIVER_KIND=${{ matrix.driver_kind }} --build-arg DRIVER_VERSION=${{ steps.load_config.outputs.cuda_version }} --cache-from=type=local,src=/tmp/.buildx-cache --cache-to=type=local,dest=/tmp/.buildx-cache-new --output=type=docker -t ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }} .
docker images
az acr login -n ${{ secrets.AZURE_REGISTRY_SERVER }}
docker push ${{ secrets.AZURE_REGISTRY_SERVER }}/public/aks/aks-gpu:${{ steps.semver.outputs.version }}
Expand Down
6 changes: 6 additions & 0 deletions driver_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# GPU driver configuration: one place to pin the driver versions that are built.
#
# The CUDA version is read with `yq e '.cuda.version' driver_config.yml`,
# so `version` must stay nested under `cuda`.
# Versions are quoted so they are always treated as strings, never as numbers.
cuda:
  version: "550.90.07"

# NOTE(review): these GRID values mirror grid_535_driver / grid_535_url in the
# justfile; confirm which copy is authoritative before changing either.
grid:
  version: "535.161.08"
  url: "https://download.microsoft.com/download/8/d/a/8da4fb8e-3a9b-4e6a-bc9a-72ff64d7a13c/NVIDIA-Linux-x86_64-535.161.08-grid-azure.run"
7 changes: 3 additions & 4 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,16 @@ grid_535_url := "https://download.microsoft.com/download/8/d/a/8da4fb8e-3a9b-

grid_535_driver := "535.161.08"

cuda_550_driver := "550.90.07"
registry := "docker.io/alexeldeib"

default:

pushallcuda: (pushcuda cuda_550_driver)
pushallcuda: (pushcuda)

pushallgrid: (pushgrid grid_535_driver)

pushcuda VERSION: (buildcuda VERSION)
docker push {{ registry }}/aks-gpu:{{VERSION}}-cuda
pushcuda: (buildcuda)
docker push {{ registry }}/aks-gpu:$(yq e '.cuda.version' driver_config.yml)-cuda

pushgrid VERSION URL: (buildgrid VERSION URL)
docker push {{ registry }}/aks-gpu:{{VERSION}}-grid
Expand Down

0 comments on commit 9094a1a

Please sign in to comment.