silogen · salexo · Jan 31, 2025 · Jan 31, 2025 · Feb 4, 2025 · Feb 4, 2025
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -17,7 +17,7 @@ Please delete options that are not relevant.
 # Checklist:
 
 - [ ] My code follows the style guidelines of this project. See [contributing-guidelines.md](./../contributing-guidelines.md)
-- [ ] Existing workload examples run after my changes (if applicable)
+- [ ] Existing workload examples run to completion after my changes (if applicable)
 - [ ] I have performed a self-review of my code
 - [ ] I have commented my code, particularly in hard-to-understand areas
 - [ ] I have made corresponding changes to the documentation

diff --git a/.github/workflows/compile-release.yaml b/.github/workflows/compile-release.yaml
@@ -0,0 +1,55 @@
+# Runs build_cli_all_arch.sh and publishes builds/* plus workloads.zip as a
+# draft pre-release whenever a tag starting with "v" is pushed.
+name: compile-release
+on:
+  push:
+    tags:
+      - "v*"
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    # The release step creates a release and uploads assets, which requires
+    # write access when the repository's default GITHUB_TOKEN is read-only.
+    permissions:
+      contents: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Go
+        uses: actions/setup-go@v4
+        with:
+          go-version: '1.21'
+
+      - name: Extract version from tag
+        shell: bash
+        run: |
+          # Values appended to GITHUB_ENV only become visible in later steps,
+          # so keep a local copy for the log line below.
+          VERSION="${GITHUB_REF#refs/tags/}"
+          echo "VERSION=${VERSION}" >> "$GITHUB_ENV"
+          echo "Using version: ${VERSION}"
+
+      - name: Run build script
+        shell: bash
+        run: |
+          set -e
+          chmod +x build_cli_all_arch.sh
+          ./build_cli_all_arch.sh "$VERSION"
+
+      - name: Compress workloads
+        shell: bash
+        run: |
+          zip -r workloads.zip ./workloads
+
+      - name: Create draft release and upload assets
+        uses: softprops/action-gh-release@v2
+        with:
+          files: |
+            builds/*
+            workloads.zip
+          token: '${{ secrets.GITHUB_TOKEN }}'
+          draft: true
+          prerelease: true
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -6,8 +6,8 @@
 	    "type": "go",
 	    "request": "launch",
 	    "mode": "debug",
-	    "program": "${workspaceFolder}",
-	    "args": ["submit", "-p", "workloads/training/LLMs/lora-supervised-finetuning/lora-sft-zero3-single-multinode", "--ray", "-g", "4", "--dry-run"],
+	    "program": "${workspaceFolder}/cmd/cli/main.go",
+	    "args": ["submit", "-p", "${workspaceFolder}/workloads/training/LLMs/lora-supervised-finetuning/lora-sft-zero3-single-multinode", "--ray", "-g", "4", "--dry-run", "--storage=100Gi,longhorn"],
 	    "env": {
 	      "GO111MODULE": "on"
 	    },
@@ -18,8 +18,8 @@
 		"type": "go",
 		"request": "launch",
 		"mode": "debug",
-		"program": "${workspaceFolder}",
-		"args": ["serve", "-p", "workloads/inference/LLMs/online-inference/vllm-online-single-multinode", "--ray", "--replicas", "1", "--gpus-per-replica", "4", "--dry-run"],
+		"program": "${workspaceFolder}/cmd/cli/main.go",
+		"args": ["serve", "-p", "${workspaceFolder}/workloads/inference/LLMs/online-inference/vllm-online-single-multinode", "--ray", "--replicas", "1", "--gpus-per-replica", "4", "--dry-run"],
 		"env": {
 		  "GO111MODULE": "on"
 		},
@@ -30,8 +30,8 @@
 		"type": "go",
 		"request": "launch",
 		"mode": "debug",
-		"program": "${workspaceFolder}",
-		"args": ["submit", "-p", "workloads/training/LLMs/bert/hf-accelerate-bert", "-g", "4", "--dry-run"],
+		"program": "${workspaceFolder}/cmd/cli/main.go",
+		"args": ["submit", "-p", "${workspaceFolder}/workloads/training/LLMs/bert/hf-accelerate-bert", "-g", "4", "--dry-run"],
 		"env": {
 		  "GO111MODULE": "on"
 		},
@@ -42,7 +42,7 @@
 		"type": "go",
 		"request": "launch",
 		"mode": "debug",
-		"program": "${workspaceFolder}",
+		"program": "${workspaceFolder}/cmd/cli/main.go",
 		"args": ["submit", "-i", "ghcr.io/silogen/rocm-ray:v0.4", "-g", "4"],
 		"env": {
 		  "GO111MODULE": "on"
@@ -54,8 +54,8 @@
 		"type": "go",
 		"request": "launch",
 		"mode": "debug",
-		"program": "${workspaceFolder}",
-		"args": ["submit", "-p", "workloads/training/LLMs/lora-supervised-finetuning/ds-zero3-single-multinode", "--ray", "-g", "4"],
+		"program": "${workspaceFolder}/cmd/cli/main.go",
+		"args": ["submit", "-p", "${workspaceFolder}/workloads/training/LLMs/lora-supervised-finetuning/ds-zero3-single-multinode", "--ray", "-g", "4"],
 		"env": {
 		  "GO111MODULE": "on"
 		},
@@ -66,7 +66,7 @@
 		"type": "go",
 		"request": "launch",
 		"mode": "debug",
-		"program": "${workspaceFolder}",
+		"program": "${workspaceFolder}/cmd/cli/main.go",
 		"args": ["monitor", "deployment/avsuni-gpu-monitoring", "-n", "av-test"],
 		"env": {
 		  "GO111MODULE": "on"

diff --git a/README.md b/README.md
@@ -243,6 +243,22 @@ You can access this in the template via
 {{ .Custom.parent.child }}
 ```
 
+### Storage
+
+You can use the Kaiwo CLI to instruct a workload to use storage from a given storage class. If you do not provide any input for the CLI, the following default values are used:
+
+* The storage class name is read from the specified namespace's label `kaiwo-cli/default-storage-class`
+* The storage amount is read from the specified namespace's label `kaiwo-cli/default-storage-quantity`
+
+If these values do not exist, an exception is raised. If you are using the cluster-admins examples from this repository, you can modify the namespace at [cluster-admins/kueue/cluster-queue.yaml](cluster-admins/kueue/cluster-queue.yaml) and add these values. If you want to skip adding storage, you must explicitly add the `--no-storage` flag.
+
+To specify storage, you can use the flags:
+
+* `--storage=2Gi` to specify the amount of storage and to use the default storage class name from the namespace labels
+* `--storage=2Gi,mystorageclass` to specify both the amount of storage and the storage class name
+
+Note that the storage created is ephemeral and meant for caching, which means that it gets removed when the underlying pods get removed. However, the ephemeral storage is provisioned via a storage class, which ensures that the space requested is available and reserved for all pods before the workload starts.
+
 ## Interacting with workloads
 
 While Kaiwo's primary purpose is to deploy workloads, it can also be used as a light tool to discover and interact with running workloads.

diff --git a/cluster-admins/kueue/cluster-queue.yaml b/cluster-admins/kueue/cluster-queue.yaml
@@ -5,7 +5,7 @@ metadata:
 spec:
   namespaceSelector: {} # match all.
   resourceGroups:
-  - coveredResources: ["cpu", "memory", "amd.com/gpu", "ephemeral-storage"]
+  - coveredResources: ["cpu", "memory", "amd.com/gpu"]
     flavors:
     - name: base-gpu-flavour
       resources:
@@ -15,5 +15,3 @@ spec:
         nominalQuota: 1800Gi
       - name: "amd.com/gpu"
         nominalQuota: 16
-      - name: "ephemeral-storage"
-        nominalQuota: 2000Gi