diff --git a/.github/workflows/stable-diff-images.yaml b/.github/workflows/stable-diff-images.yaml
index de84560..de00b3e 100644
--- a/.github/workflows/stable-diff-images.yaml
+++ b/.github/workflows/stable-diff-images.yaml
@@ -5,7 +5,15 @@ on:
       - main
     paths-ignore:
       - '**.md'
-
+      - '!samples/stable-diffusion/**'
+      - '!.github/**'
+  workflow_dispatch:
+    inputs:
+      imageTag:
+        description: "container image tag"
+        required: false
+        default: latest
+        type: string
   workflow_call:
     inputs:
       imageTag:
@@ -37,7 +45,7 @@ jobs:
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
-          username: ${{ github.actor }}
+          username: kedify
           password: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Set up QEMU
diff --git a/samples/stable-diffusion/Makefile b/samples/stable-diffusion/Makefile
index df67593..512afc6 100644
--- a/samples/stable-diffusion/Makefile
+++ b/samples/stable-diffusion/Makefile
@@ -70,13 +70,12 @@ deploy-minio: ## Deploys minio into current Kubernetes context.
 		--set rootUser=$(MINIO_USER),rootPassword=$(MINIO_PASSWORD)
 
 .PHONY: deploy-app
-deploy-app: ## Deploys the fastsdcpu deployment and webui.
+deploy-app: ## Deploys the webui.
 	@$(call say,Deploy stable diffusion app)
 	@$(call createNs,stable-diff)
-	kubectl apply -nstable-diff -f manifests/app$(GPU).yaml
 	kubectl apply -nstable-diff -f manifests/webapp.yaml
 	kubectl wait -nstable-diff --timeout=90s --for=condition=ready pod -lapp=stable-diffusion-webui
-	kubectl wait -nstable-diff --timeout=600s --for=condition=ready pod -lapp=stable-diffusion-worker
+
 	@$(call say,Exposing..)
 	@echo the webapp should be available on http://$(shell echo $(shell kubectl get ing -n stable-diff stable-diffusion-webui -ojson | jq -r '.status.loadBalancer.ingress[0].ip'))
 	@echo -------------------------------------------------------
@@ -108,25 +107,29 @@ deploy-rabbitmq: ## Deploys rabbitmq.
 .PHONY: deploy-scaledobject
 deploy-scaledobject: ## Deploys KEDA scaledobject.
 	@$(call say,Deploy KEDA scaledobject)
+	-kubectl delete -nstable-diff -f manifests/scaledjob.yaml
+	kubectl apply -nstable-diff -f manifests/app$(GPU).yaml
+	kubectl wait -nstable-diff --timeout=600s --for=condition=ready pod -lapp=stable-diffusion-worker
 	kubectl apply -nstable-diff -f manifests/scaledobject.yaml
 
 .PHONY: deploy-scaledjob
 deploy-scaledjob: ## Deploys KEDA scaledjob.
 	@$(call say,Deploy KEDA scaledjob)
+	-kubectl delete -nstable-diff -f manifests/scaledobject.yaml
 	kubectl apply -nstable-diff -f manifests/scaledjob.yaml
 
 .PHONY: deploy
-deploy: deploy-minio deploy-rabbitmq deploy-app ## Deploys minio and sample application.
-	@$(call say,Deploy everything)
-	@echo Done.
+deploy: deploy-minio deploy-rabbitmq deploy-app ## Deploys minio, RabbitMQ and the web ui.
+	@$(call say,Deploy the required infrastructure)
+	@echo Done. Continue with either make deploy-scaledjob XOR make deploy-scaledobject.
 
 .PHONY: undeploy
 undeploy:
 	@$(call say,Undeploying the use-case)
-	kubectl delete -nrabbitmq-system rabbitmqcluster rabbitmq-cluster
-	helm uninstall minio -nstable-diff
-	kubectl delete ns stable-diff
-	kubectl delete ns rabbitmq-system
+	-kubectl delete -nrabbitmq-system rabbitmqcluster rabbitmq-cluster
+	-helm uninstall minio -nstable-diff
+	-kubectl delete ns stable-diff
+	-kubectl delete ns rabbitmq-system
 
 .PHONY: deploy-from-scratch
 deploy-from-scratch: cluster import deploy ## Prepares also k3s cluster and deploys everything on it.
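The `-` prefix added to several recipe lines above is standard GNU Make behavior: a command starting with `-` has its non-zero exit status ignored, so targets like `undeploy` keep going even when a resource is already gone. A minimal sketch of the pattern, with a hypothetical `demo` namespace and release name:

```makefile
.PHONY: cleanup
cleanup: ## Best-effort teardown; every step continues on failure.
	-kubectl delete ns demo       # ignored if the namespace does not exist
	-helm uninstall demo -ndemo   # ignored if the release was never installed
	@echo cleanup finished
```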
diff --git a/samples/stable-diffusion/README.md b/samples/stable-diffusion/README.md
index d0a3d78..9d2aad7 100644
--- a/samples/stable-diffusion/README.md
+++ b/samples/stable-diffusion/README.md
@@ -3,20 +3,49 @@
 ![diagram](./demo.png "Diagram")
 https://excalidraw.com/#json=p1f9nzyFye_sOgnV9AmIL,69oUi00h3HKXnsyzUReA5g
 
-### try the container image locally
+### Try the container image locally
 
 ```
 make build-image PROMPT="cartoon dwarf" NUM_IMAGES=7
 make run-example
 ```
 
+### Try the local k8s setup
+
+This requires the `k3d` binary to be present on the `$PATH`; GPU support is turned off for this setup.
+
+```
+GPU="" make deploy-from-scratch
+```
+
 ### Deploy to K8s
 
 ```
 make deploy
 ```
 
-This deploys one replica of web ui, Minio, RabbitMQ and one replica of worker deployment that can generate the images.
+This deploys Minio, RabbitMQ and the web ui, which sends image generation requests to the job queue.
+
+From here you can continue with either the `scaledobject` approach or the `scaledjob` approach.
+
+#### Deploy scaledjob or scaledobject
+
+```
+make deploy-scaledjob
+```
+
+XOR
+
+```
+make deploy-scaledobject
+```
+
+When using the `scaledjob` approach, new Kubernetes jobs are created while the message queue is not empty, and each job processes exactly one request from
+the job queue. Once the image is generated, the job's minio side-car container syncs the result (image and metadata file) to a shared filesystem and the job's pod terminates.
+
+With the `scaledobject` approach, on the other hand, a regular Kubernetes deployment is used for the worker pods; these run an infinite loop, processing one job request
+after another. The deployment is still subject to KEDA autoscaling, so once there are no pending messages left in the job queue, it is scaled down to the minimum replica count (`0`).
+
 
 ## Common Pain Points
 
@@ -26,14 +55,18 @@ Reasons:
 - the models are too large (~4 gigs)
 - python
 
-Mitigations:
+Mitigation:
 - pre-fetch or even bake the container images on a newly spawned k8s nodes
 
 ### GPUs being too expensive
 
 - https://cloud.google.com/spot-vms/pricing#gpu_pricing
 
+Mitigation:
+- use a node pool that can scale the number of GPU-enabled nodes down to zero. The trade-off is a significant delay when there are no GPU-enabled k8s nodes and the user
+has to wait for their creation (including installation of the nvidia drivers).
+
-### GKE Setup
+### Example GKE Setup
 
 #### two-nodes conventional k8s cluster with a GPU based elastic node pool
 
@@ -146,7 +179,14 @@ gcloud container clusters update use-cases-single-node \
 
 # login
 gcloud container clusters get-credentials use-cases-single-node --zone us-east4-a --project kedify-initial
+```
+
+
+## Non-GCP environments or bare-metal K8s clusters
+
+If the nvidia drivers are not managed by the cloud provider, one has to install the GPU operator:
+
+```
 kubectl create ns gpu-operator
 kubectl label --overwrite ns gpu-operator pod-security.kubernetes.io/enforce=privileged
 cat <
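The README hunk above describes the two KEDA modes, while the Makefile only applies `manifests/scaledjob.yaml` and `manifests/scaledobject.yaml` without showing their contents. As a rough sketch of what the ScaledJob variant could look like, the manifest below is an assumption for illustration: the queue name, image, and trigger settings are hypothetical, not the repository's actual file.

```yaml
# Hypothetical sketch of a RabbitMQ-driven KEDA ScaledJob; the real
# manifests/scaledjob.yaml may differ in names, image and trigger settings.
apiVersion: keda.sh/v1alpha1
kind: ScaledJob
metadata:
  name: stable-diffusion-worker
  namespace: stable-diff
spec:
  jobTargetRef:
    template:
      spec:
        restartPolicy: Never
        containers:
        - name: worker
          image: ghcr.io/kedify/stable-diffusion-worker:latest  # assumed image
  pollingInterval: 10      # check the queue every 10 seconds
  maxReplicaCount: 5       # upper bound on parallel jobs
  triggers:
  - type: rabbitmq
    metadata:
      queueName: image-requests   # assumed queue name
      mode: QueueLength
      value: "1"                  # roughly one job per pending message
      hostFromEnv: RABBITMQ_HOST  # AMQP connection string taken from an env var
```

With a `ScaledObject`, the same trigger would instead scale a long-running worker Deployment between `minReplicaCount: 0` and some maximum, matching the two approaches the README describes.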