Skip to content

Commit

Permalink
deploy: add initial vllm worker chart
Browse files Browse the repository at this point in the history
This change adds the initial version of the vLLM worker Helm chart.

This is NOT the final values schema.

Includes schema validation and embedded chart scripting to enable default values.
  • Loading branch information
whoisj committed Jan 21, 2025
1 parent 6f7815a commit 2daf46e
Show file tree
Hide file tree
Showing 5 changed files with 438 additions and 81 deletions.
2 changes: 1 addition & 1 deletion deploy/Kubernetes/worker/charts/vllm/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

apiVersion: v2
appVersion: 1.0.0
description: Triton Distributed Worker Operator for vLLM
description: Triton Distributed Worker for vLLM
icon: https://www.nvidia.com/content/dam/en-zz/Solutions/about-nvidia/logo-and-brand/01-nvidia-logo-vert-500x200-2c50-d@2x.png
name: triton-distributed_worker-vllm
version: 1.0.0
88 changes: 88 additions & 0 deletions deploy/Kubernetes/worker/charts/vllm/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (c) 2022-2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Annotation Groups
{{- /*
annotations.default
Emits the baseline annotation set: a `triton-distributed` marker built from
the release name and the chart's appVersion (falling back to "0.0"), followed
by any user-supplied `.Values.kubernetes.annotations`.
The output starts with a newline and is not indented; callers are expected to
pipe it through `indent N`.
*/ -}}
{{- define "annotations.default" }}
triton-distributed: "{{ printf "%s.%s" .Release.Name (.Chart.AppVersion | default "0.0") }}"
{{- with (.Values.kubernetes | default (dict)).annotations }}
{{ toYaml . }}
{{- end }}
{{- end -}}

{{- /*
annotations.chart
Emits the chart-identifying annotation (`helm.sh/chart`) followed by
everything produced by "annotations.default". Expects the root chart context
as its argument.
*/ -}}
{{- define "annotations.chart" }}
helm.sh/chart: {{ quote .Chart.Name }}
{{- include "annotations.default" . }}
{{- end -}}

# Label Groups
{{- /*
labels.default
Emits the core `app.kubernetes.io/*` labels by rendering, in order, the
instance, name, part-of and version label helpers with the caller's context.
*/ -}}
{{- define "labels.default" }}
{{- $ctx := . }}
{{- range list "label.appInstance" "label.appName" "label.appPartOf" "label.appVersion" }}
{{- include . $ctx }}
{{- end }}
{{- end -}}

{{- /*
labels.chart
Emits the full label set for chart-managed objects, in this order:
  1. the core labels from "labels.default",
  2. the managed-by label,
  3. the chart name/version labels,
  4. any user-supplied `.Values.kubernetes.labels`,
  5. the `release` label.
The output is not indented; callers pipe it through `indent N`.
*/ -}}
{{- define "labels.chart" }}
{{- include "labels.default" . }}
{{- include "label.appManagedBy" . }}
{{- include "label.chart" . }}
{{- with (.Values.kubernetes | default (dict)).labels }}
{{ toYaml . }}
{{- end }}
{{- include "label.release" . }}
{{- end -}}

# Label Values
{{- /*
label.appInstance
Emits the `app.kubernetes.io/instance` label set to the Helm release name.
The value is quoted because a release name may look like a number or boolean
(e.g. `123`, `true`); unquoted, YAML would type it as a non-string and the
Kubernetes API would reject the label.
*/ -}}
{{- define "label.appInstance" }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
{{- end }}

{{- /*
label.appManagedBy
Emits the `app.kubernetes.io/managed-by` label. Uses `.Release.Service` when
it is non-empty and falls back to "triton-distributed" otherwise.
*/ -}}
{{- define "label.appManagedBy" }}
app.kubernetes.io/managed-by: {{ .Release.Service | default "triton-distributed" }}
{{- end }}

{{- /*
label.appName
Emits the `app.kubernetes.io/name` label from `.Values.triton.componentName`.
Fails rendering with an explicit message when the property is missing.
`.Values.triton` is defaulted to an empty dict first so that an absent
`triton` section produces that message rather than a nil-pointer template
error. The value is quoted so number- or boolean-looking names stay strings.
*/ -}}
{{- define "label.appName" }}
{{- $triton := .Values.triton | default (dict) }}
app.kubernetes.io/name: {{ required "Property '.triton.componentName' is required." $triton.componentName | quote }}
{{- end }}

{{- /*
label.appPartOf
Emits the `app.kubernetes.io/part-of` label. Uses
`.Values.kubernetes.partOf` when it is set and non-empty, otherwise
"triton-distributed". The value is quoted because it is user-supplied and may
otherwise be typed by YAML as a number or boolean, which is not a valid label
value.
*/ -}}
{{- define "label.appPartOf" }}
{{- $k8s := .Values.kubernetes | default (dict) }}
app.kubernetes.io/part-of: {{ $k8s.partOf | default "triton-distributed" | quote }}
{{- end }}

{{- /*
label.appVersion
Emits the `app.kubernetes.io/version` label, which describes the version of
the application, so it is taken from the chart's appVersion (falling back to
"0.0"). The chart's own version is published separately by "label.chart".
`+` (SemVer build metadata) is not a legal label character and is replaced
with `_`; the result is capped at the 63-character label-value limit.
*/ -}}
{{- define "label.appVersion" }}
app.kubernetes.io/version: {{ .Chart.AppVersion | default "0.0" | replace "+" "_" | trunc 63 | quote }}
{{- end }}

{{- /*
label.chart
Emits the chart identification labels: `helm.sh/chart` (chart name) and
`helm.sh/version` (chart version, falling back to "0.0").
Chart versions are SemVer and may carry `+build` metadata; `+` is not allowed
in label values, so it is replaced with `_`.
*/ -}}
{{- define "label.chart" }}
helm.sh/chart: {{ .Chart.Name | quote }}
helm.sh/version: {{ .Chart.Version | default "0.0" | replace "+" "_" | quote }}
{{- end }}

{{- /*
label.release
Emits the `release` label as "<chart name>_v<chart version>" (version falls
back to "0.0").
Sanitised for use as a label value: `+` from SemVer build metadata is
replaced with `_`, the result is truncated to 63 characters, and a trailing
`-` left behind by truncation is removed.
*/ -}}
{{- define "label.release" }}
release: {{ printf "%s_v%s" .Chart.Name (.Chart.Version | default "0.0") | replace "+" "_" | trunc 63 | trimSuffix "-" | quote }}
{{- end }}
179 changes: 121 additions & 58 deletions deploy/Kubernetes/worker/charts/vllm/templates/worker-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,32 +19,95 @@
{{- fail "Property '.image' is required." }}
{{- end }}
{{- $component_name := "" }}
{{- $manifold_kind := "nats-io" }}
{{- $manifold_name := "triton-distributed-manifold" }}
{{- $manifold_port := 4222 }}
{{- $instance_count := 1 }}
{{- $k8s_liveness_delay := 10 }}
{{- $k8s_liveness_enabled := true }}
{{- $k8s_liveness_fail := 15 }}
{{- $k8s_liveness_period := 2 }}
{{- $k8s_liveness_success := 1 }}
{{- $k8s_readiness_delay := 10 }}
{{- $k8s_readiness_enabled := true }}
{{- $k8s_readiness_fail := 15 }}
{{- $k8s_readiness_period := 2 }}
{{- $k8s_readiness_success := 1 }}
{{- $request_plane_kind := "nats-io" }}
{{- $request_plane_name := "triton-distributed_request-plane" }}
{{- $request_plane_port := 4222 }}
{{- $parallel_pipeline := 1 }}
{{- $parallel_tensor := 1 }}
{{- $parallel_world := 1 }}
{{- $port_data := 9346 }}
{{- $port_health := 8000 }}
{{- $port_metrics := 9347 }}
{{- $port_request := 9345 }}
{{- $triton_cpu := 4 }}
{{- $triton_ephemeral := "1Gi" }}
{{- $triton_gpu := 1 }}
{{- $triton_logging_iso8601 := 0 }}
{{- $triton_logging_verbose := 0 }}
{{- $triton_memory := "16Gi" }}
{{- $triton_shared := "512Mi" }}
{{- with $.Values.kubernetes }}
{{- with .checks }}
{{- with .liveness }}
{{- $k8s_liveness_enabled = ne false .enabled }}
{{- with .failureThreshold }}
{{- $k8s_liveness_fail = (int .) }}
{{- end }}
{{- with .initialDelaySeconds }}
{{- $k8s_liveness_delay = (int .) }}
{{- end }}
{{- with .periodSeconds }}
{{- $k8s_liveness_period = (int .) }}
{{- end }}
{{- with .successThreshold }}
{{- $k8s_liveness_success = (int .) }}
{{- end }}
{{- end }}
{{- with .readiness }}
{{- $k8s_readiness_enabled = ne false .enabled }}
{{- with .failureThreshold }}
{{- $k8s_readiness_fail = (int .) }}
{{- end }}
{{- with .initialDelaySeconds }}
{{- $k8s_readiness_delay = (int .) }}
{{- end }}
{{- with .periodSeconds }}
{{- $k8s_readiness_period = (int .) }}
{{- end }}
{{- with .successThreshold }}
{{- $k8s_readiness_success = (int .) }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- with $.Values.triton }}
{{- $component_name = required "Property '.triton.componentName' is required." .componentName }}
{{- with .distributed }}
{{- with .manifold }}
{{- with .serverKind }}
{{- $manifold_kind = . }}
{{- $request_plane_kind = . }}
{{- end }}
{{- with .serviceName }}
{{- $manifold_name = . }}
{{- $request_plane_name = . }}
{{- end }}
{{- with .servicePort }}
{{- $manifold_port = (int .) }}
{{- $request_plane_port = (int .) }}
{{- end }}
{{- end }}
{{- end }}
{{- with .instance }}
{{- with .count }}
{{- $instance_count = (int .) }}
{{- end }}
{{- with .parallelism }}
{{- with .pipeline }}
{{- $parallel_pipeline = (int .) }}
{{- end }}
{{- with .tensor }}
{{- $parallel_tensor = (int .) }}
{{- end }}
{{- $parallel_world = mul $parallel_pipeline $parallel_tensor }}
{{- end }}
{{- end }}
{{- with .logging }}
Expand Down Expand Up @@ -76,6 +139,9 @@
{{- with .ephemeral }}
{{- $triton_ephemeral = . }}
{{- end }}
{{- with .gpu }}
{{- /*
Only override the default GPU count when `.count` is actually provided.
`int` of a missing/null value is 0, so an unconditional assignment would turn
"gpu.product set, gpu.count omitted" into zero GPUs. An explicit `count: 0`
is still honoured.
*/ -}}
{{- if not (kindIs "invalid" .count) }}
{{- $triton_gpu = (int .count) }}
{{- end }}
{{- end }}
{{- with .memory }}
{{- $triton_memory = . }}
{{- end }}
Expand All @@ -89,54 +155,52 @@
{{- $model_repo_path := "/var/run/models" }}
{{- with $.Values.modelRepository }}
{{- with .path }}
{{- $model_repo_path = . }}
{{- $model_repo_path = trimSuffix "/" . }}
{{- end }}
{{- end }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ $.Release.Name }}
annotations:
{{- include "annotations.chart" . | indent 4 }}
labels:
app: {{ $.Release.Name }}
{{- with $.Values.kubernetes }}
{{- with .labels }}
{{ toYaml . | indent 4 }}
{{- end }}
{{- end }}
app.kubernetes.io/component: worker
{{- include "labels.chart" . | indent 4 }}
spec:
selector:
matchLabels:
app: {{ $.Release.Name }}
replicas: 1
app.kubernetes.io/component: worker
replicas: {{ $instance_count }}
template:
metadata:
annotations:
{{- include "annotations.chart" . | indent 8 }}
labels:
app: {{ $.Release.Name }}
app.kubernetes.io/component: worker
app.kubernetes.io/name: {{ $component_name }}
app.kubernetes.io/part-of: triton-distributed
{{- with $.Values.kubernetes }}
{{- with .labels }}
{{ toYaml . | indent 8 }}
{{- end }}
{{- end }}
{{- include "labels.chart" . | indent 8 }}
spec:
{{- if ne $triton_gpu 0 }}
affinity:
nodeAffinity:
{{- with $.Values.triton }}
{{- with .resources }}
{{- with .gpu }}
{{- with .product }}
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: nvidia.com/gpu
operator: Exists
{{- if len . }}
{{- with $.Values.triton }}
{{- with .resources }}
{{- with .gpu }}
{{- with .product }}
{{- if len . }}
- key: nvidia.com/gpu.product
operator: In
values:
{{ toYaml . | indent 16 }}
{{ toYaml . | indent 16 }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
Expand All @@ -154,9 +218,13 @@ spec:
{{- end }}
env:
- TRITON_COMPONENT_NAME: {{ $component_name }}
- TRITON_MANIFOLD_KIND: {{ $manifold_kind }}
- TRITON_MANIFOLD_NAME: {{ $manifold_name }}
- TRITON_MANIFOLD_PORT: {{ $manifold_port }}
{{- if gt $parallel_world 1 }}
- TRITON_LLM_PP: {{ $parallel_pipeline }}
- TRITON_LLM_TP: {{ $parallel_tensor }}
{{- end }}
- TRITON_request_plane_kind: {{ $request_plane_kind }}
- TRITON_request_plane_name: {{ $request_plane_name }}
- TRITON_request_plane_port: {{ $request_plane_port }}
- TRITON_MODEL_REPOSITORY: {{ $model_repo_path }}
{{- if ne $port_data 9346 }}
- TRITON_PORT_DATA: {{ $port_data }}
Expand Down Expand Up @@ -185,14 +253,16 @@ spec:
{{- end }}
image: {{ $triton_image_name }}
imagePullPolicy: IfNotPresent
{{- if $k8s_liveness_enabled }}
livenessProbe:
failureThreshold: 15
failureThreshold: {{ $k8s_liveness_fail }}
httpGet:
path: /v2/health/live
port: {{ $port_health }}
initialDelaySeconds: 10
periodSeconds: 2
successThreshold: 1
initialDelaySeconds: {{ $k8s_liveness_delay }}
periodSeconds: {{ $k8s_liveness_period }}
successThreshold: {{ $k8s_liveness_success }}
{{- end }}
ports:
- containerPort: {{ $port_health }}
name: health
Expand All @@ -202,55 +272,48 @@ spec:
name: data
- containerPort: {{ $port_metrics }}
name: metrics
{{- if $k8s_readiness_enabled }}
readinessProbe:
failureThreshold: 15
failureThreshold: {{ $k8s_readiness_fail }}
httpGet:
path: /v2/health/ready
port: {{ $port_health }}
initialDelaySeconds: 15
periodSeconds: 2
successThreshold: 1
initialDelaySeconds: {{ $k8s_readiness_delay }}
periodSeconds: {{ $k8s_readiness_period }}
successThreshold: {{ $k8s_readiness_success }}
{{- end }}
resources:
limits:
cpu: {{ $triton_cpu }}
ephemeral-storage: {{ $triton_ephemeral }}
memory: {{ $triton_memory }}
{{- with $.Values.triton }}
{{- with .resources }}
{{- with .gpu }}
{{- $triton_gpu := 1 }}
{{- with .count }}
{{- $triton_gpu = (int .) }}
{{- end}}
{{- if gt $triton_gpu 0 }}
nvidia.com/gpu: {{ $triton_gpu }}
{{- end }}
{{- end }}
{{- end }}
requests:
cpu: {{ $triton_cpu }}
cpu: {{ mul $triton_cpu }}
ephemeral-storage: {{ $triton_ephemeral }}
memory: {{ $triton_memory }}
{{- with $.Values.triton }}
{{- with .resources }}
{{- with .gpu }}
{{- $triton_gpu := 1 }}
{{- with .count }}
{{- $triton_gpu = (int .) }}
{{- end}}
{{- if gt $triton_gpu 0 }}
nvidia.com/gpu: {{ $triton_gpu }}
{{- end }}
{{- end }}
{{- end }}
volumeMounts:
{{- with $.Values.modelRepository }}
{{- with .volumeMounts }}
{{- range . }}
{{- $mount_path := $model_repo_path }}
{{- $volume_name := required "Property '.modelRepository.volumeMounts[*].name' is required." .name }}
{{- if eq "shared-memory" $volume_name }}
{{- fail "Property '.modelRepository.volumeMounts[*].name' cannot be `shared-memory` because it is a reserved name." }}
{{- end }}
{{- with .path }}
{{- $mount_path = cat $model_repo_path "/" . }}
{{- $mount_path = printf "%s/%s" $model_repo_path (trimPrefix "/" .) }}
{{- if regexMatch "/\\.\\./?" $mount_path }}
{{- fail (printf "Value of property `.modelRepository.volumeMounts[*].path' `%s` is illegal because `%s` is not a sub-directory of `%s`." . (clean $mount_path) $model_repo_path) }}
{{- end }}
{{- end }}
- mountPath: {{ $mount_path }}
name: {{ required "Property '.modelRepository.volumeMounts.name' is required." .name }}
name: {{ $volume_name }}
{{- end }}
{{- end }}
{{- end }}
Expand Down
Loading

0 comments on commit 2daf46e

Please sign in to comment.