-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: add the llm and xinference addons back (#1455)
(cherry picked from commit af0f606)
- Loading branch information
Showing
47 changed files
with
1,320 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Patterns to ignore when building packages. | ||
# This supports shell glob matching, relative path matching, and | ||
# negation (prefixed with !). Only one pattern per line. | ||
.DS_Store | ||
# Common VCS dirs | ||
.git/ | ||
.gitignore | ||
.bzr/ | ||
.bzrignore | ||
.hg/ | ||
.hgignore | ||
.svn/ | ||
# Common backup files | ||
*.swp | ||
*.bak | ||
*.tmp | ||
*.orig | ||
*~ | ||
# Various IDEs | ||
.project | ||
.idea/ | ||
*.tmproj | ||
.vscode/ | ||
*.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Helm chart metadata for the llm-cluster chart (a KubeBlocks cluster chart). | ||
annotations: | ||
  category: Database | ||
apiVersion: v2 | ||
name: llm-cluster | ||
type: application | ||
version: 1.0.0-alpha.0 | ||
description: A vLLM Cluster Helm chart for KubeBlocks. | ||
# kblib is resolved from the sibling ../kblib directory and aliased to | ||
# "extra", so its values are addressed as .Values.extra in the templates. | ||
dependencies: | ||
  - name: kblib | ||
    version: 0.1.2 | ||
    repository: file://../kblib | ||
    alias: extra | ||
|
||
# NOTE(review): confirm this appVersion matches the packaged LLM service. | ||
appVersion: "7.0.6" | ||
|
||
keywords: | ||
  - llm | ||
|
||
home: https://github.com/apecloud/kubeblocks/tree/main/deploy/llm-cluster | ||
icon: https://kubeblocks.io/img/logo.png | ||
|
||
maintainers: | ||
  - name: ApeCloud | ||
    url: https://kubeblocks.io/ | ||
|
||
sources: | ||
  - https://github.com/apecloud/kubeblocks/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
1. Get the application URL by running these commands: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{{/* | ||
Define llm cluster component. | ||
*/}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# KubeBlocks Cluster for serving an LLM: a ggml component when cpuMode is | ||
# true, otherwise a vllm component that requests GPUs. | ||
apiVersion: apps.kubeblocks.io/v1 | ||
kind: Cluster | ||
metadata: | ||
  name: {{ include "kblib.clusterName" . }} | ||
  namespace: {{ .Release.Namespace }} | ||
  labels: {{ include "kblib.clusterLabels" . | nindent 4 }} | ||
  annotations: | ||
    # Serialized with toJson rather than hand-concatenated JSON, so quotes | ||
    # or backslashes inside the values cannot corrupt the annotation. | ||
    "kubeblocks.io/extra-env": {{ dict "MODEL_NAME" .Values.model "EXTRA_ARGS" .Values.extraArgs "MODEL_URL" .Values.url "QUANTIZE" .Values.quantize | toJson | quote }} | ||
spec: | ||
  terminationPolicy: {{ .Values.extra.terminationPolicy }} | ||
  {{ if .Values.cpuMode }} | ||
  # CPU mode: resources come from the shared kblib helper. | ||
  clusterDef: ggml  # ref clusterDefinition.name | ||
  componentSpecs: | ||
    - name: ggml | ||
      componentDef: ggml | ||
      {{- include "kblib.componentMonitor" . | indent 6 }} | ||
      replicas: {{ .Values.replicas }} | ||
      {{- include "kblib.componentResources" . | indent 6 }} | ||
  {{ else }} | ||
  # GPU mode: resources are spelled out so nvidia.com/gpu can be requested; | ||
  # limits and requests are kept identical. | ||
  clusterDef: vllm  # ref clusterDefinition.name | ||
  componentSpecs: | ||
    - name: vllm | ||
      componentDef: vllm | ||
      {{- include "kblib.componentMonitor" . | indent 6 }} | ||
      replicas: {{ .Values.replicas }} | ||
      resources: | ||
        limits: | ||
          nvidia.com/gpu: {{ .Values.gpu }} | ||
          cpu: {{ .Values.cpu | quote }} | ||
          memory: {{ print .Values.memory "Gi" | quote }} | ||
        requests: | ||
          nvidia.com/gpu: {{ .Values.gpu }} | ||
          cpu: {{ .Values.cpu | quote }} | ||
          memory: {{ print .Values.memory "Gi" | quote }} | ||
  {{ end }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{{/* | ||
Validate cpu and memory. | ||
Rendering fails when cpu is outside [0, 64] or memory (in Gi) is outside | ||
[0, 1000]. Both values are converted with float64 before comparison. | ||
*/}} | ||
{{- $cpu := (float64 .Values.cpu) }} | ||
{{- $memory := (float64 .Values.memory) }} | ||
{{- if or (lt $cpu 0.0) (gt $cpu 64.0) }} | ||
{{- fail (print "cpu must be between 0 and 64, got " $cpu) }} | ||
{{- end }} | ||
{{- if or (lt $memory 0.0) (gt $memory 1000.0) }} | ||
{{- fail (print "memory must be between 0 and 1000, got " $memory) }} | ||
{{- end }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
{ | ||
"$schema": "http://json-schema.org/schema#", | ||
"type": "object", | ||
"properties": { | ||
"cpuMode": { | ||
"title": "CPU Mode", | ||
"description": "Set to true if no GPU is available, default true", | ||
"type": "boolean", | ||
"default": true | ||
}, | ||
"version": { | ||
"title": "Version", | ||
"description": "Cluster version.", | ||
"type": "string", | ||
"default": "" | ||
}, | ||
"replicas": { | ||
"title": "Replicas", | ||
"description": "The number of replicas, for standalone mode, the replicas is 1, for replication mode, the default replicas is 2.", | ||
"type": "integer", | ||
"default": 1, | ||
"minimum": 1, | ||
"maximum": 5 | ||
}, | ||
"cpu": { | ||
"title": "CPU", | ||
"description": "CPU cores.", | ||
"type": [ | ||
"number", | ||
"string" | ||
], | ||
"default": 0, | ||
"minimum": 0, | ||
"maximum": 64, | ||
"multipleOf": 0.5 | ||
}, | ||
"memory": { | ||
"title": "Memory(Gi)", | ||
"description": "Memory, the unit is Gi.", | ||
"type": [ | ||
"number", | ||
"string" | ||
], | ||
"default": 0, | ||
"minimum": 0, | ||
"maximum": 1000 | ||
}, | ||
"gpu": { | ||
"title": "GPU", | ||
"description": "GPU cores.", | ||
"type": [ | ||
"number", | ||
"string" | ||
], | ||
"default": 1, | ||
"minimum": 0, | ||
"maximum": 64, | ||
"multipleOf": 1 | ||
}, | ||
"model": { | ||
"title": "Model", | ||
"description": "Model name", | ||
"type": [ | ||
"string" | ||
], | ||
"default": "facebook/opt-125m" | ||
}, | ||
"url": { | ||
"title": "Model URL", | ||
"description": "Model URL, only work for CPU mode", | ||
"type": [ | ||
"string" | ||
], | ||
"default": "" | ||
}, | ||
"quantize": { | ||
"title": "Model Quantize", | ||
"description": "Model's quantized file name, only work for CPU mode", | ||
"type": [ | ||
"string" | ||
], | ||
"default": "" | ||
}, | ||
"extraArgs": { | ||
"title": "extra arguments", | ||
"description": "extra arguments that will be passed to run model", | ||
"type": [ | ||
"string" | ||
], | ||
"default": "--trust-remote-code" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Default values for llm-cluster. | ||
# This is a YAML-formatted file. | ||
# Declare variables to be passed into your templates. | ||
|
||
## @param version llm cluster version | ||
## | ||
version: "" | ||
|
||
## @param replicas number of replicas of the cluster component | ||
## | ||
replicas: 1 | ||
|
||
## @param cpu CPU cores; must be within 0-64 | ||
## | ||
cpu: 0 | ||
|
||
## @param memory memory in Gi; must be within 0-1000 | ||
## | ||
memory: 0 | ||
|
||
## @param gpu nvidia.com/gpu count requested when cpuMode is false | ||
## | ||
gpu: 1 | ||
|
||
## @param cpuMode when true the ggml component is deployed, otherwise vllm | ||
## | ||
cpuMode: true | ||
|
||
## @param model model name, passed to the cluster as MODEL_NAME | ||
## | ||
model: "facebook/opt-125m" | ||
|
||
## @param extraArgs extra arguments, passed to the cluster as EXTRA_ARGS | ||
## | ||
extraArgs: "--trust-remote-code" | ||
|
||
## @param url model URL, passed to the cluster as MODEL_URL | ||
## | ||
url: "" | ||
|
||
## @param quantize quantized model file name, passed as QUANTIZE | ||
## | ||
quantize: "" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Patterns to ignore when building packages. | ||
# This supports shell glob matching, relative path matching, and | ||
# negation (prefixed with !). Only one pattern per line. | ||
.DS_Store | ||
# Common VCS dirs | ||
.git/ | ||
.gitignore | ||
.bzr/ | ||
.bzrignore | ||
.hg/ | ||
.hgignore | ||
.svn/ | ||
# Common backup files | ||
*.swp | ||
*.bak | ||
*.tmp | ||
*.orig | ||
*~ | ||
# Various IDEs | ||
.project | ||
.idea/ | ||
*.tmproj | ||
.vscode/ | ||
*.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Helm chart metadata for the xinference-cluster chart (a KubeBlocks | ||
# cluster chart). | ||
annotations: | ||
  category: Database | ||
apiVersion: v2 | ||
name: xinference-cluster | ||
type: application | ||
version: 1.0.0-alpha.0 | ||
description: A Xinference Cluster Helm chart for KubeBlocks. | ||
# kblib is resolved from the sibling ../kblib directory and aliased to | ||
# "extra", so its values are addressed as .Values.extra in the templates. | ||
dependencies: | ||
  - name: kblib | ||
    version: 0.1.2 | ||
    repository: file://../kblib | ||
    alias: extra | ||
|
||
# NOTE(review): the cluster template pins serviceVersion 0.11.0; confirm | ||
# whether appVersion should track it. | ||
appVersion: "7.0.6" | ||
|
||
keywords: | ||
  - xinference | ||
|
||
home: https://github.com/apecloud/kubeblocks/tree/main/deploy/xinference-cluster | ||
icon: https://kubeblocks.io/img/logo.png | ||
|
||
maintainers: | ||
  - name: ApeCloud | ||
    url: https://kubeblocks.io/ | ||
|
||
sources: | ||
  - https://github.com/apecloud/kubeblocks/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
1. Get the application URL by running these commands: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{{/* | ||
Define xinference cluster component. | ||
*/}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# KubeBlocks Cluster with a single xinference component. .Values.cpuMode | ||
# decides both whether GPUs are requested and which serviceVersion is used. | ||
apiVersion: apps.kubeblocks.io/v1 | ||
kind: Cluster | ||
metadata: | ||
  name: {{ include "kblib.clusterName" . }} | ||
  namespace: {{ .Release.Namespace }} | ||
  labels: {{ include "kblib.clusterLabels" . | nindent 4 }} | ||
spec: | ||
  clusterDef: xinference | ||
  terminationPolicy: {{ .Values.extra.terminationPolicy }} | ||
  componentSpecs: | ||
    - name: xinference | ||
      componentDef: xinference | ||
      {{- include "kblib.componentMonitor" . | indent 6 }} | ||
      replicas: {{ .Values.replicas }} | ||
      resources: | ||
        limits: | ||
          # GPU gating uses the same truthiness test as the serviceVersion | ||
          # branch below, so the two can never disagree. | ||
          {{ if not .Values.cpuMode }} | ||
          nvidia.com/gpu: {{ .Values.gpu }} | ||
          {{ end }} | ||
          cpu: {{ .Values.cpu | quote }} | ||
          memory: {{ print .Values.memory "Gi" | quote }} | ||
        requests: | ||
          {{ if not .Values.cpuMode }} | ||
          nvidia.com/gpu: {{ .Values.gpu }} | ||
          {{ end }} | ||
          cpu: {{ .Values.cpu | quote }} | ||
          memory: {{ print .Values.memory "Gi" | quote }} | ||
      {{ if .Values.cpuMode }} | ||
      serviceVersion: 0.11.0-cpu | ||
      {{ else }} | ||
      serviceVersion: 0.11.0 | ||
      {{ end }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{{/* | ||
Validate cpu and memory. | ||
Rendering fails when cpu is outside [0, 64] or memory (in Gi) is outside | ||
[0, 1000]. Both values are converted with float64 before comparison, and | ||
the error messages state the same bounds the conditions enforce. | ||
*/}} | ||
{{- $cpu := (float64 .Values.cpu) }} | ||
{{- $memory := (float64 .Values.memory) }} | ||
{{- if or (lt $cpu 0.0) (gt $cpu 64.0) }} | ||
{{- fail (print "cpu must be between 0 and 64, got " $cpu) }} | ||
{{- end }} | ||
{{- if or (lt $memory 0.0) (gt $memory 1000.0) }} | ||
{{- fail (print "memory must be between 0 and 1000, got " $memory) }} | ||
{{- end }} |
Oops, something went wrong.