chore: add the llm and xinference addons back (#1455)

(cherry picked from commit af0f606)
apecloud · Feb 7, 2025 · c4738d9 · c4738d9
1 parent 6d8a3c7
commit c4738d9
Show file tree

Hide file tree

Showing 47 changed files with 1,320 additions and 1 deletion.
diff --git a/.github/workflows/release-addons-cluster-chart.yml b/.github/workflows/release-addons-cluster-chart.yml
@@ -17,6 +17,7 @@ on:
           - etcd
           - greptimedb
           - kafka
+          - llm
           - loki
           - mariadb
           - milvus
@@ -43,6 +44,7 @@ on:
           - tidb
           - victoria-metrics
           - weaviate
+          - xinference
           - yashandb
           - zookeeper
       chart_version:

diff --git a/.github/workflows/release-chart.yml b/.github/workflows/release-chart.yml
@@ -18,6 +18,7 @@ on:
           - greptimedb
           - influxdb
           - kafka
+          - llm
           - loki
           - mariadb
           - milvus
@@ -44,6 +45,7 @@ on:
           - tidb
           - victoria-metrics
           - weaviate
+          - xinference
           - yashandb
           - zookeeper
       chart_version:

diff --git a/README.md b/README.md
@@ -26,6 +26,7 @@ KubeBlocks add-ons.
 | greptimedb | greptimedb-0.3.2 | An open-source, cloud-native, distributed time-series database with PromQL/SQL/Python supported. | GreptimeTeam sh2 |
 | influxdb | influxdb-2.7.4 | InfluxDB(TM) is an open source time-series database. It is a core component of the TICK (Telegraf, InfluxDB(TM), Chronograf, Kapacitor) stack. |  |
 | kafka | kafka-broker-2.7.0<br>kafka-broker-3.3.2<br>kafka-combine-3.3.2<br>kafka-controller-3.3.2<br>kafka-exporter-1.6.0 | Apache Kafka is a distributed streaming platform designed to build real-time pipelines and can be used as a message broker or as a replacement for a log aggregation solution for big data applications. | caiq1nyu |
+| llm | ggml-0.1.0<br>llm-0.2.7 | Large language models. | lynnleelhl |
 | loki | loki-1.0.0 | Loki is a horizontally-scalable, highly-available, multi-tenant log aggregation system inspired by Prometheus. It is designed to be very cost effective and easy to operate. | Chen-speculation |
 | mariadb | mariadb-10.6.15 | MariaDB is a high performance open source relational database management system that is widely used for web and application servers | yinmin |
 | milvus | milvus-v2.3.2 | A cloud-native vector database, storage for next generation AI applications. | leon-inf |
@@ -52,5 +53,6 @@ KubeBlocks add-ons.
 | vanilla-postgresql | vanilla-postgresql-12.15.0<br>vanilla-postgresql-14.7.0<br>vanilla-postgresql-15.6.1-138<br>vanilla-postgresql-15.7.0 | Vanilla-PostgreSQL is compatible with the native PostgreSQL kernel, enabling it to quickly provide HA solutions for various variants based on the native PostgreSQL kernel. | kizuna-lek |
 | victoria-metrics | victoria-metrics-1.0.0 | VictoriaMetrics is a fast, cost-effective and scalable monitoring solution and time series database. | sophon-zt ButterBright |
 | weaviate | weaviate-1.19.6 | Weaviate is an open-source vector database. It allows you to store data objects and vector embeddings from your favorite ML-models, and scale seamlessly into billions of data objects. | iziang |
+| xinference | xinference-0.11.0<br>xinference-0.11.0-cpu | Xorbits Inference(Xinference) is a powerful and versatile library designed to serve language, speech recognition, and multimodal models. | lynnleelhl |
 | yashandb | yashandb-23.1.1-100 | YashanDB is a new database system completely independently designed and developed by SICS. Based on classical database theories, it incorporates original Bounded Evaluation theory, Approximation theory, Parallel Scalability theory and Cross-Modal Fusion Computation theory, supports multiple deployment methods such as stand-alone/primary-standby, shared cluster, and distributed ones, covers OLTP/HTAP/OLAP transactions and analyzes mixed load scenarios, and is fully compatible with privatization and cloud infrastructure, providing clients with one-stop enterprise-level converged data management solutions to meet the needs of key industries such as finance, government, telecommunications and energy for high performance, concurrency and security. | JesseAtSZ shanshanying |
 | zookeeper | zookeeper-3.4.14<br>zookeeper-3.6.4<br>zookeeper-3.7.2<br>zookeeper-3.8.4<br>zookeeper-3.9.2 | Apache ZooKeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services. | kubeJocker kissycn |
diff --git a/addons-cluster/llm/.helmignore b/addons-cluster/llm/.helmignore
@@ -0,0 +1,24 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
+*.lock
diff --git a/addons-cluster/llm/Chart.yaml b/addons-cluster/llm/Chart.yaml
@@ -0,0 +1,27 @@
+# Helm chart metadata for the llm-cluster chart.
+annotations:
+  category: Database
+apiVersion: v2
+name: llm-cluster
+type: application
+version: 1.0.0-alpha.0
+description: A vLLM Cluster Helm chart for KubeBlocks.
+dependencies:
+  # Shared helper chart, resolved from the sibling ../kblib directory.
+  # Because of the alias, its values are addressed as `.Values.extra.*`.
+  - name: kblib
+    version: 0.1.2
+    repository: file://../kblib
+    alias: extra
+
+# NOTE(review): "7.0.6" matches neither llm version listed in the README
+# (ggml-0.1.0, llm-0.2.7) - confirm this is the intended appVersion.
+appVersion: "7.0.6"
+
+keywords:
+  - llm
+
+home: https://github.com/apecloud/kubeblocks/tree/main/deploy/llm-cluster
+icon: https://kubeblocks.io/img/logo.png
+
+maintainers:
+  - name: ApeCloud
+    url: https://kubeblocks.io/
+
+sources:
+  - https://github.com/apecloud/kubeblocks/
diff --git a/addons-cluster/llm/templates/NOTES.txt b/addons-cluster/llm/templates/NOTES.txt
@@ -0,0 +1 @@
+1. Get the application URL by running these commands:
diff --git a/addons-cluster/llm/templates/_helpers.tpl b/addons-cluster/llm/templates/_helpers.tpl
@@ -0,0 +1,3 @@
+{{/*
+Define llm cluster component.
+*/}}
diff --git a/addons-cluster/llm/templates/cluster.yaml b/addons-cluster/llm/templates/cluster.yaml
@@ -0,0 +1,35 @@
+{{- /*
+Cluster resource for the llm addon.
+
+When `cpuMode` is true the cluster uses the `ggml` cluster/component definition
+and takes its resources from the kblib helper; otherwise it uses `vllm` with
+explicit cpu/memory/GPU limits that are mirrored into requests.
+
+Model settings (model, extraArgs, url, quantize) are handed over as a JSON
+object in the `kubeblocks.io/extra-env` annotation.
+*/ -}}
+apiVersion: apps.kubeblocks.io/v1
+kind: Cluster
+metadata:
+  name: {{ include "kblib.clusterName" . }}
+  namespace: {{ .Release.Namespace }}
+  labels: {{ include "kblib.clusterLabels" . | nindent 4 }}
+  annotations:
+    {{- /* Serialize with toJson (then quote for YAML) so that quotes or backslashes inside a value cannot corrupt the JSON document. */}}
+    {{- $extraEnv := dict "MODEL_NAME" .Values.model "EXTRA_ARGS" .Values.extraArgs "MODEL_URL" .Values.url "QUANTIZE" .Values.quantize }}
+    "kubeblocks.io/extra-env": {{ $extraEnv | toJson | quote }}
+spec:
+  terminationPolicy: {{ .Values.extra.terminationPolicy }}
+{{- if .Values.cpuMode }}
+  clusterDef: ggml  # ref clusterDefinition.name
+  componentSpecs:
+    - name: ggml
+      componentDef: ggml
+      {{- include "kblib.componentMonitor" . | indent 6 }}
+      replicas: {{ .Values.replicas }}
+      {{- include "kblib.componentResources" . | indent 6 }}
+{{- else }}
+  clusterDef: vllm  # ref clusterDefinition.name
+  componentSpecs:
+    - name: vllm
+      componentDef: vllm
+      {{- include "kblib.componentMonitor" . | indent 6 }}
+      replicas: {{ .Values.replicas }}
+      resources:
+        limits:
+          nvidia.com/gpu: {{ .Values.gpu }}
+          cpu: {{ .Values.cpu | quote }}
+          memory: {{ print .Values.memory "Gi" | quote }}
+        requests:
+          nvidia.com/gpu: {{ .Values.gpu }}
+          cpu: {{ .Values.cpu | quote }}
+          memory: {{ print .Values.memory "Gi" | quote }}
+{{- end }}
diff --git a/addons-cluster/llm/templates/validate.yaml b/addons-cluster/llm/templates/validate.yaml
@@ -0,0 +1,12 @@
+{{/*
+Validate cpu and memory.
+Rendering fails when cpu is outside [0, 64] or memory (Gi) is outside [0, 1000].
+Values are converted with float64 first, so numeric strings are accepted too.
+*/}}
+{{- $cpu := (float64 .Values.cpu) }}
+{{- $memory := (float64 .Values.memory) }}
+{{- if or (lt $cpu 0.0) (gt $cpu 64.0) }}
+{{- fail (print "cpu must be between 0 and 64, got " $cpu) }}
+{{- end }}
+{{- if or (lt $memory 0.0) (gt $memory 1000.0) }}
+{{- fail (print "memory must be between 0 and 1000, got " $memory) }}
+{{- end }}
diff --git a/addons-cluster/llm/values.schema.json b/addons-cluster/llm/values.schema.json
@@ -0,0 +1,93 @@
+{
+  "$schema": "http://json-schema.org/schema#",
+  "type": "object",
+  "properties": {
+    "cpuMode": {
+      "title": "CPU Mode",
+      "description": "Set to true if no GPU is available, default true",
+      "type": "boolean",
+      "default": true
+    },
+    "version": {
+      "title": "Version",
+      "description": "Cluster version.",
+      "type": "string",
+      "default": ""
+    },
+    "replicas": {
+      "title": "Replicas",
+      "description": "The number of replicas, for standalone mode, the replicas is 1, for replication mode, the default replicas is 2.",
+      "type": "integer",
+      "default": 1,
+      "minimum": 1,
+      "maximum": 5
+    },
+    "cpu": {
+      "title": "CPU",
+      "description": "CPU cores.",
+      "type": [
+        "number",
+        "string"
+      ],
+      "default": 0,
+      "minimum": 0,
+      "maximum": 64,
+      "multipleOf": 0.5
+    },
+    "memory": {
+      "title": "Memory(Gi)",
+      "description": "Memory, the unit is Gi.",
+      "type": [
+        "number",
+        "string"
+      ],
+      "default": 0,
+      "minimum": 0,
+      "maximum": 1000
+    },
+    "gpu": {
+      "title": "GPU",
+      "description": "GPU cores.",
+      "type": [
+        "number",
+        "string"
+      ],
+      "default": 1,
+      "minimum": 0,
+      "maximum": 64,
+      "multipleOf": 1
+    },
+    "model": {
+      "title": "Model",
+      "description": "Model name",
+      "type": [
+        "string"
+      ],
+      "default": "facebook/opt-125m"
+    },
+    "url": {
+      "title": "Model URL",
+      "description": "Model URL, only works in CPU mode",
+      "type": [
+        "string"
+      ],
+      "default": ""
+    },
+    "quantize": {
+      "title": "Model Quantize",
+      "description": "Model's quantized file name, only works in CPU mode",
+      "type": [
+        "string"
+      ],
+      "default": ""
+    },
+    "extraArgs": {
+      "title": "extra arguments",
+      "description": "extra arguments that will be passed to run model",
+      "type": [
+        "string"
+      ],
+      "default": "--trust-remote-code"
+    }
+  }
+}
diff --git a/addons-cluster/llm/values.yaml b/addons-cluster/llm/values.yaml
@@ -0,0 +1,33 @@
+# Default values for llm-cluster.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+## @param version llm cluster version
+##
+version: ""
+
+## @param replicas specify cluster replicas
+##
+replicas: 1
+
+## @param cpu CPU cores; rendering fails outside the range 0-64
+##
+cpu: 0
+
+## @param memory, the unit is Gi; rendering fails outside the range 0-1000
+##
+memory: 0
+
+## @param gpu value for the `nvidia.com/gpu` limit/request, only rendered when cpuMode is false
+##
+gpu: 1
+
+## @param cpuMode true renders the ggml component, false renders the vllm component with GPU resources
+##
+cpuMode: true
+
+## @param model model name, passed to the cluster as MODEL_NAME
+##
+model: "facebook/opt-125m"
+
+## @param extraArgs extra arguments for running the model, passed as EXTRA_ARGS
+##
+extraArgs: "--trust-remote-code"
+
+## @param url model URL, passed as MODEL_URL (the schema describes it as CPU mode only)
+##
+url: ""
+
+## @param quantize quantized model file name, passed as QUANTIZE (the schema describes it as CPU mode only)
+##
+quantize: ""
diff --git a/addons-cluster/xinference/.helmignore b/addons-cluster/xinference/.helmignore
@@ -0,0 +1,24 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
+*.lock
diff --git a/addons-cluster/xinference/Chart.yaml b/addons-cluster/xinference/Chart.yaml
@@ -0,0 +1,27 @@
+# Helm chart metadata for the xinference-cluster chart.
+annotations:
+  category: Database
+apiVersion: v2
+name: xinference-cluster
+type: application
+version: 1.0.0-alpha.0
+description: A Xinference Cluster Helm chart for KubeBlocks.
+dependencies:
+  # Shared helper chart, resolved from the sibling ../kblib directory.
+  # Because of the alias, its values are addressed as `.Values.extra.*`.
+  - name: kblib
+    version: 0.1.2
+    repository: file://../kblib
+    alias: extra
+
+# NOTE(review): "7.0.6" does not match the 0.11.0 service versions this chart
+# deploys - confirm this is the intended appVersion.
+appVersion: "7.0.6"
+
+keywords:
+  - xinference
+
+home: https://github.com/apecloud/kubeblocks/tree/main/deploy/xinference-cluster
+icon: https://kubeblocks.io/img/logo.png
+
+maintainers:
+  - name: ApeCloud
+    url: https://kubeblocks.io/
+
+sources:
+  - https://github.com/apecloud/kubeblocks/
diff --git a/addons-cluster/xinference/templates/NOTES.txt b/addons-cluster/xinference/templates/NOTES.txt
@@ -0,0 +1 @@
+1. Get the application URL by running these commands:
diff --git a/addons-cluster/xinference/templates/_helpers.tpl b/addons-cluster/xinference/templates/_helpers.tpl
@@ -0,0 +1,3 @@
+{{/*
+Define xinference cluster component.
+*/}}
diff --git a/addons-cluster/xinference/templates/cluster.yaml b/addons-cluster/xinference/templates/cluster.yaml
@@ -0,0 +1,32 @@
+{{- /*
+Cluster resource for the xinference addon.
+
+`cpuMode` drives two things through the same truthiness test so they can never
+disagree: GPU limits/requests are rendered only when it is not set to true, and
+the service version switches between the `-cpu` build and the default build.
+Limits are mirrored into requests.
+*/ -}}
+apiVersion: apps.kubeblocks.io/v1
+kind: Cluster
+metadata:
+  name: {{ include "kblib.clusterName" . }}
+  namespace: {{ .Release.Namespace }}
+  labels: {{ include "kblib.clusterLabels" . | nindent 4 }}
+spec:
+  clusterDef: xinference
+  terminationPolicy: {{ .Values.extra.terminationPolicy }}
+  componentSpecs:
+    - name: xinference
+      componentDef: xinference
+      {{- include "kblib.componentMonitor" . | indent 6 }}
+      replicas: {{ .Values.replicas }}
+      resources:
+        limits:
+          {{- if not .Values.cpuMode }}
+          nvidia.com/gpu: {{ .Values.gpu }}
+          {{- end }}
+          cpu: {{ .Values.cpu | quote }}
+          memory: {{ print .Values.memory "Gi" | quote }}
+        requests:
+          {{- if not .Values.cpuMode }}
+          nvidia.com/gpu: {{ .Values.gpu }}
+          {{- end }}
+          cpu: {{ .Values.cpu | quote }}
+          memory: {{ print .Values.memory "Gi" | quote }}
+      {{- if .Values.cpuMode }}
+      serviceVersion: 0.11.0-cpu
+      {{- else }}
+      serviceVersion: 0.11.0
+      {{- end }}
diff --git a/addons-cluster/xinference/templates/validate.yaml b/addons-cluster/xinference/templates/validate.yaml
@@ -0,0 +1,12 @@
+{{/*
+Validate cpu and memory.
+Rendering fails when cpu is outside [0, 64] or memory (Gi) is outside [0, 1000].
+Values are converted with float64 first, so numeric strings are accepted too.
+The error messages state the same bounds that the conditions enforce.
+*/}}
+{{- $cpu := (float64 .Values.cpu) }}
+{{- $memory := (float64 .Values.memory) }}
+{{- if or (lt $cpu 0.0) (gt $cpu 64.0) }}
+{{- fail (print "cpu must be between 0 and 64, got " $cpu) }}
+{{- end }}
+{{- if or (lt $memory 0.0) (gt $memory 1000.0) }}
+{{- fail (print "memory must be between 0 and 1000, got " $memory) }}
+{{- end }}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		1. Get the application URL by running these commands: