Reframe ScaleTrigger to ScaleTriggers in BackendRuntime

Signed-off-by: kerthcet <[email protected]>
InftyAI · Jan 24, 2025 · 20ed25c · 20ed25c
1 parent ea460fe
commit 20ed25c
Show file tree

Hide file tree

Showing 27 changed files with 1,071 additions and 680 deletions.
diff --git a/api/core/v1alpha1/model_types.go b/api/core/v1alpha1/model_types.go
@@ -160,8 +160,8 @@ const (
 	LoraRole ModelRole = "lora"
 )
 
-// ModelRefer refers to a created Model with it's role.
-type ModelRefer struct {
+// ModelRef refers to a created Model with its role.
+type ModelRef struct {
 	// Name represents the model name.
 	Name ModelName `json:"name"`
 	// Role represents the model role once more than one model is required.
@@ -181,7 +181,7 @@ type ModelClaims struct {
 	// speculative decoding, then one model is main(target) model, another one
 	// is draft model.
 	// +kubebuilder:validation:MinItems=1
-	Models []ModelRefer `json:"models,omitempty"`
+	Models []ModelRef `json:"models,omitempty"`
 	// InferenceFlavorClaims represents a list of flavors with fungibility supported
 	// to serve the model.
 	// - If not set, always apply with the 0-index model by default.

diff --git a/api/core/v1alpha1/zz_generated.deepcopy.go b/api/core/v1alpha1/zz_generated.deepcopy.go
diff --git a/api/inference/v1alpha1/backendruntime_types.go b/api/inference/v1alpha1/backendruntime_types.go
@@ -28,7 +28,8 @@ import (
 type BackendRuntimeArg struct {
 	// Name represents the identifier of the backendRuntime argument.
 	// +kubebuilder:default=default
-	Name string `json:"name"`
+	// +optional
+	Name *string `json:"name,omitempty"`
 	// Flags represents all the preset configurations.
 	// Flag around with {{ .CONFIG }} is a configuration waiting for render.
 	Flags []string `json:"flags,omitempty"`
@@ -54,7 +55,19 @@ type HPATrigger struct {
 	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
 }
 
-// ScaleTrigger defines the scaler triggers to scale the workloads.
+// NamedScaleTrigger defines the rules to scale the workloads.
+// Only one trigger can work at a time. The name is used to identify
+// the trigger in backendRuntime.
+type NamedScaleTrigger struct {
+	// Name represents the identifier of the scale trigger, e.g. some triggers are defined for
+	// latency-sensitive workloads, while others are defined for throughput-sensitive workloads.
+	Name string `json:"name,omitempty"`
+	// HPA represents the trigger configuration of the HorizontalPodAutoscaler.
+	HPA *HPATrigger `json:"hpa,omitempty"`
+}
+
+// ScaleTrigger defines the rules to scale the workloads.
+// Only one trigger can work at a time, mostly used in Playground.
 type ScaleTrigger struct {
 	// HPA represents the trigger configuration of the HorizontalPodAutoscaler.
 	HPA *HPATrigger `json:"hpa,omitempty"`
@@ -107,11 +120,10 @@ type BackendRuntimeSpec struct {
 	// when it might take a long time to load data or warm a cache, than during steady-state operation.
 	// +optional
 	StartupProbe *corev1.Probe `json:"startupProbe,omitempty"`
-	// ScaleTrigger represents a set of triggers to scale the workloads based on metrics,
-	// only one trigger cloud work at a time and only HPA is supported right now.
-	// If playground doesn't define the ScaleTrigger, the trigger defined here will be used.
+	// ScaleTriggers represents a set of preset triggers to be used by Playground.
+	// If Playground does not specify the scale trigger, the 0-index trigger will be used.
 	// +optional
-	ScaleTrigger *ScaleTrigger `json:"scaleTrigger,omitempty"`
+	ScaleTriggers []NamedScaleTrigger `json:"scaleTriggers,omitempty"`
 }
 
 // BackendRuntimeStatus defines the observed state of BackendRuntime

diff --git a/api/inference/v1alpha1/config_types.go b/api/inference/v1alpha1/config_types.go
@@ -16,7 +16,9 @@ limitations under the License.
 
 package v1alpha1
 
-import corev1 "k8s.io/api/core/v1"
+import (
+	corev1 "k8s.io/api/core/v1"
+)
 
 type BackendName string
 
@@ -59,6 +61,12 @@ type ResourceRequirements struct {
 	Requests corev1.ResourceList `json:"requests,omitempty"`
 }
 
+// ScaleTriggerRef refers to the configured scaleTrigger in the backendRuntime.
+type ScaleTriggerRef struct {
+	// Name represents the scale trigger name defined in the backendRuntime.scaleTriggers.
+	Name string `json:"name"`
+}
+
 type ElasticConfig struct {
 	// MinReplicas indicates the minimum number of inference workloads based on the traffic.
 	// Default to 1.
@@ -70,9 +78,15 @@ type ElasticConfig struct {
 	// Default to nil means there's no limit for the instance number.
 	// +optional
 	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
+	// ScaleTriggerRef refers to the configured scaleTrigger in the backendRuntime
+	// with tuned target value.
+	// ScaleTriggerRef and ScaleTrigger can't be set at the same time.
+	// +optional
+	ScaleTriggerRef *ScaleTriggerRef `json:"scaleTriggerRef,omitempty"`
 	// ScaleTrigger defines a set of triggers to scale the workloads.
 	// If not defined, trigger configured in backendRuntime will be used,
 	// otherwise, trigger defined here will overwrite the defaulted ones.
+	// ScaleTriggerRef and ScaleTrigger can't be set at the same time.
 	// +optional
 	ScaleTrigger *ScaleTrigger `json:"scaleTrigger,omitempty"`
 }
diff --git a/api/inference/v1alpha1/zz_generated.deepcopy.go b/api/inference/v1alpha1/zz_generated.deepcopy.go
diff --git a/client-go/applyconfiguration/core/v1alpha1/modelclaims.go b/client-go/applyconfiguration/core/v1alpha1/modelclaims.go
diff --git a/...configuration/core/v1alpha1/modelrefer.go → ...lyconfiguration/core/v1alpha1/modelref.go b/...configuration/core/v1alpha1/modelrefer.go → ...lyconfiguration/core/v1alpha1/modelref.go
diff --git a/client-go/applyconfiguration/inference/v1alpha1/elasticconfig.go b/client-go/applyconfiguration/inference/v1alpha1/elasticconfig.go
diff --git a/client-go/applyconfiguration/inference/v1alpha1/scaletriggerref.go b/client-go/applyconfiguration/inference/v1alpha1/scaletriggerref.go
diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go