Skip to content

Commit

Permalink
Reframe ScaleTrigger to ScaleTriggers in BackendRuntime
Browse files Browse the repository at this point in the history
Signed-off-by: kerthcet <[email protected]>
  • Loading branch information
kerthcet committed Jan 24, 2025
1 parent ea460fe commit 20ed25c
Show file tree
Hide file tree
Showing 27 changed files with 1,071 additions and 680 deletions.
6 changes: 3 additions & 3 deletions api/core/v1alpha1/model_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ const (
LoraRole ModelRole = "lora"
)

// ModelRefer refers to a created Model with it's role.
type ModelRefer struct {
// ModelRef refers to a created Model with its role.
type ModelRef struct {
// Name represents the model name.
Name ModelName `json:"name"`
// Role represents the model role once more than one model is required.
Expand All @@ -181,7 +181,7 @@ type ModelClaims struct {
// speculative decoding, then one model is main(target) model, another one
// is draft model.
// +kubebuilder:validation:MinItems=1
Models []ModelRefer `json:"models,omitempty"`
Models []ModelRef `json:"models,omitempty"`
// InferenceFlavorClaims represents a list of flavors with fungibility supported
// to serve the model.
// - If not set, always apply with the 0-index model by default.
Expand Down
10 changes: 5 additions & 5 deletions api/core/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 18 additions & 6 deletions api/inference/v1alpha1/backendruntime_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ import (
type BackendRuntimeArg struct {
// Name represents the identifier of the backendRuntime argument.
// +kubebuilder:default=default
Name string `json:"name"`
// +optional
Name *string `json:"name,omitempty"`
// Flags represents all the preset configurations.
// Flag around with {{ .CONFIG }} is a configuration waiting for render.
Flags []string `json:"flags,omitempty"`
Expand All @@ -54,7 +55,19 @@ type HPATrigger struct {
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
}

// ScaleTrigger defines the scaler triggers to scale the workloads.
// NamedScaleTrigger defines the rules to scale the workloads.
// Only one trigger could work at a time. The name is used to identify
// the trigger in backendRuntime.
type NamedScaleTrigger struct {
// Name represents the identifier of the scale trigger, e.g. some triggers are defined for
// latency-sensitive workloads, while others are defined for throughput-sensitive workloads.
// It is serialized as "name" and omitted from the JSON output when empty.
Name string `json:"name,omitempty"`
// HPA represents the trigger configuration of the HorizontalPodAutoscaler.
// It is a pointer, so it is omitted from the JSON output when nil.
HPA *HPATrigger `json:"hpa,omitempty"`
}

// ScaleTrigger defines the rules to scale the workloads.
// Only one trigger could work at a time, mostly used in Playground.
type ScaleTrigger struct {
// HPA represents the trigger configuration of the HorizontalPodAutoscaler.
HPA *HPATrigger `json:"hpa,omitempty"`
Expand Down Expand Up @@ -107,11 +120,10 @@ type BackendRuntimeSpec struct {
// when it might take a long time to load data or warm a cache, than during steady-state operation.
// +optional
StartupProbe *corev1.Probe `json:"startupProbe,omitempty"`
// ScaleTrigger represents a set of triggers to scale the workloads based on metrics,
// only one trigger cloud work at a time and only HPA is supported right now.
// If playground doesn't define the ScaleTrigger, the trigger defined here will be used.
// ScaleTriggers represents a set of triggers preset to be used by Playground.
// If the Playground does not specify a scale trigger, the 0-index trigger will be used.
// +optional
ScaleTrigger *ScaleTrigger `json:"scaleTrigger,omitempty"`
ScaleTriggers []NamedScaleTrigger `json:"scaleTriggers,omitempty"`
}

// BackendRuntimeStatus defines the observed state of BackendRuntime
Expand Down
16 changes: 15 additions & 1 deletion api/inference/v1alpha1/config_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ limitations under the License.

package v1alpha1

import corev1 "k8s.io/api/core/v1"
import (
corev1 "k8s.io/api/core/v1"
)

type BackendName string

Expand Down Expand Up @@ -59,6 +61,12 @@ type ResourceRequirements struct {
Requests corev1.ResourceList `json:"requests,omitempty"`
}

// ScaleTriggerRef refers to a scale trigger configured in the backendRuntime.
// It carries only the trigger's name; the trigger configuration itself is
// defined in the backendRuntime.
type ScaleTriggerRef struct {
// Name represents the scale trigger name defined in the backendRuntime.scaleTriggers.
// The JSON tag has no omitempty, so the "name" key is always serialized.
// NOTE(review): presumably this must match the name of an entry in
// backendRuntime.scaleTriggers — confirm where that lookup is validated.
Name string `json:"name"`
}

type ElasticConfig struct {
// MinReplicas indicates the minimum number of inference workloads based on the traffic.
// Default to 1.
Expand All @@ -70,9 +78,15 @@ type ElasticConfig struct {
// Default to nil means there's no limit for the instance number.
// +optional
MaxReplicas *int32 `json:"maxReplicas,omitempty"`
// ScaleTriggerRef refers to the configured scaleTrigger in the backendRuntime
// with tuned target value.
// ScaleTriggerRef and ScaleTrigger can't be set at the same time.
// +optional
ScaleTriggerRef *ScaleTriggerRef `json:"scaleTriggerRef,omitempty"`
// ScaleTrigger defines a set of triggers to scale the workloads.
// If not defined, trigger configured in backendRuntime will be used,
// otherwise, trigger defined here will overwrite the defaulted ones.
// ScaleTriggerRef and ScaleTrigger can't be set at the same time.
// +optional
ScaleTrigger *ScaleTrigger `json:"scaleTrigger,omitempty"`
}
55 changes: 51 additions & 4 deletions api/inference/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions client-go/applyconfiguration/core/v1alpha1/modelclaims.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 12 additions & 3 deletions client-go/applyconfiguration/inference/v1alpha1/elasticconfig.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions client-go/applyconfiguration/inference/v1alpha1/scaletriggerref.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions client-go/applyconfiguration/utils.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 20ed25c

Please sign in to comment.