From 3844fc1aba8ee3a5576f0bc7073767584cb908ac Mon Sep 17 00:00:00 2001
From: kerthcet
Date: Fri, 24 Jan 2025 17:54:11 +0800
Subject: [PATCH] Add e2e test

Signed-off-by: kerthcet
---
 test/e2e/playground_test.go | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/test/e2e/playground_test.go b/test/e2e/playground_test.go
index abe2e18..5a1bc15 100644
--- a/test/e2e/playground_test.go
+++ b/test/e2e/playground_test.go
@@ -18,12 +18,14 @@ package e2e
 import (
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
+	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	testing "sigs.k8s.io/lws/test/testutils"

 	inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
+	"github.com/inftyai/llmaz/test/util"
 	"github.com/inftyai/llmaz/test/util/validation"
 	"github.com/inftyai/llmaz/test/util/wrapper"
 )
@@ -109,6 +111,29 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
 		validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue)
 		validation.ValidateServicePods(ctx, k8sClient, service)
 	})
+	ginkgo.It("Deploy a huggingface model with llama.cpp, HPA enabled", func() {
+		model := wrapper.MakeModel("qwen2-0-5b-gguf").FamilyName("qwen2").ModelSourceWithModelHub("Huggingface").ModelSourceWithModelID("Qwen/Qwen2-0.5B-Instruct-GGUF", "qwen2-0_5b-instruct-q5_k_m.gguf", "", nil, nil).Obj()
+		gomega.Expect(k8sClient.Create(ctx, model)).To(gomega.Succeed())
+		defer func() {
+			gomega.Expect(k8sClient.Delete(ctx, model)).To(gomega.Succeed())
+		}()
+
+		playground := wrapper.MakePlayground("qwen2-0-5b-gguf", ns.Name).ModelClaim("qwen2-0-5b-gguf").
+			BackendRuntime("llamacpp").ElasticConfig(1, 10).HPA(util.MockASimpleHPATrigger()).
+			Replicas(3).Obj()
+		gomega.Expect(k8sClient.Create(ctx, playground)).To(gomega.Succeed())
+		validation.ValidatePlayground(ctx, k8sClient, playground)
+		validation.ValidatePlaygroundStatusEqualTo(ctx, k8sClient, playground, inferenceapi.PlaygroundAvailable, "PlaygroundReady", metav1.ConditionTrue)
+
+		service := &inferenceapi.Service{}
+		gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: playground.Name, Namespace: playground.Namespace}, service)).To(gomega.Succeed())
+		validation.ValidateService(ctx, k8sClient, service)
+		validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue)
+		validation.ValidateServicePods(ctx, k8sClient, service)
+
+		hpa := &autoscalingv2.HorizontalPodAutoscaler{}
+		gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: playground.Name, Namespace: playground.Namespace}, hpa)).To(gomega.Succeed())
+	})
 	// TODO: add e2e tests.
 	// ginkgo.It("SpeculativeDecoding with llama.cpp", func() {
 	// 	targetModel := wrapper.MakeModel("llama2-7b-q8-gguf").FamilyName("llama2").ModelSourceWithModelHub("Huggingface").ModelSourceWithModelID("TheBloke/Llama-2-7B-GGUF", "llama-2-7b.Q8_0.gguf", "", nil, nil).Obj()