From 3844fc1aba8ee3a5576f0bc7073767584cb908ac Mon Sep 17 00:00:00 2001
From: kerthcet
Date: Fri, 24 Jan 2025 17:54:11 +0800
Subject: [PATCH] Add e2e test

Signed-off-by: kerthcet
---
 test/e2e/playground_test.go | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/test/e2e/playground_test.go b/test/e2e/playground_test.go
index abe2e18..5a1bc15 100644
--- a/test/e2e/playground_test.go
+++ b/test/e2e/playground_test.go
@@ -18,12 +18,14 @@ package e2e
 import (
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
+	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	testing "sigs.k8s.io/lws/test/testutils"

 	inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
+	"github.com/inftyai/llmaz/test/util"
 	"github.com/inftyai/llmaz/test/util/validation"
 	"github.com/inftyai/llmaz/test/util/wrapper"
 )
@@ -109,6 +111,29 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
 		validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue)
 		validation.ValidateServicePods(ctx, k8sClient, service)
 	})
+	ginkgo.It("Deploy a huggingface model with llama.cpp, HPA enabled", func() {
+		model := wrapper.MakeModel("qwen2-0-5b-gguf").FamilyName("qwen2").ModelSourceWithModelHub("Huggingface").ModelSourceWithModelID("Qwen/Qwen2-0.5B-Instruct-GGUF", "qwen2-0_5b-instruct-q5_k_m.gguf", "", nil, nil).Obj()
+		gomega.Expect(k8sClient.Create(ctx, model)).To(gomega.Succeed())
+		defer func() {
+			gomega.Expect(k8sClient.Delete(ctx, model)).To(gomega.Succeed())
+		}()
+
+		playground := wrapper.MakePlayground("qwen2-0-5b-gguf", ns.Name).ModelClaim("qwen2-0-5b-gguf").
+			BackendRuntime("llamacpp").ElasticConfig(1, 10).HPA(util.MockASimpleHPATrigger()).
+			Replicas(3).Obj()
+		gomega.Expect(k8sClient.Create(ctx, playground)).To(gomega.Succeed())
+		validation.ValidatePlayground(ctx, k8sClient, playground)
+		validation.ValidatePlaygroundStatusEqualTo(ctx, k8sClient, playground, inferenceapi.PlaygroundAvailable, "PlaygroundReady", metav1.ConditionTrue)
+
+		service := &inferenceapi.Service{}
+		gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: playground.Name, Namespace: playground.Namespace}, service)).To(gomega.Succeed())
+		validation.ValidateService(ctx, k8sClient, service)
+		validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue)
+		validation.ValidateServicePods(ctx, k8sClient, service)
+
+		hpa := &autoscalingv2.HorizontalPodAutoscaler{}
+		gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: playground.Name, Namespace: playground.Namespace}, hpa)).To(gomega.Succeed())
+	})
 	// TODO: add e2e tests.
 	// ginkgo.It("SpeculativeDecoding with llama.cpp", func() {
 	// 	targetModel := wrapper.MakeModel("llama2-7b-q8-gguf").FamilyName("llama2").ModelSourceWithModelHub("Huggingface").ModelSourceWithModelID("TheBloke/Llama-2-7B-GGUF", "llama-2-7b.Q8_0.gguf", "", nil, nil).Obj()