From 5f760a54dad86241d7de9f9555c6c05f2e3da3c7 Mon Sep 17 00:00:00 2001
From: Nick Tran <10810510+njtran@users.noreply.github.com>
Date: Thu, 1 Aug 2024 16:07:09 -0700
Subject: [PATCH] test: add lastPodEventTime e2es (#6628)

---
Review note: in the "pods go terminal" test the NodeClaim assertion compares
LastPodEventTime.Time against the previously recorded .Time (as the
"scheduled and removed" test does) and is polled inside the Eventually block.
Comparing the wrapper value against a bare time.Time is never equivalent, so
a ToNot(BeEquivalentTo(...)) on it passes unconditionally.

 test/pkg/environment/aws/environment.go |   2 +
 test/suites/consolidation/suite_test.go | 114 ++++++++++++++++++++++++
 test/suites/drift/suite_test.go         |   5 +-
 3 files changed, 118 insertions(+), 3 deletions(-)

diff --git a/test/pkg/environment/aws/environment.go b/test/pkg/environment/aws/environment.go
index 8df6765a9915..d06dbe463e7f 100644
--- a/test/pkg/environment/aws/environment.go
+++ b/test/pkg/environment/aws/environment.go
@@ -143,6 +143,8 @@ func GetTimeStreamAPI(session *session.Session) timestreamwriteiface.TimestreamW

 func (env *Environment) DefaultEC2NodeClass() *v1.EC2NodeClass {
 	nodeClass := test.EC2NodeClass()
+	// Set the AMI Family to AL2023 in case the AMISelectorTerms change.
+	nodeClass.Spec.AMIFamily = lo.ToPtr(v1.AMIFamilyAL2023)
 	nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2023@latest"}}
 	nodeClass.Spec.Tags = map[string]string{
 		"testing/cluster": env.ClusterName,
diff --git a/test/suites/consolidation/suite_test.go b/test/suites/consolidation/suite_test.go
index 92b1e3b2ecdf..25c248eaace2 100644
--- a/test/suites/consolidation/suite_test.go
+++ b/test/suites/consolidation/suite_test.go
@@ -24,6 +24,7 @@ import (
 	"github.com/awslabs/operatorpkg/object"
 	"github.com/samber/lo"
 	appsv1 "k8s.io/api/apps/v1"
+	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -66,6 +67,119 @@ var _ = AfterEach(func() { env.Cleanup() })
 var _ = AfterEach(func() { env.AfterEach() })

 var _ = Describe("Consolidation", func() {
+	Context("LastPodEventTime", func() {
+		var nodePool *karpv1.NodePool
+		BeforeEach(func() {
+			nodePool = env.DefaultNodePool(nodeClass)
+			nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{}
+
+		})
+		It("should update lastPodEventTime when pods are scheduled and removed", func() {
+			var numPods int32 = 5
+			dep := test.Deployment(test.DeploymentOptions{
+				Replicas: numPods,
+				PodOptions: test.PodOptions{
+					ObjectMeta: metav1.ObjectMeta{
+						Labels: map[string]string{"app": "regular-app"},
+					},
+					ResourceRequirements: corev1.ResourceRequirements{
+						Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("1")},
+					},
+				},
+			})
+			selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels)
+			nodePool.Spec.Disruption.Budgets = []karpv1.Budget{
+				{
+					Nodes: "0%",
+				},
+			}
+			env.ExpectCreated(nodeClass, nodePool, dep)
+
+			nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 1)
+			env.EventuallyExpectCreatedNodeCount("==", 1)
+			env.EventuallyExpectHealthyPodCount(selector, int(numPods))
+
+			nodeClaim := env.ExpectExists(nodeClaims[0]).(*karpv1.NodeClaim)
+			lastPodEventTime := nodeClaim.Status.LastPodEventTime
+
+			// wait 10 seconds so that we don't run into the de-dupe timeout
+			time.Sleep(10 * time.Second)
+
+			dep.Spec.Replicas = lo.ToPtr[int32](4)
+			By("removing one pod from the node")
+			env.ExpectUpdated(dep)
+
+			Eventually(func(g Gomega) {
+				nodeClaim = env.ExpectExists(nodeClaim).(*karpv1.NodeClaim)
+				g.Expect(nodeClaim.Status.LastPodEventTime.Time).ToNot(BeEquivalentTo(lastPodEventTime.Time))
+			}).WithTimeout(5 * time.Second).WithPolling(1 * time.Second).Should(Succeed())
+			lastPodEventTime = nodeClaim.Status.LastPodEventTime
+
+			// wait 10 seconds so that we don't run into the de-dupe timeout
+			time.Sleep(10 * time.Second)
+
+			dep.Spec.Replicas = lo.ToPtr[int32](5)
+			By("adding one pod to the node")
+			env.ExpectUpdated(dep)
+
+			Eventually(func(g Gomega) {
+				nodeClaim = env.ExpectExists(nodeClaim).(*karpv1.NodeClaim)
+				g.Expect(nodeClaim.Status.LastPodEventTime.Time).ToNot(BeEquivalentTo(lastPodEventTime.Time))
+			}).WithTimeout(5 * time.Second).WithPolling(1 * time.Second).Should(Succeed())
+		})
+		It("should update lastPodEventTime when pods go terminal", func() {
+			podLabels := map[string]string{"app": "regular-app"}
+			pod := test.Pod(test.PodOptions{
+				// use a non-pause image so that we can have a sleep
+				Image:   "alpine:3.20.2",
+				Command: []string{"/bin/sh", "-c", "sleep 30"},
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: podLabels,
+				},
+				ResourceRequirements: corev1.ResourceRequirements{
+					Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("1")},
+				},
+				RestartPolicy: corev1.RestartPolicyNever,
+			})
+			job := &batchv1.Job{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      test.RandomName(),
+					Namespace: "default",
+				},
+				Spec: batchv1.JobSpec{
+					Template: corev1.PodTemplateSpec{
+						ObjectMeta: pod.ObjectMeta,
+						Spec:       pod.Spec,
+					},
+				},
+			}
+			selector := labels.SelectorFromSet(podLabels)
+			nodePool.Spec.Disruption.Budgets = []karpv1.Budget{
+				{
+					Nodes: "0%",
+				},
+			}
+			env.ExpectCreated(nodeClass, nodePool, job)
+
+			nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 1)
+			env.EventuallyExpectCreatedNodeCount("==", 1)
+			pods := env.EventuallyExpectHealthyPodCount(selector, int(1))
+
+			// pods are healthy, which means the job has started its 30s sleep
+			nodeClaim := env.ExpectExists(nodeClaims[0]).(*karpv1.NodeClaim)
+			lastPodEventTime := nodeClaim.Status.LastPodEventTime
+
+			// wait up to a minute for the pod's sleep to finish and for the nodeclaim to update
+			Eventually(func(g Gomega) {
+				pod := env.ExpectExists(pods[0]).(*corev1.Pod)
+				g.Expect(pod.Status.Phase).To(Equal(corev1.PodSucceeded))
+				// compare .Time to .Time: the wrapper value is never equivalent to a bare time.Time
+				nodeClaim = env.ExpectExists(nodeClaims[0]).(*karpv1.NodeClaim)
+				g.Expect(nodeClaim.Status.LastPodEventTime.Time).ToNot(BeEquivalentTo(lastPodEventTime.Time))
+			}).WithTimeout(1 * time.Minute).WithPolling(10 * time.Second).Should(Succeed())
+		})
+
+	})
 	Context("Budgets", func() {
 		var nodePool *karpv1.NodePool
 		var dep *appsv1.Deployment
diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go
index e395e3abdea1..63ef94e0a3be 100644
--- a/test/suites/drift/suite_test.go
+++ b/test/suites/drift/suite_test.go
@@ -370,7 +370,7 @@ var _ = Describe("Drift", func() {
 		})
 	})
 	It("should disrupt nodes that have drifted due to AMIs", func() {
-		oldCustomAMI := fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1))
+		oldCustomAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)))
 		nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: oldCustomAMI}}

 		env.ExpectCreated(dep, nodeClass, nodePool)
@@ -391,7 +391,6 @@ var _ = Describe("Drift", func() {
 	})
 	It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() {
 		armAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", env.K8sVersion()))
-		nodeClass.Spec.AMIFamily = lo.ToPtr(v1.AMIFamilyAL2023)
 		nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: armAMI}}

 		env.ExpectCreated(dep, nodeClass, nodePool)
@@ -413,7 +412,7 @@ var _ = Describe("Drift", func() {
 	It("should not disrupt nodes that have drifted without the featureGate enabled", func() {
 		env.ExpectSettingsOverridden(corev1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=false"})

-		oldCustomAMI := fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1))
+		oldCustomAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)))
 		nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: oldCustomAMI}}

 		env.ExpectCreated(dep, nodeClass, nodePool)