From f141a10b574d55e91181fddc2cd1264283b84869 Mon Sep 17 00:00:00 2001 From: Amanuel Engeda <74629455+engedaam@users.noreply.github.com> Date: Tue, 12 Mar 2024 13:45:10 -0700 Subject: [PATCH 1/3] feat: Add Versioned for EC2NodeClass Hash to Prevent Drift on EC2NodeClass CRD Upgrade (#5770) --- pkg/apis/v1beta1/ec2nodeclass.go | 6 ++ pkg/apis/v1beta1/labels.go | 1 + pkg/cloudprovider/cloudprovider.go | 5 +- pkg/cloudprovider/drift.go | 13 ++- pkg/cloudprovider/suite_test.go | 62 +++++++++++- pkg/controllers/nodeclass/controller.go | 51 +++++++++- pkg/controllers/nodeclass/suite_test.go | 127 +++++++++++++++++++++++- test/suites/drift/suite_test.go | 49 +++++++++ 8 files changed, 303 insertions(+), 11 deletions(-) diff --git a/pkg/apis/v1beta1/ec2nodeclass.go b/pkg/apis/v1beta1/ec2nodeclass.go index 17b607278581..f31b7459fb4d 100644 --- a/pkg/apis/v1beta1/ec2nodeclass.go +++ b/pkg/apis/v1beta1/ec2nodeclass.go @@ -325,6 +325,12 @@ type EC2NodeClass struct { Status EC2NodeClassStatus `json:"status,omitempty"` } +// We need to bump the EC2NodeClassHashVersion when we make an update to the EC2NodeClass CRD under these conditions: +// 1. A field changes its default value for an existing field that is already hashed +// 2. A field is added to the hash calculation with an already-set value +// 3. 
A field is removed from the hash calculations +const EC2NodeClassHashVersion = "v1" + func (in *EC2NodeClass) Hash() string { return fmt.Sprint(lo.Must(hashstructure.Hash(in.Spec, hashstructure.FormatV2, &hashstructure.HashOptions{ SlicesAsSets: true, diff --git a/pkg/apis/v1beta1/labels.go b/pkg/apis/v1beta1/labels.go index 1b86eb70726f..d765ca7afe32 100644 --- a/pkg/apis/v1beta1/labels.go +++ b/pkg/apis/v1beta1/labels.go @@ -110,5 +110,6 @@ var ( LabelInstanceAcceleratorManufacturer = Group + "/instance-accelerator-manufacturer" LabelInstanceAcceleratorCount = Group + "/instance-accelerator-count" AnnotationEC2NodeClassHash = Group + "/ec2nodeclass-hash" + AnnotationEC2NodeClassHashVersion = Group + "/ec2nodeclass-hash-version" AnnotationInstanceTagged = Group + "/tagged" ) diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index 006e0ea71741..9329b61f4d9a 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -101,7 +101,10 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *corev1beta1.NodeC return i.Name == instance.Type }) nc := c.instanceToNodeClaim(instance, instanceType) - nc.Annotations = lo.Assign(nc.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()}) + nc.Annotations = lo.Assign(nc.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }) return nc, nil } diff --git a/pkg/cloudprovider/drift.go b/pkg/cloudprovider/drift.go index fe259ef397d1..69dc5aad1783 100644 --- a/pkg/cloudprovider/drift.go +++ b/pkg/cloudprovider/drift.go @@ -135,9 +135,16 @@ func (c *CloudProvider) areSecurityGroupsDrifted(ctx context.Context, ec2Instanc } func (c *CloudProvider) areStaticFieldsDrifted(nodeClaim *corev1beta1.NodeClaim, nodeClass *v1beta1.EC2NodeClass) cloudprovider.DriftReason { - nodeClassHash, foundHashNodeClass := 
nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash] - nodeClaimHash, foundHashNodeClaim := nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHash] - if !foundHashNodeClass || !foundHashNodeClaim { + nodeClassHash, foundNodeClassHash := nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash] + nodeClassHashVersion, foundNodeClassHashVersion := nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] + nodeClaimHash, foundNodeClaimHash := nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHash] + nodeClaimHashVersion, foundNodeClaimHashVersion := nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] + + if !foundNodeClassHash || !foundNodeClaimHash || !foundNodeClassHashVersion || !foundNodeClaimHashVersion { + return "" + } + // validate that the hash version for the EC2NodeClass is the same as the NodeClaim before evaluating for static drift + if nodeClassHashVersion != nodeClaimHashVersion { return "" } return lo.Ternary(nodeClassHash != nodeClaimHash, NodeClassDrift, "") diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 8e5938012887..2f2e08047266 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -293,10 +293,14 @@ var _ = Describe("CloudProvider", func() { Reservations: []*ec2.Reservation{{Instances: []*ec2.Instance{instance}}}, }) nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ - v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, }) nodeClaim.Status.ProviderID = fake.ProviderID(lo.FromPtr(instance.InstanceId)) - nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()}) + nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + 
v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }) nodeClaim.Labels = lo.Assign(nodeClaim.Labels, map[string]string{v1.LabelInstanceTypeStable: selectedInstanceType.Name}) }) It("should not fail if NodeClass does not exist", func() { @@ -460,8 +464,58 @@ var _ = Describe("CloudProvider", func() { Entry("Subnet Drift", v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{SubnetSelectorTerms: []v1beta1.SubnetSelectorTerm{{Tags: map[string]string{"sn-key-1": "sn-value-1"}}}}}), Entry("SecurityGroup Drift", v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{SecurityGroupSelectorTerms: []v1beta1.SecurityGroupSelectorTerm{{Tags: map[string]string{"sg-key": "sg-value"}}}}}), ) - It("should not return drifted if karpenter.k8s.aws/nodeclass-hash annotation is not present on the NodeClaim", func() { - nodeClaim.Annotations = map[string]string{} + It("should not return drifted if karpenter.k8s.aws/ec2nodeclass-hash annotation is not present on the NodeClaim", func() { + nodeClaim.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + } + nodeClass.Spec.Tags = map[string]string{ + "Test Key": "Test Value", + } + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) + Expect(err).NotTo(HaveOccurred()) + Expect(isDrifted).To(BeEmpty()) + }) + It("should not return drifted if the NodeClaim's karpenter.k8s.aws/ec2nodeclass-hash-version annotation does not match the EC2NodeClass's", func() { + nodeClass.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-111111", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1", + } + nodeClaim.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-222222", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2", + } + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + isDrifted, err := 
cloudProvider.IsDrifted(ctx, nodeClaim) + Expect(err).NotTo(HaveOccurred()) + Expect(isDrifted).To(BeEmpty()) + }) + It("should not return drifted if karpenter.k8s.aws/ec2nodeclass-hash-version annotation is not present on the NodeClass", func() { + nodeClass.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-111111", + } + nodeClaim.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-222222", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2", + } + // should trigger drift + nodeClass.Spec.Tags = map[string]string{ + "Test Key": "Test Value", + } + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) + Expect(err).NotTo(HaveOccurred()) + Expect(isDrifted).To(BeEmpty()) + }) + It("should not return drifted if karpenter.k8s.aws/ec2nodeclass-hash-version annotation is not present on the NodeClaim", func() { + nodeClass.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-111111", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1", + } + nodeClaim.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-222222", + } + // should trigger drift nodeClass.Spec.Tags = map[string]string{ "Test Key": "Test Value", } diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index 7ebd8decfb3d..f38a6e65573c 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -80,7 +80,17 @@ func NewController(kubeClient client.Client, recorder events.Recorder, subnetPro func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { stored := nodeClass.DeepCopy() controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer) - nodeClass.Annotations = lo.Assign(nodeClass.Annotations, 
map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()}) + + if nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] != v1beta1.EC2NodeClassHashVersion { + if err := c.updateNodeClaimHash(ctx, nodeClass); err != nil { + return reconcile.Result{}, err + } + } + nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }) + err := multierr.Combine( c.resolveSubnets(ctx, nodeClass), c.resolveSecurityGroups(ctx, nodeClass), @@ -217,6 +227,45 @@ func (c *Controller) resolveInstanceProfile(ctx context.Context, nodeClass *v1be return nil } +// Updating `ec2nodeclass-hash-version` annotation inside the karpenter controller means a breaking change has been made to the hash calculation. +// `ec2nodeclass-hash` annotation on the EC2NodeClass will be updated, due to the breaking change, making the `ec2nodeclass-hash` on the NodeClaim different from +// EC2NodeClass. Since, we cannot rely on the `ec2nodeclass-hash` on the NodeClaims, due to the breaking change, we will need to re-calculate the hash and update the annotation. 
+// For more information on the Drift Hash Versioning: https://github.com/kubernetes-sigs/karpenter/blob/main/designs/drift-hash-versioning.md +func (c *Controller) updateNodeClaimHash(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) error { + ncList := &corev1beta1.NodeClaimList{} + if err := c.kubeClient.List(ctx, ncList, client.MatchingFields{"spec.nodeClassRef.name": nodeClass.Name}); err != nil { + return err + } + + errs := make([]error, len(ncList.Items)) + for i := range ncList.Items { + nc := &ncList.Items[i] + stored := nc.DeepCopy() + + if nc.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] != v1beta1.EC2NodeClassHashVersion { + nc.Annotations = lo.Assign(nc.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }) + + // Any NodeClaim that is already drifted will remain drifted if the karpenter.k8s.aws/ec2nodeclass-hash-version doesn't match + // Since the hashing mechanism has changed we will not be able to determine if the drifted status of the NodeClaim has changed + if nc.StatusConditions().GetCondition(corev1beta1.Drifted) == nil { + nc.Annotations = lo.Assign(nc.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + }) + } + + if !equality.Semantic.DeepEqual(stored, nc) { + if err := c.kubeClient.Patch(ctx, nc, client.MergeFrom(stored)); err != nil { + errs[i] = client.IgnoreNotFound(err) + } + } + } + } + + return multierr.Combine(errs...) 
+} + func (c *Controller) Name() string { return "nodeclass" } diff --git a/pkg/controllers/nodeclass/suite_test.go b/pkg/controllers/nodeclass/suite_test.go index 13526cb17841..53521fc84193 100644 --- a/pkg/controllers/nodeclass/suite_test.go +++ b/pkg/controllers/nodeclass/suite_test.go @@ -23,6 +23,7 @@ import ( "github.com/imdario/mergo" "github.com/samber/lo" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/record" _ "knative.dev/pkg/system/testing" "sigs.k8s.io/controller-runtime/pkg/client" @@ -758,7 +759,7 @@ var _ = Describe("NodeClassController", func() { }) }) Context("Static Drift Hash", func() { - DescribeTable("should update the static drift hash when static field is updated", func(changes *v1beta1.EC2NodeClass) { + DescribeTable("should update the drift hash when static field is updated", func(changes *v1beta1.EC2NodeClass) { ExpectApplied(ctx, env.Client, nodeClass) ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) nodeClass = ExpectExists(ctx, env.Client, nodeClass) @@ -785,7 +786,7 @@ var _ = Describe("NodeClassController", func() { Entry("MetadataOptions Drift", &v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{MetadataOptions: &v1beta1.MetadataOptions{HTTPEndpoint: aws.String("disabled")}}}), Entry("Context Drift", &v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{Context: aws.String("context-2")}}), ) - It("should not update the static drift hash when dynamic field is updated", func() { + It("should not update the drift hash when dynamic field is updated", func() { ExpectApplied(ctx, env.Client, nodeClass) ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) nodeClass = ExpectExists(ctx, env.Client, nodeClass) @@ -814,6 +815,128 @@ var _ = Describe("NodeClassController", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) 
Expect(nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash]).To(Equal(expectedHash)) }) + It("should update ec2nodeclass-hash-version annotation when the ec2nodeclass-hash-version on the NodeClass does not match with the controller hash version", func() { + nodeClass.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "abceduefed", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + } + ExpectApplied(ctx, env.Client, nodeClass) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + + expectedHash := nodeClass.Hash() + // Expect ec2nodeclass-hash on the NodeClass to be updated + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }) + It("should update ec2nodeclass-hash-versions on all NodeClaims when the ec2nodeclass-hash-version does not match with the controller hash version", func() { + nodeClass.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "abceduefed", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + } + nodeClaimOne := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "123456", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + nodeClaimTwo := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "123456", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + + 
ExpectApplied(ctx, env.Client, nodeClass, nodeClaimOne, nodeClaimTwo) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + nodeClaimOne = ExpectExists(ctx, env.Client, nodeClaimOne) + nodeClaimTwo = ExpectExists(ctx, env.Client, nodeClaimTwo) + + expectedHash := nodeClass.Hash() + // Expect ec2nodeclass-hash on the NodeClaims to be updated + Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }) + It("should not update ec2nodeclass-hash on all NodeClaims when the ec2nodeclass-hash-version matches the controller hash version", func() { + nodeClass.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "abceduefed", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-version", + } + nodeClaim := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "1234564654", + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + ExpectApplied(ctx, env.Client, nodeClass, nodeClaim) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + + expectedHash := nodeClass.Hash() + + // Expect ec2nodeclass-hash on the NodeClass to be updated + 
Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + // Expect ec2nodeclass-hash on the NodeClaims to stay the same + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, "1234564654")) + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }) + It("should not update ec2nodeclass-hash on the NodeClaim if it's drifted and the ec2nodeclass-hash-version does not match the controller hash version", func() { + nodeClass.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "abceduefed", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + } + nodeClaim := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "123456", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + nodeClaim.StatusConditions().MarkTrue(corev1beta1.Drifted) + ExpectApplied(ctx, env.Client, nodeClass, nodeClaim) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + + // Expect ec2nodeclass-hash on the NodeClaims to stay the same + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, "123456")) + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }) }) Context("NodeClass Termination", func() { var profileName string diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index f5890f6b96a4..3911bde09bf3 100644 --- a/test/suites/drift/suite_test.go 
+++ b/test/suites/drift/suite_test.go @@ -711,6 +711,55 @@ var _ = Describe("Drift", func() { env.EventuallyExpectNotFound(pod, node) env.EventuallyExpectHealthyPodCount(selector, numPods) }) + It("should update the ec2nodeclass-hash annotation on the ec2nodeclass and nodeclaim when the ec2nodeclass's ec2nodeclass-hash-version annotation does not match the controller hash version", func() { + env.ExpectCreated(dep, nodeClass, nodePool) + env.EventuallyExpectHealthyPodCount(selector, numPods) + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + nodeClass = env.ExpectExists(nodeClass).(*v1beta1.EC2NodeClass) + expectedHash := nodeClass.Hash() + + By(fmt.Sprintf("expect nodeclass %s and nodeclaim %s to contain %s and %s annotations", nodeClass.Name, nodeClaim.Name, v1beta1.AnnotationEC2NodeClassHash, v1beta1.AnnotationEC2NodeClassHashVersion)) + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }).WithTimeout(30 * time.Second).Should(Succeed()) + + nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-1", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1", + }) + // Updating `nodeClass.Spec.Tags` would normally trigger drift on all nodeclaims using the + // nodeclass. 
However, the ec2nodeclass-hash-version does not match the controller hash version, so we will see that + // none of the nodeclaims will be drifted and all nodeclaims will have an updated `ec2nodeclass-hash` and `ec2nodeclass-hash-version` annotation + nodeClass.Spec.Tags = lo.Assign(nodeClass.Spec.Tags, map[string]string{ + "test-key": "test-value", + }) + nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-2", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2", + }) + + // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodeclass + env.ExpectUpdated(nodeClaim, nodeClass) + expectedHash = nodeClass.Hash() + + // Expect all nodeclaims not to be drifted and contain an updated `ec2nodeclass-hash` and `ec2nodeclass-hash-version` annotation + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }).WithTimeout(30 * time.Second).Should(Succeed()) + env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim) + }) Context("Failure", func() { It("should not continue to drift if a node never registers", func() { // launch a new nodeClaim From e908fb60b6c4caca80f83c712a2f50d2856bdb76 Mon Sep 17 00:00:00 2001 From: Amanuel Engeda <74629455+engedaam@users.noreply.github.com> Date: Tue, 
12 Mar 2024 10:20:18 -0700 Subject: [PATCH 2/3] test: Add E2E tests for NodePools Hash Version (#5803) --- test/pkg/environment/common/expectations.go | 12 +++++ test/suites/drift/suite_test.go | 49 +++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go index 03bb0c4d4b05..5be7869cfb60 100644 --- a/test/pkg/environment/common/expectations.go +++ b/test/pkg/environment/common/expectations.go @@ -696,6 +696,18 @@ func (env *Environment) EventuallyExpectDrifted(nodeClaims ...*corev1beta1.NodeC }).Should(Succeed()) } +func (env *Environment) ConsistentlyExpectNodeClaimsNotDrifted(duration time.Duration, nodeClaims ...*corev1beta1.NodeClaim) { + GinkgoHelper() + nodeClaimNames := lo.Map(nodeClaims, func(nc *corev1beta1.NodeClaim, _ int) string { return nc.Name }) + By(fmt.Sprintf("consistently expect nodeclaims %s not to be drifted for %s", nodeClaimNames, duration)) + Consistently(func(g Gomega) { + for _, nc := range nodeClaims { + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed()) + g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Drifted)).To(BeNil()) + } + }, duration).Should(Succeed()) +} + func (env *Environment) EventuallyExpectEmpty(nodeClaims ...*corev1beta1.NodeClaim) { GinkgoHelper() Eventually(func(g Gomega) { diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index 3911bde09bf3..d19f458a76a7 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -760,6 +760,55 @@ var _ = Describe("Drift", func() { }).WithTimeout(30 * time.Second).Should(Succeed()) env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim) }) + It("should update the nodepool-hash annotation on the nodepool and nodeclaim when the nodepool's nodepool-hash-version annotation does not match the controller hash version", func() { + env.ExpectCreated(dep, nodeClass, nodePool) + 
env.EventuallyExpectHealthyPodCount(selector, numPods) + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + nodePool = env.ExpectExists(nodePool).(*corev1beta1.NodePool) + expectedHash := nodePool.Hash() + + By(fmt.Sprintf("expect nodepool %s and nodeclaim %s to contain %s and %s annotations", nodePool.Name, nodeClaim.Name, corev1beta1.NodePoolHashAnnotationKey, corev1beta1.NodePoolHashVersionAnnotationKey)) + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashVersionAnnotationKey, corev1beta1.NodePoolHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashVersionAnnotationKey, corev1beta1.NodePoolHashVersion)) + }).WithTimeout(30 * time.Second).Should(Succeed()) + + nodePool.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ + corev1beta1.NodePoolHashAnnotationKey: "test-hash-1", + corev1beta1.NodePoolHashVersionAnnotationKey: "test-hash-version-1", + }) + // Updating `nodePool.Spec.Template.Annotations` would normally trigger drift on all nodeclaims owned by the + // nodepool. 
However, the nodepool-hash-version does not match the controller hash version, so we will see that + // none of the nodeclaims will be drifted and all nodeclaims will have an updated `nodepool-hash` and `nodepool-hash-version` annotation + nodePool.Spec.Template.Annotations = lo.Assign(nodePool.Spec.Template.Annotations, map[string]string{ + "test-key": "test-value", + }) + nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{ + corev1beta1.NodePoolHashAnnotationKey: "test-hash-2", + corev1beta1.NodePoolHashVersionAnnotationKey: "test-hash-version-2", + }) + + // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodepool + env.ExpectUpdated(nodeClaim, nodePool) + expectedHash = nodePool.Hash() + + // Expect all nodeclaims not to be drifted and contain an updated `nodepool-hash` and `nodepool-hash-version` annotation + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashVersionAnnotationKey, corev1beta1.NodePoolHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashVersionAnnotationKey, corev1beta1.NodePoolHashVersion)) + }).WithTimeout(30 * time.Second).Should(Succeed()) + env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim) + }) Context("Failure", func() { It("should not continue to drift if a node never registers", func() { // launch a new nodeClaim From 247decc092c2ae96faf3a17f0cb81fa80bc83f11 Mon Sep 17 00:00:00 2001 From: Amanuel Engeda Date: Tue, 26 Mar 2024 
09:24:47 -0700 Subject: [PATCH 3/3] Update Karpenter --- go.mod | 2 +- go.sum | 4 ++-- pkg/apis/crds/karpenter.sh_nodepools.yaml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index 0509e00e2463..ae1bdc7d47eb 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( k8s.io/utils v0.0.0-20230726121419-3b25d923346b knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd sigs.k8s.io/controller-runtime v0.17.0 - sigs.k8s.io/karpenter v0.34.3 + sigs.k8s.io/karpenter v0.34.4-0.20240326162120-3f66cef5ae81 ) require ( diff --git a/go.sum b/go.sum index 337bf6e31774..ab9984a3da33 100644 --- a/go.sum +++ b/go.sum @@ -760,8 +760,8 @@ sigs.k8s.io/controller-runtime v0.17.0 h1:fjJQf8Ukya+VjogLO6/bNX9HE6Y2xpsO5+fyS2 sigs.k8s.io/controller-runtime v0.17.0/go.mod h1:+MngTvIQQQhfXtwfdGw/UOQ/aIaqsYywfCINOtwMO/s= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/karpenter v0.34.3 h1:fSPsFiMFlZAwVKQmQ+C5SuNbo4xZDsBZgaSKrKEqnyo= -sigs.k8s.io/karpenter v0.34.3/go.mod h1:3eBol/ltKkvJ0heLDFRUx94olkDC1ICiX3CqvwlQtHU= +sigs.k8s.io/karpenter v0.34.4-0.20240326162120-3f66cef5ae81 h1:AFNAkmvLkTk5KDWI/cxOtFfKaLY9tLSAgvzpZUOpBp0= +sigs.k8s.io/karpenter v0.34.4-0.20240326162120-3f66cef5ae81/go.mod h1:3eBol/ltKkvJ0heLDFRUx94olkDC1ICiX3CqvwlQtHU= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index b1172df50846..9cb3ef612b21 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -80,7 +80,7 @@ spec: This is required if Schedule is set. 
This regex has an optional 0s at the end since the duration.String() always adds a 0s at the end. - pattern: ^([0-9]+(m|h)+(0s)?)$ + pattern: ^((([0-9]+(h|m))|([0-9]+h[0-9]+m))(0s)?)$ type: string nodes: default: 10%