diff --git a/pkg/apis/v1beta1/ec2nodeclass.go b/pkg/apis/v1beta1/ec2nodeclass.go index 97cfa82c9d5f..98b35e8961fe 100644 --- a/pkg/apis/v1beta1/ec2nodeclass.go +++ b/pkg/apis/v1beta1/ec2nodeclass.go @@ -329,6 +329,12 @@ type EC2NodeClass struct { Status EC2NodeClassStatus `json:"status,omitempty"` } +// We need to bump the EC2NodeClassHashVersion under these condition when we make an update the EC2NodeClass CRD +// 1. A EC2NodeClass adds default values to existing fields that are included in the hash calculations +// 2. A EC2NodeClass field is added to the hash calculation with an already-set value +// 3. A EC2NodeClass field is removed from the hash calculations +const EC2NodeClassHashVersion = "v1" + func (in *EC2NodeClass) Hash() string { return fmt.Sprint(lo.Must(hashstructure.Hash(in.Spec, hashstructure.FormatV2, &hashstructure.HashOptions{ SlicesAsSets: true, diff --git a/pkg/apis/v1beta1/labels.go b/pkg/apis/v1beta1/labels.go index 9e6c37905b85..a3130f18dea0 100644 --- a/pkg/apis/v1beta1/labels.go +++ b/pkg/apis/v1beta1/labels.go @@ -113,6 +113,7 @@ var ( LabelInstanceAcceleratorManufacturer = Group + "/instance-accelerator-manufacturer" LabelInstanceAcceleratorCount = Group + "/instance-accelerator-count" AnnotationEC2NodeClassHash = Group + "/ec2nodeclass-hash" + AnnotationEC2NodeClassHashVersion = Group + "/ec2nodeclass-hash-version" AnnotationInstanceTagged = Group + "/tagged" TagNodeClaim = v1beta1.Group + "/nodeclaim" diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index 116ce4d23662..4684ccdf6c30 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -101,7 +101,10 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *corev1beta1.NodeC return i.Name == instance.Type }) nc := c.instanceToNodeClaim(instance, instanceType) - nc.Annotations = lo.Assign(nc.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()}) + nc.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }) return nc, nil } diff --git a/pkg/cloudprovider/drift.go b/pkg/cloudprovider/drift.go index fe259ef397d1..9158dceaa005 100644 --- a/pkg/cloudprovider/drift.go +++ b/pkg/cloudprovider/drift.go @@ -136,8 +136,15 @@ func (c *CloudProvider) areSecurityGroupsDrifted(ctx context.Context, ec2Instanc func (c *CloudProvider) areStaticFieldsDrifted(nodeClaim *corev1beta1.NodeClaim, nodeClass *v1beta1.EC2NodeClass) cloudprovider.DriftReason { nodeClassHash, foundHashNodeClass := nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash] + nodeClassHashVersion, foundHashNodeClassVersion := nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] nodeClaimHash, foundHashNodeClaim := nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHash] - if !foundHashNodeClass || !foundHashNodeClaim { + nodeClaimHashVersion, foundHashNodeClaimVersion := nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] + + if !foundHashNodeClass || !foundHashNodeClaim || !foundHashNodeClassVersion || !foundHashNodeClaimVersion { + return "" + } + // validate that the version of the crd is the same + if nodeClassHashVersion != nodeClaimHashVersion { return "" } return lo.Ternary(nodeClassHash != nodeClaimHash, NodeClassDrift, "") diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 0b5b76ddb0c2..f66586e497ae 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -593,10 +593,14 @@ var _ = Describe("CloudProvider", func() { Reservations: []*ec2.Reservation{{Instances: []*ec2.Instance{instance}}}, }) nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ - v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, }) nodeClaim.Status.ProviderID = fake.ProviderID(lo.FromPtr(instance.InstanceId)) - nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()}) + nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }) nodeClaim.Labels = lo.Assign(nodeClaim.Labels, map[string]string{v1.LabelInstanceTypeStable: selectedInstanceType.Name}) }) It("should not fail if NodeClass does not exist", func() { @@ -770,6 +774,20 @@ var _ = Describe("CloudProvider", func() { Expect(err).NotTo(HaveOccurred()) Expect(isDrifted).To(BeEmpty()) }) + It("should not return drifted if karpenter.sh/ec2nodeclass-hash-version annotation version does not match the EC2NodeClass on the NodeClaim", func() { + nodeClass.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-1", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-version-1", + } + nodeClaim.ObjectMeta.Annotations = map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "test-hash-1", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-version-2", + } + ExpectApplied(ctx, env.Client, nodePool, nodeClaim) + isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) + Expect(err).NotTo(HaveOccurred()) + Expect(isDrifted).To(BeEmpty()) + }) }) }) Context("Subnet Compatibility", func() { diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index 6c3a9a1204ec..d6e5b84d1262 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -80,7 +80,17 @@ func NewController(kubeClient client.Client, recorder events.Recorder, subnetPro func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { stored := nodeClass.DeepCopy() controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer) - nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()}) + + if nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] != v1beta1.EC2NodeClassHashVersion { + if err := c.updateNodeClaimHash(ctx, nodeClass); err != nil { + return reconcile.Result{}, err + } + } + nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }) + err := multierr.Combine( c.resolveSubnets(ctx, nodeClass), c.resolveSecurityGroups(ctx, nodeClass), @@ -222,6 +232,39 @@ func (c *Controller) resolveInstanceProfile(ctx context.Context, nodeClass *v1be return nil } +func (c *Controller) updateNodeClaimHash(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) error { + ncList := &corev1beta1.NodeClaimList{} + if err := c.kubeClient.List(ctx, ncList, client.MatchingFields{"spec.nodeClassRef.name": nodeClass.Name}); err != nil { + return err + } + + for i := range ncList.Items { + stored := ncList.Items[i].DeepCopy() + + if ncList.Items[i].Annotations[v1beta1.EC2NodeClassHashVersion] != v1beta1.EC2NodeClassHashVersion { + ncList.Items[i].Annotations = lo.Assign(ncList.Items[i].Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, + }) + + // Any NodeClaim that is already drifted will remain drifted if the karpenter.sh/ec2nodeclass-hash-version doesn't match + // Since the hashing mechanism has changed we will not be able to determine if the drifted status of the node has changed + if ncList.Items[i].StatusConditions().GetCondition(corev1beta1.Drifted) == nil { + ncList.Items[i].Annotations = lo.Assign(ncList.Items[i].Annotations, map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), + }) + } + + if !equality.Semantic.DeepEqual(stored, ncList.Items[i]) { + if err := c.kubeClient.Patch(ctx, &ncList.Items[i], client.MergeFrom(stored)); err != nil { + return client.IgnoreNotFound(err) + } + } + } + } + + return nil +} + func (c *Controller) Name() string { return "nodeclass" } diff --git a/pkg/controllers/nodeclass/suite_test.go b/pkg/controllers/nodeclass/suite_test.go index 129b5be8decb..34b588f2f76f 100644 --- a/pkg/controllers/nodeclass/suite_test.go +++ b/pkg/controllers/nodeclass/suite_test.go @@ -23,6 +23,7 @@ import ( "github.com/imdario/mergo" "github.com/samber/lo" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/record" _ "knative.dev/pkg/system/testing" "sigs.k8s.io/controller-runtime/pkg/client" @@ -833,7 +834,7 @@ var _ = Describe("NodeClassController", func() { }) }) Context("Static Drift Hash", func() { - DescribeTable("should update the static drift hash when static field is updated", func(changes *v1beta1.EC2NodeClass) { + DescribeTable("should update the drift hash when static field is updated", func(changes *v1beta1.EC2NodeClass) { ExpectApplied(ctx, env.Client, nodeClass) ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) nodeClass = ExpectExists(ctx, env.Client, nodeClass) @@ -860,7 +861,7 @@ var _ = Describe("NodeClassController", func() { Entry("MetadataOptions Drift", &v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{MetadataOptions: &v1beta1.MetadataOptions{HTTPEndpoint: aws.String("disabled")}}}), Entry("Context Drift", &v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{Context: aws.String("context-2")}}), ) - It("should not update the static drift hash when dynamic field is updated", func() { + It("should not update the drift hash when dynamic field is updated", func() { ExpectApplied(ctx, env.Client, nodeClass) ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) nodeClass = ExpectExists(ctx, env.Client, nodeClass) @@ -889,6 +890,118 @@ var _ = Describe("NodeClassController", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash]).To(Equal(expectedHash)) }) + It("should update ec2nodeclass hash version when the ec2nodeclass hash version is out of sync with the controller", func() { + nodeClass.Annotations = map[string]string{v1beta1.AnnotationEC2NodeClassHash: "test"} + ExpectApplied(ctx, env.Client, nodeClass) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + + expectedHash := nodeClass.Hash() + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }) + It("should update ec2nodeclass hash versions on all nodeclaims when the hash versions don't match", func() { + nodeClaimOne := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "123456", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + nodeClaimTwo := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "123456", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + + ExpectApplied(ctx, env.Client, nodeClass, nodeClaimOne, nodeClaimTwo) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + nodeClaimOne = ExpectExists(ctx, env.Client, nodeClaimOne) + nodeClaimTwo = ExpectExists(ctx, env.Client, nodeClaimTwo) + + expectedHash := nodeClass.Hash() + Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }) + It("should not update ec2nodeclass hash on all nodeclaims when the hash versions match", func() { + nodeClaim := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "1234564654", + v1beta1.AnnotationEC2NodeClassHashVersion: "test-version", + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + ExpectApplied(ctx, env.Client, nodeClass, nodeClaim) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + + expectedHash := nodeClass.Hash() + + // Expect NodeClaims to have been updated to the original hash + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + + nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHash] = "test-hash" + ExpectApplied(ctx, env.Client, nodeClass, nodeClaim) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, "test-hash")) + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }) + It("should not update ec2nodeclass hash on the nodeclaim if it's drifted", func() { + nodeClaim := coretest.NodeClaim(corev1beta1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + v1beta1.AnnotationEC2NodeClassHash: "123456", + v1beta1.AnnotationEC2NodeClassHashVersion: "test", + }, + }, + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }) + nodeClaim.StatusConditions().MarkTrue(corev1beta1.Drifted) + ExpectApplied(ctx, env.Client, nodeClaim) + + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + + // Expect NodeClaims to have been updated to the original hash + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, "123456")) + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, "test")) + }) }) Context("NodeClass Termination", func() { var profileName string diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index 0deda0d5f60f..71469a0ff945 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -719,6 +719,23 @@ var _ = Describe("Drift", func() { env.EventuallyExpectNotFound(pod, node) env.EventuallyExpectHealthyPodCount(selector, numPods) }) + It("should update NodeClass hash on the NodeClass and NodeClaim", func() { + nodeClass.Annotations = map[string]string{v1beta1.AnnotationEC2NodeClassHashVersion: "test-version"} + nodeClass.Annotations = map[string]string{v1beta1.AnnotationEC2NodeClassHash: "test-adfdhhcnkdskj"} + + env.ExpectCreated(dep, nodeClass, nodePool) + env.EventuallyExpectHealthyPodCount(selector, numPods) + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + err := env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass) + Expect(err).ToNot(HaveOccurred()) + + expectedHash := nodeClass.Hash() + + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash)) + Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion)) + }) Context("Failure", func() { It("should not continue to drift if a node never registers", func() { // launch a new nodeClaim