Skip to content

Commit

Permalink
Add Versioned Hash
Browse files Browse the repository at this point in the history
  • Loading branch information
engedaam committed Mar 9, 2024
1 parent 108bdf5 commit c939227
Show file tree
Hide file tree
Showing 9 changed files with 265 additions and 7 deletions.
6 changes: 6 additions & 0 deletions pkg/apis/v1beta1/ec2nodeclass.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,12 @@ type EC2NodeClass struct {
Status EC2NodeClassStatus `json:"status,omitempty"`
}

// We need to bump the EC2NodeClassHashVersion when we make an update to the EC2NodeClass CRD under these conditions:
// 1. A field changes its default value for an existing field that is already hashed
// 2. A field is added to the hash calculation with an already-set value
// 3. A field is removed from the hash calculations
const EC2NodeClassHashVersion = "v1"

func (in *EC2NodeClass) Hash() string {
return fmt.Sprint(lo.Must(hashstructure.Hash(in.Spec, hashstructure.FormatV2, &hashstructure.HashOptions{
SlicesAsSets: true,
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/v1beta1/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ var (
LabelInstanceAcceleratorManufacturer = Group + "/instance-accelerator-manufacturer"
LabelInstanceAcceleratorCount = Group + "/instance-accelerator-count"
AnnotationEC2NodeClassHash = Group + "/ec2nodeclass-hash"
AnnotationEC2NodeClassHashVersion = Group + "/ec2nodeclass-hash-version"
AnnotationInstanceTagged = Group + "/tagged"

TagNodeClaim = v1beta1.Group + "/nodeclaim"
Expand Down
5 changes: 4 additions & 1 deletion pkg/cloudprovider/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,10 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *corev1beta1.NodeC
return i.Name == instance.Type
})
nc := c.instanceToNodeClaim(instance, instanceType)
nc.Annotations = lo.Assign(nc.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})
nc.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{
v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
})
return nc, nil
}

Expand Down
9 changes: 8 additions & 1 deletion pkg/cloudprovider/drift.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,15 @@ func (c *CloudProvider) areSecurityGroupsDrifted(ctx context.Context, ec2Instanc

func (c *CloudProvider) areStaticFieldsDrifted(nodeClaim *corev1beta1.NodeClaim, nodeClass *v1beta1.EC2NodeClass) cloudprovider.DriftReason {
nodeClassHash, foundHashNodeClass := nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash]
nodeClassHashVersion, foundHashNodeClassVersion := nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion]
nodeClaimHash, foundHashNodeClaim := nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHash]
if !foundHashNodeClass || !foundHashNodeClaim {
nodeClaimHashVersion, foundHashNodeClaimVersion := nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion]

if !foundHashNodeClass || !foundHashNodeClaim || !foundHashNodeClassVersion || !foundHashNodeClaimVersion {
return ""
}
// validate that the version of the crd is the same
if nodeClassHashVersion != nodeClaimHashVersion {
return ""
}
return lo.Ternary(nodeClassHash != nodeClaimHash, NodeClassDrift, "")
Expand Down
22 changes: 20 additions & 2 deletions pkg/cloudprovider/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -593,10 +593,14 @@ var _ = Describe("CloudProvider", func() {
Reservations: []*ec2.Reservation{{Instances: []*ec2.Instance{instance}}},
})
nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{
v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
})
nodeClaim.Status.ProviderID = fake.ProviderID(lo.FromPtr(instance.InstanceId))
nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})
nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{
v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
})
nodeClaim.Labels = lo.Assign(nodeClaim.Labels, map[string]string{v1.LabelInstanceTypeStable: selectedInstanceType.Name})
})
It("should not fail if NodeClass does not exist", func() {
Expand Down Expand Up @@ -770,6 +774,20 @@ var _ = Describe("CloudProvider", func() {
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
})
It("should not return drifted if the NodeClaim's karpenter.sh/ec2nodeclass-hash-version annotation does not match the EC2NodeClass's", func() {
nodeClass.ObjectMeta.Annotations = map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "test-hash-1",
v1beta1.AnnotationEC2NodeClassHashVersion: "test-version-1",
}
nodeClaim.ObjectMeta.Annotations = map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "test-hash-1",
v1beta1.AnnotationEC2NodeClassHashVersion: "test-version-2",
}
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
})
})
})
Context("Subnet Compatibility", func() {
Expand Down
46 changes: 45 additions & 1 deletion pkg/controllers/nodeclass/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,17 @@ func NewController(kubeClient client.Client, recorder events.Recorder, subnetPro
func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) {
stored := nodeClass.DeepCopy()
controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer)
nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})

if nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] != v1beta1.EC2NodeClassHashVersion {
if err := c.updateNodeClaimHash(ctx, nodeClass); err != nil {
return reconcile.Result{}, err
}
}
nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{
v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
})

err := multierr.Combine(
c.resolveSubnets(ctx, nodeClass),
c.resolveSecurityGroups(ctx, nodeClass),
Expand Down Expand Up @@ -222,6 +232,40 @@ func (c *Controller) resolveInstanceProfile(ctx context.Context, nodeClass *v1be
return nil
}

func (c *Controller) updateNodeClaimHash(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) error {
ncList := &corev1beta1.NodeClaimList{}
if err := c.kubeClient.List(ctx, ncList, client.MatchingFields{"spec.nodeClassRef.name": nodeClass.Name}); err != nil {
return err
}

for i := range ncList.Items {
nc := ncList.Items[i]
stored := nc.DeepCopy()

if nc.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] != v1beta1.EC2NodeClassHashVersion {
nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
})

// Any NodeClaim that is already drifted will remain drifted if the karpenter.sh/ec2nodeclass-hash-version doesn't match
// Since the hashing mechanism has changed we will not be able to determine if the drifted status of the node has changed
if nc.StatusConditions().GetCondition(corev1beta1.Drifted) == nil {
nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
})
}

if !equality.Semantic.DeepEqual(stored, nc) {
if err := c.kubeClient.Patch(ctx, &nc, client.MergeFrom(stored)); err != nil {
return client.IgnoreNotFound(err)
}
}
}
}

return nil
}

func (c *Controller) Name() string {
return "nodeclass"
}
Expand Down
124 changes: 122 additions & 2 deletions pkg/controllers/nodeclass/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/imdario/mergo"
"github.com/samber/lo"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
_ "knative.dev/pkg/system/testing"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -833,7 +834,7 @@ var _ = Describe("NodeClassController", func() {
})
})
Context("Static Drift Hash", func() {
DescribeTable("should update the static drift hash when static field is updated", func(changes *v1beta1.EC2NodeClass) {
DescribeTable("should update the drift hash when static field is updated", func(changes *v1beta1.EC2NodeClass) {
ExpectApplied(ctx, env.Client, nodeClass)
ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
Expand All @@ -860,7 +861,7 @@ var _ = Describe("NodeClassController", func() {
Entry("MetadataOptions Drift", &v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{MetadataOptions: &v1beta1.MetadataOptions{HTTPEndpoint: aws.String("disabled")}}}),
Entry("Context Drift", &v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{Context: aws.String("context-2")}}),
)
It("should not update the static drift hash when dynamic field is updated", func() {
It("should not update the drift hash when dynamic field is updated", func() {
ExpectApplied(ctx, env.Client, nodeClass)
ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
Expand Down Expand Up @@ -889,6 +890,125 @@ var _ = Describe("NodeClassController", func() {
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
Expect(nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash]).To(Equal(expectedHash))
})
It("should update ec2nodeclass hash version when the ec2nodeclass hash version is out of sync with the controller hash version", func() {
nodeClass.Annotations = map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "abceduefed",
v1beta1.AnnotationEC2NodeClassHashVersion: "test",
}
ExpectApplied(ctx, env.Client, nodeClass)

ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
nodeClass = ExpectExists(ctx, env.Client, nodeClass)

expectedHash := nodeClass.Hash()
Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
})
It("should update ec2nodeclass hash versions on all nodeclaims when the hash versions don't match with the controller hash version", func() {
nodeClass.Annotations = map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "abceduefed",
v1beta1.AnnotationEC2NodeClassHashVersion: "test",
}
nodeClaimOne := coretest.NodeClaim(corev1beta1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "123456",
v1beta1.AnnotationEC2NodeClassHashVersion: "test",
},
},
Spec: corev1beta1.NodeClaimSpec{
NodeClassRef: &corev1beta1.NodeClassReference{
Name: nodeClass.Name,
},
},
})
nodeClaimTwo := coretest.NodeClaim(corev1beta1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "123456",
v1beta1.AnnotationEC2NodeClassHashVersion: "test",
},
},
Spec: corev1beta1.NodeClaimSpec{
NodeClassRef: &corev1beta1.NodeClassReference{
Name: nodeClass.Name,
},
},
})

ExpectApplied(ctx, env.Client, nodeClass, nodeClaimOne, nodeClaimTwo)

ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
nodeClaimOne = ExpectExists(ctx, env.Client, nodeClaimOne)
nodeClaimTwo = ExpectExists(ctx, env.Client, nodeClaimTwo)

expectedHash := nodeClass.Hash()
Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
})
It("should not update ec2nodeclass hash on all nodeclaims when the hash versions matches the controller hash version", func() {
nodeClass.Annotations = map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "abceduefed",
v1beta1.AnnotationEC2NodeClassHashVersion: "test-version",
}
nodeClaim := coretest.NodeClaim(corev1beta1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "1234564654",
v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
},
},
Spec: corev1beta1.NodeClaimSpec{
NodeClassRef: &corev1beta1.NodeClassReference{
Name: nodeClass.Name,
},
},
})
ExpectApplied(ctx, env.Client, nodeClass, nodeClaim)

ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)

expectedHash := nodeClass.Hash()

// Expect NodeClaims to have been updated to the original hash
Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, "1234564654"))
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
})
It("should not update ec2nodeclass hash on the nodeclaim if it's drifted and the hash version doesn't match the controller hash version", func() {
nodeClass.Annotations = map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "abceduefed",
v1beta1.AnnotationEC2NodeClassHashVersion: "test",
}
nodeClaim := coretest.NodeClaim(corev1beta1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "123456",
v1beta1.AnnotationEC2NodeClassHashVersion: "test",
},
},
Spec: corev1beta1.NodeClaimSpec{
NodeClassRef: &corev1beta1.NodeClassReference{
Name: nodeClass.Name,
},
},
})
nodeClaim.StatusConditions().MarkTrue(corev1beta1.Drifted)
ExpectApplied(ctx, env.Client, nodeClass, nodeClaim)

ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)

// Expect NodeClaims to have been updated to the original hash
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, "123456"))
Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
})
})
Context("NodeClass Termination", func() {
var profileName string
Expand Down
10 changes: 10 additions & 0 deletions test/pkg/environment/common/expectations.go
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,16 @@ func (env *Environment) EventuallyExpectDrifted(nodeClaims ...*corev1beta1.NodeC
}).Should(Succeed())
}

func (env *Environment) ConsistentlyExpectNodeClaimsNotDrifted(duration time.Duration, nodeClaims ...*corev1beta1.NodeClaim) {
GinkgoHelper()
Consistently(func(g Gomega) {
for _, nc := range nodeClaims {
g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed())
g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Drifted)).To(BeNil())
}
}, duration).Should(Succeed())
}

func (env *Environment) EventuallyExpectEmpty(nodeClaims ...*corev1beta1.NodeClaim) {
GinkgoHelper()
Eventually(func(g Gomega) {
Expand Down
49 changes: 49 additions & 0 deletions test/suites/drift/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,55 @@ var _ = Describe("Drift", func() {
env.EventuallyExpectNotFound(pod, node)
env.EventuallyExpectHealthyPodCount(selector, numPods)
})
It("should update the ec2nodeclass-hash annotation on the ec2nodeclass and nodeclaim when the ec2nodeclass's ec2nodeclass-hash-version annotation does not match the controller hash version", func() {
env.ExpectCreated(dep, nodeClass, nodePool)
env.EventuallyExpectHealthyPodCount(selector, numPods)
nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0]
nodeClass = env.ExpectExists(nodeClass).(*v1beta1.EC2NodeClass)
expectedHash := nodeClass.Hash()

By(fmt.Sprintf("expect nodeclass %s and nodeclaim %s to contain karpenter.k8s.aws/ec2nodeclass-hash and karpenter.k8s.aws/ec2nodeclass-hash-version annotations", nodeClass.Name, nodeClaim.Name))
Eventually(func(g Gomega) {
g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
}).WithTimeout(30 * time.Second).Should(Succeed())

nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "test-hash-1",
v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1",
})
// Updating `nodeClass.Spec.Tags` would normally trigger drift on all nodeclaims using the
// nodeclass. However, the ec2nodeclass-hash-version does not match the controller hash version, so we will see that
// none of the nodeclaims will be drifted and all nodeclaims will have an updated `ec2nodeclass-hash` and `ec2nodeclass-hash-version` annotations
nodeClass.Spec.Tags = lo.Assign(nodeClass.Spec.Tags, map[string]string{
"test-key": "test-value",
})
nodeClaim.Annotations = lo.Assign(nodePool.Annotations, map[string]string{
v1beta1.AnnotationEC2NodeClassHash: "test-hash-2",
v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2",
})

// The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodeclass
env.ExpectUpdated(nodeClaim, nodeClass)
expectedHash = nodeClass.Hash()

// Should expect the all nodeclaims will not be drifted and contain an updated `ec2nodeclass-hash` and `ec2nodeclass-hash-version` annotations
Eventually(func(g Gomega) {
nodeClass = env.ExpectExists(nodeClass).(*v1beta1.EC2NodeClass)
nodeClaim = env.ExpectExists(nodeClaim).(*corev1beta1.NodeClaim)

g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
}).WithTimeout(30 * time.Second).Should(Succeed())

notDriftedDuration := time.Minute
By(fmt.Sprintf("consistently expect nodeclaim %s not to be drifted for %s", nodeClaim.Name, notDriftedDuration))
env.ConsistentlyExpectNodeClaimsNotDrifted(notDriftedDuration, nodeClaim)
})
Context("Failure", func() {
It("should not continue to drift if a node never registers", func() {
// launch a new nodeClaim
Expand Down

0 comments on commit c939227

Please sign in to comment.