chore: update kubelet hash annotation on nodeclaim
jigisha620 committed Jul 16, 2024
1 parent 51122a5 commit 7d254f8
Showing 9 changed files with 363 additions and 10 deletions.
3 changes: 2 additions & 1 deletion pkg/apis/apis.go
@@ -26,7 +26,8 @@ import (

//go:generate controller-gen crd object:headerFile="../../hack/boilerplate.go.txt" paths="./..." output:crd:artifacts:config=crds
var (
Group = "karpenter.k8s.aws"
Group = "karpenter.k8s.aws"
CompatabilityGroup = "compatibility." + Group
//go:embed crds/karpenter.k8s.aws_ec2nodeclasses.yaml
EC2NodeClassCRD []byte
CRDs = append(apis.CRDs,
4 changes: 2 additions & 2 deletions pkg/apis/v1/ec2nodeclass.go
@@ -92,7 +92,7 @@ type EC2NodeClassSpec struct {
// +kubebuilder:validation:XValidation:message="evictionSoft OwnerKey does not have a matching evictionSoftGracePeriod",rule="has(self.evictionSoft) ? self.evictionSoft.all(e, (e in self.evictionSoftGracePeriod)):true"
// +kubebuilder:validation:XValidation:message="evictionSoftGracePeriod OwnerKey does not have a matching evictionSoft",rule="has(self.evictionSoftGracePeriod) ? self.evictionSoftGracePeriod.all(e, (e in self.evictionSoft)):true"
// +optional
Kubelet *KubeletConfiguration `json:"kubelet,omitempty"`
Kubelet *KubeletConfiguration `json:"kubelet,omitempty" hash:"ignore"`
// BlockDeviceMappings to be applied to provisioned nodes.
// +kubebuilder:validation:XValidation:message="must have only one blockDeviceMappings with rootVolume",rule="self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() <= 1"
// +kubebuilder:validation:MaxItems:=50
@@ -416,7 +416,7 @@ type EC2NodeClass struct {
// 1. A field changes its default value for an existing field that is already hashed
// 2. A field is added to the hash calculation with an already-set value
// 3. A field is removed from the hash calculations
const EC2NodeClassHashVersion = "v2"
const EC2NodeClassHashVersion = "v3"

func (in *EC2NodeClass) Hash() string {
return fmt.Sprint(lo.Must(hashstructure.Hash(in.Spec, hashstructure.FormatV2, &hashstructure.HashOptions{
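Note on the change above: the new `hash:"ignore"` tag excludes the kubelet block from the EC2NodeClass drift hash (which is why EC2NodeClassHashVersion is bumped from v2 to v3), while kubelet settings are tracked separately through the new compatibility annotation. A minimal standalone sketch, not taken from this repository, showing how hashstructure treats an ignored field:

package main

import (
	"fmt"

	"github.com/mitchellh/hashstructure/v2"
)

// Hypothetical spec: only AMIFamily participates in the hash;
// Kubelet is skipped, mirroring EC2NodeClassSpec.Kubelet above.
type spec struct {
	AMIFamily string
	Kubelet   map[string]string `hash:"ignore"`
}

func main() {
	a := spec{AMIFamily: "AL2023"}
	b := spec{AMIFamily: "AL2023", Kubelet: map[string]string{"maxPods": "110"}}

	ha, _ := hashstructure.Hash(a, hashstructure.FormatV2, nil)
	hb, _ := hashstructure.Hash(b, hashstructure.FormatV2, nil)

	// Prints "true": changing only the ignored field leaves the hash unchanged,
	// so kubelet edits no longer surface as EC2NodeClass static drift.
	fmt.Println(ha == hb)
}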
1 change: 1 addition & 0 deletions pkg/apis/v1/labels.go
@@ -119,6 +119,7 @@ var (
LabelInstanceAcceleratorManufacturer = apis.Group + "/instance-accelerator-manufacturer"
LabelInstanceAcceleratorCount = apis.Group + "/instance-accelerator-count"
AnnotationEC2NodeClassHash = apis.Group + "/ec2nodeclass-hash"
AnnotationKubeletCompatibilityHash = apis.CompatabilityGroup + "/kubelet-drift-hash"
AnnotationEC2NodeClassHashVersion = apis.Group + "/ec2nodeclass-hash-version"
AnnotationInstanceTagged = apis.Group + "/tagged"

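With CompatabilityGroup defined as "compatibility." + Group in pkg/apis/apis.go above, the new annotation key resolves to compatibility.karpenter.k8s.aws/kubelet-drift-hash. A tiny sketch of the resolved keys, for reference:

package main

import "fmt"

func main() {
	group := "karpenter.k8s.aws"
	compatibilityGroup := "compatibility." + group

	fmt.Println(group + "/ec2nodeclass-hash")               // karpenter.k8s.aws/ec2nodeclass-hash
	fmt.Println(group + "/ec2nodeclass-hash-version")       // karpenter.k8s.aws/ec2nodeclass-hash-version
	fmt.Println(compatibilityGroup + "/kubelet-drift-hash") // compatibility.karpenter.k8s.aws/kubelet-drift-hash
}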
15 changes: 13 additions & 2 deletions pkg/cloudprovider/cloudprovider.go
@@ -86,6 +86,16 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *karpv1.NodeClaim)
// We treat a failure to resolve the NodeClass as an ICE since this means there is no capacity possibilities for this NodeClaim
return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("resolving node class, %w", err))
}

// TODO: Remove this after v1
nodePool, err := utils.ResolveNodePoolFromNodeClaim(ctx, c.kubeClient, nodeClaim)
if err != nil {
return nil, err
}
kubeletHash, err := utils.GetHashKubelet(nodePool, nodeClass)
if err != nil {
return nil, err
}
nodeClassReady := nodeClass.StatusConditions().Get(status.ConditionReady)
if !nodeClassReady.IsTrue() {
return nil, fmt.Errorf("resolving ec2nodeclass, %s", nodeClassReady.Message)
@@ -106,8 +116,9 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *karpv1.NodeClaim)
})
nc := c.instanceToNodeClaim(instance, instanceType, nodeClass)
nc.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{
v1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
v1.AnnotationEC2NodeClassHashVersion: v1.EC2NodeClassHashVersion,
v1.AnnotationKubeletCompatibilityHash: kubeletHash,
v1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
v1.AnnotationEC2NodeClassHashVersion: v1.EC2NodeClassHashVersion,
})
return nc, nil
}
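The helpers utils.ResolveNodePoolFromNodeClaim and utils.GetHashKubelet are not part of this diff view. Based on how they are called here and in the tests below, a plausible sketch of the hashing helper could look like the following; the fallback order, hash options, and error messages are assumptions, not the actual implementation:

package utils

import (
	"encoding/json"
	"fmt"

	"github.com/mitchellh/hashstructure/v2"
	"github.com/samber/lo"
	"sigs.k8s.io/karpenter/pkg/apis/v1beta1"

	karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
	v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
)

// Assumed sketch of GetHashKubelet: hash the v1beta1 kubelet configuration carried on
// the NodePool's compatibility annotation if present, otherwise hash the kubelet block
// on the EC2NodeClass spec.
func GetHashKubelet(nodePool *karpv1.NodePool, nodeClass *v1.EC2NodeClass) (string, error) {
	if nodePool != nil {
		if raw, ok := nodePool.Annotations[karpv1.KubeletCompatabilityAnnotationKey]; ok && raw != "" {
			kubelet := &v1beta1.KubeletConfiguration{}
			if err := json.Unmarshal([]byte(raw), kubelet); err != nil {
				return "", fmt.Errorf("unmarshaling kubelet from compatibility annotation, %w", err)
			}
			return fmt.Sprint(lo.Must(hashstructure.Hash(kubelet, hashstructure.FormatV2, &hashstructure.HashOptions{
				SlicesAsSets: true, IgnoreZeroValue: true, ZeroNil: true,
			}))), nil
		}
	}
	return fmt.Sprint(lo.Must(hashstructure.Hash(nodeClass.Spec.Kubelet, hashstructure.FormatV2, &hashstructure.HashOptions{
		SlicesAsSets: true, IgnoreZeroValue: true, ZeroNil: true,
	}))), nil
}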
27 changes: 26 additions & 1 deletion pkg/cloudprovider/drift.go
@@ -43,6 +43,10 @@ func (c *CloudProvider) isNodeClassDrifted(ctx context.Context, nodeClaim *karpv
if drifted := c.areStaticFieldsDrifted(nodeClaim, nodeClass); drifted != "" {
return drifted, nil
}
kubeletDrifted, err := c.isKubeletConfigurationDrifted(nodeClaim, nodeClass, nodePool)
if err != nil {
return "", err
}
instance, err := c.getInstance(ctx, nodeClaim.Status.ProviderID)
if err != nil {
return "", err
@@ -59,7 +63,7 @@
if err != nil {
return "", fmt.Errorf("calculating subnet drift, %w", err)
}
drifted := lo.FindOrElse([]cloudprovider.DriftReason{amiDrifted, securitygroupDrifted, subnetDrifted}, "", func(i cloudprovider.DriftReason) bool {
drifted := lo.FindOrElse([]cloudprovider.DriftReason{amiDrifted, securitygroupDrifted, subnetDrifted, kubeletDrifted}, "", func(i cloudprovider.DriftReason) bool {
return string(i) != ""
})
return drifted, nil
@@ -135,6 +139,27 @@ func (c *CloudProvider) areStaticFieldsDrifted(nodeClaim *karpv1.NodeClaim, node
return lo.Ternary(nodeClassHash != nodeClaimHash, NodeClassDrift, "")
}

// Remove once v1beta1 is dropped
func (c *CloudProvider) isKubeletConfigurationDrifted(nodeClaim *karpv1.NodeClaim, nodeClass *v1.EC2NodeClass, nodePool *karpv1.NodePool) (cloudprovider.DriftReason, error) {
kubeletHash, err := utils.GetHashKubelet(nodePool, nodeClass)
if err != nil {
return "", err
}
nodeClaimKubeletHash, foundNodeClaimKubeletHash := nodeClaim.Annotations[v1.AnnotationKubeletCompatibilityHash]
nodeClassHashVersion, foundNodeClassHashVersion := nodeClass.Annotations[v1.AnnotationEC2NodeClassHashVersion]
nodeClaimHashVersion, foundNodeClaimHashVersion := nodeClaim.Annotations[v1.AnnotationEC2NodeClassHashVersion]

if !foundNodeClaimKubeletHash || !foundNodeClaimHashVersion || !foundNodeClassHashVersion {
return "", nil
}

// validate that the hash version for the EC2NodeClass is the same as the NodeClaim before evaluating for static drift
if nodeClassHashVersion != nodeClaimHashVersion {
return "", nil
}
return lo.Ternary(kubeletHash != nodeClaimKubeletHash, NodeClassDrift, ""), nil
}

func (c *CloudProvider) getInstance(ctx context.Context, providerID string) (*instance.Instance, error) {
// Get InstanceID to fetch from EC2
instanceID, err := utils.ParseInstanceID(providerID)
14 changes: 13 additions & 1 deletion pkg/controllers/nodeclass/hash/controller.go
@@ -17,6 +17,8 @@ package hash
import (
"context"

"github.com/aws/karpenter-provider-aws/pkg/utils"

"github.com/samber/lo"
"go.uber.org/multierr"
"k8s.io/apimachinery/pkg/api/equality"
@@ -93,6 +95,15 @@ func (c *Controller) updateNodeClaimHash(ctx context.Context, nodeClass *v1.EC2N
nc := ncList.Items[i]
stored := nc.DeepCopy()

nodePool, err := utils.ResolveNodePoolFromNodeClaim(ctx, c.kubeClient, &nc)
if err != nil {
return err
}
kubeletHash, err := utils.GetHashKubelet(nodePool, nodeClass)
if err != nil {
return err
}

if nc.Annotations[v1.AnnotationEC2NodeClassHashVersion] != v1.EC2NodeClassHashVersion {
nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
v1.AnnotationEC2NodeClassHashVersion: v1.EC2NodeClassHashVersion,
@@ -102,7 +113,8 @@ func (c *Controller) updateNodeClaimHash(ctx context.Context, nodeClass *v1.EC2N
// Since the hashing mechanism has changed we will not be able to determine if the drifted status of the NodeClaim has changed
if nc.StatusConditions().Get(karpv1.ConditionTypeDrifted) == nil {
nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
v1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
v1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
v1.AnnotationKubeletCompatibilityHash: kubeletHash,
})
}

137 changes: 134 additions & 3 deletions pkg/controllers/nodeclass/hash/suite_test.go
@@ -16,8 +16,14 @@ package hash_test

import (
"context"
"encoding/json"
"testing"

"github.com/samber/lo"
"sigs.k8s.io/karpenter/pkg/apis/v1beta1"

"github.com/aws/karpenter-provider-aws/pkg/utils"

"sigs.k8s.io/karpenter/pkg/test/v1alpha1"

"github.com/aws/aws-sdk-go/aws"
@@ -75,6 +81,7 @@ var _ = AfterEach(func() {

var _ = Describe("NodeClass Hash Controller", func() {
var nodeClass *v1.EC2NodeClass
var nodePool *karpv1.NodePool
BeforeEach(func() {
nodeClass = test.EC2NodeClass(v1.EC2NodeClass{
Spec: v1.EC2NodeClassSpec{
Expand All @@ -95,6 +102,19 @@ var _ = Describe("NodeClass Hash Controller", func() {
},
},
})
nodePool = coretest.NodePool(karpv1.NodePool{
Spec: karpv1.NodePoolSpec{
Template: karpv1.NodeClaimTemplate{
Spec: karpv1.NodeClaimSpec{
NodeClassRef: &karpv1.NodeClassReference{
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
},
},
},
},
})
})
DescribeTable("should update the drift hash when static field is updated", func(changes *v1.EC2NodeClass) {
ExpectApplied(ctx, env.Client, nodeClass)
@@ -123,6 +143,113 @@
Entry("MetadataOptions Drift", &v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPEndpoint: aws.String("disabled")}}}),
Entry("Context Drift", &v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{Context: aws.String("context-2")}}),
)
It("should update nodeClaim annotation kubelet hash if nodePool was configured using v1beta1 NodePool", func() {
kubeletConfig := &v1beta1.KubeletConfiguration{
ClusterDNS: []string{"test-cluster-dns"},
MaxPods: lo.ToPtr(int32(9383)),
PodsPerCore: lo.ToPtr(int32(9334283)),
}
kubeletConfigString, _ := json.Marshal(kubeletConfig)
nodePool.Annotations = lo.Assign(nodePool.Annotations, map[string]string{
karpv1.KubeletCompatabilityAnnotationKey: string(kubeletConfigString),
})
nodeClaim := coretest.NodeClaim(karpv1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{karpv1.NodePoolLabelKey: nodePool.Name},
Annotations: map[string]string{
v1.AnnotationEC2NodeClassHash: "123456",
v1.AnnotationEC2NodeClassHashVersion: "test",
v1.AnnotationKubeletCompatibilityHash: "123456",
},
},
Spec: karpv1.NodeClaimSpec{
NodeClassRef: &karpv1.NodeClassReference{
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
},
},
})
ExpectApplied(ctx, env.Client, nodeClass, nodeClaim, nodePool)
expectedHash, _ := utils.GetHashKubelet(nodePool, nodeClass)

ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass)
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Annotations[v1.AnnotationKubeletCompatibilityHash]).To(Equal(expectedHash))
})
It("should update nodeClaim annotation kubelet hash when kubelet is configured using ec2nodeClass", func() {
nodeClaim := coretest.NodeClaim(karpv1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{karpv1.NodePoolLabelKey: nodePool.Name},
Annotations: map[string]string{
v1.AnnotationEC2NodeClassHash: "123456",
v1.AnnotationEC2NodeClassHashVersion: "test",
v1.AnnotationKubeletCompatibilityHash: "123456",
},
},
Spec: karpv1.NodeClaimSpec{
NodeClassRef: &karpv1.NodeClassReference{
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
},
},
})
nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{
ClusterDNS: []string{"test-cluster-dns"},
MaxPods: lo.ToPtr(int32(9383)),
PodsPerCore: lo.ToPtr(int32(9334283)),
}
ExpectApplied(ctx, env.Client, nodeClass, nodeClaim, nodePool)
expectedHash, _ := utils.GetHashKubelet(nodePool, nodeClass)

ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass)
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Annotations[v1.AnnotationKubeletCompatibilityHash]).To(Equal(expectedHash))
})
It("should not update nodeClaim annotation kubelet hash if annotation is same as kubelet configuration on nodeClass", func() {
kubeletConfig := &v1beta1.KubeletConfiguration{
ClusterDNS: []string{"test-cluster-dns"},
MaxPods: lo.ToPtr(int32(9383)),
PodsPerCore: lo.ToPtr(int32(9334283)),
}
kubeletConfigString, _ := json.Marshal(kubeletConfig)
nodePool.Annotations = lo.Assign(nodePool.Annotations, map[string]string{
karpv1.KubeletCompatabilityAnnotationKey: string(kubeletConfigString),
})
nodeClaim := coretest.NodeClaim(karpv1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{karpv1.NodePoolLabelKey: nodePool.Name},
Annotations: map[string]string{
v1.AnnotationEC2NodeClassHash: "123456",
v1.AnnotationEC2NodeClassHashVersion: "test",
},
},
Spec: karpv1.NodeClaimSpec{
NodeClassRef: &karpv1.NodeClassReference{
Group: object.GVK(nodeClass).Group,
Kind: object.GVK(nodeClass).Kind,
Name: nodeClass.Name,
},
},
})
ExpectApplied(ctx, env.Client, nodeClass, nodeClaim, nodePool)

ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass)
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
hashBefore := nodeClaim.Annotations[v1.AnnotationKubeletCompatibilityHash]

nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{
ClusterDNS: []string{"test-cluster-dns"},
MaxPods: lo.ToPtr(int32(9383)),
PodsPerCore: lo.ToPtr(int32(9334283)),
}
nodePool.Annotations = nil
ExpectApplied(ctx, env.Client, nodeClass, nodePool)
ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass)
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.Annotations[v1.AnnotationKubeletCompatibilityHash]).To(Equal(hashBefore))
})
It("should not update the drift hash when dynamic field is updated", func() {
ExpectApplied(ctx, env.Client, nodeClass)
ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass)
Expand Down Expand Up @@ -174,6 +301,7 @@ var _ = Describe("NodeClass Hash Controller", func() {
}
nodeClaimOne := coretest.NodeClaim(karpv1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{karpv1.NodePoolLabelKey: nodePool.Name},
Annotations: map[string]string{
v1.AnnotationEC2NodeClassHash: "123456",
v1.AnnotationEC2NodeClassHashVersion: "test",
@@ -189,6 +317,7 @@ var _ = Describe("NodeClass Hash Controller", func() {
})
nodeClaimTwo := coretest.NodeClaim(karpv1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{karpv1.NodePoolLabelKey: nodePool.Name},
Annotations: map[string]string{
v1.AnnotationEC2NodeClassHash: "123456",
v1.AnnotationEC2NodeClassHashVersion: "test",
@@ -203,7 +332,7 @@ var _ = Describe("NodeClass Hash Controller", func() {
},
})

ExpectApplied(ctx, env.Client, nodeClass, nodeClaimOne, nodeClaimTwo)
ExpectApplied(ctx, env.Client, nodeClass, nodeClaimOne, nodeClaimTwo, nodePool)

ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass)
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
@@ -224,6 +353,7 @@ var _ = Describe("NodeClass Hash Controller", func() {
}
nodeClaim := coretest.NodeClaim(karpv1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{karpv1.NodePoolLabelKey: nodePool.Name},
Annotations: map[string]string{
v1.AnnotationEC2NodeClassHash: "1234564654",
v1.AnnotationEC2NodeClassHashVersion: v1.EC2NodeClassHashVersion,
@@ -237,7 +367,7 @@ var _ = Describe("NodeClass Hash Controller", func() {
},
},
})
ExpectApplied(ctx, env.Client, nodeClass, nodeClaim)
ExpectApplied(ctx, env.Client, nodeClass, nodeClaim, nodePool)

ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass)
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
@@ -259,6 +389,7 @@ var _ = Describe("NodeClass Hash Controller", func() {
}
nodeClaim := coretest.NodeClaim(karpv1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{karpv1.NodePoolLabelKey: nodePool.Name},
Annotations: map[string]string{
v1.AnnotationEC2NodeClassHash: "123456",
v1.AnnotationEC2NodeClassHashVersion: "test",
Expand All @@ -273,7 +404,7 @@ var _ = Describe("NodeClass Hash Controller", func() {
},
})
nodeClaim.StatusConditions().SetTrue(karpv1.ConditionTypeDrifted)
ExpectApplied(ctx, env.Client, nodeClass, nodeClaim)
ExpectApplied(ctx, env.Client, nodeClass, nodeClaim, nodePool)

ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass)
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)