diff --git a/hack/github/community-contributors.sh b/hack/github/community-contributors.sh
index 0404a56186db..1466a6d2b523 100755
--- a/hack/github/community-contributors.sh
+++ b/hack/github/community-contributors.sh
@@ -77,6 +77,9 @@ COMMUNITY_CONTRIBUTIONS=$(
     .author != "njtran" and
     .author != "dewjam" and
     .author != "suket22" and
+    .author != "Jigisha Patil" and
+    .author != "jigisha620" and
+    .author != "nikmohan123" and
     .author != "StableRelease" and
     .author != "dependabot[bot]" and
     .author != "github-actions[bot]" and
diff --git a/pkg/apis/v1beta1/ec2nodeclass.go b/pkg/apis/v1beta1/ec2nodeclass.go
index 97cfa82c9d5f..5c264202bdde 100644
--- a/pkg/apis/v1beta1/ec2nodeclass.go
+++ b/pkg/apis/v1beta1/ec2nodeclass.go
@@ -329,6 +329,12 @@ type EC2NodeClass struct {
     Status EC2NodeClassStatus `json:"status,omitempty"`
 }
 
+// We need to bump the EC2NodeClassHashVersion when we make an update to the EC2NodeClass CRD under these conditions:
+// 1. The default value changes for an existing field that is already hashed
+// 2. A field is added to the hash calculation with an already-set value
+// 3. A field is removed from the hash calculation
+const EC2NodeClassHashVersion = "v1"
+
 func (in *EC2NodeClass) Hash() string {
     return fmt.Sprint(lo.Must(hashstructure.Hash(in.Spec, hashstructure.FormatV2, &hashstructure.HashOptions{
         SlicesAsSets: true,
diff --git a/pkg/apis/v1beta1/labels.go b/pkg/apis/v1beta1/labels.go
index 1f39dbd1695d..cadf8dd902c6 100644
--- a/pkg/apis/v1beta1/labels.go
+++ b/pkg/apis/v1beta1/labels.go
@@ -115,6 +115,7 @@ var (
     LabelInstanceAcceleratorManufacturer = Group + "/instance-accelerator-manufacturer"
     LabelInstanceAcceleratorCount        = Group + "/instance-accelerator-count"
     AnnotationEC2NodeClassHash           = Group + "/ec2nodeclass-hash"
+    AnnotationEC2NodeClassHashVersion    = Group + "/ec2nodeclass-hash-version"
     AnnotationInstanceTagged             = Group + "/tagged"
 
     TagNodeClaim = v1beta1.Group + "/nodeclaim"
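Why the version constant matters: the drift hash above is just a hashstructure digest of the EC2NodeClass spec, so any change to how that digest is computed (a newly hashed field, a changed default, different hash options) produces a different value even when the user's spec is untouched. A minimal, self-contained sketch of that effect, using stand-in structs rather than the real EC2NodeClassSpec:

```go
package main

import (
	"fmt"

	"github.com/mitchellh/hashstructure/v2"
)

// specV1 is a stand-in for a hashed CRD spec; it is not the real EC2NodeClassSpec.
type specV1 struct {
	AMIFamily string
	Tags      map[string]string
}

// specV2 models the same user data after a hypothetical schema change that adds
// a defaulted field to the hash calculation.
type specV2 struct {
	AMIFamily          string
	Tags               map[string]string
	DetailedMonitoring bool // hypothetical new field, defaulted to false
}

func main() {
	opts := &hashstructure.HashOptions{SlicesAsSets: true}
	h1, _ := hashstructure.Hash(specV1{AMIFamily: "AL2"}, hashstructure.FormatV2, opts)
	h2, _ := hashstructure.Hash(specV2{AMIFamily: "AL2"}, hashstructure.FormatV2, opts)
	// The digests differ even though the user changed nothing, which would look
	// like drift; bumping EC2NodeClassHashVersion tells the controller to
	// re-baseline NodeClaim hashes instead of treating this as drift.
	fmt.Println(h1 == h2) // false
}
```

Without the accompanying hash-version annotation, a digest change of this kind would be indistinguishable from real drift.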
diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go
index 7f0ba1d97510..7bb586693f6d 100644
--- a/pkg/cache/cache.go
+++ b/pkg/cache/cache.go
@@ -36,4 +36,8 @@ const (
 const (
     // DefaultCleanupInterval triggers cache cleanup (lazy eviction) at this interval.
     DefaultCleanupInterval = time.Minute
+    // UnavailableOfferingsCleanupInterval triggers cache cleanup (lazy eviction) at this interval.
+    // We use a shorter cleanup interval for the ICE cache so that we react more quickly to offerings
+    // that become available again after they are evicted from the cache.
+    UnavailableOfferingsCleanupInterval = time.Second * 10
 )
diff --git a/pkg/cache/unavailableofferings.go b/pkg/cache/unavailableofferings.go
index 5051ad58300c..bbc2c16b3fb5 100644
--- a/pkg/cache/unavailableofferings.go
+++ b/pkg/cache/unavailableofferings.go
@@ -35,10 +35,14 @@ type UnavailableOfferings struct {
 }
 
 func NewUnavailableOfferings() *UnavailableOfferings {
-    return &UnavailableOfferings{
-        cache:  cache.New(UnavailableOfferingsTTL, DefaultCleanupInterval),
+    uo := &UnavailableOfferings{
+        cache:  cache.New(UnavailableOfferingsTTL, UnavailableOfferingsCleanupInterval),
         SeqNum: 0,
     }
+    uo.cache.OnEvicted(func(_ string, _ interface{}) {
+        atomic.AddUint64(&uo.SeqNum, 1)
+    })
+    return uo
 }
 
 // IsUnavailable returns true if the offering appears in the cache
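The eviction hook above pairs with the shorter cleanup interval: when an ICE entry expires and the 10-second cleanup pass removes it, the OnEvicted callback bumps SeqNum, which in turn invalidates any downstream cache key that embeds it (see the instance type provider further down). A rough sketch of the pattern using only the public patrickmn/go-cache API; the key string and timings are made up for illustration:

```go
package main

import (
	"fmt"
	"sync/atomic"
	"time"

	"github.com/patrickmn/go-cache"
)

func main() {
	var seqNum uint64
	// Short TTL and cleanup interval purely for demonstration.
	c := cache.New(100*time.Millisecond, 50*time.Millisecond)
	c.OnEvicted(func(_ string, _ interface{}) {
		// Each eviction (expiry or delete) invalidates downstream cache keys
		// that embed seqNum.
		atomic.AddUint64(&seqNum, 1)
	})

	c.SetDefault("p3.8xlarge:us-east-1a:spot", struct{}{})
	before := atomic.LoadUint64(&seqNum)

	time.Sleep(300 * time.Millisecond) // wait for expiry and a cleanup pass

	fmt.Println(before, atomic.LoadUint64(&seqNum)) // 0 1 once the entry has been evicted
}
```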
diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go
index 116ce4d23662..4684ccdf6c30 100644
--- a/pkg/cloudprovider/cloudprovider.go
+++ b/pkg/cloudprovider/cloudprovider.go
@@ -101,7 +101,10 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *corev1beta1.NodeC
         return i.Name == instance.Type
     })
     nc := c.instanceToNodeClaim(instance, instanceType)
-    nc.Annotations = lo.Assign(nc.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})
+    nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
+        v1beta1.AnnotationEC2NodeClassHash:        nodeClass.Hash(),
+        v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
+    })
     return nc, nil
 }
 
diff --git a/pkg/cloudprovider/drift.go b/pkg/cloudprovider/drift.go
index fe259ef397d1..69dc5aad1783 100644
--- a/pkg/cloudprovider/drift.go
+++ b/pkg/cloudprovider/drift.go
@@ -135,9 +135,16 @@ func (c *CloudProvider) areSecurityGroupsDrifted(ctx context.Context, ec2Instanc
 }
 
 func (c *CloudProvider) areStaticFieldsDrifted(nodeClaim *corev1beta1.NodeClaim, nodeClass *v1beta1.EC2NodeClass) cloudprovider.DriftReason {
-    nodeClassHash, foundHashNodeClass := nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash]
-    nodeClaimHash, foundHashNodeClaim := nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHash]
-    if !foundHashNodeClass || !foundHashNodeClaim {
+    nodeClassHash, foundNodeClassHash := nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash]
+    nodeClassHashVersion, foundNodeClassHashVersion := nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion]
+    nodeClaimHash, foundNodeClaimHash := nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHash]
+    nodeClaimHashVersion, foundNodeClaimHashVersion := nodeClaim.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion]
+
+    if !foundNodeClassHash || !foundNodeClaimHash || !foundNodeClassHashVersion || !foundNodeClaimHashVersion {
+        return ""
+    }
+    // validate that the hash version on the EC2NodeClass matches the NodeClaim's before evaluating static drift
+    if nodeClassHashVersion != nodeClaimHashVersion {
         return ""
     }
     return lo.Ternary(nodeClassHash != nodeClaimHash, NodeClassDrift, "")
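Condensed, the gate added above gives static drift evaluation three outcomes: missing annotations or mismatched hash versions short-circuit to "no drift", and only comparable hashes are allowed to report NodeClassDrift. The following is an illustrative restatement, not the provider's code; for brevity it treats a missing annotation and an empty one the same:

```go
package main

import "fmt"

// staticDrift condenses the checks in areStaticFieldsDrifted for illustration only.
func staticDrift(classAnn, claimAnn map[string]string) string {
	const hashKey = "karpenter.k8s.aws/ec2nodeclass-hash"
	const versionKey = "karpenter.k8s.aws/ec2nodeclass-hash-version"

	classHash, classVersion := classAnn[hashKey], classAnn[versionKey]
	claimHash, claimVersion := claimAnn[hashKey], claimAnn[versionKey]

	switch {
	case classHash == "" || claimHash == "" || classVersion == "" || claimVersion == "":
		return "" // annotations missing: nothing to compare, report no drift
	case classVersion != claimVersion:
		return "" // hash algorithms differ: hashes are not comparable, defer to the re-baselining controller
	case classHash != claimHash:
		return "NodeClassDrift"
	default:
		return ""
	}
}

func main() {
	fmt.Println(staticDrift(
		map[string]string{"karpenter.k8s.aws/ec2nodeclass-hash": "a", "karpenter.k8s.aws/ec2nodeclass-hash-version": "v1"},
		map[string]string{"karpenter.k8s.aws/ec2nodeclass-hash": "b", "karpenter.k8s.aws/ec2nodeclass-hash-version": "v2"},
	)) // "" because the hash versions differ
}
```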
diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go
index 0b5b76ddb0c2..f7f041ff312c 100644
--- a/pkg/cloudprovider/suite_test.go
+++ b/pkg/cloudprovider/suite_test.go
@@ -593,10 +593,14 @@ var _ = Describe("CloudProvider", func() {
             Reservations: []*ec2.Reservation{{Instances: []*ec2.Instance{instance}}},
         })
         nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{
-            v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
+            v1beta1.AnnotationEC2NodeClassHash:        nodeClass.Hash(),
+            v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
         })
         nodeClaim.Status.ProviderID = fake.ProviderID(lo.FromPtr(instance.InstanceId))
-        nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})
+        nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{
+            v1beta1.AnnotationEC2NodeClassHash:        nodeClass.Hash(),
+            v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
+        })
         nodeClaim.Labels = lo.Assign(nodeClaim.Labels, map[string]string{v1.LabelInstanceTypeStable: selectedInstanceType.Name})
     })
     It("should not fail if NodeClass does not exist", func() {
@@ -760,8 +764,58 @@ var _ = Describe("CloudProvider", func() {
         Entry("Subnet Drift", v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{SubnetSelectorTerms: []v1beta1.SubnetSelectorTerm{{Tags: map[string]string{"sn-key-1": "sn-value-1"}}}}}),
         Entry("SecurityGroup Drift", v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{SecurityGroupSelectorTerms: []v1beta1.SecurityGroupSelectorTerm{{Tags: map[string]string{"sg-key": "sg-value"}}}}}),
     )
-    It("should not return drifted if karpenter.k8s.aws/nodeclass-hash annotation is not present on the NodeClaim", func() {
-        nodeClaim.Annotations = map[string]string{}
+    It("should not return drifted if karpenter.k8s.aws/ec2nodeclass-hash annotation is not present on the NodeClaim", func() {
+        nodeClaim.Annotations = map[string]string{
+            v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
+        }
+        nodeClass.Spec.Tags = map[string]string{
+            "Test Key": "Test Value",
+        }
+        ExpectApplied(ctx, env.Client, nodePool, nodeClass)
+        isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
+        Expect(err).NotTo(HaveOccurred())
+        Expect(isDrifted).To(BeEmpty())
+    })
+    It("should not return drifted if the NodeClaim's karpenter.k8s.aws/ec2nodeclass-hash-version annotation does not match the EC2NodeClass's", func() {
+        nodeClass.ObjectMeta.Annotations = map[string]string{
+            v1beta1.AnnotationEC2NodeClassHash:        "test-hash-111111",
+            v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1",
+        }
+        nodeClaim.ObjectMeta.Annotations = map[string]string{
+            v1beta1.AnnotationEC2NodeClassHash:        "test-hash-222222",
+            v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2",
+        }
+        ExpectApplied(ctx, env.Client, nodePool, nodeClass)
+        isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
+        Expect(err).NotTo(HaveOccurred())
+        Expect(isDrifted).To(BeEmpty())
+    })
+    It("should not return drifted if karpenter.k8s.aws/ec2nodeclass-hash-version annotation is not present on the NodeClass", func() {
+        nodeClass.ObjectMeta.Annotations = map[string]string{
+            v1beta1.AnnotationEC2NodeClassHash: "test-hash-111111",
+        }
+        nodeClaim.ObjectMeta.Annotations = map[string]string{
+            v1beta1.AnnotationEC2NodeClassHash:        "test-hash-222222",
+            v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2",
+        }
+        // should trigger drift
+        nodeClass.Spec.Tags = map[string]string{
+            "Test Key": "Test Value",
+        }
+        ExpectApplied(ctx, env.Client, nodePool, nodeClass)
+        isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
+        Expect(err).NotTo(HaveOccurred())
+        Expect(isDrifted).To(BeEmpty())
+    })
+    It("should not return drifted if karpenter.k8s.aws/ec2nodeclass-hash-version annotation is not present on the NodeClaim", func() {
+        nodeClass.ObjectMeta.Annotations = map[string]string{
+            v1beta1.AnnotationEC2NodeClassHash:        "test-hash-111111",
+            v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1",
+        }
+        nodeClaim.ObjectMeta.Annotations = map[string]string{
+            v1beta1.AnnotationEC2NodeClassHash: "test-hash-222222",
+        }
+        // should trigger drift
         nodeClass.Spec.Tags = map[string]string{
             "Test Key": "Test Value",
         }
diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go
index 6c3a9a1204ec..25c0c1f7c561 100644
--- a/pkg/controllers/nodeclass/controller.go
+++ b/pkg/controllers/nodeclass/controller.go
@@ -80,7 +80,17 @@ func NewController(kubeClient client.Client, recorder events.Recorder, subnetPro
 func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) {
     stored := nodeClass.DeepCopy()
     controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer)
-    nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})
+
+    if nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] != v1beta1.EC2NodeClassHashVersion {
+        if err := c.updateNodeClaimHash(ctx, nodeClass); err != nil {
+            return reconcile.Result{}, err
+        }
+    }
+    nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{
+        v1beta1.AnnotationEC2NodeClassHash:        nodeClass.Hash(),
+        v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
+    })
+
     err := multierr.Combine(
         c.resolveSubnets(ctx, nodeClass),
         c.resolveSecurityGroups(ctx, nodeClass),
@@ -222,6 +232,45 @@ func (c *Controller) resolveInstanceProfile(ctx context.Context, nodeClass *v1be
     return nil
 }
 
+// Updating the `ec2nodeclass-hash-version` annotation inside the karpenter controller means a breaking change has been made to the hash calculation.
+// The `ec2nodeclass-hash` annotation on the EC2NodeClass will be updated because of that breaking change, making the `ec2nodeclass-hash` on the NodeClaim
+// differ from the one on the EC2NodeClass. Since we can no longer rely on the `ec2nodeclass-hash` stored on the NodeClaims, we re-calculate the hash and update the annotation.
+// For more information on Drift Hash Versioning: https://github.com/kubernetes-sigs/karpenter/blob/main/designs/drift-hash-versioning.md
+func (c *Controller) updateNodeClaimHash(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) error {
+    ncList := &corev1beta1.NodeClaimList{}
+    if err := c.kubeClient.List(ctx, ncList, client.MatchingFields{"spec.nodeClassRef.name": nodeClass.Name}); err != nil {
+        return err
+    }
+
+    errs := make([]error, len(ncList.Items))
+    for i := range ncList.Items {
+        nc := ncList.Items[i]
+        stored := nc.DeepCopy()
+
+        if nc.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] != v1beta1.EC2NodeClassHashVersion {
+            nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
+                v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
+            })
+
+            // Any NodeClaim that is already drifted will remain drifted if the karpenter.k8s.aws/ec2nodeclass-hash-version doesn't match.
+            // Since the hashing mechanism has changed, we cannot determine whether the drifted status of the NodeClaim would have changed.
+            if nc.StatusConditions().GetCondition(corev1beta1.Drifted) == nil {
+                nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
+                    v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
+                })
+            }
+
+            if !equality.Semantic.DeepEqual(stored, nc) {
+                if err := c.kubeClient.Patch(ctx, &nc, client.MergeFrom(stored)); err != nil {
+                    errs[i] = client.IgnoreNotFound(err)
+                }
+            }
+        }
+    }
+
+    return multierr.Combine(errs...)
+}
+
 func (c *Controller) Name() string {
     return "nodeclass"
 }
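One assumption worth making explicit: the client.MatchingFields{"spec.nodeClassRef.name": ...} lookup in updateNodeClaimHash only works if a field index with that key has been registered on the controller-runtime cache, which Karpenter does elsewhere in its operator setup. A hedged sketch of what such a registration looks like; the package and import paths are assumptions:

```go
package indexers

import (
	"context"

	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"
)

// registerNodeClaimIndex is illustrative; the real index registration lives in
// the Karpenter operator setup, not in this controller.
func registerNodeClaimIndex(ctx context.Context, mgr ctrl.Manager) error {
	return mgr.GetFieldIndexer().IndexField(ctx, &corev1beta1.NodeClaim{}, "spec.nodeClassRef.name", func(o client.Object) []string {
		// Extract the referenced EC2NodeClass name so List calls can filter on it.
		nc := o.(*corev1beta1.NodeClaim)
		if nc.Spec.NodeClassRef == nil {
			return nil
		}
		return []string{nc.Spec.NodeClassRef.Name}
	})
}
```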
diff --git a/pkg/controllers/nodeclass/suite_test.go b/pkg/controllers/nodeclass/suite_test.go
index 129b5be8decb..0c0a436697c3 100644
--- a/pkg/controllers/nodeclass/suite_test.go
+++ b/pkg/controllers/nodeclass/suite_test.go
@@ -23,6 +23,7 @@ import (
     "github.com/imdario/mergo"
     "github.com/samber/lo"
     v1 "k8s.io/api/core/v1"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/client-go/tools/record"
     _ "knative.dev/pkg/system/testing"
     "sigs.k8s.io/controller-runtime/pkg/client"
@@ -833,7 +834,7 @@ var _ = Describe("NodeClassController", func() {
         })
     })
     Context("Static Drift Hash", func() {
-        DescribeTable("should update the static drift hash when static field is updated", func(changes *v1beta1.EC2NodeClass) {
+        DescribeTable("should update the drift hash when static field is updated", func(changes *v1beta1.EC2NodeClass) {
             ExpectApplied(ctx, env.Client, nodeClass)
             ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
             nodeClass = ExpectExists(ctx, env.Client, nodeClass)
@@ -860,7 +861,7 @@ var _ = Describe("NodeClassController", func() {
             Entry("MetadataOptions Drift", &v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{MetadataOptions: &v1beta1.MetadataOptions{HTTPEndpoint: aws.String("disabled")}}}),
             Entry("Context Drift", &v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{Context: aws.String("context-2")}}),
         )
-        It("should not update the static drift hash when dynamic field is updated", func() {
+        It("should not update the drift hash when dynamic field is updated", func() {
             ExpectApplied(ctx, env.Client, nodeClass)
             ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
             nodeClass = ExpectExists(ctx, env.Client, nodeClass)
@@ -889,6 +890,128 @@ var _ = Describe("NodeClassController", func() {
             nodeClass = ExpectExists(ctx, env.Client, nodeClass)
             Expect(nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash]).To(Equal(expectedHash))
         })
+        It("should update ec2nodeclass-hash-version annotation when the ec2nodeclass-hash-version on the NodeClass does not match with the controller hash version", func() {
+            nodeClass.Annotations = map[string]string{
+                v1beta1.AnnotationEC2NodeClassHash:        "abceduefed",
+                v1beta1.AnnotationEC2NodeClassHashVersion: "test",
+            }
+            ExpectApplied(ctx, env.Client, nodeClass)
+
+            ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
+            nodeClass = ExpectExists(ctx, env.Client, nodeClass)
+
+            expectedHash := nodeClass.Hash()
+            // Expect ec2nodeclass-hash on the NodeClass to be updated
+            Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
+            Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
+        })
+        It("should update ec2nodeclass-hash-versions on all NodeClaims when the ec2nodeclass-hash-version does not match with the controller hash version", func() {
+            nodeClass.Annotations = map[string]string{
+                v1beta1.AnnotationEC2NodeClassHash:        "abceduefed",
+                v1beta1.AnnotationEC2NodeClassHashVersion: "test",
+            }
+            nodeClaimOne := coretest.NodeClaim(corev1beta1.NodeClaim{
+                ObjectMeta: metav1.ObjectMeta{
+                    Annotations: map[string]string{
+                        v1beta1.AnnotationEC2NodeClassHash:        "123456",
+                        v1beta1.AnnotationEC2NodeClassHashVersion: "test",
+                    },
+                },
+                Spec: corev1beta1.NodeClaimSpec{
+                    NodeClassRef: &corev1beta1.NodeClassReference{
+                        Name: nodeClass.Name,
+                    },
+                },
+            })
+            nodeClaimTwo := coretest.NodeClaim(corev1beta1.NodeClaim{
+                ObjectMeta: metav1.ObjectMeta{
+                    Annotations: map[string]string{
+                        v1beta1.AnnotationEC2NodeClassHash:        "123456",
+                        v1beta1.AnnotationEC2NodeClassHashVersion: "test",
+                    },
+                },
+                Spec: corev1beta1.NodeClaimSpec{
+                    NodeClassRef: &corev1beta1.NodeClassReference{
+                        Name: nodeClass.Name,
+                    },
+                },
+            })
+
+            ExpectApplied(ctx, env.Client, nodeClass, nodeClaimOne, nodeClaimTwo)
+
+            ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
+            nodeClass = ExpectExists(ctx, env.Client, nodeClass)
+            nodeClaimOne = ExpectExists(ctx, env.Client, nodeClaimOne)
+            nodeClaimTwo = ExpectExists(ctx, env.Client, nodeClaimTwo)
+
+            expectedHash := nodeClass.Hash()
+            // Expect ec2nodeclass-hash on the NodeClaims to be updated
+            Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
+            Expect(nodeClaimOne.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
+            Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
+            Expect(nodeClaimTwo.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
+        })
+        It("should not update ec2nodeclass-hash on all NodeClaims when the ec2nodeclass-hash-version matches the controller hash version", func() {
+            nodeClass.Annotations = map[string]string{
+                v1beta1.AnnotationEC2NodeClassHash:        "abceduefed",
+                v1beta1.AnnotationEC2NodeClassHashVersion: "test-version",
+            }
+            nodeClaim := coretest.NodeClaim(corev1beta1.NodeClaim{
+                ObjectMeta: metav1.ObjectMeta{
+                    Annotations: map[string]string{
+                        v1beta1.AnnotationEC2NodeClassHash:        "1234564654",
+                        v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion,
+                    },
+                },
+                Spec: corev1beta1.NodeClaimSpec{
+                    NodeClassRef: &corev1beta1.NodeClassReference{
+                        Name: nodeClass.Name,
+                    },
+                },
+            })
+            ExpectApplied(ctx, env.Client, nodeClass, nodeClaim)
+
+            ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
+            nodeClass = ExpectExists(ctx, env.Client, nodeClass)
+            nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
+
+            expectedHash := nodeClass.Hash()
+
+            // Expect ec2nodeclass-hash on the NodeClass to be updated
+            Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
+            Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
+            // Expect ec2nodeclass-hash on the NodeClaims to stay the same
+            Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, "1234564654"))
+            Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
+        })
+        It("should not update ec2nodeclass-hash on the NodeClaim if it's drifted and the ec2nodeclass-hash-version does not match the controller hash version", func() {
+            nodeClass.Annotations = map[string]string{
+                v1beta1.AnnotationEC2NodeClassHash:        "abceduefed",
+                v1beta1.AnnotationEC2NodeClassHashVersion: "test",
+            }
+            nodeClaim := coretest.NodeClaim(corev1beta1.NodeClaim{
+                ObjectMeta: metav1.ObjectMeta{
+                    Annotations: map[string]string{
+                        v1beta1.AnnotationEC2NodeClassHash:        "123456",
+                        v1beta1.AnnotationEC2NodeClassHashVersion: "test",
+                    },
+                },
+                Spec: corev1beta1.NodeClaimSpec{
+                    NodeClassRef: &corev1beta1.NodeClassReference{
+                        Name: nodeClass.Name,
+                    },
+                },
+            })
+            nodeClaim.StatusConditions().MarkTrue(corev1beta1.Drifted)
+            ExpectApplied(ctx, env.Client, nodeClass, nodeClaim)
+
+            ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass))
+            nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
+
+            // Expect ec2nodeclass-hash on the NodeClaims to stay the same
+            Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, "123456"))
+            Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
+        })
     })
     Context("NodeClass Termination", func() {
         var profileName string
diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go
index ba91f70f9454..92daf188bc42 100644
--- a/pkg/providers/instancetype/instancetype.go
+++ b/pkg/providers/instancetype/instancetype.go
@@ -20,7 +20,6 @@ import (
     "net/http"
     "sync"
     "sync/atomic"
-    "time"
 
     "github.com/mitchellh/hashstructure/v2"
     "github.com/patrickmn/go-cache"
@@ -99,9 +98,6 @@ func (p *Provider) List(ctx context.Context, kc *corev1beta1.KubeletConfiguratio
     if err != nil {
         return nil, err
     }
-    // Get zones from instancetypeOfferings
-    zones := p.getZones(ctx, instanceTypeOfferings)
-    // Constrain zones from subnets
     subnets, err := p.subnetProvider.List(ctx, nodeClass)
     if err != nil {
         return nil, err
     }
@@ -111,7 +107,7 @@ func (p *Provider) List(ctx context.Context, kc *corev1beta1.KubeletConfiguratio
     })...)
 
     // Compute fully initialized instance types hash key
-    subnetHash, _ := hashstructure.Hash(subnets, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true})
+    subnetZonesHash, _ := hashstructure.Hash(subnetZones, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true})
     kcHash, _ := hashstructure.Hash(kc, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true})
     // TODO: remove kubeReservedHash and systemReservedHash once v1.ResourceList objects are hashed as strings in KubeletConfiguration
     // For more information on the v1.ResourceList hash issue: https://github.com/kubernetes-sigs/karpenter/issues/1080
@@ -130,7 +126,7 @@ func (p *Provider) List(ctx context.Context, kc *corev1beta1.KubeletConfiguratio
         p.instanceTypesSeqNum,
         p.instanceTypeOfferingsSeqNum,
         p.unavailableOfferings.SeqNum,
-        subnetHash,
+        subnetZonesHash,
         kcHash,
         blockDeviceMappingsHash,
         aws.StringValue((*string)(nodeClass.Spec.InstanceStorePolicy)),
@@ -142,6 +138,18 @@ func (p *Provider) List(ctx context.Context, kc *corev1beta1.KubeletConfiguratio
     if item, ok := p.cache.Get(key); ok {
         return item.([]*cloudprovider.InstanceType), nil
     }
+
+    // Get all zones across all offerings
+    // We don't use this in the cache key since this is produced from our instanceTypeOfferings which we do cache
+    allZones := sets.New[string]()
+    for _, offeringZones := range instanceTypeOfferings {
+        for zone := range offeringZones {
+            allZones.Insert(zone)
+        }
+    }
+    if p.cm.HasChanged("zones", allZones) {
+        logging.FromContext(ctx).With("zones", allZones.UnsortedList()).Debugf("discovered zones")
+    }
     result := lo.Map(instanceTypes, func(i *ec2.InstanceTypeInfo, _ int) *cloudprovider.InstanceType {
         instanceTypeVCPU.With(prometheus.Labels{
             instanceTypeLabel: *i.InstanceType,
@@ -150,7 +158,7 @@ func (p *Provider) List(ctx context.Context, kc *corev1beta1.KubeletConfiguratio
             instanceTypeLabel: *i.InstanceType,
         }).Set(float64(aws.Int64Value(i.MemoryInfo.SizeInMiB) * 1024 * 1024))
 
-        return NewInstanceType(ctx, i, kc, p.region, nodeClass, p.createOfferings(ctx, i, instanceTypeOfferings[aws.StringValue(i.InstanceType)], zones, subnetZones))
+        return NewInstanceType(ctx, i, kc, p.region, nodeClass, p.createOfferings(ctx, i, instanceTypeOfferings[aws.StringValue(i.InstanceType)], allZones, subnetZones))
     })
     p.cache.SetDefault(key, result)
     return result, nil
@@ -206,30 +214,6 @@ func (p *Provider) createOfferings(ctx context.Context, instanceType *ec2.Instan
     return offerings
 }
 
-func (p *Provider) getZones(ctx context.Context, instanceTypeOfferings map[string]sets.Set[string]) sets.Set[string] {
-    // DO NOT REMOVE THIS LOCK ----------------------------------------------------------------------------
-    // We lock here so that multiple callers to getAvailabilityZones do not result in cache misses and multiple
-    // calls to EC2 when we could have just made one call.
-    // TODO @joinnis: This can be made more efficient by holding a Read lock and only obtaining the Write if not in cache
-    p.mu.Lock()
-    defer p.mu.Unlock()
-    if cached, ok := p.cache.Get(ZonesCacheKey); ok {
-        return cached.(sets.Set[string])
-    }
-    // Get zones from offerings
-    zones := sets.Set[string]{}
-    for _, offeringZones := range instanceTypeOfferings {
-        for zone := range offeringZones {
-            zones.Insert(zone)
-        }
-    }
-    if p.cm.HasChanged("zones", zones) {
-        logging.FromContext(ctx).With("zones", zones.UnsortedList()).Debugf("discovered zones")
-    }
-    p.cache.Set(ZonesCacheKey, zones, 24*time.Hour)
-    return zones
-}
-
 func (p *Provider) getInstanceTypeOfferings(ctx context.Context) (map[string]sets.Set[string], error) {
     // DO NOT REMOVE THIS LOCK ----------------------------------------------------------------------------
     // We lock here so that multiple callers to getInstanceTypeOfferings do not result in cache misses and multiple
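The cache key assembled in List is a concatenation of sequence numbers and digests, so a change in any one input (an ICE entry evicted, a different set of subnet zones, a modified kubelet configuration) yields a new key and forces the instance types to be rebuilt on the next call. A simplified sketch of that pattern with made-up field names and inputs:

```go
package main

import (
	"fmt"

	"github.com/mitchellh/hashstructure/v2"
)

// buildKey mirrors the shape of the provider's cache key: sequence numbers that
// bump on upstream changes, plus hashes of the request-scoped inputs. The
// parameter names are illustrative, not the provider's.
func buildKey(instanceTypesSeq, offeringsSeq, unavailableSeq uint64, subnetZones map[string]bool, kubeletConfig interface{}) string {
	subnetZonesHash, _ := hashstructure.Hash(subnetZones, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true})
	kcHash, _ := hashstructure.Hash(kubeletConfig, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true})
	return fmt.Sprintf("%d-%d-%d-%016x-%016x", instanceTypesSeq, offeringsSeq, unavailableSeq, subnetZonesHash, kcHash)
}

func main() {
	zones := map[string]bool{"us-east-1a": true, "us-east-1b": true}
	kc := map[string]string{"maxPods": "110"}
	k1 := buildKey(1, 1, 0, zones, kc)
	k2 := buildKey(1, 1, 1, zones, kc) // an offering was evicted from the ICE cache
	fmt.Println(k1 == k2)              // false: the cached instance types get recomputed
}
```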
diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go
index 2ac75d8b4ef2..46c77c543320 100644
--- a/test/suites/drift/suite_test.go
+++ b/test/suites/drift/suite_test.go
@@ -765,6 +765,54 @@ var _ = Describe("Drift", func() {
             g.Expect(nodePool.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashVersionAnnotationKey, corev1beta1.NodePoolHashVersion))
             g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashAnnotationKey, expectedHash))
             g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(corev1beta1.NodePoolHashVersionAnnotationKey, corev1beta1.NodePoolHashVersion))
+        })
+    })
+    It("should update the ec2nodeclass-hash annotation on the ec2nodeclass and nodeclaim when the ec2nodeclass's ec2nodeclass-hash-version annotation does not match the controller hash version", func() {
+        env.ExpectCreated(dep, nodeClass, nodePool)
+        env.EventuallyExpectHealthyPodCount(selector, numPods)
+        nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0]
+        nodeClass = env.ExpectExists(nodeClass).(*v1beta1.EC2NodeClass)
+        expectedHash := nodeClass.Hash()
+
+        By(fmt.Sprintf("expect nodeclass %s and nodeclaim %s to contain %s and %s annotations", nodeClass.Name, nodeClaim.Name, v1beta1.AnnotationEC2NodeClassHash, v1beta1.AnnotationEC2NodeClassHashVersion))
+        Eventually(func(g Gomega) {
+            g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed())
+            g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed())
+
+            g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
+            g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
+            g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
+            g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
+        }).WithTimeout(30 * time.Second).Should(Succeed())
+
+        nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{
+            v1beta1.AnnotationEC2NodeClassHash:        "test-hash-1",
+            v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1",
+        })
+        // Updating `nodeClass.Spec.Tags` would normally trigger drift on all nodeclaims using the
+        // nodeclass. However, the ec2nodeclass-hash-version does not match the controller hash version, so
+        // none of the nodeclaims will be drifted and all nodeclaims will get an updated `ec2nodeclass-hash` and `ec2nodeclass-hash-version` annotation
+        nodeClass.Spec.Tags = lo.Assign(nodeClass.Spec.Tags, map[string]string{
+            "test-key": "test-value",
+        })
+        nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{
+            v1beta1.AnnotationEC2NodeClassHash:        "test-hash-2",
+            v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2",
+        })
+
+        // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodeclass
+        env.ExpectUpdated(nodeClaim, nodeClass)
+        expectedHash = nodeClass.Hash()
+
+        // Expect all nodeclaims not to be drifted and to contain an updated `ec2nodeclass-hash` and `ec2nodeclass-hash-version` annotation
+        Eventually(func(g Gomega) {
+            g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed())
+            g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed())
+
+            g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
+            g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
+            g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHash, expectedHash))
+            g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationEC2NodeClassHashVersion, v1beta1.EC2NodeClassHashVersion))
         }).WithTimeout(30 * time.Second).Should(Succeed())
         env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim)
     })
diff --git a/tools/kompat/pkg/kompat/kompat.go b/tools/kompat/pkg/kompat/kompat.go
index ef8775ed91eb..a13e8131b389 100644
--- a/tools/kompat/pkg/kompat/kompat.go
+++ b/tools/kompat/pkg/kompat/kompat.go
@@ -193,7 +193,7 @@ func (k Kompat) Markdown(_ ...Options) string {
     data := []string{k.Name}
     for _, c := range k.Compatibility {
         if c.MaxK8sVersion == "" || c.MinK8sVersion == c.MaxK8sVersion {
-            headers = append(headers, fmt.Sprintf("`%s`+", c.MinK8sVersion))
+            headers = append(headers, fmt.Sprintf("\\>= `%s`", c.MinK8sVersion))
         } else {
             headers = append(headers, fmt.Sprintf("`%s` - `%s`", c.MinK8sVersion, c.MaxK8sVersion))
         }
@@ -365,7 +365,7 @@ func semverRange(semvers []string, allSemvers ...string) string {
         allSems := allSemvers
         sortSemvers(allSems)
         if allSems[len(allSems)-1] == semvers[len(semvers)-1] {
-            return fmt.Sprintf("%s+", semvers[0])
+            return fmt.Sprintf("\\>= %s", strings.ReplaceAll(semvers[0], ".x", ""))
         }
     }
     return fmt.Sprintf("%s - %s", semvers[0], semvers[len(semvers)-1])
diff --git a/website/content/en/docs/upgrading/compatibility.md b/website/content/en/docs/upgrading/compatibility.md
index 88507e3c7b8f..2d898d2a99f8 100644
--- a/website/content/en/docs/upgrading/compatibility.md
+++ b/website/content/en/docs/upgrading/compatibility.md
@@ -15,9 +15,9 @@ Before you begin upgrading Karpenter, consider Karpenter compatibility issues re
 
 [comment]: <> (the content below is generated from hack/docs/compataiblitymetrix_gen_docs.go)
 
-| KUBERNETES | 1.23    | 1.24    | 1.25    | 1.26    | 1.27    | 1.28    | 1.29    |
-|------------|---------|---------|---------|---------|---------|---------|---------|
-| karpenter  | 0.21.x+ | 0.21.x+ | 0.25.x+ | 0.28.x+ | 0.28.x+ | 0.31.x+ | 0.34.0+ |
+| KUBERNETES | 1.23     | 1.24     | 1.25     | 1.26     | 1.27     | 1.28     | 1.29       |
+|------------|----------|----------|----------|----------|----------|----------|------------|
+| karpenter  | \>= 0.21 | \>= 0.21 | \>= 0.25 | \>= 0.28 | \>= 0.28 | \>= 0.31 | \>= 0.34.0 |
 
 [comment]: <> (end docs generated content from hack/docs/compataiblitymetrix_gen_docs.go)
diff --git a/website/content/en/preview/upgrading/compatibility.md b/website/content/en/preview/upgrading/compatibility.md
index 88507e3c7b8f..2d898d2a99f8 100644
--- a/website/content/en/preview/upgrading/compatibility.md
+++ b/website/content/en/preview/upgrading/compatibility.md
@@ -15,9 +15,9 @@ Before you begin upgrading Karpenter, consider Karpenter compatibility issues re
 
 [comment]: <> (the content below is generated from hack/docs/compataiblitymetrix_gen_docs.go)
 
-| KUBERNETES | 1.23    | 1.24    | 1.25    | 1.26    | 1.27    | 1.28    | 1.29    |
-|------------|---------|---------|---------|---------|---------|---------|---------|
-| karpenter  | 0.21.x+ | 0.21.x+ | 0.25.x+ | 0.28.x+ | 0.28.x+ | 0.31.x+ | 0.34.0+ |
+| KUBERNETES | 1.23     | 1.24     | 1.25     | 1.26     | 1.27     | 1.28     | 1.29       |
+|------------|----------|----------|----------|----------|----------|----------|------------|
+| karpenter  | \>= 0.21 | \>= 0.21 | \>= 0.25 | \>= 0.28 | \>= 0.28 | \>= 0.31 | \>= 0.34.0 |
 
 [comment]: <> (end docs generated content from hack/docs/compataiblitymetrix_gen_docs.go)
diff --git a/website/content/en/v0.32/upgrading/compatibility.md b/website/content/en/v0.32/upgrading/compatibility.md
index 3b0ea36e3f2f..836df2552a15 100644
--- a/website/content/en/v0.32/upgrading/compatibility.md
+++ b/website/content/en/v0.32/upgrading/compatibility.md
@@ -15,9 +15,9 @@ Before you begin upgrading Karpenter, consider Karpenter compatibility issues re
 
 [comment]: <> (the content below is generated from hack/docs/compataiblitymetrix_gen_docs.go)
 
-| KUBERNETES | 1.23    | 1.24    | 1.25    | 1.26    | 1.27    | 1.28    |
-|------------|---------|---------|---------|---------|---------|---------|
-| karpenter  | 0.21.x+ | 0.21.x+ | 0.25.x+ | 0.28.x+ | 0.28.x+ | 0.31.x+ |
+| KUBERNETES | 1.23     | 1.24     | 1.25     | 1.26     | 1.27     | 1.28     | 1.29       |
+|------------|----------|----------|----------|----------|----------|----------|------------|
+| karpenter  | \>= 0.21 | \>= 0.21 | \>= 0.25 | \>= 0.28 | \>= 0.28 | \>= 0.31 | \>= 0.34.0 |
 
 [comment]: <> (end docs generated content from hack/docs/compataiblitymetrix_gen_docs.go)
diff --git a/website/content/en/v0.33/upgrading/compatibility.md b/website/content/en/v0.33/upgrading/compatibility.md
index dccc4caf6aa1..805e107b4f8f 100644
--- a/website/content/en/v0.33/upgrading/compatibility.md
+++ b/website/content/en/v0.33/upgrading/compatibility.md
@@ -15,9 +15,9 @@ Before you begin upgrading Karpenter, consider Karpenter compatibility issues re
 
 [comment]: <> (the content below is generated from hack/docs/compataiblitymetrix_gen_docs.go)
 
-| KUBERNETES | 1.23    | 1.24    | 1.25    | 1.26    | 1.27    | 1.28    |
-|------------|---------|---------|---------|---------|---------|---------|
-| karpenter  | 0.21.x+ | 0.21.x+ | 0.25.x+ | 0.28.x+ | 0.28.x+ | 0.31.x+ |
+| KUBERNETES | 1.23     | 1.24     | 1.25     | 1.26     | 1.27     | 1.28     | 1.29       |
+|------------|----------|----------|----------|----------|----------|----------|------------|
+| karpenter  | \>= 0.21 | \>= 0.21 | \>= 0.25 | \>= 0.28 | \>= 0.28 | \>= 0.31 | \>= 0.34.0 |
 
 [comment]: <> (end docs generated content from hack/docs/compataiblitymetrix_gen_docs.go)
diff --git a/website/content/en/v0.34/upgrading/compatibility.md b/website/content/en/v0.34/upgrading/compatibility.md
index da65395ed960..2eab4a752d15 100644
--- a/website/content/en/v0.34/upgrading/compatibility.md
+++ b/website/content/en/v0.34/upgrading/compatibility.md
@@ -15,9 +15,9 @@ Before you begin upgrading Karpenter, consider Karpenter compatibility issues re
 
 [comment]: <> (the content below is generated from hack/docs/compataiblitymetrix_gen_docs.go)
 
-| KUBERNETES | 1.23    | 1.24    | 1.25    | 1.26    | 1.27    | 1.28    | 1.29    |
-|------------|---------|---------|---------|---------|---------|---------|---------|
-| karpenter  | 0.21.x+ | 0.21.x+ | 0.25.x+ | 0.28.x+ | 0.28.x+ | 0.31.x+ | 0.34.x+ |
+| KUBERNETES | 1.23     | 1.24     | 1.25     | 1.26     | 1.27     | 1.28     | 1.29       |
+|------------|----------|----------|----------|----------|----------|----------|------------|
+| karpenter  | \>= 0.21 | \>= 0.21 | \>= 0.25 | \>= 0.28 | \>= 0.28 | \>= 0.31 | \>= 0.34.0 |
 
 [comment]: <> (end docs generated content from hack/docs/compataiblitymetrix_gen_docs.go)
@@ -103,4 +103,3 @@ aws ecr get-login-password --region {{< param "snapshot_repo.region" >}} | docke
 {{% alert title="Note" color="warning" %}}
 Snapshot releases are suitable for testing, and troubleshooting but they should not be used in production environments. Snapshot releases are ephemeral and will be removed 90 days after they were published.
 {{% /alert %}}
-
diff --git a/website/content/en/v0.35/upgrading/compatibility.md b/website/content/en/v0.35/upgrading/compatibility.md
index 88507e3c7b8f..2d898d2a99f8 100644
--- a/website/content/en/v0.35/upgrading/compatibility.md
+++ b/website/content/en/v0.35/upgrading/compatibility.md
@@ -15,9 +15,9 @@ Before you begin upgrading Karpenter, consider Karpenter compatibility issues re
 
 [comment]: <> (the content below is generated from hack/docs/compataiblitymetrix_gen_docs.go)
 
-| KUBERNETES | 1.23    | 1.24    | 1.25    | 1.26    | 1.27    | 1.28    | 1.29    |
-|------------|---------|---------|---------|---------|---------|---------|---------|
-| karpenter  | 0.21.x+ | 0.21.x+ | 0.25.x+ | 0.28.x+ | 0.28.x+ | 0.31.x+ | 0.34.0+ |
+| KUBERNETES | 1.23     | 1.24     | 1.25     | 1.26     | 1.27     | 1.28     | 1.29       |
+|------------|----------|----------|----------|----------|----------|----------|------------|
+| karpenter  | \>= 0.21 | \>= 0.21 | \>= 0.25 | \>= 0.28 | \>= 0.28 | \>= 0.31 | \>= 0.34.0 |
 
 [comment]: <> (end docs generated content from hack/docs/compataiblitymetrix_gen_docs.go)