From 0b078b0def859f2693db6215f62e2b70eafe90e2 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Fri, 22 Mar 2024 06:57:00 -0700 Subject: [PATCH] fix: include capacity type and resource.Quantity in launch template cache key (#5882) --- .../amifamily/bootstrap/bootstrap.go | 15 +++++++++++ pkg/providers/amifamily/resolver.go | 8 +++--- .../launchtemplate/launchtemplate.go | 27 ++++++++++++++----- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/pkg/providers/amifamily/bootstrap/bootstrap.go b/pkg/providers/amifamily/bootstrap/bootstrap.go index 65321c40d339..4eb334192190 100644 --- a/pkg/providers/amifamily/bootstrap/bootstrap.go +++ b/pkg/providers/amifamily/bootstrap/bootstrap.go @@ -19,6 +19,7 @@ import ( "sort" "strings" + "github.com/mitchellh/hashstructure/v2" "github.com/samber/lo" core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -102,6 +103,17 @@ func (o Options) nodeLabelArg() string { return fmt.Sprintf("--node-labels=%q", strings.Join(labelStrings, ",")) } +// TODO: jmdeal@ remove once KubeletConfiguration can be properly hashed +// For more information on the resource.Quantity hash issue: https://github.com/aws/karpenter-provider-aws/issues/5447 +func (o Options) HashReservedResources() string { + kubeReservedHash, systemReservedHash := uint64(0), uint64(0) + if kc := o.KubeletConfig; kc != nil { + kubeReservedHash, _ = hashstructure.Hash(resources.StringMap(kc.KubeReserved), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + systemReservedHash, _ = hashstructure.Hash(resources.StringMap(kc.SystemReserved), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + } + return fmt.Sprintf("%d-%d", kubeReservedHash, systemReservedHash) +} + // joinParameterArgs joins a map of keys and values by their separator. The separator will sit between the // arguments in a comma-separated list i.e. arg1val1,arg2val2 func joinParameterArgs[K comparable, V any](name string, m map[K]V, separator string) string { @@ -122,4 +134,7 @@ func joinParameterArgs[K comparable, V any](name string, m map[K]V, separator st // Examples are the Bottlerocket config and the eks-bootstrap script type Bootstrapper interface { Script() (string, error) + + // TODO: jmdeal@ remove once KubeletConfiguration can be properly hashed + HashReservedResources() string } diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index 14467a6d852a..57fc079004f7 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -72,6 +72,7 @@ type LaunchTemplate struct { InstanceTypes []*cloudprovider.InstanceType `hash:"ignore"` DetailedMonitoring bool EFACount int + CapacityType string } // AMIFamily can be implemented to override the default logic for generating dynamic launch template parameters @@ -118,7 +119,7 @@ func New(amiProvider *Provider) *Resolver { // Resolve generates launch templates using the static options and dynamically generates launch template parameters. // Multiple ResolvedTemplates are returned based on the instanceTypes passed in to support special AMIs for certain instance types like GPUs. -func (r Resolver) Resolve(ctx context.Context, nodeClass *v1beta1.EC2NodeClass, nodeClaim *corev1beta1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, options *Options) ([]*LaunchTemplate, error) { +func (r Resolver) Resolve(ctx context.Context, nodeClass *v1beta1.EC2NodeClass, nodeClaim *corev1beta1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, capacityType string, options *Options) ([]*LaunchTemplate, error) { amiFamily := GetAMIFamily(nodeClass.Spec.AMIFamily, options) amis, err := r.amiProvider.Get(ctx, nodeClass, options) if err != nil { @@ -153,7 +154,7 @@ func (r Resolver) Resolve(ctx context.Context, nodeClass *v1beta1.EC2NodeClass, } }) for params, instanceTypes := range paramsToInstanceTypes { - resolved, err := r.resolveLaunchTemplate(nodeClass, nodeClaim, instanceTypes, amiFamily, amiID, params.maxPods, params.efaCount, options) + resolved, err := r.resolveLaunchTemplate(nodeClass, nodeClaim, instanceTypes, capacityType, amiFamily, amiID, params.maxPods, params.efaCount, options) if err != nil { return nil, err } @@ -206,7 +207,7 @@ func (r Resolver) defaultClusterDNS(opts *Options, kubeletConfig *corev1beta1.Ku return newKubeletConfig } -func (r Resolver) resolveLaunchTemplate(nodeClass *v1beta1.EC2NodeClass, nodeClaim *corev1beta1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, +func (r Resolver) resolveLaunchTemplate(nodeClass *v1beta1.EC2NodeClass, nodeClaim *corev1beta1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, capacityType string, amiFamily AMIFamily, amiID string, maxPods int, efaCount int, options *Options) (*LaunchTemplate, error) { kubeletConfig := &corev1beta1.KubeletConfiguration{} if nodeClaim.Spec.Kubelet != nil { @@ -234,6 +235,7 @@ func (r Resolver) resolveLaunchTemplate(nodeClass *v1beta1.EC2NodeClass, nodeCla AMIID: amiID, InstanceTypes: instanceTypes, EFACount: efaCount, + CapacityType: capacityType, } if len(resolved.BlockDeviceMappings) == 0 { resolved.BlockDeviceMappings = amiFamily.DefaultBlockDeviceMappings() diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index ea01721b0dad..ff3685987204 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -113,14 +113,14 @@ func (p *Provider) EnsureAll(ctx context.Context, nodeClass *v1beta1.EC2NodeClas if err != nil { return nil, err } - resolvedLaunchTemplates, err := p.amiFamily.Resolve(ctx, nodeClass, nodeClaim, instanceTypes, options) + resolvedLaunchTemplates, err := p.amiFamily.Resolve(ctx, nodeClass, nodeClaim, instanceTypes, capacityType, options) if err != nil { return nil, err } var launchTemplates []*LaunchTemplate for _, resolvedLaunchTemplate := range resolvedLaunchTemplates { // Ensure the launch template exists, or create it - ec2LaunchTemplate, err := p.ensureLaunchTemplate(ctx, capacityType, resolvedLaunchTemplate) + ec2LaunchTemplate, err := p.ensureLaunchTemplate(ctx, resolvedLaunchTemplate) if err != nil { return nil, err } @@ -141,7 +141,20 @@ func (p *Provider) Invalidate(ctx context.Context, ltName string, ltID string) { } func launchTemplateName(options *amifamily.LaunchTemplate) string { - hash, err := hashstructure.Hash(options, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + // TODO: jmdeal@ remove custom hash struct once BlockDeviceMapping and KubeletConfiguration hashing is fixed, only hash Options + volumeSizeHash, _ := hashstructure.Hash(lo.Reduce(options.BlockDeviceMappings, func(agg string, block *v1beta1.BlockDeviceMapping, _ int) string { + return fmt.Sprintf("%s/%s", agg, block.EBS.VolumeSize) + }, ""), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + hashStruct := struct { + Options *amifamily.LaunchTemplate + VolumeSizeHash string + ReservedResourcesHash string + }{ + Options: options, + VolumeSizeHash: fmt.Sprint(volumeSizeHash), + ReservedResourcesHash: options.UserData.HashReservedResources(), + } + hash, err := hashstructure.Hash(hashStruct, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) if err != nil { panic(fmt.Sprintf("hashing launch template, %s", err)) } @@ -195,7 +208,7 @@ func (p *Provider) createAMIOptions(ctx context.Context, nodeClass *v1beta1.EC2N return options, nil } -func (p *Provider) ensureLaunchTemplate(ctx context.Context, capacityType string, options *amifamily.LaunchTemplate) (*ec2.LaunchTemplate, error) { +func (p *Provider) ensureLaunchTemplate(ctx context.Context, options *amifamily.LaunchTemplate) (*ec2.LaunchTemplate, error) { var launchTemplate *ec2.LaunchTemplate name := launchTemplateName(options) ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("launch-template-name", name)) @@ -210,7 +223,7 @@ func (p *Provider) ensureLaunchTemplate(ctx context.Context, capacityType string }) // Create LT if one doesn't exist if awserrors.IsNotFound(err) { - launchTemplate, err = p.createLaunchTemplate(ctx, capacityType, options) + launchTemplate, err = p.createLaunchTemplate(ctx, options) if err != nil { return nil, fmt.Errorf("creating launch template, %w", err) } @@ -228,7 +241,7 @@ func (p *Provider) ensureLaunchTemplate(ctx context.Context, capacityType string return launchTemplate, nil } -func (p *Provider) createLaunchTemplate(ctx context.Context, capacityType string, options *amifamily.LaunchTemplate) (*ec2.LaunchTemplate, error) { +func (p *Provider) createLaunchTemplate(ctx context.Context, options *amifamily.LaunchTemplate) (*ec2.LaunchTemplate, error) { userData, err := options.UserData.Script() if err != nil { return nil, err @@ -237,7 +250,7 @@ func (p *Provider) createLaunchTemplate(ctx context.Context, capacityType string {ResourceType: aws.String(ec2.ResourceTypeNetworkInterface), Tags: utils.MergeTags(options.Tags)}, } // Add the spot-instances-request tag if trying to launch spot capacity - if capacityType == corev1beta1.CapacityTypeSpot { + if options.CapacityType == corev1beta1.CapacityTypeSpot { launchTemplateDataTags = append(launchTemplateDataTags, &ec2.LaunchTemplateTagSpecificationRequest{ResourceType: aws.String(ec2.ResourceTypeSpotInstancesRequest), Tags: utils.MergeTags(options.Tags)}) } networkInterfaces := p.generateNetworkInterfaces(options)