chore: Drop karpenter.k8s.aws/cluster tag from launch templates (#7379)
jonathan-innis authored Nov 14, 2024
1 parent fd10545 commit fdccc5f
Showing 8 changed files with 25 additions and 25 deletions.
5 changes: 2 additions & 3 deletions pkg/apis/v1/labels.go
@@ -126,7 +126,6 @@ var (
AnnotationEC2NodeClassHashVersion = apis.Group + "/ec2nodeclass-hash-version"
AnnotationInstanceTagged = apis.Group + "/tagged"

-TagNodeClaim = coreapis.Group + "/nodeclaim"
-TagManagedLaunchTemplate = apis.Group + "/cluster"
-TagName = "Name"
+TagNodeClaim = coreapis.Group + "/nodeclaim"
+TagName = "Name"
)
13 changes: 12 additions & 1 deletion pkg/cloudprovider/cloudprovider.go
@@ -35,6 +35,7 @@ import (

"github.com/aws/karpenter-provider-aws/pkg/apis"
v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
+"github.com/aws/karpenter-provider-aws/pkg/operator/options"
"github.com/aws/karpenter-provider-aws/pkg/utils"

"github.com/samber/lo"
@@ -102,7 +103,7 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *karpv1.NodeClaim)
if len(instanceTypes) == 0 {
return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("all requested instance types were unavailable during launch"))
}
-instance, err := c.instanceProvider.Create(ctx, nodeClass, nodeClaim, instanceTypes)
+instance, err := c.instanceProvider.Create(ctx, nodeClass, nodeClaim, getTags(ctx, nodeClass, nodeClaim), instanceTypes)
if err != nil {
return nil, fmt.Errorf("creating instance, %w", err)
}
@@ -227,6 +228,16 @@ func (c *CloudProvider) GetSupportedNodeClasses() []status.Object {
return []status.Object{&v1.EC2NodeClass{}}
}

+func getTags(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim) map[string]string {
+staticTags := map[string]string{
+fmt.Sprintf("kubernetes.io/cluster/%s", options.FromContext(ctx).ClusterName): "owned",
+karpv1.NodePoolLabelKey: nodeClaim.Labels[karpv1.NodePoolLabelKey],
+v1.EKSClusterNameTagKey: options.FromContext(ctx).ClusterName,
+v1.LabelNodeClass: nodeClass.Name,
+}
+return lo.Assign(nodeClass.Spec.Tags, staticTags)
+}

func (c *CloudProvider) resolveNodeClassFromNodeClaim(ctx context.Context, nodeClaim *karpv1.NodeClaim) (*v1.EC2NodeClass, error) {
nodeClass := &v1.EC2NodeClass{}
if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Spec.NodeClassRef.Name}, nodeClass); err != nil {
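The `getTags` helper added above merges the user-provided `nodeClass.Spec.Tags` with Karpenter's static tags via `lo.Assign`, which applies maps left to right so keys in later maps win. A minimal sketch of that precedence, using illustrative tag values that are not taken from this commit:

```go
package main

import (
	"fmt"

	"github.com/samber/lo"
)

func main() {
	// User-specified tags from an EC2NodeClass spec (illustrative values).
	userTags := map[string]string{
		"team":                 "platform",
		"eks:eks-cluster-name": "user-supplied-override", // collides with a static tag key
	}
	// Static tags analogous to those built by getTags (cluster name assumed).
	staticTags := map[string]string{
		"eks:eks-cluster-name":             "my-cluster",
		"kubernetes.io/cluster/my-cluster": "owned",
	}
	// lo.Assign merges left to right, so the static tags win on key conflicts.
	merged := lo.Assign(userTags, staticTags)
	fmt.Println(merged["eks:eks-cluster-name"]) // my-cluster
	fmt.Println(merged["team"])                 // platform
}
```

Because the static map is passed last, `spec.tags` cannot override Karpenter's cluster-ownership and nodeclass tags for the same keys.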
4 changes: 2 additions & 2 deletions pkg/controllers/nodeclass/termination/suite_test.go
@@ -139,9 +139,9 @@ var _ = Describe("NodeClass Termination", func() {
})
It("should succeed to delete the launch template", func() {
ltName1 := aws.String(fake.LaunchTemplateName())
-awsEnv.EC2API.LaunchTemplates.Store(ltName1, ec2types.LaunchTemplate{LaunchTemplateName: ltName1, LaunchTemplateId: aws.String(fake.LaunchTemplateID()), Tags: []ec2types.Tag{{Key: aws.String("karpenter.k8s.aws/cluster"), Value: aws.String("test-cluster")}, {Key: aws.String("karpenter.k8s.aws/ec2nodeclass"), Value: aws.String(nodeClass.Name)}}})
+awsEnv.EC2API.LaunchTemplates.Store(ltName1, ec2types.LaunchTemplate{LaunchTemplateName: ltName1, LaunchTemplateId: aws.String(fake.LaunchTemplateID()), Tags: []ec2types.Tag{{Key: aws.String("eks:eks-cluster-name"), Value: aws.String("test-cluster")}, {Key: aws.String("karpenter.k8s.aws/ec2nodeclass"), Value: aws.String(nodeClass.Name)}}})
ltName2 := aws.String(fake.LaunchTemplateName())
-awsEnv.EC2API.LaunchTemplates.Store(ltName2, ec2types.LaunchTemplate{LaunchTemplateName: ltName2, LaunchTemplateId: aws.String(fake.LaunchTemplateID()), Tags: []ec2types.Tag{{Key: aws.String("karpenter.k8s.aws/cluster"), Value: aws.String("test-cluster")}, {Key: aws.String("karpenter.k8s.aws/ec2nodeclass"), Value: aws.String(nodeClass.Name)}}})
+awsEnv.EC2API.LaunchTemplates.Store(ltName2, ec2types.LaunchTemplate{LaunchTemplateName: ltName2, LaunchTemplateId: aws.String(fake.LaunchTemplateID()), Tags: []ec2types.Tag{{Key: aws.String("eks:eks-cluster-name"), Value: aws.String("test-cluster")}, {Key: aws.String("karpenter.k8s.aws/ec2nodeclass"), Value: aws.String(nodeClass.Name)}}})
_, ok := awsEnv.EC2API.LaunchTemplates.Load(ltName1)
Expect(ok).To(BeTrue())
_, ok = awsEnv.EC2API.LaunchTemplates.Load(ltName2)
2 changes: 1 addition & 1 deletion pkg/fake/utils.go
@@ -139,7 +139,7 @@ func Filter(filters []ec2types.Filter, id, name string, tags []ec2types.Tag) boo
// nolint: gocyclo
func matchTags(tags []ec2types.Tag, filter ec2types.Filter) bool {
if strings.HasPrefix(*filter.Name, "tag:") {
-tagKey := strings.Split(*filter.Name, ":")[1]
+_, tagKey, _ := strings.Cut(*filter.Name, ":")
for _, val := range filter.Values {
for _, tag := range tags {
if tagKey == *tag.Key && (val == "*" || val == *tag.Value) {
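The switch from `strings.Split` to `strings.Cut` matters here because the replacement tag key itself contains a colon: with a filter name like `tag:eks:eks-cluster-name`, `strings.Split(name, ":")[1]` yields only `eks`, while `strings.Cut` splits at the first colon and preserves the full key. A minimal sketch of the difference:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	filterName := "tag:eks:eks-cluster-name"

	// Old behavior: Split breaks at every colon, truncating the tag key.
	oldKey := strings.Split(filterName, ":")[1]
	fmt.Println(oldKey) // eks

	// New behavior: Cut splits only at the first colon, keeping the rest intact.
	_, newKey, _ := strings.Cut(filterName, ":")
	fmt.Println(newKey) // eks:eks-cluster-name
}
```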
15 changes: 2 additions & 13 deletions pkg/providers/instance/instance.go
@@ -69,7 +69,7 @@ var (
)

type Provider interface {
-Create(context.Context, *v1.EC2NodeClass, *karpv1.NodeClaim, []*cloudprovider.InstanceType) (*Instance, error)
+Create(context.Context, *v1.EC2NodeClass, *karpv1.NodeClaim, map[string]string, []*cloudprovider.InstanceType) (*Instance, error)
Get(context.Context, string) (*Instance, error)
List(context.Context) ([]*Instance, error)
Delete(context.Context, string) error
@@ -97,7 +97,7 @@ func NewDefaultProvider(ctx context.Context, region string, ec2api sdk.EC2API, u
}
}

-func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType) (*Instance, error) {
+func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, tags map[string]string, instanceTypes []*cloudprovider.InstanceType) (*Instance, error) {
schedulingRequirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...)
// Only filter the instances if there are no minValues in the requirement.
if !schedulingRequirements.HasMinValues() {
@@ -108,7 +108,6 @@ func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass
log.FromContext(ctx).Error(err, "truncating instance types")
return nil, fmt.Errorf("truncating instance types, %w", err)
}
-tags := getTags(ctx, nodeClass, nodeClaim)
fleetInstance, err := p.launchInstance(ctx, nodeClass, nodeClaim, instanceTypes, tags)
if awserrors.IsLaunchTemplateNotFound(err) {
// retry once if launch template is not found. This allows karpenter to generate a new LT if the
@@ -268,16 +267,6 @@ func (p *DefaultProvider) launchInstance(ctx context.Context, nodeClass *v1.EC2N
return createFleetOutput.Instances[0], nil
}

-func getTags(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim) map[string]string {
-staticTags := map[string]string{
-fmt.Sprintf("kubernetes.io/cluster/%s", options.FromContext(ctx).ClusterName): "owned",
-karpv1.NodePoolLabelKey: nodeClaim.Labels[karpv1.NodePoolLabelKey],
-v1.EKSClusterNameTagKey: options.FromContext(ctx).ClusterName,
-v1.LabelNodeClass: nodeClass.Name,
-}
-return lo.Assign(nodeClass.Spec.Tags, staticTags)
-}

func (p *DefaultProvider) checkODFallback(nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, launchTemplateConfigs []ec2types.FleetLaunchTemplateConfigRequest) error {
// only evaluate for on-demand fallback if the capacity type for the request is OD and both OD and spot are allowed in requirements
if p.getCapacityType(nodeClaim, instanceTypes) != karpv1.CapacityTypeOnDemand || !scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...).Get(karpv1.CapacityTypeLabelKey).Has(karpv1.CapacityTypeSpot) {
2 changes: 1 addition & 1 deletion pkg/providers/instance/suite_test.go
@@ -133,7 +133,7 @@ var _ = Describe("InstanceProvider", func() {
instanceTypes = lo.Filter(instanceTypes, func(i *corecloudprovider.InstanceType, _ int) bool { return i.Name == "m5.xlarge" })

// Since all the capacity pools are ICEd. This should return back an ICE error
-instance, err := awsEnv.InstanceProvider.Create(ctx, nodeClass, nodeClaim, instanceTypes)
+instance, err := awsEnv.InstanceProvider.Create(ctx, nodeClass, nodeClaim, nil, instanceTypes)
Expect(corecloudprovider.IsInsufficientCapacityError(err)).To(BeTrue())
Expect(instance).To(BeNil())
})
8 changes: 4 additions & 4 deletions pkg/providers/launchtemplate/launchtemplate.go
@@ -261,7 +261,7 @@ func (p *DefaultProvider) createLaunchTemplate(ctx context.Context, options *ami
TagSpecifications: []ec2types.TagSpecification{
{
ResourceType: ec2types.ResourceTypeLaunchTemplate,
-Tags: utils.MergeTags(options.Tags, map[string]string{v1.TagManagedLaunchTemplate: options.ClusterName, v1.LabelNodeClass: options.NodeClassName}),
+Tags: utils.MergeTags(options.Tags),
},
},
})
@@ -348,12 +348,12 @@ func (p *DefaultProvider) volumeSize(quantity *resource.Quantity) *int32 {
// Any error during hydration will result in a panic
func (p *DefaultProvider) hydrateCache(ctx context.Context) {
clusterName := options.FromContext(ctx).ClusterName
-ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("tag-key", v1.TagManagedLaunchTemplate, "tag-value", clusterName))
+ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("tag-key", v1.EKSClusterNameTagKey, "tag-value", clusterName))

paginator := ec2.NewDescribeLaunchTemplatesPaginator(p.ec2api, &ec2.DescribeLaunchTemplatesInput{
Filters: []ec2types.Filter{
{
-Name: aws.String(fmt.Sprintf("tag:%s", v1.TagManagedLaunchTemplate)),
+Name: aws.String(fmt.Sprintf("tag:%s", v1.EKSClusterNameTagKey)),
Values: []string{clusterName},
},
},
@@ -400,7 +400,7 @@ func (p *DefaultProvider) DeleteAll(ctx context.Context, nodeClass *v1.EC2NodeCl
paginator := ec2.NewDescribeLaunchTemplatesPaginator(p.ec2api, &ec2.DescribeLaunchTemplatesInput{
Filters: []ec2types.Filter{
{
-Name: aws.String(fmt.Sprintf("tag:%s", v1.TagManagedLaunchTemplate)),
+Name: aws.String(fmt.Sprintf("tag:%s", v1.EKSClusterNameTagKey)),
Values: []string{clusterName},
},
{
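In the `createLaunchTemplate` hunk above, the cluster and nodeclass tags are no longer appended at the call site; only `options.Tags` is passed to `utils.MergeTags`, presumably because those keys now arrive in the tag map built by the new `getTags` helper in `cloudprovider.go`. As a rough illustration of what a MergeTags-style helper does (an assumption for illustration, not the provider's actual `utils.MergeTags` implementation), merging string maps and converting them to EC2 tag structs might look like:

```go
package main

import (
	"fmt"

	"github.com/aws/aws-sdk-go-v2/aws"
	ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
	"github.com/samber/lo"
)

// mergeTags is a hypothetical helper: it merges string maps (later maps win)
// and converts the result into EC2 tag structs for a TagSpecification.
func mergeTags(tagMaps ...map[string]string) []ec2types.Tag {
	return lo.MapToSlice(lo.Assign(tagMaps...), func(k, v string) ec2types.Tag {
		return ec2types.Tag{Key: aws.String(k), Value: aws.String(v)}
	})
}

func main() {
	tags := mergeTags(map[string]string{
		"eks:eks-cluster-name":           "my-cluster", // assumed cluster name
		"karpenter.k8s.aws/ec2nodeclass": "default",    // assumed nodeclass name
	})
	for _, t := range tags {
		fmt.Printf("%s=%s\n", *t.Key, *t.Value)
	}
}
```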
1 change: 1 addition & 0 deletions website/content/en/preview/upgrading/upgrade-guide.md
@@ -38,6 +38,7 @@ WHEN CREATING A NEW SECTION OF THE UPGRADE GUIDANCE FOR NEWER VERSIONS, ENSURE T
* Bottlerocket AMIFamily now supports `instanceStorePolicy: RAID0`. This means that Karpenter will auto-generate userData to RAID0 your instance store volumes (similar to AL2 and AL2023) when specifying this value.
* Note: This userData configuration is _only_ valid on Bottlerocket v1.22.0+. If you are using an earlier version of a Bottlerocket image (< v1.22.0) with `amiFamily: Bottlerocket` and `instanceStorePolicy: RAID0`, nodes will fail to join the cluster.
* The AWS Neuron accelerator well known name label (`karpenter.k8s.aws/instance-accelerator-name`) values now reflect their correct names of `trainium`, `inferentia`, and `inferentia2`. Previously, all Neuron accelerators were assigned the label name of `inferentia`.
+* Karpenter drops the internal `karpenter.k8s.aws/cluster` tag used for launch template management in favor of `eks:eks-cluster-name`, for consistency with other Karpenter-provisioned resources.
### Upgrading to `1.0.0`+
