diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 53a24fff091b..94dc0950929e 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -67,6 +67,7 @@ func main() { op.InstanceProvider, op.PricingProvider, op.AMIProvider, + op.LaunchTemplateProvider, )...). WithWebhooks(ctx, webhooks.NewWebhooks()...). Start(ctx) diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index 34094797f2de..6a5d7fe7dd36 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -17,6 +17,8 @@ package controllers import ( "context" + "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" + "github.com/aws/aws-sdk-go/aws/session" servicesqs "github.com/aws/aws-sdk-go/service/sqs" "github.com/samber/lo" @@ -46,10 +48,10 @@ import ( func NewControllers(ctx context.Context, sess *session.Session, clk clock.Clock, kubeClient client.Client, recorder events.Recorder, unavailableOfferings *cache.UnavailableOfferings, cloudProvider *cloudprovider.CloudProvider, subnetProvider *subnet.Provider, securityGroupProvider *securitygroup.Provider, instanceProfileProvider *instanceprofile.Provider, instanceProvider *instance.Provider, - pricingProvider *pricing.Provider, amiProvider *amifamily.Provider) []controller.Controller { + pricingProvider *pricing.Provider, amiProvider *amifamily.Provider, launchTemplateProvider *launchtemplate.Provider) []controller.Controller { controllers := []controller.Controller{ - nodeclass.NewNodeClassController(kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider), + nodeclass.NewNodeClassController(kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, launchTemplateProvider), nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider), nodeclaimtagging.NewController(kubeClient, instanceProvider), } diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index e77c0ff2b4a6..874c2e3c95a4 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -20,6 +20,8 @@ import ( "sort" "time" + "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" + "github.com/aws/aws-sdk-go/service/ec2" "github.com/samber/lo" "go.uber.org/multierr" @@ -56,10 +58,11 @@ type Controller struct { securityGroupProvider *securitygroup.Provider amiProvider *amifamily.Provider instanceProfileProvider *instanceprofile.Provider + launchTemplateProvider *launchtemplate.Provider } func NewController(kubeClient client.Client, recorder events.Recorder, subnetProvider *subnet.Provider, securityGroupProvider *securitygroup.Provider, - amiProvider *amifamily.Provider, instanceProfileProvider *instanceprofile.Provider) *Controller { + amiProvider *amifamily.Provider, instanceProfileProvider *instanceprofile.Provider, launchTemplateProvider *launchtemplate.Provider) *Controller { return &Controller{ kubeClient: kubeClient, recorder: recorder, @@ -67,6 +70,7 @@ func NewController(kubeClient client.Client, recorder events.Recorder, subnetPro securityGroupProvider: securityGroupProvider, amiProvider: amiProvider, instanceProfileProvider: instanceProfileProvider, + launchTemplateProvider: launchTemplateProvider, } } @@ -113,6 +117,9 @@ func (c *Controller) Finalize(ctx context.Context, nodeClass *v1beta1.EC2NodeCla return reconcile.Result{}, fmt.Errorf("deleting instance profile, %w", err) } } + if err := c.launchTemplateProvider.DeleteLaunchTemplates(ctx, nodeClass); err != nil { + return reconcile.Result{}, fmt.Errorf("deleting launch templates, %w", err) + } controllerutil.RemoveFinalizer(nodeClass, v1beta1.TerminationFinalizer) if !equality.Semantic.DeepEqual(stored, nodeClass) { if err := c.kubeClient.Patch(ctx, nodeClass, client.MergeFrom(stored)); err != nil { @@ -215,9 +222,9 @@ type NodeClassController struct { } func NewNodeClassController(kubeClient client.Client, recorder events.Recorder, subnetProvider *subnet.Provider, securityGroupProvider *securitygroup.Provider, - amiProvider *amifamily.Provider, instanceProfileProvider *instanceprofile.Provider) corecontroller.Controller { + amiProvider *amifamily.Provider, instanceProfileProvider *instanceprofile.Provider, launchTemplateProvider *launchtemplate.Provider) corecontroller.Controller { return corecontroller.Typed[*v1beta1.EC2NodeClass](kubeClient, &NodeClassController{ - Controller: NewController(kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider), + Controller: NewController(kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, launchTemplateProvider), }) } diff --git a/pkg/controllers/nodeclass/suite_test.go b/pkg/controllers/nodeclass/suite_test.go index cb3918ca35c3..7a69ebe288d0 100644 --- a/pkg/controllers/nodeclass/suite_test.go +++ b/pkg/controllers/nodeclass/suite_test.go @@ -30,7 +30,6 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/iam" - corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/events" corecontroller "sigs.k8s.io/karpenter/pkg/operator/controller" @@ -69,7 +68,7 @@ var _ = BeforeSuite(func() { ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) - nodeClassController = nodeclass.NewNodeClassController(env.Client, events.NewRecorder(&record.FakeRecorder{}), awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider) + nodeClassController = nodeclass.NewNodeClassController(env.Client, events.NewRecorder(&record.FakeRecorder{}), awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) }) var _ = AfterSuite(func() { @@ -821,6 +820,53 @@ var _ = Describe("NodeClassController", func() { BeforeEach(func() { profileName = instanceprofile.GetProfileName(ctx, fake.DefaultRegion, nodeClass) }) + It("should not delete the NodeClass if launch template deletion fails", func() { + launchTemplateName := aws.String(fake.LaunchTemplateName()) + awsEnv.EC2API.LaunchTemplates.Store(launchTemplateName, &ec2.LaunchTemplate{LaunchTemplateName: launchTemplateName, LaunchTemplateId: aws.String(fake.LaunchTemplateID()), Tags: []*ec2.Tag{&ec2.Tag{Key: aws.String("karpenter.k8s.aws/cluster"), Value: aws.String("test-cluster")}}}) + _, ok := awsEnv.EC2API.LaunchTemplates.Load(launchTemplateName) + Expect(ok).To(BeTrue()) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + + Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed()) + awsEnv.EC2API.NextError.Set(fmt.Errorf("delete Launch Template Error")) + ExpectReconcileFailed(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + ExpectExists(ctx, env.Client, nodeClass) + }) + It("should not delete the launch template not associated with the nodeClass", func() { + launchTemplateName := aws.String(fake.LaunchTemplateName()) + awsEnv.EC2API.LaunchTemplates.Store(launchTemplateName, &ec2.LaunchTemplate{LaunchTemplateName: launchTemplateName, LaunchTemplateId: aws.String(fake.LaunchTemplateID()), Tags: []*ec2.Tag{&ec2.Tag{Key: aws.String("karpenter.k8s.aws/cluster"), Value: aws.String("test-cluster")}}}) + _, ok := awsEnv.EC2API.LaunchTemplates.Load(launchTemplateName) + Expect(ok).To(BeTrue()) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + + Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed()) + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + _, ok = awsEnv.EC2API.LaunchTemplates.Load(launchTemplateName) + Expect(ok).To(BeTrue()) + ExpectNotFound(ctx, env.Client, nodeClass) + }) + It("should succeed to delete the launch template", func() { + ltName1 := aws.String(fake.LaunchTemplateName()) + awsEnv.EC2API.LaunchTemplates.Store(ltName1, &ec2.LaunchTemplate{LaunchTemplateName: ltName1, LaunchTemplateId: aws.String(fake.LaunchTemplateID()), Tags: []*ec2.Tag{&ec2.Tag{Key: aws.String("karpenter.k8s.aws/cluster"), Value: aws.String("test-cluster")}, {Key: aws.String("karpenter.k8s.aws/ec2nodeclass"), Value: aws.String(nodeClass.Name)}}}) + ltName2 := aws.String(fake.LaunchTemplateName()) + awsEnv.EC2API.LaunchTemplates.Store(ltName2, &ec2.LaunchTemplate{LaunchTemplateName: ltName2, LaunchTemplateId: aws.String(fake.LaunchTemplateID()), Tags: []*ec2.Tag{&ec2.Tag{Key: aws.String("karpenter.k8s.aws/cluster"), Value: aws.String("test-cluster")}, {Key: aws.String("karpenter.k8s.aws/ec2nodeclass"), Value: aws.String(nodeClass.Name)}}}) + _, ok := awsEnv.EC2API.LaunchTemplates.Load(ltName1) + Expect(ok).To(BeTrue()) + _, ok = awsEnv.EC2API.LaunchTemplates.Load(ltName2) + Expect(ok).To(BeTrue()) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + + Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed()) + ExpectReconcileSucceeded(ctx, nodeClassController, client.ObjectKeyFromObject(nodeClass)) + _, ok = awsEnv.EC2API.LaunchTemplates.Load(ltName1) + Expect(ok).To(BeFalse()) + _, ok = awsEnv.EC2API.LaunchTemplates.Load(ltName2) + Expect(ok).To(BeFalse()) + ExpectNotFound(ctx, env.Client, nodeClass) + }) It("should succeed to delete the instance profile with no NodeClaims", func() { awsEnv.IAMAPI.InstanceProfiles = map[string]*iam.InstanceProfile{ profileName: { @@ -927,7 +973,6 @@ var _ = Describe("NodeClassController", func() { }, }, } - nodeClass.Spec.Role = "" nodeClass.Spec.InstanceProfile = lo.ToPtr("test-instance-profile") ExpectApplied(ctx, env.Client, nodeClass) diff --git a/pkg/fake/ec2api.go b/pkg/fake/ec2api.go index 0b74936c20c8..ce9cf21e0087 100644 --- a/pkg/fake/ec2api.go +++ b/pkg/fake/ec2api.go @@ -22,9 +22,10 @@ import ( "sync" "time" + "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/Pallinder/go-randomdata" "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/aws/request" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/ec2/ec2iface" @@ -371,17 +372,38 @@ func (e *EC2API) DescribeLaunchTemplatesWithContext(_ context.Context, input *ec output := &ec2.DescribeLaunchTemplatesOutput{} e.LaunchTemplates.Range(func(key, value interface{}) bool { launchTemplate := value.(*ec2.LaunchTemplate) - if lo.Contains(aws.StringValueSlice(input.LaunchTemplateNames), aws.StringValue(launchTemplate.LaunchTemplateName)) { + if lo.Contains(aws.StringValueSlice(input.LaunchTemplateNames), aws.StringValue(launchTemplate.LaunchTemplateName)) || len(input.Filters) != 0 && Filter(input.Filters, aws.StringValue(launchTemplate.LaunchTemplateId), aws.StringValue(launchTemplate.LaunchTemplateName), launchTemplate.Tags) { output.LaunchTemplates = append(output.LaunchTemplates, launchTemplate) } return true }) + if len(input.Filters) != 0 { + return output, nil + } if len(output.LaunchTemplates) == 0 { return nil, awserr.New("InvalidLaunchTemplateName.NotFoundException", "not found", nil) } return output, nil } +func (e *EC2API) DescribeLaunchTemplatesPagesWithContext(ctx context.Context, input *ec2.DescribeLaunchTemplatesInput, fn func(*ec2.DescribeLaunchTemplatesOutput, bool) bool, _ ...request.Option) error { + out, err := e.DescribeLaunchTemplatesWithContext(ctx, input) + if err != nil { + return err + } + fn(out, false) + return nil +} + +func (e *EC2API) DeleteLaunchTemplateWithContext(_ context.Context, input *ec2.DeleteLaunchTemplateInput, _ ...request.Option) (*ec2.DeleteLaunchTemplateOutput, error) { + if !e.NextError.IsNil() { + defer e.NextError.Reset() + return nil, e.NextError.Get() + } + e.LaunchTemplates.Delete(input.LaunchTemplateName) + return nil, nil +} + func (e *EC2API) DescribeSubnetsWithContext(_ context.Context, input *ec2.DescribeSubnetsInput, _ ...request.Option) (*ec2.DescribeSubnetsOutput, error) { if !e.NextError.IsNil() { defer e.NextError.Reset() diff --git a/pkg/fake/utils.go b/pkg/fake/utils.go index 427e38184876..b91df64f29a3 100644 --- a/pkg/fake/utils.go +++ b/pkg/fake/utils.go @@ -55,6 +55,14 @@ func RoleID() string { return fmt.Sprintf("role-%s", randomdata.Alphanumeric(17)) } +func LaunchTemplateName() string { + return fmt.Sprintf("karpenter.k8s.aws/%s", randomdata.Alphanumeric(17)) +} + +func LaunchTemplateID() string { + return fmt.Sprint(randomdata.Alphanumeric(17)) +} + func PrivateDNSName() string { return fmt.Sprintf("ip-192-168-%d-%d.%s.compute.internal", randomdata.Number(0, 256), randomdata.Number(0, 256), DefaultRegion) } diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index d3f991c37ef3..14467a6d852a 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -59,6 +59,7 @@ type Options struct { Labels map[string]string `hash:"ignore"` KubeDNSIP net.IP AssociatePublicIPAddress *bool + NodeClassName string } // LaunchTemplate holds the dynamically generated launch template parameters diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index 577f4757bd70..7c0706b726b7 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -24,6 +24,8 @@ import ( "sync" "time" + "go.uber.org/multierr" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/ec2/ec2iface" @@ -175,10 +177,11 @@ func (p *Provider) createAMIOptions(ctx context.Context, nodeClass *v1beta1.EC2N SecurityGroups: lo.Map(securityGroups, func(s *ec2.SecurityGroup, _ int) v1beta1.SecurityGroup { return v1beta1.SecurityGroup{ID: aws.StringValue(s.GroupId), Name: aws.StringValue(s.GroupName)} }), - Tags: tags, - Labels: labels, - CABundle: p.caBundle, - KubeDNSIP: p.KubeDNSIP, + Tags: tags, + Labels: labels, + CABundle: p.caBundle, + KubeDNSIP: p.KubeDNSIP, + NodeClassName: nodeClass.Name, } if ok, err := p.subnetProvider.CheckAnyPublicIPAssociations(ctx, nodeClass); err != nil { return nil, err @@ -264,7 +267,7 @@ func (p *Provider) createLaunchTemplate(ctx context.Context, capacityType string TagSpecifications: []*ec2.TagSpecification{ { ResourceType: aws.String(ec2.ResourceTypeLaunchTemplate), - Tags: utils.MergeTags(options.Tags, map[string]string{karpenterManagedTagKey: options.ClusterName}), + Tags: utils.MergeTags(options.Tags, map[string]string{karpenterManagedTagKey: options.ClusterName, v1beta1.LabelNodeClass: options.NodeClassName}), }, }, }) @@ -389,3 +392,34 @@ func (p *Provider) getInstanceProfile(nodeClass *v1beta1.EC2NodeClass) (string, } return "", errors.New("neither spec.instanceProfile or spec.role is specified") } + +func (p *Provider) DeleteLaunchTemplates(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) error { + clusterName := options.FromContext(ctx).ClusterName + var ltNames []*string + if err := p.ec2api.DescribeLaunchTemplatesPagesWithContext(ctx, &ec2.DescribeLaunchTemplatesInput{ + Filters: []*ec2.Filter{ + {Name: aws.String(fmt.Sprintf("tag:%s", karpenterManagedTagKey)), Values: []*string{aws.String(clusterName)}}, + {Name: aws.String(fmt.Sprintf("tag:%s", v1beta1.LabelNodeClass)), Values: []*string{aws.String(nodeClass.Name)}}, + }, + }, func(output *ec2.DescribeLaunchTemplatesOutput, _ bool) bool { + for _, lt := range output.LaunchTemplates { + ltNames = append(ltNames, lt.LaunchTemplateName) + } + return true + }); err != nil { + return fmt.Errorf("fetching launch templates, %w", err) + } + + var deleteErr error + for _, name := range ltNames { + _, err := p.ec2api.DeleteLaunchTemplateWithContext(ctx, &ec2.DeleteLaunchTemplateInput{LaunchTemplateName: name}) + deleteErr = multierr.Append(deleteErr, err) + } + if len(ltNames) > 0 { + logging.FromContext(ctx).With("launchTemplates", utils.PrettySlice(aws.StringValueSlice(ltNames), 5)).Debugf("deleted launch templates") + } + if deleteErr != nil { + return fmt.Errorf("deleting launch templates, %w", deleteErr) + } + return nil +} diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index 10f139987cc3..d7189c157fc5 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -135,6 +135,57 @@ var _ = Describe("LaunchTemplates", func() { }, }) }) + It("should create unique launch templates for multiple identical nodeClasses", func() { + nodeClass2 := test.EC2NodeClass() + nodePool2 := coretest.NodePool(corev1beta1.NodePool{ + Spec: corev1beta1.NodePoolSpec{ + Template: corev1beta1.NodeClaimTemplate{ + Spec: corev1beta1.NodeClaimSpec{ + Requirements: []v1.NodeSelectorRequirement{ + { + Key: corev1beta1.CapacityTypeLabelKey, + Operator: v1.NodeSelectorOpIn, + Values: []string{corev1beta1.CapacityTypeSpot}, + }, + }, + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass2.Name, + }, + }, + }, + }, + }) + pods := []*v1.Pod{ + coretest.UnschedulablePod(coretest.PodOptions{NodeRequirements: []v1.NodeSelectorRequirement{ + { + Key: corev1beta1.CapacityTypeLabelKey, + Operator: v1.NodeSelectorOpIn, + Values: []string{corev1beta1.CapacityTypeSpot}, + }, + }, + }), + coretest.UnschedulablePod(coretest.PodOptions{NodeRequirements: []v1.NodeSelectorRequirement{ + { + Key: corev1beta1.CapacityTypeLabelKey, + Operator: v1.NodeSelectorOpIn, + Values: []string{corev1beta1.CapacityTypeOnDemand}, + }, + }, + }), + } + ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodePool2, nodeClass2) + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pods...) + ltConfigCount := len(awsEnv.EC2API.CreateFleetBehavior.CalledWithInput.Pop().LaunchTemplateConfigs) + len(awsEnv.EC2API.CreateFleetBehavior.CalledWithInput.Pop().LaunchTemplateConfigs) + Expect(ltConfigCount).To(BeNumerically("==", awsEnv.EC2API.CalledWithCreateLaunchTemplateInput.Len())) + nodeClasses := [2]string{nodeClass.Name, nodeClass2.Name} + awsEnv.EC2API.CalledWithCreateLaunchTemplateInput.ForEach(func(ltInput *ec2.CreateLaunchTemplateInput) { + for _, value := range ltInput.LaunchTemplateData.TagSpecifications[0].Tags { + if *value.Key == v1beta1.LabelNodeClass { + Expect(*value.Value).To(BeElementOf(nodeClasses)) + } + } + }) + }) It("should default to a generated launch template", func() { ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod() diff --git a/test/suites/integration/launch_template_test.go b/test/suites/integration/launch_template_test.go new file mode 100644 index 000000000000..41f602105ad7 --- /dev/null +++ b/test/suites/integration/launch_template_test.go @@ -0,0 +1,47 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "fmt" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + coretest "sigs.k8s.io/karpenter/pkg/test" + + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("Launch Template Deletion", func() { + It("should remove the generated Launch Templates when deleting the NodeClass", func() { + pod := coretest.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + env.ExpectDeleted(nodePool, nodeClass) + Eventually(func(g Gomega) { + output, _ := env.EC2API.DescribeLaunchTemplatesWithContext(env.Context, &ec2.DescribeLaunchTemplatesInput{ + Filters: []*ec2.Filter{ + {Name: aws.String(fmt.Sprintf("tag:%s", v1beta1.LabelNodeClass)), Values: []*string{aws.String(nodeClass.Name)}}, + }, + }) + g.Expect(output.LaunchTemplates).To(HaveLen(0)) + }).WithPolling(5.0).Should(Succeed()) + }) +})