From 5cc4801a80c55a5686ec1186f98215ad9da0382a Mon Sep 17 00:00:00 2001 From: Amanuel Engeda Date: Mon, 25 Sep 2023 11:22:30 -0700 Subject: [PATCH] Update E2E clean-up --- .github/actions/e2e/cleanup/action.yaml | 70 +-------- test/hack/cleanup/go.mod | 8 +- test/hack/cleanup/go.sum | 12 +- test/hack/cleanup/main.go | 189 +++++++++++++++++------- 4 files changed, 152 insertions(+), 127 deletions(-) diff --git a/.github/actions/e2e/cleanup/action.yaml b/.github/actions/e2e/cleanup/action.yaml index 21032bd2d8bb..01b0eb12a25b 100644 --- a/.github/actions/e2e/cleanup/action.yaml +++ b/.github/actions/e2e/cleanup/action.yaml @@ -27,71 +27,17 @@ runs: - uses: ./.github/actions/e2e/install-eksctl with: version: ${{ inputs.eksctl_version }} - - name: delete-instance-profiles - shell: - run: | - for name in $(aws iam list-instance-profiles --query "InstanceProfiles[*].{Name:InstanceProfileName}" --output text); do - tags=$(aws iam list-instance-profile-tags --instance-profile-name $name --output json || true) - if [[ $(echo $tags | jq -r '.Tags[] | select(.Key == "testing/cluster") | .Value') == "${{ inputs.cluster_name }}" ]]; then - echo "Deleting instance profile '$name'..." - roleName=$(aws iam get-instance-profile --instance-profile-name $name --query "InstanceProfile.Roles[*].{Name:RoleName}" --output text) - aws iam remove-role-from-instance-profile --instance-profile-name $name --role-name $roleName - aws iam delete-instance-profile --instance-profile-name $name - fi - done + - uses: actions/setup-go@v4 + with: + go-version-file: go.mod + check-latest: true + cache-dependency-path: "**/go.sum" - name: delete-cluster shell: bash run: | eksctl delete cluster --name ${{ inputs.cluster_name }} --timeout 60m --wait || true - - name: delete-network-interfaces - shell: bash - run: | - aws ec2 describe-network-interfaces \ - --filter Name=tag:cluster.k8s.amazonaws.com/name,Values=${{ inputs.cluster_name }} \ - --query "NetworkInterfaces[*].NetworkInterfaceId" \ - --output text | - xargs \ - -n 1 \ - -r \ - aws ec2 delete-network-interface \ - --network-interface-id - - name: delete-security-group - shell: bash - # For drift testing, we create a security group and need to clean it up here - # to avoid leaks if the tests is not fully completed - run: | - aws ec2 describe-security-groups \ - --filters Name=group-name,Values=security-group-drift Name=tag:karpenter.sh/discovery,Values=${{ inputs.cluster_name }} \ - --query "SecurityGroups[*].{ID:GroupId}" \ - --output text | - xargs \ - -n 1 \ - -r \ - aws ec2 delete-security-group \ - --group-id - - name: delete-iam-alpha-policy - shell: bash - run: | - aws iam delete-policy --policy-arn "arn:aws:iam::${{ inputs.account_id }}:policy/KarpenterControllerPolicy-Alpha-${{ inputs.cluster_name }}" || true - - name: delete-iam-policies-stack - shell: bash + - name: "Run cleanup script" run: | - aws cloudformation delete-stack --stack-name iam-${{ inputs.cluster_name }} - aws cloudformation wait stack-delete-complete --stack-name iam-${{ inputs.cluster_name }} - - name: delete-cluster-stack + go run main.go ${{ inputs.cluster_name }} + working-directory: ./test/hack/cleanup shell: bash - run: | - aws cloudformation delete-stack --stack-name eksctl-${{ inputs.cluster_name }}-cluster || true - aws cloudformation wait stack-delete-complete --stack-name eksctl-${{ inputs.cluster_name }}-cluster || true - - name: delete-launch-templates - shell: bash - run: | - aws ec2 describe-launch-templates \ - --filter Name=tag:karpenter.k8s.aws/cluster,Values=${{ inputs.cluster_name }} \ - --query "LaunchTemplates[*].LaunchTemplateId" \ - --output text | - xargs \ - -n 1 \ - -r \ - aws ec2 delete-launch-template \ - --launch-template-id diff --git a/test/hack/cleanup/go.mod b/test/hack/cleanup/go.mod index 44fbaea8520e..2dbb2303d044 100644 --- a/test/hack/cleanup/go.mod +++ b/test/hack/cleanup/go.mod @@ -16,18 +16,18 @@ require ( ) require ( - github.com/aws/aws-sdk-go-v2 v1.20.1 // indirect + github.com/aws/aws-sdk-go-v2 v1.21.0 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.13.26 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.38 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.32 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35 // indirect github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.7.32 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.12.12 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.12 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.19.2 // indirect - github.com/aws/smithy-go v1.14.1 // indirect + github.com/aws/smithy-go v1.14.2 // indirect github.com/go-logr/logr v1.2.3 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect go.uber.org/atomic v1.7.0 // indirect diff --git a/test/hack/cleanup/go.sum b/test/hack/cleanup/go.sum index 3856b069fb55..f66bed564169 100644 --- a/test/hack/cleanup/go.sum +++ b/test/hack/cleanup/go.sum @@ -1,8 +1,9 @@ github.com/aws/aws-sdk-go v1.44.309 h1:IPJOFBzXekakxmEpDwd4RTKmmBR6LIAiXgNsM51bWbU= github.com/aws/aws-sdk-go v1.44.309/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/aws/aws-sdk-go-v2 v1.18.1/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= -github.com/aws/aws-sdk-go-v2 v1.20.1 h1:rZBf5DWr7YGrnlTK4kgDQGn1ltqOg5orCYb/UhOFZkg= github.com/aws/aws-sdk-go-v2 v1.20.1/go.mod h1:NU06lETsFm8fUC6ZjhgDpVBcGZTFQ6XM+LZWZxMI4ac= +github.com/aws/aws-sdk-go-v2 v1.21.0 h1:gMT0IW+03wtYJhRqTVYn0wLzwdnK9sRMcxmtfGzRdJc= +github.com/aws/aws-sdk-go-v2 v1.21.0/go.mod h1:/RfNgGmRxI+iFOB1OeJUyxiU+9s88k3pfHvDagGEp0M= github.com/aws/aws-sdk-go-v2/config v1.18.27 h1:Az9uLwmssTE6OGTpsFqOnaGpLnKDqNYOJzWuC6UAYzA= github.com/aws/aws-sdk-go-v2/config v1.18.27/go.mod h1:0My+YgmkGxeqjXZb5BYme5pc4drjTnM+x1GJ3zv42Nw= github.com/aws/aws-sdk-go-v2/credentials v1.13.26 h1:qmU+yhKmOCyujmuPY7tf5MxR/RKyZrOPO3V4DobiTUk= @@ -10,11 +11,13 @@ github.com/aws/aws-sdk-go-v2/credentials v1.13.26/go.mod h1:GoXt2YC8jHUBbA4jr+W3 github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4 h1:LxK/bitrAr4lnh9LnIS6i7zWbCOdMsfzKFBI6LUCS0I= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4/go.mod h1:E1hLXN/BL2e6YizK1zFlYd8vsfi2GTjbjBazinMmeaM= github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.34/go.mod h1:wZpTEecJe0Btj3IYnDx/VlUzor9wm3fJHyvLpQF0VwY= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.38 h1:c8ed/T9T2K5I+h/JzmF5tpI46+OODQ74dzmdo+QnaMg= github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.38/go.mod h1:qggunOChCMu9ZF/UkAfhTz25+U2rLVb3ya0Ua6TTfCA= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 h1:22dGT7PneFMx4+b3pz7lMTRyN8ZKH7M2cW4GP9yUS2g= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41/go.mod h1:CrObHAuPneJBlfEJ5T3szXOUkLEThaGfvnhTf33buas= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.28/go.mod h1:7VRpKQQedkfIEXb4k52I7swUnZP0wohVajJMRn3vsUw= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.32 h1:hNeAAymUY5gu11WrrmFb3CVIp9Dar9hbo44yzzcQpzA= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.32/go.mod h1:0ZXSqrty4FtQ7p8TEuRde/SZm9X05KT18LAUlR40Ln0= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35 h1:SijA0mgjV8E+8G45ltVHs0fvKpTj8xmZJ3VwhGKtUSI= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35/go.mod h1:SJC1nEVVva1g3pHAIdCp7QsRIkMmLAgoDquQ9Rr8kYw= github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35 h1:LWA+3kDM8ly001vJ1X1waCuLJdtTl48gwkPKWy9sosI= github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35/go.mod h1:0Eg1YjxE0Bhn56lx+SHJwCzhW+2JGtizsrx+lCqrfm0= github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0 h1:XbDkc4FLeg1RfnqeblfbJvaEabqq9ByZl4zqyPFkfSc= @@ -36,8 +39,9 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.19.2/go.mod h1:dp0yLPsLBOi++WTxzCjA/ github.com/aws/aws-sdk-go-v2/service/timestreamwrite v1.18.2 h1:5QyvAYyr+ZibpVxfovzd5JMTZ8miv9s3zT4jG4PJkIA= github.com/aws/aws-sdk-go-v2/service/timestreamwrite v1.18.2/go.mod h1:3ZCiyyNF7myh/a7DcOjcqRsLmSF9EdhEZSr00Qlui4s= github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= -github.com/aws/smithy-go v1.14.1 h1:EFKMUmH/iHMqLiwoEDx2rRjRQpI1YCn5jTysoaDujFs= github.com/aws/smithy-go v1.14.1/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= +github.com/aws/smithy-go v1.14.2 h1:MJU9hqBGbvWZdApzpvoF2WAIJDbtjK2NDJSiJP7HblQ= +github.com/aws/smithy-go v1.14.2/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/test/hack/cleanup/main.go b/test/hack/cleanup/main.go index 9b5c9fe8ac71..c2479dc43784 100644 --- a/test/hack/cleanup/main.go +++ b/test/hack/cleanup/main.go @@ -17,6 +17,7 @@ package main import ( "context" "fmt" + "os" "time" "github.com/aws/aws-sdk-go-v2/config" @@ -24,6 +25,7 @@ import ( cloudformationtypes "github.com/aws/aws-sdk-go-v2/service/cloudformation/types" "github.com/aws/aws-sdk-go-v2/service/ec2" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/aws/aws-sdk-go-v2/service/iam" "github.com/aws/aws-sdk-go-v2/service/timestreamwrite" timestreamtypes "github.com/aws/aws-sdk-go-v2/service/timestreamwrite/types" @@ -40,7 +42,9 @@ const ( karpenterMetricDatabase = "karpenterTesting" karpenterMetricTableName = "sweeperCleanedResources" + karpenterClusterNameTag = "karpenter.sh/managed-by" karpenterProvisionerNameTag = "karpenter.sh/provisioner-name" + karpenterNodePoolNameTag = "karpenter.sh/nodepool-name" karpenterLaunchTemplateTag = "karpenter.k8s.aws/cluster" karpenterSecurityGroupTag = "karpenter.sh/discovery" // TODO @joinnis: Remove this karpenterTestingTagLegacy field after running this cleanup script for a few days @@ -50,6 +54,8 @@ const ( githubRunURLTag = "github.com/run-url" ) +var clusterName = "" + type CleanableResourceType interface { Type() string Get(context.Context, time.Time) ([]string, error) @@ -61,6 +67,9 @@ type MetricsClient interface { } func main() { + if len(os.Args) == 2 { + clusterName = os.Args[1] + } ctx := context.Background() cfg := lo.Must(config.LoadDefaultConfig(ctx)) @@ -78,13 +87,14 @@ func main() { resources := []CleanableResourceType{ &eni{ec2Client: ec2Client}, - &instance{ec2Client: ec2Client}, &securitygroup{ec2Client: ec2Client}, + &instance{ec2Client: ec2Client}, &stack{cloudFormationClient: cloudFormationClient}, &launchtemplate{ec2Client: ec2Client}, &oidc{iamClient: iamClient}, &instanceProfile{iamClient: iamClient}, } + workqueue.ParallelizeUntil(ctx, len(resources), len(resources), func(i int) { ids, err := resources[i].Get(ctx, expirationTime) if err != nil { @@ -114,18 +124,35 @@ func (i *instance) Type() string { func (i *instance) Get(ctx context.Context, expirationTime time.Time) (ids []string, err error) { var nextToken *string + var ec2Filter []ec2types.Filter + + if clusterName == "" { + ec2Filter = []ec2types.Filter{ + { + Name: lo.ToPtr("instance-state-name"), + Values: []string{string(ec2types.InstanceStateNameRunning)}, + }, + { + Name: lo.ToPtr("tag-key"), + Values: []string{karpenterProvisionerNameTag, karpenterNodePoolNameTag}, + }, + } + } else { + ec2Filter = []ec2types.Filter{ + { + Name: lo.ToPtr("instance-state-name"), + Values: []string{string(ec2types.InstanceStateNameRunning)}, + }, + { + Name: lo.ToPtr("tag:" + karpenterClusterNameTag), + Values: []string{clusterName}, + }, + } + } + for { out, err := i.ec2Client.DescribeInstances(ctx, &ec2.DescribeInstancesInput{ - Filters: []ec2types.Filter{ - { - Name: lo.ToPtr("instance-state-name"), - Values: []string{string(ec2types.InstanceStateNameRunning)}, - }, - { - Name: lo.ToPtr("tag-key"), - Values: []string{karpenterProvisionerNameTag}, - }, - }, + Filters: ec2Filter, NextToken: nextToken, }) if err != nil { @@ -134,11 +161,10 @@ func (i *instance) Get(ctx context.Context, expirationTime time.Time) (ids []str for _, res := range out.Reservations { for _, instance := range res.Instances { - if _, found := lo.Find(instance.Tags, func(t ec2types.Tag) bool { - return lo.FromPtr(t.Key) == "kubernetes.io/cluster/KITInfrastructure" - }); !found && lo.FromPtr(instance.LaunchTime).Before(expirationTime) { - ids = append(ids, lo.FromPtr(instance.InstanceId)) + if clusterName == "" && !lo.FromPtr(instance.LaunchTime).Before(expirationTime) { + continue } + ids = append(ids, lo.FromPtr(instance.InstanceId)) } } @@ -171,14 +197,27 @@ func (sg *securitygroup) Type() string { func (sg *securitygroup) Get(ctx context.Context, expirationTime time.Time) (ids []string, err error) { var nextToken *string + var ec2Filter []ec2types.Filter + + if clusterName == "" { + ec2Filter = []ec2types.Filter{ + { + Name: lo.ToPtr("group-name"), + Values: []string{"security-group-drift"}, + }, + } + } else { + ec2Filter = []ec2types.Filter{ + { + Name: lo.ToPtr("tag:" + karpenterSecurityGroupTag), + Values: []string{clusterName}, + }, + } + } + for { out, err := sg.ec2Client.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{ - Filters: []ec2types.Filter{ - { - Name: lo.ToPtr("group-name"), - Values: []string{"security-group-drift"}, - }, - }, + Filters: ec2Filter, NextToken: nextToken, }) if err != nil { @@ -186,17 +225,21 @@ func (sg *securitygroup) Get(ctx context.Context, expirationTime time.Time) (ids } for _, sgroup := range out.SecurityGroups { - creationDate, found := lo.Find(sgroup.Tags, func(tag ec2types.Tag) bool { - return *tag.Key == "creation-date" - }) - if !found { - continue - } - time, err := time.Parse(time.RFC3339, *creationDate.Value) - if err != nil { - continue - } - if time.Before(expirationTime) { + if clusterName == "" { + creationDate, found := lo.Find(sgroup.Tags, func(tag ec2types.Tag) bool { + return *tag.Key == "creation-date" + }) + if !found { + continue + } + time, err := time.Parse(time.RFC3339, *creationDate.Value) + if err != nil { + continue + } + if time.Before(expirationTime) { + ids = append(ids, lo.FromPtr(sgroup.GroupId)) + } + } else { ids = append(ids, lo.FromPtr(sgroup.GroupId)) } } @@ -236,6 +279,9 @@ func (s *stack) Type() string { func (s *stack) Get(ctx context.Context, expirationTime time.Time) (names []string, err error) { var nextToken *string + if clusterName != "" { + return []string{fmt.Sprintf("iam-%s", clusterName), fmt.Sprintf("eksctl-%s-cluster", clusterName)}, nil + } for { out, err := s.cloudFormationClient.DescribeStacks(ctx, &cloudformation.DescribeStacksInput{ NextToken: nextToken, @@ -292,14 +338,26 @@ func (lt *launchtemplate) Type() string { func (lt *launchtemplate) Get(ctx context.Context, expirationTime time.Time) (names []string, err error) { var nextToken *string + var ec2Filter []ec2types.Filter + if clusterName == "" { + ec2Filter = []ec2types.Filter{ + { + Name: lo.ToPtr("tag-key"), + Values: []string{karpenterLaunchTemplateTag}, + }, + } + } else { + ec2Filter = []ec2types.Filter{ + { + Name: lo.ToPtr("tag:" + karpenterLaunchTemplateTag), + Values: []string{clusterName}, + }, + } + } + for { out, err := lt.ec2Client.DescribeLaunchTemplates(ctx, &ec2.DescribeLaunchTemplatesInput{ - Filters: []ec2types.Filter{ - { - Name: lo.ToPtr("tag-key"), - Values: []string{karpenterLaunchTemplateTag}, - }, - }, + Filters: ec2Filter, NextToken: nextToken, }) if err != nil { @@ -307,9 +365,10 @@ func (lt *launchtemplate) Get(ctx context.Context, expirationTime time.Time) (na } for _, launchTemplate := range out.LaunchTemplates { - if lo.FromPtr(launchTemplate.CreateTime).Before(expirationTime) { - names = append(names, lo.FromPtr(launchTemplate.LaunchTemplateName)) + if clusterName == "" && !lo.FromPtr(launchTemplate.CreateTime).Before(expirationTime) { + continue } + names = append(names, lo.FromPtr(launchTemplate.LaunchTemplateName)) } nextToken = out.NextToken @@ -458,14 +517,26 @@ func (e *eni) Type() string { func (e *eni) Get(ctx context.Context, expirationTime time.Time) (ids []string, err error) { var nextToken *string + var ec2Filter []ec2types.Filter + if clusterName == "" { + ec2Filter = []ec2types.Filter{ + { + Name: lo.ToPtr("tag-key"), + Values: []string{k8sClusterTag}, + }, + } + } else { + ec2Filter = []ec2types.Filter{ + { + Name: lo.ToPtr("tag:" + k8sClusterTag), + Values: []string{clusterName}, + }, + } + } + for { out, err := e.ec2Client.DescribeNetworkInterfaces(ctx, &ec2.DescribeNetworkInterfacesInput{ - Filters: []ec2types.Filter{ - { - Name: lo.ToPtr("tag-key"), - Values: []string{k8sClusterTag}, - }, - }, + Filters: ec2Filter, NextToken: nextToken, }) if err != nil { @@ -473,17 +544,21 @@ func (e *eni) Get(ctx context.Context, expirationTime time.Time) (ids []string, } for _, ni := range out.NetworkInterfaces { - creationDate, found := lo.Find(ni.TagSet, func(tag ec2types.Tag) bool { - return *tag.Key == "node.k8s.amazonaws.com/createdAt" - }) - if !found { - continue - } - time, err := time.Parse(time.RFC3339, *creationDate.Value) - if err != nil { - continue - } - if ni.Status == ec2types.NetworkInterfaceStatusAvailable && time.Before(expirationTime) { + if clusterName == "" { + creationDate, found := lo.Find(ni.TagSet, func(tag ec2types.Tag) bool { + return *tag.Key == "node.k8s.amazonaws.com/createdAt" + }) + if !found { + continue + } + time, err := time.Parse(time.RFC3339, *creationDate.Value) + if err != nil { + continue + } + if ni.Status == ec2types.NetworkInterfaceStatusAvailable && time.Before(expirationTime) { + ids = append(ids, lo.FromPtr(ni.NetworkInterfaceId)) + } + } else { ids = append(ids, lo.FromPtr(ni.NetworkInterfaceId)) } }