Skip to content

Commit

Permalink
fix SG and OIDC leak
Browse files Browse the repository at this point in the history
  • Loading branch information
engedaam committed Jul 25, 2023
1 parent 4cecea2 commit 3f015f6
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 8 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/sweeper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,18 @@ permissions:
jobs:
sweeper:
if: github.repository == 'aws/karpenter' || github.event_name == 'workflow_dispatch'
strategy:
fail-fast: false
matrix:
region: [us-east-2, us-west-2, eu-west-1]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: configure aws credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: arn:aws:iam::${{ vars.ACCOUNT_ID }}:role/${{ vars.ROLE_NAME }}
aws-region: ${{ vars.AWS_REGION }}
aws-region: ${{ matrix.region }}
- uses: actions/setup-go@v4
with:
go-version-file: test/hack/cleanup/go.mod
Expand Down
1 change: 1 addition & 0 deletions test/hack/cleanup/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0
github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2
github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0
github.com/aws/aws-sdk-go-v2/service/iam v1.21.0
github.com/samber/lo v1.38.1
go.uber.org/zap v1.24.0
)
Expand Down
2 changes: 2 additions & 0 deletions test/hack/cleanup/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2 h1:PWGu2JhCb/XJlJ7SSFJq7
github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2/go.mod h1:2KOZkkzMDZCo/aLzPhys06mHNkiU74u85aMJA3PLRvg=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0 h1:P4dyjm49F2kKws0FpouBC6fjVImACXKt752+CWa01lM=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0/go.mod h1:tIctCeX9IbzsUTKHt53SVEcgyfxV2ElxJeEB+QUbc4M=
github.com/aws/aws-sdk-go-v2/service/iam v1.21.0 h1:8hEpu60CWlrp7iEBUFRZhgPoX6+gadaGL1sD4LoRYS0=
github.com/aws/aws-sdk-go-v2/service/iam v1.21.0/go.mod h1:aQZ8BI+reeaY7RI/QQp7TKCSUHOesTdrzzylp3CW85c=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28 h1:bkRyG4a929RCnpVSTvLM2j/T4ls015ZhhYApbmYs15s=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28/go.mod h1:jj7znCIg05jXlaGBlFMGP8+7UN3VtCkRBG2spnmRQkU=
github.com/aws/aws-sdk-go-v2/service/sso v1.12.12 h1:nneMBM2p79PGWBQovYO/6Xnc2ryRMw3InnDJq1FHkSY=
Expand Down
93 changes: 86 additions & 7 deletions test/hack/cleanup/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ package main

import (
"context"
"fmt"
"strings"
"time"

"github.com/aws/aws-sdk-go-v2/config"
Expand All @@ -25,6 +27,7 @@ import (
cloudwatchtypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
"github.com/aws/aws-sdk-go-v2/service/ec2"
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/aws/aws-sdk-go-v2/service/iam"
"github.com/samber/lo"
"go.uber.org/zap"
)
Expand All @@ -35,6 +38,7 @@ const (

karpenterProvisionerNameTag = "karpenter.sh/provisioner-name"
karpenterLaunchTemplateTag = "karpenter.k8s.aws/cluster"
karpenterSecurityGroupTag = "karpenter.sh/discovery"
githubRunURLTag = "github.com/run-url"
)

Expand All @@ -51,9 +55,17 @@ func main() {
ec2Client := ec2.NewFromConfig(cfg)
cloudFormationClient := cloudformation.NewFromConfig(cfg)
cloudWatchClient := cloudwatch.NewFromConfig(cfg)
iamClient := iam.NewFromConfig(cfg)

// Terminate any old instances that were provisioned by Karpenter as part of testing
// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively
cleanupInstances(ctx, ec2Client, cloudWatchClient, expirationTime, logger)
cleanupOldstack(ctx, ec2Client, cloudWatchClient, cloudFormationClient, expirationTime, logger)
cleanupLaunchTemplates(ctx, ec2Client, cloudWatchClient, expirationTime, logger)
cleanupOIDCProvider(ctx, iamClient, cloudWatchClient, expirationTime, logger)
}

// Terminate any old instances that were provisioned by Karpenter as part of testing
// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively
func cleanupInstances(ctx context.Context, ec2Client *ec2.Client, cloudWatchClient *cloudwatch.Client, expirationTime time.Time, logger *zap.SugaredLogger) {
ids := getOldInstances(ctx, ec2Client, expirationTime)
logger.With("ids", ids, "count", len(ids)).Infof("discovered test instances to delete")
if len(ids) > 0 {
Expand All @@ -68,13 +80,37 @@ func main() {
}
}
}
}

// Terminate any old stacks that were provisioned as part of testing
// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively
// Terminate any old stacks that were provisioned as part of testing
// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively
func cleanupOldstack(ctx context.Context, ec2Client *ec2.Client, cloudWatchClient *cloudwatch.Client, cloudFormationClient *cloudformation.Client, expirationTime time.Time, logger *zap.SugaredLogger) {
sgInAccount := lo.Must(ec2Client.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{
Filters: []ec2types.Filter{
{
Name: lo.ToPtr("group-name"),
Values: []string{"security-group-drift"},
},
},
}))
names := getOldStacks(ctx, cloudFormationClient, expirationTime)
logger.With("names", names, "count", len(names)).Infof("discovered test stacks to delete")
deleted := 0
for i := range names {
if strings.HasSuffix(names[i], "-cluster") && strings.Contains(names[i], "drift") {
stackName := strings.Split(names[i], "-")
sgName := fmt.Sprintf("drift-%s", stackName[2])
sg, _ := lo.Find(sgInAccount.SecurityGroups, func(sg ec2types.SecurityGroup) bool {
return *sg.Tags[0].Key == karpenterSecurityGroupTag && *sg.Tags[0].Value == sgName
})
if _, err := ec2Client.DeleteSecurityGroup(ctx, &ec2.DeleteSecurityGroupInput{
GroupId: sg.GroupId,
}); err != nil {
logger.With("name", names[i]).Errorf("deleting test stack sg, %v", err)
} else {
logger.With("name", names[i]).Infof("deleted test stack sg")
}
}
if _, err := cloudFormationClient.DeleteStack(ctx, &cloudformation.DeleteStackInput{
StackName: lo.ToPtr(names[i]),
}); err != nil {
Expand All @@ -87,11 +123,14 @@ func main() {
if err := fireMetric(ctx, cloudWatchClient, "StacksDeleted", float64(deleted)); err != nil {
logger.With("name", "StacksDeleted").Errorf("firing metric, %v", err)
}
}

// Terminate any old launch templates that were managed by Karpenter and were provisioned as part of testing
names = getOldLaunchTemplates(ctx, ec2Client, expirationTime)
// Terminate any old launch templates that were managed by Karpenter and were provisioned as part of testing
// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively
func cleanupLaunchTemplates(ctx context.Context, ec2Client *ec2.Client, cloudWatchClient *cloudwatch.Client, expirationTime time.Time, logger *zap.SugaredLogger) {
names := getOldLaunchTemplates(ctx, ec2Client, expirationTime)
logger.With("names", names, "count", len(names)).Infof("discovered test launch templates to delete")
deleted = 0
deleted := 0
for i := range names {
if _, err := ec2Client.DeleteLaunchTemplate(ctx, &ec2.DeleteLaunchTemplateInput{
LaunchTemplateName: lo.ToPtr(names[i]),
Expand All @@ -107,6 +146,26 @@ func main() {
}
}

// cleanupOIDCProvider deletes the OIDC providers returned by getOldOIDCProviders for the
// given expirationTime and then fires the "OIDCDeleted" metric with the number of
// providers that were successfully deleted.
// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively
func cleanupOIDCProvider(ctx context.Context, iamClient *iam.Client, cloudWatchClient *cloudwatch.Client, expirationTime time.Time, logger *zap.SugaredLogger) {
	arns := getOldOIDCProviders(ctx, iamClient, expirationTime)
	// Log what was discovered before deleting anything, like the other cleanup steps do,
	// so that a sweeper run can be audited from its logs.
	logger.With("arns", arns, "count", len(arns)).Infof("discovered test cluster oidc providers to delete")
	deleted := 0
	for _, arn := range arns {
		if _, err := iamClient.DeleteOpenIDConnectProvider(ctx, &iam.DeleteOpenIDConnectProviderInput{
			OpenIDConnectProviderArn: lo.ToPtr(arn),
		}); err != nil {
			// A failed deletion is only logged so that the remaining providers are still attempted.
			logger.With("arn", arn).Errorf("deleting test cluster oidc provider, %v", err)
			continue
		}
		logger.With("arn", arn).Infof("deleted test cluster oidc provider")
		deleted++
	}
	if err := fireMetric(ctx, cloudWatchClient, "OIDCDeleted", float64(deleted)); err != nil {
		logger.With("name", "OIDCDeleted").Errorf("firing metric, %v", err)
	}
}

func fireMetric(ctx context.Context, cloudWatchClient *cloudwatch.Client, name string, value float64) error {
_, err := cloudWatchClient.PutMetricData(ctx, &cloudwatch.PutMetricDataInput{
Namespace: lo.ToPtr(karpenterMetricNamespace),
Expand Down Expand Up @@ -208,3 +267,23 @@ func getOldLaunchTemplates(ctx context.Context, ec2Client *ec2.Client, expiratio
}
return names
}

// getOldOIDCProviders returns the ARNs of the IAM OIDC providers that were created before
// expirationTime and whose "alpha.eksctl.io/cluster-name" tag value starts with "<suite>-"
// for one of the test suite names listed below.
//
// Providers that report no creation date are never returned, since their age is unknown.
// The function panics (through lo.Must) if listing the providers or describing one of them fails.
func getOldOIDCProviders(ctx context.Context, iamClient *iam.Client, expirationTime time.Time) (names []string) {
	testSuite := []string{"upgrade", "chaos", "consolidation", "drift", "integration", "interruption", "ipv6", "machine", "scale", "utilization"}
	out := lo.Must(iamClient.ListOpenIDConnectProviders(ctx, &iam.ListOpenIDConnectProvidersInput{}))

	for _, entry := range out.OpenIDConnectProviderList {
		oidc := lo.Must(iamClient.GetOpenIDConnectProvider(ctx, &iam.GetOpenIDConnectProviderInput{
			OpenIDConnectProviderArn: entry.Arn,
		}))

		// CreateDate is a pointer: guard against nil instead of dereferencing blindly, and
		// decide on the age once per provider rather than once per tag.
		if oidc.CreateDate == nil || !oidc.CreateDate.Before(expirationTime) {
			continue
		}

		for _, t := range oidc.Tags {
			if lo.FromPtr(t.Key) != "alpha.eksctl.io/cluster-name" {
				continue
			}
			clusterName := lo.FromPtr(t.Value)
			if lo.SomeBy(testSuite, func(s string) bool { return strings.HasPrefix(clusterName, s+"-") }) {
				names = append(names, lo.FromPtr(entry.Arn))
			}
			// The cluster-name tag has been evaluated; stop scanning so that the ARN is
			// appended at most once.
			break
		}
	}
	return names
}

0 comments on commit 3f015f6

Please sign in to comment.