Skip to content

Commit

Permalink
ci: add cluster exclusion list to cleanup (aws#5764)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmdeal authored Mar 2, 2024
1 parent 4b1d4e6 commit e53c52b
Show file tree
Hide file tree
Showing 12 changed files with 91 additions and 15 deletions.
10 changes: 8 additions & 2 deletions test/hack/resource/clean/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,19 @@ import (
"github.com/aws/aws-sdk-go-v2/service/iam"
"github.com/samber/lo"
"go.uber.org/zap"
"golang.org/x/exp/slices"

"github.com/aws/karpenter-provider-aws/test/hack/resource/pkg/metrics"
"github.com/aws/karpenter-provider-aws/test/hack/resource/pkg/resourcetypes"
)

// sweeperCleanedResourcesTableName is the table name "sweeperCleanedResources".
// NOTE(review): presumably the metrics table that cleaned-resource counts are written to;
// its usage is outside this view — confirm.
const sweeperCleanedResourcesTableName = "sweeperCleanedResources"

// excludedClusters lists the names of clusters whose resources must not be cleaned up.
// It is handed to GetExpired when no cluster name is supplied, and a supplied
// cluster name that appears in this list is not looked up for cleanup at all.
var excludedClusters = []string{
// TODO: @jmdeal remove after SQS investigation
"soak-periodic-46287782",
}

func main() {
expiration := flag.String("expiration", "12h", "define the expirationTTL of the resources")
clusterName := flag.String("cluster-name", "", "define cluster name to cleanup")
Expand Down Expand Up @@ -78,8 +84,8 @@ func main() {
var err error
// If there's no cluster defined, clean up all expired resources except those belonging to an excluded cluster. Otherwise, only clean up the resources associated with the given cluster, unless that cluster is itself excluded
if lo.FromPtr(clusterName) == "" {
ids, err = resourceTypes[i].GetExpired(ctx, expirationTime)
} else {
ids, err = resourceTypes[i].GetExpired(ctx, expirationTime, excludedClusters)
} else if !slices.Contains(excludedClusters, *clusterName) {
ids, err = resourceTypes[i].Get(ctx, lo.FromPtr(clusterName))
}
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion test/hack/resource/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/samber/lo v1.38.1
go.uber.org/multierr v1.11.0
go.uber.org/zap v1.24.0
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17
)

require (
Expand All @@ -30,6 +31,5 @@ require (
github.com/pkg/errors v0.9.1 // indirect
github.com/stretchr/testify v1.8.1 // indirect
go.uber.org/atomic v1.7.0 // indirect
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
)
9 changes: 8 additions & 1 deletion test/hack/resource/pkg/resourcetypes/eni.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/samber/lo"
"go.uber.org/multierr"
"golang.org/x/exp/slices"
)

type ENI struct {
Expand All @@ -41,7 +42,7 @@ func (e *ENI) Global() bool {
return false
}

func (e *ENI) GetExpired(ctx context.Context, expirationTime time.Time) (ids []string, err error) {
func (e *ENI) GetExpired(ctx context.Context, expirationTime time.Time, excludedClusters []string) (ids []string, err error) {
var nextToken *string
for {
out, err := e.ec2Client.DescribeNetworkInterfaces(ctx, &ec2.DescribeNetworkInterfacesInput{
Expand All @@ -58,6 +59,12 @@ func (e *ENI) GetExpired(ctx context.Context, expirationTime time.Time) (ids []s
}

for _, ni := range out.NetworkInterfaces {
clusterName, found := lo.Find(ni.TagSet, func(tag ec2types.Tag) bool {
return *tag.Key == k8sClusterTag
})
if found && slices.Contains(excludedClusters, lo.FromPtr(clusterName.Value)) {
continue
}
creationDate, found := lo.Find(ni.TagSet, func(tag ec2types.Tag) bool {
return *tag.Key == "node.k8s.amazonaws.com/createdAt"
})
Expand Down
9 changes: 8 additions & 1 deletion test/hack/resource/pkg/resourcetypes/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/aws/aws-sdk-go-v2/service/ec2"
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/samber/lo"
"golang.org/x/exp/slices"
)

type Instance struct {
Expand All @@ -39,7 +40,7 @@ func (i *Instance) Global() bool {
return false
}

func (i *Instance) GetExpired(ctx context.Context, expirationTime time.Time) (ids []string, err error) {
func (i *Instance) GetExpired(ctx context.Context, expirationTime time.Time, excludedClusters []string) (ids []string, err error) {
var nextToken *string
for {
out, err := i.ec2Client.DescribeInstances(ctx, &ec2.DescribeInstancesInput{
Expand All @@ -61,6 +62,12 @@ func (i *Instance) GetExpired(ctx context.Context, expirationTime time.Time) (id

for _, res := range out.Reservations {
for _, instance := range res.Instances {
clusterName, found := lo.Find(instance.Tags, func(tag ec2types.Tag) bool {
return *tag.Key == k8sClusterTag
})
if found && slices.Contains(excludedClusters, lo.FromPtr(clusterName.Value)) {
continue
}
if lo.FromPtr(instance.LaunchTime).Before(expirationTime) {
ids = append(ids, lo.FromPtr(instance.InstanceId))
}
Expand Down
11 changes: 10 additions & 1 deletion test/hack/resource/pkg/resourcetypes/instanceprofile.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ import (

"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/iam"
iamtypes "github.com/aws/aws-sdk-go-v2/service/iam/types"
"github.com/samber/lo"
"go.uber.org/multierr"
"golang.org/x/exp/slices"
)

type InstanceProfile struct {
Expand All @@ -41,7 +43,7 @@ func (ip *InstanceProfile) Global() bool {
return true
}

func (ip *InstanceProfile) GetExpired(ctx context.Context, expirationTime time.Time) (names []string, err error) {
func (ip *InstanceProfile) GetExpired(ctx context.Context, expirationTime time.Time, excludedClusters []string) (names []string, err error) {
out, err := ip.iamClient.ListInstanceProfiles(ctx, &iam.ListInstanceProfilesInput{})
if err != nil {
return names, err
Expand All @@ -61,6 +63,13 @@ func (ip *InstanceProfile) GetExpired(ctx context.Context, expirationTime time.T
continue
}

clusterName, found := lo.Find(out.InstanceProfiles[i].Tags, func(tag iamtypes.Tag) bool {
return *tag.Key == k8sClusterTag
})
if found && slices.Contains(excludedClusters, lo.FromPtr(clusterName.Value)) {
continue
}

for _, t := range profiles.Tags {
// Since we can only get the date of the instance profile (not the exact time the instance profile was created)
// we add a day to the time that it was created to account for the worst-case of the instance profile being created
Expand Down
9 changes: 8 additions & 1 deletion test/hack/resource/pkg/resourcetypes/launchtemplate.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/samber/lo"
"go.uber.org/multierr"
"golang.org/x/exp/slices"
)

type LaunchTemplate struct {
Expand All @@ -40,7 +41,7 @@ func (lt *LaunchTemplate) Global() bool {
return false
}

func (lt *LaunchTemplate) GetExpired(ctx context.Context, expirationTime time.Time) (names []string, err error) {
func (lt *LaunchTemplate) GetExpired(ctx context.Context, expirationTime time.Time, excludedClusters []string) (names []string, err error) {
var nextToken *string
for {
out, err := lt.ec2Client.DescribeLaunchTemplates(ctx, &ec2.DescribeLaunchTemplatesInput{
Expand All @@ -57,6 +58,12 @@ func (lt *LaunchTemplate) GetExpired(ctx context.Context, expirationTime time.Ti
}

for _, launchTemplate := range out.LaunchTemplates {
clusterName, found := lo.Find(launchTemplate.Tags, func(tag ec2types.Tag) bool {
return *tag.Key == k8sClusterTag
})
if found && slices.Contains(excludedClusters, lo.FromPtr(clusterName.Value)) {
continue
}
if lo.FromPtr(launchTemplate.CreateTime).Before(expirationTime) {
names = append(names, lo.FromPtr(launchTemplate.LaunchTemplateName))
}
Expand Down
11 changes: 10 additions & 1 deletion test/hack/resource/pkg/resourcetypes/oidc.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ import (

"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/iam"
iamtypes "github.com/aws/aws-sdk-go-v2/service/iam/types"
"github.com/samber/lo"
"go.uber.org/multierr"
"golang.org/x/exp/slices"
)

type OIDC struct {
Expand All @@ -41,7 +43,7 @@ func (o *OIDC) Global() bool {
return true
}

func (o *OIDC) GetExpired(ctx context.Context, expirationTime time.Time) (names []string, err error) {
func (o *OIDC) GetExpired(ctx context.Context, expirationTime time.Time, excludedClusters []string) (names []string, err error) {
out, err := o.iamClient.ListOpenIDConnectProviders(ctx, &iam.ListOpenIDConnectProvidersInput{})
if err != nil {
return names, err
Expand All @@ -61,6 +63,13 @@ func (o *OIDC) GetExpired(ctx context.Context, expirationTime time.Time) (names
continue
}

clusterName, found := lo.Find(oicd.Tags, func(tag iamtypes.Tag) bool {
return *tag.Key == k8sClusterTag
})
if found && slices.Contains(excludedClusters, lo.FromPtr(clusterName.Value)) {
continue
}

for _, t := range oicd.Tags {
if lo.FromPtr(t.Key) == githubRunURLTag && oicd.CreateDate.Before(expirationTime) {
names = append(names, lo.FromPtr(out.OpenIDConnectProviderList[i].Arn))
Expand Down
2 changes: 1 addition & 1 deletion test/hack/resource/pkg/resourcetypes/resourcetypes.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ type Type interface {
// Get returns all resources of the type associated with the clusterName
Get(ctx context.Context, clusterName string) (ids []string, err error)
// GetExpired returns all resources of the type that were provisioned before the expirationTime, skipping any resource tagged as belonging to one of the excludedClusters
GetExpired(ctx context.Context, expirationTime time.Time) (ids []string, err error)
GetExpired(ctx context.Context, expirationTime time.Time, excludedClusters []string) (ids []string, err error)
// Cleanup deletes all resources of the type by id and returns the resource ids it succeeded to delete
// In general, if all resources can't be deleted by id with a single API call (like with DeleteInstances)
// you should call the requests synchronously to avoid rate limiting against the number of requests made
Expand Down
9 changes: 8 additions & 1 deletion test/hack/resource/pkg/resourcetypes/securitygroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/samber/lo"
"go.uber.org/multierr"
"golang.org/x/exp/slices"
)

type SecurityGroup struct {
Expand All @@ -41,7 +42,7 @@ func (sg *SecurityGroup) Global() bool {
return false
}

func (sg *SecurityGroup) GetExpired(ctx context.Context, expirationTime time.Time) (ids []string, err error) {
func (sg *SecurityGroup) GetExpired(ctx context.Context, expirationTime time.Time, excludedClusters []string) (ids []string, err error) {
var nextToken *string
for {
out, err := sg.ec2Client.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{
Expand All @@ -58,6 +59,12 @@ func (sg *SecurityGroup) GetExpired(ctx context.Context, expirationTime time.Tim
}

for _, sgroup := range out.SecurityGroups {
clusterName, found := lo.Find(sgroup.Tags, func(tag ec2types.Tag) bool {
return *tag.Key == k8sClusterTag
})
if found && slices.Contains(excludedClusters, lo.FromPtr(clusterName.Value)) {
continue
}
creationDate, found := lo.Find(sgroup.Tags, func(tag ec2types.Tag) bool {
return *tag.Key == "creation-date"
})
Expand Down
9 changes: 8 additions & 1 deletion test/hack/resource/pkg/resourcetypes/stack.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
cloudformationtypes "github.com/aws/aws-sdk-go-v2/service/cloudformation/types"
"github.com/samber/lo"
"go.uber.org/multierr"
"golang.org/x/exp/slices"
)

type Stack struct {
Expand All @@ -40,7 +41,7 @@ func (s *Stack) Global() bool {
return false
}

func (s *Stack) GetExpired(ctx context.Context, expirationTime time.Time) (names []string, err error) {
func (s *Stack) GetExpired(ctx context.Context, expirationTime time.Time, excludedClusters []string) (names []string, err error) {
var nextToken *string
for {
out, err := s.cloudFormationClient.DescribeStacks(ctx, &cloudformation.DescribeStacksInput{
Expand All @@ -55,6 +56,12 @@ func (s *Stack) GetExpired(ctx context.Context, expirationTime time.Time) (names
s.StackStatus == cloudformationtypes.StackStatusDeleteInProgress
})
for _, stack := range stacks {
clusterName, found := lo.Find(stack.Tags, func(tag cloudformationtypes.Tag) bool {
return *tag.Key == k8sClusterTag
})
if found && slices.Contains(excludedClusters, lo.FromPtr(clusterName.Value)) {
continue
}
if _, found := lo.Find(stack.Tags, func(t cloudformationtypes.Tag) bool {
return lo.FromPtr(t.Key) == karpenterTestingTag || lo.FromPtr(t.Key) == githubRunURLTag
}); found && lo.FromPtr(stack.CreationTime).Before(expirationTime) {
Expand Down
9 changes: 8 additions & 1 deletion test/hack/resource/pkg/resourcetypes/vpc_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/aws/aws-sdk-go-v2/service/ec2"
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/samber/lo"
"golang.org/x/exp/slices"
)

type VPCEndpoint struct {
Expand Down Expand Up @@ -85,7 +86,7 @@ func (v *VPCEndpoint) CountAll(ctx context.Context) (count int, err error) {
return count, err
}

func (v *VPCEndpoint) GetExpired(ctx context.Context, expirationTime time.Time) (ids []string, err error) {
func (v *VPCEndpoint) GetExpired(ctx context.Context, expirationTime time.Time, excludedClusters []string) (ids []string, err error) {
var nextToken *string
for {
out, err := v.ec2Client.DescribeVpcEndpoints(ctx, &ec2.DescribeVpcEndpointsInput{
Expand All @@ -101,6 +102,12 @@ func (v *VPCEndpoint) GetExpired(ctx context.Context, expirationTime time.Time)
return ids, err
}
for _, endpoint := range out.VpcEndpoints {
clusterName, found := lo.Find(endpoint.Tags, func(tag ec2types.Tag) bool {
return *tag.Key == k8sClusterTag
})
if found && slices.Contains(excludedClusters, lo.FromPtr(clusterName.Value)) {
continue
}
if endpoint.CreationTimestamp.Before(expirationTime) {
ids = append(ids, lo.FromPtr(endpoint.VpcEndpointId))
}
Expand Down
16 changes: 13 additions & 3 deletions test/hack/soak/get_clusters.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"encoding/json"
"fmt"
"slices"
"strings"
"time"

Expand All @@ -35,6 +36,10 @@ type cluster struct {

const expirationTTL = time.Hour * 168 // 7 days

// excludedClustersCleanup lists the names of soak clusters that are never marked for
// cleanup, even once they are older than expirationTTL.
// NOTE(review): this holds the same entry as excludedClusters in
// test/hack/resource/clean/main.go; the two lists look like they need to be kept in sync.
var excludedClustersCleanup = []string{
"soak-periodic-46287782",
}

func main() {
ctx := context.Background()
cfg := lo.Must(config.LoadDefaultConfig(ctx))
Expand All @@ -54,9 +59,14 @@ func main() {

if strings.HasPrefix(c, "soak-periodic-") {
outputList = append(outputList, &cluster{
Name: c,
GitRef: clusterDetails.Cluster.Tags["test/git_ref"],
Cleanup: clusterDetails.Cluster.CreatedAt.Before(expirationTime)})
Name: c,
GitRef: clusterDetails.Cluster.Tags["test/git_ref"],
Cleanup: lo.Ternary(
slices.Contains(excludedClustersCleanup, c),
false,
clusterDetails.Cluster.CreatedAt.Before(expirationTime),
),
})
}
}

Expand Down

0 comments on commit e53c52b

Please sign in to comment.