From 0c27da4215b88cb78e3d4ef5c5b5f7a176b1c475 Mon Sep 17 00:00:00 2001 From: Amanuel Engeda Date: Tue, 11 Jul 2023 13:56:58 -0700 Subject: [PATCH] fix SG and OIDC leak --- .github/actions/e2e/cleanup/action.yaml | 14 + .../actions/e2e/create-cluster/action.yaml | 12 + .github/workflows/sweeper.yaml | 6 +- test/cloudformation/iam_cloudformation.yaml | 4 + test/hack/cleanup/go.mod | 10 +- test/hack/cleanup/go.sum | 53 ++- test/hack/cleanup/main.go | 304 +++++++++++++++--- test/suites/drift/suite_test.go | 4 + 8 files changed, 356 insertions(+), 51 deletions(-) diff --git a/.github/actions/e2e/cleanup/action.yaml b/.github/actions/e2e/cleanup/action.yaml index 84bc1ea8cb42..3e2a2cf046fc 100644 --- a/.github/actions/e2e/cleanup/action.yaml +++ b/.github/actions/e2e/cleanup/action.yaml @@ -30,6 +30,20 @@ runs: - uses: ./.github/actions/e2e/install-eksctl with: eksctl_version: v0.147.0 + - name: delete-security-group + shell: bash + # For drift testing, we create a security group and need to clean it up here + # to avoid leaks if the tests are not fully completed + run: | + aws ec2 describe-security-groups \ + --filters Name=group-name,Values=security-group-drift Name=tag:karpenter.sh/discovery,Values=${{ inputs.cluster_name }} \ + --query "SecurityGroups[*].{ID:GroupId}" \ + --output text | + xargs \ + -n 1 \ + -r \ + aws ec2 delete-security-group \ + --group-id - name: delete-cluster shell: bash run: | diff --git a/.github/actions/e2e/create-cluster/action.yaml b/.github/actions/e2e/create-cluster/action.yaml index 6a6048c11ce6..ef9841ff5369 100644 --- a/.github/actions/e2e/create-cluster/action.yaml +++ b/.github/actions/e2e/create-cluster/action.yaml @@ -125,6 +125,18 @@ runs: wellKnownPolicies: ebsCSIController: true EOF + - name: tag oidc provider of the cluster + if: always() + shell: bash + run: | + for arn in $(aws iam list-open-id-connect-providers --query "OpenIDConnectProviderList[*].{ARN:Arn}" --output text); do + tags=$(aws iam 
list-open-id-connect-provider-tags --open-id-connect-provider-arn $arn --output json) + if [[ $(echo $tags | jq -r '.Tags[] | select(.Key == "alpha.eksctl.io/cluster-name") | .Value') == "${{ inputs.cluster_name }}" ]]; then + aws iam tag-open-id-connect-provider --open-id-connect-provider-arn $arn \ + --tags Key=testing.karpenter.sh/type,Value=e2e Key=github.com/run-url,Value=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + break + fi + done - name: give KarpenterNodeRole permission to bootstrap shell: bash run: | diff --git a/.github/workflows/sweeper.yaml b/.github/workflows/sweeper.yaml index 0ad247a5ccbd..24745257198b 100644 --- a/.github/workflows/sweeper.yaml +++ b/.github/workflows/sweeper.yaml @@ -9,6 +9,10 @@ permissions: jobs: sweeper: if: github.repository == 'aws/karpenter' || github.event_name == 'workflow_dispatch' + strategy: + fail-fast: false + matrix: + region: [us-east-2, us-west-2, eu-west-1] runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -16,7 +20,7 @@ jobs: uses: aws-actions/configure-aws-credentials@v2 with: role-to-assume: arn:aws:iam::${{ vars.ACCOUNT_ID }}:role/${{ vars.ROLE_NAME }} - aws-region: ${{ vars.AWS_REGION }} + aws-region: ${{ matrix.region }} - uses: actions/setup-go@v4 with: go-version-file: test/hack/cleanup/go.mod diff --git a/test/cloudformation/iam_cloudformation.yaml b/test/cloudformation/iam_cloudformation.yaml index e82b983e59bd..52f82fa13e04 100644 --- a/test/cloudformation/iam_cloudformation.yaml +++ b/test/cloudformation/iam_cloudformation.yaml @@ -72,6 +72,7 @@ Resources: - !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/eksctl-*" - !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/karpenter-irsa-*" - !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/prometheus-irsa-*" + - !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:oidc-provider/*" - !GetAtt FISInterruptionRole.Arn - Effect: Allow Action: iam:PassRole @@ -90,6 +91,9 @@ Resources: Action: - 
iam:CreateOpenIDConnectProvider - iam:DeleteOpenIDConnectProvider + - iam:ListOpenIDConnectProviders + - iam:ListOpenIDConnectProviderTags + - iam:TagOpenIDConnectProvider - iam:GetOpenIDConnectProvider - iam:TagOpenIDConnectProvider Resource: !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:oidc-provider/*" diff --git a/test/hack/cleanup/go.mod b/test/hack/cleanup/go.mod index 6913122a71d0..8ad6dd340716 100644 --- a/test/hack/cleanup/go.mod +++ b/test/hack/cleanup/go.mod @@ -3,12 +3,16 @@ module github.com/aws/karpenter/test/hack/cleanup go 1.20 require ( + github.com/aws/aws-sdk-go v1.44.309 github.com/aws/aws-sdk-go-v2/config v1.18.27 github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0 github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2 github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0 + github.com/aws/aws-sdk-go-v2/service/iam v1.21.0 github.com/samber/lo v1.38.1 + go.uber.org/multierr v1.11.0 go.uber.org/zap v1.24.0 + k8s.io/client-go v0.27.4 ) require ( @@ -23,8 +27,12 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.12 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.19.2 // indirect github.com/aws/smithy-go v1.13.5 // indirect + github.com/go-logr/logr v1.2.3 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect go.uber.org/atomic v1.7.0 // indirect - go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect + golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect + k8s.io/apimachinery v0.27.4 // indirect + k8s.io/klog/v2 v2.90.1 // indirect + k8s.io/utils v0.0.0-20230209194617-a36077c30491 // indirect ) diff --git a/test/hack/cleanup/go.sum b/test/hack/cleanup/go.sum index 838f7547d5f3..7c432e98aac4 100644 --- a/test/hack/cleanup/go.sum +++ b/test/hack/cleanup/go.sum @@ -1,3 +1,5 @@ +github.com/aws/aws-sdk-go v1.44.309 h1:IPJOFBzXekakxmEpDwd4RTKmmBR6LIAiXgNsM51bWbU= +github.com/aws/aws-sdk-go v1.44.309/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= 
github.com/aws/aws-sdk-go-v2 v1.18.1 h1:+tefE750oAb7ZQGzla6bLkOwfcQCEtC5y2RqoqCeqKo= github.com/aws/aws-sdk-go-v2 v1.18.1/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= github.com/aws/aws-sdk-go-v2/config v1.18.27 h1:Az9uLwmssTE6OGTpsFqOnaGpLnKDqNYOJzWuC6UAYzA= @@ -18,6 +20,8 @@ github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2 h1:PWGu2JhCb/XJlJ7SSFJq7 github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2/go.mod h1:2KOZkkzMDZCo/aLzPhys06mHNkiU74u85aMJA3PLRvg= github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0 h1:P4dyjm49F2kKws0FpouBC6fjVImACXKt752+CWa01lM= github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0/go.mod h1:tIctCeX9IbzsUTKHt53SVEcgyfxV2ElxJeEB+QUbc4M= +github.com/aws/aws-sdk-go-v2/service/iam v1.21.0 h1:8hEpu60CWlrp7iEBUFRZhgPoX6+gadaGL1sD4LoRYS0= +github.com/aws/aws-sdk-go-v2/service/iam v1.21.0/go.mod h1:aQZ8BI+reeaY7RI/QQp7TKCSUHOesTdrzzylp3CW85c= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28 h1:bkRyG4a929RCnpVSTvLM2j/T4ls015ZhhYApbmYs15s= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28/go.mod h1:jj7znCIg05jXlaGBlFMGP8+7UN3VtCkRBG2spnmRQkU= github.com/aws/aws-sdk-go-v2/service/sso v1.12.12 h1:nneMBM2p79PGWBQovYO/6Xnc2ryRMw3InnDJq1FHkSY= @@ -32,20 +36,25 @@ github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLj github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= +github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/google/go-cmp v0.5.8/go.mod 
h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= @@ -53,9 +62,45 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.24.0 
h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM= golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 h1:vVKdlvoWBphwdxWKrFZEuM0kGgGLxUOYcY4U/2Vjg44= +golang.org/x/time v0.0.0-20220210224613-90d013bbcef8/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +k8s.io/apimachinery v0.27.4 h1:CdxflD4AF61yewuid0fLl6bM4a3q04jWel0IlP+aYjs= +k8s.io/apimachinery v0.27.4/go.mod h1:XNfZ6xklnMCOGGFNqXG7bUrQCoR04dh/E7FprV6pb+E= +k8s.io/client-go v0.27.4 h1:vj2YTtSJ6J4KxaC88P4pMPEQECWMY8gqPqsTgUKzvjk= +k8s.io/client-go v0.27.4/go.mod h1:ragcly7lUlN0SRPk5/ZkGnDjPknzb37TICq07WhI6Xc= +k8s.io/klog/v2 v2.90.1 h1:m4bYOKall2MmOiRaR1J+We67Do7vm9KiQVlT96lnHUw= +k8s.io/klog/v2 
v2.90.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= +k8s.io/utils v0.0.0-20230209194617-a36077c30491 h1:r0BAOLElQnnFhE/ApUsg3iHdVYYPBjNSSOMowRZxxsY= +k8s.io/utils v0.0.0-20230209194617-a36077c30491/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= diff --git a/test/hack/cleanup/main.go b/test/hack/cleanup/main.go index 7890e34dda72..63dd4bac13bc 100644 --- a/test/hack/cleanup/main.go +++ b/test/hack/cleanup/main.go @@ -25,19 +25,36 @@ import ( cloudwatchtypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" "github.com/aws/aws-sdk-go-v2/service/ec2" ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/aws/aws-sdk-go-v2/service/iam" + "github.com/aws/aws-sdk-go/aws" "github.com/samber/lo" + "go.uber.org/multierr" "go.uber.org/zap" + "k8s.io/client-go/util/workqueue" ) const ( expirationTTL = time.Hour * 12 - karpenterMetricNamespace = "testing.karpenter.sh/cleanup" + karpenterMetricNamespace = "testing.karpenter.sh/Cleanup" karpenterProvisionerNameTag = "karpenter.sh/provisioner-name" karpenterLaunchTemplateTag = "karpenter.k8s.aws/cluster" + karpenterSecurityGroupTag = "karpenter.sh/discovery" githubRunURLTag = "github.com/run-url" ) +type CleanableResourceType interface { + Type() string + Cleanup(ctx context.Context) + GetExpired(ctx context.Context) (ids []string, err error) +} + +type DefaultResource struct { + cloudWatchClient *cloudwatch.Client + expirationTime time.Time + logger *zap.SugaredLogger +} + func main() { ctx := context.Background() cfg := lo.Must(config.LoadDefaultConfig(ctx)) @@ -51,64 +68,187 @@ func main() { ec2Client := ec2.NewFromConfig(cfg) cloudFormationClient := cloudformation.NewFromConfig(cfg) cloudWatchClient := cloudwatch.NewFromConfig(cfg) + iamClient := iam.NewFromConfig(cfg) + + defaultResource := &DefaultResource{cloudWatchClient: cloudWatchClient, expirationTime: expirationTime, logger: logger} + resources := []CleanableResourceType{ + &instance{ec2Client: ec2Client, DefaultResource: 
defaultResource}, + &securitygroup{ec2Client: ec2Client, DefaultResource: defaultResource}, + &stack{cloudFormationClient: cloudFormationClient, DefaultResource: defaultResource}, + &launchtemplate{ec2Client: ec2Client, DefaultResource: defaultResource}, + &oidc{iamClient: iamClient, DefaultResource: defaultResource}, + } + + workqueue.ParallelizeUntil(ctx, len(resources), len(resources), func(i int) { + resources[i].Cleanup(ctx) + }) +} + +type instance struct { + *DefaultResource + ec2Client *ec2.Client +} + +func (i *instance) Type() string { + return "Instances" +} - // Terminate any old instances that were provisioned by Karpenter as part of testing - // We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively - ids := getOldInstances(ctx, ec2Client, expirationTime) - logger.With("ids", ids, "count", len(ids)).Infof("discovered test instances to delete") +// Terminate any old instances that were provisioned by Karpenter as part of testing +// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively +func (i *instance) Cleanup(ctx context.Context) { + ids, err := i.GetExpired(ctx) + if err != nil { + i.logger.With("error", err).Error("getting instances") + } + i.logger.With("ids", ids, "count", len(ids)).Infof("discovered test instances to delete") if len(ids) > 0 { - if _, err := ec2Client.TerminateInstances(ctx, &ec2.TerminateInstancesInput{ + if _, err := i.ec2Client.TerminateInstances(ctx, &ec2.TerminateInstancesInput{ InstanceIds: ids, }); err != nil { - logger.With("ids", ids, "count", len(ids)).Errorf("terminating test instances, %v", err) + i.logger.With("ids", ids, "count", len(ids)).Errorf("terminating test instances, %v", err) } else { - logger.With("ids", ids, "count", len(ids)).Infof("terminated test instances") - if err = fireMetric(ctx, cloudWatchClient, "InstancesDeleted", float64(len(ids))); err != nil { - logger.With("name", 
"InstancesDeleted").Errorf("firing metric, %v", err) + i.logger.With("ids", ids, "count", len(ids)).Infof("terminated test instances") + if err = i.fireMetric(ctx, "InstancesDeleted", float64(len(ids))); err != nil { + i.logger.With("name", "InstancesDeleted").Errorf("firing metric, %v", err) } } } +} + +type securitygroup struct { + *DefaultResource + ec2Client *ec2.Client +} + +func (i *securitygroup) Type() string { + return "Security Group" +} - // Terminate any old stacks that were provisioned as part of testing - // We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively - names := getOldStacks(ctx, cloudFormationClient, expirationTime) - logger.With("names", names, "count", len(names)).Infof("discovered test stacks to delete") +func (sg *securitygroup) Cleanup(ctx context.Context) { + ids, err := sg.GetExpired(ctx) + if err != nil { + sg.logger.With("error", err).Error("getting security groups") + } + sg.logger.With("ids", ids, "count", len(ids)).Infof("discovered test security groups to delete") deleted := 0 + + for i := range ids { + if _, err := sg.ec2Client.DeleteSecurityGroup(ctx, &ec2.DeleteSecurityGroupInput{ + GroupId: aws.String(ids[i]), + }); err != nil { + sg.logger.With("ids", ids[i]).Errorf("deleting test security group, %v", err) + } else { + sg.logger.With("ids", ids[i]).Infof("deleted test security group") + deleted++ + } + } + if err := sg.fireMetric(ctx, "SecurityGroupDeleted", float64(deleted)); err != nil { + sg.logger.With("name", "InstancesDeleted").Errorf("firing metric, %v", err) + } +} + +type stack struct { + *DefaultResource + cloudFormationClient *cloudformation.Client +} + +func (i *stack) Type() string { + return "Cloudformation Stacks" +} + +// Terminate any old stacks that were provisioned as part of testing +// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively +func (s *stack) Cleanup(ctx context.Context) { 
+ names, err := s.GetExpired(ctx) + if err != nil { + s.logger.With("error", err).Error("getting stacks") + } + s.logger.With("names", names, "count", len(names)).Infof("discovered test stacks to delete") + deleted := 0 + for i := range names { - if _, err := cloudFormationClient.DeleteStack(ctx, &cloudformation.DeleteStackInput{ + if _, err := s.cloudFormationClient.DeleteStack(ctx, &cloudformation.DeleteStackInput{ StackName: lo.ToPtr(names[i]), }); err != nil { - logger.With("name", names[i]).Errorf("deleting test stack, %v", err) + s.logger.With("name", names[i]).Errorf("deleting test stack, %v", err) } else { - logger.With("name", names[i]).Infof("deleted test stack") + s.logger.With("name", names[i]).Infof("deleted test stack") deleted++ } } - if err := fireMetric(ctx, cloudWatchClient, "StacksDeleted", float64(deleted)); err != nil { - logger.With("name", "StacksDeleted").Errorf("firing metric, %v", err) + if err := s.fireMetric(ctx, "StacksDeleted", float64(deleted)); err != nil { + s.logger.With("name", "StacksDeleted").Errorf("firing metric, %v", err) + } +} + +type launchtemplate struct { + *DefaultResource + ec2Client *ec2.Client +} + +func (i *launchtemplate) Type() string { + return "Launch Templates" +} + +// Terminate any old launch templates that were managed by Karpenter and were provisioned as part of testing +// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively +func (lt *launchtemplate) Cleanup(ctx context.Context) { + names, err := lt.GetExpired(ctx) + if err != nil { + lt.logger.With("error", err).Error("getting launch templates") } + lt.logger.With("names", names, "count", len(names)).Infof("discovered test launch templates to delete") + deleted := 0 - // Terminate any old launch templates that were managed by Karpenter and were provisioned as part of testing - names = getOldLaunchTemplates(ctx, ec2Client, expirationTime) - logger.With("names", names, "count", 
len(names)).Infof("discovered test launch templates to delete") - deleted = 0 for i := range names { - if _, err := ec2Client.DeleteLaunchTemplate(ctx, &ec2.DeleteLaunchTemplateInput{ + if _, err := lt.ec2Client.DeleteLaunchTemplate(ctx, &ec2.DeleteLaunchTemplateInput{ LaunchTemplateName: lo.ToPtr(names[i]), }); err != nil { - logger.With("name", names[i]).Errorf("deleting test launch template, %v", err) + lt.logger.With("name", names[i]).Errorf("deleting test launch template, %v", err) } else { - logger.With("name", names[i]).Infof("deleted test launch template") + lt.logger.With("name", names[i]).Infof("deleted test launch template") deleted++ } } - if err := fireMetric(ctx, cloudWatchClient, "LaunchTemplatesDeleted", float64(deleted)); err != nil { - logger.With("name", "LaunchTemplatesDeleted").Errorf("firing metric, %v", err) + if err := lt.fireMetric(ctx, "LaunchTemplatesDeleted", float64(deleted)); err != nil { + lt.logger.With("name", "LaunchTemplatesDeleted").Errorf("firing metric, %v", err) } } -func fireMetric(ctx context.Context, cloudWatchClient *cloudwatch.Client, name string, value float64) error { - _, err := cloudWatchClient.PutMetricData(ctx, &cloudwatch.PutMetricDataInput{ +type oidc struct { + *DefaultResource + iamClient *iam.Client +} + +func (i *oidc) Type() string { + return "OpenID Connect Provider" +} + +// Terminate any old OIDC providers that are remaining as part of testing +// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively +func (o *oidc) Cleanup(ctx context.Context) { + arns, err := o.GetExpired(ctx) + if err != nil { + o.logger.With("error", err).Error("getting OICD provider") + } + deleted := 0 + for i := range arns { + if _, err := o.iamClient.DeleteOpenIDConnectProvider(ctx, &iam.DeleteOpenIDConnectProviderInput{ + OpenIDConnectProviderArn: lo.ToPtr(arns[i]), + }); err != nil { + o.logger.With("arn", arns[i]).Errorf("deleting test cluster oidc provider, %v", 
err) + } else { + o.logger.With("arn", arns[i]).Infof("deleted test cluster oidc provider") + deleted++ + } + } + if err := o.fireMetric(ctx, "OIDCDeleted", float64(deleted)); err != nil { + o.logger.With("name", "OIDCDeleted").Errorf("firing metric, %v", err) + } +} + +func (d *DefaultResource) fireMetric(ctx context.Context, name string, value float64) error { + _, err := d.cloudWatchClient.PutMetricData(ctx, &cloudwatch.PutMetricDataInput{ Namespace: lo.ToPtr(karpenterMetricNamespace), MetricData: []cloudwatchtypes.MetricDatum{ { @@ -120,10 +260,10 @@ func fireMetric(ctx context.Context, cloudWatchClient *cloudwatch.Client, name s return err } -func getOldInstances(ctx context.Context, ec2Client *ec2.Client, expirationTime time.Time) (ids []string) { +func (i *instance) GetExpired(ctx context.Context) (ids []string, err error) { var nextToken *string for { - out := lo.Must(ec2Client.DescribeInstances(ctx, &ec2.DescribeInstancesInput{ + out, err := i.ec2Client.DescribeInstances(ctx, &ec2.DescribeInstancesInput{ Filters: []ec2types.Filter{ { Name: lo.ToPtr("instance-state-name"), @@ -135,13 +275,16 @@ func getOldInstances(ctx context.Context, ec2Client *ec2.Client, expirationTime }, }, NextToken: nextToken, - })) + }) + if err != nil { + return ids, err + } for _, res := range out.Reservations { for _, instance := range res.Instances { if _, found := lo.Find(instance.Tags, func(t ec2types.Tag) bool { return lo.FromPtr(t.Key) == "kubernetes.io/cluster/KITInfrastructure" - }); !found && lo.FromPtr(instance.LaunchTime).Before(expirationTime) { + }); !found && lo.FromPtr(instance.LaunchTime).Before(i.expirationTime) { ids = append(ids, lo.FromPtr(instance.InstanceId)) } } @@ -152,15 +295,58 @@ func getOldInstances(ctx context.Context, ec2Client *ec2.Client, expirationTime break } } - return ids + return ids, err +} + +func (sg *securitygroup) GetExpired(ctx context.Context) (ids []string, err error) { + var nextToken *string + for { + out, err := 
sg.ec2Client.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{ + Filters: []ec2types.Filter{ + { + Name: lo.ToPtr("group-name"), + Values: []string{"security-group-drift"}, + }, + }, + NextToken: nextToken, + }) + if err != nil { + return ids, err + } + + for _, sgroup := range out.SecurityGroups { + creationDate, found := lo.Find(sgroup.Tags, func(tag ec2types.Tag) bool { + return *tag.Key == "creation-date" + }) + if !found { + continue + } + time, err := time.Parse(time.RFC3339, *creationDate.Value) + if err != nil { + continue + } + if time.Before(sg.expirationTime) { + ids = append(ids, lo.FromPtr(sgroup.GroupId)) + } + } + + nextToken = out.NextToken + if nextToken == nil { + break + } + } + return ids, err } -func getOldStacks(ctx context.Context, cloudFormationClient *cloudformation.Client, expirationTime time.Time) (names []string) { +func (s *stack) GetExpired(ctx context.Context) (names []string, err error) { var nextToken *string for { - out := lo.Must(cloudFormationClient.DescribeStacks(ctx, &cloudformation.DescribeStacksInput{ + out, err := s.cloudFormationClient.DescribeStacks(ctx, &cloudformation.DescribeStacksInput{ NextToken: nextToken, - })) + }) + if err != nil { + return names, err + } stacks := lo.Reject(out.Stacks, func(s cloudformationtypes.Stack, _ int) bool { return s.StackStatus == cloudformationtypes.StackStatusDeleteComplete || @@ -169,7 +355,7 @@ func getOldStacks(ctx context.Context, cloudFormationClient *cloudformation.Clie for _, stack := range stacks { if _, found := lo.Find(stack.Tags, func(t cloudformationtypes.Tag) bool { return lo.FromPtr(t.Key) == githubRunURLTag - }); found && lo.FromPtr(stack.CreationTime).Before(expirationTime) { + }); found && lo.FromPtr(stack.CreationTime).Before(s.expirationTime) { names = append(names, lo.FromPtr(stack.StackName)) } } @@ -179,13 +365,13 @@ func getOldStacks(ctx context.Context, cloudFormationClient *cloudformation.Clie break } } - return names + return names, err } -func 
getOldLaunchTemplates(ctx context.Context, ec2Client *ec2.Client, expirationTime time.Time) (names []string) { +func (lt *launchtemplate) GetExpired(ctx context.Context) (names []string, err error) { var nextToken *string for { - out := lo.Must(ec2Client.DescribeLaunchTemplates(ctx, &ec2.DescribeLaunchTemplatesInput{ + out, err := lt.ec2Client.DescribeLaunchTemplates(ctx, &ec2.DescribeLaunchTemplatesInput{ Filters: []ec2types.Filter{ { Name: lo.ToPtr("tag-key"), @@ -193,10 +379,13 @@ func getOldLaunchTemplates(ctx context.Context, ec2Client *ec2.Client, expiratio }, }, NextToken: nextToken, - })) + }) + if err != nil { + return names, err + } for _, launchTemplate := range out.LaunchTemplates { - if lo.FromPtr(launchTemplate.CreateTime).Before(expirationTime) { + if lo.FromPtr(launchTemplate.CreateTime).Before(lt.expirationTime) { names = append(names, lo.FromPtr(launchTemplate.LaunchTemplateName)) } } @@ -206,5 +395,30 @@ func getOldLaunchTemplates(ctx context.Context, ec2Client *ec2.Client, expiratio break } } - return names + return names, err +} + +func (o *oidc) GetExpired(ctx context.Context) (names []string, err error) { + out, err := o.iamClient.ListOpenIDConnectProviders(ctx, &iam.ListOpenIDConnectProvidersInput{}) + if err != nil { + return names, err + } + + errs := make([]error, len(out.OpenIDConnectProviderList)) + for i := range out.OpenIDConnectProviderList { + oicd, err := o.iamClient.GetOpenIDConnectProvider(ctx, &iam.GetOpenIDConnectProviderInput{ + OpenIDConnectProviderArn: out.OpenIDConnectProviderList[i].Arn, + }) + if err != nil { + errs[i] = err + } + + for _, t := range oicd.Tags { + if lo.FromPtr(t.Key) == githubRunURLTag && oicd.CreateDate.Before(o.expirationTime) { + names = append(names, lo.FromPtr(out.OpenIDConnectProviderList[i].Arn)) + } + } + } + + return names, multierr.Combine(errs...) 
} diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index 7c86d3adb1c5..fc0c3e6a2af9 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -171,6 +171,10 @@ var _ = Describe("Drift", Label("AWS"), func() { Key: awssdk.String("karpenter.sh/discovery"), Value: awssdk.String(settings.FromContext(env.Context).ClusterName), }, + { + Key: awssdk.String("creation-date"), + Value: awssdk.String(time.Now().Format(time.RFC3339)), + }, }, }, },