Skip to content

Commit

Permalink
Merge pull request #229 from engedaam/soak-testing-v3
Browse files Browse the repository at this point in the history
Soak testing
  • Loading branch information
engedaam authored Dec 10, 2023
2 parents cb66674 + 8a2207a commit 0d139cd
Show file tree
Hide file tree
Showing 16 changed files with 266 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .github/actions/e2e/cleanup/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ runs:
cache: false
- name: "Run cleanup script"
run: |
go run main.go ${{ inputs.cluster_name }}
go run main.go --cluster-name ${{ inputs.cluster_name }}
working-directory: ./test/hack/resource/clean
shell: bash
7 changes: 6 additions & 1 deletion .github/actions/e2e/slack/notify/action.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
name: SlackNotify
description: 'Notifies slack of the success or failure of the suite'
inputs:
cluster_name:
description: "Name of the cluster"
required: false
suite:
description: "Suite that's running"
required: true
Expand All @@ -18,8 +21,10 @@ runs:
- id: get-run-name
shell: bash
run: |
if [[ ${{ github.event_name }} == "schedule" ]]; then
# Scheduled runs are named "<suite>-periodic" (soak is pinned to "soak-periodic");
# all other runs are named after the suite plus the 7-character commit SHA.
# The suite must be interpolated with ${{ }}: a bare `inputs.suite` is just a literal
# word to bash, which made the comparison always true and the soak branch unreachable.
if [[ "${{ github.event_name }}" == "schedule" && "${{ inputs.suite }}" != "soak" ]]; then
  RUN_NAME="${{ inputs.suite }}-periodic"
elif [[ "${{ github.event_name }}" == "schedule" ]]; then
  RUN_NAME="soak-periodic"
else
  RUN_NAME="${{ inputs.suite }}-${GITHUB_SHA::7}"
fi
Expand Down
51 changes: 51 additions & 0 deletions .github/workflows/e2e-soak-trigger.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Periodically triggers the e2e workflow against the soak-testing clusters in eu-north-1.
# Job 1 (resolve_cluster) lists the existing soak clusters; job 2 (soak) fans out over that
# list and calls e2e.yaml once per matrix entry.
name: E2ESoakTrigger
on:
  schedule:
    # Minute 0 of every third hour
    - cron: '0 */3 * * *'
jobs:
  resolve_cluster:
    permissions:
      id-token: write # aws-actions/configure-aws-credentials@v4.0.1
    # rmif: github.repository == 'aws/karpenter-provider-aws'
    runs-on: ubuntu-latest
    outputs:
      # JSON produced by get_clusters.go; consumed below via fromJson() as the job matrix
      PREEXISTING_CLUSTERS: ${{ steps.list_clusters.outputs.PREEXISTING_CLUSTERS }}
    steps:
      # Pinned by commit SHA, matching the checkout pin used by the other workflows
      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a # v4.0.1
        with:
          role-to-assume: arn:aws:iam::${{ vars.ACCOUNT_ID }}:role/${{ vars.ROLE_NAME }}
          aws-region: eu-north-1
      - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0
        with:
          go-version-file: test/hack/soak/go.mod
          cache-dependency-path: test/hack/soak/go.sum
          check-latest: true
          cache: false
      # Grab all the names of the soak testing clusters, and identify clusters that will need to be deleted
      # Empty cluster_name will indicate that a new cluster will be created
      - id: list_clusters
        name: "Run list clusters script"
        run: |
          PREEXISTING_CLUSTERS="$(go run get_clusters.go)"
          echo PREEXISTING_CLUSTERS="$PREEXISTING_CLUSTERS" >> "$GITHUB_OUTPUT"
        working-directory: ./test/hack/soak
        shell: bash
  soak:
    permissions:
      id-token: write # aws-actions/configure-aws-credentials@v4.0.1
      statuses: write # required by e2e.yaml
    needs: [resolve_cluster]
    strategy:
      # One failing cluster run must not cancel the runs against the other clusters
      fail-fast: false
      # The matrix entries are expected to expose `cluster_name` and `cluster_cleanup` (read below)
      matrix: ${{ fromJson(needs.resolve_cluster.outputs.PREEXISTING_CLUSTERS) }}
    uses: ./.github/workflows/e2e.yaml
    with:
      suite: Integration
      region: eu-north-1
      workflow_trigger: "soak"
      cluster_name: ${{ matrix.cluster_name }}
      cleanup: ${{ matrix.cluster_cleanup }}
    secrets:
      SLACK_WEBHOOK_SOAK_URL: ${{ secrets.SLACK_WEBHOOK_SOAK_URL }}
1 change: 1 addition & 0 deletions .github/workflows/e2e-upgrade.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ jobs:
uses: ./.github/actions/e2e/slack/notify
if: (success() || failure()) && github.event_name != 'workflow_run' && inputs.workflow_trigger != 'versionCompatibility'
with:
cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
url: ${{ secrets.SLACK_WEBHOOK_URL }}
suite: Upgrade
git_ref: ${{ inputs.to_git_ref }}
Expand Down
38 changes: 30 additions & 8 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,15 @@ on:
- "1.27"
- "1.28"
default: "1.28"
enable_metrics:
type: boolean
default: false
cluster_name:
type: string
cleanup:
type: boolean
required: true
default: true
enable_metrics:
type: boolean
default: false
workflow_call:
inputs:
git_ref:
Expand All @@ -64,9 +66,14 @@ on:
required: true
workflow_trigger:
type: string
cluster_name:
type: string
description: If cluster_name is empty, a new cluster will be created. Otherwise, tests will run on an existing cluster
secrets:
SLACK_WEBHOOK_URL:
required: true
required: false
SLACK_WEBHOOK_SOAK_URL:
required: false
jobs:
run-suite:
permissions:
Expand Down Expand Up @@ -97,10 +104,18 @@ jobs:
- id: generate-cluster-name
name: generate cluster name
run: |
CLUSTER_NAME="$(echo ${{ inputs.suite }}-"$RANDOM$RANDOM" | awk '{print tolower($0)}' | tr / -)"
echo Using cluster name "$CLUSTER_NAME"
# Resolve the cluster to run against:
#   - an explicit cluster_name input is reused as-is (tests run on that existing cluster)
#   - otherwise a random, lower-cased name is generated, prefixed with "soak-periodic"
#     for soak-triggered runs and with the suite name for everything else
if [[ -n '${{ inputs.cluster_name }}' ]]; then
  CLUSTER_NAME='${{ inputs.cluster_name }}'
elif [[ '${{ inputs.workflow_trigger }}' == 'soak' ]]; then
  CLUSTER_NAME=$(echo soak-periodic-$RANDOM$RANDOM | awk '{print tolower($0)}' | tr / -)
else
  CLUSTER_NAME=$(echo ${{ inputs.suite }}-$RANDOM$RANDOM | awk '{print tolower($0)}' | tr / -)
fi
echo "Using cluster name \"$CLUSTER_NAME\""
# Publish the name so later steps can read steps.generate-cluster-name.outputs.CLUSTER_NAME
echo CLUSTER_NAME="$CLUSTER_NAME" >> "$GITHUB_OUTPUT"
- name: setup eks cluster '${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}'
if: inputs.cluster_name == ''
uses: ./.github/actions/e2e/setup-cluster
with:
account_id: ${{ vars.ACCOUNT_ID }}
Expand All @@ -124,15 +139,22 @@ jobs:
TEST_SUITE="Integration"
fi
aws eks update-kubeconfig --name ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
# Clean up the cluster before running all tests
kubectl delete nodepool --all
kubectl delete ec2nodeclass --all
kubectl delete deployment --all
# Run test Suite
TEST_SUITE="$TEST_SUITE" ENABLE_METRICS=${{ inputs.enable_metrics }} METRICS_REGION=${{ vars.TIMESTREAM_REGION }} GIT_REF="$(git rev-parse HEAD)" \
CLUSTER_NAME="${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}" CLUSTER_ENDPOINT="$(aws eks describe-cluster --name ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }} --query "cluster.endpoint" --output text)" \
INTERRUPTION_QUEUE="${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}" make e2etests
- name: notify slack of success or failure
uses: ./.github/actions/e2e/slack/notify
if: (success() || failure()) && github.event_name != 'workflow_run' && inputs.workflow_trigger != 'versionCompatibility'
with:
url: ${{ secrets.SLACK_WEBHOOK_URL }}
suite: ${{ inputs.suite }}
cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
url: ${{ inputs.workflow_trigger == 'soak' && secrets.SLACK_WEBHOOK_SOAK_URL || secrets.SLACK_WEBHOOK_URL }}
suite: ${{ inputs.workflow_trigger == 'soak' && 'soak' || inputs.suite }}
git_ref: ${{ inputs.git_ref }}
- name: dump logs on failure
uses: ./.github/actions/e2e/dump-logs
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/resource-count.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
strategy:
fail-fast: false
matrix:
region: [us-east-2, us-west-2, eu-west-1]
region: [us-east-2, us-west-2, eu-west-1, eu-north-1]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
Expand Down
10 changes: 8 additions & 2 deletions .github/workflows/sweeper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
strategy:
fail-fast: false
matrix:
region: [us-east-2, us-west-2, eu-west-1]
region: [us-east-2, us-west-2, eu-west-1, eu-north-1]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
Expand All @@ -25,6 +25,12 @@ jobs:
go-version-file: test/hack/resource/go.mod
check-latest: true
cache-dependency-path: "test/hack/resource/go.sum"
- run: go run main.go
# eu-north-1 hosts the soak tests, so its resources are only swept once they are older
# than 8 days (192h); every other region uses the 12h expiration
- run: |
    EXPIRATION="12h"
    if [[ "${{ matrix.region }}" == "eu-north-1" ]]; then
      EXPIRATION="192h"
    fi
    go run main.go --expiration "$EXPIRATION"
  working-directory: ./test/hack/resource/clean
  name: "Run cleanup script"
22 changes: 13 additions & 9 deletions test/hack/resource/clean/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ package main

import (
"context"
"flag"
"fmt"
"os"
"time"

"github.com/aws/aws-sdk-go-v2/config"
Expand All @@ -31,19 +31,22 @@ import (
"github.com/aws/karpenter-provider-aws/test/hack/resource/pkg/resourcetypes"
)

const expirationTTL = time.Hour * 12
const sweeperCleanedResourcesTableName = "sweeperCleanedResources"

func main() {
var clusterName string
if len(os.Args) == 2 {
clusterName = os.Args[1]
}
expiration := flag.String("expiration", "12h", "define the expirationTTL of the resources")
clusterName := flag.String("cluster-name", "", "define cluster name to cleanup")
flag.Parse()

ctx := context.Background()
cfg := lo.Must(config.LoadDefaultConfig(ctx))

logger := lo.Must(zap.NewProduction()).Sugar()

expirationTTL, err := time.ParseDuration(lo.FromPtr(expiration))
if err != nil {
logger.Fatalln("need a valid expiration duration", err)
}
expirationTime := time.Now().Add(-expirationTTL)

logger.With("expiration-time", expirationTime.String()).Infof("resolved expiration time for all resourceTypes")
Expand Down Expand Up @@ -73,10 +76,11 @@ func main() {
resourceLogger := logger.With("type", resourceTypes[i].String())
var ids []string
var err error
if clusterName == "" {
// If there's no cluster defined, clean up all expired resources. otherwise, only cleanup the resources associated with the cluster
if lo.FromPtr(clusterName) == "" {
ids, err = resourceTypes[i].GetExpired(ctx, expirationTime)
} else {
ids, err = resourceTypes[i].Get(ctx, clusterName)
ids, err = resourceTypes[i].Get(ctx, lo.FromPtr(clusterName))
}
if err != nil {
resourceLogger.Errorf("%v", err)
Expand All @@ -88,7 +92,7 @@ func main() {
resourceLogger.Errorf("%v", err)
}
// Should only fire metrics if the resource have expired
if clusterName == "" {
if lo.FromPtr(clusterName) == "" {
if err = metricsClient.FireMetric(ctx, sweeperCleanedResourcesTableName, fmt.Sprintf("%sDeleted", resourceTypes[i].String()), float64(len(cleaned)), lo.Ternary(resourceTypes[i].Global(), "global", cfg.Region)); err != nil {
resourceLogger.Errorf("%v", err)
}
Expand Down
9 changes: 4 additions & 5 deletions test/hack/resource/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/aws/karpenter-provider-aws/test/hack/resource
go 1.21

require (
github.com/aws/aws-sdk-go-v2 v1.21.0
github.com/aws/aws-sdk-go-v2 v1.22.1
github.com/aws/aws-sdk-go-v2/config v1.18.27
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0
github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0
Expand All @@ -17,16 +17,15 @@ require (
require (
github.com/aws/aws-sdk-go-v2/credentials v1.13.26 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.1 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.1 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.7.32 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.12.12 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.12 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.19.2 // indirect
github.com/aws/smithy-go v1.14.2 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/aws/smithy-go v1.16.0 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/stretchr/testify v1.8.1 // indirect
Expand Down
19 changes: 9 additions & 10 deletions test/hack/resource/go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
github.com/aws/aws-sdk-go-v2 v1.18.1/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw=
github.com/aws/aws-sdk-go-v2 v1.20.1/go.mod h1:NU06lETsFm8fUC6ZjhgDpVBcGZTFQ6XM+LZWZxMI4ac=
github.com/aws/aws-sdk-go-v2 v1.21.0 h1:gMT0IW+03wtYJhRqTVYn0wLzwdnK9sRMcxmtfGzRdJc=
github.com/aws/aws-sdk-go-v2 v1.21.0/go.mod h1:/RfNgGmRxI+iFOB1OeJUyxiU+9s88k3pfHvDagGEp0M=
github.com/aws/aws-sdk-go-v2 v1.22.1 h1:sjnni/AuoTXxHitsIdT0FwmqUuNUuHtufcVDErVFT9U=
github.com/aws/aws-sdk-go-v2 v1.22.1/go.mod h1:Kd0OJtkW3Q0M0lUWGszapWjEvrXDzRW+D21JNsroB+c=
github.com/aws/aws-sdk-go-v2/config v1.18.27 h1:Az9uLwmssTE6OGTpsFqOnaGpLnKDqNYOJzWuC6UAYzA=
github.com/aws/aws-sdk-go-v2/config v1.18.27/go.mod h1:0My+YgmkGxeqjXZb5BYme5pc4drjTnM+x1GJ3zv42Nw=
github.com/aws/aws-sdk-go-v2/credentials v1.13.26 h1:qmU+yhKmOCyujmuPY7tf5MxR/RKyZrOPO3V4DobiTUk=
Expand All @@ -10,12 +10,12 @@ github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4 h1:LxK/bitrAr4lnh9LnIS6i7z
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4/go.mod h1:E1hLXN/BL2e6YizK1zFlYd8vsfi2GTjbjBazinMmeaM=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.34/go.mod h1:wZpTEecJe0Btj3IYnDx/VlUzor9wm3fJHyvLpQF0VwY=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.38/go.mod h1:qggunOChCMu9ZF/UkAfhTz25+U2rLVb3ya0Ua6TTfCA=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 h1:22dGT7PneFMx4+b3pz7lMTRyN8ZKH7M2cW4GP9yUS2g=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41/go.mod h1:CrObHAuPneJBlfEJ5T3szXOUkLEThaGfvnhTf33buas=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.1 h1:fi1ga6WysOyYb5PAf3Exd6B5GiSNpnZim4h1rhlBqx0=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.1/go.mod h1:V5CY8wNurvPUibTi9mwqUqpiFZ5LnioKWIFUDtIzdI8=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.28/go.mod h1:7VRpKQQedkfIEXb4k52I7swUnZP0wohVajJMRn3vsUw=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.32/go.mod h1:0ZXSqrty4FtQ7p8TEuRde/SZm9X05KT18LAUlR40Ln0=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35 h1:SijA0mgjV8E+8G45ltVHs0fvKpTj8xmZJ3VwhGKtUSI=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35/go.mod h1:SJC1nEVVva1g3pHAIdCp7QsRIkMmLAgoDquQ9Rr8kYw=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.1 h1:ZpaV/j48RlPc4AmOZuPv22pJliXjXq8/reL63YzyFnw=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.1/go.mod h1:R8aXraabD2e3qv1csxM14/X9WF4wFMIY0kH4YEtYD5M=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35 h1:LWA+3kDM8ly001vJ1X1waCuLJdtTl48gwkPKWy9sosI=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35/go.mod h1:0Eg1YjxE0Bhn56lx+SHJwCzhW+2JGtizsrx+lCqrfm0=
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0 h1:XbDkc4FLeg1RfnqeblfbJvaEabqq9ByZl4zqyPFkfSc=
Expand All @@ -38,16 +38,15 @@ github.com/aws/aws-sdk-go-v2/service/timestreamwrite v1.18.2 h1:5QyvAYyr+ZibpVxf
github.com/aws/aws-sdk-go-v2/service/timestreamwrite v1.18.2/go.mod h1:3ZCiyyNF7myh/a7DcOjcqRsLmSF9EdhEZSr00Qlui4s=
github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/aws/smithy-go v1.14.1/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/aws/smithy-go v1.14.2 h1:MJU9hqBGbvWZdApzpvoF2WAIJDbtjK2NDJSiJP7HblQ=
github.com/aws/smithy-go v1.14.2/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/aws/smithy-go v1.16.0 h1:gJZEH/Fqh+RsvlJ1Zt4tVAtV6bKkp3cC+R6FCZMNzik=
github.com/aws/smithy-go v1.16.0/go.mod h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE=
github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg=
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
Expand Down
Loading

0 comments on commit 0d139cd

Please sign in to comment.