
Commit

Merge branch 'sock_testing' of https://github.com/engedaam/karpenter into sock_testing
engedaam committed Aug 3, 2023
2 parents 941b8f2 + 1fc4618 commit f35aa21
Showing 9 changed files with 195 additions and 51 deletions.
4 changes: 2 additions & 2 deletions .github/actions/e2e/install-karpenter/action.yaml
@@ -39,7 +39,7 @@ runs:
aws eks update-kubeconfig --name "${{ inputs.cluster_name }}"
helm upgrade --install karpenter oci://public.ecr.aws/karpenter/karpenter \
-n karpenter \
--version v0.29.0 \
--version v0.29.2 \
--set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"="arn:aws:iam::${{ inputs.account_id }}:role/karpenter-irsa-${{ inputs.cluster_name }}" \
--set settings.aws.clusterName="${{ inputs.cluster_name }}" \
--set settings.aws.defaultInstanceProfile="KarpenterNodeInstanceProfile-${{ inputs.cluster_name }}" \
@@ -64,5 +64,5 @@ runs:
run: |
helm diff upgrade --namespace karpenter \
karpenter oci://public.ecr.aws/karpenter/karpenter \
--version v0.29.0 \
--version v0.29.2 \
--reuse-values --three-way-merge --detailed-exitcode
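The chart pin moves from v0.29.0 to v0.29.2 in both the install step and the helm-diff check. A quick way to confirm which chart version actually landed on a cluster, sketched under the assumption that the release and namespace are named karpenter as above:

  # List the karpenter release and inspect its chart version (sketch)
  helm list -n karpenter --filter '^karpenter$' -o yaml
  # the "chart" field should report karpenter-v0.29.2 after the upgrade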
38 changes: 11 additions & 27 deletions .github/actions/e2e/install-prometheus/values.yaml
@@ -26,30 +26,14 @@ alertmanager:
tolerations:
- key: CriticalAddonsOnly
operator: Exists
kubelet:
serviceMonitor:
additionalLabels:
scrape: enabled
prometheus:
prometheusSpec:
tolerations:
- key: CriticalAddonsOnly
operator: Exists
resources:
requests:
cpu: 1
memory: 5Gi
limits:
cpu: 1
memory: 5Gi
serviceMonitorSelector:
matchLabels:
scrape: enabled
serviceMonitorNamespaceSelector:
matchLabels:
scrape: enabled
remoteWrite:
- queueConfig:
maxSamplesPerSend: 1000
maxShards: 200
capacity: 2500
extraScrapeConfigs: |
- job_name: karpenter
kubernetes_sd_configs:
- role: endpoints
namespaces:
names:
- karpenter
relabel_configs:
- source_labels: [__meta_kubernetes_endpoint_port_name]
regex: http-metrics
action: keep
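The new extraScrapeConfigs job discovers endpoints in the karpenter namespace and keeps only targets whose endpoint port is named http-metrics. A small sanity check, assuming kubectl access and a Service named karpenter in that namespace:

  # Print the port names advertised by Karpenter's endpoints; "http-metrics" must appear
  # for the relabel keep rule above to retain the scrape target
  kubectl -n karpenter get endpoints karpenter -o jsonpath='{.subsets[*].ports[*].name}'; echo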
27 changes: 8 additions & 19 deletions .github/workflows/e2e-soak-trigger.yaml
@@ -1,24 +1,13 @@
name: E2ESoakTrigger
on:
schedule:
- cron: '0 */3 * * *'
- cron: '0 */1 * * *'
workflow_dispatch:
permissions:
id-token: write # This is required for requesting the JWT
contents: read # This is required for actions/checkout
statuses: write
jobs:
soak:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: configure aws credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: arn:aws:iam::${{ vars.ACCOUNT_ID }}:role/${{ vars.ROLE_NAME }}
aws-region: us-west-2
role-duration-seconds: 21600
- name: run the Soak test suite
run: |
aws eks update-kubeconfig --name Soak-testing
TEST_SUITE="Soak" make e2etests
# if: github.repository == 'aws/karpenter' || github.event_name == 'workflow_dispatch'
uses: ./.github/workflows/e2e-soak.yaml
with:
event_name: ${{ github.event_name }}
secrets:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
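The trigger is now a thin caller; the actual suite lives in the reusable e2e-soak.yaml workflow added below. For ad-hoc runs, the workflow_dispatch path can be exercised from the CLI; a sketch assuming an authenticated GitHub CLI, a run from the main branch, and the input names defined in e2e-soak.yaml:

  # Manually dispatch the soak suite (input names taken from e2e-soak.yaml below)
  gh workflow run E2ESoak --ref main -f region=us-west-2 -f k8s_version=1.27 -f enable_metrics=false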
132 changes: 132 additions & 0 deletions .github/workflows/e2e-soak.yaml
@@ -0,0 +1,132 @@
name: E2ESoak
on:
workflow_dispatch:
inputs:
git_ref:
type: string
region:
type: choice
options:
- "us-east-2"
- "us-west-2"
default: "us-east-2"
k8s_version:
type: choice
options:
- "1.23"
- "1.24"
- "1.25"
- "1.26"
- "1.27"
default: "1.27"
enable_metrics:
type: boolean
default: false
workflow_call:
inputs:
git_ref:
type: string
region:
type: string
default: "us-east-2"
event_name:
type: string
required: true
k8s_version:
type: string
default: "1.27"
enable_metrics:
type: boolean
default: false
secrets:
SLACK_WEBHOOK_URL:
required: true
permissions:
id-token: write # This is required for requesting the JWT
contents: read # This is required for actions/checkout
statuses: write
jobs:
run-suite:
name: suite-Soak
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
ref: ${{ inputs.git_ref }}
- name: configure aws credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: arn:aws:iam::${{ vars.ACCOUNT_ID }}:role/${{ vars.ROLE_NAME }}
aws-region: ${{ inputs.region }}
role-duration-seconds: 21600
- uses: ./.github/actions/e2e/install-eksctl
with:
eksctl_version: v0.147.0
- name: find preexisting cluster
run: |
export PREEXISTING=$(eksctl get cluster -o json | jq '.[].Name' | grep soak)
echo "Found existing cluster name \"$PREEXISTING\""
echo PREEXISTING=$PREEXISTING >> $GITHUB_ENV
- name: generate cluster name
if: env.PREEXISTING == ''
run: |
CLUSTER_NAME=$(echo Soak-$RANDOM$RANDOM | awk '{print tolower($0)}')
echo "Using cluster name \"$CLUSTER_NAME\""
echo CLUSTER_NAME=$CLUSTER_NAME >> $GITHUB_ENV
- name: create eks cluster '${{ env.CLUSTER_NAME }}'
if: env.PREEXISTING == ''
uses: ./.github/actions/e2e/create-cluster
with:
account_id: ${{ vars.ACCOUNT_ID }}
role: ${{ vars.ROLE_NAME }}
region: ${{ inputs.region }}
cluster_name: ${{ env.CLUSTER_NAME }}
k8s_version: ${{ inputs.k8s_version }}
ip_family: 'IPv4'
git_ref: ${{ inputs.git_ref }}
- name: install prometheus
if: env.PREEXISTING == ''
uses: ./.github/actions/e2e/install-prometheus
with:
account_id: ${{ vars.ACCOUNT_ID }}
role: ${{ vars.ROLE_NAME }}
region: ${{ vars.PROMETHEUS_REGION }}
cluster_name: ${{ env.CLUSTER_NAME }}
workspace_id: ${{ vars.WORKSPACE_ID }}
git_ref: ${{ inputs.git_ref }}
- name: install karpenter
if: env.PREEXISTING == ''
uses: ./.github/actions/e2e/install-karpenter
with:
account_id: ${{ vars.ACCOUNT_ID }}
role: ${{ vars.ROLE_NAME }}
region: ${{ inputs.region }}
cluster_name: ${{ env.CLUSTER_NAME }}
git_ref: ${{ inputs.git_ref }}
- name: run the ${{ inputs.suite }} test suite
if: ${{ env.PREEXISTING }} != ""
run: |
aws eks update-kubeconfig --name ${{ env.PREEXISTING }}
TEST_SUITE="Soak" ENABLE_METRICS=${{ inputs.enable_metrics }} METRICS_REGION=${{ vars.TIMESTREAM_REGION }} GIT_REF="$(git rev-parse HEAD)" make e2etests
- name: run the ${{ inputs.suite }} test suite
if: ${{ env.PREEXISTING }} == ""
run: |
aws eks update-kubeconfig --name ${{ env.CLUSTER_NAME }}
TEST_SUITE="Soak" ENABLE_METRICS=${{ inputs.enable_metrics }} METRICS_REGION=${{ vars.TIMESTREAM_REGION }} GIT_REF="$(git rev-parse HEAD)" make e2etests
- name: notify slack of success or failure
uses: ./.github/actions/e2e/slack/notify
if: (success() || failure()) && inputs.event_name != 'workflow_run' && inputs.event_name != 'conformance'
with:
url: ${{ secrets.SLACK_WEBHOOK_URL }}
suite: Soak
k8s_version: ${{ inputs.k8s_version }}
event_name: ${{ inputs.event_name }}
git_ref: ${{ inputs.git_ref }}
- name: dump logs on failure
uses: ./.github/actions/e2e/dump-logs
if: failure() || cancelled()
with:
account_id: ${{ vars.ACCOUNT_ID }}
role: ${{ vars.ROLE_NAME }}
region: ${{ inputs.region }}
cluster_name: ${{ env.CLUSTER_NAME }}
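The two run steps branch on env.PREEXISTING: if the find-preexisting-cluster step located a soak cluster it is reused, otherwise the freshly created one is targeted. The lookup can be reproduced locally; a sketch that mirrors the workflow's own pipeline, assuming eksctl, jq, and credentials for the same account and region:

  # Reproduce the "find preexisting cluster" step from a workstation (sketch)
  eksctl get cluster -o json | jq '.[].Name' | grep soak || echo "no preexisting soak cluster"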
2 changes: 2 additions & 0 deletions charts/karpenter/templates/deployment.yaml
@@ -66,6 +66,8 @@ spec:
image: {{ include "karpenter.controller.image" . }}
imagePullPolicy: {{ .Values.imagePullPolicy }}
env:
- name: ENABLE_PROFILING
value: "true"
- name: KUBERNETES_MIN_VERSION
value: "1.19.0-0"
- name: KARPENTER_SERVICE
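ENABLE_PROFILING=true is now set unconditionally on the controller container. Assuming the controller exposes the standard Go pprof handlers on its metrics port when this flag is on (both the /debug/pprof path and port 8000 are assumptions here, not confirmed by this diff), a profile could be pulled through a port-forward:

  # Pull a heap profile from the running controller (sketch; port and path are assumptions)
  kubectl -n karpenter port-forward deploy/karpenter 8000:8000 &
  go tool pprof http://localhost:8000/debug/pprof/heap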
1 change: 1 addition & 0 deletions test/cloudformation/iam_cloudformation.yaml
@@ -126,6 +126,7 @@ Resources:
Resource: "*"
- Effect: Allow
Action:
- eks:ListClusters
- eks:CreateCluster
- eks:CreateAddon
- eks:CreateNodegroup
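eks:ListClusters backs the new find-preexisting-cluster step, since eksctl get cluster has to enumerate the account's clusters before describing them. A quick check that the CI role can do so, sketched with the region the soak trigger uses:

  # Confirm the assumed role can enumerate clusters (the call eksctl relies on)
  aws eks list-clusters --region us-west-2 --output json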
1 change: 1 addition & 0 deletions test/pkg/environment/common/setup.go
@@ -76,6 +76,7 @@ func (env *Environment) ExpectCleanCluster() {
var nodes v1.NodeList
Expect(env.Client.List(env.Context, &nodes)).To(Succeed())
for _, node := range nodes.Items {
fmt.Println(node.Name)
if len(node.Spec.Taints) == 0 && !node.Spec.Unschedulable {
Fail(fmt.Sprintf("expected system pool node %s to be tainted", node.Name))
}
13 changes: 10 additions & 3 deletions test/suites/soak/suite_test.go
@@ -18,6 +18,7 @@ import (
"context"
"fmt"
"math/rand"
"os"
"sync/atomic"
"testing"
"time"
@@ -62,10 +63,14 @@ var _ = Describe("Soak", func() {
ctx, cancel := context.WithCancel(env.Context)
defer cancel()

content, err := os.ReadFile("testdata/user.sh")
Expect(err).NotTo(HaveOccurred())
provider := awstest.AWSNodeTemplate(v1alpha1.AWSNodeTemplateSpec{AWS: v1alpha1.AWS{
SecurityGroupSelector: map[string]string{"karpenter.sh/discovery": settings.FromContext(env.Context).ClusterName},
SubnetSelector: map[string]string{"karpenter.sh/discovery": settings.FromContext(env.Context).ClusterName},
}})
},
UserData: awssdk.String(string(content)),
})
provisioner := test.Provisioner(test.ProvisionerOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "sock-test-provisioner",
@@ -110,9 +115,11 @@ var _ = Describe("Soak", func() {
time.Sleep(time.Second * 10)

Consistently(func(g Gomega) {
dep.Spec.Replicas = awssdk.Int32(int32(rand.Intn(20) + 1))
env.ExpectExists(dep)
dep.Spec.Replicas = awssdk.Int32(int32(rand.Intn(100) + 1))
env.ExpectUpdated(dep)
time.Sleep(time.Minute * 1)
time.Sleep(time.Minute * 5)
env.ExpectExists(dep)
dep.Spec.Replicas = awssdk.Int32(0)
env.ExpectUpdated(dep)
time.Sleep(time.Second * 30)
28 changes: 28 additions & 0 deletions test/suites/soak/testdata/user.sh
@@ -0,0 +1,28 @@
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="BOUNDARY"

--BOUNDARY
Content-Type: text/x-shellscript; charset="us-ascii"

#!/bin/bash
mkdir -p /etc/systemd/logind.conf.d
cat << EOF > /etc/systemd/logind.conf.d/50-max-delay.conf
[Login]
InhibitDelayMaxSec=360
EOF

systemctl restart systemd-logind

sed -i '/"apiVersion*/a \ \ "shutdownGracePeriod": "3m",' /etc/kubernetes/kubelet/kubelet-config.json
sed -i '/"shutdownGracePeriod*/a \ \ "shutdownGracePeriodCriticalPods": "2m",' /etc/kubernetes/kubelet/kubelet-config.json

--BOUNDARY
Content-Type: text/x-shellscript; charset="us-ascii"

#!/bin/bash
exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1


echo $(jq '.containerLogMaxFiles=3|.containerLogMaxSize="100Mi"' /etc/kubernetes/kubelet/kubelet-config.json) > /etc/kubernetes/kubelet/kubelet-config.json

--BOUNDARY--
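The first shell part raises systemd-logind's inhibitor delay to 360 s and splices kubelet graceful-shutdown settings into the node's kubelet config; the second caps container log retention. Roughly, the head of /etc/kubernetes/kubelet/kubelet-config.json ends up as sketched below (field order follows the sed insert order; the surrounding fields are whatever the AMI already ships):

  # Expected effect of the two sed inserts above (sketch):
  #   "apiVersion": ...,
  #   "shutdownGracePeriod": "3m",
  #   "shutdownGracePeriodCriticalPods": "2m",
  # Verify on a node:
  jq '.shutdownGracePeriod, .shutdownGracePeriodCriticalPods' /etc/kubernetes/kubelet/kubelet-config.json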
