diff --git a/.github/actions/e2e/cleanup/action.yaml b/.github/actions/e2e/cleanup/action.yaml index 4fdbd9638ad5..a4b92a667409 100644 --- a/.github/actions/e2e/cleanup/action.yaml +++ b/.github/actions/e2e/cleanup/action.yaml @@ -24,7 +24,7 @@ inputs: runs: using: "composite" steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 with: ref: ${{ inputs.git_ref }} - uses: ./.github/actions/e2e/install-eksctl @@ -37,7 +37,7 @@ runs: CLUSTER_NAME: ${{ inputs.cluster_name }} run: | eksctl delete cluster --name "$CLUSTER_NAME" --timeout 60m --wait || true - - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 + - uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1 with: go-version-file: test/hack/resource/go.mod cache-dependency-path: test/hack/resource/go.sum diff --git a/.github/actions/e2e/install-karpenter/action.yaml b/.github/actions/e2e/install-karpenter/action.yaml index 4a67bf515a1c..c542500d324b 100644 --- a/.github/actions/e2e/install-karpenter/action.yaml +++ b/.github/actions/e2e/install-karpenter/action.yaml @@ -30,7 +30,7 @@ inputs: runs: using: "composite" steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 with: ref: ${{ inputs.git_ref }} - uses: ./.github/actions/e2e/install-helm diff --git a/.github/actions/e2e/install-prometheus/action.yaml b/.github/actions/e2e/install-prometheus/action.yaml index df7c3e8563af..6bae1978ded2 100644 --- a/.github/actions/e2e/install-prometheus/action.yaml +++ b/.github/actions/e2e/install-prometheus/action.yaml @@ -27,7 +27,7 @@ inputs: runs: using: "composite" steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 with: ref: ${{ inputs.git_ref }} - uses: 
./.github/actions/e2e/install-helm diff --git a/.github/actions/e2e/install-prometheus/values.yaml b/.github/actions/e2e/install-prometheus/values.yaml index 5a6656cf742e..97570b00d9fd 100644 --- a/.github/actions/e2e/install-prometheus/values.yaml +++ b/.github/actions/e2e/install-prometheus/values.yaml @@ -47,10 +47,10 @@ prometheus: resources: requests: cpu: 1 - memory: 5Gi + memory: 15Gi limits: cpu: 1 - memory: 5Gi + memory: 15Gi serviceMonitorSelector: matchLabels: scrape: enabled diff --git a/.github/actions/e2e/setup-cluster/action.yaml b/.github/actions/e2e/setup-cluster/action.yaml index 10e988db4810..1b5b045872d1 100644 --- a/.github/actions/e2e/setup-cluster/action.yaml +++ b/.github/actions/e2e/setup-cluster/action.yaml @@ -50,7 +50,7 @@ inputs: runs: using: "composite" steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 with: ref: ${{ inputs.git_ref }} - uses: ./.github/actions/e2e/install-eksctl @@ -78,6 +78,7 @@ runs: --capabilities CAPABILITY_NAMED_IAM \ --parameter-overrides "ClusterName=$CLUSTER_NAME" \ --tags "testing/type=e2e" "testing/cluster=$CLUSTER_NAME" "github.com/run-url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" "karpenter.sh/discovery=$CLUSTER_NAME" + aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy --role-name KarpenterNodeRole-$CLUSTER_NAME - name: create or upgrade cluster shell: bash env: @@ -174,6 +175,8 @@ runs: withOIDC: true addons: - name: amazon-cloudwatch-observability + # Pin addon version due to undiagnosed e2e failures after 1.6.0 release + version: '1.5.5-eksbuild.1' permissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary" - name: vpc-cni permissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary" @@ -213,9 +216,22 @@ runs: fi # Adding taints after all necessary pods have 
scheduled to the manged node group nodes - # amazon-cloudwatch-observability pods do no not tolerate CriticalAddonsOnly=true:NoSchedule and + # amazon-cloudwatch-observability pods do no not tolerate CriticalAddonsOnly=true:NoSchedule and # amazon-cloudwatch-observability addons does not allow to add tolerations to the addon pods as part of the advanced configuration - kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all + # Overwrite existing taints to ensure we don't fail here on upgrade + kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all --overwrite + + # We delete DaemonSets that we don't care about because it causes inconsistencies in scheduling due to + # dcgm-exporter and neuron-monitor selecting on specific instance types + # See https://github.com/kubernetes-sigs/karpenter/issues/715 for more detail + kubectl delete daemonsets -n amazon-cloudwatch dcgm-exporter neuron-monitor --ignore-not-found + + # We patch the priorityClass onto all DaemonSets to ensure that DaemonSets always schedule to nodes so we don't get scheduling inconsistencies + # See https://karpenter.sh/docs/faq/#when-deploying-an-additional-daemonset-to-my-cluster-why-does-karpenter-not-scale-up-my-nodes-to-support-the-extra-daemonset for more detail + # Additionally, we patch an everything toleration onto the daemonsets to prevent them from being included in drain operations. 
+ for DAEMONSET in "cloudwatch-agent" "cloudwatch-agent-windows" "fluent-bit" "fluent-bit-windows"; do + kubectl patch daemonset -n amazon-cloudwatch $DAEMONSET -p '{"spec":{"template":{"spec":{"priorityClassName":"system-node-critical","tolerations": [{"operator": "Exists"}]}}}}' --type=merge + done - name: tag oidc provider of the cluster if: always() shell: bash diff --git a/.github/actions/e2e/slack/notify/action.yaml b/.github/actions/e2e/slack/notify/action.yaml index 37933fb0aa70..ed2e9255fc55 100644 --- a/.github/actions/e2e/slack/notify/action.yaml +++ b/.github/actions/e2e/slack/notify/action.yaml @@ -17,7 +17,7 @@ inputs: runs: using: "composite" steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 with: ref: ${{ inputs.git_ref }} - id: get-run-name diff --git a/.github/actions/e2e/upgrade-crds/action.yaml b/.github/actions/e2e/upgrade-crds/action.yaml index 202d24f4d11c..25c207070882 100644 --- a/.github/actions/e2e/upgrade-crds/action.yaml +++ b/.github/actions/e2e/upgrade-crds/action.yaml @@ -24,7 +24,7 @@ runs: role-to-assume: arn:aws:iam::${{ inputs.account_id }}:role/${{ inputs.role }} aws-region: ${{ inputs.region }} role-duration-seconds: 21600 - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 with: ref: ${{ inputs.git_ref }} - name: install-karpenter diff --git a/.github/actions/install-deps/action.yaml b/.github/actions/install-deps/action.yaml index f0dd71b3b83c..46b73833e151 100644 --- a/.github/actions/install-deps/action.yaml +++ b/.github/actions/install-deps/action.yaml @@ -7,7 +7,7 @@ inputs: runs: using: "composite" steps: - - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 + - uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1 id: setup-go with: go-version-file: go.mod diff --git 
a/.github/workflows/e2e-matrix.yaml b/.github/workflows/e2e-matrix.yaml index 475edab660e4..267c5c70934e 100644 --- a/.github/workflows/e2e-matrix.yaml +++ b/.github/workflows/e2e-matrix.yaml @@ -95,7 +95,7 @@ jobs: statuses: write # ./.github/actions/commit-status/start uses: ./.github/workflows/e2e-upgrade.yaml with: - from_git_ref: 283e7b2a51ec73903a6d3f9362fc3009b898ef33 + from_git_ref: 969530cc8ac4ee8a8c2efed9af823c44813b4ec2 to_git_ref: ${{ inputs.git_ref }} region: ${{ inputs.region }} k8s_version: ${{ inputs.k8s_version }} diff --git a/.github/workflows/e2e-soak-trigger.yaml b/.github/workflows/e2e-soak-trigger.yaml index 862473a2c2dd..293286f445da 100644 --- a/.github/workflows/e2e-soak-trigger.yaml +++ b/.github/workflows/e2e-soak-trigger.yaml @@ -17,7 +17,7 @@ jobs: with: role-to-assume: arn:aws:iam::${{ vars.CI_ACCOUNT_ID }}:role/${{ vars.CI_ROLE_NAME }} aws-region: eu-north-1 - - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 + - uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1 with: go-version-file: test/hack/soak/go.mod cache-dependency-path: test/hack/soak/go.sum diff --git a/.github/workflows/e2e-upgrade.yaml b/.github/workflows/e2e-upgrade.yaml index 2c6b8a8c0f28..032c4c544338 100644 --- a/.github/workflows/e2e-upgrade.yaml +++ b/.github/workflows/e2e-upgrade.yaml @@ -108,7 +108,7 @@ jobs: region: ${{ inputs.region }} cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }} k8s_version: ${{ inputs.k8s_version }} - eksctl_version: v0.169.0 + eksctl_version: v0.175.0 ip_family: IPv4 # Set the value to IPv6 if IPv6 suite, else IPv4 git_ref: ${{ inputs.to_git_ref }} ecr_account_id: ${{ vars.SNAPSHOT_ACCOUNT_ID }} @@ -136,7 +136,7 @@ jobs: suite: Upgrade git_ref: ${{ inputs.to_git_ref }} - name: add log retention policy - if: ${{ inputs.workflow_trigger != 'private_cluster' }} + if: always() && inputs.workflow_trigger != 'private_cluster' env: CLUSTER_NAME: ${{ 
steps.generate-cluster-name.outputs.CLUSTER_NAME }} run: | diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 476ccf03991c..53f3d74eeaed 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -188,7 +188,7 @@ jobs: git_ref: ${{ inputs.git_ref }} workflow_trigger: ${{ inputs.workflow_trigger }} - name: add log retention policy - if: (success() || failure()) && inputs.workflow_trigger != 'private_cluster' + if: always() && inputs.workflow_trigger != 'private_cluster' env: CLUSTER_NAME: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }} run: | diff --git a/.github/workflows/resource-count.yaml b/.github/workflows/resource-count.yaml index fa54688ae03c..1501b0815d18 100644 --- a/.github/workflows/resource-count.yaml +++ b/.github/workflows/resource-count.yaml @@ -20,7 +20,7 @@ jobs: with: role-to-assume: arn:aws:iam::${{ vars.CI_ACCOUNT_ID }}:role/${{ vars.CI_ROLE_NAME }} aws-region: ${{ matrix.region }} - - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 + - uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1 with: go-version-file: test/hack/resource/go.mod check-latest: true diff --git a/.github/workflows/sweeper.yaml b/.github/workflows/sweeper.yaml index b6b4dfd1db2c..d5b9cc753b2d 100644 --- a/.github/workflows/sweeper.yaml +++ b/.github/workflows/sweeper.yaml @@ -20,7 +20,7 @@ jobs: with: role-to-assume: arn:aws:iam::${{ vars.CI_ACCOUNT_ID }}:role/${{ vars.CI_ROLE_NAME }} aws-region: ${{ matrix.region }} - - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 + - uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1 with: go-version-file: test/hack/resource/go.mod check-latest: true diff --git a/.golangci.yaml b/.golangci.yaml index 8c54aa95b166..e28f6be70d88 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -1,16 +1,7 @@ # See https://github.com/golangci/golangci-lint/blob/master/.golangci.example.yml run: tests: true - 
timeout: 5m - - skip-dirs: - - tools - - website - - hack - - charts - - designs - linters: enable: - asciicheck @@ -31,12 +22,13 @@ linters: - nilerr disable: - prealloc - linters-settings: gocyclo: min-complexity: 11 govet: - check-shadowing: true + enable-all: true + disable: + - fieldalignment revive: rules: - name: dot-imports @@ -66,6 +58,12 @@ linters-settings: issues: fix: true exclude: ['declaration of "(err|ctx)" shadows declaration at'] + exclude-dirs: + - tools + - website + - hack + - charts + - designs exclude-rules: - linters: - goheader diff --git a/Makefile b/Makefile index 1aa29f5e69c4..1b355fd76f88 100644 --- a/Makefile +++ b/Makefile @@ -128,7 +128,8 @@ image: ## Build the Karpenter controller images using ko build $(eval IMG_TAG=$(shell echo $(CONTROLLER_IMG) | cut -d "@" -f 1 | cut -d ":" -f 2 -s)) $(eval IMG_DIGEST=$(shell echo $(CONTROLLER_IMG) | cut -d "@" -f 2)) -apply: image ## Deploy the controller from the current state of your git repository into your ~/.kube/config cluster +apply: verify image ## Deploy the controller from the current state of your git repository into your ~/.kube/config cluster + kubectl apply -f ./pkg/apis/crds/ helm upgrade --install karpenter charts/karpenter --namespace ${KARPENTER_NAMESPACE} \ $(HELM_OPTS) \ --set logLevel=debug \ diff --git a/cmd/controller/main.go b/cmd/controller/main.go index fc09b917beef..9d370a42694d 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -31,6 +31,7 @@ import ( func main() { ctx, op := operator.NewOperator(coreoperator.NewOperator()) + awsCloudProvider := cloudprovider.New( op.InstanceTypesProvider, op.InstanceProvider, @@ -38,7 +39,6 @@ func main() { op.GetClient(), op.AMIProvider, op.SecurityGroupProvider, - op.SubnetProvider, ) lo.Must0(op.AddHealthzCheck("cloud-provider", awsCloudProvider.LivenessProbe)) cloudProvider := metrics.Decorate(awsCloudProvider) diff --git a/designs/v1beta1-api.md b/designs/v1beta1-api.md index 409558a71a96..025f2e65eaaa 100644 
--- a/designs/v1beta1-api.md +++ b/designs/v1beta1-api.md @@ -346,6 +346,7 @@ status: 8. `karpenter.k8s.aws/instance-cpu` 9. `karpenter.k8s.aws/instance-cpu-manufacturer` 10. `karpenter.k8s.aws/instance-memory` +11. `karpenter.k8s.aws/instance-ebs-bandwidth` 11. `karpenter.k8s.aws/instance-network-bandwidth` 12. `karpenter.k8s.aws/instance-gpu-name` 13. `karpenter.k8s.aws/instance-gpu-manufacturer` diff --git a/go.mod b/go.mod index 79ff96083f72..c47e5d3b6676 100644 --- a/go.mod +++ b/go.mod @@ -1,35 +1,36 @@ module github.com/aws/karpenter-provider-aws -go 1.22 +go 1.22.3 require ( github.com/Pallinder/go-randomdata v1.2.0 - github.com/PuerkitoBio/goquery v1.9.1 + github.com/PuerkitoBio/goquery v1.9.2 github.com/avast/retry-go v3.0.0+incompatible - github.com/aws/aws-sdk-go v1.51.30 + github.com/aws/aws-sdk-go v1.53.6 github.com/aws/karpenter-provider-aws/tools/kompat v0.0.0-20240410220356-6b868db24881 github.com/awslabs/amazon-eks-ami/nodeadm v0.0.0-20240229193347-cfab22a10647 - github.com/awslabs/operatorpkg v0.0.0-20240502203521-a2115dcf4ac0 + github.com/awslabs/operatorpkg v0.0.0-20240518001059-1e35978ba21b github.com/go-logr/zapr v1.3.0 github.com/imdario/mergo v0.3.16 github.com/mitchellh/hashstructure/v2 v2.0.2 - github.com/onsi/ginkgo/v2 v2.17.2 - github.com/onsi/gomega v1.33.0 + github.com/onsi/ginkgo/v2 v2.17.3 + github.com/onsi/gomega v1.33.1 github.com/patrickmn/go-cache v2.1.0+incompatible - github.com/pelletier/go-toml/v2 v2.2.1 - github.com/prometheus/client_golang v1.19.0 + github.com/pelletier/go-toml/v2 v2.2.2 + github.com/prometheus/client_golang v1.19.1 github.com/samber/lo v1.39.0 go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 golang.org/x/sync v0.7.0 - k8s.io/api v0.29.3 - k8s.io/apiextensions-apiserver v0.29.3 - k8s.io/apimachinery v0.29.3 - k8s.io/client-go v0.29.3 + k8s.io/api v0.30.1 + k8s.io/apiextensions-apiserver v0.30.1 + k8s.io/apimachinery v0.30.1 + k8s.io/client-go v0.30.1 + k8s.io/klog/v2 v2.120.1 k8s.io/utils 
v0.0.0-20240102154912-e7106e64919e knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd - sigs.k8s.io/controller-runtime v0.17.3 - sigs.k8s.io/karpenter v0.36.0 + sigs.k8s.io/controller-runtime v0.18.2 + sigs.k8s.io/karpenter v0.36.1-0.20240524020535-a30f67aaf181 sigs.k8s.io/yaml v1.4.0 ) @@ -79,7 +80,7 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_model v0.6.0 // indirect + github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.53.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/prometheus/statsd_exporter v0.24.0 // indirect @@ -94,7 +95,7 @@ require ( golang.org/x/oauth2 v0.18.0 // indirect golang.org/x/sys v0.19.0 // indirect golang.org/x/term v0.19.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/text v0.15.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.20.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect @@ -108,11 +109,10 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/cloud-provider v0.29.3 // indirect - k8s.io/component-base v0.29.3 // indirect - k8s.io/csi-translation-lib v0.29.3 // indirect - k8s.io/klog/v2 v2.120.1 // indirect - k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect + k8s.io/cloud-provider v0.30.1 // indirect + k8s.io/component-base v0.30.1 // indirect + k8s.io/csi-translation-lib v0.30.1 // indirect + k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) diff --git a/go.sum b/go.sum index 0ab4e74fa4bb..1ae6cc110fd0 100644 --- a/go.sum +++ b/go.sum @@ -41,8 +41,8 @@ github.com/Masterminds/semver/v3 v3.2.1 
h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0 github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= github.com/Pallinder/go-randomdata v1.2.0 h1:DZ41wBchNRb/0GfsePLiSwb0PHZmT67XY00lCDlaYPg= github.com/Pallinder/go-randomdata v1.2.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y= -github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VPW7UI= -github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY= +github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE= +github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= @@ -54,14 +54,14 @@ github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6 github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0= github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= -github.com/aws/aws-sdk-go v1.51.30 h1:RVFkjn9P0JMwnuZCVH0TlV5k9zepHzlbc4943eZMhGw= -github.com/aws/aws-sdk-go v1.51.30/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= +github.com/aws/aws-sdk-go v1.53.6 h1:1/MYh/VmxdJu7v2bwvDA2JS30UI7bg62QYgQ7KxMa/Q= +github.com/aws/aws-sdk-go v1.53.6/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= github.com/aws/karpenter-provider-aws/tools/kompat v0.0.0-20240410220356-6b868db24881 h1:m9rhsGhdepdQV96tZgfy68oU75AWAjOH8u65OefTjwA= github.com/aws/karpenter-provider-aws/tools/kompat 
v0.0.0-20240410220356-6b868db24881/go.mod h1:+Mk5k0b6HpKobxNq+B56DOhZ+I/NiPhd5MIBhQMSTSs= github.com/awslabs/amazon-eks-ami/nodeadm v0.0.0-20240229193347-cfab22a10647 h1:8yRBVsjGmI7qQsPWtIrbWP+XfwHO9Wq7gdLVzjqiZFs= github.com/awslabs/amazon-eks-ami/nodeadm v0.0.0-20240229193347-cfab22a10647/go.mod h1:9NafTAUHL0FlMeL6Cu5PXnMZ1q/LnC9X2emLXHsVbM8= -github.com/awslabs/operatorpkg v0.0.0-20240502203521-a2115dcf4ac0 h1:sLJ+JX6Yko4dUc5MfqwHGcC7yWQxgKwry1Nhh+bMw/E= -github.com/awslabs/operatorpkg v0.0.0-20240502203521-a2115dcf4ac0/go.mod h1:I7p/HTgsO8XwYbqBvtp37JMB0yFHrFSv3Pki4blv5HQ= +github.com/awslabs/operatorpkg v0.0.0-20240518001059-1e35978ba21b h1:bmlbw6EjSDoZEWbGE2rnXDsCgbTsxMyufM4NRRHaLVk= +github.com/awslabs/operatorpkg v0.0.0-20240518001059-1e35978ba21b/go.mod h1:YcidmUg8Pjk349+jd+sRCdo6h3jzxqAY1VDNgVJKbSA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -272,14 +272,14 @@ github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRW github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= -github.com/onsi/ginkgo/v2 v2.17.2 h1:7eMhcy3GimbsA3hEnVKdw/PQM9XN9krpKVXsZdph0/g= -github.com/onsi/ginkgo/v2 v2.17.2/go.mod h1:nP2DPOQoNsQmsVyv5rDA8JkXQoCs6goXIvr/PRJ1eCc= -github.com/onsi/gomega v1.33.0 h1:snPCflnZrpMsy94p4lXVEkHo12lmPnc3vY5XBbreexE= -github.com/onsi/gomega v1.33.0/go.mod h1:+925n5YtiFsLzzafLUHzVMBpvvRAzrydIBiSIxjX3wY= +github.com/onsi/ginkgo/v2 v2.17.3 h1:oJcvKpIb7/8uLpDDtnQuf18xVnwKp8DTD7DQ6gTd/MU= +github.com/onsi/ginkgo/v2 v2.17.3/go.mod 
h1:nP2DPOQoNsQmsVyv5rDA8JkXQoCs6goXIvr/PRJ1eCc= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= -github.com/pelletier/go-toml/v2 v2.2.1 h1:9TA9+T8+8CUCO2+WYnDLCgrYi9+omqKXyjDtosvtEhg= -github.com/pelletier/go-toml/v2 v2.2.1/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= +github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= +github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -295,14 +295,14 @@ github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqr github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= github.com/prometheus/client_golang v1.12.2/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= github.com/prometheus/client_golang v1.13.0/go.mod h1:vTeo+zgvILHsnnj/39Ou/1fPN5nJFOEMgftOUOmlvYQ= -github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= -github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= +github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model 
v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= -github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= @@ -550,8 +550,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -732,24 +732,24 @@ honnef.co/go/tools 
v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -k8s.io/api v0.29.3 h1:2ORfZ7+bGC3YJqGpV0KSDDEVf8hdGQ6A03/50vj8pmw= -k8s.io/api v0.29.3/go.mod h1:y2yg2NTyHUUkIoTC+phinTnEa3KFM6RZ3szxt014a80= -k8s.io/apiextensions-apiserver v0.29.3 h1:9HF+EtZaVpFjStakF4yVufnXGPRppWFEQ87qnO91YeI= -k8s.io/apiextensions-apiserver v0.29.3/go.mod h1:po0XiY5scnpJfFizNGo6puNU6Fq6D70UJY2Cb2KwAVc= -k8s.io/apimachinery v0.29.3 h1:2tbx+5L7RNvqJjn7RIuIKu9XTsIZ9Z5wX2G22XAa5EU= -k8s.io/apimachinery v0.29.3/go.mod h1:hx/S4V2PNW4OMg3WizRrHutyB5la0iCUbZym+W0EQIU= -k8s.io/client-go v0.29.3 h1:R/zaZbEAxqComZ9FHeQwOh3Y1ZUs7FaHKZdQtIc2WZg= -k8s.io/client-go v0.29.3/go.mod h1:tkDisCvgPfiRpxGnOORfkljmS+UrW+WtXAy2fTvXJB0= -k8s.io/cloud-provider v0.29.3 h1:y39hNq0lrPD1qmqQ2ykwMJGeWF9LsepVkR2a4wskwLc= -k8s.io/cloud-provider v0.29.3/go.mod h1:daDV1WkAO6pTrdsn7v8TpN/q9n75ExUC4RJDl7vlPKk= -k8s.io/component-base v0.29.3 h1:Oq9/nddUxlnrCuuR2K/jp6aflVvc0uDvxMzAWxnGzAo= -k8s.io/component-base v0.29.3/go.mod h1:Yuj33XXjuOk2BAaHsIGHhCKZQAgYKhqIxIjIr2UXYio= -k8s.io/csi-translation-lib v0.29.3 h1:GNYCE0f86K3Xkyrk7WKKwQZkJrum6QQapbOzYxZv6Mg= -k8s.io/csi-translation-lib v0.29.3/go.mod h1:snAzieA58/oiQXQZr27b0+b6/3+ZzitwI+57cUsMKKQ= +k8s.io/api v0.30.1 h1:kCm/6mADMdbAxmIh0LBjS54nQBE+U4KmbCfIkF5CpJY= +k8s.io/api v0.30.1/go.mod h1:ddbN2C0+0DIiPntan/bye3SW3PdwLa11/0yqwvuRrJM= +k8s.io/apiextensions-apiserver v0.30.1 h1:4fAJZ9985BmpJG6PkoxVRpXv9vmPUOVzl614xarePws= +k8s.io/apiextensions-apiserver v0.30.1/go.mod h1:R4GuSrlhgq43oRY9sF2IToFh7PVlF1JjfWdoG3pixk4= +k8s.io/apimachinery v0.30.1 h1:ZQStsEfo4n65yAdlGTfP/uSHMQSoYzU/oeEbkmF7P2U= +k8s.io/apimachinery v0.30.1/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= +k8s.io/client-go v0.30.1 
h1:uC/Ir6A3R46wdkgCV3vbLyNOYyCJ8oZnjtJGKfytl/Q= +k8s.io/client-go v0.30.1/go.mod h1:wrAqLNs2trwiCH/wxxmT/x3hKVH9PuV0GGW0oDoHVqc= +k8s.io/cloud-provider v0.30.1 h1:OslHpog97zG9Kr7/vV1ki8nLKq8xTPUkN/kepCxBqKI= +k8s.io/cloud-provider v0.30.1/go.mod h1:1uZp+FSskXQoeAAIU91/XCO8X/9N1U3z5usYeSLT4MI= +k8s.io/component-base v0.30.1 h1:bvAtlPh1UrdaZL20D9+sWxsJljMi0QZ3Lmw+kmZAaxQ= +k8s.io/component-base v0.30.1/go.mod h1:e/X9kDiOebwlI41AvBHuWdqFriSRrX50CdwA9TFaHLI= +k8s.io/csi-translation-lib v0.30.1 h1:fIBtNMQjyr7HFv3xGSSH9cWOQS1K1kIBmZ1zRsHuVKs= +k8s.io/csi-translation-lib v0.30.1/go.mod h1:l0HrIBIxUKRvqnNWqn6AXTYgUa2mAFLT6bjo1lU+55U= k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= -k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= k8s.io/utils v0.0.0-20240102154912-e7106e64919e h1:eQ/4ljkx21sObifjzXwlPKpdGLrCfRziVtos3ofG/sQ= k8s.io/utils v0.0.0-20240102154912-e7106e64919e/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd h1:KJXBX9dOmRTUWduHg1gnWtPGIEl+GMh8UHdrBEZgOXE= @@ -757,12 +757,12 @@ knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd/go.mod h1:36cYnaOVHkzmhgybmYX rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= -sigs.k8s.io/controller-runtime v0.17.3 h1:65QmN7r3FWgTxDMz9fvGnO1kbf2nu+acg9p2R9oYYYk= -sigs.k8s.io/controller-runtime v0.17.3/go.mod 
h1:N0jpP5Lo7lMTF9aL56Z/B2oWBJjey6StQM0jRbKQXtY= +sigs.k8s.io/controller-runtime v0.18.2 h1:RqVW6Kpeaji67CY5nPEfRz6ZfFMk0lWQlNrLqlNpx+Q= +sigs.k8s.io/controller-runtime v0.18.2/go.mod h1:tuAt1+wbVsXIT8lPtk5RURxqAnq7xkpv2Mhttslg7Hw= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/karpenter v0.36.0 h1:i82fOsFWKwnChedKsj0Hep2yrTkAjCek/aZPSMX2dW8= -sigs.k8s.io/karpenter v0.36.0/go.mod h1:fieFojxOec/l0tDmFT7R+g/Y+SGQbL9VlcYO8xb3sLo= +sigs.k8s.io/karpenter v0.36.1-0.20240524020535-a30f67aaf181 h1:OQlVI9wqaV+VW8y13clzV/tM8sEgm0M/Fs/fVsrnRsY= +sigs.k8s.io/karpenter v0.36.1-0.20240524020535-a30f67aaf181/go.mod h1:5XYrIz9Bi7HgQyaUsx7O08ft+TJjrH+htlnPq8Sz9J8= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/hack/code/bandwidth_gen/example/gp.html b/hack/code/bandwidth_gen/example/gp.html new file mode 100644 index 000000000000..fabc67a95b53 --- /dev/null +++ b/hack/code/bandwidth_gen/example/gp.html @@ -0,0 +1,9171 @@ + +General purpose instances - Amazon EC2
General purpose instances - Amazon EC2

General purpose instances

General purpose instances provide a balance of compute, memory, and networking resources. + These instances are ideal for applications that use these resources in equal proportions, + such as web servers and code repositories.

+

Available sizes

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Instance typeAvailable sizes
M5m5.large | m5.xlarge | m5.2xlarge | m5.4xlarge | m5.8xlarge | m5.12xlarge | m5.16xlarge | m5.24xlarge | m5.metal
M5am5a.large | m5a.xlarge | m5a.2xlarge | m5a.4xlarge | m5a.8xlarge | m5a.12xlarge | m5a.16xlarge | m5a.24xlarge
M5adm5ad.large | m5ad.xlarge | m5ad.2xlarge | m5ad.4xlarge | m5ad.8xlarge | m5ad.12xlarge | m5ad.16xlarge | m5ad.24xlarge
M5dm5d.large | m5d.xlarge | m5d.2xlarge | m5d.4xlarge | m5d.8xlarge | m5d.12xlarge | m5d.16xlarge | m5d.24xlarge | m5d.metal
M5dnm5dn.large | m5dn.xlarge | m5dn.2xlarge | m5dn.4xlarge | m5dn.8xlarge | m5dn.12xlarge | m5dn.16xlarge | m5dn.24xlarge | m5dn.metal
M5nm5n.large | m5n.xlarge | m5n.2xlarge | m5n.4xlarge | m5n.8xlarge | m5n.12xlarge | m5n.16xlarge | m5n.24xlarge | m5n.metal
M5znm5zn.large | m5zn.xlarge | m5zn.2xlarge | m5zn.3xlarge | m5zn.6xlarge | m5zn.12xlarge | m5zn.metal
M6am6a.large | m6a.xlarge | m6a.2xlarge | m6a.4xlarge | m6a.8xlarge | m6a.12xlarge | m6a.16xlarge | m6a.24xlarge | m6a.32xlarge | m6a.48xlarge | m6a.metal
M6gm6g.medium | m6g.large | m6g.xlarge | m6g.2xlarge | m6g.4xlarge | m6g.8xlarge | m6g.12xlarge | m6g.16xlarge | m6g.metal
M6gdm6gd.medium | m6gd.large | m6gd.xlarge | m6gd.2xlarge | m6gd.4xlarge | m6gd.8xlarge | m6gd.12xlarge | m6gd.16xlarge | m6gd.metal
M6im6i.large | m6i.xlarge | m6i.2xlarge | m6i.4xlarge | m6i.8xlarge | m6i.12xlarge | m6i.16xlarge | m6i.24xlarge | m6i.32xlarge | m6i.metal
M6idm6id.large | m6id.xlarge | m6id.2xlarge | m6id.4xlarge | m6id.8xlarge | m6id.12xlarge | m6id.16xlarge | m6id.24xlarge | m6id.32xlarge | m6id.metal
M6idnm6idn.large | m6idn.xlarge | m6idn.2xlarge | m6idn.4xlarge | m6idn.8xlarge | m6idn.12xlarge | m6idn.16xlarge | m6idn.24xlarge | m6idn.32xlarge | m6idn.metal
M6inm6in.large | m6in.xlarge | m6in.2xlarge | m6in.4xlarge | m6in.8xlarge | m6in.12xlarge | m6in.16xlarge | m6in.24xlarge | m6in.32xlarge | m6in.metal
M7am7a.medium | m7a.large | m7a.xlarge | m7a.2xlarge | m7a.4xlarge | m7a.8xlarge | m7a.12xlarge | m7a.16xlarge | m7a.24xlarge | m7a.32xlarge | m7a.48xlarge | m7a.metal-48xl
M7gm7g.medium | m7g.large | m7g.xlarge | m7g.2xlarge | m7g.4xlarge | m7g.8xlarge | m7g.12xlarge | m7g.16xlarge | m7g.metal
M7gdm7gd.medium | m7gd.large | m7gd.xlarge | m7gd.2xlarge | m7gd.4xlarge | m7gd.8xlarge | m7gd.12xlarge | m7gd.16xlarge | m7gd.metal
M7im7i.large | m7i.xlarge | m7i.2xlarge | m7i.4xlarge | m7i.8xlarge | m7i.12xlarge | m7i.16xlarge | m7i.24xlarge | m7i.48xlarge | m7i.metal-24xl | m7i.metal-48xl
M7i-flexm7i-flex.large | m7i-flex.xlarge | m7i-flex.2xlarge | m7i-flex.4xlarge | m7i-flex.8xlarge
Mac1mac1.metal
Mac2mac2.metal
Mac2-m2mac2-m2.metal
Mac2-m2promac2-m2pro.metal
T2t2.nano | t2.micro | t2.small | t2.medium | t2.large | t2.xlarge | t2.2xlarge
T3t3.nano | t3.micro | t3.small | t3.medium | t3.large | t3.xlarge | t3.2xlarge
T3at3a.nano | t3a.micro | t3a.small | t3a.medium | t3a.large | t3a.xlarge | t3a.2xlarge
T4gt4g.nano | t4g.micro | t4g.small | t4g.medium | t4g.large | t4g.xlarge | t4g.2xlarge
+ +

Platform summary

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Instance typeHypervisorProcessor type (architecture)Metal instances availableDedicated Hosts supportSpot supportHibernation supportSupported operating systems
M5NitroIntel (x86_64)Windows | Linux
M5aNitroAMD (x86_64)Windows | Linux
M5adNitroAMD (x86_64)Windows | Linux
M5dNitroIntel (x86_64)Windows | Linux
M5dnNitroIntel (x86_64)Windows | Linux
M5nNitroIntel (x86_64)Windows | Linux
M5znNitroIntel (x86_64)Windows | Linux
M6aNitroAMD (x86_64)Windows | Linux
M6gNitroAWS Graviton (arm64)Linux
M6gdNitroAWS Graviton (arm64)Linux
M6iNitroIntel (x86_64)Windows | Linux
M6idNitroIntel (x86_64)Windows | Linux
M6idnNitroIntel (x86_64)Windows | Linux
M6inNitroIntel (x86_64)Windows | Linux
M7aNitroAMD (x86_64)Windows | Linux
M7gNitroAWS Graviton (arm64)Linux
M7gdNitroAWS Graviton (arm64)Linux
M7iNitroIntel (x86_64)Windows | Linux
M7i-flexNitroIntel (x86_64)Windows | Linux
Mac1NitroIntel (x86_64_mac)Linux
Mac2NitroApple (arm64_mac)Linux
Mac2-m2NitroApple (arm64_mac)Linux
Mac2-m2proNitroApple (arm64_mac)Linux
T2XenIntel (x86_64)Windows | Linux
T3NitroIntel (x86_64)Windows | Linux
T3aNitroAMD (x86_64)Windows | Linux
T4gNitroAWS Graviton (arm64)Linux
+ +

Performance specifications

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Instance typeBurstableMemory (GiB)ProcessorvCPUsCPU coresThreads per coreAccelerators
M5
m5.large8.00Intel Xeon Platinum 8175212
m5.xlarge16.00Intel Xeon Platinum 8175422
m5.2xlarge32.00Intel Xeon Platinum 8175842
m5.4xlarge64.00Intel Xeon Platinum 81751682
m5.8xlarge128.00Intel Xeon Platinum 817532162
m5.12xlarge192.00Intel Xeon Platinum 817548242
m5.16xlarge256.00Intel Xeon Platinum 817564322
m5.24xlarge384.00Intel Xeon Platinum 817596482
m5.metal384.00Intel Xeon Platinum 817596482
M5a
m5a.large8.00AMD EPYC 7571212
m5a.xlarge16.00AMD EPYC 7571422
m5a.2xlarge32.00AMD EPYC 7571842
m5a.4xlarge64.00AMD EPYC 75711682
m5a.8xlarge128.00AMD EPYC 757132162
m5a.12xlarge192.00AMD EPYC 757148242
m5a.16xlarge256.00AMD EPYC 757164322
m5a.24xlarge384.00AMD EPYC 757196482
M5ad
m5ad.large8.00AMD EPYC 7571212
m5ad.xlarge16.00AMD EPYC 7571422
m5ad.2xlarge32.00AMD EPYC 7571842
m5ad.4xlarge64.00AMD EPYC 75711682
m5ad.8xlarge128.00AMD EPYC 757132162
m5ad.12xlarge192.00AMD EPYC 757148242
m5ad.16xlarge256.00AMD EPYC 757164322
m5ad.24xlarge384.00AMD EPYC 757196482
M5d
m5d.large8.00Intel Xeon Platinum 8175212
m5d.xlarge16.00Intel Xeon Platinum 8175422
m5d.2xlarge32.00Intel Xeon Platinum 8175842
m5d.4xlarge64.00Intel Xeon Platinum 81751682
m5d.8xlarge128.00Intel Xeon Platinum 817532162
m5d.12xlarge192.00Intel Xeon Platinum 817548242
m5d.16xlarge256.00Intel Xeon Platinum 817564322
m5d.24xlarge384.00Intel Xeon Platinum 817596482
m5d.metal384.00Intel Xeon Platinum 817596482
M5dn
m5dn.large8.00Intel Xeon Platinum 8259212
m5dn.xlarge16.00Intel Xeon Platinum 8259422
m5dn.2xlarge32.00Intel Xeon Platinum 8259842
m5dn.4xlarge64.00Intel Xeon Platinum 82591682
m5dn.8xlarge128.00Intel Xeon Platinum 825932162
m5dn.12xlarge192.00Intel Xeon Platinum 825948242
m5dn.16xlarge256.00Intel Xeon Platinum 825964322
m5dn.24xlarge384.00Intel Xeon Platinum 825996482
m5dn.metal384.00Intel Xeon Platinum 825996482
M5n
m5n.large8.00Intel Xeon Platinum 8259212
m5n.xlarge16.00Intel Xeon Platinum 8259422
m5n.2xlarge32.00Intel Xeon Platinum 8259842
m5n.4xlarge64.00Intel Xeon Platinum 82591682
m5n.8xlarge128.00Intel Xeon Platinum 825932162
m5n.12xlarge192.00Intel Xeon Platinum 825948242
m5n.16xlarge256.00Intel Xeon Platinum 825964322
m5n.24xlarge384.00Intel Xeon Platinum 825996482
m5n.metal384.00Intel Xeon Platinum 825996482
M5zn
m5zn.large8.00Intel Xeon Platinum 8252212
m5zn.xlarge16.00Intel Xeon Platinum 8252422
m5zn.2xlarge32.00Intel Xeon Platinum 8252842
m5zn.3xlarge48.00Intel Xeon Platinum 82521262
m5zn.6xlarge96.00Intel Xeon Platinum 825224122
m5zn.12xlarge192.00Intel Xeon Platinum 825248242
m5zn.metal192.00Intel Xeon Platinum 825248242
M6a
m6a.large8.00AMD EPYC 7R13212
m6a.xlarge16.00AMD EPYC 7R13422
m6a.2xlarge32.00AMD EPYC 7R13842
m6a.4xlarge64.00AMD EPYC 7R131682
m6a.8xlarge128.00AMD EPYC 7R1332162
m6a.12xlarge192.00AMD EPYC 7R1348242
m6a.16xlarge256.00AMD EPYC 7R1364322
m6a.24xlarge384.00AMD EPYC 7R1396482
m6a.32xlarge512.00AMD EPYC 7R13128642
m6a.48xlarge768.00AMD EPYC 7R13192962
m6a.metal768.00AMD EPYC 7R13192962
M6g
m6g.medium4.00AWS Graviton2 Processor111
m6g.large8.00AWS Graviton2 Processor221
m6g.xlarge16.00AWS Graviton2 Processor441
m6g.2xlarge32.00AWS Graviton2 Processor881
m6g.4xlarge64.00AWS Graviton2 Processor16161
m6g.8xlarge128.00AWS Graviton2 Processor32321
m6g.12xlarge192.00AWS Graviton2 Processor48481
m6g.16xlarge256.00AWS Graviton2 Processor64641
m6g.metal256.00AWS Graviton2 Processor64641
M6gd
m6gd.medium4.00AWS Graviton2 Processor111
m6gd.large8.00AWS Graviton2 Processor221
m6gd.xlarge16.00AWS Graviton2 Processor441
m6gd.2xlarge32.00AWS Graviton2 Processor881
m6gd.4xlarge64.00AWS Graviton2 Processor16161
m6gd.8xlarge128.00AWS Graviton2 Processor32321
m6gd.12xlarge192.00AWS Graviton2 Processor48481
m6gd.16xlarge256.00AWS Graviton2 Processor64641
m6gd.metal256.00AWS Graviton2 Processor64641
M6i
m6i.large8.00Intel Xeon Ice Lake212
m6i.xlarge16.00Intel Xeon Ice Lake422
m6i.2xlarge32.00Intel Xeon Ice Lake842
m6i.4xlarge64.00Intel Xeon Ice Lake1682
m6i.8xlarge128.00Intel Xeon Ice Lake32162
m6i.12xlarge192.00Intel Xeon Ice Lake48242
m6i.16xlarge256.00Intel Xeon Ice Lake64322
m6i.24xlarge384.00Intel Xeon Ice Lake96482
m6i.32xlarge512.00Intel Xeon Ice Lake128642
m6i.metal512.00Intel Xeon Ice Lake128642
M6id
m6id.large8.00Intel Xeon Ice Lake212
m6id.xlarge16.00Intel Xeon Ice Lake422
m6id.2xlarge32.00Intel Xeon Ice Lake842
m6id.4xlarge64.00Intel Xeon Ice Lake1682
m6id.8xlarge128.00Intel Xeon Ice Lake32162
m6id.12xlarge192.00Intel Xeon Ice Lake48242
m6id.16xlarge256.00Intel Xeon Ice Lake64322
m6id.24xlarge384.00Intel Xeon Ice Lake96482
m6id.32xlarge512.00Intel Xeon Ice Lake128642
m6id.metal512.00Intel Xeon Ice Lake128642
M6idn
m6idn.large8.00Intel Xeon Ice Lake212
m6idn.xlarge16.00Intel Xeon Ice Lake422
m6idn.2xlarge32.00Intel Xeon Ice Lake842
m6idn.4xlarge64.00Intel Xeon Ice Lake1682
m6idn.8xlarge128.00Intel Xeon Ice Lake32162
m6idn.12xlarge192.00Intel Xeon Ice Lake48242
m6idn.16xlarge256.00Intel Xeon Ice Lake64322
m6idn.24xlarge384.00Intel Xeon Ice Lake96482
m6idn.32xlarge512.00Intel Xeon Ice Lake128642
m6idn.metal512.00Intel Xeon Ice Lake128642
M6in
m6in.large8.00Intel Xeon Ice Lake212
m6in.xlarge16.00Intel Xeon Ice Lake422
m6in.2xlarge32.00Intel Xeon Ice Lake842
m6in.4xlarge64.00Intel Xeon Ice Lake1682
m6in.8xlarge128.00Intel Xeon Ice Lake32162
m6in.12xlarge192.00Intel Xeon Ice Lake48242
m6in.16xlarge256.00Intel Xeon Ice Lake64322
m6in.24xlarge384.00Intel Xeon Ice Lake96482
m6in.32xlarge512.00Intel Xeon Ice Lake128642
m6in.metal512.00Intel Xeon Ice Lake128642
M7a
m7a.medium4.00AMD EPYC 9R14111
m7a.large8.00AMD EPYC 9R14221
m7a.xlarge16.00AMD EPYC 9R14441
m7a.2xlarge32.00AMD EPYC 9R14881
m7a.4xlarge64.00AMD EPYC 9R1416161
m7a.8xlarge128.00AMD EPYC 9R1432321
m7a.12xlarge192.00AMD EPYC 9R1448481
m7a.16xlarge256.00AMD EPYC 9R1464641
m7a.24xlarge384.00AMD EPYC 9R1496961
m7a.32xlarge512.00AMD EPYC 9R141281281
m7a.48xlarge768.00AMD EPYC 9R141921921
m7a.metal-48xl768.00AMD EPYC 9R141921921
M7g
m7g.medium4.00AWS Graviton3 Processor111
m7g.large8.00AWS Graviton3 Processor221
m7g.xlarge16.00AWS Graviton3 Processor441
m7g.2xlarge32.00AWS Graviton3 Processor881
m7g.4xlarge64.00AWS Graviton3 Processor16161
m7g.8xlarge128.00AWS Graviton3 Processor32321
m7g.12xlarge192.00AWS Graviton3 Processor48481
m7g.16xlarge256.00AWS Graviton3 Processor64641
m7g.metal256.00AWS Graviton3 Processor64641
M7gd
m7gd.medium4.00AWS Graviton3 Processor111
m7gd.large8.00AWS Graviton3 Processor221
m7gd.xlarge16.00AWS Graviton3 Processor441
m7gd.2xlarge32.00AWS Graviton3 Processor881
m7gd.4xlarge64.00AWS Graviton3 Processor16161
m7gd.8xlarge128.00AWS Graviton3 Processor32321
m7gd.12xlarge192.00AWS Graviton3 Processor48481
m7gd.16xlarge256.00AWS Graviton3 Processor64641
m7gd.metal256.00AWS Graviton3 Processor64641
M7i
m7i.large8.00Intel Xeon Sapphire Rapids212
m7i.xlarge16.00Intel Xeon Sapphire Rapids422
m7i.2xlarge32.00Intel Xeon Sapphire Rapids842
m7i.4xlarge64.00Intel Xeon Sapphire Rapids1682
m7i.8xlarge128.00Intel Xeon Sapphire Rapids32162
m7i.12xlarge192.00Intel Xeon Sapphire Rapids48242
m7i.16xlarge256.00Intel Xeon Sapphire Rapids64322
m7i.24xlarge384.00Intel Xeon Sapphire Rapids96482
m7i.48xlarge768.00Intel Xeon Sapphire Rapids192962
m7i.metal-24xl384.00Intel Xeon Sapphire Rapids96482
m7i.metal-48xl768.00Intel Xeon Sapphire Rapids192962
M7i-flex
m7i-flex.large8.00Intel Xeon Sapphire Rapids212
m7i-flex.xlarge16.00Intel Xeon Sapphire Rapids422
m7i-flex.2xlarge32.00Intel Xeon Sapphire Rapids842
m7i-flex.4xlarge64.00Intel Xeon Sapphire Rapids1682
m7i-flex.8xlarge128.00Intel Xeon Sapphire Rapids32162
Mac1
mac1.metal32.00Intel Core i7-8700B1262
Mac2
mac2.metal16.00Apple M1 chip with 8-core CPU842
Mac2-m2
mac2-m2.metal24.00Apple M2 with 8‑core CPU881
Mac2-m2pro
mac2-m2pro.metal32.00Apple M2 Pro with 12‑core CPU12121
T2
t2.nano0.50Intel Xeon Family111
t2.micro1.00Intel Xeon Family111
t2.small2.00Intel Xeon Family111
t2.medium4.00Intel Broadwell E5-2686v4221
t2.large8.00Intel Broadwell E5-2686v4221
t2.xlarge16.00Intel Broadwell E5-2686v4441
t2.2xlarge32.00Intel Broadwell E5-2686v4881
T3
t3.nano0.50Intel Skylake P-8175212
t3.micro1.00Intel Skylake P-8175212
t3.small2.00Intel Skylake P-8175212
t3.medium4.00Intel Skylake P-8175212
t3.large8.00Intel Skylake P-8175212
t3.xlarge16.00Intel Skylake P-8175422
t3.2xlarge32.00Intel Skylake P-8175842
T3a
t3a.nano0.50AMD EPYC 7571212
t3a.micro1.00AMD EPYC 7571212
t3a.small2.00AMD EPYC 7571212
t3a.medium4.00AMD EPYC 7571212
t3a.large8.00AMD EPYC 7571212
t3a.xlarge16.00AMD EPYC 7571422
t3a.2xlarge32.00AMD EPYC 7571842
T4g
t4g.nano0.50AWS Graviton2 Processor221
t4g.micro1.00AWS Graviton2 Processor221
t4g.small2.00AWS Graviton2 Processor221
t4g.medium4.00AWS Graviton2 Processor221
t4g.large8.00AWS Graviton2 Processor221
t4g.xlarge16.00AWS Graviton2 Processor441
t4g.2xlarge32.00AWS Graviton2 Processor881
+ +

Network specifications

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Instance typeBaseline / Burst bandwidth (Gbps)EFAENAENA ExpressNetwork cardsMax. network interfacesIP addresses per interfaceIPv6
M5
m5.large 10.75 / 10.01310
m5.xlarge 11.25 / 10.01415
m5.2xlarge 12.5 / 10.01415
m5.4xlarge 15.0 / 10.01830
m5.8xlarge10 Gigabit1830
m5.12xlarge12 Gigabit1830
m5.16xlarge20 Gigabit11550
m5.24xlarge25 Gigabit11550
m5.metal25 Gigabit11550
M5a
m5a.large 10.75 / 10.01310
m5a.xlarge 11.25 / 10.01415
m5a.2xlarge 12.5 / 10.01415
m5a.4xlarge 15.0 / 10.01830
m5a.8xlarge 17.5 / 10.01830
m5a.12xlarge10 Gigabit1830
m5a.16xlarge12 Gigabit11550
m5a.24xlarge20 Gigabit11550
M5ad
m5ad.large 10.75 / 10.01310
m5ad.xlarge 11.25 / 10.01415
m5ad.2xlarge 12.5 / 10.01415
m5ad.4xlarge 15.0 / 10.01830
m5ad.8xlarge 17.5 / 10.01830
m5ad.12xlarge10 Gigabit1830
m5ad.16xlarge12 Gigabit11550
m5ad.24xlarge20 Gigabit11550
M5d
m5d.large 10.75 / 10.01310
m5d.xlarge 11.25 / 10.01415
m5d.2xlarge 12.5 / 10.01415
m5d.4xlarge 15.0 / 10.01830
m5d.8xlarge10 Gigabit1830
m5d.12xlarge12 Gigabit1830
m5d.16xlarge20 Gigabit11550
m5d.24xlarge25 Gigabit11550
m5d.metal25 Gigabit11550
M5dn
m5dn.large 12.1 / 25.01310
m5dn.xlarge 14.1 / 25.01415
m5dn.2xlarge 18.125 / 25.01415
m5dn.4xlarge 116.25 / 25.01830
m5dn.8xlarge25 Gigabit1830
m5dn.12xlarge50 Gigabit1830
m5dn.16xlarge75 Gigabit11550
m5dn.24xlarge100 Gigabit11550
m5dn.metal100 Gigabit11550
M5n
m5n.large 12.1 / 25.01310
m5n.xlarge 14.1 / 25.01415
m5n.2xlarge 18.125 / 25.01415
m5n.4xlarge 116.25 / 25.01830
m5n.8xlarge25 Gigabit1830
m5n.12xlarge50 Gigabit1830
m5n.16xlarge75 Gigabit11550
m5n.24xlarge100 Gigabit11550
m5n.metal100 Gigabit11550
M5zn
m5zn.large 13.0 / 25.01310
m5zn.xlarge 15.0 / 25.01415
m5zn.2xlarge 110.0 / 25.01415
m5zn.3xlarge 115.0 / 25.01830
m5zn.6xlarge50 Gigabit1830
m5zn.12xlarge100 Gigabit11550
m5zn.metal100 Gigabit11550
M6a
m6a.large 10.781 / 12.51310
m6a.xlarge 11.562 / 12.51415
m6a.2xlarge 13.125 / 12.51415
m6a.4xlarge 16.25 / 12.51830
m6a.8xlarge12.5 Gigabit1830
m6a.12xlarge18.75 Gigabit1830
m6a.16xlarge25 Gigabit11550
m6a.24xlarge37.5 Gigabit11550
m6a.32xlarge50 Gigabit11550
m6a.48xlarge50 Gigabit11550
m6a.metal50 Gigabit11550
M6g
m6g.medium 10.5 / 10.0124
m6g.large 10.75 / 10.01310
m6g.xlarge 11.25 / 10.01415
m6g.2xlarge 12.5 / 10.01415
m6g.4xlarge 15.0 / 10.01830
m6g.8xlarge12 Gigabit1830
m6g.12xlarge20 Gigabit1830
m6g.16xlarge25 Gigabit11550
m6g.metal25 Gigabit11550
M6gd
m6gd.medium 10.5 / 10.0124
m6gd.large 10.75 / 10.01310
m6gd.xlarge 11.25 / 10.01415
m6gd.2xlarge 12.5 / 10.01415
m6gd.4xlarge 15.0 / 10.01830
m6gd.8xlarge12 Gigabit1830
m6gd.12xlarge20 Gigabit1830
m6gd.16xlarge25 Gigabit11550
m6gd.metal25 Gigabit11550
M6i
m6i.large 10.781 / 12.51310
m6i.xlarge 11.562 / 12.51415
m6i.2xlarge 13.125 / 12.51415
m6i.4xlarge 16.25 / 12.51830
m6i.8xlarge12.5 Gigabit1830
m6i.12xlarge18.75 Gigabit1830
m6i.16xlarge25 Gigabit11550
m6i.24xlarge37.5 Gigabit11550
m6i.32xlarge50 Gigabit11550
m6i.metal50 Gigabit11550
M6id
m6id.large 10.781 / 12.51310
m6id.xlarge 11.562 / 12.51415
m6id.2xlarge 13.125 / 12.51415
m6id.4xlarge 16.25 / 12.51830
m6id.8xlarge12.5 Gigabit1830
m6id.12xlarge18.75 Gigabit1830
m6id.16xlarge25 Gigabit11550
m6id.24xlarge37.5 Gigabit11550
m6id.32xlarge50 Gigabit11550
m6id.metal50 Gigabit11550
M6idn
m6idn.large 13.125 / 25.01310
m6idn.xlarge 16.25 / 30.01415
m6idn.2xlarge 112.5 / 40.01415
m6idn.4xlarge 125.0 / 50.01830
m6idn.8xlarge50 Gigabit1830
m6idn.12xlarge75 Gigabit1830
m6idn.16xlarge100 Gigabit11550
m6idn.24xlarge150 Gigabit11550
m6idn.32xlarge200 Gigabit21650
m6idn.metal200 Gigabit21650
M6in
m6in.large 13.125 / 25.01310
m6in.xlarge 16.25 / 30.01415
m6in.2xlarge 112.5 / 40.01415
m6in.4xlarge 125.0 / 50.01830
m6in.8xlarge50 Gigabit1830
m6in.12xlarge75 Gigabit1830
m6in.16xlarge100 Gigabit11550
m6in.24xlarge150 Gigabit11550
m6in.32xlarge200 Gigabit21650
m6in.metal200 Gigabit21650
M7a
m7a.medium 10.39 / 12.5124
m7a.large 10.781 / 12.51310
m7a.xlarge 11.562 / 12.51415
m7a.2xlarge 13.125 / 12.51415
m7a.4xlarge 16.25 / 12.51830
m7a.8xlarge12.5 Gigabit1830
m7a.12xlarge18.75 Gigabit1830
m7a.16xlarge25 Gigabit11550
m7a.24xlarge37.5 Gigabit11550
m7a.32xlarge50 Gigabit11550
m7a.48xlarge50 Gigabit11550
m7a.metal-48xl50 Gigabit11550
M7g
m7g.medium 10.52 / 12.5124
m7g.large 10.937 / 12.51310
m7g.xlarge 11.876 / 12.51415
m7g.2xlarge 13.75 / 15.01415
m7g.4xlarge 17.5 / 15.01830
m7g.8xlarge15 Gigabit1830
m7g.12xlarge22.5 Gigabit1830
m7g.16xlarge30 Gigabit11550
m7g.metal30 Gigabit11550
M7gd
m7gd.medium 10.52 / 12.5124
m7gd.large 10.937 / 12.51310
m7gd.xlarge 11.876 / 12.51415
m7gd.2xlarge 13.75 / 15.01415
m7gd.4xlarge 17.5 / 15.01830
m7gd.8xlarge15 Gigabit1830
m7gd.12xlarge22.5 Gigabit1830
m7gd.16xlarge30 Gigabit11550
m7gd.metal30 Gigabit11550
M7i
m7i.large 10.781 / 12.51310
m7i.xlarge 11.562 / 12.51415
m7i.2xlarge 13.125 / 12.51415
m7i.4xlarge 16.25 / 12.51830
m7i.8xlarge12.5 Gigabit1830
m7i.12xlarge18.75 Gigabit1830
m7i.16xlarge25 Gigabit11550
m7i.24xlarge37.5 Gigabit11550
m7i.48xlarge50 Gigabit11550
m7i.metal-24xl37.5 Gigabit11550
m7i.metal-48xl50 Gigabit11550
M7i-flex
m7i-flex.large 10.39 / 12.51310
m7i-flex.xlarge 10.781 / 12.51415
m7i-flex.2xlarge 11.562 / 12.51415
m7i-flex.4xlarge 13.125 / 12.51830
m7i-flex.8xlarge 16.25 / 12.51830
Mac1
mac1.metal25 Gigabit1830
Mac2
mac2.metal10 Gigabit1830
Mac2-m2
mac2-m2.metal10 Gigabit1830
Mac2-m2pro
mac2-m2pro.metal10 Gigabit1830
T2
t2.nanoLow to Moderate122
t2.microLow to Moderate122
t2.smallLow to Moderate134
t2.mediumLow to Moderate136
t2.largeLow to Moderate1312
t2.xlargeModerate1315
t2.2xlargeModerate1315
T3
t3.nano 10.032 / 5.0122
t3.micro 10.064 / 5.0122
t3.small 10.128 / 5.0134
t3.medium 10.256 / 5.0136
t3.large 10.512 / 5.01312
t3.xlarge 11.024 / 5.01415
t3.2xlarge 12.048 / 5.01415
T3a
t3a.nano 10.032 / 5.0122
t3a.micro 10.064 / 5.0122
t3a.small 10.128 / 5.0124
t3a.medium 10.256 / 5.0136
t3a.large 10.512 / 5.01312
t3a.xlarge 11.024 / 5.01415
t3a.2xlarge 12.048 / 5.01415
T4g
t4g.nano 10.032 / 5.0122
t4g.micro 10.064 / 5.0122
t4g.small 10.128 / 5.0134
t4g.medium 10.256 / 5.0136
t4g.large 10.512 / 5.01312
t4g.xlarge 11.024 / 5.01415
t4g.2xlarge 12.048 / 5.01415
+
Note

1 These instances have a baseline bandwidth and can + use a network I/O credit mechanism to burst beyond their baseline bandwidth on a best effort basis. + Other instances types can sustain their maximum performance indefinitely. For more information, + see + instance network bandwidth.

For 32xlarge and metal instance types that + support 200 Gbps, at least 2 ENIs, each attached to a different network card, are required on the instance to achieve + 200 Gbps throughput. Each ENI attached to a network card can achieve a max of 170 Gbps.

+ +

Amazon EBS specifications

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Instance typeBaseline / Maximum bandwidth (Mbps)Baseline / Maximum throughput (MB/s, 128 KiB I/O)Baseline / Maximum IOPS (16 KiB I/O)NVMeEBS optimization 2
M5
m5.large 1650.00 / 4750.0081.25 / 593.753600.00 / 18750.00default
m5.xlarge 11150.00 / 4750.00143.75 / 593.756000.00 / 18750.00default
m5.2xlarge 12300.00 / 4750.00287.50 / 593.7512000.00 / 18750.00default
m5.4xlarge4750.00593.7518750.00default
m5.8xlarge6800.00850.0030000.00default
m5.12xlarge9500.001187.5040000.00default
m5.16xlarge13600.001700.0060000.00default
m5.24xlarge19000.002375.0080000.00default
m5.metal19000.002375.0080000.00default
M5a
m5a.large 1650.00 / 2880.0081.25 / 360.003600.00 / 16000.00default
m5a.xlarge 11085.00 / 2880.00135.62 / 360.006000.00 / 16000.00default
m5a.2xlarge 11580.00 / 2880.00197.50 / 360.008333.00 / 16000.00default
m5a.4xlarge2880.00360.0016000.00default
m5a.8xlarge4750.00593.7520000.00default
m5a.12xlarge6780.00847.5030000.00default
m5a.16xlarge9500.001187.5040000.00default
m5a.24xlarge13750.001718.7560000.00default
M5ad
m5ad.large 1650.00 / 2880.0081.25 / 360.003600.00 / 16000.00default
m5ad.xlarge 11085.00 / 2880.00135.62 / 360.006000.00 / 16000.00default
m5ad.2xlarge 11580.00 / 2880.00197.50 / 360.008333.00 / 16000.00default
m5ad.4xlarge2880.00360.0016000.00default
m5ad.8xlarge4750.00593.7520000.00default
m5ad.12xlarge6780.00847.5030000.00default
m5ad.16xlarge9500.001187.5040000.00default
m5ad.24xlarge13750.001718.7560000.00default
M5d
m5d.large 1650.00 / 4750.0081.25 / 593.753600.00 / 18750.00default
m5d.xlarge 11150.00 / 4750.00143.75 / 593.756000.00 / 18750.00default
m5d.2xlarge 12300.00 / 4750.00287.50 / 593.7512000.00 / 18750.00default
m5d.4xlarge4750.00593.7518750.00default
m5d.8xlarge6800.00850.0030000.00default
m5d.12xlarge9500.001187.5040000.00default
m5d.16xlarge13600.001700.0060000.00default
m5d.24xlarge19000.002375.0080000.00default
m5d.metal19000.002375.0080000.00default
M5dn
m5dn.large 1650.00 / 4750.0081.25 / 593.753600.00 / 18750.00default
m5dn.xlarge 11150.00 / 4750.00143.75 / 593.756000.00 / 18750.00default
m5dn.2xlarge 12300.00 / 4750.00287.50 / 593.7512000.00 / 18750.00default
m5dn.4xlarge4750.00593.7518750.00default
m5dn.8xlarge6800.00850.0030000.00default
m5dn.12xlarge9500.001187.5040000.00default
m5dn.16xlarge13600.001700.0060000.00default
m5dn.24xlarge19000.002375.0080000.00default
m5dn.metal19000.002375.0080000.00default
M5n
m5n.large 1650.00 / 4750.0081.25 / 593.753600.00 / 18750.00default
m5n.xlarge 11150.00 / 4750.00143.75 / 593.756000.00 / 18750.00default
m5n.2xlarge 12300.00 / 4750.00287.50 / 593.7512000.00 / 18750.00default
m5n.4xlarge4750.00593.7518750.00default
m5n.8xlarge6800.00850.0030000.00default
m5n.12xlarge9500.001187.5040000.00default
m5n.16xlarge13600.001700.0060000.00default
m5n.24xlarge19000.002375.0080000.00default
m5n.metal19000.002375.0080000.00default
M5zn
m5zn.large 1800.00 / 3170.00100.00 / 396.253333.00 / 13333.00default
m5zn.xlarge 11564.00 / 3170.00195.50 / 396.256667.00 / 13333.00default
m5zn.2xlarge3170.00396.2513333.00default
m5zn.3xlarge4750.00593.7520000.00default
m5zn.6xlarge9500.001187.5040000.00default
m5zn.12xlarge19000.002375.0080000.00default
m5zn.metal19000.002375.0080000.00default
M6a
m6a.large 1650.00 / 10000.0081.25 / 1250.003600.00 / 40000.00default
m6a.xlarge 11250.00 / 10000.00156.25 / 1250.006000.00 / 40000.00default
m6a.2xlarge 12500.00 / 10000.00312.50 / 1250.0012000.00 / 40000.00default
m6a.4xlarge 15000.00 / 10000.00625.00 / 1250.0020000.00 / 40000.00default
m6a.8xlarge10000.001250.0040000.00default
m6a.12xlarge15000.001875.0060000.00default
m6a.16xlarge20000.002500.0080000.00default
m6a.24xlarge30000.003750.00120000.00default
m6a.32xlarge40000.005000.00160000.00default
m6a.48xlarge40000.005000.00240000.00default
m6a.metal40000.005000.00240000.00default
M6g
m6g.medium 1315.00 / 4750.0039.38 / 593.752500.00 / 20000.00default
m6g.large 1630.00 / 4750.0078.75 / 593.753600.00 / 20000.00default
m6g.xlarge 11188.00 / 4750.00148.50 / 593.756000.00 / 20000.00default
m6g.2xlarge 12375.00 / 4750.00296.88 / 593.7512000.00 / 20000.00default
m6g.4xlarge4750.00593.7520000.00default
m6g.8xlarge9500.001187.5040000.00default
m6g.12xlarge14250.001781.2550000.00default
m6g.16xlarge19000.002375.0080000.00default
m6g.metal19000.002375.0080000.00default
M6gd
m6gd.medium 1315.00 / 4750.0039.38 / 593.752500.00 / 20000.00default
m6gd.large 1630.00 / 4750.0078.75 / 593.753600.00 / 20000.00default
m6gd.xlarge 11188.00 / 4750.00148.50 / 593.756000.00 / 20000.00default
m6gd.2xlarge 12375.00 / 4750.00296.88 / 593.7512000.00 / 20000.00default
m6gd.4xlarge4750.00593.7520000.00default
m6gd.8xlarge9500.001187.5040000.00default
m6gd.12xlarge14250.001781.2550000.00default
m6gd.16xlarge19000.002375.0080000.00default
m6gd.metal19000.002375.0080000.00default
M6i
m6i.large 1650.00 / 10000.0081.25 / 1250.003600.00 / 40000.00default
m6i.xlarge 11250.00 / 10000.00156.25 / 1250.006000.00 / 40000.00default
m6i.2xlarge 12500.00 / 10000.00312.50 / 1250.0012000.00 / 40000.00default
m6i.4xlarge 15000.00 / 10000.00625.00 / 1250.0020000.00 / 40000.00default
m6i.8xlarge10000.001250.0040000.00default
m6i.12xlarge15000.001875.0060000.00default
m6i.16xlarge20000.002500.0080000.00default
m6i.24xlarge30000.003750.00120000.00default
m6i.32xlarge40000.005000.00160000.00default
m6i.metal40000.005000.00160000.00default
M6id
m6id.large 1650.00 / 10000.0081.25 / 1250.003600.00 / 40000.00default
m6id.xlarge 11250.00 / 10000.00156.25 / 1250.006000.00 / 40000.00default
m6id.2xlarge 12500.00 / 10000.00312.50 / 1250.0012000.00 / 40000.00default
m6id.4xlarge 15000.00 / 10000.00625.00 / 1250.0020000.00 / 40000.00default
m6id.8xlarge10000.001250.0040000.00default
m6id.12xlarge15000.001875.0060000.00default
m6id.16xlarge20000.002500.0080000.00default
m6id.24xlarge30000.003750.00120000.00default
m6id.32xlarge40000.005000.00160000.00default
m6id.metal40000.005000.00160000.00default
M6idn
m6idn.large 11562.00 / 25000.00195.31 / 3125.006250.00 / 100000.00default
m6idn.xlarge 13125.00 / 25000.00390.62 / 3125.0012500.00 / 100000.00default
m6idn.2xlarge 16250.00 / 25000.00781.25 / 3125.0025000.00 / 100000.00default
m6idn.4xlarge 112500.00 / 25000.001562.50 / 3125.0050000.00 / 100000.00default
m6idn.8xlarge25000.003125.00100000.00default
m6idn.12xlarge37500.004687.50150000.00default
m6idn.16xlarge50000.006250.00200000.00default
m6idn.24xlarge75000.009375.00300000.00default
m6idn.32xlarge100000.0012500.00400000.00default
m6idn.metal100000.0012500.00400000.00default
M6in
m6in.large 11562.00 / 25000.00195.31 / 3125.006250.00 / 100000.00default
m6in.xlarge 13125.00 / 25000.00390.62 / 3125.0012500.00 / 100000.00default
m6in.2xlarge 16250.00 / 25000.00781.25 / 3125.0025000.00 / 100000.00default
m6in.4xlarge 112500.00 / 25000.001562.50 / 3125.0050000.00 / 100000.00default
m6in.8xlarge25000.003125.00100000.00default
m6in.12xlarge37500.004687.50150000.00default
m6in.16xlarge50000.006250.00200000.00default
m6in.24xlarge75000.009375.00300000.00default
m6in.32xlarge100000.0012500.00400000.00default
m6in.metal100000.0012500.00400000.00default
M7a
m7a.medium 1325.00 / 10000.0040.62 / 1250.002500.00 / 40000.00default
m7a.large 1650.00 / 10000.0081.25 / 1250.003600.00 / 40000.00default
m7a.xlarge 11250.00 / 10000.00156.25 / 1250.006000.00 / 40000.00default
m7a.2xlarge 12500.00 / 10000.00312.50 / 1250.0012000.00 / 40000.00default
m7a.4xlarge 15000.00 / 10000.00625.00 / 1250.0020000.00 / 40000.00default
m7a.8xlarge10000.001250.0040000.00default
m7a.12xlarge15000.001875.0060000.00default
m7a.16xlarge20000.002500.0080000.00default
m7a.24xlarge30000.003750.00120000.00default
m7a.32xlarge40000.005000.00160000.00default
m7a.48xlarge40000.005000.00240000.00default
m7a.metal-48xl40000.005000.00240000.00default
M7g
m7g.medium 1315.00 / 10000.0039.38 / 1250.002500.00 / 40000.00default
m7g.large 1630.00 / 10000.0078.75 / 1250.003600.00 / 40000.00default
m7g.xlarge 11250.00 / 10000.00156.25 / 1250.006000.00 / 40000.00default
m7g.2xlarge 12500.00 / 10000.00312.50 / 1250.0012000.00 / 40000.00default
m7g.4xlarge 15000.00 / 10000.00625.00 / 1250.0020000.00 / 40000.00default
m7g.8xlarge10000.001250.0040000.00default
m7g.12xlarge15000.001875.0060000.00default
m7g.16xlarge20000.002500.0080000.00default
m7g.metal20000.002500.0080000.00default
M7gd
m7gd.medium 1315.00 / 10000.0039.38 / 1250.002500.00 / 40000.00default
m7gd.large 1630.00 / 10000.0078.75 / 1250.003600.00 / 40000.00default
m7gd.xlarge 11250.00 / 10000.00156.25 / 1250.006000.00 / 40000.00default
m7gd.2xlarge 12500.00 / 10000.00312.50 / 1250.0012000.00 / 40000.00default
m7gd.4xlarge 15000.00 / 10000.00625.00 / 1250.0020000.00 / 40000.00default
m7gd.8xlarge10000.001250.0040000.00default
m7gd.12xlarge15000.001875.0060000.00default
m7gd.16xlarge20000.002500.0080000.00default
m7gd.metal20000.002500.0080000.00default
M7i
m7i.large 1650.00 / 10000.0081.25 / 1250.003600.00 / 40000.00default
m7i.xlarge 11250.00 / 10000.00156.25 / 1250.006000.00 / 40000.00default
m7i.2xlarge 12500.00 / 10000.00312.50 / 1250.0012000.00 / 40000.00default
m7i.4xlarge 15000.00 / 10000.00625.00 / 1250.0020000.00 / 40000.00default
m7i.8xlarge10000.001250.0040000.00default
m7i.12xlarge15000.001875.0060000.00default
m7i.16xlarge20000.002500.0080000.00default
m7i.24xlarge30000.003750.00120000.00default
m7i.48xlarge40000.005000.00240000.00default
m7i.metal-24xl30000.003750.00120000.00default
m7i.metal-48xl40000.005000.00240000.00default
M7i-flex
m7i-flex.large 1312.00 / 10000.0039.06 / 1250.002500.00 / 40000.00default
m7i-flex.xlarge 1625.00 / 10000.0078.12 / 1250.003600.00 / 40000.00default
m7i-flex.2xlarge 11250.00 / 10000.00156.25 / 1250.006000.00 / 40000.00default
m7i-flex.4xlarge 12500.00 / 10000.00312.50 / 1250.0012000.00 / 40000.00default
m7i-flex.8xlarge 15000.00 / 10000.00625.00 / 1250.0020000.00 / 40000.00default
Mac1
mac1.metal14000.001750.0080000.00default
Mac2
mac2.metal10000.001250.0055000.00default
Mac2-m2
mac2-m2.metal8000.001000.0055000.00default
Mac2-m2pro
mac2-m2pro.metal8000.001000.0055000.00default
T2
T3
t3.nano 143.00 / 2085.005.38 / 260.62250.00 / 11800.00default
t3.micro 187.00 / 2085.0010.88 / 260.62500.00 / 11800.00default
t3.small 1174.00 / 2085.0021.75 / 260.621000.00 / 11800.00default
t3.medium 1347.00 / 2085.0043.38 / 260.622000.00 / 11800.00default
t3.large 1695.00 / 2780.0086.88 / 347.504000.00 / 15700.00default
t3.xlarge 1695.00 / 2780.0086.88 / 347.504000.00 / 15700.00default
t3.2xlarge 1695.00 / 2780.0086.88 / 347.504000.00 / 15700.00default
T3a
t3a.nano 145.00 / 2085.005.62 / 260.62250.00 / 11800.00default
t3a.micro 190.00 / 2085.0011.25 / 260.62500.00 / 11800.00default
t3a.small 1175.00 / 2085.0021.88 / 260.621000.00 / 11800.00default
t3a.medium 1350.00 / 2085.0043.75 / 260.622000.00 / 11800.00default
t3a.large 1695.00 / 2780.0086.88 / 347.504000.00 / 15700.00default
t3a.xlarge 1695.00 / 2780.0086.88 / 347.504000.00 / 15700.00default
t3a.2xlarge 1695.00 / 2780.0086.88 / 347.504000.00 / 15700.00default
T4g
t4g.nano 143.00 / 2085.005.38 / 260.62250.00 / 11800.00default
t4g.micro 187.00 / 2085.0010.88 / 260.62500.00 / 11800.00default
t4g.small 1174.00 / 2085.0021.75 / 260.621000.00 / 11800.00default
t4g.medium 1347.00 / 2085.0043.38 / 260.622000.00 / 11800.00default
t4g.large 1695.00 / 2780.0086.88 / 347.504000.00 / 15700.00default
t4g.xlarge 1695.00 / 2780.0086.88 / 347.504000.00 / 15700.00default
t4g.2xlarge 1695.00 / 2780.0086.88 / 347.504000.00 / 15700.00default
+
Note

1 These instances can support maximum performance for 30 minutes at + least once every 24 hours, after which they revert to their baseline performance. + Other instances can sustain the maximum performance indefinitely. If your workload requires + sustained maximum performance for longer than 30 minutes, use one of these instances.

2 default indicates that instances are enabled + for EBS optimization by default. supported indicates that instances can optionally + be enabled for EBS optimization For more information, see Amazon EBS–optimized instances.

+ +

Instance store specifications

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Instance typeInstance store volumesInstance store typeRead / Write performance (IOPS)Needs initialization 1TRIM support 2
M5ad
m5ad.large1 x 75 GBNVMe SSD30,000 / 15,000
m5ad.xlarge1 x 150 GBNVMe SSD59,000 / 29,000
m5ad.2xlarge1 x 300 GBNVMe SSD117,000 / 57,000
m5ad.4xlarge2 x 300 GBNVMe SSD234,000 / 114,000
m5ad.8xlarge2 x 600 GBNVMe SSD466,666 / 233,334
m5ad.12xlarge2 x 900 GBNVMe SSD700,000 / 340,000
m5ad.16xlarge4 x 600 GBNVMe SSD933,332 / 466,668
m5ad.24xlarge4 x 900 GBNVMe SSD1,400,000 / 680,000
M5d
m5d.large1 x 75 GBNVMe SSD30,000 / 15,000
m5d.xlarge1 x 150 GBNVMe SSD59,000 / 29,000
m5d.2xlarge1 x 300 GBNVMe SSD117,000 / 57,000
m5d.4xlarge2 x 300 GBNVMe SSD234,000 / 114,000
m5d.8xlarge2 x 600 GBNVMe SSD466,666 / 233,334
m5d.12xlarge2 x 900 GBNVMe SSD700,000 / 340,000
m5d.16xlarge4 x 600 GBNVMe SSD933,332 / 466,668
m5d.24xlarge4 x 900 GBNVMe SSD1,400,000 / 680,000
m5d.metal4 x 900 GBNVMe SSD1,400,000 / 680,000
M5dn
m5dn.large1 x 75 GBNVMe SSD29,000 / 14,500
m5dn.xlarge1 x 150 GBNVMe SSD58,000 / 29,000
m5dn.2xlarge1 x 300 GBNVMe SSD116,000 / 58,000
m5dn.4xlarge2 x 300 GBNVMe SSD232,000 / 116,000
m5dn.8xlarge2 x 600 GBNVMe SSD464,000 / 232,000
m5dn.12xlarge2 x 900 GBNVMe SSD700,000 / 350,000
m5dn.16xlarge4 x 600 GBNVMe SSD930,000 / 465,000
m5dn.24xlarge4 x 900 GBNVMe SSD1,400,000 / 700,000
m5dn.metal4 x 900 GBNVMe SSD1,400,000 / 700,000
M6gd
m6gd.medium1 x 59 GBNVMe SSD13,438 / 5,625
m6gd.large1 x 118 GBNVMe SSD26,875 / 11,250
m6gd.xlarge1 x 237 GBNVMe SSD53,750 / 22,500
m6gd.2xlarge1 x 474 GBNVMe SSD107,500 / 45,000
m6gd.4xlarge1 x 950 GBNVMe SSD215,000 / 90,000
m6gd.8xlarge1 x 1900 GBNVMe SSD430,000 / 180,000
m6gd.12xlarge2 x 1425 GBNVMe SSD645,000 / 270,000
m6gd.16xlarge2 x 1900 GBNVMe SSD860,000 / 360,000
m6gd.metal2 x 1900 GBNVMe SSD860,000 / 360,000
M6id
m6id.large1 x 118 GBNVMe SSD33,542 / 16,771
m6id.xlarge1 x 237 GBNVMe SSD67,083 / 33,542
m6id.2xlarge1 x 474 GBNVMe SSD134,167 / 67,084
m6id.4xlarge1 x 950 GBNVMe SSD268,333 / 134,167
m6id.8xlarge1 x 1900 GBNVMe SSD536,666 / 268,334
m6id.12xlarge2 x 1425 GBNVMe SSD804,998 / 402,500
m6id.16xlarge2 x 1900 GBNVMe SSD1,073,332 / 536,668
m6id.24xlarge4 x 1425 GBNVMe SSD1,609,996 / 805,000
m6id.32xlarge4 x 1900 GBNVMe SSD2,146,664 / 1,073,336
m6id.metal4 x 1900 GBNVMe SSD2,146,664 / 1,073,336
M6idn
m6idn.large1 x 118 GBNVMe SSD33,542 / 16,771
m6idn.xlarge1 x 237 GBNVMe SSD67,083 / 33,542
m6idn.2xlarge1 x 474 GBNVMe SSD134,167 / 67,084
m6idn.4xlarge1 x 950 GBNVMe SSD268,333 / 134,167
m6idn.8xlarge1 x 1900 GBNVMe SSD536,666 / 268,334
m6idn.12xlarge2 x 1425 GBNVMe SSD804,998 / 402,500
m6idn.16xlarge2 x 1900 GBNVMe SSD1,073,332 / 536,668
m6idn.24xlarge4 x 1425 GBNVMe SSD1,609,996 / 805,000
m6idn.32xlarge4 x 1900 GBNVMe SSD2,146,664 / 1,073,336
m6idn.metal4 x 1900 GBNVMe SSD2,146,664 / 1,073,336
M7gd
m7gd.medium1 x 59 GBNVMe SSD16,771 / 8,385
m7gd.large1 x 118 GBNVMe SSD33,542 / 16,771
m7gd.xlarge1 x 237 GBNVMe SSD67,083 / 33,542
m7gd.2xlarge1 x 474 GBNVMe SSD134,167 / 67,084
m7gd.4xlarge1 x 950 GBNVMe SSD268,333 / 134,167
m7gd.8xlarge1 x 1900 GBNVMe SSD536,666 / 268,334
m7gd.12xlarge2 x 1425 GBNVMe SSD804,998 / 402,500
m7gd.16xlarge2 x 1900 GBNVMe SSD1,073,332 / 536,668
m7gd.metal2 x 1900 GBNVMe SSD1,073,332 / 536,668
+

1 Volumes attached to certain instances suffer a first-write + penalty unless initialized. For more information, see Optimize disk performance for + instance store volumes.

+

2 For more information, see Instance + store volume TRIM support.

+ +

Security specifications

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Instance typeEBS encryptionInstance store encryptionEncryption in transitAMD SEV-SNPNitroTPMNitro Enclaves
M5
m5.largeInstance store not supported
m5.xlargeInstance store not supported
m5.2xlargeInstance store not supported
m5.4xlargeInstance store not supported
m5.8xlargeInstance store not supported
m5.12xlargeInstance store not supported
m5.16xlargeInstance store not supported
m5.24xlargeInstance store not supported
m5.metalInstance store not supported
M5a
m5a.largeInstance store not supported
m5a.xlargeInstance store not supported
m5a.2xlargeInstance store not supported
m5a.4xlargeInstance store not supported
m5a.8xlargeInstance store not supported
m5a.12xlargeInstance store not supported
m5a.16xlargeInstance store not supported
m5a.24xlargeInstance store not supported
M5ad
m5ad.large
m5ad.xlarge
m5ad.2xlarge
m5ad.4xlarge
m5ad.8xlarge
m5ad.12xlarge
m5ad.16xlarge
m5ad.24xlarge
M5d
m5d.large
m5d.xlarge
m5d.2xlarge
m5d.4xlarge
m5d.8xlarge
m5d.12xlarge
m5d.16xlarge
m5d.24xlarge
m5d.metal
M5dn
m5dn.large
m5dn.xlarge
m5dn.2xlarge
m5dn.4xlarge
m5dn.8xlarge
m5dn.12xlarge
m5dn.16xlarge
m5dn.24xlarge
m5dn.metal
M5n
m5n.largeInstance store not supported
m5n.xlargeInstance store not supported
m5n.2xlargeInstance store not supported
m5n.4xlargeInstance store not supported
m5n.8xlargeInstance store not supported
m5n.12xlargeInstance store not supported
m5n.16xlargeInstance store not supported
m5n.24xlargeInstance store not supported
m5n.metalInstance store not supported
M5zn
m5zn.largeInstance store not supported
m5zn.xlargeInstance store not supported
m5zn.2xlargeInstance store not supported
m5zn.3xlargeInstance store not supported
m5zn.6xlargeInstance store not supported
m5zn.12xlargeInstance store not supported
m5zn.metalInstance store not supported
M6a
m6a.largeInstance store not supported
m6a.xlargeInstance store not supported
m6a.2xlargeInstance store not supported
m6a.4xlargeInstance store not supported
m6a.8xlargeInstance store not supported
m6a.12xlargeInstance store not supported
m6a.16xlargeInstance store not supported
m6a.24xlargeInstance store not supported
m6a.32xlargeInstance store not supported
m6a.48xlargeInstance store not supported
m6a.metalInstance store not supported
M6g
m6g.mediumInstance store not supported
m6g.largeInstance store not supported
m6g.xlargeInstance store not supported
m6g.2xlargeInstance store not supported
m6g.4xlargeInstance store not supported
m6g.8xlargeInstance store not supported
m6g.12xlargeInstance store not supported
m6g.16xlargeInstance store not supported
m6g.metalInstance store not supported
M6gd
m6gd.medium
m6gd.large
m6gd.xlarge
m6gd.2xlarge
m6gd.4xlarge
m6gd.8xlarge
m6gd.12xlarge
m6gd.16xlarge
m6gd.metal
M6i
m6i.largeInstance store not supported
m6i.xlargeInstance store not supported
m6i.2xlargeInstance store not supported
m6i.4xlargeInstance store not supported
m6i.8xlargeInstance store not supported
m6i.12xlargeInstance store not supported
m6i.16xlargeInstance store not supported
m6i.24xlargeInstance store not supported
m6i.32xlargeInstance store not supported
m6i.metalInstance store not supported
M6id
m6id.large
m6id.xlarge
m6id.2xlarge
m6id.4xlarge
m6id.8xlarge
m6id.12xlarge
m6id.16xlarge
m6id.24xlarge
m6id.32xlarge
m6id.metal
M6idn
m6idn.large
m6idn.xlarge
m6idn.2xlarge
m6idn.4xlarge
m6idn.8xlarge
m6idn.12xlarge
m6idn.16xlarge
m6idn.24xlarge
m6idn.32xlarge
m6idn.metal
M6in
m6in.largeInstance store not supported
m6in.xlargeInstance store not supported
m6in.2xlargeInstance store not supported
m6in.4xlargeInstance store not supported
m6in.8xlargeInstance store not supported
m6in.12xlargeInstance store not supported
m6in.16xlargeInstance store not supported
m6in.24xlargeInstance store not supported
m6in.32xlargeInstance store not supported
m6in.metalInstance store not supported
M7a
m7a.mediumInstance store not supported
m7a.largeInstance store not supported
m7a.xlargeInstance store not supported
m7a.2xlargeInstance store not supported
m7a.4xlargeInstance store not supported
m7a.8xlargeInstance store not supported
m7a.12xlargeInstance store not supported
m7a.16xlargeInstance store not supported
m7a.24xlargeInstance store not supported
m7a.32xlargeInstance store not supported
m7a.48xlargeInstance store not supported
m7a.metal-48xlInstance store not supported
M7g
m7g.mediumInstance store not supported
m7g.largeInstance store not supported
m7g.xlargeInstance store not supported
m7g.2xlargeInstance store not supported
m7g.4xlargeInstance store not supported
m7g.8xlargeInstance store not supported
m7g.12xlargeInstance store not supported
m7g.16xlargeInstance store not supported
m7g.metalInstance store not supported
M7gd
m7gd.medium
m7gd.large
m7gd.xlarge
m7gd.2xlarge
m7gd.4xlarge
m7gd.8xlarge
m7gd.12xlarge
m7gd.16xlarge
m7gd.metal
M7i
m7i.largeInstance store not supported
m7i.xlargeInstance store not supported
m7i.2xlargeInstance store not supported
m7i.4xlargeInstance store not supported
m7i.8xlargeInstance store not supported
m7i.12xlargeInstance store not supported
m7i.16xlargeInstance store not supported
m7i.24xlargeInstance store not supported
m7i.48xlargeInstance store not supported
m7i.metal-24xlInstance store not supported
m7i.metal-48xlInstance store not supported
M7i-flex
m7i-flex.largeInstance store not supported
m7i-flex.xlargeInstance store not supported
m7i-flex.2xlargeInstance store not supported
m7i-flex.4xlargeInstance store not supported
m7i-flex.8xlargeInstance store not supported
Mac1
mac1.metalInstance store not supported
Mac2
mac2.metalInstance store not supported
Mac2-m2
mac2-m2.metalInstance store not supported
Mac2-m2pro
mac2-m2pro.metalInstance store not supported
T2
t2.nanoInstance store not supported
t2.microInstance store not supported
t2.smallInstance store not supported
t2.mediumInstance store not supported
t2.largeInstance store not supported
t2.xlargeInstance store not supported
t2.2xlargeInstance store not supported
T3
t3.nanoInstance store not supported
t3.microInstance store not supported
t3.smallInstance store not supported
t3.mediumInstance store not supported
t3.largeInstance store not supported
t3.xlargeInstance store not supported
t3.2xlargeInstance store not supported
T3a
t3a.nanoInstance store not supported
t3a.microInstance store not supported
t3a.smallInstance store not supported
t3a.mediumInstance store not supported
t3a.largeInstance store not supported
t3a.xlargeInstance store not supported
t3a.2xlargeInstance store not supported
T4g
t4g.nanoInstance store not supported
t4g.microInstance store not supported
t4g.smallInstance store not supported
t4g.mediumInstance store not supported
t4g.largeInstance store not supported
t4g.xlargeInstance store not supported
t4g.2xlargeInstance store not supported
+
\ No newline at end of file diff --git a/hack/code/bandwidth_gen/main.go b/hack/code/bandwidth_gen/main.go index 3bbdc2e132fd..7ec947dbdd8a 100644 --- a/hack/code/bandwidth_gen/main.go +++ b/hack/code/bandwidth_gen/main.go @@ -32,12 +32,13 @@ import ( ) var uriSelectors = map[string]string{ - "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/general-purpose-instances.html": "#general-purpose-network-performance", - "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/compute-optimized-instances.html": "#compute-network-performance", - "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/memory-optimized-instances.html": "#memory-network-perf", - "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/storage-optimized-instances.html": "#storage-network-performance", - "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/accelerated-computing-instances.html": "#gpu-network-performance", - "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/high-performance-computing-instances.html": "#hpc-network-performance", + "https://docs.aws.amazon.com/ec2/latest/instancetypes/gp.html": "#gp_network", + "https://docs.aws.amazon.com/ec2/latest/instancetypes/co.html": "#co_network", + "https://docs.aws.amazon.com/ec2/latest/instancetypes/mo.html": "#mo_network", + "https://docs.aws.amazon.com/ec2/latest/instancetypes/so.html": "#so_network", + "https://docs.aws.amazon.com/ec2/latest/instancetypes/ac.html": "#ac_network", + "https://docs.aws.amazon.com/ec2/latest/instancetypes/hpc.html": "#hpc_network", + "https://docs.aws.amazon.com/ec2/latest/instancetypes/pg.html": "#pg_network", } const fileFormat = ` @@ -62,6 +63,7 @@ func main() { } bandwidth := map[string]int64{} + vagueBandwidth := map[string]string{} for uri, selector := range uriSelectors { func() { @@ -70,16 +72,18 @@ func main() { doc := lo.Must(goquery.NewDocumentFromReader(response.Body)) - // grab two tables that contain the network performance values - // first table will contain all the instance type and 
bandwidth data - // some rows will will have vague describe such as "Very Low", "Low", "Low to Moderate", etc. - // These instance types will can be found on the second table with absolute values in Gbps - // If the instance type is skipped on the first table it will be grabbed on the second table + // grab the table that contains the network performance values. Some instance types will have vague + // description for bandwidth such as "Very Low", "Low", "Low to Moderate", etc. These instance types + // will be ignored since we don't know the exact bandwidth for these instance types for _, row := range doc.Find(selector).NextAllFiltered(".table-container").Eq(0).Find("tbody").Find("tr").Nodes { - instanceTypeData := row.FirstChild.NextSibling.FirstChild.FirstChild.Data + instanceTypeData := strings.TrimSpace(row.FirstChild.NextSibling.FirstChild.Data) + if !strings.ContainsAny(instanceTypeData, ".") { + continue + } bandwidthData := row.FirstChild.NextSibling.NextSibling.NextSibling.FirstChild.Data // exclude all rows that contain any of the following strings if containsAny(bandwidthData, "Low", "Moderate", "High", "Up to") { + vagueBandwidth[instanceTypeData] = bandwidthData continue } bandwidthSlice := strings.Split(bandwidthData, " ") @@ -92,30 +96,9 @@ func main() { bandwidth[instanceTypeData] = int64(lo.Must(strconv.ParseFloat(bandwidthSlice[0], 64)) * 1000) } } - - // Collect instance types bandwidth data from the baseline/bandwidth table underneath the standard table - // The HPC network performance doc is laid out differently than the other docs. There is no table underneath - // the standard table that contains information for network performance with baseline and burst bandwidth. 
- if selector != "#hpc-network-performance" { - for _, row := range doc.Find(selector).NextAllFiltered(".table-container").Eq(1).Find("tbody").Find("tr").Nodes { - instanceTypeData := row.FirstChild.NextSibling.FirstChild.FirstChild.Data - bandwidthData := row.FirstChild.NextSibling.NextSibling.NextSibling.FirstChild.Data - bandwidth[instanceTypeData] = int64(lo.Must(strconv.ParseFloat(bandwidthData, 64)) * 1000) - } - } }() } - if err := os.Setenv("AWS_SDK_LOAD_CONFIG", "true"); err != nil { - log.Fatalf("setting AWS_SDK_LOAD_CONFIG, %s", err) - } - if err := os.Setenv("AWS_REGION", "us-east-1"); err != nil { - log.Fatalf("setting AWS_REGION, %s", err) - } - sess := session.Must(session.NewSession()) - ec2api := ec2.New(sess) - instanceTypesOutput := lo.Must(ec2api.DescribeInstanceTypes(&ec2.DescribeInstanceTypesInput{})) - allInstanceTypes := lo.Map(instanceTypesOutput.InstanceTypes, func(info *ec2.InstanceTypeInfo, _ int) string { return *info.InstanceType }) - + allInstanceTypes := getAllInstanceTypes() instanceTypes := lo.Keys(bandwidth) // 2d sort for readability sort.Strings(allInstanceTypes) @@ -127,6 +110,10 @@ func main() { // Generate body var body string for _, instanceType := range lo.Without(allInstanceTypes, instanceTypes...) 
{ + if lo.Contains(lo.Keys(vagueBandwidth), instanceType) { + body += fmt.Sprintf("// %s has vague bandwidth information, bandwidth is %s\n", instanceType, vagueBandwidth[instanceType]) + continue + } body += fmt.Sprintf("// %s is not available in https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html\n", instanceType) } for _, instanceType := range instanceTypes { @@ -150,3 +137,29 @@ func containsAny(value string, excludedSubstrings ...string) bool { } return false } + +func getAllInstanceTypes() []string { + if err := os.Setenv("AWS_SDK_LOAD_CONFIG", "true"); err != nil { + log.Fatalf("setting AWS_SDK_LOAD_CONFIG, %s", err) + } + if err := os.Setenv("AWS_REGION", "us-east-1"); err != nil { + log.Fatalf("setting AWS_REGION, %s", err) + } + sess := session.Must(session.NewSession()) + ec2api := ec2.New(sess) + var allInstanceTypes []string + + params := &ec2.DescribeInstanceTypesInput{} + // Retrieve the instance types in a loop using NextToken + for { + result := lo.Must(ec2api.DescribeInstanceTypes(params)) + allInstanceTypes = append(allInstanceTypes, lo.Map(result.InstanceTypes, func(info *ec2.InstanceTypeInfo, _ int) string { return *info.InstanceType })...) 
+ // Check if there are any instances left + if result.NextToken != nil { + params.NextToken = result.NextToken + } else { + break + } + } + return allInstanceTypes +} diff --git a/hack/code/instancetype_testdata_gen/main.go b/hack/code/instancetype_testdata_gen/main.go index 9069cb63cd60..e0df1b16163d 100644 --- a/hack/code/instancetype_testdata_gen/main.go +++ b/hack/code/instancetype_testdata_gen/main.go @@ -130,6 +130,23 @@ func getInstanceTypeInfo(info *ec2.InstanceTypeInfo) string { fmt.Fprintf(src, "SizeInMiB: aws.Int64(%d),\n", lo.FromPtr(info.MemoryInfo.SizeInMiB)) fmt.Fprintf(src, "},\n") + if info.EbsInfo != nil { + fmt.Fprintf(src, "EbsInfo: &ec2.EbsInfo{\n") + if info.EbsInfo.EbsOptimizedInfo != nil { + fmt.Fprintf(src, "EbsOptimizedInfo: &ec2.EbsOptimizedInfo{\n") + fmt.Fprintf(src, "BaselineBandwidthInMbps: aws.Int64(%d),\n", lo.FromPtr(info.EbsInfo.EbsOptimizedInfo.BaselineBandwidthInMbps)) + fmt.Fprintf(src, "BaselineIops: aws.Int64(%d),\n", lo.FromPtr(info.EbsInfo.EbsOptimizedInfo.BaselineIops)) + fmt.Fprintf(src, "BaselineThroughputInMBps: aws.Float64(%.2f),\n", lo.FromPtr(info.EbsInfo.EbsOptimizedInfo.BaselineThroughputInMBps)) + fmt.Fprintf(src, "MaximumBandwidthInMbps: aws.Int64(%d),\n", lo.FromPtr(info.EbsInfo.EbsOptimizedInfo.MaximumBandwidthInMbps)) + fmt.Fprintf(src, "MaximumIops: aws.Int64(%d),\n", lo.FromPtr(info.EbsInfo.EbsOptimizedInfo.MaximumIops)) + fmt.Fprintf(src, "MaximumThroughputInMBps: aws.Float64(%.2f),\n", lo.FromPtr(info.EbsInfo.EbsOptimizedInfo.MaximumThroughputInMBps)) + fmt.Fprintf(src, "},\n") + } + fmt.Fprintf(src, "EbsOptimizedSupport: aws.String(\"%s\"),\n", lo.FromPtr(info.EbsInfo.EbsOptimizedSupport)) + fmt.Fprintf(src, "EncryptionSupport: aws.String(\"%s\"),\n", lo.FromPtr(info.EbsInfo.EncryptionSupport)) + fmt.Fprintf(src, "NvmeSupport: aws.String(\"%s\"),\n", lo.FromPtr(info.EbsInfo.NvmeSupport)) + fmt.Fprintf(src, "},\n") + } if info.InferenceAcceleratorInfo != nil { fmt.Fprintf(src, "InferenceAcceleratorInfo: 
&ec2.InferenceAcceleratorInfo{\n") fmt.Fprintf(src, "Accelerators: []*ec2.InferenceDeviceInfo{\n") diff --git a/hack/docs/instancetypes_gen_docs.go b/hack/docs/instancetypes_gen_docs.go index 074dd4c7222b..2c56c0274c33 100644 --- a/hack/docs/instancetypes_gen_docs.go +++ b/hack/docs/instancetypes_gen_docs.go @@ -23,6 +23,7 @@ import ( "sort" "strings" + "github.com/aws/aws-sdk-go/service/ec2" "github.com/samber/lo" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -32,13 +33,13 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/karpenter/pkg/apis/v1beta1" + corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" coreoperator "sigs.k8s.io/karpenter/pkg/operator" coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" coretest "sigs.k8s.io/karpenter/pkg/test" - awscloudprovider "github.com/aws/karpenter-provider-aws/pkg/cloudprovider" + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/pkg/operator" "github.com/aws/karpenter-provider-aws/pkg/operator/options" "github.com/aws/karpenter-provider-aws/pkg/test" @@ -95,16 +96,36 @@ func main() { Manager: &FakeManager{}, KubernetesInterface: kubernetes.NewForConfigOrDie(&rest.Config{}), }) - cp := awscloudprovider.New(op.InstanceTypesProvider, op.InstanceProvider, - op.EventRecorder, op.GetClient(), op.AMIProvider, op.SecurityGroupProvider, op.SubnetProvider) - if err := op.InstanceTypesProvider.UpdateInstanceTypes(ctx); err != nil { log.Fatalf("updating instance types, %s", err) } if err := op.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx); err != nil { log.Fatalf("updating instance types offerings, %s", err) } - instanceTypes, err := cp.GetInstanceTypes(ctx, nil) + // Fake a NodeClass so we can use it to get InstanceTypes + nodeClass := &v1beta1.EC2NodeClass{ + Spec: v1beta1.EC2NodeClassSpec{ + AMIFamily: &v1beta1.AMIFamilyAL2023, + 
SubnetSelectorTerms: []v1beta1.SubnetSelectorTerm{ + { + Tags: map[string]string{ + "*": "*", + }, + }, + }, + }, + } + subnets, err := op.SubnetProvider.List(ctx, nodeClass) + if err != nil { + log.Fatalf("listing subnets, %s", err) + } + nodeClass.Status.Subnets = lo.Map(subnets, func(ec2subnet *ec2.Subnet, _ int) v1beta1.Subnet { + return v1beta1.Subnet{ + ID: *ec2subnet.SubnetId, + Zone: *ec2subnet.AvailabilityZone, + } + }) + instanceTypes, err := op.InstanceTypesProvider.List(ctx, &corev1beta1.KubeletConfiguration{}, nodeClass) if err != nil { log.Fatalf("listing instance types, %s", err) } @@ -129,8 +150,7 @@ description: > fmt.Fprintln(f, `AWS instance types offer varying resources and can be selected by labels. The values provided below are the resources available with some assumptions and after the instance overhead has been subtracted: - `+"`blockDeviceMappings` are not configured"+` -- `+"`aws-eni-limited-pod-density` is assumed to be `true`"+` -- `+"`amiFamily` is set to the default of `AL2`") +- `+"`amiFamily` is set to `AL2023`") // generate a map of family -> instance types along with some other sorted lists. The sorted lists ensure we // generate consistent docs every run. 
@@ -152,7 +172,7 @@ below are the resources available with some assumptions and after the instance o // we don't want to show a few labels that will vary amongst regions delete(labelNameMap, v1.LabelTopologyZone) - delete(labelNameMap, v1beta1.CapacityTypeLabelKey) + delete(labelNameMap, corev1beta1.CapacityTypeLabelKey) labelNames := lo.Keys(labelNameMap) diff --git a/hack/validation/labels.sh b/hack/validation/labels.sh index 1fb9c6293b32..f3b7990bc6d7 100755 --- a/hack/validation/labels.sh +++ b/hack/validation/labels.sh @@ -4,4 +4,4 @@ # ## checking for restricted labels while filtering out well known labels yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.metadata.properties.labels.x-kubernetes-validations += [ - {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self.all(x, x in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\"))"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml \ No newline at end of file + {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self.all(x, x in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", 
\"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !x.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\"))"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml \ No newline at end of file diff --git a/hack/validation/requirements.sh b/hack/validation/requirements.sh index 763d359fab16..3a74bb0d3962 100755 --- a/hack/validation/requirements.sh +++ b/hack/validation/requirements.sh @@ -4,9 +4,9 @@ ## checking for restricted labels while filtering out well known labels yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [ - {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", 
\"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml + {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml # # Adding validation for nodepool # ## checking for restricted labels while filtering out well known labels yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [ - {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", 
\"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml + {"message": "label domain \"karpenter.k8s.aws\" is restricted", "rule": "self in [\"karpenter.k8s.aws/instance-encryption-in-transit-supported\", \"karpenter.k8s.aws/instance-category\", \"karpenter.k8s.aws/instance-hypervisor\", \"karpenter.k8s.aws/instance-family\", \"karpenter.k8s.aws/instance-generation\", \"karpenter.k8s.aws/instance-local-nvme\", \"karpenter.k8s.aws/instance-size\", \"karpenter.k8s.aws/instance-cpu\",\"karpenter.k8s.aws/instance-cpu-manufacturer\",\"karpenter.k8s.aws/instance-memory\", \"karpenter.k8s.aws/instance-ebs-bandwidth\", \"karpenter.k8s.aws/instance-network-bandwidth\", \"karpenter.k8s.aws/instance-gpu-name\", \"karpenter.k8s.aws/instance-gpu-manufacturer\", \"karpenter.k8s.aws/instance-gpu-count\", \"karpenter.k8s.aws/instance-gpu-memory\", \"karpenter.k8s.aws/instance-accelerator-name\", \"karpenter.k8s.aws/instance-accelerator-manufacturer\", \"karpenter.k8s.aws/instance-accelerator-count\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.k8s.aws\")"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 66ec659e7747..aec3d01d61d4 100644 --- 
a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -478,19 +478,12 @@ spec: type items: description: |- - A node selector requirement with min values is a selector that contains values, a key, an operator that relates the key and values - and minValues that represent the requirement to have at least that many values. + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. properties: key: description: The label key that the selector applies to. type: string - minValues: - description: |- - This field is ALPHA and can be dropped or replaced at any time - MinValues is the minimum number of unique values required to define the flexibility of the specific requirement. - maximum: 50 - minimum: 1 - type: integer operator: description: |- Represents a key's relationship to a set of values. @@ -506,6 +499,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic required: - key - operator @@ -516,6 +510,68 @@ spec: - requirements type: object type: array + conditions: + description: Conditions contains signals for health and readiness + items: + description: Condition aliases the upstream type and adds additional + helper methods + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. 
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. 
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array instanceProfile: description: InstanceProfile contains the resolved instance profile for the role @@ -552,6 +608,9 @@ spec: zone: description: The associated availability zone type: string + zoneID: + description: The associated availability zone ID + type: string required: - id - zone diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml index 37abdeca8e80..63dee5756d69 100644 --- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +++ b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.14.0 + controller-gen.kubebuilder.io/version: v0.15.0 name: nodeclaims.karpenter.sh spec: group: karpenter.sh @@ -220,7 +220,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", 
"karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time @@ -250,6 +250,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic maxLength: 63 pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$ required: @@ -384,34 +385,57 @@ spec: conditions: description: Conditions contains signals for health and readiness items: - description: |- - Condition defines a readiness condition for a Knative resource. - See: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties + description: Condition aliases the upstream type and adds additional helper methods properties: lastTransitionTime: description: |- - LastTransitionTime is the last time the condition transitioned from one status to another. 
- We use VolatileTime in place of metav1.Time to exclude this from creating equality.Semantic - differences (all other things held constant). + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time type: string message: - description: A human readable message indicating details about the transition. + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer reason: - description: The reason for the condition's last transition. - type: string - severity: description: |- - Severity with which to treat failures of this type of condition. - When this is not specified, it defaults to Error. + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ type: string status: - description: Status of the condition, one of True, False, Unknown. + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown type: string type: - description: Type of condition. 
+ description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string required: + - lastTransitionTime - status - type type: object diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index 44a38267e3e9..20e234581e45 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.14.0 + controller-gen.kubebuilder.io/version: v0.15.0 name: nodepools.karpenter.sh spec: group: karpenter.sh @@ -190,7 +190,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self.all(x, x != "kubernetes.io/hostname") - message: label domain "karpenter.k8s.aws" is restricted - rule: self.all(x, x in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", 
"karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) + rule: self.all(x, x in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !x.find("^([^/]+)").endsWith("karpenter.k8s.aws")) type: object spec: description: NodeClaimSpec describes the desired state of the NodeClaim @@ -348,7 +348,7 @@ spec: - message: label "kubernetes.io/hostname" is restricted rule: self != "kubernetes.io/hostname" - message: label domain "karpenter.k8s.aws" is restricted - rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", 
"karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") + rule: self in ["karpenter.k8s.aws/instance-encryption-in-transit-supported", "karpenter.k8s.aws/instance-category", "karpenter.k8s.aws/instance-hypervisor", "karpenter.k8s.aws/instance-family", "karpenter.k8s.aws/instance-generation", "karpenter.k8s.aws/instance-local-nvme", "karpenter.k8s.aws/instance-size", "karpenter.k8s.aws/instance-cpu","karpenter.k8s.aws/instance-cpu-manufacturer","karpenter.k8s.aws/instance-memory", "karpenter.k8s.aws/instance-ebs-bandwidth", "karpenter.k8s.aws/instance-network-bandwidth", "karpenter.k8s.aws/instance-gpu-name", "karpenter.k8s.aws/instance-gpu-manufacturer", "karpenter.k8s.aws/instance-gpu-count", "karpenter.k8s.aws/instance-gpu-memory", "karpenter.k8s.aws/instance-accelerator-name", "karpenter.k8s.aws/instance-accelerator-manufacturer", "karpenter.k8s.aws/instance-accelerator-count"] || !self.find("^([^/]+)").endsWith("karpenter.k8s.aws") minValues: description: |- This field is ALPHA and can be dropped or replaced at any time @@ -378,6 +378,7 @@ spec: items: type: string type: array + x-kubernetes-list-type: atomic maxLength: 63 pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$ required: diff --git a/pkg/apis/v1beta1/ec2nodeclass_status.go b/pkg/apis/v1beta1/ec2nodeclass_status.go index 611e94d62117..9510e5b0567b 100644 --- a/pkg/apis/v1beta1/ec2nodeclass_status.go +++ b/pkg/apis/v1beta1/ec2nodeclass_status.go @@ -15,7 +15,8 @@ limitations under the License. 
package v1beta1 import ( - corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" + "github.com/awslabs/operatorpkg/status" + v1 "k8s.io/api/core/v1" ) // Subnet contains resolved Subnet selector values utilized for node launch @@ -26,6 +27,9 @@ type Subnet struct { // The associated availability zone // +required Zone string `json:"zone"` + // The associated availability zone ID + // +optional + ZoneID string `json:"zoneID,omitempty"` } // SecurityGroup contains resolved SecurityGroup selector values utilized for node launch @@ -48,7 +52,7 @@ type AMI struct { Name string `json:"name,omitempty"` // Requirements of the AMI to be utilized on an instance type // +required - Requirements []corev1beta1.NodeSelectorRequirementWithMinValues `json:"requirements"` + Requirements []v1.NodeSelectorRequirement `json:"requirements"` } // EC2NodeClassStatus contains the resolved state of the EC2NodeClass @@ -68,4 +72,19 @@ type EC2NodeClassStatus struct { // InstanceProfile contains the resolved instance profile for the role // +optional InstanceProfile string `json:"instanceProfile,omitempty"` + // Conditions contains signals for health and readiness + // +optional + Conditions []status.Condition `json:"conditions,omitempty"` +} + +func (in *EC2NodeClass) StatusConditions() status.ConditionSet { + return status.NewReadyConditions().For(in) +} + +func (in *EC2NodeClass) GetConditions() []status.Condition { + return in.Status.Conditions +} + +func (in *EC2NodeClass) SetConditions(conditions []status.Condition) { + in.Status.Conditions = conditions } diff --git a/pkg/apis/v1beta1/labels.go b/pkg/apis/v1beta1/labels.go index fa49b4dc4dfd..ac72ab48b908 100644 --- a/pkg/apis/v1beta1/labels.go +++ b/pkg/apis/v1beta1/labels.go @@ -37,6 +37,7 @@ func init() { LabelInstanceCPU, LabelInstanceCPUManufacturer, LabelInstanceMemory, + LabelInstanceEBSBandwidth, LabelInstanceNetworkBandwidth, LabelInstanceGPUName, LabelInstanceGPUManufacturer, @@ -45,6 +46,7 @@ func init() { 
LabelInstanceAcceleratorName, LabelInstanceAcceleratorManufacturer, LabelInstanceAcceleratorCount, + LabelTopologyZoneID, v1.LabelWindowsBuild, ) } @@ -93,6 +95,8 @@ var ( LabelNodeClass = Group + "/ec2nodeclass" + LabelTopologyZoneID = "topology.k8s.aws/zone-id" + LabelInstanceHypervisor = Group + "/instance-hypervisor" LabelInstanceEncryptionInTransitSupported = Group + "/instance-encryption-in-transit-supported" LabelInstanceCategory = Group + "/instance-category" @@ -103,6 +107,7 @@ var ( LabelInstanceCPU = Group + "/instance-cpu" LabelInstanceCPUManufacturer = Group + "/instance-cpu-manufacturer" LabelInstanceMemory = Group + "/instance-memory" + LabelInstanceEBSBandwidth = Group + "/instance-ebs-bandwidth" LabelInstanceNetworkBandwidth = Group + "/instance-network-bandwidth" LabelInstanceGPUName = Group + "/instance-gpu-name" LabelInstanceGPUManufacturer = Group + "/instance-gpu-manufacturer" diff --git a/pkg/apis/v1beta1/suite_test.go b/pkg/apis/v1beta1/suite_test.go index c610ab795189..a07a4471a03e 100644 --- a/pkg/apis/v1beta1/suite_test.go +++ b/pkg/apis/v1beta1/suite_test.go @@ -20,9 +20,9 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . "sigs.k8s.io/karpenter/pkg/utils/testing" "sigs.k8s.io/karpenter/pkg/operator/scheme" coretest "sigs.k8s.io/karpenter/pkg/test" diff --git a/pkg/apis/v1beta1/zz_generated.deepcopy.go b/pkg/apis/v1beta1/zz_generated.deepcopy.go index 781d88c876c8..f248b480be5b 100644 --- a/pkg/apis/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/v1beta1/zz_generated.deepcopy.go @@ -19,8 +19,9 @@ limitations under the License. package v1beta1 import ( + "github.com/awslabs/operatorpkg/status" + "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" - apisv1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
@@ -28,7 +29,7 @@ func (in *AMI) DeepCopyInto(out *AMI) { *out = *in if in.Requirements != nil { in, out := &in.Requirements, &out.Requirements - *out = make([]apisv1beta1.NodeSelectorRequirementWithMinValues, len(*in)) + *out = make([]v1.NodeSelectorRequirement, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } @@ -320,6 +321,13 @@ func (in *EC2NodeClassStatus) DeepCopyInto(out *EC2NodeClassStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]status.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EC2NodeClassStatus. diff --git a/pkg/batcher/createfleet.go b/pkg/batcher/createfleet.go index 2d7a17557391..6f9ab1221d7b 100644 --- a/pkg/batcher/createfleet.go +++ b/pkg/batcher/createfleet.go @@ -22,7 +22,7 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/ec2/ec2iface" - "knative.dev/pkg/logging" + "sigs.k8s.io/controller-runtime/pkg/log" ) type CreateFleetBatcher struct { @@ -70,7 +70,7 @@ func execCreateFleetBatch(ec2api ec2iface.EC2API) BatchExecutor[ec2.CreateFleetI for _, instanceID := range reservation.InstanceIds { requestIdx++ if requestIdx >= len(inputs) { - logging.FromContext(ctx).Errorf("received more instances than requested, ignoring instance %s", aws.StringValue(instanceID)) + log.FromContext(ctx).Error(fmt.Errorf("received more instances than requested, ignoring instance %s", aws.StringValue(instanceID)), "received error while batching") continue } results = append(results, Result[ec2.CreateFleetOutput]{ diff --git a/pkg/batcher/describeinstances.go b/pkg/batcher/describeinstances.go index b0b9df7a4853..961402aa5dcd 100644 --- a/pkg/batcher/describeinstances.go +++ b/pkg/batcher/describeinstances.go @@ -26,7 +26,7 @@ import ( 
"github.com/mitchellh/hashstructure/v2" "github.com/samber/lo" "k8s.io/apimachinery/pkg/util/sets" - "knative.dev/pkg/logging" + "sigs.k8s.io/controller-runtime/pkg/log" ) type DescribeInstancesBatcher struct { @@ -56,7 +56,7 @@ func (b *DescribeInstancesBatcher) DescribeInstances(ctx context.Context, descri func FilterHasher(ctx context.Context, input *ec2.DescribeInstancesInput) uint64 { hash, err := hashstructure.Hash(input.Filters, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) if err != nil { - logging.FromContext(ctx).Errorf("error hashing") + log.FromContext(ctx).Error(err, "failed hashing input filters") } return hash } diff --git a/pkg/batcher/suite_test.go b/pkg/batcher/suite_test.go index 3f33fb033ec3..523c05c7e213 100644 --- a/pkg/batcher/suite_test.go +++ b/pkg/batcher/suite_test.go @@ -30,7 +30,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" + . "sigs.k8s.io/karpenter/pkg/utils/testing" ) var fakeEC2API *fake.EC2API diff --git a/pkg/batcher/terminateinstances.go b/pkg/batcher/terminateinstances.go index ba3442e02dd6..c1d5d6d49c37 100644 --- a/pkg/batcher/terminateinstances.go +++ b/pkg/batcher/terminateinstances.go @@ -25,7 +25,7 @@ import ( "github.com/aws/aws-sdk-go/service/ec2/ec2iface" "github.com/samber/lo" "k8s.io/apimachinery/pkg/util/sets" - "knative.dev/pkg/logging" + "sigs.k8s.io/controller-runtime/pkg/log" ) type TerminateInstancesBatcher struct { @@ -68,7 +68,7 @@ func execTerminateInstancesBatch(ec2api ec2iface.EC2API) BatchExecutor[ec2.Termi // We don't care about the error here since we'll break up the batch upon any sort of failure output, err := ec2api.TerminateInstancesWithContext(ctx, firstInput) if err != nil { - logging.FromContext(ctx).Errorf("terminating instances, %s", err) + log.FromContext(ctx).Error(err, "failed terminating instances") } if output == nil { diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go index 796fa1ff9b20..2b7a926db59b 
100644 --- a/pkg/cache/cache.go +++ b/pkg/cache/cache.go @@ -32,9 +32,9 @@ const ( // InstanceProfileTTL is the time before we refresh checking instance profile existence at IAM InstanceProfileTTL = 15 * time.Minute // AvailableIPAddressTTL is time to drop AvailableIPAddress data if it is not updated within the TTL - AvailableIPAddressTTL = 2 * time.Minute + AvailableIPAddressTTL = 5 * time.Minute // AvailableIPAddressTTL is time to drop AssociatePublicIPAddressTTL data if it is not updated within the TTL - AssociatePublicIPAddressTTL = 2 * time.Minute + AssociatePublicIPAddressTTL = 5 * time.Minute ) const ( diff --git a/pkg/cache/unavailableofferings.go b/pkg/cache/unavailableofferings.go index bbc2c16b3fb5..e909d4fce161 100644 --- a/pkg/cache/unavailableofferings.go +++ b/pkg/cache/unavailableofferings.go @@ -22,7 +22,7 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/patrickmn/go-cache" - "knative.dev/pkg/logging" + "sigs.k8s.io/controller-runtime/pkg/log" ) // UnavailableOfferings stores any offerings that return ICE (insufficient capacity errors) when @@ -54,12 +54,12 @@ func (u *UnavailableOfferings) IsUnavailable(instanceType, zone, capacityType st // MarkUnavailable communicates recently observed temporary capacity shortages in the provided offerings func (u *UnavailableOfferings) MarkUnavailable(ctx context.Context, unavailableReason, instanceType, zone, capacityType string) { // even if the key is already in the cache, we still need to call Set to extend the cached entry's TTL - logging.FromContext(ctx).With( + log.FromContext(ctx).WithValues( "reason", unavailableReason, "instance-type", instanceType, "zone", zone, "capacity-type", capacityType, - "ttl", UnavailableOfferingsTTL).Debugf("removing offering from offerings") + "ttl", UnavailableOfferingsTTL).V(1).Info("removing offering from offerings") u.cache.SetDefault(u.key(instanceType, zone, capacityType), struct{}{}) atomic.AddUint64(&u.SeqNum, 1) } diff 
--git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index acea3187e4d1..a4c0c83fff16 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -21,10 +21,12 @@ import ( "time" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/awslabs/operatorpkg/status" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/log" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/events" "sigs.k8s.io/karpenter/pkg/scheduling" @@ -38,7 +40,6 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "knative.dev/pkg/logging" "sigs.k8s.io/controller-runtime/pkg/client" cloudproviderevents "github.com/aws/karpenter-provider-aws/pkg/cloudprovider/events" @@ -46,7 +47,6 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/instance" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" - "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" "sigs.k8s.io/karpenter/pkg/cloudprovider" ) @@ -61,18 +61,16 @@ type CloudProvider struct { instanceProvider instance.Provider amiProvider amifamily.Provider securityGroupProvider securitygroup.Provider - subnetProvider subnet.Provider } func New(instanceTypeProvider instancetype.Provider, instanceProvider instance.Provider, recorder events.Recorder, - kubeClient client.Client, amiProvider amifamily.Provider, securityGroupProvider securitygroup.Provider, subnetProvider subnet.Provider) *CloudProvider { + kubeClient client.Client, amiProvider amifamily.Provider, securityGroupProvider securitygroup.Provider) *CloudProvider { return &CloudProvider{ instanceTypeProvider: instanceTypeProvider, instanceProvider: instanceProvider, kubeClient: kubeClient, amiProvider: amiProvider, securityGroupProvider: 
securityGroupProvider, - subnetProvider: subnetProvider, recorder: recorder, } } @@ -87,6 +85,10 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *corev1beta1.NodeC // We treat a failure to resolve the NodeClass as an ICE since this means there is no capacity possibilities for this NodeClaim return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("resolving node class, %w", err)) } + nodeClassReady := nodeClass.StatusConditions().Get(status.ConditionReady) + if !nodeClassReady.IsTrue() { + return nil, fmt.Errorf("resolving ec2nodeclass, %s", nodeClassReady.Message) + } instanceTypes, err := c.resolveInstanceTypes(ctx, nodeClaim, nodeClass) if err != nil { return nil, fmt.Errorf("resolving instance types, %w", err) @@ -101,7 +103,7 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *corev1beta1.NodeC instanceType, _ := lo.Find(instanceTypes, func(i *cloudprovider.InstanceType) bool { return i.Name == instance.Type }) - nc := c.instanceToNodeClaim(instance, instanceType) + nc := c.instanceToNodeClaim(instance, instanceType, nodeClass) nc.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), v1beta1.AnnotationEC2NodeClassHashVersion: v1beta1.EC2NodeClassHashVersion, @@ -120,7 +122,11 @@ func (c *CloudProvider) List(ctx context.Context) ([]*corev1beta1.NodeClaim, err if err != nil { return nil, fmt.Errorf("resolving instance type, %w", err) } - nodeClaims = append(nodeClaims, c.instanceToNodeClaim(instance, instanceType)) + nc, err := c.resolveNodeClassFromInstance(ctx, instance) + if client.IgnoreNotFound(err) != nil { + return nil, fmt.Errorf("resolving nodeclass, %w", err) + } + nodeClaims = append(nodeClaims, c.instanceToNodeClaim(instance, instanceType, nc)) } return nodeClaims, nil } @@ -130,7 +136,7 @@ func (c *CloudProvider) Get(ctx context.Context, providerID string) (*corev1beta if err != nil { return nil, fmt.Errorf("getting instance ID, %w", err) } - 
ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("id", id)) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("id", id)) instance, err := c.instanceProvider.Get(ctx, id) if err != nil { return nil, fmt.Errorf("getting instance, %w", err) @@ -139,7 +145,11 @@ func (c *CloudProvider) Get(ctx context.Context, providerID string) (*corev1beta if err != nil { return nil, fmt.Errorf("resolving instance type, %w", err) } - return c.instanceToNodeClaim(instance, instanceType), nil + nc, err := c.resolveNodeClassFromInstance(ctx, instance) + if client.IgnoreNotFound(err) != nil { + return nil, fmt.Errorf("resolving nodeclass, %w", err) + } + return c.instanceToNodeClaim(instance, instanceType, nc), nil } func (c *CloudProvider) LivenessProbe(req *http.Request) error { @@ -148,9 +158,6 @@ func (c *CloudProvider) LivenessProbe(req *http.Request) error { // GetInstanceTypes returns all available InstanceTypes func (c *CloudProvider) GetInstanceTypes(ctx context.Context, nodePool *corev1beta1.NodePool) ([]*cloudprovider.InstanceType, error) { - if nodePool == nil { - return c.instanceTypeProvider.List(ctx, &corev1beta1.KubeletConfiguration{}, &v1beta1.EC2NodeClass{}) - } nodeClass, err := c.resolveNodeClassFromNodePool(ctx, nodePool) if err != nil { if errors.IsNotFound(err) { @@ -170,13 +177,11 @@ func (c *CloudProvider) GetInstanceTypes(ctx context.Context, nodePool *corev1be } func (c *CloudProvider) Delete(ctx context.Context, nodeClaim *corev1beta1.NodeClaim) error { - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("nodeclaim", nodeClaim.Name)) - id, err := utils.ParseInstanceID(nodeClaim.Status.ProviderID) if err != nil { return fmt.Errorf("getting instance ID, %w", err) } - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("id", id)) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("id", id)) return c.instanceProvider.Delete(ctx, id) } @@ -279,6 +284,14 @@ func (c *CloudProvider) resolveInstanceTypeFromInstance(ctx 
context.Context, ins return instanceType, nil } +func (c *CloudProvider) resolveNodeClassFromInstance(ctx context.Context, instance *instance.Instance) (*v1beta1.EC2NodeClass, error) { + np, err := c.resolveNodePoolFromInstance(ctx, instance) + if err != nil { + return nil, fmt.Errorf("resolving nodepool, %w", err) + } + return c.resolveNodeClassFromNodePool(ctx, np) +} + func (c *CloudProvider) resolveNodePoolFromInstance(ctx context.Context, instance *instance.Instance) (*corev1beta1.NodePool, error) { if nodePoolName, ok := instance.Tags[corev1beta1.NodePoolLabelKey]; ok { nodePool := &corev1beta1.NodePool{} @@ -290,7 +303,8 @@ func (c *CloudProvider) resolveNodePoolFromInstance(ctx context.Context, instanc return nil, errors.NewNotFound(schema.GroupResource{Group: corev1beta1.Group, Resource: "nodepools"}, "") } -func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType *cloudprovider.InstanceType) *corev1beta1.NodeClaim { +//nolint:gocyclo +func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType *cloudprovider.InstanceType, nodeClass *v1beta1.EC2NodeClass) *corev1beta1.NodeClaim { nodeClaim := &corev1beta1.NodeClaim{} labels := map[string]string{} annotations := map[string]string{} @@ -316,6 +330,17 @@ func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType * nodeClaim.Status.Allocatable = functional.FilterMap(instanceType.Allocatable(), resourceFilter) } labels[v1.LabelTopologyZone] = i.Zone + // Attempt to resolve the zoneID from the instance's EC2NodeClass' status condition. + // If the EC2NodeClass is nil, we know we're in the List or Get paths, where we don't care about the zone-id value. + // If we're in the Create path, we've already validated the EC2NodeClass exists. In this case, we resolve the zone-id from the status condition + // both when creating offerings and when adding the label. 
+ if nodeClass != nil { + if subnet, ok := lo.Find(nodeClass.Status.Subnets, func(s v1beta1.Subnet) bool { + return s.Zone == i.Zone + }); ok && subnet.ZoneID != "" { + labels[v1beta1.LabelTopologyZoneID] = subnet.ZoneID + } + } labels[corev1beta1.CapacityTypeLabelKey] = i.CapacityType if v, ok := i.Tags[corev1beta1.NodePoolLabelKey]; ok { labels[corev1beta1.NodePoolLabelKey] = v diff --git a/pkg/cloudprovider/drift.go b/pkg/cloudprovider/drift.go index f40455ad3837..5c87fdb62da9 100644 --- a/pkg/cloudprovider/drift.go +++ b/pkg/cloudprovider/drift.go @@ -77,14 +77,10 @@ func (c *CloudProvider) isAMIDrifted(ctx context.Context, nodeClaim *corev1beta1 if !found { return "", fmt.Errorf(`finding node instance type "%s"`, nodeClaim.Labels[v1.LabelInstanceTypeStable]) } - amis, err := c.amiProvider.Get(ctx, nodeClass, &amifamily.Options{}) - if err != nil { - return "", fmt.Errorf("getting amis, %w", err) - } - if len(amis) == 0 { + if len(nodeClass.Status.AMIs) == 0 { return "", fmt.Errorf("no amis exist given constraints") } - mappedAMIs := amis.MapToInstanceTypes([]*cloudprovider.InstanceType{nodeInstanceType}) + mappedAMIs := amifamily.MapToInstanceTypes([]*cloudprovider.InstanceType{nodeInstanceType}, nodeClass.Status.AMIs) if !lo.Contains(lo.Keys(mappedAMIs), instance.ImageID) { return AMIDrift, nil } diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 1dbf5ab68baa..f978a161f226 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -27,12 +27,12 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" - clock "k8s.io/utils/clock/testing" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/ssm" + opstatus "github.com/awslabs/operatorpkg/status" "github.com/imdario/mergo" "github.com/samber/lo" @@ -44,7 +44,6 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/operator/options" 
"github.com/aws/karpenter-provider-aws/pkg/test" - "sigs.k8s.io/controller-runtime/pkg/client" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" corecloudproivder "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/controllers/provisioning" @@ -56,8 +55,8 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . "sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context @@ -85,7 +84,7 @@ var _ = BeforeSuite(func() { fakeClock = clock.NewFakeClock(time.Now()) recorder = events.NewRecorder(&record.FakeRecorder{}) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, recorder, - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, recorder, cloudProvider, cluster) }) @@ -135,21 +134,25 @@ var _ = Describe("CloudProvider", func() { }, Subnets: []v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, }, }, }, ) + nodeClass.StatusConditions().SetTrue(opstatus.ConditionReady) nodePool = coretest.NodePool(corev1beta1.NodePool{ Spec: corev1beta1.NodePoolSpec{ Template: corev1beta1.NodeClaimTemplate{ @@ -179,6 +182,12 @@ var _ = Describe("CloudProvider", func() { Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed()) Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed()) }) + It("should not proceed with instance creation if nodeClass is not ready", 
func() { + nodeClass.StatusConditions().SetFalse(opstatus.ConditionReady, "NodeClassNotReady", "NodeClass not ready") + ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim) + _, err := cloudProvider.Create(ctx, nodeClaim) + Expect(err).To(HaveOccurred()) + }) It("should return an ICE error when there are no instance types to launch", func() { // Specify no instance types and expect to receive a capacity error nodeClaim.Spec.Requirements = []corev1beta1.NodeSelectorRequirementWithMinValues{ @@ -202,6 +211,21 @@ var _ = Describe("CloudProvider", func() { Expect(cloudProviderNodeClaim).ToNot(BeNil()) Expect(cloudProviderNodeClaim.Status.ImageID).ToNot(BeEmpty()) }) + It("should return availability zone ID as a label on the nodeClaim", func() { + ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim) + cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim) + Expect(err).ToNot(HaveOccurred()) + Expect(cloudProviderNodeClaim).ToNot(BeNil()) + zone, ok := cloudProviderNodeClaim.GetLabels()[v1.LabelTopologyZone] + Expect(ok).To(BeTrue()) + zoneID, ok := cloudProviderNodeClaim.GetLabels()[v1beta1.LabelTopologyZoneID] + Expect(ok).To(BeTrue()) + subnet, ok := lo.Find(nodeClass.Status.Subnets, func(s v1beta1.Subnet) bool { + return s.Zone == zone + }) + Expect(ok).To(BeTrue()) + Expect(zoneID).To(Equal(subnet.ZoneID)) + }) It("should return NodeClass Hash on the nodeClaim", func() { ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim) cloudProviderNodeClaim, err := cloudProvider.Create(ctx, nodeClaim) @@ -624,19 +648,36 @@ var _ = Describe("CloudProvider", func() { }, }, }) - nodeClass.Status.Subnets = []v1beta1.Subnet{ - { - ID: validSubnet1, - Zone: "zone-1", + nodeClass.Status = v1beta1.EC2NodeClassStatus{ + InstanceProfile: "test-profile", + Subnets: []v1beta1.Subnet{ + { + ID: validSubnet1, + Zone: "zone-1", + }, + { + ID: validSubnet2, + Zone: "zone-2", + }, }, - { - ID: validSubnet2, - Zone: "zone-2", + SecurityGroups: 
[]v1beta1.SecurityGroup{ + { + ID: validSecurityGroup, + }, }, - } - nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{ - { - ID: validSecurityGroup, + AMIs: []v1beta1.AMI{ + { + ID: armAMIID, + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureArm64}}, + }, + }, + { + ID: amdAMIID, + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureAmd64}}, + }, + }, }, } ExpectApplied(ctx, env.Client, nodePool, nodeClass) @@ -787,6 +828,14 @@ var _ = Describe("CloudProvider", func() { }) It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() { nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: amdAMIID}} + nodeClass.Status.AMIs = []v1beta1.AMI{ + { + ID: amdAMIID, + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureAmd64}}, + }, + }, + } ExpectApplied(ctx, env.Client, nodeClass) isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim) Expect(err).ToNot(HaveOccurred()) @@ -794,6 +843,12 @@ var _ = Describe("CloudProvider", func() { }) Context("Static Drift Detection", func() { BeforeEach(func() { + armRequirements := []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureArm64}}, + } + amdRequirements := []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureAmd64}}, + } nodeClass = &v1beta1.EC2NodeClass{ ObjectMeta: nodeClass.ObjectMeta, Spec: v1beta1.EC2NodeClassSpec{ @@ -832,6 +887,7 @@ var _ = Describe("CloudProvider", func() { }, }, Status: v1beta1.EC2NodeClassStatus{ + InstanceProfile: "test-profile", Subnets: []v1beta1.Subnet{ { ID: validSubnet1, @@ -847,6 +903,16 @@ var _ = 
Describe("CloudProvider", func() { ID: validSecurityGroup, }, }, + AMIs: []v1beta1.AMI{ + { + ID: armAMIID, + Requirements: armRequirements, + }, + { + ID: amdAMIID, + Requirements: amdRequirements, + }, + }, }, } nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()}) @@ -1011,14 +1077,14 @@ var _ = Describe("CloudProvider", func() { It("should launch instances into subnet with the most available IP addresses", func() { awsEnv.SubnetCache.Flush() awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{ - {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(10), + {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(10), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-1")}}}, - {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(100), + {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(100), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) controller := status.NewController(env.Client, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) ExpectApplied(ctx, env.Client, nodePool, nodeClass) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) pod := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{v1.LabelTopologyZone: "test-zone-1a"}}) ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) ExpectScheduled(ctx, env.Client, 
pod) @@ -1028,15 +1094,15 @@ var _ = Describe("CloudProvider", func() { It("should launch instances into subnet with the most available IP addresses in-between cache refreshes", func() { awsEnv.SubnetCache.Flush() awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{ - {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(10), + {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(10), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-1")}}}, - {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(11), + {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(11), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) controller := status.NewController(env.Client, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{MaxPods: aws.Int32(1)} ExpectApplied(ctx, env.Client, nodePool, nodeClass) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) pod1 := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{v1.LabelTopologyZone: "test-zone-1a"}}) pod2 := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{v1.LabelTopologyZone: "test-zone-1a"}}) ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod1, pod2) @@ -1064,15 +1130,15 @@ var _ = Describe("CloudProvider", func() { }) It("should launch instances into subnets that are 
excluded by another NodePool", func() { awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{ - {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(10), + {SubnetId: aws.String("test-subnet-1"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(10), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-1")}}}, - {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1b"), AvailableIpAddressCount: aws.Int64(100), + {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1b"), AvailabilityZoneId: aws.String("tstz1-1b"), AvailableIpAddressCount: aws.Int64(100), Tags: []*ec2.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{{Tags: map[string]string{"Name": "test-subnet-1"}}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) controller := status.NewController(env.Client, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) podSubnet1 := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, podSubnet1) ExpectScheduled(ctx, env.Client, podSubnet1) @@ -1093,6 +1159,7 @@ var _ = Describe("CloudProvider", func() { }, }, Status: v1beta1.EC2NodeClassStatus{ + AMIs: nodeClass.Status.AMIs, SecurityGroups: []v1beta1.SecurityGroup{ { ID: "sg-test1", @@ -1112,7 +1179,7 @@ var _ = Describe("CloudProvider", func() { }, }) ExpectApplied(ctx, env.Client, nodePool2, nodeClass2) - 
ExpectObjectReconciled(ctx, env.Client, controller, nodeClass2) podSubnet2 := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{corev1beta1.NodePoolLabelKey: nodePool2.Name}}) ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, podSubnet2) ExpectScheduled(ctx, env.Client, podSubnet2) diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index 6510a522f870..282ad695f2c1 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -18,6 +18,7 @@ import ( "context" "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/operator/controller" nodeclasshash "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/hash" nodeclassstatus "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/status" @@ -33,7 +34,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/karpenter/pkg/events" - "sigs.k8s.io/karpenter/pkg/operator/controller" "github.com/aws/karpenter-provider-aws/pkg/cache" "github.com/aws/karpenter-provider-aws/pkg/controllers/interruption" diff --git a/pkg/controllers/interruption/controller.go b/pkg/controllers/interruption/controller.go index 589db848f75b..e5addf69f5e3 100644 --- a/pkg/controllers/interruption/controller.go +++ b/pkg/controllers/interruption/controller.go @@ -25,9 +25,10 @@ import ( "go.uber.org/multierr" v1 "k8s.io/api/core/v1" "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" "k8s.io/utils/clock" - "knative.dev/pkg/logging" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/karpenter/pkg/metrics" @@ -81,9 +82,9 @@ func NewController(kubeClient client.Client, clk clock.Clock, recorder events.Re } func (c *Controller) Reconcile(ctx context.Context, _ reconcile.Request) (reconcile.Result, error) { - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("queue", 
c.sqsProvider.Name())) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("queue", c.sqsProvider.Name())) if c.cm.HasChanged(c.sqsProvider.Name(), nil) { - logging.FromContext(ctx).Debugf("watching interruption queue") + log.FromContext(ctx).V(1).Info("watching interruption queue") } sqsMessages, err := c.sqsProvider.GetSQSMessages(ctx) if err != nil { @@ -105,7 +106,7 @@ func (c *Controller) Reconcile(ctx context.Context, _ reconcile.Request) (reconc msg, e := c.parseMessage(sqsMessages[i]) if e != nil { // If we fail to parse, then we should delete the message but still log the error - logging.FromContext(ctx).Errorf("parsing message, %v", e) + log.FromContext(ctx).Error(e, "failed parsing interruption message") errs[i] = c.deleteMessage(ctx, sqsMessages[i]) return } @@ -121,12 +122,10 @@ func (c *Controller) Reconcile(ctx context.Context, _ reconcile.Request) (reconc return reconcile.Result{}, nil } -func (c *Controller) Name() string { - return "interruption" -} - -func (c *Controller) Builder(_ context.Context, m manager.Manager) corecontroller.Builder { - return corecontroller.NewSingletonManagedBy(m) +func (c *Controller) Register(_ context.Context, m manager.Manager) error { + return corecontroller.NewSingletonManagedBy(m). + Named("interruption"). 
+ Complete(c) } // parseMessage parses the passed SQS message into an internal Message interface @@ -146,7 +145,7 @@ func (c *Controller) parseMessage(raw *sqsapi.Message) (messages.Message, error) func (c *Controller) handleMessage(ctx context.Context, nodeClaimInstanceIDMap map[string]*v1beta1.NodeClaim, nodeInstanceIDMap map[string]*v1.Node, msg messages.Message) (err error) { - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("messageKind", msg.Kind())) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("messageKind", msg.Kind())) receivedMessages.WithLabelValues(string(msg.Kind())).Inc() if msg.Kind() == messages.NoOpKind { @@ -181,14 +180,19 @@ func (c *Controller) deleteMessage(ctx context.Context, msg *sqsapi.Message) err // handleNodeClaim retrieves the action for the message and then performs the appropriate action against the node func (c *Controller) handleNodeClaim(ctx context.Context, msg messages.Message, nodeClaim *v1beta1.NodeClaim, node *v1.Node) error { action := actionForMessage(msg) - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("nodeclaim", nodeClaim.Name, "action", string(action))) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("NodeClaim", klog.KRef("", nodeClaim.Name), "action", string(action))) if node != nil { - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("node", node.Name)) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("Node", klog.KRef("", node.Name))) } // Record metric and event for this action c.notifyForMessage(msg, nodeClaim, node) - actionsPerformed.WithLabelValues(string(action)).Inc() + actionsPerformed.With( + prometheus.Labels{ + actionTypeLabel: string(action), + metrics.NodePoolLabel: nodeClaim.Labels[v1beta1.NodePoolLabelKey], + }, + ).Inc() // Mark the offering as unavailable in the ICE cache since we got a spot interruption warning if msg.Kind() == messages.SpotInterruptionKind { @@ -212,7 +216,7 @@ func (c *Controller) deleteNodeClaim(ctx 
context.Context, nodeClaim *v1beta1.Nod if err := c.kubeClient.Delete(ctx, nodeClaim); err != nil { return client.IgnoreNotFound(fmt.Errorf("deleting the node on interruption message, %w", err)) } - logging.FromContext(ctx).Infof("initiating delete from interruption message") + log.FromContext(ctx).Info("initiating delete from interruption message") c.recorder.Publish(interruptionevents.TerminatingOnInterruption(node, nodeClaim)...) metrics.NodeClaimsTerminatedCounter.With(prometheus.Labels{ metrics.ReasonLabel: terminationReasonLabel, diff --git a/pkg/controllers/interruption/interruption_benchmark_test.go b/pkg/controllers/interruption/interruption_benchmark_test.go index 6a9a591db8de..4c369e11900d 100644 --- a/pkg/controllers/interruption/interruption_benchmark_test.go +++ b/pkg/controllers/interruption/interruption_benchmark_test.go @@ -32,6 +32,7 @@ import ( "github.com/aws/aws-sdk-go/aws/session" servicesqs "github.com/aws/aws-sdk-go/service/sqs" "github.com/aws/aws-sdk-go/service/sqs/sqsiface" + "github.com/go-logr/zapr" "github.com/samber/lo" "go.uber.org/multierr" "go.uber.org/zap" @@ -39,9 +40,9 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/util/workqueue" clock "k8s.io/utils/clock/testing" - "knative.dev/pkg/logging" controllerruntime "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/operator/scheme" @@ -78,7 +79,7 @@ func BenchmarkNotification100(b *testing.B) { //nolint:gocyclo func benchmarkNotificationController(b *testing.B, messageCount int) { - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("message-count", messageCount)) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("message-count", messageCount)) fakeClock = &clock.FakeClock{} ctx = coreoptions.ToContext(ctx, coretest.Options()) ctx = options.ToContext(ctx, test.Options(test.OptionsFields{ @@ -118,20 
+119,20 @@ func benchmarkNotificationController(b *testing.B, messageCount int) { interruptionController := interruption.NewController(env.Client, fakeClock, recorder, providers.sqsProvider, unavailableOfferingsCache) messages, nodes := makeDiverseMessagesAndNodes(messageCount) - logging.FromContext(ctx).Infof("provisioning nodes") + log.FromContext(ctx).Info("provisioning nodes") if err := provisionNodes(ctx, env.Client, nodes); err != nil { b.Fatalf("provisioning nodes, %v", err) } - logging.FromContext(ctx).Infof("completed provisioning nodes") + log.FromContext(ctx).Info("completed provisioning nodes") - logging.FromContext(ctx).Infof("provisioning messages into the SQS Queue") + log.FromContext(ctx).Info("provisioning messages into the SQS Queue") if err := providers.provisionMessages(ctx, messages...); err != nil { b.Fatalf("provisioning messages, %v", err) } - logging.FromContext(ctx).Infof("completed provisioning messages into the SQS Queue") + log.FromContext(ctx).Info("completed provisioning messages into the SQS Queue") m, err := controllerruntime.NewManager(env.Config, controllerruntime.Options{ - BaseContext: func() context.Context { return logging.WithLogger(ctx, zap.NewNop().Sugar()) }, + BaseContext: func() context.Context { return log.IntoContext(ctx, zapr.NewLogger(zap.NewNop())) }, }) if err != nil { b.Fatalf("creating manager, %v", err) @@ -146,7 +147,7 @@ func benchmarkNotificationController(b *testing.B, messageCount int) { start := time.Now() managerErr := make(chan error) go func() { - logging.FromContext(ctx).Infof("starting controller manager") + log.FromContext(ctx).Info("starting controller manager") managerErr <- m.Start(ctx) }() @@ -225,7 +226,7 @@ func (p *providerSet) monitorMessagesProcessed(ctx context.Context, eventRecorde eventRecorder.Calls(events.Unhealthy(coretest.Node(), coretest.NodeClaim())[0].Reason) + eventRecorder.Calls(events.RebalanceRecommendation(coretest.Node(), coretest.NodeClaim())[0].Reason) + 
eventRecorder.Calls(events.SpotInterrupted(coretest.Node(), coretest.NodeClaim())[0].Reason) - logging.FromContext(ctx).With("processed-message-count", totalProcessed).Infof("processed messages from the queue") + log.FromContext(ctx).WithValues("processed-message-count", totalProcessed).Info("processed messages from the queue") time.Sleep(time.Second) } close(done) diff --git a/pkg/controllers/interruption/metrics.go b/pkg/controllers/interruption/metrics.go index 7fd01ca241d2..9f8122fc16bc 100644 --- a/pkg/controllers/interruption/metrics.go +++ b/pkg/controllers/interruption/metrics.go @@ -62,7 +62,10 @@ var ( Name: "actions_performed", Help: "Number of notification actions performed. Labeled by action", }, - []string{actionTypeLabel}, + []string{ + actionTypeLabel, + metrics.NodePoolLabel, + }, ) ) diff --git a/pkg/controllers/interruption/suite_test.go b/pkg/controllers/interruption/suite_test.go index 52c2b0972add..20d9ac2e30a1 100644 --- a/pkg/controllers/interruption/suite_test.go +++ b/pkg/controllers/interruption/suite_test.go @@ -31,7 +31,6 @@ import ( "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/client-go/tools/record" clock "k8s.io/utils/clock/testing" - _ "knative.dev/pkg/system/testing" "sigs.k8s.io/controller-runtime/pkg/client" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" @@ -53,8 +52,8 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . 
"sigs.k8s.io/karpenter/pkg/utils/testing" ) const ( diff --git a/pkg/controllers/nodeclaim/garbagecollection/controller.go b/pkg/controllers/nodeclaim/garbagecollection/controller.go index dae66f7f6f1e..71748eaaa02c 100644 --- a/pkg/controllers/nodeclaim/garbagecollection/controller.go +++ b/pkg/controllers/nodeclaim/garbagecollection/controller.go @@ -24,8 +24,9 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/util/workqueue" - "knative.dev/pkg/logging" + "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/karpenter/pkg/cloudprovider" @@ -48,10 +49,6 @@ func NewController(kubeClient client.Client, cloudProvider cloudprovider.CloudPr } } -func (c *Controller) Name() string { - return "nodeclaim.garbagecollection" -} - func (c *Controller) Reconcile(ctx context.Context, _ reconcile.Request) (reconcile.Result, error) { // We LIST machines on the CloudProvider BEFORE we grab Machines/Nodes on the cluster so that we make sure that, if // LISTing instances takes a long time, our information is more updated by the time we get to Machine and Node LIST @@ -89,11 +86,11 @@ func (c *Controller) Reconcile(ctx context.Context, _ reconcile.Request) (reconc } func (c *Controller) garbageCollect(ctx context.Context, nodeClaim *v1beta1.NodeClaim, nodeList *v1.NodeList) error { - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("provider-id", nodeClaim.Status.ProviderID)) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("provider-id", nodeClaim.Status.ProviderID)) if err := c.cloudProvider.Delete(ctx, nodeClaim); err != nil { return cloudprovider.IgnoreNodeClaimNotFoundError(err) } - logging.FromContext(ctx).Debugf("garbage collected cloudprovider instance") + log.FromContext(ctx).V(1).Info("garbage collected cloudprovider instance") // Go ahead and cleanup the 
node if we know that it exists to make scheduling go quicker if node, ok := lo.Find(nodeList.Items, func(n v1.Node) bool { @@ -102,11 +99,13 @@ func (c *Controller) garbageCollect(ctx context.Context, nodeClaim *v1beta1.Node if err := c.kubeClient.Delete(ctx, &node); err != nil { return client.IgnoreNotFound(err) } - logging.FromContext(ctx).With("node", node.Name).Debugf("garbage collected node") + log.FromContext(ctx).WithValues("Node", klog.KRef("", node.Name)).V(1).Info("garbage collected node") } return nil } -func (c *Controller) Builder(_ context.Context, m manager.Manager) controller.Builder { - return controller.NewSingletonManagedBy(m) +func (c *Controller) Register(_ context.Context, m manager.Manager) error { + return controller.NewSingletonManagedBy(m). + Named("nodeclaim.garbagecollection"). + Complete(c) } diff --git a/pkg/controllers/nodeclaim/garbagecollection/suite_test.go b/pkg/controllers/nodeclaim/garbagecollection/suite_test.go index 6527c20a2a4a..0446d774da30 100644 --- a/pkg/controllers/nodeclaim/garbagecollection/suite_test.go +++ b/pkg/controllers/nodeclaim/garbagecollection/suite_test.go @@ -31,7 +31,6 @@ import ( corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/events" - "sigs.k8s.io/karpenter/pkg/operator/controller" "sigs.k8s.io/karpenter/pkg/operator/scheme" coretest "sigs.k8s.io/karpenter/pkg/test" @@ -45,14 +44,14 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . 
"sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context var awsEnv *test.Environment var env *coretest.Environment -var garbageCollectionController controller.Controller +var garbageCollectionController *garbagecollection.Controller var cloudProvider *cloudprovider.CloudProvider func TestAPIs(t *testing.T) { @@ -66,7 +65,7 @@ var _ = BeforeSuite(func() { env = coretest.NewEnvironment(scheme.Scheme, coretest.WithCRDs(apis.CRDs...)) awsEnv = test.NewEnvironment(ctx, env) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) garbageCollectionController = garbagecollection.NewController(env.Client, cloudProvider) }) diff --git a/pkg/controllers/nodeclaim/tagging/controller.go b/pkg/controllers/nodeclaim/tagging/controller.go index 30682f1c91c2..6e9d4519d898 100644 --- a/pkg/controllers/nodeclaim/tagging/controller.go +++ b/pkg/controllers/nodeclaim/tagging/controller.go @@ -20,13 +20,14 @@ import ( "time" "k8s.io/apimachinery/pkg/api/equality" - "knative.dev/pkg/logging" controllerruntime "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/karpenter/pkg/operator/injection" "github.com/samber/lo" @@ -38,7 +39,6 @@ import ( corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/cloudprovider" - corecontroller "sigs.k8s.io/karpenter/pkg/operator/controller" ) type Controller struct { @@ -46,27 +46,25 @@ type Controller struct { instanceProvider instance.Provider } -func NewController(kubeClient client.Client, instanceProvider instance.Provider) 
corecontroller.Controller { - return corecontroller.Typed[*corev1beta1.NodeClaim](kubeClient, &Controller{ +func NewController(kubeClient client.Client, instanceProvider instance.Provider) *Controller { + return &Controller{ kubeClient: kubeClient, instanceProvider: instanceProvider, - }) -} - -func (c *Controller) Name() string { - return "nodeclaim.tagging" + } } func (c *Controller) Reconcile(ctx context.Context, nodeClaim *corev1beta1.NodeClaim) (reconcile.Result, error) { + ctx = injection.WithControllerName(ctx, "nodeclaim.tagging") + stored := nodeClaim.DeepCopy() if !isTaggable(nodeClaim) { return reconcile.Result{}, nil } - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("provider-id", nodeClaim.Status.ProviderID)) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("provider-id", nodeClaim.Status.ProviderID)) id, err := utils.ParseInstanceID(nodeClaim.Status.ProviderID) if err != nil { // We don't throw an error here since we don't want to retry until the ProviderID has been updated. - logging.FromContext(ctx).Errorf("failed to parse instance ID, %w", err) + log.FromContext(ctx).Error(err, "failed parsing instance id") return reconcile.Result{}, nil } if err = c.tagInstance(ctx, nodeClaim, id); err != nil { @@ -81,18 +79,18 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClaim *corev1beta1.NodeC return reconcile.Result{}, nil } -func (c *Controller) Builder(_ context.Context, m manager.Manager) corecontroller.Builder { - return corecontroller.Adapt( - controllerruntime. - NewControllerManagedBy(m). - For(&corev1beta1.NodeClaim{}). - WithEventFilter(predicate.NewPredicateFuncs(func(o client.Object) bool { - return isTaggable(o.(*corev1beta1.NodeClaim)) - })). 
- // Ok with using the default MaxConcurrentReconciles of 1 to avoid throttling from CreateTag write API - WithOptions(controller.Options{ - RateLimiter: reasonable.RateLimiter(), - })) +func (c *Controller) Register(_ context.Context, m manager.Manager) error { + return controllerruntime.NewControllerManagedBy(m). + Named("nodeclaim.tagging"). + For(&corev1beta1.NodeClaim{}). + WithEventFilter(predicate.NewPredicateFuncs(func(o client.Object) bool { + return isTaggable(o.(*corev1beta1.NodeClaim)) + })). + // Ok with using the default MaxConcurrentReconciles of 1 to avoid throttling from CreateTag write API + WithOptions(controller.Options{ + RateLimiter: reasonable.RateLimiter(), + }). + Complete(reconcile.AsReconciler(m.GetClient(), c)) } func (c *Controller) tagInstance(ctx context.Context, nc *corev1beta1.NodeClaim, id string) error { diff --git a/pkg/controllers/nodeclaim/tagging/suite_test.go b/pkg/controllers/nodeclaim/tagging/suite_test.go index 6addad95b22b..b132b71d90e7 100644 --- a/pkg/controllers/nodeclaim/tagging/suite_test.go +++ b/pkg/controllers/nodeclaim/tagging/suite_test.go @@ -19,12 +19,10 @@ import ( "fmt" "testing" - "github.com/samber/lo" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/samber/lo" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" coretest "sigs.k8s.io/karpenter/pkg/test" @@ -36,20 +34,19 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/instance" "github.com/aws/karpenter-provider-aws/pkg/test" - "sigs.k8s.io/karpenter/pkg/operator/controller" coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/operator/scheme" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . 
"sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context var awsEnv *test.Environment var env *coretest.Environment -var taggingController controller.Controller +var taggingController *tagging.Controller func TestAPIs(t *testing.T) { ctx = TestContextWithLogger(t) @@ -117,7 +114,7 @@ var _ = Describe("TaggingController", func() { }) ExpectApplied(ctx, env.Client, nodeClaim) - ExpectReconcileSucceeded(ctx, taggingController, client.ObjectKeyFromObject(nodeClaim)) + ExpectObjectReconciled(ctx, env.Client, taggingController, nodeClaim) Expect(nodeClaim.Annotations).To(Not(HaveKey(v1beta1.AnnotationInstanceTagged))) Expect(lo.ContainsBy(ec2Instance.Tags, func(tag *ec2.Tag) bool { return *tag.Key == v1beta1.TagName @@ -133,7 +130,7 @@ var _ = Describe("TaggingController", func() { }) ExpectApplied(ctx, env.Client, nodeClaim) - ExpectReconcileSucceeded(ctx, taggingController, client.ObjectKeyFromObject(nodeClaim)) + ExpectObjectReconciled(ctx, env.Client, taggingController, nodeClaim) Expect(nodeClaim.Annotations).To(Not(HaveKey(v1beta1.AnnotationInstanceTagged))) Expect(lo.ContainsBy(ec2Instance.Tags, func(tag *ec2.Tag) bool { return *tag.Key == v1beta1.TagName @@ -150,7 +147,7 @@ var _ = Describe("TaggingController", func() { ExpectApplied(ctx, env.Client, nodeClaim) ExpectDeleted(ctx, env.Client, nodeClaim) - ExpectReconcileSucceeded(ctx, taggingController, client.ObjectKeyFromObject(nodeClaim)) + ExpectObjectReconciled(ctx, env.Client, taggingController, nodeClaim) }) It("should gracefully handle missing instance", func() { @@ -163,7 +160,7 @@ var _ = Describe("TaggingController", func() { ExpectApplied(ctx, env.Client, nodeClaim) awsEnv.EC2API.Instances.Delete(*ec2Instance.InstanceId) - ExpectReconcileSucceeded(ctx, taggingController, client.ObjectKeyFromObject(nodeClaim)) + ExpectObjectReconciled(ctx, env.Client, taggingController, nodeClaim) Expect(nodeClaim.Annotations).To(Not(HaveKey(v1beta1.AnnotationInstanceTagged))) }) @@ -180,7 +177,7 @@ var _ = 
Describe("TaggingController", func() { ExpectApplied(ctx, env.Client, nodeClaim) Expect(env.Client.Delete(ctx, nodeClaim)).To(Succeed()) - ExpectReconcileSucceeded(ctx, taggingController, client.ObjectKeyFromObject(nodeClaim)) + ExpectObjectReconciled(ctx, env.Client, taggingController, nodeClaim) Expect(nodeClaim.Annotations).To(Not(HaveKey(v1beta1.AnnotationInstanceTagged))) Expect(lo.ContainsBy(ec2Instance.Tags, func(tag *ec2.Tag) bool { return *tag.Key == v1beta1.TagName @@ -206,7 +203,7 @@ var _ = Describe("TaggingController", func() { awsEnv.EC2API.Instances.Store(*ec2Instance.InstanceId, ec2Instance) ExpectApplied(ctx, env.Client, nodeClaim) - ExpectReconcileSucceeded(ctx, taggingController, client.ObjectKeyFromObject(nodeClaim)) + ExpectObjectReconciled(ctx, env.Client, taggingController, nodeClaim) nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) Expect(nodeClaim.Annotations).To(HaveKey(v1beta1.AnnotationInstanceTagged)) diff --git a/pkg/controllers/nodeclass/hash/controller.go b/pkg/controllers/nodeclass/hash/controller.go index 5d40a48da5ae..711b8572b939 100644 --- a/pkg/controllers/nodeclass/hash/controller.go +++ b/pkg/controllers/nodeclass/hash/controller.go @@ -25,28 +25,27 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" - - corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" - corecontroller "sigs.k8s.io/karpenter/pkg/operator/controller" + "sigs.k8s.io/karpenter/pkg/operator/injection" "github.com/awslabs/operatorpkg/reasonable" + corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" ) -var _ corecontroller.TypedController[*v1beta1.EC2NodeClass] = (*Controller)(nil) - type Controller struct { kubeClient client.Client } -func NewController(kubeClient client.Client) corecontroller.Controller { - return corecontroller.Typed[*v1beta1.EC2NodeClass](kubeClient, &Controller{ +func 
NewController(kubeClient client.Client) *Controller { + return &Controller{ kubeClient: kubeClient, - }) + } } func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { + ctx = injection.WithControllerName(ctx, "nodeclass.hash") + stored := nodeClass.DeepCopy() if nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHashVersion] != v1beta1.EC2NodeClassHashVersion { @@ -68,18 +67,15 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeCl return reconcile.Result{}, nil } -func (c *Controller) Name() string { - return "nodeclass.hash" -} - -func (c *Controller) Builder(_ context.Context, m manager.Manager) corecontroller.Builder { - return corecontroller.Adapt(controllerruntime. - NewControllerManagedBy(m). +func (c *Controller) Register(_ context.Context, m manager.Manager) error { + return controllerruntime.NewControllerManagedBy(m). + Named("nodeclass.hash"). For(&v1beta1.EC2NodeClass{}). WithOptions(controller.Options{ RateLimiter: reasonable.RateLimiter(), MaxConcurrentReconciles: 10, - })) + }). + Complete(reconcile.AsReconciler(m.GetClient(), c)) } // Updating `ec2nodeclass-hash-version` annotation inside the karpenter controller means a breaking change has been made to the hash calculation. 
@@ -104,7 +100,7 @@ func (c *Controller) updateNodeClaimHash(ctx context.Context, nodeClass *v1beta1 // Any NodeClaim that is already drifted will remain drifted if the karpenter.k8s.aws/nodepool-hash-version doesn't match // Since the hashing mechanism has changed we will not be able to determine if the drifted status of the NodeClaim has changed - if nc.StatusConditions().GetCondition(corev1beta1.Drifted) == nil { + if nc.StatusConditions().Get(corev1beta1.ConditionTypeDrifted) == nil { nc.Annotations = lo.Assign(nc.Annotations, map[string]string{ v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash(), }) diff --git a/pkg/controllers/nodeclass/hash/suite_test.go b/pkg/controllers/nodeclass/hash/suite_test.go index a1e6bf19ff7f..f8962aa64cdd 100644 --- a/pkg/controllers/nodeclass/hash/suite_test.go +++ b/pkg/controllers/nodeclass/hash/suite_test.go @@ -18,14 +18,10 @@ import ( "context" "testing" + "github.com/aws/aws-sdk-go/aws" "github.com/imdario/mergo" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - _ "knative.dev/pkg/system/testing" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/aws/aws-sdk-go/aws" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" - corecontroller "sigs.k8s.io/karpenter/pkg/operator/controller" coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/operator/scheme" coretest "sigs.k8s.io/karpenter/pkg/test" @@ -38,14 +34,14 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . 
"sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context var env *coretest.Environment var awsEnv *test.Environment -var hashController corecontroller.Controller +var hashController *hash.Controller func TestAPIs(t *testing.T) { ctx = TestContextWithLogger(t) @@ -100,7 +96,7 @@ var _ = Describe("NodeClass Hash Controller", func() { }) DescribeTable("should update the drift hash when static field is updated", func(changes *v1beta1.EC2NodeClass) { ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, hashController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) expectedHash := nodeClass.Hash() @@ -109,7 +105,7 @@ var _ = Describe("NodeClass Hash Controller", func() { Expect(mergo.Merge(nodeClass, changes, mergo.WithOverride)).To(Succeed()) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, hashController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) expectedHashTwo := nodeClass.Hash() @@ -127,7 +123,7 @@ var _ = Describe("NodeClass Hash Controller", func() { ) It("should not update the drift hash when dynamic field is updated", func() { ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, hashController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) expectedHash := nodeClass.Hash() @@ -150,7 +146,7 @@ var _ = Describe("NodeClass Hash Controller", func() { } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, hashController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) 
Expect(nodeClass.Annotations[v1beta1.AnnotationEC2NodeClassHash]).To(Equal(expectedHash)) }) @@ -161,7 +157,7 @@ var _ = Describe("NodeClass Hash Controller", func() { } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, hashController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) expectedHash := nodeClass.Hash() @@ -203,7 +199,7 @@ var _ = Describe("NodeClass Hash Controller", func() { ExpectApplied(ctx, env.Client, nodeClass, nodeClaimOne, nodeClaimTwo) - ExpectReconcileSucceeded(ctx, hashController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) nodeClaimOne = ExpectExists(ctx, env.Client, nodeClaimOne) nodeClaimTwo = ExpectExists(ctx, env.Client, nodeClaimTwo) @@ -235,7 +231,7 @@ var _ = Describe("NodeClass Hash Controller", func() { }) ExpectApplied(ctx, env.Client, nodeClass, nodeClaim) - ExpectReconcileSucceeded(ctx, hashController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) @@ -266,10 +262,10 @@ var _ = Describe("NodeClass Hash Controller", func() { }, }, }) - nodeClaim.StatusConditions().MarkTrue(corev1beta1.Drifted) + nodeClaim.StatusConditions().SetTrue(corev1beta1.ConditionTypeDrifted) ExpectApplied(ctx, env.Client, nodeClass, nodeClaim) - ExpectReconcileSucceeded(ctx, hashController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, hashController, nodeClass) nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) // Expect ec2nodeclass-hash on the NodeClaims to stay the same diff --git a/pkg/controllers/nodeclass/status/ami.go b/pkg/controllers/nodeclass/status/ami.go index 71a80f51f130..baeba547d390 100644 
--- a/pkg/controllers/nodeclass/status/ami.go +++ b/pkg/controllers/nodeclass/status/ami.go @@ -21,8 +21,11 @@ import ( "time" "github.com/samber/lo" + v1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/reconcile" + corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" ) @@ -32,16 +35,19 @@ type AMI struct { } func (a *AMI) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { - amis, err := a.amiProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := a.amiProvider.List(ctx, nodeClass) if err != nil { - return reconcile.Result{}, err + return reconcile.Result{}, fmt.Errorf("getting amis, %w", err) } if len(amis) == 0 { nodeClass.Status.AMIs = nil - return reconcile.Result{}, fmt.Errorf("no amis exist given constraints") + return reconcile.Result{}, nil } nodeClass.Status.AMIs = lo.Map(amis, func(ami amifamily.AMI, _ int) v1beta1.AMI { - reqs := ami.Requirements.NodeSelectorRequirements() + reqs := lo.Map(ami.Requirements.NodeSelectorRequirements(), func(item corev1beta1.NodeSelectorRequirementWithMinValues, _ int) v1.NodeSelectorRequirement { + return item.NodeSelectorRequirement + }) + sort.Slice(reqs, func(i, j int) bool { if len(reqs[i].Key) != len(reqs[j].Key) { return len(reqs[i].Key) < len(reqs[j].Key) diff --git a/pkg/controllers/nodeclass/status/ami_test.go b/pkg/controllers/nodeclass/status/ami_test.go index 0e442cbf33a1..a5213a0491b5 100644 --- a/pkg/controllers/nodeclass/status/ami_test.go +++ b/pkg/controllers/nodeclass/status/ami_test.go @@ -18,13 +18,10 @@ import ( "fmt" "time" - "github.com/samber/lo" - v1 "k8s.io/api/core/v1" - _ "knative.dev/pkg/system/testing" - "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/samber/lo" + v1 "k8s.io/api/core/v1" corev1beta1 
"sigs.k8s.io/karpenter/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" @@ -136,94 +133,74 @@ var _ = Describe("NodeClass AMI Status Controller", func() { }) nodeClass.Spec.AMISelectorTerms = nil ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.AMIs).To(Equal([]v1beta1.AMI{ { Name: "test-ami-3", ID: "ami-id-789", - Requirements: []corev1beta1.NodeSelectorRequirementWithMinValues{ + Requirements: []v1.NodeSelectorRequirement{ { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1.LabelArchStable, - Operator: v1.NodeSelectorOpIn, - Values: []string{corev1beta1.ArchitectureArm64}, - }, + Key: v1.LabelArchStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{corev1beta1.ArchitectureArm64}, }, { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceGPUCount, - Operator: v1.NodeSelectorOpDoesNotExist, - }, + Key: v1beta1.LabelInstanceGPUCount, + Operator: v1.NodeSelectorOpDoesNotExist, }, { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceAcceleratorCount, - Operator: v1.NodeSelectorOpDoesNotExist, - }, + Key: v1beta1.LabelInstanceAcceleratorCount, + Operator: v1.NodeSelectorOpDoesNotExist, }, }, }, { Name: "test-ami-2", ID: "ami-id-456", - Requirements: []corev1beta1.NodeSelectorRequirementWithMinValues{ + Requirements: []v1.NodeSelectorRequirement{ { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1.LabelArchStable, - Operator: v1.NodeSelectorOpIn, - Values: []string{corev1beta1.ArchitectureAmd64}, - }, + Key: v1.LabelArchStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{corev1beta1.ArchitectureAmd64}, }, { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceGPUCount, - Operator: 
v1.NodeSelectorOpExists, - }, + Key: v1beta1.LabelInstanceGPUCount, + Operator: v1.NodeSelectorOpExists, }, }, }, { Name: "test-ami-2", ID: "ami-id-456", - Requirements: []corev1beta1.NodeSelectorRequirementWithMinValues{ + Requirements: []v1.NodeSelectorRequirement{ { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1.LabelArchStable, - Operator: v1.NodeSelectorOpIn, - Values: []string{corev1beta1.ArchitectureAmd64}, - }, + Key: v1.LabelArchStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{corev1beta1.ArchitectureAmd64}, }, { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceAcceleratorCount, - Operator: v1.NodeSelectorOpExists, - }, + Key: v1beta1.LabelInstanceAcceleratorCount, + Operator: v1.NodeSelectorOpExists, }, }, }, { Name: "test-ami-1", ID: "ami-id-123", - Requirements: []corev1beta1.NodeSelectorRequirementWithMinValues{ + Requirements: []v1.NodeSelectorRequirement{ { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1.LabelArchStable, - Operator: v1.NodeSelectorOpIn, - Values: []string{corev1beta1.ArchitectureAmd64}, - }, + Key: v1.LabelArchStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{corev1beta1.ArchitectureAmd64}, }, { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceGPUCount, - Operator: v1.NodeSelectorOpDoesNotExist, - }, + Key: v1beta1.LabelInstanceGPUCount, + Operator: v1.NodeSelectorOpDoesNotExist, }, { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceAcceleratorCount, - Operator: v1.NodeSelectorOpDoesNotExist, - }, + Key: v1beta1.LabelInstanceAcceleratorCount, + Operator: v1.NodeSelectorOpDoesNotExist, }, }, }, @@ -263,57 +240,45 @@ var _ = Describe("NodeClass AMI Status Controller", func() { }, }) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) 
nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.AMIs).To(Equal([]v1beta1.AMI{ { Name: "test-ami-2", ID: "ami-id-456", - Requirements: []corev1beta1.NodeSelectorRequirementWithMinValues{ + Requirements: []v1.NodeSelectorRequirement{ { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1.LabelArchStable, - Operator: v1.NodeSelectorOpIn, - Values: []string{corev1beta1.ArchitectureArm64}, - }, + Key: v1.LabelArchStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{corev1beta1.ArchitectureArm64}, }, { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceGPUCount, - Operator: v1.NodeSelectorOpDoesNotExist, - }, + Key: v1beta1.LabelInstanceGPUCount, + Operator: v1.NodeSelectorOpDoesNotExist, }, { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceAcceleratorCount, - Operator: v1.NodeSelectorOpDoesNotExist, - }, + Key: v1beta1.LabelInstanceAcceleratorCount, + Operator: v1.NodeSelectorOpDoesNotExist, }, }, }, { Name: "test-ami-1", ID: "ami-id-123", - Requirements: []corev1beta1.NodeSelectorRequirementWithMinValues{ + Requirements: []v1.NodeSelectorRequirement{ { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1.LabelArchStable, - Operator: v1.NodeSelectorOpIn, - Values: []string{corev1beta1.ArchitectureAmd64}, - }, + Key: v1.LabelArchStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{corev1beta1.ArchitectureAmd64}, }, { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceGPUCount, - Operator: v1.NodeSelectorOpDoesNotExist, - }, + Key: v1beta1.LabelInstanceGPUCount, + Operator: v1.NodeSelectorOpDoesNotExist, }, { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceAcceleratorCount, - Operator: v1.NodeSelectorOpDoesNotExist, - }, + Key: v1beta1.LabelInstanceAcceleratorCount, + Operator: v1.NodeSelectorOpDoesNotExist, }, }, }, @@ -321,23 +286,18 @@ var _ = Describe("NodeClass AMI 
Status Controller", func() { }) It("Should resolve a valid AMI selector", func() { ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.AMIs).To(Equal( []v1beta1.AMI{ { Name: "test-ami-3", ID: "ami-test3", - Requirements: []corev1beta1.NodeSelectorRequirementWithMinValues{ - { - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: "kubernetes.io/arch", - Operator: "In", - Values: []string{ - "amd64", - }, - }, - }, + Requirements: []v1.NodeSelectorRequirement{{ + Key: v1.LabelArchStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{corev1beta1.ArchitectureAmd64}, + }, }, }, }, diff --git a/pkg/controllers/nodeclass/status/controller.go b/pkg/controllers/nodeclass/status/controller.go index 0e0c862422e3..8c32ff832d14 100644 --- a/pkg/controllers/nodeclass/status/controller.go +++ b/pkg/controllers/nodeclass/status/controller.go @@ -25,8 +25,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/karpenter/pkg/operator/injection" - corecontroller "sigs.k8s.io/karpenter/pkg/operator/controller" "sigs.k8s.io/karpenter/pkg/utils/result" "github.com/awslabs/operatorpkg/reasonable" @@ -39,8 +39,6 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" ) -var _ corecontroller.TypedController[*v1beta1.EC2NodeClass] = (*Controller)(nil) - type nodeClassStatusReconciler interface { Reconcile(context.Context, *v1beta1.EC2NodeClass) (reconcile.Result, error) } @@ -52,23 +50,25 @@ type Controller struct { instanceprofile *InstanceProfile subnet *Subnet securitygroup *SecurityGroup - launchtemplate *LaunchTemplate + readiness *Readiness //TODO : Remove this when we have sub status conditions } func 
NewController(kubeClient client.Client, subnetProvider subnet.Provider, securityGroupProvider securitygroup.Provider, - amiProvider amifamily.Provider, instanceProfileProvider instanceprofile.Provider, launchTemplateProvider launchtemplate.Provider) corecontroller.Controller { - return corecontroller.Typed[*v1beta1.EC2NodeClass](kubeClient, &Controller{ + amiProvider amifamily.Provider, instanceProfileProvider instanceprofile.Provider, launchTemplateProvider launchtemplate.Provider) *Controller { + return &Controller{ kubeClient: kubeClient, ami: &AMI{amiProvider: amiProvider}, subnet: &Subnet{subnetProvider: subnetProvider}, securitygroup: &SecurityGroup{securityGroupProvider: securityGroupProvider}, instanceprofile: &InstanceProfile{instanceProfileProvider: instanceProfileProvider}, - launchtemplate: &LaunchTemplate{launchTemplateProvider: launchTemplateProvider}, - }) + readiness: &Readiness{launchTemplateProvider: launchTemplateProvider}, + } } func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { + ctx = injection.WithControllerName(ctx, "nodeclass.status") + if !controllerutil.ContainsFinalizer(nodeClass, v1beta1.TerminationFinalizer) { stored := nodeClass.DeepCopy() controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer) @@ -85,7 +85,7 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeCl c.subnet, c.securitygroup, c.instanceprofile, - c.launchtemplate, + c.readiness, } { res, err := reconciler.Reconcile(ctx, nodeClass) errs = multierr.Append(errs, err) @@ -103,16 +103,13 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeCl return result.Min(results...), nil } -func (c *Controller) Name() string { - return "nodeclass.status" -} - -func (c *Controller) Builder(_ context.Context, m manager.Manager) corecontroller.Builder { - return corecontroller.Adapt(controllerruntime. - NewControllerManagedBy(m). 
+func (c *Controller) Register(_ context.Context, m manager.Manager) error { + return controllerruntime.NewControllerManagedBy(m). + Named("nodeclass.status"). For(&v1beta1.EC2NodeClass{}). WithOptions(controller.Options{ RateLimiter: reasonable.RateLimiter(), MaxConcurrentReconciles: 10, - })) + }). + Complete(reconcile.AsReconciler(m.GetClient(), c)) } diff --git a/pkg/controllers/nodeclass/status/instanceprofile_test.go b/pkg/controllers/nodeclass/status/instanceprofile_test.go index 386fd6dddc1b..163e0b95e909 100644 --- a/pkg/controllers/nodeclass/status/instanceprofile_test.go +++ b/pkg/controllers/nodeclass/status/instanceprofile_test.go @@ -15,12 +15,9 @@ limitations under the License. package status_test import ( - "github.com/samber/lo" - _ "knative.dev/pkg/system/testing" - "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/iam" + "github.com/samber/lo" "github.com/aws/karpenter-provider-aws/pkg/fake" "github.com/aws/karpenter-provider-aws/pkg/operator/options" @@ -38,7 +35,7 @@ var _ = Describe("NodeClass InstanceProfile Status Controller", func() { It("should create the instance profile when it doesn't exist", func() { nodeClass.Spec.Role = "test-role" ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1)) Expect(awsEnv.IAMAPI.InstanceProfiles[profileName].Roles).To(HaveLen(1)) @@ -57,7 +54,7 @@ var _ = Describe("NodeClass InstanceProfile Status Controller", func() { nodeClass.Spec.Role = "test-role" ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1)) 
Expect(awsEnv.IAMAPI.InstanceProfiles[profileName].Roles).To(HaveLen(1)) @@ -81,7 +78,7 @@ var _ = Describe("NodeClass InstanceProfile Status Controller", func() { nodeClass.Spec.Role = "test-role" ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1)) Expect(awsEnv.IAMAPI.InstanceProfiles[profileName].Roles).To(HaveLen(1)) @@ -105,7 +102,7 @@ var _ = Describe("NodeClass InstanceProfile Status Controller", func() { nodeClass.Spec.Role = "test-role" ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1)) Expect(awsEnv.IAMAPI.InstanceProfiles[profileName].Roles).To(HaveLen(1)) @@ -118,7 +115,7 @@ var _ = Describe("NodeClass InstanceProfile Status Controller", func() { nodeClass.Spec.Role = "" nodeClass.Spec.InstanceProfile = lo.ToPtr("test-instance-profile") ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.InstanceProfile).To(Equal(lo.FromPtr(nodeClass.Spec.InstanceProfile))) @@ -127,7 +124,7 @@ var _ = Describe("NodeClass InstanceProfile Status Controller", func() { nodeClass.Spec.Role = "" nodeClass.Spec.InstanceProfile = lo.ToPtr("test-instance-profile") ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) Expect(awsEnv.IAMAPI.CreateInstanceProfileBehavior.Calls()).To(BeZero()) 
Expect(awsEnv.IAMAPI.AddRoleToInstanceProfileBehavior.Calls()).To(BeZero()) diff --git a/pkg/controllers/nodeclass/status/launchtemplate.go b/pkg/controllers/nodeclass/status/launchtemplate.go deleted file mode 100644 index 7f8477b099fe..000000000000 --- a/pkg/controllers/nodeclass/status/launchtemplate.go +++ /dev/null @@ -1,42 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package status - -import ( - "context" - "fmt" - - "github.com/samber/lo" - "sigs.k8s.io/controller-runtime/pkg/reconcile" - - "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" - "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" -) - -type LaunchTemplate struct { - launchTemplateProvider launchtemplate.Provider -} - -func (lt *LaunchTemplate) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { - // A NodeClass that use AL2023 requires the cluster CIDR for launching nodes. - // To allow Karpenter to be used for Non-EKS clusters, resolving the Cluster CIDR - // will not be done at startup but instead in a reconcile loop. 
- if lo.FromPtr(nodeClass.Spec.AMIFamily) == v1beta1.AMIFamilyAL2023 { - if err := lt.launchTemplateProvider.ResolveClusterCIDR(ctx); err != nil { - return reconcile.Result{}, fmt.Errorf("unable to detect the cluster CIDR, %w", err) - } - } - return reconcile.Result{}, nil -} diff --git a/pkg/controllers/nodeclass/status/launchtemplate_test.go b/pkg/controllers/nodeclass/status/launchtemplate_test.go index 8d5bbb8457ea..f5163d75df5f 100644 --- a/pkg/controllers/nodeclass/status/launchtemplate_test.go +++ b/pkg/controllers/nodeclass/status/launchtemplate_test.go @@ -15,9 +15,6 @@ limitations under the License. package status_test import ( - _ "knative.dev/pkg/system/testing" - "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/aws/aws-sdk-go/service/eks" "github.com/samber/lo" @@ -64,14 +61,14 @@ var _ = Describe("NodeClass Launch Template CIDR Resolution Controller", func() } { nodeClass.Spec.AMIFamily = lo.ToPtr(family) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) Expect(awsEnv.LaunchTemplateProvider.ClusterCIDR.Load()).To(BeNil()) } }) It("should resolve cluster CIDR for IPv4 clusters", func() { nodeClass.Spec.AMIFamily = lo.ToPtr(v1beta1.AMIFamilyAL2023) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) Expect(lo.FromPtr(awsEnv.LaunchTemplateProvider.ClusterCIDR.Load())).To(Equal("10.100.0.0/16")) }) It("should resolve cluster CIDR for IPv6 clusters", func() { @@ -84,7 +81,7 @@ var _ = Describe("NodeClass Launch Template CIDR Resolution Controller", func() }) nodeClass.Spec.AMIFamily = lo.ToPtr(v1beta1.AMIFamilyAL2023) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + 
ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) Expect(lo.FromPtr(awsEnv.LaunchTemplateProvider.ClusterCIDR.Load())).To(Equal("2001:db8::/64")) }) }) diff --git a/pkg/controllers/nodeclass/status/readiness.go b/pkg/controllers/nodeclass/status/readiness.go new file mode 100644 index 000000000000..1aac2e87bf2a --- /dev/null +++ b/pkg/controllers/nodeclass/status/readiness.go @@ -0,0 +1,63 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package status + +import ( + "context" + "fmt" + + "github.com/awslabs/operatorpkg/status" + "github.com/samber/lo" + + "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" + + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" +) + +type Readiness struct { + launchTemplateProvider launchtemplate.Provider +} + +func (n Readiness) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { + if len(nodeClass.Status.AMIs) == 0 { + nodeClass.StatusConditions().SetFalse(status.ConditionReady, "NodeClassNotReady", "Failed to resolve AMIs") + return reconcile.Result{}, nil + } + if len(nodeClass.Status.Subnets) == 0 { + nodeClass.StatusConditions().SetFalse(status.ConditionReady, "NodeClassNotReady", "Failed to resolve subnets") + return reconcile.Result{}, nil + } + if len(nodeClass.Status.SecurityGroups) == 0 { + nodeClass.StatusConditions().SetFalse(status.ConditionReady, "NodeClassNotReady", "Failed to resolve security 
groups") + return reconcile.Result{}, nil + } + if len(nodeClass.Status.InstanceProfile) == 0 { + nodeClass.StatusConditions().SetFalse(status.ConditionReady, "NodeClassNotReady", "Failed to resolve instance profile") + return reconcile.Result{}, nil + } + // A NodeClass that uses AL2023 requires the cluster CIDR for launching nodes. + // To allow Karpenter to be used for Non-EKS clusters, resolving the Cluster CIDR + // will not be done at startup but instead in a reconcile loop. + if lo.FromPtr(nodeClass.Spec.AMIFamily) == v1beta1.AMIFamilyAL2023 { + if err := n.launchTemplateProvider.ResolveClusterCIDR(ctx); err != nil { + nodeClass.StatusConditions().SetFalse(status.ConditionReady, "NodeClassNotReady", "Failed to detect the cluster CIDR") + return reconcile.Result{}, fmt.Errorf("failed to detect the cluster CIDR, %w", err) + } + } + nodeClass.StatusConditions().SetTrue(status.ConditionReady) + return reconcile.Result{}, nil +} diff --git a/pkg/controllers/nodeclass/status/readiness_test.go b/pkg/controllers/nodeclass/status/readiness_test.go new file mode 100644 index 000000000000..8bf678c4b3ad --- /dev/null +++ b/pkg/controllers/nodeclass/status/readiness_test.go @@ -0,0 +1,70 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package status_test + +import ( + "github.com/awslabs/operatorpkg/status" + + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" + "github.com/aws/karpenter-provider-aws/pkg/test" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + . 
"sigs.k8s.io/karpenter/pkg/test/expectations" +) + +var _ = Describe("NodeClass Status Condition Controller", func() { + BeforeEach(func() { + nodeClass = test.EC2NodeClass(v1beta1.EC2NodeClass{ + Spec: v1beta1.EC2NodeClassSpec{ + SubnetSelectorTerms: []v1beta1.SubnetSelectorTerm{ + { + Tags: map[string]string{"*": "*"}, + }, + }, + SecurityGroupSelectorTerms: []v1beta1.SecurityGroupSelectorTerm{ + { + Tags: map[string]string{"*": "*"}, + }, + }, + AMISelectorTerms: []v1beta1.AMISelectorTerm{ + { + Tags: map[string]string{"*": "*"}, + }, + }, + }, + }) + }) + It("should update status condition on nodeClass as Ready", func() { + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.Status.Conditions).To(HaveLen(1)) + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsTrue()).To(BeTrue()) + }) + It("should update status condition as Not Ready", func() { + nodeClass.Spec.SecurityGroupSelectorTerms = []v1beta1.SecurityGroupSelectorTerm{ + { + Tags: map[string]string{"foo": "invalid"}, + }, + } + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsFalse()).To(BeTrue()) + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).Message).To(Equal("Failed to resolve security groups")) + }) +}) diff --git a/pkg/controllers/nodeclass/status/securitygroup.go b/pkg/controllers/nodeclass/status/securitygroup.go index 378398ef04c0..764bf26969de 100644 --- a/pkg/controllers/nodeclass/status/securitygroup.go +++ b/pkg/controllers/nodeclass/status/securitygroup.go @@ -35,11 +35,11 @@ type SecurityGroup struct { func (sg *SecurityGroup) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { securityGroups, err := 
sg.securityGroupProvider.List(ctx, nodeClass) if err != nil { - return reconcile.Result{}, err + return reconcile.Result{}, fmt.Errorf("getting security groups, %w", err) } if len(securityGroups) == 0 && len(nodeClass.Spec.SecurityGroupSelectorTerms) > 0 { nodeClass.Status.SecurityGroups = nil - return reconcile.Result{}, fmt.Errorf("no security groups exist given constraints") + return reconcile.Result{}, nil } sort.Slice(securityGroups, func(i, j int) bool { return *securityGroups[i].GroupId < *securityGroups[j].GroupId diff --git a/pkg/controllers/nodeclass/status/securitygroup_test.go b/pkg/controllers/nodeclass/status/securitygroup_test.go index ad0589122921..78a14476c9ce 100644 --- a/pkg/controllers/nodeclass/status/securitygroup_test.go +++ b/pkg/controllers/nodeclass/status/securitygroup_test.go @@ -15,8 +15,7 @@ limitations under the License. package status_test import ( - _ "knative.dev/pkg/system/testing" - "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/awslabs/operatorpkg/status" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/pkg/test" @@ -50,7 +49,7 @@ var _ = Describe("NodeClass Security Group Status Controller", func() { }) It("Should update EC2NodeClass status for Security Groups", func() { ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.SecurityGroups).To(Equal([]v1beta1.SecurityGroup{ { @@ -77,7 +76,7 @@ var _ = Describe("NodeClass Security Group Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) 
Expect(nodeClass.Status.SecurityGroups).To(Equal([]v1beta1.SecurityGroup{ { @@ -97,7 +96,7 @@ var _ = Describe("NodeClass Security Group Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.SecurityGroups).To(Equal([]v1beta1.SecurityGroup{ { @@ -108,7 +107,7 @@ var _ = Describe("NodeClass Security Group Status Controller", func() { }) It("Should update Security Groups status when the Security Groups selector gets updated by tags", func() { ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.SecurityGroups).To(Equal([]v1beta1.SecurityGroup{ { @@ -134,7 +133,7 @@ var _ = Describe("NodeClass Security Group Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.SecurityGroups).To(Equal([]v1beta1.SecurityGroup{ { @@ -149,7 +148,7 @@ var _ = Describe("NodeClass Security Group Status Controller", func() { }) It("Should update Security Groups status when the Security Groups selector gets updated by ids", func() { ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.SecurityGroups).To(Equal([]v1beta1.SecurityGroup{ { @@ 
-172,7 +171,7 @@ var _ = Describe("NodeClass Security Group Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.SecurityGroups).To(Equal([]v1beta1.SecurityGroup{ { @@ -188,13 +187,15 @@ var _ = Describe("NodeClass Security Group Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileFailed(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.SecurityGroups).To(BeNil()) + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsFalse()).To(BeTrue()) + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).Message).To(Equal("Failed to resolve security groups")) }) It("Should not resolve a invalid selectors for an updated Security Groups selector", func() { ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.SecurityGroups).To(Equal([]v1beta1.SecurityGroup{ { @@ -217,8 +218,10 @@ var _ = Describe("NodeClass Security Group Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileFailed(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.SecurityGroups).To(BeNil()) + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsFalse()).To(BeTrue()) + 
Expect(nodeClass.StatusConditions().Get(status.ConditionReady).Message).To(Equal("Failed to resolve security groups")) }) }) diff --git a/pkg/controllers/nodeclass/status/subnet.go b/pkg/controllers/nodeclass/status/subnet.go index f2562c8336f5..2f87638ca340 100644 --- a/pkg/controllers/nodeclass/status/subnet.go +++ b/pkg/controllers/nodeclass/status/subnet.go @@ -35,11 +35,11 @@ type Subnet struct { func (s *Subnet) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { subnets, err := s.subnetProvider.List(ctx, nodeClass) if err != nil { - return reconcile.Result{}, err + return reconcile.Result{}, fmt.Errorf("getting subnets, %w", err) } if len(subnets) == 0 { nodeClass.Status.Subnets = nil - return reconcile.Result{}, fmt.Errorf("no subnets exist given constraints %v", nodeClass.Spec.SubnetSelectorTerms) + return reconcile.Result{}, nil } sort.Slice(subnets, func(i, j int) bool { if int(*subnets[i].AvailableIpAddressCount) != int(*subnets[j].AvailableIpAddressCount) { @@ -49,8 +49,9 @@ func (s *Subnet) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) }) nodeClass.Status.Subnets = lo.Map(subnets, func(ec2subnet *ec2.Subnet, _ int) v1beta1.Subnet { return v1beta1.Subnet{ - ID: *ec2subnet.SubnetId, - Zone: *ec2subnet.AvailabilityZone, + ID: *ec2subnet.SubnetId, + Zone: *ec2subnet.AvailabilityZone, + ZoneID: *ec2subnet.AvailabilityZoneId, } }) diff --git a/pkg/controllers/nodeclass/status/subnet_test.go b/pkg/controllers/nodeclass/status/subnet_test.go index ba37f1d3b11a..5658e0fc7cce 100644 --- a/pkg/controllers/nodeclass/status/subnet_test.go +++ b/pkg/controllers/nodeclass/status/subnet_test.go @@ -15,11 +15,9 @@ limitations under the License. 
package status_test import ( - _ "knative.dev/pkg/system/testing" - "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/awslabs/operatorpkg/status" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/pkg/test" @@ -53,48 +51,55 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { }) It("Should update EC2NodeClass status for Subnets", func() { ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, { - ID: "subnet-test4", - Zone: "test-zone-1a-local", + ID: "subnet-test4", + Zone: "test-zone-1a-local", + ZoneID: "tstz1-1alocal", }, })) }) It("Should have the correct ordering for the Subnets", func() { awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{Subnets: []*ec2.Subnet{ - {SubnetId: aws.String("subnet-test1"), AvailabilityZone: aws.String("test-zone-1a"), AvailableIpAddressCount: aws.Int64(20)}, - {SubnetId: aws.String("subnet-test2"), AvailabilityZone: aws.String("test-zone-1b"), AvailableIpAddressCount: aws.Int64(100)}, - {SubnetId: aws.String("subnet-test3"), AvailabilityZone: aws.String("test-zone-1c"), AvailableIpAddressCount: aws.Int64(50)}, + {SubnetId: aws.String("subnet-test1"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: 
aws.Int64(20)}, + {SubnetId: aws.String("subnet-test2"), AvailabilityZone: aws.String("test-zone-1b"), AvailabilityZoneId: aws.String("tstz1-1b"), AvailableIpAddressCount: aws.Int64(100)}, + {SubnetId: aws.String("subnet-test3"), AvailabilityZone: aws.String("test-zone-1c"), AvailabilityZoneId: aws.String("tstz1-1c"), AvailableIpAddressCount: aws.Int64(50)}, }}) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, })) }) @@ -108,16 +113,18 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, })) }) @@ -128,35 +135,40 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = 
ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, })) }) It("Should update Subnet status when the Subnet selector gets updated by tags", func() { ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, { - ID: "subnet-test4", - Zone: "test-zone-1a-local", + ID: "subnet-test4", + Zone: "test-zone-1a-local", + ZoneID: "tstz1-1alocal", }, })) @@ -173,39 +185,45 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, })) }) It("Should update Subnet status when the Subnet selector gets updated by ids", func() { ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + 
ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, { - ID: "subnet-test4", - Zone: "test-zone-1a-local", + ID: "subnet-test4", + Zone: "test-zone-1a-local", + ZoneID: "tstz1-1alocal", }, })) @@ -215,12 +233,13 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, })) }) @@ -231,30 +250,36 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileFailed(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(BeNil()) + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsFalse()).To(BeTrue()) + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).Message).To(Equal("Failed to resolve subnets")) }) It("Should not resolve a invalid selectors for an updated subnet selector", func() { ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, statusController, 
client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(Equal([]v1beta1.Subnet{ { - ID: "subnet-test1", - Zone: "test-zone-1a", + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", }, { - ID: "subnet-test2", - Zone: "test-zone-1b", + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", }, { - ID: "subnet-test3", - Zone: "test-zone-1c", + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", }, { - ID: "subnet-test4", - Zone: "test-zone-1a-local", + ID: "subnet-test4", + Zone: "test-zone-1a-local", + ZoneID: "tstz1-1alocal", }, })) @@ -264,8 +289,10 @@ var _ = Describe("NodeClass Subnet Status Controller", func() { }, } ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileFailed(ctx, statusController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.Status.Subnets).To(BeNil()) + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).IsFalse()).To(BeTrue()) + Expect(nodeClass.StatusConditions().Get(status.ConditionReady).Message).To(Equal("Failed to resolve subnets")) }) }) diff --git a/pkg/controllers/nodeclass/status/suite_test.go b/pkg/controllers/nodeclass/status/suite_test.go index 545c9ba6cfd5..bd3afe8886ab 100644 --- a/pkg/controllers/nodeclass/status/suite_test.go +++ b/pkg/controllers/nodeclass/status/suite_test.go @@ -18,9 +18,6 @@ import ( "context" "testing" - _ "knative.dev/pkg/system/testing" - - corecontroller "sigs.k8s.io/karpenter/pkg/operator/controller" coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/operator/scheme" coretest "sigs.k8s.io/karpenter/pkg/test" @@ -33,15 +30,15 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . 
"sigs.k8s.io/karpenter/pkg/test/expectations" + . "sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context var env *coretest.Environment var awsEnv *test.Environment var nodeClass *v1beta1.EC2NodeClass -var statusController corecontroller.Controller +var statusController *status.Controller func TestAPIs(t *testing.T) { ctx = TestContextWithLogger(t) diff --git a/pkg/controllers/nodeclass/termination/controller.go b/pkg/controllers/nodeclass/termination/controller.go index b31d491148f3..884c3a7c8405 100644 --- a/pkg/controllers/nodeclass/termination/controller.go +++ b/pkg/controllers/nodeclass/termination/controller.go @@ -20,6 +20,7 @@ import ( "time" "k8s.io/apimachinery/pkg/api/errors" + "sigs.k8s.io/karpenter/pkg/operator/injection" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" @@ -37,18 +38,14 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/awslabs/operatorpkg/reasonable" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/events" - corecontroller "sigs.k8s.io/karpenter/pkg/operator/controller" - - "github.com/awslabs/operatorpkg/reasonable" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" ) -var _ corecontroller.FinalizingTypedController[*v1beta1.EC2NodeClass] = (*Controller)(nil) - type Controller struct { kubeClient client.Client recorder events.Recorder @@ -57,21 +54,26 @@ type Controller struct { } func NewController(kubeClient client.Client, recorder events.Recorder, - instanceProfileProvider instanceprofile.Provider, launchTemplateProvider launchtemplate.Provider) corecontroller.Controller { + instanceProfileProvider instanceprofile.Provider, launchTemplateProvider launchtemplate.Provider) *Controller { - return corecontroller.Typed[*v1beta1.EC2NodeClass](kubeClient, &Controller{ + return &Controller{ kubeClient: kubeClient, recorder: recorder, 
instanceProfileProvider: instanceProfileProvider, launchTemplateProvider: launchTemplateProvider, - }) + } } func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { + ctx = injection.WithControllerName(ctx, "nodeclass.termination") + + if !nodeClass.GetDeletionTimestamp().IsZero() { + return c.finalize(ctx, nodeClass) + } return reconcile.Result{}, nil } -func (c *Controller) Finalize(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { +func (c *Controller) finalize(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (reconcile.Result, error) { stored := nodeClass.DeepCopy() if !controllerutil.ContainsFinalizer(nodeClass, v1beta1.TerminationFinalizer) { return reconcile.Result{}, nil @@ -107,13 +109,9 @@ func (c *Controller) Finalize(ctx context.Context, nodeClass *v1beta1.EC2NodeCla return reconcile.Result{}, nil } -func (c *Controller) Name() string { - return "nodeclass.termination" -} - -func (c *Controller) Builder(_ context.Context, m manager.Manager) corecontroller.Builder { - return corecontroller.Adapt(controllerruntime. - NewControllerManagedBy(m). +func (c *Controller) Register(_ context.Context, m manager.Manager) error { + return controllerruntime.NewControllerManagedBy(m). + Named("nodeclass.termination"). For(&v1beta1.EC2NodeClass{}). Watches( &corev1beta1.NodeClaim{}, @@ -134,5 +132,6 @@ func (c *Controller) Builder(_ context.Context, m manager.Manager) corecontrolle WithOptions(controller.Options{ RateLimiter: reasonable.RateLimiter(), MaxConcurrentReconciles: 10, - })) + }). 
+ Complete(reconcile.AsReconciler(m.GetClient(), c)) } diff --git a/pkg/controllers/nodeclass/termination/suite_test.go b/pkg/controllers/nodeclass/termination/suite_test.go index 7b707f516039..f45a45b4f69a 100644 --- a/pkg/controllers/nodeclass/termination/suite_test.go +++ b/pkg/controllers/nodeclass/termination/suite_test.go @@ -20,18 +20,14 @@ import ( "testing" "time" - "github.com/samber/lo" - "k8s.io/client-go/tools/record" - _ "knative.dev/pkg/system/testing" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/iam" + "github.com/samber/lo" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/events" - corecontroller "sigs.k8s.io/karpenter/pkg/operator/controller" coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/operator/scheme" coretest "sigs.k8s.io/karpenter/pkg/test" @@ -45,14 +41,14 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . 
"sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context var env *coretest.Environment var awsEnv *test.Environment -var terminationController corecontroller.Controller +var terminationController *termination.Controller func TestAPIs(t *testing.T) { ctx = TestContextWithLogger(t) @@ -115,11 +111,11 @@ var _ = Describe("NodeClass Termination", func() { Expect(ok).To(BeTrue()) controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed()) awsEnv.EC2API.NextError.Set(fmt.Errorf("delete Launch Template Error")) - ExpectReconcileFailed(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + _ = ExpectObjectReconcileFailed(ctx, env.Client, terminationController, nodeClass) ExpectExists(ctx, env.Client, nodeClass) }) It("should not delete the launch template not associated with the nodeClass", func() { @@ -129,10 +125,10 @@ var _ = Describe("NodeClass Termination", func() { Expect(ok).To(BeTrue()) controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed()) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) _, ok = awsEnv.EC2API.LaunchTemplates.Load(launchTemplateName) Expect(ok).To(BeTrue()) ExpectNotFound(ctx, env.Client, nodeClass) @@ -148,10 +144,10 @@ var _ = Describe("NodeClass Termination", func() { Expect(ok).To(BeTrue()) controllerutil.AddFinalizer(nodeClass, 
v1beta1.TerminationFinalizer) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed()) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) _, ok = awsEnv.EC2API.LaunchTemplates.Load(ltName1) Expect(ok).To(BeFalse()) _, ok = awsEnv.EC2API.LaunchTemplates.Load(ltName2) @@ -172,11 +168,11 @@ var _ = Describe("NodeClass Termination", func() { } controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1)) Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed()) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(0)) ExpectNotFound(ctx, env.Client, nodeClass) }) @@ -188,11 +184,11 @@ var _ = Describe("NodeClass Termination", func() { } controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1)) Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed()) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) 
Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(0)) ExpectNotFound(ctx, env.Client, nodeClass) }) @@ -202,7 +198,7 @@ var _ = Describe("NodeClass Termination", func() { ExpectApplied(ctx, env.Client, nodeClass) Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed()) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(0)) ExpectNotFound(ctx, env.Client, nodeClass) }) @@ -232,11 +228,11 @@ var _ = Describe("NodeClass Termination", func() { } controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1)) Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed()) - res := ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + res := ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(res.RequeueAfter).To(Equal(time.Minute * 10)) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1)) ExpectExists(ctx, env.Client, nodeClass) @@ -244,7 +240,7 @@ var _ = Describe("NodeClass Termination", func() { // Delete one of the NodeClaims // The NodeClass should still not delete ExpectDeleted(ctx, env.Client, nodeClaims[0]) - res = ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + res = ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(res.RequeueAfter).To(Equal(time.Minute * 10)) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1)) ExpectExists(ctx, env.Client, nodeClass) @@ -252,7 +248,7 @@ var _ = Describe("NodeClass Termination", func() { // Delete the last NodeClaim // The NodeClass should now delete 
ExpectDeleted(ctx, env.Client, nodeClaims[1]) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(0)) ExpectNotFound(ctx, env.Client, nodeClass) }) @@ -272,11 +268,11 @@ var _ = Describe("NodeClass Termination", func() { nodeClass.Spec.InstanceProfile = lo.ToPtr("test-instance-profile") controllerutil.AddFinalizer(nodeClass, v1beta1.TerminationFinalizer) ExpectApplied(ctx, env.Client, nodeClass) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1)) Expect(env.Client.Delete(ctx, nodeClass)).To(Succeed()) - ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, terminationController, nodeClass) Expect(awsEnv.IAMAPI.InstanceProfiles).To(HaveLen(1)) ExpectNotFound(ctx, env.Client, nodeClass) diff --git a/pkg/controllers/providers/instancetype/controller.go b/pkg/controllers/providers/instancetype/controller.go index 2a35101c8e7c..0768c81d1abb 100644 --- a/pkg/controllers/providers/instancetype/controller.go +++ b/pkg/controllers/providers/instancetype/controller.go @@ -55,11 +55,9 @@ func (c *Controller) Reconcile(ctx context.Context, _ reconcile.Request) (reconc return reconcile.Result{RequeueAfter: 12 * time.Hour}, nil } -func (c *Controller) Name() string { - return "providers.instancetype" -} - -func (c *Controller) Builder(_ context.Context, m manager.Manager) controller.Builder { +func (c *Controller) Register(_ context.Context, m manager.Manager) error { // Includes a default exponential failure rate limiter of base: time.Millisecond, and max: 1000*time.Second - return controller.NewSingletonManagedBy(m) + return controller.NewSingletonManagedBy(m). 
+ Named("providers.instancetype"). + Complete(c) } diff --git a/pkg/controllers/providers/instancetype/suite_test.go b/pkg/controllers/providers/instancetype/suite_test.go index b2d7f3fca2ee..4a156e5d9ae6 100644 --- a/pkg/controllers/providers/instancetype/suite_test.go +++ b/pkg/controllers/providers/instancetype/suite_test.go @@ -18,6 +18,7 @@ import ( "context" "testing" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" @@ -36,8 +37,8 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . "sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context @@ -150,7 +151,7 @@ var _ = Describe("InstanceType", func() { }) Expect(found).To(BeTrue()) for y := range instanceTypes[x].Offerings { - Expect(instanceTypes[x].Offerings[y].Zone).To(Equal(lo.FromPtr(offering.Location))) + Expect(instanceTypes[x].Offerings[y].Requirements.Get(v1.LabelTopologyZone).Any()).To(Equal(lo.FromPtr(offering.Location))) } } }) diff --git a/pkg/controllers/providers/pricing/controller.go b/pkg/controllers/providers/pricing/controller.go index e07f1b2307f0..8cd42a8fd489 100644 --- a/pkg/controllers/providers/pricing/controller.go +++ b/pkg/controllers/providers/pricing/controller.go @@ -56,10 +56,8 @@ func (c *Controller) Reconcile(ctx context.Context, _ reconcile.Request) (reconc return reconcile.Result{RequeueAfter: 12 * time.Hour}, nil } -func (c *Controller) Name() string { - return "providers.pricing" -} - -func (c *Controller) Builder(_ context.Context, m manager.Manager) controller.Builder { - return controller.NewSingletonManagedBy(m) +func (c *Controller) Register(_ context.Context, m manager.Manager) error { + return controller.NewSingletonManagedBy(m). + Named("providers.pricing"). 
+ Complete(c) } diff --git a/pkg/controllers/providers/pricing/suite_test.go b/pkg/controllers/providers/pricing/suite_test.go index 02ab8e84bad0..8941d0f85cac 100644 --- a/pkg/controllers/providers/pricing/suite_test.go +++ b/pkg/controllers/providers/pricing/suite_test.go @@ -38,8 +38,8 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . "sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context diff --git a/pkg/fake/ec2api.go b/pkg/fake/ec2api.go index ce9cf21e0087..654e986084da 100644 --- a/pkg/fake/ec2api.go +++ b/pkg/fake/ec2api.go @@ -418,6 +418,7 @@ func (e *EC2API) DescribeSubnetsWithContext(_ context.Context, input *ec2.Descri { SubnetId: aws.String("subnet-test1"), AvailabilityZone: aws.String("test-zone-1a"), + AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int64(100), MapPublicIpOnLaunch: aws.Bool(false), Tags: []*ec2.Tag{ @@ -428,6 +429,7 @@ func (e *EC2API) DescribeSubnetsWithContext(_ context.Context, input *ec2.Descri { SubnetId: aws.String("subnet-test2"), AvailabilityZone: aws.String("test-zone-1b"), + AvailabilityZoneId: aws.String("tstz1-1b"), AvailableIpAddressCount: aws.Int64(100), MapPublicIpOnLaunch: aws.Bool(true), Tags: []*ec2.Tag{ @@ -438,6 +440,7 @@ func (e *EC2API) DescribeSubnetsWithContext(_ context.Context, input *ec2.Descri { SubnetId: aws.String("subnet-test3"), AvailabilityZone: aws.String("test-zone-1c"), + AvailabilityZoneId: aws.String("tstz1-1c"), AvailableIpAddressCount: aws.Int64(100), Tags: []*ec2.Tag{ {Key: aws.String("Name"), Value: aws.String("test-subnet-3")}, @@ -448,6 +451,7 @@ func (e *EC2API) DescribeSubnetsWithContext(_ context.Context, input *ec2.Descri { SubnetId: aws.String("subnet-test4"), AvailabilityZone: aws.String("test-zone-1a-local"), + AvailabilityZoneId: aws.String("tstz1-1alocal"), AvailableIpAddressCount: aws.Int64(100), MapPublicIpOnLaunch: aws.Bool(true), 
Tags: []*ec2.Tag{ @@ -513,10 +517,10 @@ func (e *EC2API) DescribeAvailabilityZonesWithContext(context.Context, *ec2.Desc return e.DescribeAvailabilityZonesOutput.Clone(), nil } return &ec2.DescribeAvailabilityZonesOutput{AvailabilityZones: []*ec2.AvailabilityZone{ - {ZoneName: aws.String("test-zone-1a"), ZoneId: aws.String("testzone1a"), ZoneType: aws.String("availability-zone")}, - {ZoneName: aws.String("test-zone-1b"), ZoneId: aws.String("testzone1b"), ZoneType: aws.String("availability-zone")}, - {ZoneName: aws.String("test-zone-1c"), ZoneId: aws.String("testzone1c"), ZoneType: aws.String("availability-zone")}, - {ZoneName: aws.String("test-zone-1a-local"), ZoneId: aws.String("testzone1alocal"), ZoneType: aws.String("local-zone")}, + {ZoneName: aws.String("test-zone-1a"), ZoneId: aws.String("tstz1-1a"), ZoneType: aws.String("availability-zone")}, + {ZoneName: aws.String("test-zone-1b"), ZoneId: aws.String("tstz1-1b"), ZoneType: aws.String("availability-zone")}, + {ZoneName: aws.String("test-zone-1c"), ZoneId: aws.String("tstz1-1c"), ZoneType: aws.String("availability-zone")}, + {ZoneName: aws.String("test-zone-1a-local"), ZoneId: aws.String("tstz1-1alocal"), ZoneType: aws.String("local-zone")}, }}, nil } diff --git a/pkg/fake/zz_generated.describe_instance_types.go b/pkg/fake/zz_generated.describe_instance_types.go index ad2b7da73e67..15b8ef5a4ea6 100644 --- a/pkg/fake/zz_generated.describe_instance_types.go +++ b/pkg/fake/zz_generated.describe_instance_types.go @@ -45,6 +45,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(4096), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(630), + BaselineIops: aws.Int64(3600), + BaselineThroughputInMBps: aws.Float64(78.75), + MaximumBandwidthInMbps: aws.Int64(4750), + MaximumIops: aws.Int64(20000), + MaximumThroughputInMBps: aws.Float64(593.75), + }, + EbsOptimizedSupport: 
aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, NetworkInfo: &ec2.NetworkInfo{ MaximumNetworkInterfaces: aws.Int64(3), Ipv4AddressesPerInterface: aws.Int64(10), @@ -76,6 +89,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(786432), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(19000), + BaselineIops: aws.Int64(80000), + BaselineThroughputInMBps: aws.Float64(2375.00), + MaximumBandwidthInMbps: aws.Int64(19000), + MaximumIops: aws.Int64(80000), + MaximumThroughputInMBps: aws.Float64(2375.00), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, GpuInfo: &ec2.GpuInfo{ Gpus: []*ec2.GpuDeviceInfo{ { @@ -137,6 +163,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(131072), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(9500), + BaselineIops: aws.Int64(40000), + BaselineThroughputInMBps: aws.Float64(1187.50), + MaximumBandwidthInMbps: aws.Int64(9500), + MaximumIops: aws.Int64(40000), + MaximumThroughputInMBps: aws.Float64(1187.50), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, GpuInfo: &ec2.GpuInfo{ Gpus: []*ec2.GpuDeviceInfo{ { @@ -186,6 +225,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(16384), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(1190), + BaselineIops: aws.Int64(6000), + BaselineThroughputInMBps: aws.Float64(148.75), + MaximumBandwidthInMbps: aws.Int64(4750), + MaximumIops: aws.Int64(20000), 
+ MaximumThroughputInMBps: aws.Float64(593.75), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, InferenceAcceleratorInfo: &ec2.InferenceAcceleratorInfo{ Accelerators: []*ec2.InferenceDeviceInfo{ { @@ -226,6 +278,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(49152), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(4750), + BaselineIops: aws.Int64(20000), + BaselineThroughputInMBps: aws.Float64(593.75), + MaximumBandwidthInMbps: aws.Int64(4750), + MaximumIops: aws.Int64(20000), + MaximumThroughputInMBps: aws.Float64(593.75), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, InferenceAcceleratorInfo: &ec2.InferenceAcceleratorInfo{ Accelerators: []*ec2.InferenceDeviceInfo{ { @@ -266,6 +331,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(8192), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(650), + BaselineIops: aws.Int64(3600), + BaselineThroughputInMBps: aws.Float64(81.25), + MaximumBandwidthInMbps: aws.Int64(4750), + MaximumIops: aws.Int64(18750), + MaximumThroughputInMBps: aws.Float64(593.75), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, NetworkInfo: &ec2.NetworkInfo{ MaximumNetworkInterfaces: aws.Int64(3), Ipv4AddressesPerInterface: aws.Int64(10), @@ -297,6 +375,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(393216), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: 
aws.Int64(19000), + BaselineIops: aws.Int64(80000), + BaselineThroughputInMBps: aws.Float64(2375.00), + MaximumBandwidthInMbps: aws.Int64(19000), + MaximumIops: aws.Int64(80000), + MaximumThroughputInMBps: aws.Float64(2375.00), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, NetworkInfo: &ec2.NetworkInfo{ MaximumNetworkInterfaces: aws.Int64(15), Ipv4AddressesPerInterface: aws.Int64(50), @@ -328,6 +419,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(16384), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(1150), + BaselineIops: aws.Int64(6000), + BaselineThroughputInMBps: aws.Float64(143.75), + MaximumBandwidthInMbps: aws.Int64(4750), + MaximumIops: aws.Int64(18750), + MaximumThroughputInMBps: aws.Float64(593.75), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, NetworkInfo: &ec2.NetworkInfo{ MaximumNetworkInterfaces: aws.Int64(4), Ipv4AddressesPerInterface: aws.Int64(15), @@ -359,6 +463,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(524288), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(100000), + BaselineIops: aws.Int64(400000), + BaselineThroughputInMBps: aws.Float64(12500.00), + MaximumBandwidthInMbps: aws.Int64(100000), + MaximumIops: aws.Int64(400000), + MaximumThroughputInMBps: aws.Float64(12500.00), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, InstanceStorageInfo: &ec2.InstanceStorageInfo{NvmeSupport: aws.String("required"), TotalSizeInGB: aws.Int64(7600), }, @@ -366,18 +483,18 @@ var 
defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ EfaInfo: &ec2.EfaInfo{ MaximumEfaInterfaces: aws.Int64(2), }, - MaximumNetworkInterfaces: aws.Int64(14), + MaximumNetworkInterfaces: aws.Int64(16), Ipv4AddressesPerInterface: aws.Int64(50), EncryptionInTransitSupported: aws.Bool(true), DefaultNetworkCardIndex: aws.Int64(0), NetworkCards: []*ec2.NetworkCardInfo{ { NetworkCardIndex: aws.Int64(0), - MaximumNetworkInterfaces: aws.Int64(7), + MaximumNetworkInterfaces: aws.Int64(8), }, { NetworkCardIndex: aws.Int64(1), - MaximumNetworkInterfaces: aws.Int64(7), + MaximumNetworkInterfaces: aws.Int64(8), }, }, }, @@ -400,6 +517,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(249856), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(7000), + BaselineIops: aws.Int64(40000), + BaselineThroughputInMBps: aws.Float64(875.00), + MaximumBandwidthInMbps: aws.Int64(7000), + MaximumIops: aws.Int64(40000), + MaximumThroughputInMBps: aws.Float64(875.00), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("unsupported"), + }, GpuInfo: &ec2.GpuInfo{ Gpus: []*ec2.GpuDeviceInfo{ { @@ -443,6 +573,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(8192), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(695), + BaselineIops: aws.Int64(4000), + BaselineThroughputInMBps: aws.Float64(86.88), + MaximumBandwidthInMbps: aws.Int64(2780), + MaximumIops: aws.Int64(15700), + MaximumThroughputInMBps: aws.Float64(347.50), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, NetworkInfo: &ec2.NetworkInfo{ MaximumNetworkInterfaces: aws.Int64(3), 
Ipv4AddressesPerInterface: aws.Int64(12), @@ -474,6 +617,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(4096), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(347), + BaselineIops: aws.Int64(2000), + BaselineThroughputInMBps: aws.Float64(43.38), + MaximumBandwidthInMbps: aws.Int64(2085), + MaximumIops: aws.Int64(11800), + MaximumThroughputInMBps: aws.Float64(260.62), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, NetworkInfo: &ec2.NetworkInfo{ MaximumNetworkInterfaces: aws.Int64(3), Ipv4AddressesPerInterface: aws.Int64(6), @@ -505,6 +661,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(2048), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(174), + BaselineIops: aws.Int64(1000), + BaselineThroughputInMBps: aws.Float64(21.75), + MaximumBandwidthInMbps: aws.Int64(2085), + MaximumIops: aws.Int64(11800), + MaximumThroughputInMBps: aws.Float64(260.62), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, NetworkInfo: &ec2.NetworkInfo{ MaximumNetworkInterfaces: aws.Int64(3), Ipv4AddressesPerInterface: aws.Int64(4), @@ -536,6 +705,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(16384), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(695), + BaselineIops: aws.Int64(4000), + BaselineThroughputInMBps: aws.Float64(86.88), + MaximumBandwidthInMbps: aws.Int64(2780), + MaximumIops: aws.Int64(15700), + MaximumThroughputInMBps: aws.Float64(347.50), + }, + EbsOptimizedSupport: 
aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, NetworkInfo: &ec2.NetworkInfo{ MaximumNetworkInterfaces: aws.Int64(4), Ipv4AddressesPerInterface: aws.Int64(15), @@ -567,6 +749,19 @@ var defaultDescribeInstanceTypesOutput = &ec2.DescribeInstanceTypesOutput{ MemoryInfo: &ec2.MemoryInfo{ SizeInMiB: aws.Int64(32768), }, + EbsInfo: &ec2.EbsInfo{ + EbsOptimizedInfo: &ec2.EbsOptimizedInfo{ + BaselineBandwidthInMbps: aws.Int64(5000), + BaselineIops: aws.Int64(16250), + BaselineThroughputInMBps: aws.Float64(625.00), + MaximumBandwidthInMbps: aws.Int64(20000), + MaximumIops: aws.Int64(65000), + MaximumThroughputInMBps: aws.Float64(2500.00), + }, + EbsOptimizedSupport: aws.String("default"), + EncryptionSupport: aws.String("supported"), + NvmeSupport: aws.String("required"), + }, InstanceStorageInfo: &ec2.InstanceStorageInfo{NvmeSupport: aws.String("required"), TotalSizeInGB: aws.Int64(474), }, diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 113b863f9732..3b95d093d869 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "net" + "os" "time" "github.com/aws/aws-sdk-go/aws" @@ -43,8 +44,7 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/transport" - "knative.dev/pkg/logging" - "knative.dev/pkg/ptr" + "sigs.k8s.io/controller-runtime/pkg/log" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/operator" @@ -106,29 +106,31 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont ))) if *sess.Config.Region == "" { - logging.FromContext(ctx).Debug("retrieving region from IMDS") + log.FromContext(ctx).V(1).Info("retrieving region from IMDS") region, err := ec2metadata.New(sess).Region() *sess.Config.Region = lo.Must(region, err, "failed to get region from metadata server") } ec2api := ec2.New(sess) if err := CheckEC2Connectivity(ctx, ec2api); err != nil 
{ - logging.FromContext(ctx).Fatalf("Checking EC2 API connectivity, %s", err) + log.FromContext(ctx).Error(err, "ec2 api connectivity check failed") + os.Exit(1) } - logging.FromContext(ctx).With("region", *sess.Config.Region).Debugf("discovered region") + log.FromContext(ctx).WithValues("region", *sess.Config.Region).V(1).Info("discovered region") clusterEndpoint, err := ResolveClusterEndpoint(ctx, eks.New(sess)) if err != nil { - logging.FromContext(ctx).Fatalf("unable to detect the cluster endpoint, %s", err) + log.FromContext(ctx).Error(err, "failed detecting cluster endpoint") + os.Exit(1) } else { - logging.FromContext(ctx).With("cluster-endpoint", clusterEndpoint).Debugf("discovered cluster endpoint") + log.FromContext(ctx).WithValues("cluster-endpoint", clusterEndpoint).V(1).Info("discovered cluster endpoint") } // We perform best-effort on resolving the kube-dns IP kubeDNSIP, err := KubeDNSIP(ctx, operator.KubernetesInterface) if err != nil { // If we fail to get the kube-dns IP, we don't want to crash because this causes issues with custom DNS setups // https://github.com/aws/karpenter-provider-aws/issues/2787 - logging.FromContext(ctx).Debugf("unable to detect the IP of the kube-dns service, %s", err) + log.FromContext(ctx).V(1).Info(fmt.Sprintf("unable to detect the IP of the kube-dns service, %s", err)) } else { - logging.FromContext(ctx).With("kube-dns-ip", kubeDNSIP).Debugf("discovered kube dns") + log.FromContext(ctx).WithValues("kube-dns-ip", kubeDNSIP).V(1).Info("discovered kube dns") } unavailableOfferingsCache := awscache.NewUnavailableOfferings() @@ -241,7 +243,7 @@ func GetCABundle(ctx context.Context, restConfig *rest.Config) (*string, error) if err != nil { return nil, fmt.Errorf("discovering caBundle, loading TLS config, %w", err) } - return ptr.String(base64.StdEncoding.EncodeToString(transportConfig.TLS.CAData)), nil + return lo.ToPtr(base64.StdEncoding.EncodeToString(transportConfig.TLS.CAData)), nil } func KubeDNSIP(ctx context.Context, 
kubernetesInterface kubernetes.Interface) (net.IP, error) { diff --git a/pkg/operator/options/suite_test.go b/pkg/operator/options/suite_test.go index 6b7e39715149..04281d8dacdf 100644 --- a/pkg/operator/options/suite_test.go +++ b/pkg/operator/options/suite_test.go @@ -29,7 +29,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" + . "sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context diff --git a/pkg/operator/suite_test.go b/pkg/operator/suite_test.go index f725b2f23652..e9b47af1664d 100644 --- a/pkg/operator/suite_test.go +++ b/pkg/operator/suite_test.go @@ -33,8 +33,8 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . "sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context diff --git a/pkg/providers/amifamily/ami.go b/pkg/providers/amifamily/ami.go index fd89ced04dcb..5ed0f57d78d4 100644 --- a/pkg/providers/amifamily/ami.go +++ b/pkg/providers/amifamily/ami.go @@ -18,7 +18,7 @@ import ( "context" "fmt" "sort" - "strings" + "sync" "time" "github.com/aws/aws-sdk-go/aws" @@ -30,7 +30,7 @@ import ( "github.com/patrickmn/go-cache" "github.com/samber/lo" v1 "k8s.io/api/core/v1" - "knative.dev/pkg/logging" + "sigs.k8s.io/controller-runtime/pkg/log" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/pkg/providers/version" @@ -41,10 +41,11 @@ import ( ) type Provider interface { - Get(ctx context.Context, nodeClass *v1beta1.EC2NodeClass, options *Options) (AMIs, error) + List(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (AMIs, error) } type DefaultProvider struct { + sync.Mutex cache *cache.Cache ssm ssmiface.SSMAPI ec2api ec2iface.EC2API @@ -62,7 +63,7 @@ type AMI struct { type AMIs []AMI // Sort orders the AMIs by creation date in descending order. 
-// If creation date is nil or two AMIs have the same creation date, the AMIs will be sorted by name in ascending order. +// If creation date is nil or two AMIs have the same creation date, the AMIs will be sorted by ID, which is guaranteed to be unique, in ascending order. func (a AMIs) Sort() { sort.Slice(a, func(i, j int) bool { itime, _ := time.Parse(time.RFC3339, a[i].CreationDate) @@ -70,34 +71,17 @@ func (a AMIs) Sort() { if itime.Unix() != jtime.Unix() { return itime.Unix() > jtime.Unix() } - if a[i].Name != a[j].Name { - return a[i].Name < a[j].Name - } - iHash, _ := hashstructure.Hash(a[i].Requirements, hashstructure.FormatV2, &hashstructure.HashOptions{}) - jHash, _ := hashstructure.Hash(a[i].Requirements, hashstructure.FormatV2, &hashstructure.HashOptions{}) - return iHash < jHash + return a[i].AmiID < a[j].AmiID }) } -func (a AMIs) String() string { - var sb strings.Builder - ids := lo.Map(a, func(a AMI, _ int) string { return a.AmiID }) - if len(a) > 25 { - sb.WriteString(strings.Join(ids[:25], ", ")) - sb.WriteString(fmt.Sprintf(" and %d other(s)", len(a)-25)) - } else { - sb.WriteString(strings.Join(ids, ", ")) - } - return sb.String() -} - // MapToInstanceTypes returns a map of AMIIDs that are the most recent on creationDate to compatible instancetypes -func (a AMIs) MapToInstanceTypes(instanceTypes []*cloudprovider.InstanceType) map[string][]*cloudprovider.InstanceType { +func MapToInstanceTypes(instanceTypes []*cloudprovider.InstanceType, amis []v1beta1.AMI) map[string][]*cloudprovider.InstanceType { amiIDs := map[string][]*cloudprovider.InstanceType{} for _, instanceType := range instanceTypes { - for _, ami := range a { - if err := instanceType.Requirements.Compatible(ami.Requirements, scheduling.AllowUndefinedWellKnownLabels); err == nil { - amiIDs[ami.AmiID] = append(amiIDs[ami.AmiID], instanceType) + for _, ami := range amis { + if err := instanceType.Requirements.Compatible(scheduling.NewNodeSelectorRequirements(ami.Requirements...), 
scheduling.AllowUndefinedWellKnownLabels); err == nil { + amiIDs[ami.ID] = append(amiIDs[ami.ID], instanceType) break } } @@ -116,11 +100,14 @@ func NewDefaultProvider(versionProvider version.Provider, ssm ssmiface.SSMAPI, e } // Get Returning a list of AMIs with its associated requirements -func (p *DefaultProvider) Get(ctx context.Context, nodeClass *v1beta1.EC2NodeClass, options *Options) (AMIs, error) { +func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (AMIs, error) { + p.Lock() + defer p.Unlock() + var err error var amis AMIs if len(nodeClass.Spec.AMISelectorTerms) == 0 { - amis, err = p.getDefaultAMIs(ctx, nodeClass, options) + amis, err = p.getDefaultAMIs(ctx, nodeClass) if err != nil { return nil, err } @@ -131,17 +118,21 @@ func (p *DefaultProvider) Get(ctx context.Context, nodeClass *v1beta1.EC2NodeCla } } amis.Sort() - if p.cm.HasChanged(fmt.Sprintf("amis/%s", nodeClass.Name), amis) { - logging.FromContext(ctx).With("ids", amis, "count", len(amis)).Debugf("discovered amis") + uniqueAMIs := lo.Uniq(lo.Map(amis, func(a AMI, _ int) string { return a.AmiID })) + if p.cm.HasChanged(fmt.Sprintf("amis/%s", nodeClass.Name), uniqueAMIs) { + log.FromContext(ctx).WithValues( + "ids", uniqueAMIs).V(1).Info("discovered amis") } return amis, nil } -func (p *DefaultProvider) getDefaultAMIs(ctx context.Context, nodeClass *v1beta1.EC2NodeClass, options *Options) (res AMIs, err error) { +func (p *DefaultProvider) getDefaultAMIs(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (res AMIs, err error) { if images, ok := p.cache.Get(lo.FromPtr(nodeClass.Spec.AMIFamily)); ok { - return images.(AMIs), nil + // Ensure what's returned from this function is a deep-copy of AMIs so alterations + // to the data don't affect the original + return append(AMIs{}, images.(AMIs)...), nil } - amiFamily := GetAMIFamily(nodeClass.Spec.AMIFamily, options) + amiFamily := GetAMIFamily(nodeClass.Spec.AMIFamily, &Options{}) kubernetesVersion, err := 
p.versionProvider.Get(ctx) if err != nil { return nil, fmt.Errorf("getting kubernetes version %w", err) @@ -149,7 +140,7 @@ func (p *DefaultProvider) getDefaultAMIs(ctx context.Context, nodeClass *v1beta1 defaultAMIs := amiFamily.DefaultAMIs(kubernetesVersion) for _, ami := range defaultAMIs { if id, err := p.resolveSSMParameter(ctx, ami.Query); err != nil { - logging.FromContext(ctx).With("query", ami.Query).Errorf("discovering amis from ssm, %s", err) + log.FromContext(ctx).WithValues("query", ami.Query).Error(err, "failed discovering amis from ssm") } else { res = append(res, AMI{AmiID: id, Requirements: ami.Requirements}) } @@ -191,7 +182,9 @@ func (p *DefaultProvider) getAMIs(ctx context.Context, terms []v1beta1.AMISelect return nil, err } if images, ok := p.cache.Get(fmt.Sprintf("%d", hash)); ok { - return images.(AMIs), nil + // Ensure what's returned from this function is a deep-copy of AMIs so alterations + // to the data don't affect the original + return append(AMIs{}, images.(AMIs)...), nil } images := map[uint64]AMI{} for _, filtersAndOwners := range filterAndOwnerSets { @@ -199,7 +192,7 @@ func (p *DefaultProvider) getAMIs(ctx context.Context, terms []v1beta1.AMISelect // Don't include filters in the Describe Images call as EC2 API doesn't allow empty filters. 
Filters: lo.Ternary(len(filtersAndOwners.Filters) > 0, filtersAndOwners.Filters, nil), Owners: lo.Ternary(len(filtersAndOwners.Owners) > 0, aws.StringSlice(filtersAndOwners.Owners), nil), - MaxResults: aws.Int64(500), + MaxResults: aws.Int64(1000), }, func(page *ec2.DescribeImagesOutput, _ bool) bool { for i := range page.Images { reqs := p.getRequirementsFromImage(page.Images[i]) diff --git a/pkg/providers/amifamily/bootstrap/bootstrap.go b/pkg/providers/amifamily/bootstrap/bootstrap.go index ac39772d2c53..b1b54d094e86 100644 --- a/pkg/providers/amifamily/bootstrap/bootstrap.go +++ b/pkg/providers/amifamily/bootstrap/bootstrap.go @@ -22,7 +22,6 @@ import ( "github.com/samber/lo" core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "knative.dev/pkg/ptr" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" @@ -51,10 +50,10 @@ func (o Options) kubeletExtraArgs() (args []string) { return lo.Compact(args) } if o.KubeletConfig.MaxPods != nil { - args = append(args, fmt.Sprintf("--max-pods=%d", ptr.Int32Value(o.KubeletConfig.MaxPods))) + args = append(args, fmt.Sprintf("--max-pods=%d", lo.FromPtr(o.KubeletConfig.MaxPods))) } if o.KubeletConfig.PodsPerCore != nil { - args = append(args, fmt.Sprintf("--pods-per-core=%d", ptr.Int32Value(o.KubeletConfig.PodsPerCore))) + args = append(args, fmt.Sprintf("--pods-per-core=%d", lo.FromPtr(o.KubeletConfig.PodsPerCore))) } // We have to convert some of these maps so that their values return the correct string args = append(args, joinParameterArgs("--system-reserved", o.KubeletConfig.SystemReserved, "=")) @@ -64,13 +63,13 @@ func (o Options) kubeletExtraArgs() (args []string) { args = append(args, joinParameterArgs("--eviction-soft-grace-period", lo.MapValues(o.KubeletConfig.EvictionSoftGracePeriod, func(v metav1.Duration, _ string) string { return v.Duration.String() }), "=")) if o.KubeletConfig.EvictionMaxPodGracePeriod != nil { - args = append(args, fmt.Sprintf("--eviction-max-pod-grace-period=%d", 
ptr.Int32Value(o.KubeletConfig.EvictionMaxPodGracePeriod))) + args = append(args, fmt.Sprintf("--eviction-max-pod-grace-period=%d", lo.FromPtr(o.KubeletConfig.EvictionMaxPodGracePeriod))) } if o.KubeletConfig.ImageGCHighThresholdPercent != nil { - args = append(args, fmt.Sprintf("--image-gc-high-threshold=%d", ptr.Int32Value(o.KubeletConfig.ImageGCHighThresholdPercent))) + args = append(args, fmt.Sprintf("--image-gc-high-threshold=%d", lo.FromPtr(o.KubeletConfig.ImageGCHighThresholdPercent))) } if o.KubeletConfig.ImageGCLowThresholdPercent != nil { - args = append(args, fmt.Sprintf("--image-gc-low-threshold=%d", ptr.Int32Value(o.KubeletConfig.ImageGCLowThresholdPercent))) + args = append(args, fmt.Sprintf("--image-gc-low-threshold=%d", lo.FromPtr(o.KubeletConfig.ImageGCLowThresholdPercent))) } if o.KubeletConfig.CPUCFSQuota != nil { args = append(args, fmt.Sprintf("--cpu-cfs-quota=%t", lo.FromPtr(o.KubeletConfig.CPUCFSQuota))) diff --git a/pkg/providers/amifamily/bootstrap/bottlerocket.go b/pkg/providers/amifamily/bootstrap/bottlerocket.go index c1b2d8b66962..f7dbddd1b9e3 100644 --- a/pkg/providers/amifamily/bootstrap/bottlerocket.go +++ b/pkg/providers/amifamily/bootstrap/bottlerocket.go @@ -19,8 +19,6 @@ import ( "fmt" "strconv" - "knative.dev/pkg/ptr" - "github.com/imdario/mergo" "github.com/samber/lo" @@ -48,7 +46,7 @@ func (b Bottlerocket) Script() (string, error) { // Backwards compatibility for AWSENILimitedPodDensity flag if b.KubeletConfig != nil && b.KubeletConfig.MaxPods != nil { - s.Settings.Kubernetes.MaxPods = aws.Int(int(ptr.Int32Value(b.KubeletConfig.MaxPods))) + s.Settings.Kubernetes.MaxPods = aws.Int(int(lo.FromPtr(b.KubeletConfig.MaxPods))) } else if !b.AWSENILimitedPodDensity { s.Settings.Kubernetes.MaxPods = aws.Int(110) } diff --git a/pkg/providers/amifamily/bootstrap/bottlerocketsettings.go b/pkg/providers/amifamily/bootstrap/bottlerocketsettings.go index f2f59f0de5e6..8ccc2933baf9 100644 --- 
a/pkg/providers/amifamily/bootstrap/bottlerocketsettings.go +++ b/pkg/providers/amifamily/bootstrap/bottlerocketsettings.go @@ -69,11 +69,14 @@ type BottlerocketKubernetes struct { CPUManagerPolicy *string `toml:"cpu-manager-policy,omitempty"` CPUManagerReconcilePeriod *string `toml:"cpu-manager-reconcile-period,omitempty"` TopologyManagerScope *string `toml:"topology-manager-scope,omitempty"` + TopologyManagerPolicy *string `toml:"topology-manager-policy,omitempty"` ImageGCHighThresholdPercent *string `toml:"image-gc-high-threshold-percent,omitempty"` ImageGCLowThresholdPercent *string `toml:"image-gc-low-threshold-percent,omitempty"` CPUCFSQuota *bool `toml:"cpu-cfs-quota-enforced,omitempty"` ShutdownGracePeriod *string `toml:"shutdown-grace-period,omitempty"` ShutdownGracePeriodForCriticalPods *string `toml:"shutdown-grace-period-for-critical-pods,omitempty"` + ClusterDomain *string `toml:"cluster-domain,omitempty"` + SeccompDefault *bool `toml:"seccomp-default,omitempty"` } type BottlerocketStaticPod struct { diff --git a/pkg/providers/amifamily/bootstrap/mime/suite_test.go b/pkg/providers/amifamily/bootstrap/mime/suite_test.go index e0cf9fc820e6..58b2cc9325b9 100644 --- a/pkg/providers/amifamily/bootstrap/mime/suite_test.go +++ b/pkg/providers/amifamily/bootstrap/mime/suite_test.go @@ -23,8 +23,9 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + . "sigs.k8s.io/karpenter/pkg/utils/testing" + "github.com/samber/lo" - . "knative.dev/pkg/logging/testing" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap/mime" ) diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index 65adec3a9abb..c257111900b1 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -15,7 +15,6 @@ limitations under the License. 
package amifamily import ( - "context" "fmt" "net" @@ -120,18 +119,14 @@ func NewResolver(amiProvider Provider) *Resolver { // Resolve generates launch templates using the static options and dynamically generates launch template parameters. // Multiple ResolvedTemplates are returned based on the instanceTypes passed in to support special AMIs for certain instance types like GPUs. -func (r Resolver) Resolve(ctx context.Context, nodeClass *v1beta1.EC2NodeClass, nodeClaim *corev1beta1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, capacityType string, options *Options) ([]*LaunchTemplate, error) { +func (r Resolver) Resolve(nodeClass *v1beta1.EC2NodeClass, nodeClaim *corev1beta1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, capacityType string, options *Options) ([]*LaunchTemplate, error) { amiFamily := GetAMIFamily(nodeClass.Spec.AMIFamily, options) - amis, err := r.amiProvider.Get(ctx, nodeClass, options) - if err != nil { - return nil, err - } - if len(amis) == 0 { + if len(nodeClass.Status.AMIs) == 0 { return nil, fmt.Errorf("no amis exist given constraints") } - mappedAMIs := amis.MapToInstanceTypes(instanceTypes) + mappedAMIs := MapToInstanceTypes(instanceTypes, nodeClass.Status.AMIs) if len(mappedAMIs) == 0 { - return nil, fmt.Errorf("no instance types satisfy requirements of amis %v", amis) + return nil, fmt.Errorf("no instance types satisfy requirements of amis %v", lo.Uniq(lo.Map(nodeClass.Status.AMIs, func(a v1beta1.AMI, _ int) string { return a.ID }))) } var resolvedTemplates []*LaunchTemplate for amiID, instanceTypes := range mappedAMIs { diff --git a/pkg/providers/amifamily/suite_test.go b/pkg/providers/amifamily/suite_test.go index 7dfc5ebe0d0b..56c4660901c1 100644 --- a/pkg/providers/amifamily/suite_test.go +++ b/pkg/providers/amifamily/suite_test.go @@ -18,16 +18,19 @@ import ( "context" "fmt" "sort" + "sync" "testing" "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" + . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + . "sigs.k8s.io/karpenter/pkg/utils/testing" + "github.com/samber/lo" v1 "k8s.io/api/core/v1" - . "knative.dev/pkg/logging/testing" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" @@ -74,7 +77,7 @@ var _ = BeforeEach(func() { { Name: aws.String(amd64AMI), ImageId: aws.String("amd64-ami-id"), - CreationDate: aws.String(time.Now().Format(time.RFC3339)), + CreationDate: aws.String(time.Time{}.Format(time.RFC3339)), Architecture: aws.String("x86_64"), Tags: []*ec2.Tag{ {Key: aws.String("Name"), Value: aws.String(amd64AMI)}, @@ -84,7 +87,7 @@ var _ = BeforeEach(func() { { Name: aws.String(arm64AMI), ImageId: aws.String("arm64-ami-id"), - CreationDate: aws.String(time.Now().Add(time.Minute).Format(time.RFC3339)), + CreationDate: aws.String(time.Time{}.Add(time.Minute).Format(time.RFC3339)), Architecture: aws.String("arm64"), Tags: []*ec2.Tag{ {Key: aws.String("Name"), Value: aws.String(arm64AMI)}, @@ -94,7 +97,7 @@ var _ = BeforeEach(func() { { Name: aws.String(amd64NvidiaAMI), ImageId: aws.String("amd64-nvidia-ami-id"), - CreationDate: aws.String(time.Now().Add(2 * time.Minute).Format(time.RFC3339)), + CreationDate: aws.String(time.Time{}.Add(2 * time.Minute).Format(time.RFC3339)), Architecture: aws.String("x86_64"), Tags: []*ec2.Tag{ {Key: aws.String("Name"), Value: aws.String(amd64NvidiaAMI)}, @@ -104,7 +107,7 @@ var _ = BeforeEach(func() { { Name: aws.String(arm64NvidiaAMI), ImageId: aws.String("arm64-nvidia-ami-id"), - CreationDate: aws.String(time.Now().Add(2 * time.Minute).Format(time.RFC3339)), + CreationDate: aws.String(time.Time{}.Add(2 * time.Minute).Format(time.RFC3339)), Architecture: aws.String("arm64"), Tags: []*ec2.Tag{ {Key: aws.String("Name"), Value: aws.String(arm64NvidiaAMI)}, @@ -136,7 +139,7 @@ var _ = Describe("AMIProvider", func() { fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2-gpu/recommended/image_id", version): amd64NvidiaAMI, 
fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2-arm64/recommended/image_id", version): arm64AMI, } - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(4)) }) @@ -146,7 +149,7 @@ var _ = Describe("AMIProvider", func() { fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", version): amd64AMI, fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", version): arm64AMI, } - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(2)) }) @@ -158,7 +161,7 @@ var _ = Describe("AMIProvider", func() { fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/arm64/latest/image_id", version): arm64AMI, fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s-nvidia/arm64/latest/image_id", version): arm64NvidiaAMI, } - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(6)) }) @@ -168,7 +171,7 @@ var _ = Describe("AMIProvider", func() { fmt.Sprintf("/aws/service/canonical/ubuntu/eks/20.04/%s/stable/current/amd64/hvm/ebs-gp2/ami-id", version): amd64AMI, fmt.Sprintf("/aws/service/canonical/ubuntu/eks/20.04/%s/stable/current/arm64/hvm/ebs-gp2/ami-id", version): arm64AMI, } - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(2)) }) @@ -177,7 +180,7 @@ var _ = Describe("AMIProvider", func() { awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/ami-windows-latest/Windows_Server-2019-English-Core-EKS_Optimized-%s/image_id", version): 
amd64AMI, } - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(1)) }) @@ -186,16 +189,59 @@ var _ = Describe("AMIProvider", func() { awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/ami-windows-latest/Windows_Server-2022-English-Core-EKS_Optimized-%s/image_id", version): amd64AMI, } - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(1)) }) It("should succeed to resolve AMIs (Custom)", func() { nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(0)) }) + It("should not cause data races when calling Get() simultaneously", func() { + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + ID: "amd64-ami-id", + }, + { + ID: "arm64-ami-id", + }, + } + wg := sync.WaitGroup{} + for i := 0; i < 10000; i++ { + wg.Add(1) + go func() { + defer wg.Done() + defer GinkgoRecover() + images, err := awsEnv.AMIProvider.List(ctx, nodeClass) + Expect(err).ToNot(HaveOccurred()) + + Expect(images).To(HaveLen(2)) + // Sort everything in parallel and ensure that we don't get data races + images.Sort() + Expect(images).To(BeEquivalentTo([]amifamily.AMI{ + { + Name: arm64AMI, + AmiID: "arm64-ami-id", + CreationDate: time.Time{}.Add(time.Minute).Format(time.RFC3339), + Requirements: scheduling.NewLabelRequirements(map[string]string{ + v1.LabelArchStable: corev1beta1.ArchitectureArm64, + }), + }, + { + Name: amd64AMI, + AmiID: "amd64-ami-id", + CreationDate: time.Time{}.Format(time.RFC3339), + Requirements: scheduling.NewLabelRequirements(map[string]string{ + v1.LabelArchStable: 
corev1beta1.ArchitectureAmd64, + }), + }, + })) + }() + } + wg.Wait() + }) Context("SSM Alias Missing", func() { It("should succeed to partially resolve AMIs if all SSM aliases don't exist (Al2)", func() { nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyAL2 @@ -205,7 +251,7 @@ var _ = Describe("AMIProvider", func() { fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2-arm64/recommended/image_id", version): arm64AMI, } // Only 2 of the requirements sets for the SSM aliases will resolve - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(2)) }) @@ -214,7 +260,7 @@ var _ = Describe("AMIProvider", func() { awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", version): amd64AMI, } - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(1)) }) @@ -227,7 +273,7 @@ var _ = Describe("AMIProvider", func() { fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/arm64/latest/image_id", version): arm64AMI, } // Only 4 of the requirements sets for the SSM aliases will resolve - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(4)) }) @@ -238,7 +284,7 @@ var _ = Describe("AMIProvider", func() { fmt.Sprintf("/aws/service/canonical/ubuntu/eks/20.04/%s/stable/current/arm64/hvm/ebs-gp2/ami-id", version): arm64AMI, } // Only 1 of the requirements sets for the SSM aliases will resolve - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(1)) }) @@ -270,7 
+316,7 @@ var _ = Describe("AMIProvider", func() { Tags: map[string]string{"*": "*"}, }, } - amis, err := awsEnv.AMIProvider.Get(ctx, nodeClass, &amifamily.Options{}) + amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(1)) Expect(amis).To(ConsistOf(amifamily.AMI{ @@ -459,6 +505,64 @@ var _ = Describe("AMIProvider", func() { }, )) }) + It("should sort amis with the same name and creation date consistently", func() { + amis := amifamily.AMIs{ + { + Name: "test-ami-1", + AmiID: "test-ami-4-id", + CreationDate: "2021-08-31T00:10:42.000Z", + Requirements: scheduling.NewRequirements(), + }, + { + Name: "test-ami-1", + AmiID: "test-ami-3-id", + CreationDate: "2021-08-31T00:10:42.000Z", + Requirements: scheduling.NewRequirements(), + }, + { + Name: "test-ami-1", + AmiID: "test-ami-2-id", + CreationDate: "2021-08-31T00:10:42.000Z", + Requirements: scheduling.NewRequirements(), + }, + { + Name: "test-ami-1", + AmiID: "test-ami-1-id", + CreationDate: "2021-08-31T00:10:42.000Z", + Requirements: scheduling.NewRequirements(), + }, + } + + amis.Sort() + Expect(amis).To(Equal( + amifamily.AMIs{ + { + Name: "test-ami-1", + AmiID: "test-ami-1-id", + CreationDate: "2021-08-31T00:10:42.000Z", + Requirements: scheduling.NewRequirements(), + }, + { + Name: "test-ami-1", + AmiID: "test-ami-2-id", + CreationDate: "2021-08-31T00:10:42.000Z", + Requirements: scheduling.NewRequirements(), + }, + { + Name: "test-ami-1", + AmiID: "test-ami-3-id", + CreationDate: "2021-08-31T00:10:42.000Z", + Requirements: scheduling.NewRequirements(), + }, + { + Name: "test-ami-1", + AmiID: "test-ami-4-id", + CreationDate: "2021-08-31T00:10:42.000Z", + Requirements: scheduling.NewRequirements(), + }, + }, + )) + }) }) }) diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index f228815563a7..95c17cefb206 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -30,7 +30,7 @@ import ( 
"go.uber.org/multierr" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" - "knative.dev/pkg/logging" + "sigs.k8s.io/controller-runtime/pkg/log" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/utils/resources" @@ -49,9 +49,12 @@ import ( "sigs.k8s.io/karpenter/pkg/scheduling" ) -var ( +const ( instanceTypeFlexibilityThreshold = 5 // falling back to on-demand without flexibility risks insufficient capacity errors + maxInstanceTypes = 60 +) +var ( instanceStateFilter = &ec2.Filter{ Name: aws.String("instance-state-name"), Values: aws.StringSlice([]string{ec2.InstanceStateNamePending, ec2.InstanceStateNameRunning, ec2.InstanceStateNameStopping, ec2.InstanceStateNameStopped, ec2.InstanceStateNameShuttingDown}), @@ -95,6 +98,10 @@ func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1beta1.EC2Node if !schedulingRequirements.HasMinValues() { instanceTypes = p.filterInstanceTypes(nodeClaim, instanceTypes) } + instanceTypes, err := cloudprovider.InstanceTypes(instanceTypes).Truncate(schedulingRequirements, maxInstanceTypes) + if err != nil { + return nil, fmt.Errorf("truncating instance types, %w", err) + } tags := getTags(ctx, nodeClass, nodeClaim) fleetInstance, err := p.launchInstance(ctx, nodeClass, nodeClaim, instanceTypes, tags) if awserrors.IsLaunchTemplateNotFound(err) { @@ -166,7 +173,7 @@ func (p *DefaultProvider) Delete(ctx context.Context, id string) error { if awserrors.IsNotFound(err) { return cloudprovider.NewNodeClaimNotFoundError(fmt.Errorf("instance already terminated")) } - if _, e := p.Get(ctx, id); err != nil { + if _, e := p.Get(ctx, id); e != nil { if cloudprovider.IsNodeClaimNotFoundError(e) { return e } @@ -206,7 +213,7 @@ func (p *DefaultProvider) launchInstance(ctx context.Context, nodeClass *v1beta1 return nil, fmt.Errorf("getting launch template configs, %w", err) } if err := p.checkODFallback(nodeClaim, instanceTypes, launchTemplateConfigs); err != nil { - 
logging.FromContext(ctx).Warn(err.Error()) + log.FromContext(ctx).Error(err, "failed while checking on-demand fallback") } // Create fleet createFleetInput := &ec2.CreateFleetInput{ @@ -291,9 +298,11 @@ func (p *DefaultProvider) getLaunchTemplateConfigs(ctx context.Context, nodeClas if err != nil { return nil, fmt.Errorf("getting launch templates, %w", err) } + requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...) + requirements[corev1beta1.CapacityTypeLabelKey] = scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType) for _, launchTemplate := range launchTemplates { launchTemplateConfig := &ec2.FleetLaunchTemplateConfigRequest{ - Overrides: p.getOverrides(launchTemplate.InstanceTypes, zonalSubnets, scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...).Get(v1.LabelTopologyZone), capacityType, launchTemplate.ImageID), + Overrides: p.getOverrides(launchTemplate.InstanceTypes, zonalSubnets, requirements, launchTemplate.ImageID), LaunchTemplateSpecification: &ec2.FleetLaunchTemplateSpecificationRequest{ LaunchTemplateName: aws.String(launchTemplate.Name), Version: aws.String("$Latest"), @@ -311,7 +320,7 @@ func (p *DefaultProvider) getLaunchTemplateConfigs(ctx context.Context, nodeClas // getOverrides creates and returns launch template overrides for the cross product of InstanceTypes and subnets (with subnets being constrained by // zones and the offerings in InstanceTypes) -func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, zones *scheduling.Requirement, capacityType string, image string) []*ec2.FleetLaunchTemplateOverridesRequest { +func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceType, zonalSubnets map[string]*subnet.Subnet, reqs scheduling.Requirements, image string) []*ec2.FleetLaunchTemplateOverridesRequest { // Unwrap all the offerings to a flat 
slice that includes a pointer // to the parent instance type name type offeringWithParentName struct { @@ -328,16 +337,12 @@ func (p *DefaultProvider) getOverrides(instanceTypes []*cloudprovider.InstanceTy }) unwrappedOfferings = append(unwrappedOfferings, ofs...) } - var overrides []*ec2.FleetLaunchTemplateOverridesRequest for _, offering := range unwrappedOfferings { - if capacityType != offering.CapacityType { - continue - } - if !zones.Has(offering.Zone) { + if reqs.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) != nil { continue } - subnet, ok := zonalSubnets[offering.Zone] + subnet, ok := zonalSubnets[offering.Requirements.Get(v1.LabelTopologyZone).Any()] if !ok { continue } @@ -365,12 +370,12 @@ func (p *DefaultProvider) updateUnavailableOfferingsCache(ctx context.Context, e // available offering. The AWS Cloud Provider defaults to [ on-demand ], so spot // must be explicitly included in capacity type requirements. func (p *DefaultProvider) getCapacityType(nodeClaim *corev1beta1.NodeClaim, instanceTypes []*cloudprovider.InstanceType) string { - requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim. - Spec.Requirements...) + requirements := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...) 
if requirements.Get(corev1beta1.CapacityTypeLabelKey).Has(corev1beta1.CapacityTypeSpot) { + requirements[corev1beta1.CapacityTypeLabelKey] = scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, corev1beta1.CapacityTypeSpot) for _, instanceType := range instanceTypes { for _, offering := range instanceType.Offerings.Available() { - if requirements.Get(v1.LabelTopologyZone).Has(offering.Zone) && offering.CapacityType == corev1beta1.CapacityTypeSpot { + if requirements.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) == nil { return corev1beta1.CapacityTypeSpot } } @@ -405,12 +410,13 @@ func (p *DefaultProvider) isMixedCapacityLaunch(nodeClaim *corev1beta1.NodeClaim if requirements.Get(corev1beta1.CapacityTypeLabelKey).Has(corev1beta1.CapacityTypeSpot) { for _, instanceType := range instanceTypes { for _, offering := range instanceType.Offerings.Available() { - if requirements.Get(v1.LabelTopologyZone).Has(offering.Zone) { - if offering.CapacityType == corev1beta1.CapacityTypeSpot { - hasSpotOfferings = true - } else { - hasODOffering = true - } + if requirements.Compatible(offering.Requirements, scheduling.AllowUndefinedWellKnownLabels) != nil { + continue + } + if offering.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() == corev1beta1.CapacityTypeSpot { + hasSpotOfferings = true + } else { + hasODOffering = true } } } @@ -425,7 +431,7 @@ func filterUnwantedSpot(instanceTypes []*cloudprovider.InstanceType) []*cloudpro // first, find the price of our cheapest available on-demand instance type that could support this node for _, it := range instanceTypes { for _, o := range it.Offerings.Available() { - if o.CapacityType == corev1beta1.CapacityTypeOnDemand && o.Price < cheapestOnDemand { + if o.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() == corev1beta1.CapacityTypeOnDemand && o.Price < cheapestOnDemand { cheapestOnDemand = o.Price } } diff --git a/pkg/providers/instance/suite_test.go 
b/pkg/providers/instance/suite_test.go index 723cfcd8c16b..08da5f5053cf 100644 --- a/pkg/providers/instance/suite_test.go +++ b/pkg/providers/instance/suite_test.go @@ -43,8 +43,8 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . "sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context @@ -64,7 +64,7 @@ var _ = BeforeSuite(func() { ctx = options.ToContext(ctx, test.Options()) awsEnv = test.NewEnvironment(ctx, env) cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) }) var _ = AfterSuite(func() { @@ -82,38 +82,7 @@ var _ = Describe("InstanceProvider", func() { var nodePool *corev1beta1.NodePool var nodeClaim *corev1beta1.NodeClaim BeforeEach(func() { - nodeClass = test.EC2NodeClass( - v1beta1.EC2NodeClass{ - Status: v1beta1.EC2NodeClassStatus{ - InstanceProfile: "test-profile", - SecurityGroups: []v1beta1.SecurityGroup{ - { - ID: "sg-test1", - }, - { - ID: "sg-test2", - }, - { - ID: "sg-test3", - }, - }, - Subnets: []v1beta1.Subnet{ - { - ID: "subnet-test1", - Zone: "test-zone-1a", - }, - { - ID: "subnet-test2", - Zone: "test-zone-1b", - }, - { - ID: "subnet-test3", - Zone: "test-zone-1c", - }, - }, - }, - }, - ) + nodeClass = test.EC2NodeClass() nodePool = coretest.NodePool(corev1beta1.NodePool{ Spec: corev1beta1.NodePoolSpec{ Template: corev1beta1.NodeClaimTemplate{ diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 0d86dd941e8a..82bbcb099faa 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -24,8 +24,10 @@ import ( "github.com/mitchellh/hashstructure/v2" "github.com/patrickmn/go-cache" 
"github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/log" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" + "sigs.k8s.io/karpenter/pkg/scheduling" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" awscache "github.com/aws/karpenter-provider-aws/pkg/cache" @@ -34,8 +36,8 @@ import ( "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/ec2/ec2iface" "github.com/samber/lo" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" - "knative.dev/pkg/logging" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily" "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" @@ -104,10 +106,6 @@ func (p *DefaultProvider) List(ctx context.Context, kc *corev1beta1.KubeletConfi if kc == nil { kc = &corev1beta1.KubeletConfiguration{} } - if nodeClass == nil { - nodeClass = &v1beta1.EC2NodeClass{} - } - if len(p.instanceTypesInfo) == 0 { return nil, fmt.Errorf("no instance types found") } @@ -137,7 +135,9 @@ func (p *DefaultProvider) List(ctx context.Context, kc *corev1beta1.KubeletConfi aws.StringValue(nodeClass.Spec.AMIFamily), ) if item, ok := p.instanceTypesCache.Get(key); ok { - return item.([]*cloudprovider.InstanceType), nil + // Ensure what's returned from this function is a shallow-copy of the slice (not a deep-copy of the data itself) + // so that modifications to the ordering of the data don't affect the original + return append([]*cloudprovider.InstanceType{}, item.([]*cloudprovider.InstanceType)...), nil } // Get all zones across all offerings @@ -149,7 +149,7 @@ func (p *DefaultProvider) List(ctx context.Context, kc *corev1beta1.KubeletConfi } } if p.cm.HasChanged("zones", allZones) { - logging.FromContext(ctx).With("zones", allZones.UnsortedList()).Debugf("discovered zones") + log.FromContext(ctx).WithValues("zones", allZones.UnsortedList()).V(1).Info("discovered zones") } amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) result := 
lo.Map(p.instanceTypesInfo, func(i *ec2.InstanceTypeInfo, _ int) *cloudprovider.InstanceType { @@ -167,7 +167,8 @@ func (p *DefaultProvider) List(ctx context.Context, kc *corev1beta1.KubeletConfi return NewInstanceType(ctx, i, p.region, nodeClass.Spec.BlockDeviceMappings, nodeClass.Spec.InstanceStorePolicy, kc.MaxPods, kc.PodsPerCore, kc.KubeReserved, kc.SystemReserved, kc.EvictionHard, kc.EvictionSoft, - amiFamily, p.createOfferings(ctx, i, p.instanceTypeOfferings[aws.StringValue(i.InstanceType)], allZones, subnetZones)) + amiFamily, p.createOfferings(ctx, i, allZones, p.instanceTypeOfferings[aws.StringValue(i.InstanceType)], nodeClass.Status.Subnets), + ) }) p.instanceTypesCache.SetDefault(key, result) return result, nil @@ -211,8 +212,8 @@ func (p *DefaultProvider) UpdateInstanceTypes(ctx context.Context) error { // Only update instanceTypesSeqNun with the instance types have been changed // This is to not create new keys with duplicate instance types option atomic.AddUint64(&p.instanceTypesSeqNum, 1) - logging.FromContext(ctx).With( - "count", len(instanceTypes)).Debugf("discovered instance types") + log.FromContext(ctx).WithValues( + "count", len(instanceTypes)).V(1).Info("discovered instance types") } p.instanceTypesInfo = instanceTypes return nil @@ -245,13 +246,22 @@ func (p *DefaultProvider) UpdateInstanceTypeOfferings(ctx context.Context) error // Only update instanceTypesSeqNun with the instance type offerings have been changed // This is to not create new keys with duplicate instance type offerings option atomic.AddUint64(&p.instanceTypeOfferingsSeqNum, 1) - logging.FromContext(ctx).With("instance-type-count", len(instanceTypeOfferings)).Debugf("discovered offerings for instance types") + log.FromContext(ctx).WithValues("instance-type-count", len(instanceTypeOfferings)).V(1).Info("discovered offerings for instance types") } p.instanceTypeOfferings = instanceTypeOfferings return nil } -func (p *DefaultProvider) createOfferings(ctx context.Context, 
instanceType *ec2.InstanceTypeInfo, instanceTypeZones, zones, subnetZones sets.Set[string]) []cloudprovider.Offering { +// createOfferings creates a set of mutually exclusive offerings for a given instance type. This provider maintains an +// invariant that each offering is mutually exclusive. Specifically, there is an offering for each permutation of zone +// and capacity type. ZoneID is also injected into the offering requirements, when available, but there is a 1-1 +// mapping between zone and zoneID so this does not change the number of offerings. +// +// Each requirement on the offering is guaranteed to have a single value. To get the value for a requirement on an +// offering, you can do the following thanks to this invariant: +// +// offering.Requirements.Get(v1.TopologyLabelZone).Any() +func (p *DefaultProvider) createOfferings(ctx context.Context, instanceType *ec2.InstanceTypeInfo, zones, instanceTypeZones sets.Set[string], subnets []v1beta1.Subnet) []cloudprovider.Offering { var offerings []cloudprovider.Offering for zone := range zones { // while usage classes should be a distinct set, there's no guarantee of that @@ -269,16 +279,26 @@ func (p *DefaultProvider) createOfferings(ctx context.Context, instanceType *ec2 // ignore since karpenter doesn't support it yet, but do not log an unknown capacity type error continue default: - logging.FromContext(ctx).Errorf("Received unknown capacity type %s for instance type %s", capacityType, *instanceType.InstanceType) + log.FromContext(ctx).WithValues("capacity-type", capacityType, "instance-type", *instanceType.InstanceType).Error(fmt.Errorf("received unknown capacity type"), "failed parsing offering") continue } - available := !isUnavailable && ok && instanceTypeZones.Has(zone) && subnetZones.Has(zone) - offerings = append(offerings, cloudprovider.Offering{ - Zone: zone, - CapacityType: capacityType, - Price: price, - Available: available, + + subnet, hasSubnet := lo.Find(subnets, func(s v1beta1.Subnet) bool { 
+ return s.Zone == zone }) + available := !isUnavailable && ok && instanceTypeZones.Has(zone) && hasSubnet + offering := cloudprovider.Offering{ + Requirements: scheduling.NewRequirements( + scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), + scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, zone), + ), + Price: price, + Available: available, + } + if subnet.ZoneID != "" { + offering.Requirements.Add(scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, subnet.ZoneID)) + } + offerings = append(offerings, offering) instanceTypeOfferingAvailable.With(prometheus.Labels{ instanceTypeLabel: *instanceType.InstanceType, capacityTypeLabel: capacityType, diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 217a5bd1e0bd..3843909d641a 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -22,11 +22,13 @@ import ( "reflect" "sort" "strings" + "sync" "testing" "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/awslabs/operatorpkg/status" "github.com/imdario/mergo" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -36,8 +38,6 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" clock "k8s.io/utils/clock/testing" - . "knative.dev/pkg/logging/testing" - "knative.dev/pkg/ptr" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" @@ -48,7 +48,10 @@ import ( "sigs.k8s.io/karpenter/pkg/operator/scheme" "sigs.k8s.io/karpenter/pkg/scheduling" coretest "sigs.k8s.io/karpenter/pkg/test" + . "sigs.k8s.io/karpenter/pkg/test/expectations" + . 
"sigs.k8s.io/karpenter/pkg/utils/testing" + "sigs.k8s.io/karpenter/pkg/utils/resources" "github.com/aws/karpenter-provider-aws/pkg/apis" @@ -82,7 +85,7 @@ var _ = BeforeSuite(func() { awsEnv = test.NewEnvironment(ctx, env) fakeClock = &clock.FakeClock{} cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster) }) @@ -140,6 +143,7 @@ var _ = Describe("InstanceTypeProvider", func() { }, }, ) + nodeClass.StatusConditions().SetTrue(status.ConditionReady) nodePool = coretest.NodePool(corev1beta1.NodePool{ Spec: corev1beta1.NodePoolSpec{ Template: corev1beta1.NodeClaimTemplate{ @@ -169,8 +173,19 @@ var _ = Describe("InstanceTypeProvider", func() { InstanceProfile: "test-profile", SecurityGroups: nodeClass.Status.SecurityGroups, Subnets: nodeClass.Status.Subnets, + AMIs: []v1beta1.AMI{ + { + ID: "ami-window-test1", + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureAmd64}}, + {Key: v1.LabelOSStable, Operator: v1.NodeSelectorOpIn, Values: []string{string(v1.Windows)}}, + {Key: v1.LabelWindowsBuild, Operator: v1.NodeSelectorOpIn, Values: []string{v1beta1.Windows2022Build}}, + }, + }, + }, }, }) + windowsNodeClass.StatusConditions().SetTrue(status.ConditionReady) windowsNodePool = coretest.NodePool(corev1beta1.NodePool{ Spec: corev1beta1.NodePoolSpec{ Template: corev1beta1.NodeClaimTemplate{ @@ -219,6 +234,7 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceCPU: "32", v1beta1.LabelInstanceCPUManufacturer: "intel", v1beta1.LabelInstanceMemory: "131072", + 
v1beta1.LabelInstanceEBSBandwidth: "9500", v1beta1.LabelInstanceNetworkBandwidth: "50000", v1beta1.LabelInstanceGPUName: "t4", v1beta1.LabelInstanceGPUManufacturer: "nvidia", @@ -228,6 +244,7 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceAcceleratorName: "inferentia", v1beta1.LabelInstanceAcceleratorManufacturer: "aws", v1beta1.LabelInstanceAcceleratorCount: "1", + v1beta1.LabelTopologyZoneID: "tstz1-1a", // Deprecated Labels v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", @@ -272,12 +289,14 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceCPU: "32", v1beta1.LabelInstanceCPUManufacturer: "intel", v1beta1.LabelInstanceMemory: "131072", + v1beta1.LabelInstanceEBSBandwidth: "9500", v1beta1.LabelInstanceNetworkBandwidth: "50000", v1beta1.LabelInstanceGPUName: "t4", v1beta1.LabelInstanceGPUManufacturer: "nvidia", v1beta1.LabelInstanceGPUCount: "1", v1beta1.LabelInstanceGPUMemory: "16384", v1beta1.LabelInstanceLocalNVME: "900", + v1beta1.LabelTopologyZoneID: "tstz1-1a", // Deprecated Labels v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", @@ -323,10 +342,12 @@ var _ = Describe("InstanceTypeProvider", func() { v1beta1.LabelInstanceCPU: "8", v1beta1.LabelInstanceCPUManufacturer: "intel", v1beta1.LabelInstanceMemory: "16384", + v1beta1.LabelInstanceEBSBandwidth: "4750", v1beta1.LabelInstanceNetworkBandwidth: "5000", v1beta1.LabelInstanceAcceleratorName: "inferentia", v1beta1.LabelInstanceAcceleratorManufacturer: "aws", v1beta1.LabelInstanceAcceleratorCount: "1", + v1beta1.LabelTopologyZoneID: "tstz1-1a", // Deprecated Labels v1.LabelFailureDomainBetaRegion: fake.DefaultRegion, v1.LabelFailureDomainBetaZone: "test-zone-1a", @@ -859,8 +880,8 @@ var _ = Describe("InstanceTypeProvider", func() { for _, of := range it.Offerings { metric, ok := 
FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_available", map[string]string{ "instance_type": it.Name, - "capacity_type": of.CapacityType, - "zone": of.Zone, + "capacity_type": of.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any(), + "zone": of.Requirements.Get(v1.LabelTopologyZone).Any(), }) Expect(ok).To(BeTrue()) Expect(metric).To(Not(BeNil())) @@ -877,8 +898,8 @@ var _ = Describe("InstanceTypeProvider", func() { for _, of := range it.Offerings { metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_price_estimate", map[string]string{ "instance_type": it.Name, - "capacity_type": of.CapacityType, - "zone": of.Zone, + "capacity_type": of.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any(), + "zone": of.Requirements.Get(v1.LabelTopologyZone).Any(), }) Expect(ok).To(BeTrue()) Expect(metric).To(Not(BeNil())) @@ -906,7 +927,6 @@ var _ = Describe("InstanceTypeProvider", func() { ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) ExpectScheduled(ctx, env.Client, pod) }) - Context("Overhead", func() { var info *ec2.InstanceTypeInfo BeforeEach(func() { @@ -1431,7 +1451,7 @@ var _ = Describe("InstanceTypeProvider", func() { amiFamily, nil, ) - Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", 345)) + Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", 394)) } } }) @@ -1439,7 +1459,7 @@ var _ = Describe("InstanceTypeProvider", func() { instanceInfo, err := awsEnv.EC2API.DescribeInstanceTypesWithContext(ctx, &ec2.DescribeInstanceTypesInput{}) Expect(err).To(BeNil()) nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - MaxPods: ptr.Int32(10), + MaxPods: lo.ToPtr(int32(10)), } for _, info := range instanceInfo.InstanceTypes { amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) @@ -1464,7 +1484,7 @@ var _ = Describe("InstanceTypeProvider", func() { instanceInfo, err := awsEnv.EC2API.DescribeInstanceTypesWithContext(ctx, 
&ec2.DescribeInstanceTypesInput{}) Expect(err).To(BeNil()) nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - MaxPods: ptr.Int32(10), + MaxPods: lo.ToPtr(int32(10)), } for _, info := range instanceInfo.InstanceTypes { amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) @@ -1558,7 +1578,7 @@ var _ = Describe("InstanceTypeProvider", func() { instanceInfo, err := awsEnv.EC2API.DescribeInstanceTypesWithContext(ctx, &ec2.DescribeInstanceTypesInput{}) Expect(err).To(BeNil()) nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - PodsPerCore: ptr.Int32(1), + PodsPerCore: lo.ToPtr(int32(1)), } for _, info := range instanceInfo.InstanceTypes { amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) @@ -1576,15 +1596,15 @@ var _ = Describe("InstanceTypeProvider", func() { amiFamily, nil, ) - Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", ptr.Int64Value(info.VCpuInfo.DefaultVCpus))) + Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", lo.FromPtr(info.VCpuInfo.DefaultVCpus))) } }) It("should take the minimum of pods-per-core and max-pods", func() { instanceInfo, err := awsEnv.EC2API.DescribeInstanceTypesWithContext(ctx, &ec2.DescribeInstanceTypesInput{}) Expect(err).To(BeNil()) nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - PodsPerCore: ptr.Int32(4), - MaxPods: ptr.Int32(20), + PodsPerCore: lo.ToPtr(int32(4)), + MaxPods: lo.ToPtr(int32(20)), } for _, info := range instanceInfo.InstanceTypes { amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) @@ -1602,7 +1622,7 @@ var _ = Describe("InstanceTypeProvider", func() { amiFamily, nil, ) - Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", lo.Min([]int64{20, ptr.Int64Value(info.VCpuInfo.DefaultVCpus) * 4}))) + Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", lo.Min([]int64{20, lo.FromPtr(info.VCpuInfo.DefaultVCpus) * 4}))) } }) It("should 
ignore pods-per-core when using Bottlerocket AMI", func() { @@ -1610,7 +1630,7 @@ var _ = Describe("InstanceTypeProvider", func() { Expect(err).To(BeNil()) nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyBottlerocket nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - PodsPerCore: ptr.Int32(1), + PodsPerCore: lo.ToPtr(int32(1)), } for _, info := range instanceInfo.InstanceTypes { amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) @@ -1636,7 +1656,7 @@ var _ = Describe("InstanceTypeProvider", func() { instanceInfo, err := awsEnv.EC2API.DescribeInstanceTypesWithContext(ctx, &ec2.DescribeInstanceTypesInput{}) Expect(err).To(BeNil()) nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - PodsPerCore: ptr.Int32(0), + PodsPerCore: lo.ToPtr(int32(0)), } for _, info := range instanceInfo.InstanceTypes { if *info.InstanceType == "t3.large" { @@ -1673,7 +1693,7 @@ var _ = Describe("InstanceTypeProvider", func() { amiFamily, nil, ) - Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", 345)) + Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", 394)) } } }) @@ -2307,6 +2327,41 @@ var _ = Describe("InstanceTypeProvider", func() { uniqueInstanceTypeList(instanceTypeResult) }) }) + It("should not cause data races when calling List() simultaneously", func() { + mu := sync.RWMutex{} + var instanceTypeOrder []string + wg := sync.WaitGroup{} + for i := 0; i < 10000; i++ { + wg.Add(1) + go func() { + defer wg.Done() + defer GinkgoRecover() + instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, &corev1beta1.KubeletConfiguration{}, nodeClass) + Expect(err).ToNot(HaveOccurred()) + + // Sort everything in parallel and ensure that we don't get data races + sort.Slice(instanceTypes, func(i, j int) bool { + return instanceTypes[i].Name < instanceTypes[j].Name + }) + // Get the ordering of the instance types based on name + tempInstanceTypeOrder := lo.Map(instanceTypes, func(i 
*corecloudprovider.InstanceType, _ int) string { + return i.Name + }) + // Expect that all the elements in the instance type list are unique + Expect(lo.Uniq(tempInstanceTypeOrder)).To(HaveLen(len(tempInstanceTypeOrder))) + + // We have to lock since we are doing simultaneous access to this value + mu.Lock() + if len(instanceTypeOrder) == 0 { + instanceTypeOrder = tempInstanceTypeOrder + } else { + Expect(tempInstanceTypeOrder).To(BeEquivalentTo(instanceTypeOrder)) + } + mu.Unlock() + }() + } + wg.Wait() + }) }) func uniqueInstanceTypeList(instanceTypesLists [][]*corecloudprovider.InstanceType) { @@ -2333,18 +2388,19 @@ func generateSpotPricing(cp *cloudprovider.CloudProvider, nodePool *corev1beta1. instanceType := it onDemandPrice := 1.00 for _, o := range it.Offerings { - if o.CapacityType == corev1beta1.CapacityTypeOnDemand { + if o.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() == corev1beta1.CapacityTypeOnDemand { onDemandPrice = o.Price } } for _, o := range instanceType.Offerings { o := o - if o.CapacityType != corev1beta1.CapacityTypeSpot { + if o.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() != corev1beta1.CapacityTypeSpot { continue } + zone := o.Requirements.Get(v1.LabelTopologyZone).Any() spotPrice := fmt.Sprintf("%0.3f", onDemandPrice*0.5) rsp.SpotPriceHistory = append(rsp.SpotPriceHistory, &ec2.SpotPrice{ - AvailabilityZone: &o.Zone, + AvailabilityZone: &zone, InstanceType: &instanceType.Name, SpotPrice: &spotPrice, Timestamp: &t, diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index 6cebec2f523b..a4482cf7715d 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -27,7 +27,6 @@ import ( "github.com/samber/lo" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "knative.dev/pkg/ptr" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" @@ -78,15 +77,20 @@ func computeRequirements(info *ec2.InstanceTypeInfo, offerings cloudprovider.Off 
scheduling.NewRequirement(v1.LabelInstanceTypeStable, v1.NodeSelectorOpIn, aws.StringValue(info.InstanceType)), scheduling.NewRequirement(v1.LabelArchStable, v1.NodeSelectorOpIn, getArchitecture(info)), scheduling.NewRequirement(v1.LabelOSStable, v1.NodeSelectorOpIn, getOS(info, amiFamily)...), - scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { return o.Zone })...), + scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { + return o.Requirements.Get(v1.LabelTopologyZone).Any() + })...), scheduling.NewRequirement(v1.LabelTopologyRegion, v1.NodeSelectorOpIn, region), scheduling.NewRequirement(v1.LabelWindowsBuild, v1.NodeSelectorOpDoesNotExist), // Well Known to Karpenter - scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { return o.CapacityType })...), + scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, lo.Map(offerings.Available(), func(o cloudprovider.Offering, _ int) string { + return o.Requirements.Get(corev1beta1.CapacityTypeLabelKey).Any() + })...), // Well Known to AWS scheduling.NewRequirement(v1beta1.LabelInstanceCPU, v1.NodeSelectorOpIn, fmt.Sprint(aws.Int64Value(info.VCpuInfo.DefaultVCpus))), scheduling.NewRequirement(v1beta1.LabelInstanceCPUManufacturer, v1.NodeSelectorOpDoesNotExist), scheduling.NewRequirement(v1beta1.LabelInstanceMemory, v1.NodeSelectorOpIn, fmt.Sprint(aws.Int64Value(info.MemoryInfo.SizeInMiB))), + scheduling.NewRequirement(v1beta1.LabelInstanceEBSBandwidth, v1.NodeSelectorOpDoesNotExist), scheduling.NewRequirement(v1beta1.LabelInstanceNetworkBandwidth, v1.NodeSelectorOpDoesNotExist), scheduling.NewRequirement(v1beta1.LabelInstanceCategory, v1.NodeSelectorOpDoesNotExist), 
scheduling.NewRequirement(v1beta1.LabelInstanceFamily, v1.NodeSelectorOpDoesNotExist), @@ -103,6 +107,14 @@ func computeRequirements(info *ec2.InstanceTypeInfo, offerings cloudprovider.Off scheduling.NewRequirement(v1beta1.LabelInstanceHypervisor, v1.NodeSelectorOpIn, aws.StringValue(info.Hypervisor)), scheduling.NewRequirement(v1beta1.LabelInstanceEncryptionInTransitSupported, v1.NodeSelectorOpIn, fmt.Sprint(aws.BoolValue(info.NetworkInfo.EncryptionInTransitSupported))), ) + // Only add zone-id label when available in offerings. It may not be available if a user has upgraded from a + // previous version of Karpenter w/o zone-id support and the nodeclass subnet status has not yet updated. + if zoneIDs := lo.FilterMap(offerings.Available(), func(o cloudprovider.Offering, _ int) (string, bool) { + zoneID := o.Requirements.Get(v1beta1.LabelTopologyZoneID).Any() + return zoneID, zoneID != "" + }); len(zoneIDs) != 0 { + requirements.Add(scheduling.NewRequirement(v1beta1.LabelTopologyZoneID, v1.NodeSelectorOpIn, zoneIDs...)) + } // Instance Type Labels instanceFamilyParts := instanceTypeScheme.FindStringSubmatch(aws.StringValue(info.InstanceType)) if len(instanceFamilyParts) == 4 { @@ -152,6 +164,10 @@ func computeRequirements(info *ec2.InstanceTypeInfo, offerings cloudprovider.Off if info.ProcessorInfo != nil { requirements.Get(v1beta1.LabelInstanceCPUManufacturer).Insert(lowerKabobCase(aws.StringValue(info.ProcessorInfo.Manufacturer))) } + // EBS Max Bandwidth + if info.EbsInfo != nil && aws.StringValue(info.EbsInfo.EbsOptimizedSupport) == ec2.EbsOptimizedSupportDefault { + requirements.Get(v1beta1.LabelInstanceEBSBandwidth).Insert(fmt.Sprint(aws.Int64Value(info.EbsInfo.EbsOptimizedInfo.MaximumBandwidthInMbps))) + } return requirements } @@ -321,12 +337,12 @@ func efas(info *ec2.InstanceTypeInfo) *resource.Quantity { func ENILimitedPods(ctx context.Context, info *ec2.InstanceTypeInfo) *resource.Quantity { // The number of pods per node is calculated using the formula: 
// max number of ENIs * (IPv4 Addresses per ENI -1) + 2 - // https://github.com/awslabs/amazon-eks-ami/blob/master/files/eni-max-pods.txt#L20 + // https://github.com/awslabs/amazon-eks-ami/blob/main/templates/shared/runtime/eni-max-pods.txt // VPC CNI only uses the default network interface // https://github.com/aws/amazon-vpc-cni-k8s/blob/3294231c0dce52cfe473bf6c62f47956a3b333b6/scripts/gen_vpc_ip_limits.go#L162 networkInterfaces := *info.NetworkInfo.NetworkCards[*info.NetworkInfo.DefaultNetworkCardIndex].MaximumNetworkInterfaces - usableNetworkInterfaces := lo.Max([]int64{(networkInterfaces - int64(options.FromContext(ctx).ReservedENIs)), 0}) + usableNetworkInterfaces := lo.Max([]int64{networkInterfaces - int64(options.FromContext(ctx).ReservedENIs), 0}) if usableNetworkInterfaces == 0 { return resource.NewQuantity(0, resource.DecimalSI) } @@ -414,15 +430,15 @@ func pods(ctx context.Context, info *ec2.InstanceTypeInfo, amiFamily amifamily.A var count int64 switch { case maxPods != nil: - count = int64(ptr.Int32Value(maxPods)) + count = int64(lo.FromPtr(maxPods)) case amiFamily.FeatureFlags().SupportsENILimitedPodDensity: count = ENILimitedPods(ctx, info).Value() default: count = 110 } - if ptr.Int32Value(podsPerCore) > 0 && amiFamily.FeatureFlags().PodsPerCoreEnabled { - count = lo.Min([]int64{int64(ptr.Int32Value(podsPerCore)) * ptr.Int64Value(info.VCpuInfo.DefaultVCpus), count}) + if lo.FromPtr(podsPerCore) > 0 && amiFamily.FeatureFlags().PodsPerCoreEnabled { + count = lo.Min([]int64{int64(lo.FromPtr(podsPerCore)) * lo.FromPtr(info.VCpuInfo.DefaultVCpus), count}) } return resources.Quantity(fmt.Sprint(count)) } diff --git a/pkg/providers/instancetype/zz_generated.bandwidth.go b/pkg/providers/instancetype/zz_generated.bandwidth.go index f8f92d72731f..1f4f07404ee1 100644 --- a/pkg/providers/instancetype/zz_generated.bandwidth.go +++ b/pkg/providers/instancetype/zz_generated.bandwidth.go @@ -20,14 +20,55 @@ package instancetype var ( InstanceTypeBandwidthMegabits 
= map[string]int64{ - // c3.large is not available in https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html - // c4.4xlarge is not available in https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html - // i2.2xlarge is not available in https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html - // m2.4xlarge is not available in https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html - // m4.4xlarge is not available in https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html - // r3.large is not available in https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html - // t1.micro is not available in https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html - // t2.2xlarge is not available in https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html + // c1.medium has vague bandwidth information, bandwidth is Moderate + // c1.xlarge has vague bandwidth information, bandwidth is High + // c3.2xlarge has vague bandwidth information, bandwidth is High + // c3.4xlarge has vague bandwidth information, bandwidth is High + // c3.large has vague bandwidth information, bandwidth is Moderate + // c3.xlarge has vague bandwidth information, bandwidth is Moderate + // c4.2xlarge has vague bandwidth information, bandwidth is High + // c4.4xlarge has vague bandwidth information, bandwidth is High + // c4.large has vague bandwidth information, bandwidth is Moderate + // c4.xlarge has vague bandwidth information, bandwidth is High + // d2.2xlarge has vague bandwidth information, bandwidth is High + // d2.4xlarge has vague bandwidth information, bandwidth is High + // d2.xlarge has vague bandwidth information, bandwidth is Moderate + // f1.2xlarge has vague bandwidth information, bandwidth is Up to 10 Gigabit + // f1.4xlarge has vague 
bandwidth information, bandwidth is Up to 10 Gigabit + // g3.4xlarge has vague bandwidth information, bandwidth is Up to 10 Gigabit + // g3s.xlarge is not available in https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html + // i2.2xlarge has vague bandwidth information, bandwidth is High + // i2.4xlarge has vague bandwidth information, bandwidth is High + // i2.xlarge has vague bandwidth information, bandwidth is Moderate + // m1.large has vague bandwidth information, bandwidth is Moderate + // m1.medium has vague bandwidth information, bandwidth is Moderate + // m1.small has vague bandwidth information, bandwidth is Low + // m1.xlarge has vague bandwidth information, bandwidth is High + // m2.2xlarge has vague bandwidth information, bandwidth is Moderate + // m2.4xlarge has vague bandwidth information, bandwidth is High + // m2.xlarge has vague bandwidth information, bandwidth is Moderate + // m3.2xlarge has vague bandwidth information, bandwidth is High + // m3.large has vague bandwidth information, bandwidth is Moderate + // m3.medium has vague bandwidth information, bandwidth is Moderate + // m3.xlarge has vague bandwidth information, bandwidth is High + // m4.2xlarge has vague bandwidth information, bandwidth is High + // m4.4xlarge has vague bandwidth information, bandwidth is High + // m4.large has vague bandwidth information, bandwidth is Moderate + // m4.xlarge has vague bandwidth information, bandwidth is High + // p2.xlarge has vague bandwidth information, bandwidth is High + // p3.2xlarge has vague bandwidth information, bandwidth is Up to 10 Gigabit + // r3.2xlarge has vague bandwidth information, bandwidth is High + // r3.4xlarge has vague bandwidth information, bandwidth is High + // r3.large has vague bandwidth information, bandwidth is Moderate + // r3.xlarge has vague bandwidth information, bandwidth is Moderate + // t1.micro has vague bandwidth information, bandwidth is Very Low + // t2.2xlarge has vague 
bandwidth information, bandwidth is Moderate + // t2.large has vague bandwidth information, bandwidth is Low to Moderate + // t2.medium has vague bandwidth information, bandwidth is Low to Moderate + // t2.micro has vague bandwidth information, bandwidth is Low to Moderate + // t2.nano has vague bandwidth information, bandwidth is Low to Moderate + // t2.small has vague bandwidth information, bandwidth is Low to Moderate + // t2.xlarge has vague bandwidth information, bandwidth is Moderate "t3.nano": 32, "t3a.nano": 32, "t4g.nano": 32, @@ -41,6 +82,7 @@ var ( "t3a.medium": 256, "t4g.medium": 256, "c7a.medium": 390, + "c7i-flex.large": 390, "m7a.medium": 390, "m7i-flex.large": 390, "r7a.medium": 390, @@ -90,6 +132,7 @@ var ( "c6i.large": 781, "c6id.large": 781, "c7a.large": 781, + "c7i-flex.xlarge": 781, "c7i.large": 781, "i4g.large": 781, "i4i.large": 781, @@ -144,6 +187,7 @@ var ( "c6i.xlarge": 1562, "c6id.xlarge": 1562, "c7a.xlarge": 1562, + "c7i-flex.2xlarge": 1562, "c7i.xlarge": 1562, "is4gen.medium": 1562, "m6a.xlarge": 1562, @@ -185,9 +229,9 @@ var ( "c5d.2xlarge": 2500, "c6g.2xlarge": 2500, "c6gd.2xlarge": 2500, - "f1.2xlarge": 2500, "g5.xlarge": 2500, "g5g.2xlarge": 2500, + "g6.xlarge": 2500, "h1.2xlarge": 2500, "i3.2xlarge": 2500, "m5.2xlarge": 2500, @@ -218,6 +262,7 @@ var ( "c6in.large": 3125, "c7a.2xlarge": 3125, "c7gn.medium": 3125, + "c7i-flex.4xlarge": 3125, "c7i.2xlarge": 3125, "im4gn.large": 3125, "is4gen.large": 3125, @@ -261,11 +306,10 @@ var ( "c5n.xlarge": 5000, "c6g.4xlarge": 5000, "c6gd.4xlarge": 5000, - "f1.4xlarge": 5000, - "g3.4xlarge": 5000, "g4dn.xlarge": 5000, "g5.2xlarge": 5000, "g5g.4xlarge": 5000, + "g6.2xlarge": 5000, "h1.4xlarge": 5000, "i3.4xlarge": 5000, "inf1.2xlarge": 5000, @@ -297,6 +341,7 @@ var ( "c6in.xlarge": 6250, "c7a.4xlarge": 6250, "c7gn.large": 6250, + "c7i-flex.8xlarge": 6250, "c7i.4xlarge": 6250, "im4gn.xlarge": 6250, "is4gen.xlarge": 6250, @@ -345,6 +390,8 @@ var ( "g3.8xlarge": 10000, "g4dn.2xlarge": 10000, 
"g5.4xlarge": 10000, + "g6.4xlarge": 10000, + "gr6.4xlarge": 10000, "h1.8xlarge": 10000, "i2.8xlarge": 10000, "i3.8xlarge": 10000, @@ -508,6 +555,9 @@ var ( "g5.8xlarge": 25000, "g5g.16xlarge": 25000, "g5g.metal": 25000, + "g6.16xlarge": 25000, + "g6.8xlarge": 25000, + "gr6.8xlarge": 25000, "h1.16xlarge": 25000, "i3.16xlarge": 25000, "i3.metal": 25000, @@ -602,6 +652,7 @@ var ( "r7i.metal-24xl": 37500, "d3en.6xlarge": 40000, "g5.12xlarge": 40000, + "g6.12xlarge": 40000, "c5n.9xlarge": 50000, "c6a.32xlarge": 50000, "c6a.48xlarge": 50000, @@ -623,6 +674,7 @@ var ( "g4dn.16xlarge": 50000, "g4dn.8xlarge": 50000, "g5.24xlarge": 50000, + "g6.24xlarge": 50000, "i3en.12xlarge": 50000, "im4gn.8xlarge": 50000, "inf2.24xlarge": 50000, @@ -691,6 +743,7 @@ var ( "dl2q.24xlarge": 100000, "g4dn.metal": 100000, "g5.48xlarge": 100000, + "g6.48xlarge": 100000, "hpc6a.48xlarge": 100000, "i3en.24xlarge": 100000, "i3en.metal": 100000, @@ -738,6 +791,7 @@ var ( "c6in.32xlarge": 200000, "c6in.metal": 200000, "c7gn.16xlarge": 200000, + "c7gn.metal": 200000, "hpc6id.32xlarge": 200000, "hpc7g.16xlarge": 200000, "hpc7g.4xlarge": 200000, diff --git a/pkg/providers/instancetype/zz_generated.vpclimits.go b/pkg/providers/instancetype/zz_generated.vpclimits.go index da5f712b9b5e..19ab1fc97f11 100644 --- a/pkg/providers/instancetype/zz_generated.vpclimits.go +++ b/pkg/providers/instancetype/zz_generated.vpclimits.go @@ -17,7 +17,7 @@ // so we can get this information at runtime. // Code generated by go generate; DO NOT EDIT. -// This file was generated at 2024-01-29T18:28:02Z +// This file was generated at 2024-04-30T17:56:45Z // WARNING: please add @ellistarn, @bwagner5, or @jonathan-innis from aws/karpenter to reviewers // if you are updating this file since Karpenter is depending on this file to calculate max pods. 
@@ -1846,19 +1846,19 @@ var Limits = map[string]*VPCLimits{ IsBareMetal: false, }, "c6in.32xlarge": { - Interface: 14, + Interface: 16, IPv4PerInterface: 50, IsTrunkingCompatible: true, - BranchInterface: 108, + BranchInterface: 106, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 0, }, { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 1, }, }, @@ -1911,19 +1911,19 @@ var Limits = map[string]*VPCLimits{ IsBareMetal: false, }, "c6in.metal": { - Interface: 14, + Interface: 16, IPv4PerInterface: 50, IsTrunkingCompatible: true, - BranchInterface: 108, + BranchInterface: 106, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 0, }, { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 1, }, }, @@ -2365,6 +2365,21 @@ var Limits = map[string]*VPCLimits{ Hypervisor: "nitro", IsBareMetal: false, }, + "c7gd.metal": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "", + IsBareMetal: true, + }, "c7gd.xlarge": { Interface: 4, IPv4PerInterface: 15, @@ -2485,6 +2500,21 @@ var Limits = map[string]*VPCLimits{ Hypervisor: "nitro", IsBareMetal: false, }, + "c7gn.metal": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "", + IsBareMetal: true, + }, "c7gn.xlarge": { Interface: 4, IPv4PerInterface: 15, @@ -2983,8 +3013,8 @@ var Limits = map[string]*VPCLimits{ "g3.4xlarge": { Interface: 8, IPv4PerInterface: 30, - IsTrunkingCompatible: false, - BranchInterface: 0, + 
IsTrunkingCompatible: true, + BranchInterface: 6, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { @@ -2998,8 +3028,8 @@ var Limits = map[string]*VPCLimits{ "g3.8xlarge": { Interface: 8, IPv4PerInterface: 30, - IsTrunkingCompatible: false, - BranchInterface: 0, + IsTrunkingCompatible: true, + BranchInterface: 6, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { @@ -3013,8 +3043,8 @@ var Limits = map[string]*VPCLimits{ "g3s.xlarge": { Interface: 4, IPv4PerInterface: 15, - IsTrunkingCompatible: false, - BranchInterface: 0, + IsTrunkingCompatible: true, + BranchInterface: 10, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { @@ -3415,6 +3445,156 @@ var Limits = map[string]*VPCLimits{ Hypervisor: "nitro", IsBareMetal: false, }, + "g6.12xlarge": { + Interface: 8, + IPv4PerInterface: 30, + IsTrunkingCompatible: true, + BranchInterface: 114, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 8, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "g6.16xlarge": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "g6.24xlarge": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "g6.2xlarge": { + Interface: 4, + IPv4PerInterface: 15, + IsTrunkingCompatible: true, + BranchInterface: 38, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 4, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "g6.48xlarge": { + Interface: 15, + IPv4PerInterface: 50, + 
IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "g6.4xlarge": { + Interface: 8, + IPv4PerInterface: 30, + IsTrunkingCompatible: true, + BranchInterface: 54, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 8, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "g6.8xlarge": { + Interface: 8, + IPv4PerInterface: 30, + IsTrunkingCompatible: true, + BranchInterface: 84, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 8, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "g6.xlarge": { + Interface: 4, + IPv4PerInterface: 15, + IsTrunkingCompatible: true, + BranchInterface: 18, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 4, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "gr6.4xlarge": { + Interface: 8, + IPv4PerInterface: 30, + IsTrunkingCompatible: true, + BranchInterface: 54, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 8, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, + "gr6.8xlarge": { + Interface: 8, + IPv4PerInterface: 30, + IsTrunkingCompatible: true, + BranchInterface: 84, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 8, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "nitro", + IsBareMetal: false, + }, "h1.16xlarge": { Interface: 8, IPv4PerInterface: 50, @@ -6236,19 +6416,19 @@ var Limits = map[string]*VPCLimits{ IsBareMetal: false, }, "m6idn.32xlarge": { - Interface: 14, + Interface: 16, IPv4PerInterface: 50, IsTrunkingCompatible: true, - BranchInterface: 108, + BranchInterface: 106, DefaultNetworkCardIndex: 0, 
NetworkCards: []NetworkCard{ { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 0, }, { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 1, }, }, @@ -6301,19 +6481,19 @@ var Limits = map[string]*VPCLimits{ IsBareMetal: false, }, "m6idn.metal": { - Interface: 14, + Interface: 16, IPv4PerInterface: 50, IsTrunkingCompatible: true, - BranchInterface: 108, + BranchInterface: 106, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 0, }, { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 1, }, }, @@ -6396,19 +6576,19 @@ var Limits = map[string]*VPCLimits{ IsBareMetal: false, }, "m6in.32xlarge": { - Interface: 14, + Interface: 16, IPv4PerInterface: 50, IsTrunkingCompatible: true, - BranchInterface: 108, + BranchInterface: 106, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 0, }, { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 1, }, }, @@ -6461,19 +6641,19 @@ var Limits = map[string]*VPCLimits{ IsBareMetal: false, }, "m6in.metal": { - Interface: 14, + Interface: 16, IPv4PerInterface: 50, IsTrunkingCompatible: true, - BranchInterface: 108, + BranchInterface: 106, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 0, }, { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 1, }, }, @@ -6915,6 +7095,21 @@ var Limits = map[string]*VPCLimits{ Hypervisor: "nitro", IsBareMetal: false, }, + "m7gd.metal": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "", + IsBareMetal: true, + 
}, "m7gd.xlarge": { Interface: 4, IPv4PerInterface: 15, @@ -9411,19 +9606,19 @@ var Limits = map[string]*VPCLimits{ IsBareMetal: false, }, "r6idn.32xlarge": { - Interface: 14, + Interface: 16, IPv4PerInterface: 50, IsTrunkingCompatible: true, - BranchInterface: 108, + BranchInterface: 106, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 0, }, { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 1, }, }, @@ -9476,19 +9671,19 @@ var Limits = map[string]*VPCLimits{ IsBareMetal: false, }, "r6idn.metal": { - Interface: 14, + Interface: 16, IPv4PerInterface: 50, IsTrunkingCompatible: true, - BranchInterface: 108, + BranchInterface: 106, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 0, }, { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 1, }, }, @@ -9571,19 +9766,19 @@ var Limits = map[string]*VPCLimits{ IsBareMetal: false, }, "r6in.32xlarge": { - Interface: 14, + Interface: 16, IPv4PerInterface: 50, IsTrunkingCompatible: true, - BranchInterface: 108, + BranchInterface: 106, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 0, }, { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 1, }, }, @@ -9636,19 +9831,19 @@ var Limits = map[string]*VPCLimits{ IsBareMetal: false, }, "r6in.metal": { - Interface: 14, + Interface: 16, IPv4PerInterface: 50, IsTrunkingCompatible: true, - BranchInterface: 108, + BranchInterface: 106, DefaultNetworkCardIndex: 0, NetworkCards: []NetworkCard{ { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 0, }, { - MaximumNetworkInterfaces: 7, + MaximumNetworkInterfaces: 8, NetworkCardIndex: 1, }, }, @@ -10090,6 +10285,21 @@ var Limits = map[string]*VPCLimits{ 
Hypervisor: "nitro", IsBareMetal: false, }, + "r7gd.metal": { + Interface: 15, + IPv4PerInterface: 50, + IsTrunkingCompatible: true, + BranchInterface: 107, + DefaultNetworkCardIndex: 0, + NetworkCards: []NetworkCard{ + { + MaximumNetworkInterfaces: 15, + NetworkCardIndex: 0, + }, + }, + Hypervisor: "", + IsBareMetal: true, + }, "r7gd.xlarge": { Interface: 4, IPv4PerInterface: 15, diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index b90949530f6f..d40fda648fb1 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -26,6 +26,7 @@ import ( "time" "go.uber.org/multierr" + "sigs.k8s.io/controller-runtime/pkg/log" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" @@ -37,7 +38,6 @@ import ( "github.com/samber/lo" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "knative.dev/pkg/logging" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" @@ -119,7 +119,7 @@ func (p *DefaultProvider) EnsureAll(ctx context.Context, nodeClass *v1beta1.EC2N if err != nil { return nil, err } - resolvedLaunchTemplates, err := p.amiFamily.Resolve(ctx, nodeClass, nodeClaim, instanceTypes, capacityType, options) + resolvedLaunchTemplates, err := p.amiFamily.Resolve(nodeClass, nodeClaim, instanceTypes, capacityType, options) if err != nil { return nil, err } @@ -137,12 +137,12 @@ func (p *DefaultProvider) EnsureAll(ctx context.Context, nodeClass *v1beta1.EC2N // InvalidateCache deletes a launch template from cache if it exists func (p *DefaultProvider) InvalidateCache(ctx context.Context, ltName string, ltID string) { - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("launch-template-name", ltName, "launch-template-id", ltID)) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("launch-template-name", ltName, "launch-template-id", ltID)) p.Lock() defer p.Unlock() 
defer p.cache.OnEvicted(p.cachedEvictedFunc(ctx)) p.cache.OnEvicted(nil) - logging.FromContext(ctx).Debugf("invalidating launch template in the cache because it no longer exists") + log.FromContext(ctx).V(1).Info("invalidating launch template in the cache because it no longer exists") p.cache.Delete(ltName) } @@ -188,15 +188,13 @@ func (p *DefaultProvider) createAMIOptions(ctx context.Context, nodeClass *v1bet } if nodeClass.Spec.AssociatePublicIPAddress != nil { options.AssociatePublicIPAddress = nodeClass.Spec.AssociatePublicIPAddress - } else if ok, err := p.subnetProvider.CheckAnyPublicIPAssociations(ctx, nodeClass); err != nil { - return nil, err - } else if !ok { + } else { // when `AssociatePublicIPAddress` is not specified in the `EC2NodeClass` spec, // If all referenced subnets do not assign public IPv4 addresses to EC2 instances therein, we explicitly set // AssociatePublicIPAddress to 'false' in the Launch Template, generated based on this configuration struct. // This is done to help comply with AWS account policies that require explicitly setting of that field to 'false'. 
// https://github.com/aws/karpenter-provider-aws/issues/3815 - options.AssociatePublicIPAddress = aws.Bool(false) + options.AssociatePublicIPAddress = p.subnetProvider.AssociatePublicIPAddressValue(nodeClass) } return options, nil } @@ -204,7 +202,7 @@ func (p *DefaultProvider) createAMIOptions(ctx context.Context, nodeClass *v1bet func (p *DefaultProvider) ensureLaunchTemplate(ctx context.Context, options *amifamily.LaunchTemplate) (*ec2.LaunchTemplate, error) { var launchTemplate *ec2.LaunchTemplate name := LaunchTemplateName(options) - ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("launch-template-name", name)) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("launch-template-name", name)) // Read from cache if launchTemplate, ok := p.cache.Get(name); ok { p.cache.SetDefault(name, launchTemplate) @@ -226,7 +224,7 @@ func (p *DefaultProvider) ensureLaunchTemplate(ctx context.Context, options *ami return nil, fmt.Errorf("expected to find one launch template, but found %d", len(output.LaunchTemplates)) } else { if p.cm.HasChanged("launchtemplate-"+name, name) { - logging.FromContext(ctx).Debugf("discovered launch template") + log.FromContext(ctx).V(1).Info("discovered launch template") } launchTemplate = output.LaunchTemplates[0] } @@ -280,7 +278,7 @@ func (p *DefaultProvider) createLaunchTemplate(ctx context.Context, options *ami if err != nil { return nil, err } - logging.FromContext(ctx).With("id", aws.StringValue(output.LaunchTemplate.LaunchTemplateId)).Debugf("created launch template") + log.FromContext(ctx).WithValues("id", aws.StringValue(output.LaunchTemplate.LaunchTemplateId)).V(1).Info("created launch template") return output.LaunchTemplate, nil } @@ -350,7 +348,7 @@ func (p *DefaultProvider) volumeSize(quantity *resource.Quantity) *int64 { // Any error during hydration will result in a panic func (p *DefaultProvider) hydrateCache(ctx context.Context) { clusterName := options.FromContext(ctx).ClusterName - ctx = 
logging.WithLogger(ctx, logging.FromContext(ctx).With("tag-key", v1beta1.TagManagedLaunchTemplate, "tag-value", clusterName)) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("tag-key", v1beta1.TagManagedLaunchTemplate, "tag-value", clusterName)) if err := p.ec2api.DescribeLaunchTemplatesPagesWithContext(ctx, &ec2.DescribeLaunchTemplatesInput{ Filters: []*ec2.Filter{{Name: aws.String(fmt.Sprintf("tag:%s", v1beta1.TagManagedLaunchTemplate)), Values: []*string{aws.String(clusterName)}}}, }, func(output *ec2.DescribeLaunchTemplatesOutput, _ bool) bool { @@ -359,9 +357,9 @@ func (p *DefaultProvider) hydrateCache(ctx context.Context) { } return true }); err != nil { - logging.FromContext(ctx).Errorf(fmt.Sprintf("Unable to hydrate the AWS launch template cache, %s", err)) + log.FromContext(ctx).Error(err, "unable to hydrate the AWS launch template cache") } else { - logging.FromContext(ctx).With("count", p.cache.ItemCount()).Debugf("hydrated launch template cache") + log.FromContext(ctx).WithValues("count", p.cache.ItemCount()).V(1).Info("hydrated launch template cache") } } @@ -374,13 +372,13 @@ func (p *DefaultProvider) cachedEvictedFunc(ctx context.Context) func(string, in } launchTemplate := lt.(*ec2.LaunchTemplate) if _, err := p.ec2api.DeleteLaunchTemplateWithContext(ctx, &ec2.DeleteLaunchTemplateInput{LaunchTemplateId: launchTemplate.LaunchTemplateId}); awserrors.IgnoreNotFound(err) != nil { - logging.FromContext(ctx).With("launch-template", launchTemplate.LaunchTemplateName).Errorf("failed to delete launch template, %v", err) + log.FromContext(ctx).WithValues("launch-template", launchTemplate.LaunchTemplateName).Error(err, "failed to delete launch template") return } - logging.FromContext(ctx).With( + log.FromContext(ctx).WithValues( "id", aws.StringValue(launchTemplate.LaunchTemplateId), "name", aws.StringValue(launchTemplate.LaunchTemplateName), - ).Debugf("deleted launch template") + ).V(1).Info("deleted launch template") } } @@ -420,7 +418,7 @@ 
func (p *DefaultProvider) DeleteAll(ctx context.Context, nodeClass *v1beta1.EC2N deleteErr = multierr.Append(deleteErr, err) } if len(ltNames) > 0 { - logging.FromContext(ctx).With("launchTemplates", utils.PrettySlice(aws.StringValueSlice(ltNames), 5)).Debugf("deleted launch templates") + log.FromContext(ctx).WithValues("launchTemplates", utils.PrettySlice(aws.StringValueSlice(ltNames), 5)).V(1).Info("deleted launch templates") } if deleteErr != nil { return fmt.Errorf("deleting launch templates, %w", deleteErr) @@ -440,12 +438,12 @@ func (p *DefaultProvider) ResolveClusterCIDR(ctx context.Context) error { } if ipv4CIDR := out.Cluster.KubernetesNetworkConfig.ServiceIpv4Cidr; ipv4CIDR != nil { p.ClusterCIDR.Store(ipv4CIDR) - logging.FromContext(ctx).With("cluster-cidr", *ipv4CIDR).Debugf("discovered cluster CIDR") + log.FromContext(ctx).WithValues("cluster-cidr", *ipv4CIDR).V(1).Info("discovered cluster CIDR") return nil } if ipv6CIDR := out.Cluster.KubernetesNetworkConfig.ServiceIpv6Cidr; ipv6CIDR != nil { p.ClusterCIDR.Store(ipv6CIDR) - logging.FromContext(ctx).With("cluster-cidr", *ipv6CIDR).Debugf("discovered cluster CIDR") + log.FromContext(ctx).WithValues("cluster-cidr", *ipv6CIDR).V(1).Info("discovered cluster CIDR") return nil } return fmt.Errorf("no CIDR found in DescribeCluster response") diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index 48bf35f5fc73..d2a0d6382ffe 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -30,6 +30,7 @@ import ( "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/ec2" admv1alpha1 "github.com/awslabs/amazon-eks-ami/nodeadm/api/v1alpha1" + opstatus "github.com/awslabs/operatorpkg/status" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/samber/lo" @@ -40,7 +41,6 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" clock "k8s.io/utils/clock/testing" - . 
"knative.dev/pkg/logging/testing" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/yaml" @@ -52,7 +52,9 @@ import ( coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/operator/scheme" coretest "sigs.k8s.io/karpenter/pkg/test" + . "sigs.k8s.io/karpenter/pkg/test/expectations" + . "sigs.k8s.io/karpenter/pkg/utils/testing" "github.com/aws/karpenter-provider-aws/pkg/apis" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" @@ -92,7 +94,7 @@ var _ = BeforeSuite(func() { fakeClock = &clock.FakeClock{} cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}), - env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider) + env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider) cluster = state.NewCluster(fakeClock, env.Client, cloudProvider) prov = provisioning.NewProvisioner(env.Client, events.NewRecorder(&record.FakeRecorder{}), cloudProvider, cluster) }) @@ -153,6 +155,7 @@ var _ = Describe("LaunchTemplate Provider", func() { }, }, ) + nodeClass.StatusConditions().SetTrue(opstatus.ConditionReady) nodePool = coretest.NodePool(corev1beta1.NodePool{ Spec: corev1beta1.NodePoolSpec{ Template: corev1beta1.NodeClaimTemplate{ @@ -184,7 +187,16 @@ var _ = Describe("LaunchTemplate Provider", func() { Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed()) }) It("should create unique launch templates for multiple identical nodeClasses", func() { - nodeClass2 := test.EC2NodeClass() + nodeClass2 := test.EC2NodeClass(v1beta1.EC2NodeClass{ + Status: v1beta1.EC2NodeClassStatus{ + InstanceProfile: "test-profile", + Subnets: nodeClass.Status.Subnets, + SecurityGroups: nodeClass.Status.SecurityGroups, + AMIs: nodeClass.Status.AMIs, + }, + }) + _, err := awsEnv.SubnetProvider.List(ctx, nodeClass2) // Hydrate the subnet cache + Expect(err).To(BeNil()) nodePool2 := coretest.NodePool(corev1beta1.NodePool{ Spec: 
corev1beta1.NodePoolSpec{ Template: corev1beta1.NodeClaimTemplate{ @@ -230,6 +242,7 @@ var _ = Describe("LaunchTemplate Provider", func() { Zone: "test-zone-1c", }, } + nodeClass2.StatusConditions().SetTrue(opstatus.ConditionReady) pods := []*v1.Pod{ coretest.UnschedulablePod(coretest.PodOptions{NodeRequirements: []v1.NodeSelectorRequirement{ @@ -1825,14 +1838,14 @@ var _ = Describe("LaunchTemplate Provider", func() { Context("Custom AMI Selector", func() { It("should use ami selector specified in EC2NodeClass", func() { nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} - awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{Images: []*ec2.Image{ + nodeClass.Status.AMIs = []v1beta1.AMI{ { - Name: aws.String(coretest.RandomName()), - ImageId: aws.String("ami-123"), - Architecture: aws.String("x86_64"), - CreationDate: aws.String("2022-08-15T12:00:00Z"), + ID: "ami-123", + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureAmd64}}, + }, }, - }}) + } ExpectApplied(ctx, env.Client, nodeClass, nodePool) pod := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) @@ -1846,14 +1859,14 @@ var _ = Describe("LaunchTemplate Provider", func() { nodeClass.Spec.UserData = aws.String("special user data") nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom - awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{Images: []*ec2.Image{ + nodeClass.Status.AMIs = []v1beta1.AMI{ { - Name: aws.String(coretest.RandomName()), - ImageId: aws.String("ami-123"), - Architecture: aws.String("x86_64"), - CreationDate: aws.String("2022-08-15T12:00:00Z"), + ID: "ami-123", + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: 
[]string{corev1beta1.ArchitectureAmd64}}, + }, }, - }}) + } ExpectApplied(ctx, env.Client, nodeClass, nodePool) pod := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) @@ -1882,6 +1895,8 @@ var _ = Describe("LaunchTemplate Provider", func() { pod := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) ExpectScheduled(ctx, env.Client, pod) + _, err := awsEnv.AMIProvider.List(ctx, nodeClass) + Expect(err).To(BeNil()) Expect(awsEnv.EC2API.CalledWithCreateLaunchTemplateInput.Len()).To(BeNumerically(">=", 2)) actualFilter := awsEnv.EC2API.CalledWithDescribeImagesInput.Pop().Filters expectedFilter := []*ec2.Filter{ @@ -1893,21 +1908,21 @@ var _ = Describe("LaunchTemplate Provider", func() { Expect(actualFilter).To(Equal(expectedFilter)) }) It("should create multiple launch templates when multiple amis are discovered with non-equivalent requirements", func() { - awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{Images: []*ec2.Image{ + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} + nodeClass.Status.AMIs = []v1beta1.AMI{ { - Name: aws.String(coretest.RandomName()), - ImageId: aws.String("ami-123"), - Architecture: aws.String("x86_64"), - CreationDate: aws.String("2022-08-15T12:00:00Z"), + ID: "ami-123", + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureAmd64}}, + }, }, { - Name: aws.String(coretest.RandomName()), - ImageId: aws.String("ami-456"), - Architecture: aws.String("arm64"), - CreationDate: aws.String("2022-08-10T12:00:00Z"), + ID: "ami-456", + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureArm64}}, + }, }, - }}) - nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} + } 
ExpectApplied(ctx, env.Client, nodeClass, nodePool) pod := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) @@ -1944,6 +1959,8 @@ var _ = Describe("LaunchTemplate Provider", func() { }}) nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} ExpectApplied(ctx, env.Client, nodeClass) + controller := status.NewController(env.Client, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) nodePool.Spec.Template.Spec.Requirements = []corev1beta1.NodeSelectorRequirementWithMinValues{ { NodeSelectorRequirement: v1.NodeSelectorRequirement{ @@ -1966,6 +1983,7 @@ var _ = Describe("LaunchTemplate Provider", func() { It("should fail if no amis match selector.", func() { awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{Images: []*ec2.Image{}}) nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} + nodeClass.Status.AMIs = []v1beta1.AMI{} ExpectApplied(ctx, env.Client, nodeClass, nodePool) pod := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) @@ -1976,6 +1994,14 @@ var _ = Describe("LaunchTemplate Provider", func() { awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{Images: []*ec2.Image{ {Name: aws.String(coretest.RandomName()), ImageId: aws.String("ami-123"), Architecture: aws.String("newnew"), CreationDate: aws.String("2022-01-01T12:00:00Z")}}}) nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} + nodeClass.Status.AMIs = []v1beta1.AMI{ + { + ID: "ami-123", + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{"newnew"}}, + }, + }, + } ExpectApplied(ctx, env.Client, nodeClass, nodePool) pod := 
coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) @@ -1987,14 +2013,14 @@ var _ = Describe("LaunchTemplate Provider", func() { awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id", version): "test-ami-123", } - awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{Images: []*ec2.Image{ + nodeClass.Status.AMIs = []v1beta1.AMI{ { - Name: aws.String(coretest.RandomName()), - ImageId: aws.String("test-ami-123"), - Architecture: aws.String("x86_64"), - CreationDate: aws.String("2022-08-15T12:00:00Z"), + ID: "test-ami-123", + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{string(corev1beta1.ArchitectureAmd64)}}, + }, }, - }}) + } ExpectApplied(ctx, env.Client, nodeClass) ExpectApplied(ctx, env.Client, nodePool) pod := coretest.UnschedulablePod() @@ -2012,7 +2038,7 @@ var _ = Describe("LaunchTemplate Provider", func() { } ExpectApplied(ctx, env.Client, nodePool, nodeClass) controller := status.NewController(env.Client, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) pod := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) ExpectScheduled(ctx, env.Client, pod) @@ -2025,7 +2051,7 @@ var _ = Describe("LaunchTemplate Provider", func() { } ExpectApplied(ctx, env.Client, nodePool, nodeClass) controller := status.NewController(env.Client, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.LaunchTemplateProvider) - ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(nodeClass)) + ExpectObjectReconciled(ctx, env.Client, 
controller, nodeClass) pod := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) ExpectScheduled(ctx, env.Client, pod) diff --git a/pkg/providers/pricing/pricing.go b/pkg/providers/pricing/pricing.go index 242416c4a25a..e5b8677696b6 100644 --- a/pkg/providers/pricing/pricing.go +++ b/pkg/providers/pricing/pricing.go @@ -25,6 +25,8 @@ import ( "sync" "time" + "sigs.k8s.io/controller-runtime/pkg/log" + "github.com/aws/karpenter-provider-aws/pkg/operator/options" "github.com/aws/aws-sdk-go/aws" @@ -35,8 +37,6 @@ import ( "github.com/aws/aws-sdk-go/service/pricing/pricingiface" "github.com/samber/lo" "go.uber.org/multierr" - "knative.dev/pkg/logging" - "sigs.k8s.io/karpenter/pkg/utils/pretty" ) @@ -166,7 +166,7 @@ func (p *DefaultProvider) UpdateOnDemandPricing(ctx context.Context) error { // as pricing api may not be available if options.FromContext(ctx).IsolatedVPC { if p.cm.HasChanged("on-demand-prices", nil) { - logging.FromContext(ctx).Debug("running in an isolated VPC, on-demand pricing information will not be updated") + log.FromContext(ctx).V(1).Info("running in an isolated VPC, on-demand pricing information will not be updated") } return nil } @@ -220,7 +220,7 @@ func (p *DefaultProvider) UpdateOnDemandPricing(ctx context.Context) error { p.onDemandPrices = lo.Assign(onDemandPrices, onDemandMetalPrices) if p.cm.HasChanged("on-demand-prices", p.onDemandPrices) { - logging.FromContext(ctx).With("instance-type-count", len(p.onDemandPrices)).Debugf("updated on-demand pricing") + log.FromContext(ctx).WithValues("instance-type-count", len(p.onDemandPrices)).V(1).Info("updated on-demand pricing") } return nil } @@ -282,7 +282,7 @@ func (p *DefaultProvider) spotPage(ctx context.Context, prices map[string]map[st spotPrice, err := strconv.ParseFloat(spotPriceStr, 64) // these errors shouldn't occur, but if pricing API does have an error, we ignore the record if err != nil { - logging.FromContext(ctx).Debugf("unable to parse 
price record %#v", sph) + log.FromContext(ctx).V(1).Info(fmt.Sprintf("unable to parse price record %#v", sph)) continue } if sph.Timestamp == nil { @@ -329,12 +329,12 @@ func (p *DefaultProvider) onDemandPage(ctx context.Context, prices map[string]fl var buf bytes.Buffer enc := json.NewEncoder(&buf) if err := enc.Encode(outer); err != nil { - logging.FromContext(ctx).Errorf("encoding %s", err) + log.FromContext(ctx).Error(err, "failed encoding pricing data") } dec := json.NewDecoder(&buf) var pItem priceItem if err := dec.Decode(&pItem); err != nil { - logging.FromContext(ctx).Errorf("decoding %s", err) + log.FromContext(ctx).Error(err, "failed decoding pricing data") } if pItem.Product.Attributes.InstanceType == "" { continue @@ -392,9 +392,9 @@ func (p *DefaultProvider) UpdateSpotPricing(ctx context.Context) error { p.spotPricingUpdated = true if p.cm.HasChanged("spot-prices", p.spotPrices) { - logging.FromContext(ctx).With( + log.FromContext(ctx).WithValues( "instance-type-count", len(p.onDemandPrices), - "offering-count", totalOfferings).Debugf("updated spot pricing with instance types and offerings") + "offering-count", totalOfferings).V(1).Info("updated spot pricing with instance types and offerings") } return nil } diff --git a/pkg/providers/pricing/zz_generated.pricing_aws.go b/pkg/providers/pricing/zz_generated.pricing_aws.go index 91bce11f094d..acf5653f5122 100644 --- a/pkg/providers/pricing/zz_generated.pricing_aws.go +++ b/pkg/providers/pricing/zz_generated.pricing_aws.go @@ -16,7 +16,7 @@ limitations under the License. 
package pricing -// generated at 2024-03-11T13:05:10Z for us-east-1 +// generated at 2024-04-25T18:18:32Z for us-east-1 var InitialOnDemandPricesAWS = map[string]map[string]float64{ // us-east-1 @@ -90,7 +90,8 @@ var InitialOnDemandPricesAWS = map[string]map[string]float64{ "c7gd.xlarge": 0.181400, // c7gn family "c7gn.12xlarge": 2.995200, "c7gn.16xlarge": 3.993600, "c7gn.2xlarge": 0.499200, "c7gn.4xlarge": 0.998400, - "c7gn.8xlarge": 1.996800, "c7gn.large": 0.124800, "c7gn.medium": 0.062400, "c7gn.xlarge": 0.249600, + "c7gn.8xlarge": 1.996800, "c7gn.large": 0.124800, "c7gn.medium": 0.062400, "c7gn.metal": 3.993600, + "c7gn.xlarge": 0.249600, // c7i family "c7i.12xlarge": 2.142000, "c7i.16xlarge": 2.856000, "c7i.24xlarge": 4.284000, "c7i.2xlarge": 0.357000, "c7i.48xlarge": 8.568000, "c7i.4xlarge": 0.714000, "c7i.8xlarge": 1.428000, "c7i.large": 0.089250, @@ -126,6 +127,11 @@ var InitialOnDemandPricesAWS = map[string]map[string]float64{ // g5g family "g5g.16xlarge": 2.744000, "g5g.2xlarge": 0.556000, "g5g.4xlarge": 0.828000, "g5g.8xlarge": 1.372000, "g5g.metal": 2.744000, "g5g.xlarge": 0.420000, + // g6 family + "g6.12xlarge": 4.601600, "g6.16xlarge": 3.396800, "g6.24xlarge": 6.675200, "g6.2xlarge": 0.977600, + "g6.48xlarge": 13.350400, "g6.4xlarge": 1.323200, "g6.8xlarge": 2.014400, "g6.xlarge": 0.804800, + // gr6 family + "gr6.4xlarge": 1.539200, "gr6.8xlarge": 2.446400, // h1 family "h1.16xlarge": 3.744000, "h1.2xlarge": 0.468000, "h1.4xlarge": 0.936000, "h1.8xlarge": 1.872000, // hpc7g family diff --git a/pkg/providers/pricing/zz_generated.pricing_aws_us_gov.go b/pkg/providers/pricing/zz_generated.pricing_aws_us_gov.go index 099208bd90d9..0bfa2bb8b1c3 100644 --- a/pkg/providers/pricing/zz_generated.pricing_aws_us_gov.go +++ b/pkg/providers/pricing/zz_generated.pricing_aws_us_gov.go @@ -16,7 +16,7 @@ limitations under the License. 
package pricing -// generated at 2024-03-18T13:06:23Z for us-east-1 +// generated at 2024-05-13T13:06:50Z for us-east-1 var InitialOnDemandPricesUSGov = map[string]map[string]float64{ // us-gov-east-1 @@ -74,7 +74,8 @@ var InitialOnDemandPricesUSGov = map[string]map[string]float64{ "inf1.24xlarge": 5.953000, "inf1.2xlarge": 0.456000, "inf1.6xlarge": 1.488000, "inf1.xlarge": 0.288000, // m5 family "m5.12xlarge": 2.904000, "m5.16xlarge": 3.872000, "m5.24xlarge": 5.808000, "m5.2xlarge": 0.484000, - "m5.4xlarge": 0.968000, "m5.8xlarge": 1.936000, "m5.large": 0.121000, "m5.xlarge": 0.242000, + "m5.4xlarge": 0.968000, "m5.8xlarge": 1.936000, "m5.large": 0.121000, "m5.metal": 5.808000, + "m5.xlarge": 0.242000, // m5a family "m5a.12xlarge": 2.616000, "m5a.16xlarge": 3.488000, "m5a.24xlarge": 5.232000, "m5a.2xlarge": 0.436000, "m5a.4xlarge": 0.872000, "m5a.8xlarge": 1.744000, "m5a.large": 0.109000, "m5a.xlarge": 0.218000, @@ -135,6 +136,10 @@ var InitialOnDemandPricesUSGov = map[string]map[string]float64{ "r6i.12xlarge": 3.624000, "r6i.16xlarge": 4.832000, "r6i.24xlarge": 7.248000, "r6i.2xlarge": 0.604000, "r6i.32xlarge": 9.664000, "r6i.4xlarge": 1.208000, "r6i.8xlarge": 2.416000, "r6i.large": 0.151000, "r6i.metal": 9.664000, "r6i.xlarge": 0.302000, + // r7i family + "r7i.12xlarge": 3.805200, "r7i.16xlarge": 5.073600, "r7i.24xlarge": 7.610400, "r7i.2xlarge": 0.634200, + "r7i.48xlarge": 15.220800, "r7i.4xlarge": 1.268400, "r7i.8xlarge": 2.536800, "r7i.large": 0.158550, + "r7i.metal-24xl": 8.371440, "r7i.metal-48xl": 15.220800, "r7i.xlarge": 0.317100, // t3 family "t3.2xlarge": 0.390400, "t3.large": 0.097600, "t3.medium": 0.048800, "t3.micro": 0.012200, "t3.nano": 0.006100, "t3.small": 0.024400, "t3.xlarge": 0.195200, diff --git a/pkg/providers/securitygroup/securitygroup.go b/pkg/providers/securitygroup/securitygroup.go index 94a89ad20675..db955dde3e12 100644 --- a/pkg/providers/securitygroup/securitygroup.go +++ b/pkg/providers/securitygroup/securitygroup.go @@ -25,7 +25,7 
@@ import ( "github.com/mitchellh/hashstructure/v2" "github.com/patrickmn/go-cache" "github.com/samber/lo" - "knative.dev/pkg/logging" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/karpenter/pkg/utils/pretty" @@ -63,11 +63,11 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1beta1.EC2NodeCl return nil, err } if p.cm.HasChanged(fmt.Sprintf("security-groups/%s", nodeClass.Name), securityGroups) { - logging.FromContext(ctx). - With("security-groups", lo.Map(securityGroups, func(s *ec2.SecurityGroup, _ int) string { + log.FromContext(ctx). + WithValues("security-groups", lo.Map(securityGroups, func(s *ec2.SecurityGroup, _ int) string { return aws.StringValue(s.GroupId) })). - Debugf("discovered security groups") + V(1).Info("discovered security groups") } return securityGroups, nil } @@ -78,7 +78,9 @@ func (p *DefaultProvider) getSecurityGroups(ctx context.Context, filterSets [][] return nil, err } if sg, ok := p.cache.Get(fmt.Sprint(hash)); ok { - return sg.([]*ec2.SecurityGroup), nil + // Ensure what's returned from this function is a shallow-copy of the slice (not a deep-copy of the data itself) + // so that modifications to the ordering of the data don't affect the original + return append([]*ec2.SecurityGroup{}, sg.([]*ec2.SecurityGroup)...), nil } securityGroups := map[string]*ec2.SecurityGroup{} for _, filters := range filterSets { diff --git a/pkg/providers/securitygroup/suite_test.go b/pkg/providers/securitygroup/suite_test.go index 901dbcdd5d35..b1ed34b6a38c 100644 --- a/pkg/providers/securitygroup/suite_test.go +++ b/pkg/providers/securitygroup/suite_test.go @@ -16,6 +16,8 @@ package securitygroup_test import ( "context" + "sort" + "sync" "testing" "github.com/aws/aws-sdk-go/aws" @@ -33,8 +35,8 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . 
"sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context @@ -330,6 +332,72 @@ var _ = Describe("SecurityGroupProvider", func() { } }) }) + It("should not cause data races when calling List() simultaneously", func() { + wg := sync.WaitGroup{} + for i := 0; i < 10000; i++ { + wg.Add(1) + go func() { + defer wg.Done() + defer GinkgoRecover() + securityGroups, err := awsEnv.SecurityGroupProvider.List(ctx, nodeClass) + Expect(err).ToNot(HaveOccurred()) + + Expect(securityGroups).To(HaveLen(3)) + // Sort everything in parallel and ensure that we don't get data races + sort.Slice(securityGroups, func(i, j int) bool { + return *securityGroups[i].GroupId < *securityGroups[j].GroupId + }) + Expect(securityGroups).To(BeEquivalentTo([]*ec2.SecurityGroup{ + { + GroupId: lo.ToPtr("sg-test1"), + GroupName: lo.ToPtr("securityGroup-test1"), + Tags: []*ec2.Tag{ + { + Key: lo.ToPtr("Name"), + Value: lo.ToPtr("test-security-group-1"), + }, + { + Key: lo.ToPtr("foo"), + Value: lo.ToPtr("bar"), + }, + }, + }, + { + GroupId: lo.ToPtr("sg-test2"), + GroupName: lo.ToPtr("securityGroup-test2"), + Tags: []*ec2.Tag{ + { + Key: lo.ToPtr("Name"), + Value: lo.ToPtr("test-security-group-2"), + }, + { + Key: lo.ToPtr("foo"), + Value: lo.ToPtr("bar"), + }, + }, + }, + { + GroupId: lo.ToPtr("sg-test3"), + GroupName: lo.ToPtr("securityGroup-test3"), + Tags: []*ec2.Tag{ + { + Key: lo.ToPtr("Name"), + Value: lo.ToPtr("test-security-group-3"), + }, + { + Key: lo.ToPtr("TestTag"), + }, + { + Key: lo.ToPtr("foo"), + Value: lo.ToPtr("bar"), + }, + }, + }, + })) + }() + } + wg.Wait() + }) }) func ExpectConsistsOfSecurityGroups(expected, actual []*ec2.SecurityGroup) { diff --git a/pkg/providers/subnet/subnet.go b/pkg/providers/subnet/subnet.go index 692bae822a6b..95bba2532361 100644 --- a/pkg/providers/subnet/subnet.go +++ b/pkg/providers/subnet/subnet.go @@ -26,24 +26,27 @@ import ( "github.com/mitchellh/hashstructure/v2" "github.com/patrickmn/go-cache" "github.com/samber/lo" - 
"knative.dev/pkg/logging" + v1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/log" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" + corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/scheduling" "sigs.k8s.io/karpenter/pkg/utils/pretty" ) type Provider interface { LivenessProbe(*http.Request) error List(context.Context, *v1beta1.EC2NodeClass) ([]*ec2.Subnet, error) - CheckAnyPublicIPAssociations(context.Context, *v1beta1.EC2NodeClass) (bool, error) + AssociatePublicIPAddressValue(*v1beta1.EC2NodeClass) *bool ZonalSubnetsForLaunch(context.Context, *v1beta1.EC2NodeClass, []*cloudprovider.InstanceType, string) (map[string]*Subnet, error) UpdateInflightIPs(*ec2.CreateFleetInput, *ec2.CreateFleetOutput, []*cloudprovider.InstanceType, []*Subnet, string) } type DefaultProvider struct { - sync.RWMutex + sync.Mutex ec2api ec2iface.EC2API cache *cache.Cache availableIPAddressCache *cache.Cache @@ -55,6 +58,7 @@ type DefaultProvider struct { type Subnet struct { ID string Zone string + ZoneID string AvailableIPAddressCount int64 } @@ -84,7 +88,9 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1beta1.EC2NodeCl return nil, err } if subnets, ok := p.cache.Get(fmt.Sprint(hash)); ok { - return subnets.([]*ec2.Subnet), nil + // Ensure what's returned from this function is a shallow-copy of the slice (not a deep-copy of the data itself) + // so that modifications to the ordering of the data don't affect the original + return append([]*ec2.Subnet{}, subnets.([]*ec2.Subnet)...), nil } // Ensure that all the subnets that are returned here are unique @@ -105,23 +111,29 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1beta1.EC2NodeCl } p.cache.SetDefault(fmt.Sprint(hash), lo.Values(subnets)) if p.cm.HasChanged(fmt.Sprintf("subnets/%s", nodeClass.Name), subnets) { - logging.FromContext(ctx). 
- With("subnets", lo.Map(lo.Values(subnets), func(s *ec2.Subnet, _ int) string { - return fmt.Sprintf("%s (%s)", aws.StringValue(s.SubnetId), aws.StringValue(s.AvailabilityZone)) - })). - Debugf("discovered subnets") + log.FromContext(ctx). + WithValues("subnets", lo.Map(lo.Values(subnets), func(s *ec2.Subnet, _ int) v1beta1.Subnet { + return v1beta1.Subnet{ + ID: lo.FromPtr(s.SubnetId), + Zone: lo.FromPtr(s.AvailabilityZone), + ZoneID: lo.FromPtr(s.AvailabilityZoneId), + } + })).V(1).Info("discovered subnets") } return lo.Values(subnets), nil } -// CheckAnyPublicIPAssociations returns a bool indicating whether all referenced subnets assign public IPv4 addresses to EC2 instances created therein -func (p *DefaultProvider) CheckAnyPublicIPAssociations(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (bool, error) { +// associatePublicIPAddressValue validates whether we know the association value for all subnets AND +// that all subnets don't have associatePublicIP set. If both of these are true, we set the value explicitly to false +// For more detail see: https://github.com/aws/karpenter-provider-aws/pull/3814 +func (p *DefaultProvider) AssociatePublicIPAddressValue(nodeClass *v1beta1.EC2NodeClass) *bool { for _, subnet := range nodeClass.Status.Subnets { - if subnetAssociatePublicIP, ok := p.associatePublicIPAddressCache.Get(subnet.ID); ok && subnetAssociatePublicIP.(bool) { - return true, nil + subnetAssociatePublicIP, ok := p.associatePublicIPAddressCache.Get(subnet.ID) + if !ok || subnetAssociatePublicIP.(bool) { + return nil } } - return false, nil + return lo.ToPtr(false) } // ZonalSubnetsForLaunch returns a mapping of zone to the subnet with the most available IP addresses and deducts the passed ips from the available count @@ -156,11 +168,14 @@ func (p *DefaultProvider) ZonalSubnetsForLaunch(ctx context.Context, nodeClass * continue } } - zonalSubnets[subnet.Zone] = &Subnet{ID: subnet.ID, Zone: subnet.Zone, AvailableIPAddressCount: 
availableIPAddressCount[subnet.ID]} + zonalSubnets[subnet.Zone] = &Subnet{ID: subnet.ID, Zone: subnet.Zone, ZoneID: subnet.ZoneID, AvailableIPAddressCount: availableIPAddressCount[subnet.ID]} } for _, subnet := range zonalSubnets { - predictedIPsUsed := p.minPods(instanceTypes, subnet.Zone, capacityType) + predictedIPsUsed := p.minPods(instanceTypes, scheduling.NewRequirements( + scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), + scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, subnet.Zone), + )) prevIPs := subnet.AvailableIPAddressCount if trackedIPs, ok := p.inflightIPs[subnet.ID]; ok { prevIPs = trackedIPs @@ -222,7 +237,10 @@ func (p *DefaultProvider) UpdateInflightIPs(createFleetInput *ec2.CreateFleetInp if originalSubnet.AvailableIPAddressCount == cachedIPAddressCount { // other IPs deducted were opportunistic and need to be readded since Fleet didn't pick those subnets to launch into if ips, ok := p.inflightIPs[originalSubnet.ID]; ok { - minPods := p.minPods(instanceTypes, originalSubnet.Zone, capacityType) + minPods := p.minPods(instanceTypes, scheduling.NewRequirements( + scheduling.NewRequirement(corev1beta1.CapacityTypeLabelKey, v1.NodeSelectorOpIn, capacityType), + scheduling.NewRequirement(v1.LabelTopologyZone, v1.NodeSelectorOpIn, originalSubnet.Zone), + )) p.inflightIPs[originalSubnet.ID] = ips + minPods } } @@ -236,10 +254,10 @@ func (p *DefaultProvider) LivenessProbe(_ *http.Request) error { return nil } -func (p *DefaultProvider) minPods(instanceTypes []*cloudprovider.InstanceType, zone string, capacityType string) int64 { +func (p *DefaultProvider) minPods(instanceTypes []*cloudprovider.InstanceType, reqs scheduling.Requirements) int64 { // filter for instance types available in the zone and capacity type being requested filteredInstanceTypes := lo.Filter(instanceTypes, func(it *cloudprovider.InstanceType, _ int) bool { - offering, ok := it.Offerings.Get(capacityType, zone) + 
offering, ok := it.Offerings.Get(reqs) if !ok { return false } diff --git a/pkg/providers/subnet/suite_test.go b/pkg/providers/subnet/suite_test.go index ff5c6146b706..4760e6ff7354 100644 --- a/pkg/providers/subnet/suite_test.go +++ b/pkg/providers/subnet/suite_test.go @@ -16,6 +16,8 @@ package subnet_test import ( "context" + "sort" + "sync" "testing" "github.com/aws/aws-sdk-go/aws" @@ -33,8 +35,8 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "knative.dev/pkg/logging/testing" . "sigs.k8s.io/karpenter/pkg/test/expectations" + . "sigs.k8s.io/karpenter/pkg/utils/testing" ) var ctx context.Context @@ -105,6 +107,7 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test1"), AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) @@ -124,11 +127,13 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test1"), AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, { SubnetId: lo.ToPtr("subnet-test2"), AvailabilityZone: lo.ToPtr("test-zone-1b"), + AvailabilityZoneId: lo.ToPtr("tstz1-1b"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) @@ -150,11 +155,13 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test1"), AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, { SubnetId: lo.ToPtr("subnet-test2"), AvailabilityZone: lo.ToPtr("test-zone-1b"), + AvailabilityZoneId: lo.ToPtr("tstz1-1b"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) @@ -171,6 +178,7 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test1"), AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) @@ 
-190,11 +198,13 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test1"), AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, { SubnetId: lo.ToPtr("subnet-test2"), AvailabilityZone: lo.ToPtr("test-zone-1b"), + AvailabilityZoneId: lo.ToPtr("tstz1-1b"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) @@ -212,24 +222,26 @@ var _ = Describe("SubnetProvider", func() { { SubnetId: lo.ToPtr("subnet-test2"), AvailabilityZone: lo.ToPtr("test-zone-1b"), + AvailabilityZoneId: lo.ToPtr("tstz1-1b"), AvailableIpAddressCount: lo.ToPtr[int64](100), }, }, subnets) }) }) - Context("CheckAnyPublicIPAssociations", func() { - It("should note that no subnets assign a public IPv4 address to EC2 instances on launch", func() { + Context("AssociatePublicIPAddress", func() { + It("should be false when no subnets assign a public IPv4 address to EC2 instances on launch", func() { nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{ { ID: "subnet-test1", Tags: map[string]string{"foo": "bar"}, }, } - onlyPrivate, err := awsEnv.SubnetProvider.CheckAnyPublicIPAssociations(ctx, nodeClass) + _, err := awsEnv.SubnetProvider.List(ctx, nodeClass) Expect(err).To(BeNil()) - Expect(onlyPrivate).To(BeFalse()) + associatePublicIP := awsEnv.SubnetProvider.AssociatePublicIPAddressValue(nodeClass) + Expect(lo.FromPtr(associatePublicIP)).To(BeFalse()) }) - It("should note that at least one subnet assigns a public IPv4 address to EC2instances on launch", func() { + It("should be nil when at least one subnet assigns a public IPv4 address to EC2instances on launch", func() { nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{ { ID: "subnet-test2", @@ -243,9 +255,24 @@ var _ = Describe("SubnetProvider", func() { } _, err := awsEnv.SubnetProvider.List(ctx, nodeClass) Expect(err).To(BeNil()) - onlyPrivate, err := 
awsEnv.SubnetProvider.CheckAnyPublicIPAssociations(ctx, nodeClass) - Expect(err).To(BeNil()) - Expect(onlyPrivate).To(BeTrue()) + associatePublicIP := awsEnv.SubnetProvider.AssociatePublicIPAddressValue(nodeClass) + Expect(associatePublicIP).To(BeNil()) + }) + It("should be nil when no subnet data is present in the provider cache", func() { + nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{ + { + ID: "subnet-test2", + }, + } + nodeClass.Status.Subnets = []v1beta1.Subnet{ + { + ID: "subnet-test2", + Zone: "test-zone-1b", + }, + } + awsEnv.SubnetCache.Flush() // remove any subnet data that might be in the subnetCache + associatePublicIP := awsEnv.SubnetProvider.AssociatePublicIPAddressValue(nodeClass) + Expect(associatePublicIP).To(BeNil()) }) }) Context("Provider Cache", func() { @@ -294,6 +321,97 @@ var _ = Describe("SubnetProvider", func() { } }) }) + It("should not cause data races when calling List() simultaneously", func() { + wg := sync.WaitGroup{} + for i := 0; i < 10000; i++ { + wg.Add(1) + go func() { + defer wg.Done() + defer GinkgoRecover() + subnets, err := awsEnv.SubnetProvider.List(ctx, nodeClass) + Expect(err).ToNot(HaveOccurred()) + + Expect(subnets).To(HaveLen(4)) + // Sort everything in parallel and ensure that we don't get data races + sort.Slice(subnets, func(i, j int) bool { + if int(*subnets[i].AvailableIpAddressCount) != int(*subnets[j].AvailableIpAddressCount) { + return int(*subnets[i].AvailableIpAddressCount) > int(*subnets[j].AvailableIpAddressCount) + } + return *subnets[i].SubnetId < *subnets[j].SubnetId + }) + Expect(subnets).To(BeEquivalentTo([]*ec2.Subnet{ + { + AvailabilityZone: lo.ToPtr("test-zone-1a"), + AvailabilityZoneId: lo.ToPtr("tstz1-1a"), + AvailableIpAddressCount: lo.ToPtr[int64](100), + SubnetId: lo.ToPtr("subnet-test1"), + MapPublicIpOnLaunch: lo.ToPtr(false), + Tags: []*ec2.Tag{ + { + Key: lo.ToPtr("Name"), + Value: lo.ToPtr("test-subnet-1"), + }, + { + Key: lo.ToPtr("foo"), + Value: lo.ToPtr("bar"), + 
}, + }, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1b"), + AvailabilityZoneId: lo.ToPtr("tstz1-1b"), + AvailableIpAddressCount: lo.ToPtr[int64](100), + MapPublicIpOnLaunch: lo.ToPtr(true), + SubnetId: lo.ToPtr("subnet-test2"), + + Tags: []*ec2.Tag{ + { + Key: lo.ToPtr("Name"), + Value: lo.ToPtr("test-subnet-2"), + }, + { + Key: lo.ToPtr("foo"), + Value: lo.ToPtr("bar"), + }, + }, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1c"), + AvailabilityZoneId: lo.ToPtr("tstz1-1c"), + AvailableIpAddressCount: lo.ToPtr[int64](100), + SubnetId: lo.ToPtr("subnet-test3"), + Tags: []*ec2.Tag{ + { + Key: lo.ToPtr("Name"), + Value: lo.ToPtr("test-subnet-3"), + }, + { + Key: lo.ToPtr("TestTag"), + }, + { + Key: lo.ToPtr("foo"), + Value: lo.ToPtr("bar"), + }, + }, + }, + { + AvailabilityZone: lo.ToPtr("test-zone-1a-local"), + AvailabilityZoneId: lo.ToPtr("tstz1-1alocal"), + AvailableIpAddressCount: lo.ToPtr[int64](100), + SubnetId: lo.ToPtr("subnet-test4"), + MapPublicIpOnLaunch: lo.ToPtr(true), + Tags: []*ec2.Tag{ + { + Key: lo.ToPtr("Name"), + Value: lo.ToPtr("test-subnet-4"), + }, + }, + }, + })) + }() + } + wg.Wait() + }) }) func ExpectConsistsOfSubnets(expected, actual []*ec2.Subnet) { @@ -302,6 +420,7 @@ func ExpectConsistsOfSubnets(expected, actual []*ec2.Subnet) { for _, elem := range expected { _, ok := lo.Find(actual, func(s *ec2.Subnet) bool { return lo.FromPtr(s.SubnetId) == lo.FromPtr(elem.SubnetId) && + lo.FromPtr(s.AvailabilityZoneId) == lo.FromPtr(elem.AvailabilityZoneId) && lo.FromPtr(s.AvailabilityZone) == lo.FromPtr(elem.AvailabilityZone) && lo.FromPtr(s.AvailableIpAddressCount) == lo.FromPtr(elem.AvailableIpAddressCount) }) diff --git a/pkg/providers/version/version.go b/pkg/providers/version/version.go index c0c201cd7c7e..87ab95bb3f12 100644 --- a/pkg/providers/version/version.go +++ b/pkg/providers/version/version.go @@ -22,7 +22,7 @@ import ( "github.com/patrickmn/go-cache" "k8s.io/apimachinery/pkg/util/version" "k8s.io/client-go/kubernetes" - 
"knative.dev/pkg/logging" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/karpenter/pkg/utils/pretty" ) @@ -67,9 +67,9 @@ func (p *DefaultProvider) Get(ctx context.Context) (string, error) { version := fmt.Sprintf("%s.%s", serverVersion.Major, strings.TrimSuffix(serverVersion.Minor, "+")) p.cache.SetDefault(kubernetesVersionCacheKey, version) if p.cm.HasChanged("kubernetes-version", version) { - logging.FromContext(ctx).With("version", version).Debugf("discovered kubernetes version") + log.FromContext(ctx).WithValues("version", version).V(1).Info("discovered kubernetes version") if err := validateK8sVersion(version); err != nil { - logging.FromContext(ctx).Error(err) + log.FromContext(ctx).Error(err, "failed validating kubernetes version") } } return version, nil diff --git a/pkg/test/environment.go b/pkg/test/environment.go index 81aa70575470..afef49ebfac5 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -21,7 +21,6 @@ import ( "github.com/patrickmn/go-cache" "github.com/samber/lo" corev1 "k8s.io/api/core/v1" - "knative.dev/pkg/ptr" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/operator/scheme" @@ -120,7 +119,7 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment amiResolver, securityGroupProvider, subnetProvider, - ptr.String("ca-bundle"), + lo.ToPtr("ca-bundle"), make(chan struct{}), net.ParseIP("10.0.100.10"), "https://test-cluster", diff --git a/pkg/test/nodeclass.go b/pkg/test/nodeclass.go index 4aa58d4a75d3..13f457bdddea 100644 --- a/pkg/test/nodeclass.go +++ b/pkg/test/nodeclass.go @@ -19,6 +19,7 @@ import ( "fmt" "github.com/imdario/mergo" + v1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" @@ -37,6 +38,38 @@ func EC2NodeClass(overrides ...v1beta1.EC2NodeClass) *v1beta1.EC2NodeClass { } if options.Spec.AMIFamily == nil { options.Spec.AMIFamily = &v1beta1.AMIFamilyAL2 + options.Status.AMIs = 
[]v1beta1.AMI{ + { + ID: "ami-test1", + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureAmd64}}, + {Key: v1beta1.LabelInstanceGPUCount, Operator: v1.NodeSelectorOpDoesNotExist}, + {Key: v1beta1.LabelInstanceAcceleratorCount, Operator: v1.NodeSelectorOpDoesNotExist}, + }, + }, + { + ID: "ami-test2", + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureAmd64}}, + {Key: v1beta1.LabelInstanceGPUCount, Operator: v1.NodeSelectorOpExists}, + }, + }, + { + ID: "ami-test3", + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureAmd64}}, + {Key: v1beta1.LabelInstanceAcceleratorCount, Operator: v1.NodeSelectorOpExists}, + }, + }, + { + ID: "ami-test4", + Requirements: []v1.NodeSelectorRequirement{ + {Key: v1.LabelArchStable, Operator: v1.NodeSelectorOpIn, Values: []string{corev1beta1.ArchitectureArm64}}, + {Key: v1beta1.LabelInstanceGPUCount, Operator: v1.NodeSelectorOpDoesNotExist}, + {Key: v1beta1.LabelInstanceAcceleratorCount, Operator: v1.NodeSelectorOpDoesNotExist}, + }, + }, + } } if options.Spec.Role == "" { options.Spec.Role = "test-role" @@ -50,6 +83,17 @@ func EC2NodeClass(overrides ...v1beta1.EC2NodeClass) *v1beta1.EC2NodeClass { }, }, } + options.Status.SecurityGroups = []v1beta1.SecurityGroup{ + { + ID: "sg-test1", + }, + { + ID: "sg-test2", + }, + { + ID: "sg-test3", + }, + } } if len(options.Spec.SubnetSelectorTerms) == 0 { options.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{ @@ -59,6 +103,23 @@ func EC2NodeClass(overrides ...v1beta1.EC2NodeClass) *v1beta1.EC2NodeClass { }, }, } + options.Status.Subnets = []v1beta1.Subnet{ + { + ID: "subnet-test1", + Zone: "test-zone-1a", + ZoneID: "tstz1-1a", + }, + { + ID: "subnet-test2", + Zone: "test-zone-1b", + ZoneID: "tstz1-1b", + }, 
+ { + ID: "subnet-test3", + Zone: "test-zone-1c", + ZoneID: "tstz1-1c", + }, + } } return &v1beta1.EC2NodeClass{ ObjectMeta: test.ObjectMeta(options.ObjectMeta), diff --git a/test/hack/e2e_scripts/install_karpenter.sh b/test/hack/e2e_scripts/install_karpenter.sh index dbe1aba20d71..7fa823f98924 100755 --- a/test/hack/e2e_scripts/install_karpenter.sh +++ b/test/hack/e2e_scripts/install_karpenter.sh @@ -26,9 +26,9 @@ helm upgrade --install karpenter "${CHART}" \ --set settings.clusterName="$CLUSTER_NAME" \ --set settings.interruptionQueue="$CLUSTER_NAME" \ --set settings.featureGates.spotToSpotConsolidation=true \ - --set controller.resources.requests.cpu=3 \ + --set controller.resources.requests.cpu=5 \ --set controller.resources.requests.memory=3Gi \ - --set controller.resources.limits.cpu=3 \ + --set controller.resources.limits.cpu=5 \ --set controller.resources.limits.memory=3Gi \ --set serviceMonitor.enabled=true \ --set serviceMonitor.additionalLabels.scrape=enabled \ diff --git a/test/hack/resource/go.mod b/test/hack/resource/go.mod index 81b29ec37b30..fe3daf63140e 100644 --- a/test/hack/resource/go.mod +++ b/test/hack/resource/go.mod @@ -1,48 +1,48 @@ module github.com/aws/karpenter-provider-aws/test/hack/resource -go 1.22 +go 1.22.3 require ( - github.com/aws/aws-sdk-go-v2 v1.22.1 - github.com/aws/aws-sdk-go-v2/config v1.18.27 - github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0 - github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0 - github.com/aws/aws-sdk-go-v2/service/iam v1.21.0 - github.com/aws/aws-sdk-go-v2/service/timestreamwrite v1.18.2 - github.com/samber/lo v1.38.1 + github.com/aws/aws-sdk-go-v2 v1.26.1 + github.com/aws/aws-sdk-go-v2/config v1.27.11 + github.com/aws/aws-sdk-go-v2/service/cloudformation v1.50.0 + github.com/aws/aws-sdk-go-v2/service/ec2 v1.160.0 + github.com/aws/aws-sdk-go-v2/service/iam v1.32.0 + github.com/aws/aws-sdk-go-v2/service/timestreamwrite v1.25.5 + github.com/samber/lo v1.39.0 go.uber.org/multierr v1.11.0 - 
go.uber.org/zap v1.24.0 - golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 - k8s.io/api v0.29.2 + go.uber.org/zap v1.27.0 + golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f + k8s.io/api v0.30.0 ) require ( - github.com/aws/aws-sdk-go-v2/credentials v1.13.26 // indirect - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.1 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.1 // indirect - github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.7.32 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.12.12 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.12 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.19.2 // indirect - github.com/aws/smithy-go v1.16.0 // indirect - github.com/go-logr/logr v1.3.0 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.17.11 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.1 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.5 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.5 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.9.6 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.7 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.20.5 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.4 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.28.6 // indirect + github.com/aws/smithy-go v1.20.2 // indirect + github.com/go-logr/logr v1.4.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/google/gofuzz v1.2.0 // indirect 
github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect - go.uber.org/atomic v1.7.0 // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/net v0.24.0 // indirect + golang.org/x/text v0.15.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - k8s.io/apimachinery v0.29.2 // indirect - k8s.io/klog/v2 v2.110.1 // indirect - k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect + k8s.io/apimachinery v0.30.0 // indirect + k8s.io/klog/v2 v2.120.1 // indirect + k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) diff --git a/test/hack/resource/go.sum b/test/hack/resource/go.sum index 43143644115f..ab9878db89cb 100644 --- a/test/hack/resource/go.sum +++ b/test/hack/resource/go.sum @@ -1,55 +1,46 @@ -github.com/aws/aws-sdk-go-v2 v1.18.1/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= -github.com/aws/aws-sdk-go-v2 v1.20.1/go.mod h1:NU06lETsFm8fUC6ZjhgDpVBcGZTFQ6XM+LZWZxMI4ac= -github.com/aws/aws-sdk-go-v2 v1.22.1 h1:sjnni/AuoTXxHitsIdT0FwmqUuNUuHtufcVDErVFT9U= -github.com/aws/aws-sdk-go-v2 v1.22.1/go.mod h1:Kd0OJtkW3Q0M0lUWGszapWjEvrXDzRW+D21JNsroB+c= -github.com/aws/aws-sdk-go-v2/config v1.18.27 h1:Az9uLwmssTE6OGTpsFqOnaGpLnKDqNYOJzWuC6UAYzA= -github.com/aws/aws-sdk-go-v2/config v1.18.27/go.mod h1:0My+YgmkGxeqjXZb5BYme5pc4drjTnM+x1GJ3zv42Nw= -github.com/aws/aws-sdk-go-v2/credentials v1.13.26 h1:qmU+yhKmOCyujmuPY7tf5MxR/RKyZrOPO3V4DobiTUk= -github.com/aws/aws-sdk-go-v2/credentials v1.13.26/go.mod h1:GoXt2YC8jHUBbA4jr+W3JiemnIbkXOfxSXcisUsZ3os= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4 h1:LxK/bitrAr4lnh9LnIS6i7zWbCOdMsfzKFBI6LUCS0I= 
-github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4/go.mod h1:E1hLXN/BL2e6YizK1zFlYd8vsfi2GTjbjBazinMmeaM= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.34/go.mod h1:wZpTEecJe0Btj3IYnDx/VlUzor9wm3fJHyvLpQF0VwY= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.38/go.mod h1:qggunOChCMu9ZF/UkAfhTz25+U2rLVb3ya0Ua6TTfCA= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.1 h1:fi1ga6WysOyYb5PAf3Exd6B5GiSNpnZim4h1rhlBqx0= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.2.1/go.mod h1:V5CY8wNurvPUibTi9mwqUqpiFZ5LnioKWIFUDtIzdI8= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.28/go.mod h1:7VRpKQQedkfIEXb4k52I7swUnZP0wohVajJMRn3vsUw= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.32/go.mod h1:0ZXSqrty4FtQ7p8TEuRde/SZm9X05KT18LAUlR40Ln0= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.1 h1:ZpaV/j48RlPc4AmOZuPv22pJliXjXq8/reL63YzyFnw= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.5.1/go.mod h1:R8aXraabD2e3qv1csxM14/X9WF4wFMIY0kH4YEtYD5M= -github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35 h1:LWA+3kDM8ly001vJ1X1waCuLJdtTl48gwkPKWy9sosI= -github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35/go.mod h1:0Eg1YjxE0Bhn56lx+SHJwCzhW+2JGtizsrx+lCqrfm0= -github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0 h1:XbDkc4FLeg1RfnqeblfbJvaEabqq9ByZl4zqyPFkfSc= -github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0/go.mod h1:SwQFcCs9Rog8hSHm+81KBkAK+UKLXErA/1ChaEI8mLE= -github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0 h1:P4dyjm49F2kKws0FpouBC6fjVImACXKt752+CWa01lM= -github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0/go.mod h1:tIctCeX9IbzsUTKHt53SVEcgyfxV2ElxJeEB+QUbc4M= -github.com/aws/aws-sdk-go-v2/service/iam v1.21.0 h1:8hEpu60CWlrp7iEBUFRZhgPoX6+gadaGL1sD4LoRYS0= -github.com/aws/aws-sdk-go-v2/service/iam v1.21.0/go.mod h1:aQZ8BI+reeaY7RI/QQp7TKCSUHOesTdrzzylp3CW85c= -github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.7.32 h1:ltFklFRb78MNetqtmqZ/6Tc6i76QRMXxDe0LXYl/jd8= 
-github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.7.32/go.mod h1:jBlPRKTAedLFuhO71Wm5dgN9x+/pJ6TtwfQmq7RLvNk= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28 h1:bkRyG4a929RCnpVSTvLM2j/T4ls015ZhhYApbmYs15s= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28/go.mod h1:jj7znCIg05jXlaGBlFMGP8+7UN3VtCkRBG2spnmRQkU= -github.com/aws/aws-sdk-go-v2/service/sso v1.12.12 h1:nneMBM2p79PGWBQovYO/6Xnc2ryRMw3InnDJq1FHkSY= -github.com/aws/aws-sdk-go-v2/service/sso v1.12.12/go.mod h1:HuCOxYsF21eKrerARYO6HapNeh9GBNq7fius2AcwodY= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.12 h1:2qTR7IFk7/0IN/adSFhYu9Xthr0zVFTgBrmPldILn80= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.12/go.mod h1:E4VrHCPzmVB/KFXtqBGKb3c8zpbNBgKe3fisDNLAW5w= -github.com/aws/aws-sdk-go-v2/service/sts v1.19.2 h1:XFJ2Z6sNUUcAz9poj+245DMkrHE4h2j5I9/xD50RHfE= -github.com/aws/aws-sdk-go-v2/service/sts v1.19.2/go.mod h1:dp0yLPsLBOi++WTxzCjA/oZqi6NPIhoR+uF7GeMU9eg= -github.com/aws/aws-sdk-go-v2/service/timestreamwrite v1.18.2 h1:5QyvAYyr+ZibpVxfovzd5JMTZ8miv9s3zT4jG4PJkIA= -github.com/aws/aws-sdk-go-v2/service/timestreamwrite v1.18.2/go.mod h1:3ZCiyyNF7myh/a7DcOjcqRsLmSF9EdhEZSr00Qlui4s= -github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= -github.com/aws/smithy-go v1.14.1/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= -github.com/aws/smithy-go v1.16.0 h1:gJZEH/Fqh+RsvlJ1Zt4tVAtV6bKkp3cC+R6FCZMNzik= -github.com/aws/smithy-go v1.16.0/go.mod h1:NukqUGpCZIILqqiV0NIjeFh24kd/FAa4beRb6nbIUPE= -github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= -github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/aws/aws-sdk-go-v2 v1.26.1 h1:5554eUqIYVWpU0YmeeYZ0wU64H2VLBs8TlhRB2L+EkA= +github.com/aws/aws-sdk-go-v2 v1.26.1/go.mod h1:ffIFB97e2yNsv4aTSGkqtHnppsIJzw7G7BReUZ3jCXM= +github.com/aws/aws-sdk-go-v2/config v1.27.11 
h1:f47rANd2LQEYHda2ddSCKYId18/8BhSRM4BULGmfgNA= +github.com/aws/aws-sdk-go-v2/config v1.27.11/go.mod h1:SMsV78RIOYdve1vf36z8LmnszlRWkwMQtomCAI0/mIE= +github.com/aws/aws-sdk-go-v2/credentials v1.17.11 h1:YuIB1dJNf1Re822rriUOTxopaHHvIq0l/pX3fwO+Tzs= +github.com/aws/aws-sdk-go-v2/credentials v1.17.11/go.mod h1:AQtFPsDH9bI2O+71anW6EKL+NcD7LG3dpKGMV4SShgo= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.1 h1:FVJ0r5XTHSmIHJV6KuDmdYhEpvlHpiSd38RQWhut5J4= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.1/go.mod h1:zusuAeqezXzAB24LGuzuekqMAEgWkVYukBec3kr3jUg= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.5 h1:aw39xVGeRWlWx9EzGVnhOR4yOjQDHPQ6o6NmBlscyQg= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.5/go.mod h1:FSaRudD0dXiMPK2UjknVwwTYyZMRsHv3TtkabsZih5I= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.5 h1:PG1F3OD1szkuQPzDw3CIQsRIrtTlUC3lP84taWzHlq0= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.5/go.mod h1:jU1li6RFryMz+so64PpKtudI+QzbKoIEivqdf6LNpOc= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0/go.mod h1:8tu/lYfQfFe6IGnaOdrpVgEL2IrrDOf6/m9RQum4NkY= +github.com/aws/aws-sdk-go-v2/service/cloudformation v1.50.0 h1:Ap5tOJfeAH1hO2UQc3X3uMlwP7uryFeZXMvZCXIlLSE= +github.com/aws/aws-sdk-go-v2/service/cloudformation v1.50.0/go.mod h1:/v2KYdCW4BaHKayenaWEXOOdxItIwEA3oU0XzuQY3F0= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.160.0 h1:ooy0OFbrdSwgk32OFGPnvBwry5ySYCKkgTEbQ2hejs8= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.160.0/go.mod h1:xejKuuRDjz6z5OqyeLsz01MlOqqW7CqpAB4PabNvpu8= +github.com/aws/aws-sdk-go-v2/service/iam v1.32.0 h1:ZNlfPdw849gBo/lvLFbEEvpTJMij0LXqiNWZ+lIamlU= +github.com/aws/aws-sdk-go-v2/service/iam v1.32.0/go.mod h1:aXWImQV0uTW35LM0A/T4wEg6R1/ReXUu4SM6/lUHYK0= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2 h1:Ji0DY1xUsUr3I8cHps0G+XM3WWU16lP6yG8qu1GAZAs= 
+github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2/go.mod h1:5CsjAbs3NlGQyZNFACh+zztPDI7fU6eW9QsxjfnuBKg= +github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.9.6 h1:6tayEze2Y+hiL3kdnEUxSPsP+pJsUfwLSFspFl1ru9Q= +github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.9.6/go.mod h1:qVNb/9IOVsLCZh0x2lnagrBwQ9fxajUpXS7OZfIsKn0= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.7 h1:ogRAwT1/gxJBcSWDMZlgyFUM962F51A5CRhDLbxLdmo= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.7/go.mod h1:YCsIZhXfRPLFFCl5xxY+1T9RKzOKjCut+28JSX2DnAk= +github.com/aws/aws-sdk-go-v2/service/sso v1.20.5 h1:vN8hEbpRnL7+Hopy9dzmRle1xmDc7o8tmY0klsr175w= +github.com/aws/aws-sdk-go-v2/service/sso v1.20.5/go.mod h1:qGzynb/msuZIE8I75DVRCUXw3o3ZyBmUvMwQ2t/BrGM= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.4 h1:Jux+gDDyi1Lruk+KHF91tK2KCuY61kzoCpvtvJJBtOE= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.4/go.mod h1:mUYPBhaF2lGiukDEjJX2BLRRKTmoUSitGDUgM4tRxak= +github.com/aws/aws-sdk-go-v2/service/sts v1.28.6 h1:cwIxeBttqPN3qkaAjcEcsh8NYr8n2HZPkcKgPAi1phU= +github.com/aws/aws-sdk-go-v2/service/sts v1.28.6/go.mod h1:FZf1/nKNEkHdGGJP/cI2MoIMquumuRK6ol3QQJNDxmw= +github.com/aws/aws-sdk-go-v2/service/timestreamwrite v1.25.5 h1:0Ty3j3QkLoqkZ+VagFisIsKYxGAzjv9hIQb84nlt/Jc= +github.com/aws/aws-sdk-go-v2/service/timestreamwrite v1.25.5/go.mod h1:9R1IlrgiivwTCZdbKgMPkseFS+moUM+DLh0TEjO6pvE= +github.com/aws/smithy-go v1.20.2 h1:tbp628ireGtzcHDDmLT/6ADHidqnwgF57XOXZe6tp4Q= +github.com/aws/smithy-go v1.20.2/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= -github.com/go-logr/logr 
v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -73,14 +64,12 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= -github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= +github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA= +github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= 
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -89,27 +78,25 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= -go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= -go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= -go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM= 
-golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= +golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f h1:99ci1mjWVBWwJiEKYY6jWa4d2nTQVIEhZIptnrVb1XY= +golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f/go.mod h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -118,8 +105,8 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text 
v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= @@ -138,14 +125,14 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.29.2 h1:hBC7B9+MU+ptchxEqTNW2DkUosJpp1P+Wn6YncZ474A= -k8s.io/api v0.29.2/go.mod h1:sdIaaKuU7P44aoyyLlikSLayT6Vb7bvJNCX105xZXY0= -k8s.io/apimachinery v0.29.2 h1:EWGpfJ856oj11C52NRCHuU7rFDwxev48z+6DSlGNsV8= -k8s.io/apimachinery v0.29.2/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= -k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= -k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/api v0.30.0 h1:siWhRq7cNjy2iHssOB9SCGNCl2spiF1dO3dABqZ8niA= +k8s.io/api v0.30.0/go.mod h1:OPlaYhoHs8EQ1ql0R/TsUgaRPhpKNxIMrKQfWUp8QSE= +k8s.io/apimachinery v0.30.0 h1:qxVPsyDM5XS96NIh9Oj6LavoVFYff/Pon9cZeDIkHHA= +k8s.io/apimachinery v0.30.0/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc= +k8s.io/klog/v2 v2.120.1 h1:QXU6cPEOIslTGvZaXvFWiP9VKyeet3sawzTOvdXb4Vw= +k8s.io/klog/v2 v2.120.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/utils 
v0.0.0-20240502163921-fe8a2dddb1d0 h1:jgGTlFYnhF1PM1Ax/lAlxUPE+KfCIXHaathvJg1C3ak= +k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= diff --git a/test/hack/resource/pkg/resourcetypes/instanceprofile.go b/test/hack/resource/pkg/resourcetypes/instanceprofile.go index 04505b7a7abd..9a6b3cbdb9a3 100644 --- a/test/hack/resource/pkg/resourcetypes/instanceprofile.go +++ b/test/hack/resource/pkg/resourcetypes/instanceprofile.go @@ -16,6 +16,7 @@ package resourcetypes import ( "context" + "fmt" "time" "github.com/aws/aws-sdk-go-v2/config" @@ -59,26 +60,21 @@ func (ip *InstanceProfile) GetExpired(ctx context.Context, expirationTime time.T continue } - clusterName, foundClusterName := lo.Find(profiles.Tags, func(tag iamtypes.Tag) bool { + clusterName, _ := lo.Find(profiles.Tags, func(tag iamtypes.Tag) bool { return lo.FromPtr(tag.Key) == karpenterTestingTag }) // Checking to make sure we are only list resources in the given region - region, foundRegion := lo.Find(profiles.Tags, func(tag iamtypes.Tag) bool { + region, _ := lo.Find(profiles.Tags, func(tag iamtypes.Tag) bool { return lo.FromPtr(tag.Key) == v1.LabelTopologyRegion }) - - if (foundClusterName && slices.Contains(excludedClusters, lo.FromPtr(clusterName.Value))) || (foundRegion && lo.FromPtr(region.Value) != lo.Must(config.LoadDefaultConfig(ctx)).Region) { + if (slices.Contains(excludedClusters, lo.FromPtr(clusterName.Value))) || (lo.FromPtr(region.Value) != lo.Must(config.LoadDefaultConfig(ctx)).Region) { continue } - - for _, t := range profiles.Tags { - // Since we can only get the date of the instance profile (not the exact time the instance profile was created) - // 
we add a day to the time that it was created to account for the worst-case of the instance profile being created - // at 23:59:59 and being marked with a time of 00:00:00 due to only capturing the date and not the time - if lo.FromPtr(t.Key) == karpenterTestingTag && instanceProfiles[i].CreateDate.Add(time.Hour*24).Before(expirationTime) { - names = append(names, lo.FromPtr(instanceProfiles[i].InstanceProfileName)) - break - } + // Since we can only get the date of the instance profile (not the exact time the instance profile was created) + // we add a day to the time that it was created to account for the worst-case of the instance profile being created + // at 23:59:59 and being marked with a time of 00:00:00 due to only capturing the date and not the time + if lo.FromPtr(clusterName.Value) != "" && instanceProfiles[i].CreateDate.Add(time.Hour * 24).Before(expirationTime) { + names = append(names, lo.FromPtr(instanceProfiles[i].InstanceProfileName)) } } @@ -143,23 +139,16 @@ func (ip *InstanceProfile) Cleanup(ctx context.Context, names []string) ([]strin return deleted, errs } -func (ip *InstanceProfile) getAllInstanceProfiles(ctx context.Context) (instanceprofile []iamtypes.InstanceProfile, err error) { - var nextToken *string - for { - out, err := ip.iamClient.ListInstanceProfiles(ctx, &iam.ListInstanceProfilesInput{ - Marker: nextToken, - }) - if err != nil { - return instanceprofile, err - } +func (ip *InstanceProfile) getAllInstanceProfiles(ctx context.Context) (instanceprofiles []iamtypes.InstanceProfile, err error) { + paginator := iam.NewListInstanceProfilesPaginator(ip.iamClient, &iam.ListInstanceProfilesInput{}) - instanceprofile = append(instanceprofile, out.InstanceProfiles...) - - nextToken = out.Marker - if nextToken == nil { - break + for paginator.HasMorePages() { + out, err := paginator.NextPage(ctx) + if err != nil { + return instanceprofiles, err } + instanceprofiles = append(instanceprofiles, out.InstanceProfiles...) 
} - return instanceprofile, nil + return instanceprofiles, nil } diff --git a/test/pkg/debug/monitor.go b/test/pkg/debug/monitor.go index 4d807c0a11d9..c0f8588b8654 100644 --- a/test/pkg/debug/monitor.go +++ b/test/pkg/debug/monitor.go @@ -18,17 +18,15 @@ import ( "context" "sync" - "github.com/go-logr/zapr" "github.com/samber/lo" "k8s.io/client-go/rest" - "knative.dev/pkg/logging" controllerruntime "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - ctrl "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "sigs.k8s.io/karpenter/pkg/operator/controller" + "sigs.k8s.io/karpenter/pkg/operator/scheme" ) @@ -40,22 +38,15 @@ type Monitor struct { } func New(ctx context.Context, config *rest.Config, kubeClient client.Client) *Monitor { - logger := logging.FromContext(ctx) - ctrl.SetLogger(zapr.NewLogger(logger.Desugar())) + log.SetLogger(log.FromContext(ctx)) mgr := lo.Must(controllerruntime.NewManager(config, controllerruntime.Options{ Scheme: scheme.Scheme, - BaseContext: func() context.Context { - ctx := context.Background() - ctx = logging.WithLogger(ctx, logger) - logger.WithOptions() - return ctx - }, Metrics: server.Options{ BindAddress: "0", }, })) for _, c := range newControllers(kubeClient) { - lo.Must0(c.Builder(ctx, mgr).Complete(c), "failed to register controller") + lo.Must0(c.Register(ctx, mgr), "failed to register controller") } ctx, cancel := context.WithCancel(ctx) // this context is only meant for monitor start/stop return &Monitor{ diff --git a/test/pkg/debug/node.go b/test/pkg/debug/node.go index 5fd1d0e3f4ff..1e030f42db10 100644 --- a/test/pkg/debug/node.go +++ b/test/pkg/debug/node.go @@ -31,7 +31,6 @@ import ( "sigs.k8s.io/karpenter/pkg/apis/v1beta1" - corecontroller "sigs.k8s.io/karpenter/pkg/operator/controller" nodeutils "sigs.k8s.io/karpenter/pkg/utils/node" ) @@ -45,10 +44,6 @@ func 
NewNodeController(kubeClient client.Client) *NodeController { } } -func (c *NodeController) Name() string { - return "node" -} - func (c *NodeController) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { n := &v1.Node{} if err := c.kubeClient.Get(ctx, req.NamespacedName, n); err != nil { @@ -66,9 +61,9 @@ func (c *NodeController) GetInfo(ctx context.Context, n *v1.Node) string { return fmt.Sprintf("ready=%s schedulable=%t initialized=%s pods=%d taints=%v", nodeutils.GetCondition(n, v1.NodeReady).Status, !n.Spec.Unschedulable, n.Labels[v1beta1.NodeInitializedLabelKey], len(pods), n.Spec.Taints) } -func (c *NodeController) Builder(ctx context.Context, m manager.Manager) corecontroller.Builder { - return corecontroller.Adapt(controllerruntime. - NewControllerManagedBy(m). +func (c *NodeController) Register(ctx context.Context, m manager.Manager) error { + return controllerruntime.NewControllerManagedBy(m). + Named("node"). For(&v1.Node{}). WithEventFilter(predicate.And( predicate.Funcs{ @@ -82,5 +77,6 @@ func (c *NodeController) Builder(ctx context.Context, m manager.Manager) corecon return o.GetLabels()[v1beta1.NodePoolLabelKey] != "" }), )). - WithOptions(controller.Options{MaxConcurrentReconciles: 10})) + WithOptions(controller.Options{MaxConcurrentReconciles: 10}). 
+ Complete(c) } diff --git a/test/pkg/debug/nodeclaim.go b/test/pkg/debug/nodeclaim.go index 926f52eed3c2..2e7ec0ca948f 100644 --- a/test/pkg/debug/nodeclaim.go +++ b/test/pkg/debug/nodeclaim.go @@ -29,7 +29,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" - corecontroller "sigs.k8s.io/karpenter/pkg/operator/controller" ) type NodeClaimController struct { @@ -42,10 +41,6 @@ func NewNodeClaimController(kubeClient client.Client) *NodeClaimController { } } -func (c *NodeClaimController) Name() string { - return "nodeclaim" -} - func (c *NodeClaimController) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { nc := &corev1beta1.NodeClaim{} if err := c.kubeClient.Get(ctx, req.NamespacedName, nc); err != nil { @@ -60,16 +55,16 @@ func (c *NodeClaimController) Reconcile(ctx context.Context, req reconcile.Reque func (c *NodeClaimController) GetInfo(nc *corev1beta1.NodeClaim) string { return fmt.Sprintf("ready=%t launched=%t registered=%t initialized=%t", - nc.StatusConditions().IsHappy(), - nc.StatusConditions().GetCondition(corev1beta1.Launched).IsTrue(), - nc.StatusConditions().GetCondition(corev1beta1.Registered).IsTrue(), - nc.StatusConditions().GetCondition(corev1beta1.Initialized).IsTrue(), + nc.StatusConditions().Root().IsTrue(), + nc.StatusConditions().Get(corev1beta1.ConditionTypeLaunched).IsTrue(), + nc.StatusConditions().Get(corev1beta1.ConditionTypeRegistered).IsTrue(), + nc.StatusConditions().Get(corev1beta1.ConditionTypeInitialized).IsTrue(), ) } -func (c *NodeClaimController) Builder(_ context.Context, m manager.Manager) corecontroller.Builder { - return corecontroller.Adapt(controllerruntime. - NewControllerManagedBy(m). +func (c *NodeClaimController) Register(_ context.Context, m manager.Manager) error { + return controllerruntime.NewControllerManagedBy(m). + Named("nodeclaim"). For(&corev1beta1.NodeClaim{}). 
WithEventFilter(predicate.Funcs{ UpdateFunc: func(e event.UpdateEvent) bool { @@ -78,5 +73,6 @@ func (c *NodeClaimController) Builder(_ context.Context, m manager.Manager) core return c.GetInfo(oldNodeClaim) != c.GetInfo(newNodeClaim) }, }). - WithOptions(controller.Options{MaxConcurrentReconciles: 10})) + WithOptions(controller.Options{MaxConcurrentReconciles: 10}). + Complete(c) } diff --git a/test/pkg/debug/pod.go b/test/pkg/debug/pod.go index a2bea9f21bae..cd51b6bf9b84 100644 --- a/test/pkg/debug/pod.go +++ b/test/pkg/debug/pod.go @@ -31,7 +31,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" - corecontroller "sigs.k8s.io/karpenter/pkg/operator/controller" "sigs.k8s.io/karpenter/pkg/utils/pod" ) @@ -45,10 +44,6 @@ func NewPodController(kubeClient client.Client) *PodController { } } -func (c *PodController) Name() string { - return "pod" -} - func (c *PodController) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { p := &v1.Pod{} if err := c.kubeClient.Get(ctx, req.NamespacedName, p); err != nil { @@ -73,9 +68,9 @@ func (c *PodController) GetInfo(p *v1.Pod) string { pod.IsProvisionable(p), p.Status.Phase, p.Spec.NodeName, p.OwnerReferences, containerInfo.String()) } -func (c *PodController) Builder(_ context.Context, m manager.Manager) corecontroller.Builder { - return corecontroller.Adapt(controllerruntime. - NewControllerManagedBy(m). +func (c *PodController) Register(_ context.Context, m manager.Manager) error { + return controllerruntime.NewControllerManagedBy(m). + Named("pod"). For(&v1.Pod{}). WithEventFilter(predicate.And( predicate.Funcs{ @@ -89,5 +84,6 @@ func (c *PodController) Builder(_ context.Context, m manager.Manager) corecontro return o.GetNamespace() != "kube-system" }), )). - WithOptions(controller.Options{MaxConcurrentReconciles: 10})) + WithOptions(controller.Options{MaxConcurrentReconciles: 10}). 
+ Complete(c) } diff --git a/test/pkg/environment/aws/environment.go b/test/pkg/environment/aws/environment.go index e874cf325ce1..f3d9be256215 100644 --- a/test/pkg/environment/aws/environment.go +++ b/test/pkg/environment/aws/environment.go @@ -82,6 +82,13 @@ type Environment struct { ClusterEndpoint string InterruptionQueue string PrivateCluster bool + ZoneInfo []ZoneInfo +} + +type ZoneInfo struct { + Zone string + ZoneID string + ZoneType string } func NewEnvironment(t *testing.T) *Environment { @@ -123,6 +130,14 @@ func NewEnvironment(t *testing.T) *Environment { out := lo.Must(sqsapi.GetQueueUrlWithContext(env.Context, &servicesqs.GetQueueUrlInput{QueueName: aws.String(v)})) awsEnv.SQSProvider = lo.Must(sqs.NewDefaultProvider(sqsapi, lo.FromPtr(out.QueueUrl))) } + // Populate ZoneInfo for all AZs in the region + awsEnv.ZoneInfo = lo.Map(lo.Must(awsEnv.EC2API.DescribeAvailabilityZones(&ec2.DescribeAvailabilityZonesInput{})).AvailabilityZones, func(zone *ec2.AvailabilityZone, _ int) ZoneInfo { + return ZoneInfo{ + Zone: lo.FromPtr(zone.ZoneName), + ZoneID: lo.FromPtr(zone.ZoneId), + ZoneType: lo.FromPtr(zone.ZoneType), + } + }) return awsEnv } diff --git a/test/pkg/environment/aws/expectations.go b/test/pkg/environment/aws/expectations.go index 2b69ccd47d64..f87261716841 100644 --- a/test/pkg/environment/aws/expectations.go +++ b/test/pkg/environment/aws/expectations.go @@ -210,14 +210,6 @@ func (env *Environment) GetSpotInstanceRequest(id *string) *ec2.SpotInstanceRequ return siro.SpotInstanceRequests[0] } -// GetZones returns all available zones mapped from zone -> zone type -func (env *Environment) GetZones() map[string]string { - output := lo.Must(env.EC2API.DescribeAvailabilityZones(&ec2.DescribeAvailabilityZonesInput{})) - return lo.Associate(output.AvailabilityZones, func(zone *ec2.AvailabilityZone) (string, string) { - return lo.FromPtr(zone.ZoneName), lo.FromPtr(zone.ZoneType) - }) -} - // GetSubnets returns all subnets matching the label selector // 
mapped from AZ -> {subnet-ids...} func (env *Environment) GetSubnets(tags map[string]string) map[string][]string { @@ -243,10 +235,11 @@ func (env *Environment) GetSubnets(tags map[string]string) map[string][]string { type SubnetInfo struct { Name string ID string + ZoneInfo } -// GetSubnetNameAndIds returns all subnets matching the label selector -func (env *Environment) GetSubnetNameAndIds(tags map[string]string) []SubnetInfo { +// GetSubnetInfo returns all subnets matching the label selector +func (env *Environment) GetSubnetInfo(tags map[string]string) []SubnetInfo { var filters []*ec2.Filter for key, val := range tags { filters = append(filters, &ec2.Filter{ @@ -261,6 +254,11 @@ func (env *Environment) GetSubnetNameAndIds(tags map[string]string) []SubnetInfo if tag, ok := lo.Find(s.Tags, func(t *ec2.Tag) bool { return aws.StringValue(t.Key) == "Name" }); ok { elem.Name = aws.StringValue(tag.Value) } + if info, ok := lo.Find(env.ZoneInfo, func(info ZoneInfo) bool { + return aws.StringValue(s.AvailabilityZone) == info.Zone + }); ok { + elem.ZoneInfo = info + } return elem }) return true @@ -327,7 +325,15 @@ func (env *Environment) ExpectParsedProviderID(providerID string) string { return providerIDSplit[len(providerIDSplit)-1] } -func (env *Environment) GetK8sVersion(offset int) string { +func (env *Environment) K8sVersion() string { + GinkgoHelper() + + return env.K8sVersionWithOffset(0) +} + +func (env *Environment) K8sVersionWithOffset(offset int) string { + GinkgoHelper() + serverVersion, err := env.KubeClient.Discovery().ServerVersion() Expect(err).To(BeNil()) minorVersion, err := strconv.Atoi(strings.TrimSuffix(serverVersion.Minor, "+")) @@ -338,18 +344,19 @@ func (env *Environment) GetK8sVersion(offset int) string { return fmt.Sprintf("%s.%d", serverVersion.Major, minorVersion-offset) } -func (env *Environment) GetK8sMinorVersion(offset int) (int, error) { - version, err := strconv.Atoi(strings.Split(env.GetK8sVersion(offset), ".")[1]) - if err != nil { - 
return 0, err - } - return version, nil +func (env *Environment) K8sMinorVersion() int { + GinkgoHelper() + + version, err := strconv.Atoi(strings.Split(env.K8sVersion(), ".")[1]) + Expect(err).ToNot(HaveOccurred()) + return version } -func (env *Environment) GetCustomAMI(amiPath string, versionOffset int) string { - version := env.GetK8sVersion(versionOffset) +func (env *Environment) GetAMIBySSMPath(ssmPath string) string { + GinkgoHelper() + parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{ - Name: aws.String(fmt.Sprintf(amiPath, version)), + Name: aws.String(ssmPath), }) Expect(err).To(BeNil()) return *parameter.Parameter.Value diff --git a/test/pkg/environment/common/environment.go b/test/pkg/environment/common/environment.go index 908a734904be..ef677cbbd8a7 100644 --- a/test/pkg/environment/common/environment.go +++ b/test/pkg/environment/common/environment.go @@ -31,7 +31,9 @@ import ( "k8s.io/client-go/kubernetes" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" - loggingtesting "knative.dev/pkg/logging/testing" + + . 
"sigs.k8s.io/karpenter/pkg/utils/testing" //nolint:stylecheck + "knative.dev/pkg/system" controllerruntime "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" @@ -65,7 +67,7 @@ type Environment struct { } func NewEnvironment(t *testing.T) *Environment { - ctx := loggingtesting.TestContextWithLogger(t) + ctx := TestContextWithLogger(t) ctx, cancel := context.WithCancel(ctx) config := NewConfig() client := NewClient(ctx, config) diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go index 2cc37dcdc41f..bef21c33c657 100644 --- a/test/pkg/environment/common/expectations.go +++ b/test/pkg/environment/common/expectations.go @@ -36,10 +36,9 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/transport" - "knative.dev/pkg/logging" - "knative.dev/pkg/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/apiutil" + "sigs.k8s.io/controller-runtime/pkg/log" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" pscheduling "sigs.k8s.io/karpenter/pkg/controllers/provisioning/scheduling" @@ -64,7 +63,7 @@ func (env *Environment) ExpectDeleted(objects ...client.Object) { GinkgoHelper() for _, object := range objects { Eventually(func(g Gomega) { - g.Expect(client.IgnoreNotFound(env.Client.Delete(env, object, client.PropagationPolicy(metav1.DeletePropagationForeground), &client.DeleteOptions{GracePeriodSeconds: ptr.Int64(0)}))).To(Succeed()) + g.Expect(client.IgnoreNotFound(env.Client.Delete(env, object, client.PropagationPolicy(metav1.DeletePropagationForeground), &client.DeleteOptions{GracePeriodSeconds: lo.ToPtr(int64(0))}))).To(Succeed()) }).WithTimeout(time.Second * 10).Should(Succeed()) } } @@ -82,7 +81,7 @@ func (env *Environment) ExpectUpdated(objects ...client.Object) { current := o.DeepCopyObject().(client.Object) g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(current), current)).To(Succeed()) if 
current.GetResourceVersion() != o.GetResourceVersion() { - logging.FromContext(env).Infof("detected an update to an object (%s) with an outdated resource version, did you get the latest version of the object before patching?", lo.Must(apiutil.GVKForObject(o, env.Client.Scheme()))) + log.FromContext(env).Info(fmt.Sprintf("detected an update to an object (%s) with an outdated resource version, did you get the latest version of the object before patching?", lo.Must(apiutil.GVKForObject(o, env.Client.Scheme())))) } o.SetResourceVersion(current.GetResourceVersion()) g.Expect(env.Client.Update(env.Context, o)).To(Succeed()) @@ -672,7 +671,7 @@ func (env *Environment) EventuallyExpectNodeClaimsReady(nodeClaims ...*corev1bet for _, nc := range nodeClaims { temp := &corev1beta1.NodeClaim{} g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nc), temp)).Should(Succeed()) - g.Expect(temp.StatusConditions().IsHappy()).To(BeTrue()) + g.Expect(temp.StatusConditions().Root().IsTrue()).To(BeTrue()) } }).Should(Succeed()) } @@ -682,7 +681,7 @@ func (env *Environment) EventuallyExpectExpired(nodeClaims ...*corev1beta1.NodeC Eventually(func(g Gomega) { for _, nc := range nodeClaims { g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed()) - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Expired).IsTrue()).To(BeTrue()) + g.Expect(nc.StatusConditions().Get(corev1beta1.ConditionTypeExpired).IsTrue()).To(BeTrue()) } }).Should(Succeed()) } @@ -692,7 +691,7 @@ func (env *Environment) EventuallyExpectDrifted(nodeClaims ...*corev1beta1.NodeC Eventually(func(g Gomega) { for _, nc := range nodeClaims { g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed()) - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) + g.Expect(nc.StatusConditions().Get(corev1beta1.ConditionTypeDrifted).IsTrue()).To(BeTrue()) } }).Should(Succeed()) } @@ -704,7 +703,7 @@ func (env *Environment) 
ConsistentlyExpectNodeClaimsNotDrifted(duration time.Dur Consistently(func(g Gomega) { for _, nc := range nodeClaims { g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed()) - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Drifted)).To(BeNil()) + g.Expect(nc.StatusConditions().Get(corev1beta1.ConditionTypeDrifted)).To(BeNil()) } }, duration).Should(Succeed()) } @@ -714,7 +713,7 @@ func (env *Environment) EventuallyExpectEmpty(nodeClaims ...*corev1beta1.NodeCla Eventually(func(g Gomega) { for _, nc := range nodeClaims { g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nc), nc)).To(Succeed()) - g.Expect(nc.StatusConditions().GetCondition(corev1beta1.Empty).IsTrue()).To(BeTrue()) + g.Expect(nc.StatusConditions().Get(corev1beta1.ConditionTypeEmpty).IsTrue()).To(BeTrue()) } }).Should(Succeed()) } @@ -758,13 +757,13 @@ func (env *Environment) printControllerLogs(options *v1.PodLogOptions) { } stream, err := env.KubeClient.CoreV1().Pods("kube-system").GetLogs(pod.Name, temp).Stream(env.Context) if err != nil { - logging.FromContext(env.Context).Errorf("fetching controller logs: %s", err) + log.FromContext(env.Context).Error(err, "failed fetching controller logs") return } - log := &bytes.Buffer{} - _, err = io.Copy(log, stream) + raw := &bytes.Buffer{} + _, err = io.Copy(raw, stream) Expect(err).ToNot(HaveOccurred()) - logging.FromContext(env.Context).Info(log) + log.FromContext(env.Context).Info(raw.String()) } } @@ -879,7 +878,7 @@ func (env *Environment) ExpectCABundle() string { Expect(err).ToNot(HaveOccurred()) _, err = transport.TLSConfigFor(transportConfig) // fills in CAData! 
Expect(err).ToNot(HaveOccurred()) - logging.FromContext(env.Context).Debugf("Discovered caBundle, length %d", len(transportConfig.TLS.CAData)) + log.FromContext(env.Context).WithValues("length", len(transportConfig.TLS.CAData)).V(1).Info("discovered caBundle") return base64.StdEncoding.EncodeToString(transportConfig.TLS.CAData) } diff --git a/test/pkg/environment/common/monitor.go b/test/pkg/environment/common/monitor.go index ab92f2c91ccf..3bfd2e64e42a 100644 --- a/test/pkg/environment/common/monitor.go +++ b/test/pkg/environment/common/monitor.go @@ -23,8 +23,8 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" - "knative.dev/pkg/logging" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" "github.com/samber/lo" @@ -170,11 +170,11 @@ func (m *Monitor) RunningPodsCount(selector labels.Selector) int { func (m *Monitor) poll() state { var nodes v1.NodeList if err := m.kubeClient.List(m.ctx, &nodes); err != nil { - logging.FromContext(m.ctx).Errorf("listing nodes, %s", err) + log.FromContext(m.ctx).Error(err, "failed listing nodes") } var pods v1.PodList if err := m.kubeClient.List(m.ctx, &pods); err != nil { - logging.FromContext(m.ctx).Errorf("listing pods, %s", err) + log.FromContext(m.ctx).Error(err, "failing listing pods") } st := state{ nodes: map[string]*v1.Node{}, diff --git a/test/pkg/environment/common/setup.go b/test/pkg/environment/common/setup.go index cc233cc561ce..2c029a57c7c5 100644 --- a/test/pkg/environment/common/setup.go +++ b/test/pkg/environment/common/setup.go @@ -58,6 +58,7 @@ var ( &schedulingv1.PriorityClass{}, &v1.Node{}, &corev1beta1.NodeClaim{}, + &v1beta1.EC2NodeClass{}, } ) diff --git a/test/suites/consolidation/suite_test.go b/test/suites/consolidation/suite_test.go index 2690129cfd69..7aeb3cdaa972 100644 --- a/test/suites/consolidation/suite_test.go +++ b/test/suites/consolidation/suite_test.go @@ -510,6 +510,14 @@ var _ = 
Describe("Consolidation", func() { Values: []string{"t2", "t3", "c1", "t3a", "t4g", "a1"}, }, }, + // Specify Linux in the NodePool to filter out Windows only DS when discovering DS overhead + { + NodeSelectorRequirement: v1.NodeSelectorRequirement{ + Key: v1.LabelOSStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{string(v1.Linux)}, + }, + }, }, NodeClassRef: &corev1beta1.NodeClassReference{Name: nodeClass.Name}, }, diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index e0bbf6f431bf..a910a009ab60 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -17,8 +17,6 @@ package drift_test import ( "fmt" "sort" - "strconv" - "strings" "testing" "time" @@ -37,8 +35,6 @@ import ( awssdk "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/eks" - "github.com/aws/aws-sdk-go/service/ssm" - corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" coretest "sigs.k8s.io/karpenter/pkg/test" @@ -80,7 +76,7 @@ var _ = Describe("Drift", func() { var selector labels.Selector var numPods int BeforeEach(func() { - amdAMI = env.GetCustomAMI("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", 1) + amdAMI = env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) numPods = 1 // Add pods with a do-not-disrupt annotation so that we can check node metadata before we disrupt dep = coretest.Deployment(coretest.DeploymentOptions{ @@ -263,16 +259,6 @@ var _ = Describe("Drift", func() { }) It("should respect budgets for non-empty replace drift", func() { appLabels := map[string]string{"app": "large-app"} - - nodePool = coretest.ReplaceRequirements(nodePool, - corev1beta1.NodeSelectorRequirementWithMinValues{ - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceSize, - Operator: v1.NodeSelectorOpIn, - Values: 
[]string{"xlarge"}, - }, - }, - ) nodePool.Labels = appLabels // We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time. nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{ @@ -280,8 +266,10 @@ var _ = Describe("Drift", func() { }} // Create a 5 pod deployment with hostname inter-pod anti-affinity to ensure each pod is placed on a unique node + numPods = 5 + selector = labels.SelectorFromSet(appLabels) deployment := coretest.Deployment(coretest.DeploymentOptions{ - Replicas: 5, + Replicas: int32(numPods), PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: appLabels, @@ -297,11 +285,11 @@ var _ = Describe("Drift", func() { env.ExpectCreated(nodeClass, nodePool, deployment) - originalNodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 5) - originalNodes := env.EventuallyExpectCreatedNodeCount("==", 5) + originalNodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", numPods) + originalNodes := env.EventuallyExpectCreatedNodeCount("==", numPods) // Check that all deployment pods are online - env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(appLabels), numPods) + env.EventuallyExpectHealthyPodCount(selector, numPods) By("cordoning and adding finalizer to the nodes") // Add a finalizer to each node so that we can stop termination disruptions @@ -383,14 +371,11 @@ var _ = Describe("Drift", func() { }) }) It("should disrupt nodes that have drifted due to AMIs", func() { - // Choose and old, static image. The 1.23 image is incompatible with EKS 1.29 so fallback to a newer image. 
- parameterName := lo.Ternary(lo.Must(strconv.Atoi(strings.Split(env.GetK8sVersion(0), ".")[1])) >= 29, - "/aws/service/eks/optimized-ami/1.27/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-1.27-v20240307/image_id", - "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/arm64/standard/amazon-eks-node-al2023-arm64-standard-1.23-v20240307/image_id", - ) - parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{Name: awssdk.String(parameterName)}) - Expect(err).To(BeNil()) - oldCustomAMI := *parameter.Parameter.Value + // Choose an old static image (AL2023 AMIs don't exist for 1.22) + oldCustomAMI := env.GetAMIBySSMPath(lo.Ternary(env.K8sMinorVersion() == 23, + "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-1.23-v20240307/image_id", + fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)), + )) nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyAL2023 nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: oldCustomAMI}} @@ -411,12 +396,7 @@ var _ = Describe("Drift", func() { env.EventuallyExpectHealthyPodCount(selector, numPods) }) It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() { - version := env.GetK8sVersion(1) - armParameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{ - Name: awssdk.String(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", version)), - }) - Expect(err).To(BeNil()) - armAMI := *armParameter.Parameter.Value + armAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", env.K8sVersion())) nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyAL2023 nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: armAMI}} @@ -437,18 +417,13 @@ var _ = Describe("Drift", func() { 
env.EventuallyExpectHealthyPodCount(selector, numPods) }) It("should not disrupt nodes that have drifted without the featureGate enabled", func() { - version := env.GetK8sVersion(1) env.ExpectSettingsOverridden(v1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=false"}) + // Choose an old static image (AL2023 AMIs don't exist for 1.22) - parameterName := lo.Ternary(lo.Must(strconv.Atoi(strings.Split(env.GetK8sVersion(0), ".")[1])) == 23, - "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/arm64/standard/amazon-eks-node-al2023-arm64-standard-1.23-v20240307/image_id", - fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", version), - ) - parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{ - Name: awssdk.String(parameterName), - }) - Expect(err).To(BeNil()) - oldCustomAMI := *parameter.Parameter.Value + oldCustomAMI := env.GetAMIBySSMPath(lo.Ternary(env.K8sMinorVersion() == 23, + "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-1.23-v20240307/image_id", + fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)), + )) nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyAL2023 nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: oldCustomAMI}} @@ -540,7 +515,7 @@ var _ = Describe("Drift", func() { env.EventuallyExpectHealthyPodCount(selector, numPods) }) It("should disrupt nodes that have drifted due to subnets", func() { - subnets := env.GetSubnetNameAndIds(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + subnets := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) Expect(len(subnets)).To(BeNumerically(">", 1)) nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{{ID: subnets[0].ID}} @@ -730,8 +705,8 @@ var _ = Describe("Drift", func() { By("validating the drifted status condition has propagated") 
Eventually(func(g Gomega) { g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted)).ToNot(BeNil()) - g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Drifted).IsTrue()).To(BeTrue()) + g.Expect(nodeClaim.StatusConditions().Get(corev1beta1.ConditionTypeDrifted)).ToNot(BeNil()) + g.Expect(nodeClaim.StatusConditions().Get(corev1beta1.ConditionTypeDrifted).IsTrue()).To(BeTrue()) }).Should(Succeed()) delete(pod.Annotations, corev1beta1.DoNotDisruptAnnotationKey) @@ -837,7 +812,7 @@ var _ = Describe("Drift", func() { env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim) }) Context("Failure", func() { - It("should not continue to drift if a node never registers", func() { + It("should not disrupt a drifted node if the replacement node never registers", func() { // launch a new nodeClaim var numPods int32 = 2 dep := coretest.Deployment(coretest.DeploymentOptions{ @@ -858,40 +833,30 @@ var _ = Describe("Drift", func() { env.EventuallyExpectCreatedNodeCount("==", int(numPods)) // Drift the nodeClaim with bad configuration that will not register a NodeClaim - parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{ - Name: awssdk.String("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs"), - }) - Expect(err).ToNot(HaveOccurred()) - nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: *parameter.Parameter.Value}} + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: env.GetAMIBySSMPath("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs")}} env.ExpectCreatedOrUpdated(nodeClass) env.EventuallyExpectDrifted(startingNodeClaimState...) 
- // Expect nodes To get tainted + // Expect only a single node to be tainted due to default disruption budgets taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) // Drift should fail and the original node should be untainted // TODO: reduce timeouts when disruption waits are factored out env.EventuallyExpectNodesUntaintedWithTimeout(11*time.Minute, taintedNodes...) - // We give another 6 minutes here to handle the deletion at the 15m registration timeout - Eventually(func(g Gomega) { - nodeClaims := &corev1beta1.NodeClaimList{} - g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) - g.Expect(nodeClaims.Items).To(HaveLen(int(numPods))) - }).WithTimeout(6 * time.Minute).Should(Succeed()) - - // Expect all the NodeClaims that existed on the initial provisioning loop are not removed + // Expect all the NodeClaims that existed on the initial provisioning loop are not removed. + // Assert this over several minutes to ensure a subsequent disruption controller pass doesn't + // successfully schedule the evicted pods to the in-flight nodeclaim and disrupt the original node Consistently(func(g Gomega) { nodeClaims := &corev1beta1.NodeClaimList{} g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) - - startingNodeClaimUIDs := lo.Map(startingNodeClaimState, func(nc *corev1beta1.NodeClaim, _ int) types.UID { return nc.UID }) - nodeClaimUIDs := lo.Map(nodeClaims.Items, func(nc corev1beta1.NodeClaim, _ int) types.UID { return nc.UID }) - g.Expect(sets.New(nodeClaimUIDs...).IsSuperset(sets.New(startingNodeClaimUIDs...))).To(BeTrue()) + startingNodeClaimUIDs := sets.New(lo.Map(startingNodeClaimState, func(nc *corev1beta1.NodeClaim, _ int) types.UID { return nc.UID })...) + nodeClaimUIDs := sets.New(lo.Map(nodeClaims.Items, func(nc corev1beta1.NodeClaim, _ int) types.UID { return nc.UID })...) 
+ g.Expect(nodeClaimUIDs.IsSuperset(startingNodeClaimUIDs)).To(BeTrue()) }, "2m").Should(Succeed()) }) - It("should not continue to drift if a node registers but never becomes initialized", func() { + It("should not disrupt a drifted node if the replacement node registers but never initialized", func() { // launch a new nodeClaim var numPods int32 = 2 dep := coretest.Deployment(coretest.DeploymentOptions{ @@ -917,7 +882,7 @@ var _ = Describe("Drift", func() { env.EventuallyExpectDrifted(startingNodeClaimState...) - // Expect nodes to be tainted + // Expect only a single node to get tainted due to default disruption budgets taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) // Drift should fail and original node should be untainted @@ -934,13 +899,14 @@ var _ = Describe("Drift", func() { Expect(nodeClaimList.Items).To(HaveLen(int(numPods) + 1)) // Expect all the NodeClaims that existed on the initial provisioning loop are not removed + // Assert this over several minutes to ensure a subsequent disruption controller pass doesn't + // successfully schedule the evicted pods to the in-flight nodeclaim and disrupt the original node Consistently(func(g Gomega) { nodeClaims := &corev1beta1.NodeClaimList{} g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) - - startingNodeClaimUIDs := lo.Map(startingNodeClaimState, func(m *corev1beta1.NodeClaim, _ int) types.UID { return m.UID }) - nodeClaimUIDs := lo.Map(nodeClaims.Items, func(m corev1beta1.NodeClaim, _ int) types.UID { return m.UID }) - g.Expect(sets.New(nodeClaimUIDs...).IsSuperset(sets.New(startingNodeClaimUIDs...))).To(BeTrue()) + startingNodeClaimUIDs := sets.New(lo.Map(startingNodeClaimState, func(m *corev1beta1.NodeClaim, _ int) types.UID { return m.UID })...) + nodeClaimUIDs := sets.New(lo.Map(nodeClaims.Items, func(m corev1beta1.NodeClaim, _ int) types.UID { return m.UID })...) 
+ g.Expect(nodeClaimUIDs.IsSuperset(startingNodeClaimUIDs)).To(BeTrue()) }, "2m").Should(Succeed()) }) It("should not drift any nodes if their PodDisruptionBudgets are unhealthy", func() { diff --git a/test/suites/expiration/suite_test.go b/test/suites/expiration/suite_test.go index 7151c43a6a88..ed6e884e4dcd 100644 --- a/test/suites/expiration/suite_test.go +++ b/test/suites/expiration/suite_test.go @@ -30,8 +30,6 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/aws/aws-sdk-go/service/ssm" - corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" @@ -345,27 +343,17 @@ var _ = Describe("Expiration", func() { }) It("should respect budgets for non-empty replace expiration", func() { appLabels := map[string]string{"app": "large-app"} - - nodePool = coretest.ReplaceRequirements(nodePool, - corev1beta1.NodeSelectorRequirementWithMinValues{ - NodeSelectorRequirement: v1.NodeSelectorRequirement{ - Key: v1beta1.LabelInstanceSize, - Operator: v1.NodeSelectorOpIn, - Values: []string{"xlarge"}, - }, - }, - ) nodePool.Labels = appLabels // We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time. nodePool.Spec.Disruption.Budgets = []corev1beta1.Budget{{ Nodes: "3", }} - // Make 5 pods all with different deployments and different test partitions, so that each pod can be put - // on a separate node. 
+ // Create a 5 pod deployment with hostname inter-pod anti-affinity to ensure each pod is placed on a unique node selector = labels.SelectorFromSet(appLabels) + numPods = 5 deployment := coretest.Deployment(coretest.DeploymentOptions{ - Replicas: 5, + Replicas: int32(numPods), PodOptions: coretest.PodOptions{ ObjectMeta: metav1.ObjectMeta{ Labels: appLabels, @@ -381,11 +369,11 @@ var _ = Describe("Expiration", func() { env.ExpectCreated(nodeClass, nodePool, deployment) - env.EventuallyExpectCreatedNodeClaimCount("==", 5) - nodes := env.EventuallyExpectCreatedNodeCount("==", 5) + env.EventuallyExpectCreatedNodeClaimCount("==", numPods) + nodes := env.EventuallyExpectCreatedNodeCount("==", numPods) // Check that all daemonsets and deployment pods are online - env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(appLabels), numPods) + env.EventuallyExpectHealthyPodCount(selector, numPods) By("cordoning and adding finalizer to the nodes") // Add a finalizer to each node so that we can stop termination disruptions @@ -582,13 +570,9 @@ var _ = Describe("Expiration", func() { env.EventuallyExpectCreatedNodeCount("==", int(numPods)) // Set a configuration that will not register a NodeClaim - parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{ - Name: lo.ToPtr("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs"), - }) - Expect(err).ToNot(HaveOccurred()) nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ { - ID: *parameter.Parameter.Value, + ID: env.GetAMIBySSMPath("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs"), }, } env.ExpectCreatedOrUpdated(nodeClass) diff --git a/test/suites/integration/ami_test.go b/test/suites/integration/ami_test.go index 96c3bdc57a1c..d5020145529b 100644 --- a/test/suites/integration/ami_test.go +++ b/test/suites/integration/ami_test.go @@ -23,9 +23,12 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" - "github.com/aws/aws-sdk-go/service/ssm" + 
"github.com/awslabs/operatorpkg/status" + . "github.com/awslabs/operatorpkg/test/expectations" "github.com/samber/lo" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -43,7 +46,7 @@ import ( var _ = Describe("AMI", func() { var customAMI string BeforeEach(func() { - customAMI = env.GetCustomAMI("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", 1) + customAMI = env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) }) It("should use the AMI defined by the AMI Selector Terms", func() { @@ -60,8 +63,8 @@ var _ = Describe("AMI", func() { env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(customAMI)))) }) It("should use the most recent AMI when discovering multiple", func() { - // choose an old static image - oldCustomAMI := env.GetCustomAMI("/aws/service/eks/optimized-ami/%[1]s/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-%[1]s-v20240307/image_id", 1) + // choose an old static image that will definitely have an older creation date + oldCustomAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%[1]s/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-%[1]s-v20240307/image_id", env.K8sVersion())) nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ { ID: customAMI, @@ -156,14 +159,12 @@ var _ = Describe("AMI", func() { It("should provision a node using the Ubuntu family", func() { nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyUbuntu // TODO (jmdeal@): remove once 22.04 AMIs are supported - if env.GetK8sVersion(0) == "1.29" { + if env.K8sMinorVersion() >= 29 { nodeClass.Spec.AMISelectorTerms = lo.Map([]string{ "/aws/service/canonical/ubuntu/eks/20.04/1.28/stable/current/amd64/hvm/ebs-gp2/ami-id", 
"/aws/service/canonical/ubuntu/eks/20.04/1.28/stable/current/arm64/hvm/ebs-gp2/ami-id", - }, func(arg string, _ int) v1beta1.AMISelectorTerm { - parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{Name: lo.ToPtr(arg)}) - Expect(err).To(BeNil()) - return v1beta1.AMISelectorTerm{ID: *parameter.Parameter.Value} + }, func(ssmPath string, _ int) v1beta1.AMISelectorTerm { + return v1beta1.AMISelectorTerm{ID: env.GetAMIBySSMPath(ssmPath)} }) } // TODO: remove requirements after Ubuntu fixes bootstrap script issue w/ @@ -184,20 +185,23 @@ var _ = Describe("AMI", func() { }) It("should support Custom AMIFamily with AMI Selectors", func() { nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom - al2AMI := env.GetCustomAMI("/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id", 1) + al2023AMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ { - ID: al2AMI, + ID: al2023AMI, }, } - nodeClass.Spec.UserData = aws.String(fmt.Sprintf("#!/bin/bash\n/etc/eks/bootstrap.sh '%s'", env.ClusterName)) + rawContent, err := os.ReadFile("testdata/al2023_userdata_input.yaml") + Expect(err).ToNot(HaveOccurred()) + nodeClass.Spec.UserData = lo.ToPtr(fmt.Sprintf(string(rawContent), env.ClusterName, + env.ClusterEndpoint, env.ExpectCABundle())) pod := coretest.Pod() env.ExpectCreated(pod, nodeClass, nodePool) env.EventuallyExpectHealthy(pod) env.ExpectCreatedNodeCount("==", 1) - env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(al2AMI)))) + env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(al2023AMI)))) }) It("should have the EC2NodeClass status for AMIs using wildcard", func() { nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ @@ -219,6 +223,16 @@ var _ = Describe("AMI", func() { nc := EventuallyExpectAMIsToExist(nodeClass) 
Expect(len(nc.Status.AMIs)).To(BeNumerically("==", 1)) Expect(nc.Status.AMIs[0].ID).To(Equal(customAMI)) + ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: status.ConditionReady, Status: metav1.ConditionTrue}) + }) + It("should have ec2nodeClass status as not ready since AMI was not resolved", func() { + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + ID: "ami-123", + }, + } + env.ExpectCreated(nodeClass) + ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: status.ConditionReady, Status: metav1.ConditionFalse, Message: "Failed to resolve AMIs"}) }) }) diff --git a/test/suites/integration/emptiness_test.go b/test/suites/integration/emptiness_test.go index a4aca9792b0d..fc63b3b401b1 100644 --- a/test/suites/integration/emptiness_test.go +++ b/test/suites/integration/emptiness_test.go @@ -22,7 +22,6 @@ import ( appsv1 "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" - "knative.dev/pkg/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -110,7 +109,7 @@ var _ = Describe("Emptiness", func() { By("making the nodeclaim empty") persisted := deployment.DeepCopy() - deployment.Spec.Replicas = ptr.Int32(0) + deployment.Spec.Replicas = lo.ToPtr(int32(0)) Expect(env.Client.Patch(env, deployment, client.StrategicMergeFrom(persisted))).To(Succeed()) env.EventuallyExpectEmpty(nodeClaim) diff --git a/test/suites/integration/extended_resources_test.go b/test/suites/integration/extended_resources_test.go index 49c2e8d3c7ae..4c82c1002501 100644 --- a/test/suites/integration/extended_resources_test.go +++ b/test/suites/integration/extended_resources_test.go @@ -149,7 +149,7 @@ var _ = Describe("Extended Resources", func() { Skip("skipping test on AMD instance types") ExpectAMDDevicePluginCreated() - customAMI := env.GetCustomAMI("/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id", 0) + customAMI := 
env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) // We create custom userData that installs the AMD Radeon driver and then performs the EKS bootstrap script // We use a Custom AMI so that we can reboot after we start the kubelet service diff --git a/test/suites/integration/instance_profile_test.go b/test/suites/integration/instance_profile_test.go index 4c2e7b860a89..f994e9864877 100644 --- a/test/suites/integration/instance_profile_test.go +++ b/test/suites/integration/instance_profile_test.go @@ -16,6 +16,10 @@ package integration_test import ( "fmt" + "time" + + "github.com/awslabs/operatorpkg/status" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/iam" @@ -25,6 +29,7 @@ import ( awserrors "github.com/aws/karpenter-provider-aws/pkg/errors" + . "github.com/awslabs/operatorpkg/test/expectations" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) @@ -81,5 +86,11 @@ var _ = Describe("InstanceProfile Generation", func() { instance := env.GetInstance(node.Name) Expect(instance.IamInstanceProfile).ToNot(BeNil()) Expect(lo.FromPtr(instance.IamInstanceProfile.Arn)).To(ContainSubstring(nodeClass.Status.InstanceProfile)) + ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: status.ConditionReady, Status: metav1.ConditionTrue}) + }) + It("should have the EC2NodeClass status as not ready since Instance Profile was not resolved", func() { + nodeClass.Spec.Role = fmt.Sprintf("KarpenterNodeRole-%s", "invalidRole") + env.ExpectCreated(nodeClass) + ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: status.ConditionReady, Status: metav1.ConditionFalse, Message: "Failed to resolve instance profile"}) }) }) diff --git a/test/suites/integration/kubelet_config_test.go b/test/suites/integration/kubelet_config_test.go index 
a79804cdab0d..7f59366245ba 100644 --- a/test/suites/integration/kubelet_config_test.go +++ b/test/suites/integration/kubelet_config_test.go @@ -22,11 +22,9 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" - "knative.dev/pkg/ptr" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" - "github.com/aws/aws-sdk-go/service/ssm" "github.com/samber/lo" "github.com/aws/karpenter-provider-aws/test/pkg/environment/aws" @@ -36,7 +34,6 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" ) var _ = Describe("KubeletConfiguration Overrides", func() { @@ -44,8 +41,8 @@ var _ = Describe("KubeletConfiguration Overrides", func() { BeforeEach(func() { // MaxPods needs to account for the daemonsets that will run on the nodes nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - MaxPods: ptr.Int32(110), - PodsPerCore: ptr.Int32(10), + MaxPods: lo.ToPtr(int32(110)), + PodsPerCore: lo.ToPtr(int32(10)), SystemReserved: map[string]string{ string(v1.ResourceCPU): "200m", string(v1.ResourceMemory): "200Mi", @@ -80,24 +77,22 @@ var _ = Describe("KubeletConfiguration Overrides", func() { "imagefs.inodesFree": {Duration: time.Minute * 2}, "pid.available": {Duration: time.Minute * 2}, }, - EvictionMaxPodGracePeriod: ptr.Int32(120), - ImageGCHighThresholdPercent: ptr.Int32(50), - ImageGCLowThresholdPercent: ptr.Int32(10), - CPUCFSQuota: ptr.Bool(false), + EvictionMaxPodGracePeriod: lo.ToPtr(int32(120)), + ImageGCHighThresholdPercent: lo.ToPtr(int32(50)), + ImageGCLowThresholdPercent: lo.ToPtr(int32(10)), + CPUCFSQuota: lo.ToPtr(false), } }) DescribeTable("Linux AMIFamilies", func(amiFamily *string) { nodeClass.Spec.AMIFamily = amiFamily // TODO (jmdeal@): remove once 22.04 AMIs are supported - if *amiFamily == v1beta1.AMIFamilyUbuntu && env.GetK8sVersion(0) == "1.29" { + if *amiFamily == v1beta1.AMIFamilyUbuntu && 
env.K8sMinorVersion() >= 29 { nodeClass.Spec.AMISelectorTerms = lo.Map([]string{ "/aws/service/canonical/ubuntu/eks/20.04/1.28/stable/current/amd64/hvm/ebs-gp2/ami-id", "/aws/service/canonical/ubuntu/eks/20.04/1.28/stable/current/arm64/hvm/ebs-gp2/ami-id", - }, func(arg string, _ int) v1beta1.AMISelectorTerm { - parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{Name: lo.ToPtr(arg)}) - Expect(err).To(BeNil()) - return v1beta1.AMISelectorTerm{ID: *parameter.Parameter.Value} + }, func(ssmPath string, _ int) v1beta1.AMISelectorTerm { + return v1beta1.AMISelectorTerm{ID: env.GetAMIBySSMPath(ssmPath)} }) } pod := test.Pod(test.PodOptions{ @@ -162,7 +157,7 @@ var _ = Describe("KubeletConfiguration Overrides", func() { // Get the DS pod count and use it to calculate the DS pod overhead dsCount := env.GetDaemonSetCount(nodePool) nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - MaxPods: ptr.Int32(1 + int32(dsCount)), + MaxPods: lo.ToPtr(int32(1 + int32(dsCount))), } numPods := 3 @@ -220,7 +215,7 @@ var _ = Describe("KubeletConfiguration Overrides", func() { // 4 DS pods and 2 test pods. 
dsCount := env.GetDaemonSetCount(nodePool) nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - PodsPerCore: ptr.Int32(int32(math.Ceil(float64(2+dsCount) / 2))), + PodsPerCore: lo.ToPtr(int32(math.Ceil(float64(2+dsCount) / 2))), } env.ExpectCreated(nodeClass, nodePool, dep) @@ -242,7 +237,7 @@ var _ = Describe("KubeletConfiguration Overrides", func() { }, ) - nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{PodsPerCore: ptr.Int32(1)} + nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{PodsPerCore: lo.ToPtr(int32(1))} numPods := 6 dep := test.Deployment(test.DeploymentOptions{ Replicas: int32(numPods), diff --git a/test/suites/integration/scheduling_test.go b/test/suites/integration/scheduling_test.go index c22748c70772..be328ed5cdd2 100644 --- a/test/suites/integration/scheduling_test.go +++ b/test/suites/integration/scheduling_test.go @@ -24,7 +24,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" - "knative.dev/pkg/ptr" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/test" @@ -92,6 +91,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { v1beta1.LabelInstanceCPU: "2", v1beta1.LabelInstanceCPUManufacturer: "intel", v1beta1.LabelInstanceMemory: "4096", + v1beta1.LabelInstanceEBSBandwidth: "4750", v1beta1.LabelInstanceNetworkBandwidth: "750", } selectors.Insert(lo.Keys(nodeSelector)...) 
// Add node selector keys to selectors used in testing to ensure we test all labels @@ -107,6 +107,21 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) env.ExpectCreatedNodeCount("==", 1) }) + It("should support well-known labels for zone id selection", func() { + selectors.Insert(v1beta1.LabelTopologyZoneID) // Add node selector keys to selectors used in testing to ensure we test all labels + deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ + NodeRequirements: []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelTopologyZoneID, + Operator: v1.NodeSelectorOpIn, + Values: []string{env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName})[0].ZoneInfo.ZoneID}, + }, + }, + }}) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("==", 1) + }) It("should support well-known labels for local NVME storage", func() { selectors.Insert(v1beta1.LabelInstanceLocalNVME) // Add node selector keys to selectors used in testing to ensure we test all labels deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ @@ -382,7 +397,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { It("should provision a node using a NodePool with higher priority", func() { nodePoolLowPri := test.NodePool(corev1beta1.NodePool{ Spec: corev1beta1.NodePoolSpec{ - Weight: ptr.Int32(10), + Weight: lo.ToPtr(int32(10)), Template: corev1beta1.NodeClaimTemplate{ Spec: corev1beta1.NodeClaimSpec{ NodeClassRef: &corev1beta1.NodeClassReference{ @@ -410,7 +425,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { }) nodePoolHighPri := 
test.NodePool(corev1beta1.NodePool{ Spec: corev1beta1.NodePoolSpec{ - Weight: ptr.Int32(100), + Weight: lo.ToPtr(int32(100)), Template: corev1beta1.NodeClaimTemplate{ Spec: corev1beta1.NodeClaimSpec{ NodeClassRef: &corev1beta1.NodeClassReference{ @@ -440,14 +455,14 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { env.ExpectCreated(pod, nodeClass, nodePoolLowPri, nodePoolHighPri) env.EventuallyExpectHealthy(pod) env.ExpectCreatedNodeCount("==", 1) - Expect(ptr.StringValue(env.GetInstance(pod.Spec.NodeName).InstanceType)).To(Equal("c5.large")) + Expect(lo.FromPtr(env.GetInstance(pod.Spec.NodeName).InstanceType)).To(Equal("c5.large")) Expect(env.GetNode(pod.Spec.NodeName).Labels[corev1beta1.NodePoolLabelKey]).To(Equal(nodePoolHighPri.Name)) }) DescribeTable( "should provision a right-sized node when a pod has InitContainers (cpu)", func(expectedNodeCPU string, containerRequirements v1.ResourceRequirements, initContainers ...v1.Container) { - if version, err := env.GetK8sMinorVersion(0); err != nil || version < 29 { + if env.K8sMinorVersion() < 29 { Skip("native sidecar containers are only enabled on EKS 1.29+") } @@ -548,7 +563,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { }), ) It("should provision a right-sized node when a pod has InitContainers (mixed resources)", func() { - if version, err := env.GetK8sMinorVersion(0); err != nil || version < 29 { + if env.K8sMinorVersion() < 29 { Skip("native sidecar containers are only enabled on EKS 1.29+") } test.ReplaceRequirements(nodePool, corev1beta1.NodeSelectorRequirementWithMinValues{ @@ -580,6 +595,84 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { env.ExpectCreated(nodePool, nodeClass, pod) env.EventuallyExpectHealthy(pod) }) + + It("should provision a node for a pod with overlapping zone and zone-id requirements", func() { + subnetInfo := lo.UniqBy(env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}), func(s 
aws.SubnetInfo) string { + return s.Zone + }) + Expect(len(subnetInfo)).To(BeNumerically(">=", 3)) + + // Create a pod with 'overlapping' zone and zone-id requirements. With two options for each label, but only one pair of zone-zoneID that maps to the + // same AZ, we will always expect the pod to be scheduled to that AZ. In this case, this is the mapping at zone[1]. + pod := test.Pod(test.PodOptions{ + NodeRequirements: []v1.NodeSelectorRequirement{ + { + Key: v1.LabelTopologyZone, + Operator: v1.NodeSelectorOpIn, + Values: lo.Map(subnetInfo[0:2], func(info aws.SubnetInfo, _ int) string { return info.Zone }), + }, + { + Key: v1beta1.LabelTopologyZoneID, + Operator: v1.NodeSelectorOpIn, + Values: lo.Map(subnetInfo[1:3], func(info aws.SubnetInfo, _ int) string { return info.ZoneID }), + }, + }, + }) + env.ExpectCreated(nodePool, nodeClass, pod) + node := env.EventuallyExpectNodeCount("==", 1)[0] + Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(subnetInfo[1].Zone)) + Expect(node.Labels[v1beta1.LabelTopologyZoneID]).To(Equal(subnetInfo[1].ZoneID)) + }) + It("should provision nodes for pods with zone-id requirements in the correct zone", func() { + // Each pod specifies a requirement on this expected zone, where the value is the matching zone for the + // required zone-id. This allows us to verify that Karpenter launched the node in the correct zone, even if + // it doesn't add the zone-id label and the label is added by CCM. If we didn't take this approach, we would + // succeed even if Karpenter doesn't add the label and /or incorrectly generated offerings on k8s 1.30 and + // above. This is an unlikely scenario, and adding this check is a defense in depth measure. 
+ const expectedZoneLabel = "expected-zone-label" + test.ReplaceRequirements(nodePool, corev1beta1.NodeSelectorRequirementWithMinValues{ + NodeSelectorRequirement: v1.NodeSelectorRequirement{ + Key: expectedZoneLabel, + Operator: v1.NodeSelectorOpExists, + }, + }) + + subnetInfo := lo.UniqBy(env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}), func(s aws.SubnetInfo) string { + return s.Zone + }) + pods := lo.Map(subnetInfo, func(info aws.SubnetInfo, _ int) *v1.Pod { + return test.Pod(test.PodOptions{ + NodeRequirements: []v1.NodeSelectorRequirement{ + { + Key: expectedZoneLabel, + Operator: v1.NodeSelectorOpIn, + Values: []string{info.Zone}, + }, + { + Key: v1beta1.LabelTopologyZoneID, + Operator: v1.NodeSelectorOpIn, + Values: []string{info.ZoneID}, + }, + }, + }) + }) + + env.ExpectCreated(nodePool, nodeClass) + for _, pod := range pods { + env.ExpectCreated(pod) + } + nodes := env.EventuallyExpectCreatedNodeCount("==", len(subnetInfo)) + for _, node := range nodes { + expectedZone, ok := node.Labels[expectedZoneLabel] + Expect(ok).To(BeTrue()) + Expect(node.Labels[v1.LabelTopologyZone]).To(Equal(expectedZone)) + zoneInfo, ok := lo.Find(subnetInfo, func(info aws.SubnetInfo) bool { + return info.Zone == expectedZone + }) + Expect(ok).To(BeTrue()) + Expect(node.Labels[v1beta1.LabelTopologyZoneID]).To(Equal(zoneInfo.ZoneID)) + } + }) }) }) diff --git a/test/suites/integration/security_group_test.go b/test/suites/integration/security_group_test.go index 477fa1c70034..ecb365ff7aca 100644 --- a/test/suites/integration/security_group_test.go +++ b/test/suites/integration/security_group_test.go @@ -18,7 +18,9 @@ import ( "time" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/awslabs/operatorpkg/status" "github.com/samber/lo" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" "sigs.k8s.io/controller-runtime/pkg/client" @@ -27,6 +29,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" 
"github.com/aws/karpenter-provider-aws/test/pkg/environment/aws" + . "github.com/awslabs/operatorpkg/test/expectations" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) @@ -75,6 +78,17 @@ var _ = Describe("SecurityGroups", func() { It("should update the EC2NodeClass status security groups", func() { env.ExpectCreated(nodeClass) EventuallyExpectSecurityGroups(env, nodeClass) + ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: status.ConditionReady, Status: metav1.ConditionTrue}) + }) + + It("should have the NodeClass status as not ready since security groups were not resolved", func() { + nodeClass.Spec.SecurityGroupSelectorTerms = []v1beta1.SecurityGroupSelectorTerm{ + { + Tags: map[string]string{"karpenter.sh/discovery": "invalidName"}, + }, + } + env.ExpectCreated(nodeClass) + ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: status.ConditionReady, Status: metav1.ConditionFalse, Message: "Failed to resolve security groups"}) }) }) diff --git a/test/suites/integration/subnet_test.go b/test/suites/integration/subnet_test.go index 3312f91c33c6..7eacc0ef28ec 100644 --- a/test/suites/integration/subnet_test.go +++ b/test/suites/integration/subnet_test.go @@ -18,9 +18,11 @@ import ( "time" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/awslabs/operatorpkg/status" "github.com/onsi/gomega/types" "github.com/samber/lo" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" "sigs.k8s.io/controller-runtime/pkg/client" @@ -31,6 +33,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/test/pkg/environment/aws" + . "github.com/awslabs/operatorpkg/test/expectations" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" ) @@ -75,7 +78,7 @@ var _ = Describe("Subnets", func() { }) It("should use the subnet tag selector with multiple tag values", func() { // Get all the subnets for the cluster - subnets := env.GetSubnetNameAndIds(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + subnets := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) Expect(len(subnets)).To(BeNumerically(">", 1)) firstSubnet := subnets[0] lastSubnet := subnets[len(subnets)-1] @@ -122,6 +125,16 @@ var _ = Describe("Subnets", func() { It("should have the NodeClass status for subnets", func() { env.ExpectCreated(nodeClass) EventuallyExpectSubnets(env, nodeClass) + ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: status.ConditionReady, Status: metav1.ConditionTrue}) + }) + It("should have the NodeClass status as not ready since subnets were not resolved", func() { + nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{ + { + Tags: map[string]string{"karpenter.sh/discovery": "invalidName"}, + }, + } + env.ExpectCreated(nodeClass) + ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: status.ConditionReady, Status: metav1.ConditionFalse, Message: "Failed to resolve subnets"}) }) }) diff --git a/test/suites/integration/testdata/al2023_userdata_input.yaml b/test/suites/integration/testdata/al2023_userdata_input.yaml new file mode 100644 index 000000000000..b0ce7a5e8496 --- /dev/null +++ b/test/suites/integration/testdata/al2023_userdata_input.yaml @@ -0,0 +1,14 @@ +apiVersion: node.eks.aws/v1alpha1 +kind: NodeConfig +spec: + cluster: + name: %s + apiServerEndpoint: %s + certificateAuthority: %s + cidr: 10.100.0.0/16 + kubelet: + config: + clusterDNS: + - 10.0.100.10 + flags: + - --node-labels="testing/cluster=unspecified" \ No newline at end of file diff --git a/test/suites/integration/validation_test.go b/test/suites/integration/validation_test.go index 
790a880ce29b..522815d151ca 100644 --- a/test/suites/integration/validation_test.go +++ b/test/suites/integration/validation_test.go @@ -20,7 +20,6 @@ import ( "github.com/samber/lo" v1 "k8s.io/api/core/v1" - "knative.dev/pkg/ptr" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" coretest "sigs.k8s.io/karpenter/pkg/test" @@ -114,18 +113,18 @@ var _ = Describe("Validation", func() { }) It("should error if imageGCHighThresholdPercent is less than imageGCLowThresholdPercent", func() { nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - ImageGCHighThresholdPercent: ptr.Int32(10), - ImageGCLowThresholdPercent: ptr.Int32(60), + ImageGCHighThresholdPercent: lo.ToPtr(int32(10)), + ImageGCLowThresholdPercent: lo.ToPtr(int32(60)), } Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) }) It("should error if imageGCHighThresholdPercent or imageGCLowThresholdPercent is negative", func() { nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - ImageGCHighThresholdPercent: ptr.Int32(-10), + ImageGCHighThresholdPercent: lo.ToPtr(int32(-10)), } Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ - ImageGCLowThresholdPercent: ptr.Int32(-10), + ImageGCLowThresholdPercent: lo.ToPtr(int32(-10)), } Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) }) diff --git a/test/suites/interruption/suite_test.go b/test/suites/interruption/suite_test.go index f2adf66c1437..1ca8cb565446 100644 --- a/test/suites/interruption/suite_test.go +++ b/test/suites/interruption/suite_test.go @@ -24,7 +24,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/uuid" - "knative.dev/pkg/ptr" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" coretest "sigs.k8s.io/karpenter/pkg/test" @@ -83,7 +82,7 @@ var _ = Describe("Interruption", func() { ObjectMeta: metav1.ObjectMeta{ Labels: 
map[string]string{"app": "my-app"}, }, - TerminationGracePeriodSeconds: ptr.Int64(0), + TerminationGracePeriodSeconds: lo.ToPtr(int64(0)), }, }) selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) @@ -117,7 +116,7 @@ var _ = Describe("Interruption", func() { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "my-app"}, }, - TerminationGracePeriodSeconds: ptr.Int64(0), + TerminationGracePeriodSeconds: lo.ToPtr(int64(0)), }, }) selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) @@ -143,7 +142,7 @@ var _ = Describe("Interruption", func() { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "my-app"}, }, - TerminationGracePeriodSeconds: ptr.Int64(0), + TerminationGracePeriodSeconds: lo.ToPtr(int64(0)), }, }) selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) @@ -169,7 +168,7 @@ var _ = Describe("Interruption", func() { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"app": "my-app"}, }, - TerminationGracePeriodSeconds: ptr.Int64(0), + TerminationGracePeriodSeconds: lo.ToPtr(int64(0)), }, }) selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) diff --git a/test/suites/localzone/suite_test.go b/test/suites/localzone/suite_test.go index 53b0d4940773..8c3ecc034b6c 100644 --- a/test/suites/localzone/suite_test.go +++ b/test/suites/localzone/suite_test.go @@ -70,8 +70,11 @@ var _ = BeforeEach(func() { NodeSelectorRequirement: v1.NodeSelectorRequirement{ Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, - Values: lo.Keys(lo.PickByValues(env.GetZones(), []string{"local-zone"})), - }}) + Values: lo.FilterMap(env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}), func(info aws.SubnetInfo, _ int) (string, bool) { + return info.Zone, info.ZoneType == "local-zone" + }), + }, + }) }) var _ = AfterEach(func() { env.Cleanup() }) var _ = AfterEach(func() { env.AfterEach() }) diff --git a/test/suites/nodeclaim/garbage_collection_test.go 
b/test/suites/nodeclaim/garbage_collection_test.go index 35d5dd4700bc..0872116b00c4 100644 --- a/test/suites/nodeclaim/garbage_collection_test.go +++ b/test/suites/nodeclaim/garbage_collection_test.go @@ -45,11 +45,11 @@ var _ = Describe("GarbageCollection", func() { BeforeEach(func() { securityGroups := env.GetSecurityGroups(map[string]string{"karpenter.sh/discovery": env.ClusterName}) - subnets := env.GetSubnetNameAndIds(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + subnets := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) Expect(securityGroups).ToNot(HaveLen(0)) Expect(subnets).ToNot(HaveLen(0)) - customAMI = env.GetCustomAMI("/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id", 1) + customAMI = env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) instanceProfileName = fmt.Sprintf("KarpenterNodeInstanceProfile-%s", env.ClusterName) roleName = fmt.Sprintf("KarpenterNodeRole-%s", env.ClusterName) instanceInput = &ec2.RunInstancesInput{ @@ -98,10 +98,10 @@ var _ = Describe("GarbageCollection", func() { }) It("should succeed to garbage collect an Instance that was launched by a NodeClaim but has no Instance mapping", func() { // Update the userData for the instance input with the correct NodePool - rawContent, err := os.ReadFile("testdata/al2_userdata_input.sh") + rawContent, err := os.ReadFile("testdata/al2023_userdata_input.yaml") Expect(err).ToNot(HaveOccurred()) instanceInput.UserData = lo.ToPtr(base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(string(rawContent), env.ClusterName, - env.ClusterEndpoint, env.ExpectCABundle(), nodePool.Name)))) + env.ClusterEndpoint, env.ExpectCABundle())))) env.ExpectInstanceProfileCreated(instanceProfileName, roleName) DeferCleanup(func() { diff --git a/test/suites/nodeclaim/nodeclaim_test.go b/test/suites/nodeclaim/nodeclaim_test.go index bc36bd8fd380..d4e81e4528cd 
100644 --- a/test/suites/nodeclaim/nodeclaim_test.go +++ b/test/suites/nodeclaim/nodeclaim_test.go @@ -15,7 +15,6 @@ limitations under the License. package nodeclaim_test import ( - "encoding/base64" "fmt" "os" "time" @@ -269,16 +268,16 @@ var _ = Describe("StandaloneNodeClaim", func() { }, time.Second*10).Should(Succeed()) }) It("should create a NodeClaim with custom labels passed through the userData", func() { - customAMI := env.GetCustomAMI("/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id", 1) + customAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) // Update the userData for the instance input with the correct NodePool - rawContent, err := os.ReadFile("testdata/al2_userdata_custom_labels_input.sh") + rawContent, err := os.ReadFile("testdata/al2023_userdata_custom_labels_input.yaml") Expect(err).ToNot(HaveOccurred()) - // Create userData that adds custom labels through the --kubelet-extra-args + // Create userData that adds custom labels through the --node-labels nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: customAMI}} - nodeClass.Spec.UserData = lo.ToPtr(base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(string(rawContent), env.ClusterName, - env.ClusterEndpoint, env.ExpectCABundle())))) + nodeClass.Spec.UserData = lo.ToPtr(fmt.Sprintf(string(rawContent), env.ClusterName, + env.ClusterEndpoint, env.ExpectCABundle())) nodeClaim := test.NodeClaim(corev1beta1.NodeClaim{ Spec: corev1beta1.NodeClaimSpec{ @@ -319,17 +318,17 @@ var _ = Describe("StandaloneNodeClaim", func() { env.EventuallyExpectNodeClaimsReady(nodeClaim) }) It("should delete a NodeClaim after the registration timeout when the node doesn't register", func() { - customAMI := env.GetCustomAMI("/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id", 1) + customAMI := 
env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) // Update the userData for the instance input with the correct NodePool - rawContent, err := os.ReadFile("testdata/al2_userdata_input.sh") + rawContent, err := os.ReadFile("testdata/al2023_userdata_input.yaml") Expect(err).ToNot(HaveOccurred()) - // Create userData that adds custom labels through the --kubelet-extra-args + // Create userData that adds custom labels through the --node-labels nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: customAMI}} // Giving bad clusterName and clusterEndpoint to the userData - nodeClass.Spec.UserData = lo.ToPtr(base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(string(rawContent), "badName", "badEndpoint", env.ExpectCABundle())))) + nodeClass.Spec.UserData = lo.ToPtr(fmt.Sprintf(string(rawContent), "badName", "badEndpoint", env.ExpectCABundle())) nodeClaim := test.NodeClaim(corev1beta1.NodeClaim{ Spec: corev1beta1.NodeClaimSpec{ @@ -369,9 +368,9 @@ var _ = Describe("StandaloneNodeClaim", func() { Eventually(func(g Gomega) { temp := &corev1beta1.NodeClaim{} g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), temp)).To(Succeed()) - g.Expect(temp.StatusConditions().GetCondition(corev1beta1.Launched).IsTrue()).To(BeTrue()) - g.Expect(temp.StatusConditions().GetCondition(corev1beta1.Registered).IsFalse()).To(BeTrue()) - g.Expect(temp.StatusConditions().GetCondition(corev1beta1.Initialized).IsFalse()).To(BeTrue()) + g.Expect(temp.StatusConditions().Get(corev1beta1.ConditionTypeLaunched).IsTrue()).To(BeTrue()) + g.Expect(temp.StatusConditions().Get(corev1beta1.ConditionTypeRegistered).IsFalse()).To(BeTrue()) + g.Expect(temp.StatusConditions().Get(corev1beta1.ConditionTypeInitialized).IsFalse()).To(BeTrue()) }).Should(Succeed()) // Expect that the nodeClaim is eventually de-provisioned due to the 
registration timeout diff --git a/test/suites/nodeclaim/testdata/al2023_userdata_custom_labels_input.yaml b/test/suites/nodeclaim/testdata/al2023_userdata_custom_labels_input.yaml new file mode 100644 index 000000000000..149366387e12 --- /dev/null +++ b/test/suites/nodeclaim/testdata/al2023_userdata_custom_labels_input.yaml @@ -0,0 +1,14 @@ +apiVersion: node.eks.aws/v1alpha1 +kind: NodeConfig +spec: + cluster: + name: %s + apiServerEndpoint: %s + certificateAuthority: %s + cidr: 10.100.0.0/16 + kubelet: + config: + clusterDNS: + - 10.0.100.10 + flags: + - --node-labels="testing/cluster=unspecified,custom-label=custom-value,custom-label2=custom-value2" \ No newline at end of file diff --git a/test/suites/nodeclaim/testdata/al2023_userdata_input.yaml b/test/suites/nodeclaim/testdata/al2023_userdata_input.yaml new file mode 100644 index 000000000000..b0ce7a5e8496 --- /dev/null +++ b/test/suites/nodeclaim/testdata/al2023_userdata_input.yaml @@ -0,0 +1,14 @@ +apiVersion: node.eks.aws/v1alpha1 +kind: NodeConfig +spec: + cluster: + name: %s + apiServerEndpoint: %s + certificateAuthority: %s + cidr: 10.100.0.0/16 + kubelet: + config: + clusterDNS: + - 10.0.100.10 + flags: + - --node-labels="testing/cluster=unspecified" \ No newline at end of file diff --git a/test/suites/nodeclaim/testdata/al2_userdata_custom_labels_input.sh b/test/suites/nodeclaim/testdata/al2_userdata_custom_labels_input.sh deleted file mode 100644 index 86feeb4aa5d6..000000000000 --- a/test/suites/nodeclaim/testdata/al2_userdata_custom_labels_input.sh +++ /dev/null @@ -1,13 +0,0 @@ -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary="BOUNDARY" - ---BOUNDARY -Content-Type: text/x-shellscript; charset="us-ascii" - -#!/bin/bash -exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 -/etc/eks/bootstrap.sh '%s' --apiserver-endpoint '%s' --b64-cluster-ca '%s' \ ---use-max-pods false \ ---kubelet-extra-args 
'--node-labels=testing/cluster=unspecified,custom-label=custom-value,custom-label2=custom-value2' - ---BOUNDARY-- diff --git a/test/suites/nodeclaim/testdata/al2_userdata_input.sh b/test/suites/nodeclaim/testdata/al2_userdata_input.sh deleted file mode 100644 index 1fd3e27e30f0..000000000000 --- a/test/suites/nodeclaim/testdata/al2_userdata_input.sh +++ /dev/null @@ -1,13 +0,0 @@ -MIME-Version: 1.0 -Content-Type: multipart/mixed; boundary="BOUNDARY" - ---BOUNDARY -Content-Type: text/x-shellscript; charset="us-ascii" - -#!/bin/bash -exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 -/etc/eks/bootstrap.sh '%s' --apiserver-endpoint '%s' --b64-cluster-ca '%s' \ ---use-max-pods false \ ---kubelet-extra-args '--node-labels=karpenter.sh/nodepool=%s,testing/cluster=unspecified' - ---BOUNDARY-- diff --git a/test/suites/scale/deprovisioning_test.go b/test/suites/scale/deprovisioning_test.go index cb7086bbee08..cf7eaa8b9b98 100644 --- a/test/suites/scale/deprovisioning_test.go +++ b/test/suites/scale/deprovisioning_test.go @@ -28,7 +28,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/uuid" - "knative.dev/pkg/ptr" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/test" @@ -85,11 +84,22 @@ var _ = Describe("Deprovisioning", Label(debug.NoWatch), Label(debug.NoEvents), nodeClass = env.DefaultEC2NodeClass() nodePool = env.DefaultNodePool(nodeClass) nodePool.Spec.Limits = nil - test.ReplaceRequirements(nodePool, corev1beta1.NodeSelectorRequirementWithMinValues{ - NodeSelectorRequirement: v1.NodeSelectorRequirement{Key: v1beta1.LabelInstanceHypervisor, - Operator: v1.NodeSelectorOpIn, - Values: []string{"nitro"}, - }}) + test.ReplaceRequirements(nodePool, []corev1beta1.NodeSelectorRequirementWithMinValues{ + { + NodeSelectorRequirement: v1.NodeSelectorRequirement{Key: v1beta1.LabelInstanceHypervisor, + Operator: v1.NodeSelectorOpIn, + Values: 
[]string{"nitro"}, + }, + }, + // Ensure that all pods can fit on to the provisioned nodes including all daemonsets + { + NodeSelectorRequirement: v1.NodeSelectorRequirement{ + Key: v1beta1.LabelInstanceSize, + Operator: v1.NodeSelectorOpIn, + Values: []string{"large"}, + }, + }, + }...) deploymentOptions = test.DeploymentOptions{ PodOptions: test.PodOptions{ ResourceRequirements: v1.ResourceRequirements{ @@ -236,8 +246,8 @@ var _ = Describe("Deprovisioning", Label(debug.NoWatch), Label(debug.NoEvents), // Enable consolidation, emptiness, and expiration nodePoolMap[consolidationValue].Spec.Disruption.ConsolidateAfter = nil nodePoolMap[emptinessValue].Spec.Disruption.ConsolidationPolicy = corev1beta1.ConsolidationPolicyWhenEmpty - nodePoolMap[emptinessValue].Spec.Disruption.ConsolidateAfter.Duration = ptr.Duration(0) - nodePoolMap[expirationValue].Spec.Disruption.ExpireAfter.Duration = ptr.Duration(0) + nodePoolMap[emptinessValue].Spec.Disruption.ConsolidateAfter.Duration = lo.ToPtr(time.Duration(0)) + nodePoolMap[expirationValue].Spec.Disruption.ExpireAfter.Duration = lo.ToPtr(time.Duration(0)) nodePoolMap[expirationValue].Spec.Limits = disableProvisioningLimits // Update the drift NodeClass to start drift on Nodes assigned to this NodeClass driftNodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyBottlerocket @@ -545,7 +555,7 @@ var _ = Describe("Deprovisioning", Label(debug.NoWatch), Label(debug.NoEvents), env.MeasureDeprovisioningDurationFor(func() { By("kicking off deprovisioning emptiness by setting the ttlSecondsAfterEmpty value on the nodePool") nodePool.Spec.Disruption.ConsolidationPolicy = corev1beta1.ConsolidationPolicyWhenEmpty - nodePool.Spec.Disruption.ConsolidateAfter.Duration = ptr.Duration(0) + nodePool.Spec.Disruption.ConsolidateAfter.Duration = lo.ToPtr(time.Duration(0)) env.ExpectCreatedOrUpdated(nodePool) env.EventuallyExpectDeletedNodeCount("==", expectedNodeCount) @@ -598,7 +608,7 @@ var _ = Describe("Deprovisioning", Label(debug.NoWatch), 
Label(debug.NoEvents), // Change limits so that replacement nodes will use another nodePool. nodePool.Spec.Limits = disableProvisioningLimits // Enable Expiration - nodePool.Spec.Disruption.ExpireAfter.Duration = ptr.Duration(0) + nodePool.Spec.Disruption.ExpireAfter.Duration = lo.ToPtr(time.Duration(0)) noExpireNodePool := test.NodePool(*nodePool.DeepCopy()) diff --git a/website/content/en/docs/concepts/nodeclasses.md b/website/content/en/docs/concepts/nodeclasses.md index 446620cd46fe..c86917cd0a15 100644 --- a/website/content/en/docs/concepts/nodeclasses.md +++ b/website/content/en/docs/concepts/nodeclasses.md @@ -1246,3 +1246,35 @@ spec: status: instanceProfile: "${CLUSTER_NAME}-0123456778901234567789" ``` +## status.conditions + +[`status.conditions`]({{< ref "#statusconditions" >}}) indicates EC2NodeClass readiness. This will be `Ready` when Karpenter successfully discovers AMIs, Instance Profile, Subnets, Cluster CIDR and SecurityGroups for the EC2NodeClass. + +```yaml +spec: + role: "KarpenterNodeRole-${CLUSTER_NAME}" +status: + conditions: + - lastTransitionTime: "2024-05-06T06:04:45Z" + message: Ready + reason: Ready + status: "True" + type: Ready +``` + +If any of the underlying conditions are not resolved then `status` is `False` and `message` indicates the dependency that was not resolved. + +```yaml +spec: + role: "KarpenterNodeRole-${CLUSTER_NAME}" +status: + conditions: + - lastTransitionTime: "2024-05-06T06:19:46Z" + message: unable to resolve instance profile for node class + reason: NodeClassNotReady + status: "False" + type: Ready +``` +{{% alert title="Note" color="primary" %}} +An EC2NodeClass that uses AL2023 requires the cluster CIDR for launching nodes. Cluster CIDR will not be resolved for an EC2NodeClass that doesn't use AL2023. 
+{{% /alert %}} \ No newline at end of file diff --git a/website/content/en/docs/faq.md b/website/content/en/docs/faq.md index 3c1b1df14dc0..46134721be80 100644 --- a/website/content/en/docs/faq.md +++ b/website/content/en/docs/faq.md @@ -14,7 +14,7 @@ See [Configuring NodePools]({{< ref "./concepts/#configuring-nodepools" >}}) for AWS is the first cloud provider supported by Karpenter, although it is designed to be used with other cloud providers as well. ### Can I write my own cloud provider for Karpenter? -Yes, but there is no documentation yet for it. Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter-core/tree/v0.36.1/pkg/cloudprovider) documentation to see how the AWS provider is built, but there are other sections of the code that will require changes too. +Yes, but there is no documentation yet for it. Start with Karpenter's GitHub [cloudprovider](https://github.com/aws/karpenter-core/tree/v0.36.2/pkg/cloudprovider) documentation to see how the AWS provider is built, but there are other sections of the code that will require changes too. ### What operating system nodes does Karpenter deploy? Karpenter uses the OS defined by the [AMI Family in your EC2NodeClass]({{< ref "./concepts/nodeclasses#specamifamily" >}}). @@ -26,7 +26,7 @@ Karpenter has multiple mechanisms for configuring the [operating system]({{< ref Karpenter is flexible to multi-architecture configurations using [well known labels]({{< ref "./concepts/scheduling/#supported-labels">}}). ### What RBAC access is required? -All the required RBAC rules can be found in the Helm chart template. 
See [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole-core.yaml), [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole.yaml), [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/rolebinding.yaml), and [role.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/role.yaml) files for details. +All the required RBAC rules can be found in the Helm chart template. See [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.2/charts/karpenter/templates/clusterrole-core.yaml), [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.2/charts/karpenter/templates/clusterrole.yaml), [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.2/charts/karpenter/templates/rolebinding.yaml), and [role.yaml](https://github.com/aws/karpenter/blob/v0.36.2/charts/karpenter/templates/role.yaml) files for details. ### Can I run Karpenter outside of a Kubernetes cluster? Yes, as long as the controller has network and IAM/RBAC access to the Kubernetes API and your provider API. 
diff --git a/website/content/en/docs/getting-started/getting-started-with-karpenter/_index.md b/website/content/en/docs/getting-started/getting-started-with-karpenter/_index.md index 9b5891b1658b..4791c6681d8e 100644 --- a/website/content/en/docs/getting-started/getting-started-with-karpenter/_index.md +++ b/website/content/en/docs/getting-started/getting-started-with-karpenter/_index.md @@ -45,7 +45,7 @@ After setting up the tools, set the Karpenter and Kubernetes version: ```bash export KARPENTER_NAMESPACE="kube-system" -export KARPENTER_VERSION="0.36.1" +export KARPENTER_VERSION="0.36.2" export K8S_VERSION="1.29" ``` @@ -109,13 +109,13 @@ See [Enabling Windows support](https://docs.aws.amazon.com/eks/latest/userguide/ As the OCI Helm chart is signed by [Cosign](https://github.com/sigstore/cosign) as part of the release process you can verify the chart before installing it by running the following command. ```bash -cosign verify public.ecr.aws/karpenter/karpenter:0.36.1 \ +cosign verify public.ecr.aws/karpenter/karpenter:0.36.2 \ --certificate-oidc-issuer=https://token.actions.githubusercontent.com \ --certificate-identity-regexp='https://github\.com/aws/karpenter-provider-aws/\.github/workflows/release\.yaml@.+' \ --certificate-github-workflow-repository=aws/karpenter-provider-aws \ --certificate-github-workflow-name=Release \ - --certificate-github-workflow-ref=refs/tags/v0.36.1 \ - --annotations version=0.36.1 + --certificate-github-workflow-ref=refs/tags/v0.36.2 \ + --annotations version=0.36.2 ``` {{% alert title="DNS Policy Notice" color="warning" %}} diff --git a/website/content/en/docs/getting-started/migrating-from-cas/_index.md b/website/content/en/docs/getting-started/migrating-from-cas/_index.md index 8a053ecb51aa..5b9e07ea0f1f 100644 --- a/website/content/en/docs/getting-started/migrating-from-cas/_index.md +++ b/website/content/en/docs/getting-started/migrating-from-cas/_index.md @@ -92,7 +92,7 @@ One for your Karpenter node role and one for your 
existing node group. First set the Karpenter release you want to deploy. ```bash -export KARPENTER_VERSION="0.36.1" +export KARPENTER_VERSION="0.36.2" ``` We can now generate a full Karpenter deployment yaml from the Helm chart. @@ -117,7 +117,6 @@ affinity: - matchExpressions: - key: karpenter.sh/nodepool operator: DoesNotExist - - matchExpressions: - key: eks.amazonaws.com/nodegroup operator: In values: @@ -133,7 +132,7 @@ Now that our deployment is ready we can create the karpenter namespace, create t ## Create default NodePool -We need to create a default NodePool so Karpenter knows what types of nodes we want for unscheduled workloads. You can refer to some of the [example NodePool](https://github.com/aws/karpenter/tree/v0.36.1/examples/v1beta1) for specific needs. +We need to create a default NodePool so Karpenter knows what types of nodes we want for unscheduled workloads. You can refer to some of the [example NodePools](https://github.com/aws/karpenter/tree/v0.36.2/examples/v1beta1) for specific needs.
{{% script file="./content/en/{VERSION}/getting-started/migrating-from-cas/scripts/step10-create-nodepool.sh" language="bash" %}} diff --git a/website/content/en/docs/reference/cloudformation.md b/website/content/en/docs/reference/cloudformation.md index cdd34f44f47f..0e6c60b61384 100644 --- a/website/content/en/docs/reference/cloudformation.md +++ b/website/content/en/docs/reference/cloudformation.md @@ -17,7 +17,7 @@ These descriptions should allow you to understand: To download a particular version of `cloudformation.yaml`, set the version and use `curl` to pull the file to your local system: ```bash -export KARPENTER_VERSION="0.36.1" +export KARPENTER_VERSION="0.36.2" curl https://raw.githubusercontent.com/aws/karpenter-provider-aws/v"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml > cloudformation.yaml ``` @@ -376,7 +376,7 @@ This gives EC2 permission explicit permission to use the `KarpenterNodeRole-${Cl #### AllowScopedInstanceProfileCreationActions The AllowScopedInstanceProfileCreationActions Sid gives the Karpenter controller permission to create a new instance profile with [`iam:CreateInstanceProfile`](https://docs.aws.amazon.com/IAM/latest/APIReference/API_CreateInstanceProfile.html), -provided that the request is made to a cluster with `kubernetes.io/cluster/${ClusterName` set to owned and is made in the current region. +provided that the request is made to a cluster with `kubernetes.io/cluster/${ClusterName}` set to owned and is made in the current region. Also, `karpenter.k8s.aws/ec2nodeclass` must be set to some value. This ensures that Karpenter can generate instance profiles on your behalf based on roles specified in your `EC2NodeClasses` that you use to configure Karpenter. ```json @@ -431,7 +431,7 @@ Also, `karpenter.k8s.aws/ec2nodeclass` must be set to some value. 
This ensures t #### AllowScopedInstanceProfileActions The AllowScopedInstanceProfileActions Sid gives the Karpenter controller permission to perform [`iam:AddRoleToInstanceProfile`](https://docs.aws.amazon.com/IAM/latest/APIReference/API_AddRoleToInstanceProfile.html), [`iam:RemoveRoleFromInstanceProfile`](https://docs.aws.amazon.com/IAM/latest/APIReference/API_RemoveRoleFromInstanceProfile.html), and [`iam:DeleteInstanceProfile`](https://docs.aws.amazon.com/IAM/latest/APIReference/API_DeleteInstanceProfile.html) actions, -provided that the request is made to a cluster with `kubernetes.io/cluster/${ClusterName` set to owned and is made in the current region. +provided that the request is made to a cluster with `kubernetes.io/cluster/${ClusterName}` set to owned and is made in the current region. Also, `karpenter.k8s.aws/ec2nodeclass` must be set to some value. This permission is further enforced by the `iam:PassRole` permission. If Karpenter attempts to add a role to an instance profile that it doesn't have `iam:PassRole` permission on, that call will fail. Therefore, if you configure Karpenter to use a new role through the `EC2NodeClass`, ensure that you also specify that role within your `iam:PassRole` permission. ```json diff --git a/website/content/en/docs/reference/instance-types.md b/website/content/en/docs/reference/instance-types.md index b304534088bf..577c119ab214 100644 --- a/website/content/en/docs/reference/instance-types.md +++ b/website/content/en/docs/reference/instance-types.md @@ -10,8 +10,7 @@ description: > AWS instance types offer varying resources and can be selected by labels. 
The values provided below are the resources available with some assumptions and after the instance overhead has been subtracted: - `blockDeviceMappings` are not configured -- `aws-eni-limited-pod-density` is assumed to be `true` -- `amiFamily` is set to the default of `AL2` +- `amiFamily` is set to `AL2023` ## a1 Family ### `a1.medium` #### Labels @@ -3174,8 +3173,8 @@ below are the resources available with some assumptions and after the instance o |--|--| |cpu|127610m| |ephemeral-storage|17Gi| - |memory|238333Mi| - |pods|345| + |memory|237794Mi| + |pods|394| |vpc.amazonaws.com/efa|2| |vpc.amazonaws.com/pod-eni|108| ### `c6in.metal` @@ -3200,8 +3199,8 @@ below are the resources available with some assumptions and after the instance o |--|--| |cpu|127610m| |ephemeral-storage|17Gi| - |memory|238333Mi| - |pods|345| + |memory|237794Mi| + |pods|394| |vpc.amazonaws.com/efa|2| |vpc.amazonaws.com/pod-eni|108| ## c7a Family @@ -11192,8 +11191,8 @@ below are the resources available with some assumptions and after the instance o |--|--| |cpu|127610m| |ephemeral-storage|17Gi| - |memory|480816Mi| - |pods|345| + |memory|480277Mi| + |pods|394| |vpc.amazonaws.com/efa|2| |vpc.amazonaws.com/pod-eni|108| ### `m6idn.metal` @@ -11219,8 +11218,8 @@ below are the resources available with some assumptions and after the instance o |--|--| |cpu|127610m| |ephemeral-storage|17Gi| - |memory|480816Mi| - |pods|345| + |memory|480277Mi| + |pods|394| |vpc.amazonaws.com/efa|2| |vpc.amazonaws.com/pod-eni|108| ## m6in Family @@ -11446,8 +11445,8 @@ below are the resources available with some assumptions and after the instance o |--|--| |cpu|127610m| |ephemeral-storage|17Gi| - |memory|480816Mi| - |pods|345| + |memory|480277Mi| + |pods|394| |vpc.amazonaws.com/efa|2| |vpc.amazonaws.com/pod-eni|108| ### `m6in.metal` @@ -11472,8 +11471,8 @@ below are the resources available with some assumptions and after the instance o |--|--| |cpu|127610m| |ephemeral-storage|17Gi| - |memory|480816Mi| - |pods|345| + 
|memory|480277Mi| + |pods|394| |vpc.amazonaws.com/efa|2| |vpc.amazonaws.com/pod-eni|108| ## m7a Family @@ -16231,8 +16230,8 @@ below are the resources available with some assumptions and after the instance o |--|--| |cpu|127610m| |ephemeral-storage|17Gi| - |memory|965782Mi| - |pods|345| + |memory|965243Mi| + |pods|394| |vpc.amazonaws.com/efa|2| |vpc.amazonaws.com/pod-eni|108| ### `r6idn.metal` @@ -16258,8 +16257,8 @@ below are the resources available with some assumptions and after the instance o |--|--| |cpu|127610m| |ephemeral-storage|17Gi| - |memory|965782Mi| - |pods|345| + |memory|965243Mi| + |pods|394| |vpc.amazonaws.com/efa|2| |vpc.amazonaws.com/pod-eni|108| ## r6in Family @@ -16485,8 +16484,8 @@ below are the resources available with some assumptions and after the instance o |--|--| |cpu|127610m| |ephemeral-storage|17Gi| - |memory|965782Mi| - |pods|345| + |memory|965243Mi| + |pods|394| |vpc.amazonaws.com/efa|2| |vpc.amazonaws.com/pod-eni|108| ### `r6in.metal` @@ -16511,8 +16510,8 @@ below are the resources available with some assumptions and after the instance o |--|--| |cpu|127610m| |ephemeral-storage|17Gi| - |memory|965782Mi| - |pods|345| + |memory|965243Mi| + |pods|394| |vpc.amazonaws.com/efa|2| |vpc.amazonaws.com/pod-eni|108| ## r7a Family diff --git a/website/content/en/docs/reference/threat-model.md b/website/content/en/docs/reference/threat-model.md index 9f6cf6fe9c23..84a4fefb1cef 100644 --- a/website/content/en/docs/reference/threat-model.md +++ b/website/content/en/docs/reference/threat-model.md @@ -31,11 +31,11 @@ A Cluster Developer has the ability to create pods via `Deployments`, `ReplicaSe Karpenter has permissions to create and manage cloud instances. Karpenter has Kubernetes API permissions to create, update, and remove nodes, as well as evict pods. For a full list of the permissions, see the RBAC rules in the helm chart template. Karpenter also has AWS IAM permissions to create instances with IAM roles. 
-* [aggregate-clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/aggregate-clusterrole.yaml) -* [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole-core.yaml) -* [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/clusterrole.yaml) -* [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/rolebinding.yaml) -* [role.yaml](https://github.com/aws/karpenter/blob/v0.36.1/charts/karpenter/templates/role.yaml) +* [aggregate-clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.2/charts/karpenter/templates/aggregate-clusterrole.yaml) +* [clusterrole-core.yaml](https://github.com/aws/karpenter/blob/v0.36.2/charts/karpenter/templates/clusterrole-core.yaml) +* [clusterrole.yaml](https://github.com/aws/karpenter/blob/v0.36.2/charts/karpenter/templates/clusterrole.yaml) +* [rolebinding.yaml](https://github.com/aws/karpenter/blob/v0.36.2/charts/karpenter/templates/rolebinding.yaml) +* [role.yaml](https://github.com/aws/karpenter/blob/v0.36.2/charts/karpenter/templates/role.yaml) ## Assumptions diff --git a/website/content/en/docs/upgrading/upgrade-guide.md b/website/content/en/docs/upgrading/upgrade-guide.md index ac6b56667366..9720ffd86b5e 100644 --- a/website/content/en/docs/upgrading/upgrade-guide.md +++ b/website/content/en/docs/upgrading/upgrade-guide.md @@ -28,9 +28,9 @@ If you get the error `invalid ownership metadata; label validation error:` while In general, you can reapply the CRDs in the `crds` directory of the Karpenter Helm chart: ```shell -kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.1/pkg/apis/crds/karpenter.sh_nodepools.yaml -kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.1/pkg/apis/crds/karpenter.sh_nodeclaims.yaml -kubectl apply -f 
https://raw.githubusercontent.com/aws/karpenter/v0.36.1/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.2/pkg/apis/crds/karpenter.sh_nodepools.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.2/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +kubectl apply -f https://raw.githubusercontent.com/aws/karpenter/v0.36.2/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml ```