diff --git a/.github/workflows/approval-comment.yaml b/.github/workflows/approval-comment.yaml index c2adf35ff842..6e59fd3181e7 100644 --- a/.github/workflows/approval-comment.yaml +++ b/.github/workflows/approval-comment.yaml @@ -19,7 +19,7 @@ jobs: mkdir -p /tmp/artifacts { echo "$REVIEW_BODY"; echo "$PULL_REQUEST_NUMBER"; echo "$COMMIT_ID"; } >> /tmp/artifacts/metadata.txt cat /tmp/artifacts/metadata.txt - - uses: actions/upload-artifact@89ef406dd8d7e03cfd12d9e0a4a378f454709029 # v4.3.5 + - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 with: name: artifacts path: /tmp/artifacts diff --git a/go.mod b/go.mod index 8128909e8a7b..6d85b66df9f1 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/imdario/mergo v0.3.16 github.com/jonathan-innis/aws-sdk-go-prometheus v0.1.1-0.20240804232425-54c8227e0bab github.com/mitchellh/hashstructure/v2 v2.0.2 - github.com/onsi/ginkgo/v2 v2.19.1 + github.com/onsi/ginkgo/v2 v2.20.0 github.com/onsi/gomega v1.34.1 github.com/patrickmn/go-cache v2.1.0+incompatible github.com/pelletier/go-toml/v2 v2.2.2 @@ -32,7 +32,7 @@ require ( k8s.io/utils v0.0.0-20240102154912-e7106e64919e knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd sigs.k8s.io/controller-runtime v0.18.4 - sigs.k8s.io/karpenter v0.37.1-0.20240808004019-5758fa2b0eb7 + sigs.k8s.io/karpenter v0.37.1-0.20240812014159-feb6857c0fc4 sigs.k8s.io/yaml v1.4.0 ) @@ -64,7 +64,7 @@ require ( github.com/google/gnostic-models v0.6.8 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect + github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 // indirect github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0 // indirect github.com/hashicorp/golang-lru v1.0.2 // indirect @@ -91,13 +91,13 @@ require ( github.com/spf13/pflag v1.0.5 // indirect go.opencensus.io v0.24.0 // indirect go.uber.org/automaxprocs v1.5.3 // indirect - golang.org/x/net v0.27.0 // indirect + golang.org/x/net v0.28.0 // indirect golang.org/x/oauth2 v0.18.0 // indirect - golang.org/x/sys v0.22.0 // indirect - golang.org/x/term v0.22.0 // indirect - golang.org/x/text v0.16.0 // indirect + golang.org/x/sys v0.23.0 // indirect + golang.org/x/term v0.23.0 // indirect + golang.org/x/text v0.17.0 // indirect golang.org/x/time v0.6.0 // indirect - golang.org/x/tools v0.23.0 // indirect + golang.org/x/tools v0.24.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/api v0.146.0 // indirect google.golang.org/appengine v1.6.8 // indirect diff --git a/go.sum b/go.sum index 81ed909fe813..d5bbddc8219a 100644 --- a/go.sum +++ b/go.sum @@ -196,8 +196,8 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= -github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= +github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k= +github.com/google/pprof 
v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -272,8 +272,8 @@ github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRW github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= -github.com/onsi/ginkgo/v2 v2.19.1 h1:QXgq3Z8Crl5EL1WBAC98A5sEBHARrAJNzAmMxzLcRF0= -github.com/onsi/ginkgo/v2 v2.19.1/go.mod h1:O3DtEWQkPa/F7fBMgmZQKKsluAy8pd3rEQdrjkPb9zA= +github.com/onsi/ginkgo/v2 v2.20.0 h1:PE84V2mHqoT1sglvHc8ZdQtPcwmvvt29WLEEO3xmdZw= +github.com/onsi/ginkgo/v2 v2.20.0/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI= github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= @@ -422,8 +422,8 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.19.0 h1:fEdghXQSo20giMthA7cd28ZC+jts4amQ3YMXiP5oMQ8= -golang.org/x/mod v0.19.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= +golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -462,8 +462,8 @@ golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= -golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -532,14 +532,14 @@ golang.org/x/sys v0.0.0-20220708085239-5a0f0661e09d/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= +golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= -golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= -golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= +golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= +golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -550,8 +550,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= +golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -601,8 +601,8 @@ golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.23.0 h1:SGsXPZ+2l4JsgaCKkx+FQ9YZ5XEtA1GZYuoDjenLjvg= -golang.org/x/tools v0.23.0/go.mod h1:pnu6ufv6vQkll6szChhK3C3L/ruaIv5eBeztNG8wtsI= +golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= +golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -761,8 +761,8 @@ 
sigs.k8s.io/controller-runtime v0.18.4 h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHv sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/karpenter v0.37.1-0.20240808004019-5758fa2b0eb7 h1:cP/YGLKmljE8KCoURtnAM10pFS5/jw49EuyiUiGTCLA= -sigs.k8s.io/karpenter v0.37.1-0.20240808004019-5758fa2b0eb7/go.mod h1:3NLmsnHHw8p4VutpjTOPUZyhE3qH6yGTs8O94Lsu8uw= +sigs.k8s.io/karpenter v0.37.1-0.20240812014159-feb6857c0fc4 h1:ub5jM8/+jWnzJt6fCSoa3yD9qdGGphX8bK7LtSib+yk= +sigs.k8s.io/karpenter v0.37.1-0.20240812014159-feb6857c0fc4/go.mod h1:3NLmsnHHw8p4VutpjTOPUZyhE3qH6yGTs8O94Lsu8uw= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/pkg/apis/v1/ec2nodeclass.go b/pkg/apis/v1/ec2nodeclass.go index f8519d8ff543..5ad59c7d8552 100644 --- a/pkg/apis/v1/ec2nodeclass.go +++ b/pkg/apis/v1/ec2nodeclass.go @@ -474,6 +474,12 @@ func (in *EC2NodeClass) InstanceProfileTags(clusterName string) map[string]strin }) } +// UbuntuIncompatible returns true if the NodeClass has the ubuntu compatibility annotation. This will cause the NodeClass to show +// as NotReady in its status conditions, opting its referencing NodePools out of provisioning and drift. +func (in *EC2NodeClass) UbuntuIncompatible() bool { + return lo.Contains(strings.Split(in.Annotations[AnnotationUbuntuCompatibilityKey], ","), AnnotationUbuntuCompatibilityIncompatible) +} + // AMIFamily returns the family for a NodePool based on the following items, in order of precdence: // - ec2nodeclass.spec.amiFamily // - ec2nodeclass.spec.amiSelectorTerms[].alias diff --git a/pkg/apis/v1/ec2nodeclass_conversion.go b/pkg/apis/v1/ec2nodeclass_conversion.go index 4af86ba7eec9..caa5284d13c3 100644 --- a/pkg/apis/v1/ec2nodeclass_conversion.go +++ b/pkg/apis/v1/ec2nodeclass_conversion.go @@ -35,6 +35,11 @@ func (in *EC2NodeClass) ConvertTo(ctx context.Context, to apis.Convertible) erro if value, ok := in.Annotations[AnnotationUbuntuCompatibilityKey]; ok { compatSpecifiers := strings.Split(value, ",") + // Remove the `id: ami-placeholder` AMISelectorTerms that are injected to pass CRD validation at v1 + // we don't need these in v1beta1, and should be dropped + if lo.Contains(compatSpecifiers, AnnotationUbuntuCompatibilityIncompatible) { + in.Spec.AMISelectorTerms = nil + } // The only blockDeviceMappings present on the v1 EC2NodeClass are those that we injected during conversion. // These should be dropped. if lo.Contains(compatSpecifiers, AnnotationUbuntuCompatibilityBlockDeviceMappings) { @@ -136,13 +141,6 @@ func (in *EC2NodeClass) ConvertFrom(ctx context.Context, from apis.Convertible) in.Spec.AMIFamily = v1beta1enc.Spec.AMIFamily } case AMIFamilyUbuntu: - // If there are no AMISelectorTerms specified, we will fail closed when converting the NodeClass. Users must - // pin their AMIs **before** upgrading to Karpenter v1.0.0 if they were using the Ubuntu AMIFamily. 
- // TODO: jmdeal@ verify doc link to the upgrade guide once available - if len(v1beta1enc.Spec.AMISelectorTerms) == 0 { - return fmt.Errorf("converting EC2NodeClass %q from v1beta1 to v1, automatic Ubuntu AMI discovery is not supported (https://karpenter.sh/v1.0/upgrading/upgrade-guide/)", v1beta1enc.Name) - } - // If AMISelectorTerms were specified, we can continue to use them to discover Ubuntu AMIs and use the AL2 AMI // family for bootstrapping. AL2 and Ubuntu have an identical UserData format, but do have different default // BlockDeviceMappings. We'll set the BlockDeviceMappings to Ubuntu's default if no user specified @@ -161,6 +159,16 @@ func (in *EC2NodeClass) ConvertFrom(ctx context.Context, from apis.Convertible) }, }} } + + // If there are no AMISelectorTerms specified, we mark the ec2nodeclass as incompatible. + // Karpenter will ignore incompatible ec2nodeclasses for provisioning and computing drift. + if len(v1beta1enc.Spec.AMISelectorTerms) == 0 { + compatSpecifiers = append(compatSpecifiers, AnnotationUbuntuCompatibilityIncompatible) + in.Spec.AMISelectorTerms = []AMISelectorTerm{{ + ID: "ami-placeholder", + }} + } + // This compatibility annotation will be used to determine if the amiFamily was mutated from Ubuntu to AL2, and // if we needed to inject any blockDeviceMappings. This is required to enable a round-trip conversion. in.Annotations = lo.Assign(in.Annotations, map[string]string{ diff --git a/pkg/apis/v1/ec2nodeclass_conversion_test.go b/pkg/apis/v1/ec2nodeclass_conversion_test.go index 9dd49be8f41b..4c25eec22ca7 100644 --- a/pkg/apis/v1/ec2nodeclass_conversion_test.go +++ b/pkg/apis/v1/ec2nodeclass_conversion_test.go @@ -459,10 +459,11 @@ var _ = Describe("Convert v1beta1 to v1 EC2NodeClass API", func() { }, }})) }) - It("should fail to convert v1beta1 ec2nodeclass when amiFamily is Ubuntu (without amiSelectorTerms)", func() { + It("should convert v1beta1 ec2nodeclass when amiFamily is Ubuntu (without amiSelectorTerms) but mark incompatible", func() { v1beta1ec2nodeclass.Spec.AMIFamily = lo.ToPtr(v1beta1.AMIFamilyUbuntu) v1beta1ec2nodeclass.Spec.AMISelectorTerms = nil - Expect(v1ec2nodeclass.ConvertFrom(ctx, v1beta1ec2nodeclass)).ToNot(Succeed()) + Expect(v1ec2nodeclass.ConvertFrom(ctx, v1beta1ec2nodeclass)).To(Succeed()) + Expect(v1ec2nodeclass.UbuntuIncompatible()).To(BeTrue()) }) It("should convert v1beta1 ec2nodeclass user data", func() { v1beta1ec2nodeclass.Spec.UserData = lo.ToPtr("test user data") diff --git a/pkg/apis/v1/labels.go b/pkg/apis/v1/labels.go index f7422024362a..c391b0faab01 100644 --- a/pkg/apis/v1/labels.go +++ b/pkg/apis/v1/labels.go @@ -127,6 +127,7 @@ var ( AnnotationInstanceTagged = apis.Group + "/tagged" AnnotationUbuntuCompatibilityKey = apis.CompatibilityGroup + "/v1beta1-ubuntu" + AnnotationUbuntuCompatibilityIncompatible = "incompatible" AnnotationUbuntuCompatibilityAMIFamily = "amiFamily" AnnotationUbuntuCompatibilityBlockDeviceMappings = "blockDeviceMappings" diff --git a/pkg/cloudprovider/cloudprovider.go b/pkg/cloudprovider/cloudprovider.go index 3cec17461804..81ee525ea522 100644 --- a/pkg/cloudprovider/cloudprovider.go +++ b/pkg/cloudprovider/cloudprovider.go @@ -77,6 +77,7 @@ func New(instanceTypeProvider instancetype.Provider, instanceProvider instance.P } // Create a NodeClaim given the constraints. 
+// nolint: gocyclo func (c *CloudProvider) Create(ctx context.Context, nodeClaim *karpv1.NodeClaim) (*karpv1.NodeClaim, error) { nodeClass, err := c.resolveNodeClassFromNodeClaim(ctx, nodeClaim) if err != nil { @@ -87,6 +88,10 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *karpv1.NodeClaim) return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("resolving node class, %w", err)) } + // TODO: Remove this once support for conversion webhooks is dropped + if nodeClass.UbuntuIncompatible() { + return nil, cloudprovider.NewNodeClassNotReadyError(fmt.Errorf("EC2NodeClass %q is incompatible with Karpenter v1, specify your Ubuntu AMIs in your AMISelectorTerms", nodeClass.Name)) + } // TODO: Remove this after v1 nodePool, err := utils.ResolveNodePoolFromNodeClaim(ctx, c.kubeClient, nodeClaim) if err != nil { diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 8056d2f23374..88ef02bfd879 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -197,6 +197,13 @@ var _ = Describe("CloudProvider", func() { Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed()) Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed()) }) + // TODO: remove after v1 + It("should fail instance creation if NodeClass has ubuntu incompatible annotation", func() { + nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1.AnnotationUbuntuCompatibilityKey: v1.AnnotationUbuntuCompatibilityIncompatible}) + ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim) + _, err := cloudProvider.Create(ctx, nodeClaim) + Expect(corecloudprovider.IsNodeClassNotReadyError(err)).To(BeTrue()) + }) It("should not proceed with instance creation if NodeClass is unknown", func() { nodeClass.StatusConditions().SetUnknown(opstatus.ConditionReady) ExpectApplied(ctx, env.Client, nodePool, nodeClass, nodeClaim) diff --git a/pkg/controllers/nodeclass/status/ami.go b/pkg/controllers/nodeclass/status/ami.go index faf94d2dac08..3a23a1165808 100644 --- a/pkg/controllers/nodeclass/status/ami.go +++ b/pkg/controllers/nodeclass/status/ami.go @@ -35,6 +35,10 @@ type AMI struct { } func (a *AMI) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconcile.Result, error) { + if nodeClass.UbuntuIncompatible() { + nodeClass.StatusConditions().SetFalse(v1.ConditionTypeAMIsReady, "AMINotFound", "Ubuntu AMI discovery is not supported at v1, refer to the upgrade guide (https://karpenter.sh/docs/upgrading/upgrade-guide/#upgrading-to-100)") + return reconcile.Result{}, nil + } amis, err := a.amiProvider.List(ctx, nodeClass) if err != nil { return reconcile.Result{}, fmt.Errorf("getting amis, %w", err) diff --git a/pkg/controllers/nodeclass/status/ami_test.go b/pkg/controllers/nodeclass/status/ami_test.go index 6ce1caefc5ea..8ff07296c27f 100644 --- a/pkg/controllers/nodeclass/status/ami_test.go +++ b/pkg/controllers/nodeclass/status/ami_test.go @@ -89,6 +89,18 @@ var _ = Describe("NodeClass AMI Status Controller", func() { }, }) }) + It("should fail to resolve AMIs if the nodeclass has ubuntu incompatible annotation", func() { + nodeClass.Spec.AMIFamily = lo.ToPtr(v1.AMIFamilyAL2) + nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1.AnnotationUbuntuCompatibilityKey: v1.AnnotationUbuntuCompatibilityIncompatible}) + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, 
nodeClass) + cond := nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsReady) + Expect(cond.IsTrue()).To(BeFalse()) + Expect(cond.Message).To(Equal("Ubuntu AMI discovery is not supported at v1, refer to the upgrade guide (https://karpenter.sh/docs/upgrading/upgrade-guide/#upgrading-to-100)")) + Expect(cond.Reason).To(Equal("AMINotFound")) + + }) It("should resolve amiSelector AMIs and requirements into status", func() { version := lo.Must(awsEnv.VersionProvider.Get(ctx)) diff --git a/test/pkg/environment/common/environment.go b/test/pkg/environment/common/environment.go index 2700cc963a69..8f60ecd08f0d 100644 --- a/test/pkg/environment/common/environment.go +++ b/test/pkg/environment/common/environment.go @@ -178,7 +178,7 @@ func (env *Environment) DefaultNodePool(nodeClass *v1.EC2NodeClass) *karpv1.Node }, } nodePool.Spec.Disruption.ConsolidationPolicy = karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("Never") nodePool.Spec.Template.Spec.ExpireAfter.Duration = nil nodePool.Spec.Limits = karpv1.Limits(corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("1000"), diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go index 60f0c3c50f91..6069b0aee838 100644 --- a/test/pkg/environment/common/expectations.go +++ b/test/pkg/environment/common/expectations.go @@ -279,6 +279,21 @@ func (env *Environment) EventuallyExpectHealthy(pods ...*corev1.Pod) { env.EventuallyExpectHealthyWithTimeout(-1, pods...) } +func (env *Environment) EventuallyExpectTerminating(pods ...*corev1.Pod) { + GinkgoHelper() + env.EventuallyExpectTerminatingWithTimeout(-1, pods...) 
+} + +func (env *Environment) EventuallyExpectTerminatingWithTimeout(timeout time.Duration, pods ...*corev1.Pod) { + GinkgoHelper() + Eventually(func(g Gomega) { + for _, pod := range pods { + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(pod), pod)).To(Succeed()) + g.Expect(pod.DeletionTimestamp.IsZero()).To(BeFalse()) + } + }).WithTimeout(timeout).Should(Succeed()) +} + func (env *Environment) EventuallyExpectHealthyWithTimeout(timeout time.Duration, pods ...*corev1.Pod) { GinkgoHelper() Eventually(func(g Gomega) { @@ -290,7 +305,17 @@ func (env *Environment) EventuallyExpectHealthyWithTimeout(timeout time.Duration ))) } }).WithTimeout(timeout).Should(Succeed()) +} +func (env *Environment) ConsistentlyExpectTerminatingPods(duration time.Duration, pods ...*corev1.Pod) { + GinkgoHelper() + By(fmt.Sprintf("expecting %d pods to be terminating for %s", len(pods), duration)) + Consistently(func(g Gomega) { + for _, pod := range pods { + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(pod), pod)).To(Succeed()) + g.Expect(pod.DeletionTimestamp.IsZero()).To(BeFalse()) + } + }, duration.String()).Should(Succeed()) } func (env *Environment) ConsistentlyExpectHealthyPods(duration time.Duration, pods ...*corev1.Pod) { diff --git a/test/suites/chaos/suite_test.go b/test/suites/chaos/suite_test.go index 1310a926e4cc..8023a9cdb20c 100644 --- a/test/suites/chaos/suite_test.go +++ b/test/suites/chaos/suite_test.go @@ -82,7 +82,7 @@ var _ = Describe("Chaos", func() { }, }) nodePool.Spec.Disruption.ConsolidationPolicy = karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") numPods := 1 dep := coretest.Deployment(coretest.DeploymentOptions{ @@ -113,7 +113,7 @@ var _ = Describe("Chaos", func() { defer cancel() nodePool.Spec.Disruption.ConsolidationPolicy = karpv1.ConsolidationPolicyWhenEmpty - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(30 * time.Second)} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("30s") numPods := 1 dep := coretest.Deployment(coretest.DeploymentOptions{ Replicas: int32(numPods), diff --git a/test/suites/consolidation/suite_test.go b/test/suites/consolidation/suite_test.go index 60382727de13..6d6ebb652514 100644 --- a/test/suites/consolidation/suite_test.go +++ b/test/suites/consolidation/suite_test.go @@ -71,7 +71,7 @@ var _ = Describe("Consolidation", func() { var nodePool *karpv1.NodePool BeforeEach(func() { nodePool = env.DefaultNodePool(nodeClass) - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("Never") }) It("should update lastPodEventTime when pods are scheduled and removed", func() { @@ -187,7 +187,7 @@ var _ = Describe("Consolidation", func() { var numPods int32 BeforeEach(func() { nodePool = env.DefaultNodePool(nodeClass) - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") numPods = 5 dep = test.Deployment(test.DeploymentOptions{ @@ -266,7 +266,7 @@ var _ = Describe("Consolidation", func() { }) It("should respect budgets for non-empty delete consolidation", func() { // This test will hold consolidation until we are ready to execute it - 
nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("Never") nodePool = test.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{ @@ -319,7 +319,7 @@ var _ = Describe("Consolidation", func() { } By("enabling consolidation") - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") env.ExpectUpdated(nodePool) // Ensure that we get two nodes tainted, and they have overlap during consolidation @@ -335,7 +335,7 @@ var _ = Describe("Consolidation", func() { It("should respect budgets for non-empty replace consolidation", func() { appLabels := map[string]string{"app": "large-app"} // This test will hold consolidation until we are ready to execute it - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("Never") nodePool = test.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{ @@ -423,7 +423,7 @@ var _ = Describe("Consolidation", func() { env.EventuallyExpectHealthyPodCount(selector, int(numPods)) By("enabling consolidation") - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") env.ExpectUpdated(nodePool) // Ensure that we get three nodes tainted, and they have overlap during the consolidation @@ -518,7 +518,7 @@ var _ = Describe("Consolidation", func() { Disruption: karpv1.Disruption{ ConsolidationPolicy: karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized, // Disable Consolidation until we're ready - ConsolidateAfter: karpv1.NillableDuration{}, + ConsolidateAfter: karpv1.MustParseNillableDuration("Never"), }, Template: karpv1.NodeClaimTemplate{ Spec: karpv1.NodeClaimTemplateSpec{ @@ -583,7 +583,7 @@ var _ = Describe("Consolidation", func() { env.EventuallyExpectAvgUtilization(corev1.ResourceCPU, "<", 0.5) // Enable consolidation as WhenEmptyOrUnderutilized doesn't allow a consolidateAfter value - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") env.ExpectUpdated(nodePool) // With consolidation enabled, we now must delete nodes @@ -601,7 +601,7 @@ var _ = Describe("Consolidation", func() { Disruption: karpv1.Disruption{ ConsolidationPolicy: karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized, // Disable Consolidation until we're ready - ConsolidateAfter: karpv1.NillableDuration{}, + ConsolidateAfter: karpv1.MustParseNillableDuration("Never"), }, Template: karpv1.NodeClaimTemplate{ Spec: karpv1.NodeClaimTemplateSpec{ @@ -717,7 +717,7 @@ var _ = Describe("Consolidation", func() { env.ExpectUpdated(largeDep) env.EventuallyExpectAvgUtilization(corev1.ResourceCPU, "<", 0.5) - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") env.ExpectUpdated(nodePool) // With consolidation enabled, we now must replace each node in turn to consolidate due to the anti-affinity @@ -756,7 +756,7 @@ var _ = Describe("Consolidation", func() { Disruption: karpv1.Disruption{ ConsolidationPolicy: 
karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized, // Disable Consolidation until we're ready - ConsolidateAfter: karpv1.NillableDuration{}, + ConsolidateAfter: karpv1.MustParseNillableDuration("Never"), }, Template: karpv1.NodeClaimTemplate{ Spec: karpv1.NodeClaimTemplateSpec{ @@ -836,7 +836,7 @@ var _ = Describe("Consolidation", func() { // Enable spot capacity type after the on-demand node is provisioned // Expect the node to consolidate to a spot instance as it will be a cheaper // instance than on-demand - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") test.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{ NodeSelectorRequirement: corev1.NodeSelectorRequirement{ diff --git a/test/suites/expiration/suite_test.go b/test/suites/expiration/suite_test.go index 13f1f54e3033..7f8ccc09dffa 100644 --- a/test/suites/expiration/suite_test.go +++ b/test/suites/expiration/suite_test.go @@ -16,7 +16,6 @@ package expiration_test import ( "testing" - "time" "github.com/samber/lo" appsv1 "k8s.io/api/apps/v1" @@ -82,7 +81,7 @@ var _ = Describe("Expiration", func() { selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) }) It("should expire the node after the expiration is reached", func() { - nodePool.Spec.Template.Spec.ExpireAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Minute * 2)} + nodePool.Spec.Template.Spec.ExpireAfter = karpv1.MustParseNillableDuration("2m") env.ExpectCreated(nodeClass, nodePool, dep) nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] @@ -118,7 +117,7 @@ var _ = Describe("Expiration", func() { It("should replace expired node with a single node and schedule all pods", func() { // Set expire after to 5 minutes since we have to respect PDB and move over pods one at a time from one node to another. // The new nodes should not expire before all the pods are moved over. - nodePool.Spec.Template.Spec.ExpireAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Minute * 5)} + nodePool.Spec.Template.Spec.ExpireAfter = karpv1.MustParseNillableDuration("5m") var numPods int32 = 5 // We should setup a PDB that will only allow a minimum of 1 pod to be pending at a time minAvailable := intstr.FromInt32(numPods - 1) diff --git a/test/suites/integration/daemonset_test.go b/test/suites/integration/daemonset_test.go index 438a86c5acd2..a617cf4a0876 100644 --- a/test/suites/integration/daemonset_test.go +++ b/test/suites/integration/daemonset_test.go @@ -15,8 +15,6 @@ limitations under the License. package integration_test import ( - "time" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" schedulingv1 "k8s.io/api/scheduling/v1" @@ -26,7 +24,6 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - "github.com/samber/lo" "sigs.k8s.io/controller-runtime/pkg/client" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" @@ -41,7 +38,7 @@ var _ = Describe("DaemonSet", func() { BeforeEach(func() { nodePool.Spec.Disruption.ConsolidationPolicy = karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") priorityclass = &schedulingv1.PriorityClass{ ObjectMeta: metav1.ObjectMeta{ Name: "high-priority-daemonsets", diff --git a/test/suites/integration/validation_test.go b/test/suites/integration/validation_test.go index 81db3e289113..76f4451408f0 100644 --- a/test/suites/integration/validation_test.go +++ b/test/suites/integration/validation_test.go @@ -16,7 +16,6 @@ package integration_test import ( "fmt" - "time" "github.com/samber/lo" corev1 "k8s.io/api/core/v1" @@ -103,12 +102,12 @@ var _ = Describe("Validation", func() { }) It("should error when consolidateAfter is negative", func() { nodePool.Spec.Disruption.ConsolidationPolicy = karpv1.ConsolidationPolicyWhenEmpty - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(-time.Second)} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("-1s") Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) }) It("should succeed when ConsolidationPolicy=WhenEmptyOrUnderutilized is used with consolidateAfter", func() { nodePool.Spec.Disruption.ConsolidationPolicy = karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Minute)} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("1m") Expect(env.Client.Create(env.Context, nodePool)).To(Succeed()) }) It("should error when minValues for a requirement key is negative", func() { diff --git a/test/suites/scale/deprovisioning_test.go b/test/suites/scale/deprovisioning_test.go index f14e265aebb9..3397eb71ce27 100644 --- a/test/suites/scale/deprovisioning_test.go +++ b/test/suites/scale/deprovisioning_test.go @@ -250,7 +250,7 @@ var _ = Describe("Deprovisioning", Label(debug.NoWatch), Label(debug.NoEvents), nodePoolMap[noExpirationValue].Spec = *nodePoolMap[expirationValue].Spec.DeepCopy() // Enable consolidation, emptiness, and expiration - nodePoolMap[consolidationValue].Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePoolMap[consolidationValue].Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") nodePoolMap[emptinessValue].Spec.Disruption.ConsolidationPolicy = karpv1.ConsolidationPolicyWhenEmpty nodePoolMap[emptinessValue].Spec.Disruption.ConsolidateAfter.Duration = lo.ToPtr(time.Duration(0)) nodePoolMap[expirationValue].Spec.Template.Spec.ExpireAfter.Duration = lo.ToPtr(time.Duration(0)) @@ -383,7 +383,7 @@ var _ = Describe("Deprovisioning", Label(debug.NoWatch), Label(debug.NoEvents), env.MeasureDeprovisioningDurationFor(func() { By("kicking off deprovisioning by setting the consolidation enabled value on the nodePool") nodePool.Spec.Disruption.ConsolidationPolicy = karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") env.ExpectUpdated(nodePool) 
env.EventuallyExpectDeletedNodeCount("==", expectedNodeCount) @@ -437,7 +437,7 @@ var _ = Describe("Deprovisioning", Label(debug.NoWatch), Label(debug.NoEvents), env.MeasureDeprovisioningDurationFor(func() { By("kicking off deprovisioning by setting the consolidation enabled value on the nodePool") nodePool.Spec.Disruption.ConsolidationPolicy = karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") env.ExpectUpdated(nodePool) env.EventuallyExpectDeletedNodeCount("==", int(float64(expectedNodeCount)*0.8)) @@ -504,7 +504,7 @@ var _ = Describe("Deprovisioning", Label(debug.NoWatch), Label(debug.NoEvents), // The nodePool defaults to a larger instance type than we need so enabling consolidation and making // the requirements wide-open should cause deletes and increase our utilization on the cluster nodePool.Spec.Disruption.ConsolidationPolicy = karpv1.ConsolidationPolicyWhenEmptyOrUnderutilized - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") nodePool.Spec.Template.Spec.Requirements = lo.Reject(nodePool.Spec.Template.Spec.Requirements, func(r karpv1.NodeSelectorRequirementWithMinValues, _ int) bool { return r.Key == v1.LabelInstanceSize }) @@ -622,7 +622,7 @@ var _ = Describe("Deprovisioning", Label(debug.NoWatch), Label(debug.NoEvents), noExpireNodePool := test.NodePool(*nodePool.DeepCopy()) // Disable Expiration - noExpireNodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{} + noExpireNodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("Never") noExpireNodePool.Spec.Template.Spec.ExpireAfter.Duration = nil noExpireNodePool.ObjectMeta = metav1.ObjectMeta{Name: test.RandomName()} diff --git a/test/suites/termination/emptiness_test.go b/test/suites/termination/emptiness_test.go index 5efb528c1ec0..cf53b749c26b 100644 --- a/test/suites/termination/emptiness_test.go +++ b/test/suites/termination/emptiness_test.go @@ -38,7 +38,7 @@ var _ = Describe("Emptiness", func() { var numPods int BeforeEach(func() { nodePool.Spec.Disruption.ConsolidationPolicy = karpv1.ConsolidationPolicyWhenEmpty - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") numPods = 1 dep = test.Deployment(test.DeploymentOptions{ @@ -96,7 +96,7 @@ var _ = Describe("Emptiness", func() { }) }) It("should terminate an empty node", func() { - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(10 * time.Second)} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("10s") const numPods = 1 deployment := test.Deployment(test.DeploymentOptions{Replicas: numPods}) @@ -115,7 +115,7 @@ var _ = Describe("Emptiness", func() { env.EventuallyExpectConsolidatable(nodeClaim) By("waiting for the nodeclaim to deprovision when past its ConsolidateAfter timeout of 0") - nodePool.Spec.Disruption.ConsolidateAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + nodePool.Spec.Disruption.ConsolidateAfter = karpv1.MustParseNillableDuration("0s") env.ExpectUpdated(nodePool) env.EventuallyExpectNotFound(nodeClaim, node) diff --git 
a/test/suites/termination/termination_grace_period_test.go b/test/suites/termination/termination_grace_period_test.go index 9c149e94fa32..941297c35555 100644 --- a/test/suites/termination/termination_grace_period_test.go +++ b/test/suites/termination/termination_grace_period_test.go @@ -33,20 +33,19 @@ import ( var _ = Describe("TerminationGracePeriod", func() { BeforeEach(func() { nodePool.Spec.Template.Spec.TerminationGracePeriod = &metav1.Duration{Duration: time.Second * 30} - // Set the expireAfter value to get the node deleted - nodePool.Spec.Template.Spec.ExpireAfter = karpv1.NillableDuration{Duration: lo.ToPtr(time.Second * 90)} }) - It("should delete pod that tolerates do-not-disrupt after termination grace period seconds", func() { + It("should delete pod with do-not-disrupt when it reaches its terminationGracePeriodSeconds", func() { pod := coretest.UnschedulablePod(coretest.PodOptions{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{ karpv1.DoNotDisruptAnnotationKey: "true", - }}}) + }}, TerminationGracePeriodSeconds: lo.ToPtr(int64(15))}) env.ExpectCreated(nodeClass, nodePool, pod) nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] node := env.EventuallyExpectCreatedNodeCount("==", 1)[0] env.EventuallyExpectHealthy(pod) - // Check that pod remains healthy until termination grace period - env.ConsistentlyExpectHealthyPods(time.Second*30, pod) + + // Delete the nodeclaim to start the TerminationGracePeriod + env.ExpectDeleted(nodeClaim) // Eventually the node will be tainted Eventually(func(g Gomega) { @@ -55,19 +54,28 @@ var _ = Describe("TerminationGracePeriod", func() { return karpv1.IsDisruptingTaint(t) }) g.Expect(ok).To(BeTrue()) - }).Should(Succeed()) + }).WithTimeout(3 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + + // Check that pod remains healthy until termination grace period + // subtract the polling time of the eventually above to reduce any races. + env.ConsistentlyExpectHealthyPods(time.Second*15-100*time.Millisecond, pod) // Both nodeClaim and node should be gone once terminationGracePeriod is reached - env.EventuallyExpectNotFound(nodeClaim, node) + env.EventuallyExpectNotFound(nodeClaim, node, pod) }) It("should delete pod that has a pre-stop hook after termination grace period seconds", func() { - pod := coretest.UnschedulablePod(coretest.PodOptions{PreStopSleep: lo.ToPtr(int64(300))}) + pod := coretest.UnschedulablePod(coretest.PodOptions{ + PreStopSleep: lo.ToPtr(int64(300)), + TerminationGracePeriodSeconds: lo.ToPtr(int64(15)), + Image: "alpine:3.20.2", + Command: []string{"/bin/sh", "-c", "sleep 30"}}) env.ExpectCreated(nodeClass, nodePool, pod) nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] node := env.EventuallyExpectCreatedNodeCount("==", 1)[0] env.EventuallyExpectHealthy(pod) - // Check that pod remains healthy until termination grace period - env.ConsistentlyExpectHealthyPods(time.Second*30, pod) + + // Delete the nodeclaim to start the TerminationGracePeriod + env.ExpectDeleted(nodeClaim) // Eventually the node will be tainted Eventually(func(g Gomega) { @@ -76,8 +84,15 @@ var _ = Describe("TerminationGracePeriod", func() { return karpv1.IsDisruptingTaint(t) }) g.Expect(ok).To(BeTrue()) - }).Should(Succeed()) + }).WithTimeout(3 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + + env.EventuallyExpectTerminating(pod) + + // Check that pod remains healthy until termination grace period + // subtract the polling time of the eventually above to reduce any races. 
+ env.ConsistentlyExpectTerminatingPods(time.Second*15-100*time.Millisecond, pod) + // Both nodeClaim and node should be gone once terminationGracePeriod is reached - env.EventuallyExpectNotFound(nodeClaim, node) + env.EventuallyExpectNotFound(nodeClaim, node, pod) }) }) diff --git a/website/content/en/preview/concepts/nodeclasses.md b/website/content/en/preview/concepts/nodeclasses.md index 0d1f0beffc92..637ed31a3f4e 100644 --- a/website/content/en/preview/concepts/nodeclasses.md +++ b/website/content/en/preview/concepts/nodeclasses.md @@ -636,7 +636,7 @@ For [private clusters](https://docs.aws.amazon.com/eks/latest/userguide/private- ## spec.amiSelectorTerms -AMI Selector Terms are __required__ and are used to configure AMIs for Karpenter to use. AMIs are discovered through alias, id, owner, name, and [tags](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html). +AMI Selector Terms are __required__ and are used to configure AMIs for Karpenter to use. AMIs are discovered through alias, id, owner, name, and [tags](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html). This selection logic is modeled as terms, where each term contains multiple conditions that must all be satisfied for the selector to match. Effectively, all requirements within a single term are ANDed together. It's possible that you may want to select on two different AMIs that have unrelated requirements. In this case, you can specify multiple terms which will be ORed together to form your selection logic. The example below shows how this selection logic is fulfilled. @@ -855,17 +855,6 @@ spec: encrypted: true ``` -### Ubuntu (not supported) -```yaml -spec: - blockDeviceMappings: - - deviceName: /dev/sda1 - ebs: - volumeSize: 20Gi - volumeType: gp3 - encrypted: true -``` - ### Windows2019/Windows2022 ```yaml spec: @@ -965,7 +954,7 @@ For more examples on configuring fields for different AMI families, see the [exa Karpenter will merge the userData you specify with the default userData for that AMIFamily. See the [AMIFamily]({{< ref "#specamifamily" >}}) section for more details on these defaults. View the sections below to understand the different merge strategies for each AMIFamily. -### AL2/Ubuntu +### AL2 * Your UserData can be in the [MIME multi part archive](https://cloudinit.readthedocs.io/en/latest/topics/format.html#mime-multi-part-archive) format. * Karpenter will transform your custom user-data as a MIME part, if necessary, and then merge a final MIME part to the end of your UserData parts which will bootstrap the worker node. Karpenter will have full control over all the parameters being passed to the bootstrap script. diff --git a/website/content/en/preview/upgrading/v1-migration.md b/website/content/en/preview/upgrading/v1-migration.md index db9058564c63..94f8c5231984 100644 --- a/website/content/en/preview/upgrading/v1-migration.md +++ b/website/content/en/preview/upgrading/v1-migration.md @@ -24,10 +24,14 @@ See the [Changelog]({{}}) for details about actions you shoul Please read through the entire procedure before beginning the upgrade. There are major changes in this upgrade, so please evaluate the list of breaking changes before continuing. +{{% alert title="Note" color="warning" %}} +The upgrade guide will first require upgrading to your latest patch version prior to upgrade to v1.0.0. This will be to allow the conversion webhooks to operate and minimize downtime of the Karpenter controller when requesting the Karpenter custom resources. +{{% /alert %}} + 1. 
Determine the current Karpenter version: ```bash kubectl get pod -A | grep karpenter - kubectl describe pod -n karpenter karpenter-xxxxxxxxxx-xxxxx | grep Image: + kubectl describe pod -n karpenter karpenter-xxxxxxxxxx-xxxxx | grep Image: ``` Sample output: ```bash @@ -122,8 +126,9 @@ Please read through the entire procedure before beginning the upgrade. There are 11. Upgrade Karpenter to the new version. At the end of this step, conversion webhooks run to convert the Karpenter CRDs to v1. ```bash + # Service account annotation can be dropped when using pod identity helm upgrade --install karpenter oci://public.ecr.aws/karpenter/karpenter --version ${KARPENTER_VERSION} --namespace "${KARPENTER_NAMESPACE}" --create-namespace \ - --set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"=${KARPENTER_IAM_ROLE_ARN} \ # Service account annotion can be droped when using pod identity + --set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"=${KARPENTER_IAM_ROLE_ARN} \ --set settings.clusterName=${CLUSTER_NAME} \ --set settings.interruptionQueue=${CLUSTER_NAME} \ --set controller.resources.requests.cpu=1 \ @@ -150,15 +155,15 @@ The results of upgrading these CRDs include the following: Your NodePool and EC2NodeClass objects are auto-converted to the new v1 storage version during the upgrade. Consider getting the latest versions of those objects to update any stored manifests where you were previously applying the v1beta1 version. - * ([NodePools]({{}}): Get the latest copy of your NodePool (`kubectl get nodepool default -o yaml > nodepool.yaml`) and review the [Changelog]({{}}) for changes to NodePool objects. Make modifications as needed. + * [NodePools]({{}}): Get the latest copy of your NodePool (`kubectl get nodepool default -o yaml > nodepool.yaml`) and review the [Changelog]({{}}) for changes to NodePool objects. Make modifications as needed. * [EC2NodeClasses]({{}}): Get the latest copy of your EC2NodeClass (`kubectl get ec2nodeclass default -o yaml > ec2nodeclass.yaml`) and review the [Changelog]({{}}) for changes to EC2NodeClass objects. Make modifications as needed. When you are satisfied with your NodePool and EC2NodeClass files, apply them as follows: - ```bash - kubectl apply -f nodepool.yaml - kubectl apply -f ec2nodeclass.yaml - ``` +```bash +kubectl apply -f nodepool.yaml +kubectl apply -f ec2nodeclass.yaml +``` ## Changelog @@ -180,8 +185,20 @@ for [EKS Pod Identity ABAC policies](https://docs.aws.amazon.com/eks/latest/user * **metadataOptions could break workloads**: If you have workload pods that are not using `hostNetworking`, the updated default `metadataOptions` could cause your containers to break when you apply new EC2NodeClasses on v1. -* **Support for native Ubuntu AMI selection is dropped**: If you are using Ubuntu and not using `amiSelectorTerms`, be aware that Karpenter v1 no longer natively supports Ubuntu. To continue using Ubuntu in v1, you can update `amiSelectorTerms` in your EC2NodeClasses to identify Ubuntu as the AMI you want to use. See [reimplement amiFamily](https://github.com/aws/karpenter-provider-aws/pull/6569) for an example. Once you have done that, you can leave the amiFamily as Ubuntu and proceed to upgrading to v1. This will result in the following transformation: +* **Ubuntu AMIFamily Removed**: + + Support for automatic AMI selection and UserData generation for Ubuntu has been dropped with Karpenter `v1.0.0`. + To continue using Ubuntu AMIs you will need to specify an AMI using `amiSelectorTerms`. 
+ + UserData generation can be achieved using the AL2 AMIFamily which has an identical UserData format. + However, compatibility is not guaranteed long-term and changes to either AL2 or Ubuntu's UserData format may introduce incompatibilities. + If this occurs, the Custom AMIFamily should be used for Ubuntu and UserData will need to be entirely maintained by the user. + + If you are upgrading to `v1.0.0` and already have v1beta1 Ubuntu EC2NodeClasses, all you need to do is specify `amiSelectorTerms` and Karpenter will translate your NodeClasses to the v1 equivalent (as shown below). + Failure to specify `amiSelectorTerms` will cause the EC2NodeClass and all referencing NodePools to show as NotReady, causing Karpenter to ignore these NodePools and EC2NodeClasses for Provisioning and Drift. + + ```yaml + # Original v1beta1 EC2NodeClass version: karpenter.k8s.aws/v1beta1 kind: EC2NodeClass spec: @@ -207,10 +224,9 @@ for [EKS Pod Identity ABAC policies](https://docs.aws.amazon.com/eks/latest/user volumeType: gp3 volumeSize: 20Gi ``` - **NOTE**: If you do not specify `amiSelectorTerms` before upgrading with the Ubuntu `amiFamily`, the conversion webhook will fail. If you do this, update your EC2NodeClass to specify amiSelectorTerms on v1beta1, resolving the conversion webhook failures. * **amiSelectorTerms and amiFamily**: For v1, `amiFamily` is no longer required if you instead specify an `alias` in `amiSelectorTerms` in your `EC2NodeClass`. You need to update your `amiSelectorTerms` and `amiFamily` if you are using: - * A Custom amiFamily. You must ensure that the node you add the `karpenter.sh/unregistered` taint in your UserData. + * A Custom amiFamily. You must ensure that you add the `karpenter.sh/unregistered:NoExecute` taint to the node in your UserData. * An Ubuntu AMI, as described earlier. ### Before upgrading to `v1.1.0` @@ -227,11 +243,12 @@ then you have to manually add more EC2NodeClasses and point their NodePools to t If you have multiple NodePools pointing to the same EC2NodeClass, but they have the same configuration, then you can proceed with the migration without having drift or having any additional NodePools or EC2NodeClasses configured. -**Remove kubelet annotation from NodePools**: During the upgrade process Karpenter will rely on the `compatibility.karpenter.sh/v1beta1-kubelet-conversion` annotation to determine whether to use the v1beta1 NodePool kubelet configuration or the v1 EC2NodeClass kubelet configuration. The `compatibility.karpenter.sh/v1beta1-kubelet-conversion` NodePool annotation takes precedence over the EC2NodeClass Kubelet configuration when launching nodes. Remove the kubelet-configuration annotation (`compatibility.karpenter.sh/v1beta1-kubelet-conversion`) from your NodePools once you have migrated kubelet from the NodePool to the EC2NodeClass. +* **Remove kubelet annotation from NodePools**: During the upgrade process Karpenter will rely on the `compatibility.karpenter.sh/v1beta1-kubelet-conversion` annotation to determine whether to use the v1beta1 NodePool kubelet configuration or the v1 EC2NodeClass kubelet configuration. The `compatibility.karpenter.sh/v1beta1-kubelet-conversion` NodePool annotation takes precedence over the EC2NodeClass Kubelet configuration when launching nodes. Remove the kubelet-configuration annotation (`compatibility.karpenter.sh/v1beta1-kubelet-conversion`) from your NodePools once you have migrated kubelet from the NodePool to the EC2NodeClass. 
Keep in mind that rollback, without replacing the Karpenter nodes, will not be supported to an earlier version of Karpenter once the annotation is removed. This annotation is only used to support the kubelet configuration migration path, but will not be supported in v1.1. ### Downgrading + Once the Karpenter CRDs are upgraded to v1, conversion webhooks are needed to help convert APIs that are stored in etcd from v1 to v1beta1. Also changes to the CRDs will need to at least include the latest version of the CRD in this case being v1. The patch versions of the v1beta1 Karpenter controller that include the conversion wehooks include: * v0.37.1 @@ -308,35 +325,34 @@ helm upgrade --install karpenter oci://public.ecr.aws/karpenter/karpenter --vers Karpenter should now be pulling and operating against the v1beta1 APIVersion as it was prior to the upgrade - ## Full Changelog -* Features: +* Features: * AMI Selector Terms has a new Alias field which can only be set by itself in `EC2NodeClass.Spec.AMISelectorTerms` * Disruption Budgets by Reason was added to `NodePool.Spec.Disruption.Budgets` * TerminationGracePeriod was added to `NodePool.Spec.Template.Spec`. * LOG_OUTPUT_PATHS and LOG_ERROR_OUTPUT_PATHS environment variables added * API Rename: NodePool’s ConsolidationPolicy `WhenUnderutilized` is now renamed to `WhenEmptyOrUnderutilized` -* Behavior Changes: +* Behavior Changes: * Expiration is now forceful and begins draining as soon as it’s expired. Karpenter does not wait for replacement capacity to be available before draining, but will start provisioning a replacement as soon as the node is expired and begins draining. - * Karpenter's generated NodeConfig now takes precedence when generating UserData with the AL2023 `amiFamily`. If you're setting any values managed by Karpenter in your AL2023 UserData, configure these through Karpenter natively (e.g. kubelet configuration fields). - * Karpenter now adds a `karpenter.sh/unregistered` taint to nodes in injected UserData when using alias in AMISelectorTerms or non-Custom AMIFamily. When using `amiFamily: Custom`, users will need to add this taint into their UserData, where Karpenter will automatically remove it when provisioning nodes. -* API Moves: + * Karpenter's generated NodeConfig now takes precedence when generating UserData with the AL2023 `amiFamily`. If you're setting any values managed by Karpenter in your AL2023 UserData, configure these through Karpenter natively (e.g. kubelet configuration fields). + * Karpenter now adds a `karpenter.sh/unregistered:NoExecute` taint to nodes in injected UserData when using alias in AMISelectorTerms or non-Custom AMIFamily. When using `amiFamily: Custom`, users will need to add this taint into their UserData, where Karpenter will automatically remove it when provisioning nodes. +* API Moves: * ExpireAfter has moved from the `NodePool.Spec.Disruption` block to `NodePool.Spec.Template.Spec`, and is now a drift-able field. * `Kubelet` was moved to the EC2NodeClass from the NodePool. * RBAC changes: added `delete pods` | added `get, patch crds` | added `update nodes` | removed `create nodes` -* Breaking API (Manual Migration Needed): +* Breaking API (Manual Migration Needed): * Ubuntu is dropped as a first class supported AMI Family * `karpenter.sh/do-not-consolidate` (annotation), `karpenter.sh/do-not-evict` (annotation), and `karpenter.sh/managed-by` (tag) are all removed. 
`karpenter.sh/managed-by`, which currently stores the cluster name in its value, will be replaced by eks:eks-cluster-name * The taint used to mark nodes for disruption and termination changed from `karpenter.sh/disruption=disrupting:NoSchedule` to `karpenter.sh/disrupted:NoSchedule`. It is not recommended to tolerate this taint, however, if you were tolerating it in your applications, you'll need to adjust your taints to reflect this. -* Environment Variable Changes: +* Environment Variable Changes: * Environment Variable Changes * LOGGING_CONFIG, ASSUME_ROLE_ARN, ASSUME_ROLE_DURATION Dropped * LEADER_ELECT renamed to DISABLE_LEADER_ELECTION * `FEATURE_GATES.DRIFT=true` was dropped and promoted to Stable, and cannot be disabled. * Users currently opting out of drift, disabling the drift feature flag will no longer be able to do so. -* Defaults changed: +* Defaults changed: * API: Karpenter will drop support for IMDS access from containers by default on new EC2NodeClasses by updating the default of `httpPutResponseHopLimit` from 2 to 1. - * API: ConsolidateAfter is required. Users couldn’t set this before with ConsolidationPolicy: WhenUnderutilized, where this is now required. Users can set it to 0 to have the same behavior as in v1beta1. + * API: ConsolidateAfter is required. Users couldn’t set this before with ConsolidationPolicy: WhenUnderutilized, where this is now required. Users can set it to 0 to have the same behavior as in v1beta1. * API: All `NodeClassRef` fields are now all required, and apiVersion has been renamed to group * API: AMISelectorTerms are required. Setting an Alias cannot be done with any other type of term, and must match the AMI Family that's set or be Custom. * Helm: Deployment spec TopologySpreadConstraint to have required zonal spread over preferred. Users who had one node running their Karpenter deployments need to either: @@ -344,7 +360,7 @@ Karpenter should now be pulling and operating against the v1beta1 APIVersion as * Scale down their Karpenter replicas from 2 to 1 in the helm chart * Edit and relax the topology spread constraint in their helm chart from DoNotSchedule to ScheduleAnyway * Helm/Binary: `controller.METRICS_PORT` default changed back to 8080 - + ### Updated metrics Changes to Karpenter metrics from v1beta1 to v1 are shown in the following tables.