From fd2bf4c56855f53003edc7f92860029d86d35a94 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Tue, 9 Jul 2024 11:18:44 -0700 Subject: [PATCH 1/9] wip: v1 API impl --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 2461 +++++++++-------- pkg/apis/v1/ec2nodeclass.go | 60 +- .../v1/ec2nodeclass_validation_cel_test.go | 108 +- pkg/apis/v1/zz_generated.deepcopy.go | 5 - 4 files changed, 1387 insertions(+), 1247 deletions(-) diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index cc061eb57cf9..98e3dcfc5f17 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -9,1301 +9,1366 @@ spec: group: karpenter.k8s.aws names: categories: - - karpenter + - karpenter kind: EC2NodeClass listKind: EC2NodeClassList plural: ec2nodeclasses shortNames: - - ec2nc - - ec2ncs + - ec2nc + - ec2ncs singular: ec2nodeclass scope: Cluster versions: - - additionalPrinterColumns: - - jsonPath: .status.conditions[?(@.type=="Ready")].status - name: Ready - type: string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - - jsonPath: .spec.role - name: Role - priority: 1 - type: string - name: v1 - schema: - openAPIV3Schema: - description: EC2NodeClass is the Schema for the EC2NodeClass API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: |- - EC2NodeClassSpec is the top level specification for the AWS Karpenter Provider. - This will contain configuration necessary to launch instances in AWS. - properties: - amiFamily: - description: AMIFamily is the AMI family that instances use. - enum: - - AL2 - - AL2023 - - Bottlerocket - - Ubuntu - - Custom - - Windows2019 - - Windows2022 - type: string - amiSelectorTerms: - description: AMISelectorTerms is a list of or ami selector terms. The terms are ORed. - items: - description: |- - AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. - properties: - id: - description: ID is the ami id in EC2 - pattern: ami-[0-9a-z]+ - type: string - name: - description: |- - Name is the ami name in EC2. - This value is the name field, which is different from the name tag. - type: string - owner: - description: |- - Owner is the owner for the ami. - You can specify a combination of AWS account IDs, "self", "amazon", and "aws-marketplace" - type: string - tags: - additionalProperties: - type: string - description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. 
- maxProperties: 20 - type: object - x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') - type: object - maxItems: 30 - type: array - x-kubernetes-validations: - - message: expected at least one, got none, ['tags', 'id', 'name'] - rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) - - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in amiSelectorTerms' - rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name) || has(x.owner)))' - associatePublicIPAddress: - description: AssociatePublicIPAddress controls if public IP addresses are assigned to instances that are launched with the nodeclass. - type: boolean - blockDeviceMappings: - description: BlockDeviceMappings to be applied to provisioned nodes. - items: - properties: - deviceName: - description: The device name (for example, /dev/sdh or xvdh). + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + - jsonPath: .spec.role + name: Role + priority: 1 + type: string + name: v1 + schema: + openAPIV3Schema: + description: EC2NodeClass is the Schema for the EC2NodeClass API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + EC2NodeClassSpec is the top level specification for the AWS Karpenter Provider. + This will contain configuration necessary to launch instances in AWS. + properties: + amiSelectorTerms: + description: AMISelectorTerms is a list of or ami selector terms. + The terms are ORed. + items: + description: |- + AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. + properties: + alias: + description: |- + Alias specifies which EKS optimized AMI to select. + Each alias consists of a family and a version, specified as "family@version". + Valid families include: al2, al2023, bottlerocket, windows2019, and windows2022. + The version can either be pinned to a specific AMI release, with that AMI's version format (ex: "al2023@v20240625" or "bottlerocket@1.10.0"). + The version can also be set to "latest" for any family. Setting the version to latest will result in drift when a new AMI is released. + Note: The Windows families do **not** support version pinning, and only latest may be used. + maxLength: 30 + type: string + x-kubernetes-validations: + - message: '''alias'' is improperly formatted, must match the + format ''family@version''' + rule: self.matches('^[a-zA-Z0-9]*@.*$') + - message: 'family is not supported, must be one of the following: + ''al2'', ''al2023'', ''bottlerocket'', ''windows2019'', + ''windows2022''' + rule: self.find('^[^@]+') in ['al2','al2023','bottlerocket','windows2019','windows2022'] + id: + description: ID is the ami id in EC2 + pattern: ami-[0-9a-z]+ + type: string + name: + description: |- + Name is the ami name in EC2. + This value is the name field, which is different from the name tag. + type: string + owner: + description: |- + Owner is the owner for the ami. 
+ You can specify a combination of AWS account IDs, "self", "amazon", and "aws-marketplace" + type: string + tags: + additionalProperties: type: string - ebs: - description: EBS contains parameters used to automatically set up EBS volumes when an instance is launched. - properties: - deleteOnTermination: - description: DeleteOnTermination indicates whether the EBS volume is deleted on instance termination. - type: boolean - encrypted: - description: |- - Encrypted indicates whether the EBS volume is encrypted. Encrypted volumes can only - be attached to instances that support Amazon EBS encryption. If you are creating - a volume from a snapshot, you can't specify an encryption value. - type: boolean - iops: - description: |- - IOPS is the number of I/O operations per second (IOPS). For gp3, io1, and io2 volumes, - this represents the number of IOPS that are provisioned for the volume. For - gp2 volumes, this represents the baseline performance of the volume and the - rate at which the volume accumulates I/O credits for bursting. + description: |- + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. 
+ maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + minItems: 1 + type: array + x-kubernetes-validations: + - message: expected at least one, got none, ['tags', 'id', 'name', + 'alias'] + rule: self.all(x, has(x.tags) || has(x.id) || has(x.name) || has(x.alias)) + - message: '''id'' is mutually exclusive, cannot be set with a combination + of other fields in amiSelectorTerms' + rule: '!self.exists(x, has(x.id) && (has(x.alias) || has(x.tags) + || has(x.name) || has(x.owner)))' + - message: '''alias'' is mutually exclusive, cannot be set with a + combination of other fields in amiSelectorTerms' + rule: '!self.exists(x, has(x.alias) && (has(x.id) || has(x.tags) + || has(x.name) || has(x.owner)))' + - message: '''alias'' is mutually exclusive, cannot be set with a + combination of other amiSelectorTerms' + rule: '!(self.exists(x, has(x.alias)) && self.size() != 1)' + associatePublicIPAddress: + description: AssociatePublicIPAddress controls if public IP addresses + are assigned to instances that are launched with the nodeclass. + type: boolean + blockDeviceMappings: + description: BlockDeviceMappings to be applied to provisioned nodes. + items: + properties: + deviceName: + description: The device name (for example, /dev/sdh or xvdh). + type: string + ebs: + description: EBS contains parameters used to automatically set + up EBS volumes when an instance is launched. + properties: + deleteOnTermination: + description: DeleteOnTermination indicates whether the EBS + volume is deleted on instance termination. + type: boolean + encrypted: + description: |- + Encrypted indicates whether the EBS volume is encrypted. Encrypted volumes can only + be attached to instances that support Amazon EBS encryption. If you are creating + a volume from a snapshot, you can't specify an encryption value. 
+ type: boolean + iops: + description: |- + IOPS is the number of I/O operations per second (IOPS). For gp3, io1, and io2 volumes, + this represents the number of IOPS that are provisioned for the volume. For + gp2 volumes, this represents the baseline performance of the volume and the + rate at which the volume accumulates I/O credits for bursting. - The following are the supported values for each volume type: + The following are the supported values for each volume type: - * gp3: 3,000-16,000 IOPS + * gp3: 3,000-16,000 IOPS - * io1: 100-64,000 IOPS + * io1: 100-64,000 IOPS - * io2: 100-64,000 IOPS + * io2: 100-64,000 IOPS - For io1 and io2 volumes, we guarantee 64,000 IOPS only for Instances built - on the Nitro System (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html#ec2-nitro-instances). - Other instance families guarantee performance up to 32,000 IOPS. + For io1 and io2 volumes, we guarantee 64,000 IOPS only for Instances built + on the Nitro System (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html#ec2-nitro-instances). + Other instance families guarantee performance up to 32,000 IOPS. - This parameter is supported for io1, io2, and gp3 volumes only. This parameter - is not supported for gp2, st1, sc1, or standard volumes. - format: int64 - type: integer - kmsKeyID: - description: KMSKeyID (ARN) of the symmetric Key Management Service (KMS) CMK used for encryption. - type: string - snapshotID: - description: SnapshotID is the ID of an EBS snapshot - type: string - throughput: - description: |- - Throughput to provision for a gp3 volume, with a maximum of 1,000 MiB/s. - Valid Range: Minimum value of 125. Maximum value of 1000. - format: int64 - type: integer - volumeSize: - description: |- - VolumeSize in `Gi`, `G`, `Ti`, or `T`. You must specify either a snapshot ID or - a volume size. 
The following are the supported volumes sizes for each volume - type: + This parameter is supported for io1, io2, and gp3 volumes only. This parameter + is not supported for gp2, st1, sc1, or standard volumes. + format: int64 + type: integer + kmsKeyID: + description: KMSKeyID (ARN) of the symmetric Key Management + Service (KMS) CMK used for encryption. + type: string + snapshotID: + description: SnapshotID is the ID of an EBS snapshot + type: string + throughput: + description: |- + Throughput to provision for a gp3 volume, with a maximum of 1,000 MiB/s. + Valid Range: Minimum value of 125. Maximum value of 1000. + format: int64 + type: integer + volumeSize: + description: |- + VolumeSize in `Gi`, `G`, `Ti`, or `T`. You must specify either a snapshot ID or + a volume size. The following are the supported volumes sizes for each volume + type: - * gp2 and gp3: 1-16,384 + * gp2 and gp3: 1-16,384 - * io1 and io2: 4-16,384 + * io1 and io2: 4-16,384 - * st1 and sc1: 125-16,384 + * st1 and sc1: 125-16,384 - * standard: 1-1,024 - pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$ - type: string - volumeType: - description: |- - VolumeType of the block device. - For more information, see Amazon EBS volume types (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html) - in the Amazon Elastic Compute Cloud User Guide. - enum: - - standard - - io1 - - io2 - - gp2 - - sc1 - - st1 - - gp3 - type: string - type: object - x-kubernetes-validations: - - message: snapshotID or volumeSize must be defined - rule: has(self.snapshotID) || has(self.volumeSize) - rootVolume: - description: |- - RootVolume is a flag indicating if this device is mounted as kubelet root dir. You can - configure at most one root volume in BlockDeviceMappings. 
- type: boolean + * standard: 1-1,024 + pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$ + type: string + volumeType: + description: |- + VolumeType of the block device. + For more information, see Amazon EBS volume types (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html) + in the Amazon Elastic Compute Cloud User Guide. + enum: + - standard + - io1 + - io2 + - gp2 + - sc1 + - st1 + - gp3 + type: string + type: object + x-kubernetes-validations: + - message: snapshotID or volumeSize must be defined + rule: has(self.snapshotID) || has(self.volumeSize) + rootVolume: + description: |- + RootVolume is a flag indicating if this device is mounted as kubelet root dir. You can + configure at most one root volume in BlockDeviceMappings. + type: boolean + type: object + maxItems: 50 + type: array + x-kubernetes-validations: + - message: must have only one blockDeviceMappings with rootVolume + rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() + <= 1 + context: + description: |- + Context is a Reserved field in EC2 APIs + https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html + type: string + detailedMonitoring: + description: DetailedMonitoring controls if detailed monitoring is + enabled for instances that are launched + type: boolean + instanceProfile: + description: |- + InstanceProfile is the AWS entity that instances use. + This field is mutually exclusive from role. + The instance profile should already have a role assigned to it that Karpenter + has PassRole permission on for instance launch using this instanceProfile to succeed. + type: string + x-kubernetes-validations: + - message: instanceProfile cannot be empty + rule: self != '' + instanceStorePolicy: + description: InstanceStorePolicy specifies how to handle instance-store + disks. 
+ enum: + - RAID0 + type: string + kubelet: + description: |- + Kubelet defines args to be used when configuring kubelet on provisioned nodes. + They are a subset of the upstream types, recognizing not all options may be supported. + Wherever possible, the types and names should reflect the upstream kubelet types. + properties: + clusterDNS: + description: |- + clusterDNS is a list of IP addresses for the cluster DNS server. + Note that not all providers may use all addresses. + items: + type: string + type: array + cpuCFSQuota: + description: CPUCFSQuota enables CPU CFS quota enforcement for + containers that specify CPU limits. + type: boolean + evictionHard: + additionalProperties: + type: string + description: EvictionHard is the map of signal names to quantities + that define hard eviction thresholds type: object - maxItems: 50 - type: array - x-kubernetes-validations: - - message: must have only one blockDeviceMappings with rootVolume - rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() <= 1 - context: - description: |- - Context is a Reserved field in EC2 APIs - https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html - type: string - detailedMonitoring: - description: DetailedMonitoring controls if detailed monitoring is enabled for instances that are launched - type: boolean - instanceProfile: - description: |- - InstanceProfile is the AWS entity that instances use. - This field is mutually exclusive from role. - The instance profile should already have a role assigned to it that Karpenter - has PassRole permission on for instance launch using this instanceProfile to succeed. - type: string - x-kubernetes-validations: - - message: instanceProfile cannot be empty - rule: self != '' - instanceStorePolicy: - description: InstanceStorePolicy specifies how to handle instance-store disks. 
- enum: - - RAID0 - type: string - kubelet: + x-kubernetes-validations: + - message: valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + evictionMaxPodGracePeriod: + description: |- + EvictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in + response to soft eviction thresholds being met. + format: int32 + type: integer + evictionSoft: + additionalProperties: + type: string + description: EvictionSoft is the map of signal names to quantities + that define soft eviction thresholds + type: object + x-kubernetes-validations: + - message: valid keys for evictionSoft are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + evictionSoftGracePeriod: + additionalProperties: + type: string + description: EvictionSoftGracePeriod is the map of signal names + to quantities that define grace periods for each eviction signal + type: object + x-kubernetes-validations: + - message: valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + imageGCHighThresholdPercent: + description: |- + ImageGCHighThresholdPercent is the percent of disk usage after which image + garbage collection is always run. The percent is calculated by dividing this + field value by 100, so this field must be between 0 and 100, inclusive. 
+ When specified, the value must be greater than ImageGCLowThresholdPercent. + format: int32 + maximum: 100 + minimum: 0 + type: integer + imageGCLowThresholdPercent: + description: |- + ImageGCLowThresholdPercent is the percent of disk usage before which image + garbage collection is never run. Lowest disk usage to garbage collect to. + The percent is calculated by dividing this field value by 100, + so the field value must be between 0 and 100, inclusive. + When specified, the value must be less than imageGCHighThresholdPercent + format: int32 + maximum: 100 + minimum: 0 + type: integer + kubeReserved: + additionalProperties: + type: string + description: KubeReserved contains resources reserved for Kubernetes + system components. + type: object + x-kubernetes-validations: + - message: valid keys for kubeReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' + || x=='pid') + - message: kubeReserved value cannot be a negative resource quantity + rule: self.all(x, !self[x].startsWith('-')) + maxPods: + description: |- + MaxPods is an override for the maximum number of pods that can run on + a worker node instance. + format: int32 + minimum: 0 + type: integer + podsPerCore: + description: |- + PodsPerCore is an override for the number of pods that can run on a worker node + instance based on the number of cpu cores. This value cannot exceed MaxPods, so, if + MaxPods is a lower value, that value will be used. + format: int32 + minimum: 0 + type: integer + systemReserved: + additionalProperties: + type: string + description: SystemReserved contains resources reserved for OS + system daemons and kernel memory. 
+ type: object + x-kubernetes-validations: + - message: valid keys for systemReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' + || x=='pid') + - message: systemReserved value cannot be a negative resource + quantity + rule: self.all(x, !self[x].startsWith('-')) + type: object + x-kubernetes-validations: + - message: imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent + rule: 'has(self.imageGCHighThresholdPercent) && has(self.imageGCLowThresholdPercent) + ? self.imageGCHighThresholdPercent > self.imageGCLowThresholdPercent : + true' + - message: evictionSoft OwnerKey does not have a matching evictionSoftGracePeriod + rule: has(self.evictionSoft) ? self.evictionSoft.all(e, (e in self.evictionSoftGracePeriod)):true + - message: evictionSoftGracePeriod OwnerKey does not have a matching + evictionSoft + rule: has(self.evictionSoftGracePeriod) ? self.evictionSoftGracePeriod.all(e, + (e in self.evictionSoft)):true + metadataOptions: + default: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 1 + httpTokens: required + description: |- + MetadataOptions for the generated launch template of provisioned nodes. + + + This specifies the exposure of the Instance Metadata Service to + provisioned EC2 nodes. For more information, + see Instance Metadata and User Data + (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) + in the Amazon Elastic Compute Cloud User Guide. + + + Refer to recommended, security best practices + (https://aws.github.io/aws-eks-best-practices/security/docs/iam/#restrict-access-to-the-instance-profile-assigned-to-the-worker-node) + for limiting exposure of Instance Metadata and User Data to pods. + If omitted, defaults to httpEndpoint enabled, with httpProtocolIPv6 + disabled, with httpPutResponseHopLimit of 1, and with httpTokens + required. 
+ properties: + httpEndpoint: + default: enabled + description: |- + HTTPEndpoint enables or disables the HTTP metadata endpoint on provisioned + nodes. If metadata options is non-nil, but this parameter is not specified, + the default state is "enabled". + + + If you specify a value of "disabled", instance metadata will not be accessible + on the node. + enum: + - enabled + - disabled + type: string + httpProtocolIPv6: + default: disabled + description: |- + HTTPProtocolIPv6 enables or disables the IPv6 endpoint for the instance metadata + service on provisioned nodes. If metadata options is non-nil, but this parameter + is not specified, the default state is "disabled". + enum: + - enabled + - disabled + type: string + httpPutResponseHopLimit: + default: 1 + description: |- + HTTPPutResponseHopLimit is the desired HTTP PUT response hop limit for + instance metadata requests. The larger the number, the further instance + metadata requests can travel. Possible values are integers from 1 to 64. + If metadata options is non-nil, but this parameter is not specified, the + default value is 1. + format: int64 + maximum: 64 + minimum: 1 + type: integer + httpTokens: + default: required + description: |- + HTTPTokens determines the state of token usage for instance metadata + requests. If metadata options is non-nil, but this parameter is not + specified, the default state is "required". + + + If the state is optional, one can choose to retrieve instance metadata with + or without a signed token header on the request. If one retrieves the IAM + role credentials without a token, the version 1.0 role credentials are + returned. If one retrieves the IAM role credentials using a valid signed + token, the version 2.0 role credentials are returned. + + + If the state is "required", one must send a signed token header with any + instance metadata retrieval requests. 
In this state, retrieving the IAM + role credentials always returns the version 2.0 credentials; the version + 1.0 credentials are not available. + enum: + - required + - optional + type: string + type: object + role: + description: |- + Role is the AWS identity that nodes use. This field is immutable. + This field is mutually exclusive from instanceProfile. + Marking this field as immutable avoids concerns around terminating managed instance profiles from running instances. + This field may be made mutable in the future, assuming the correct garbage collection and drift handling is implemented + for the old instance profiles on an update. + type: string + x-kubernetes-validations: + - message: role cannot be empty + rule: self != '' + - message: immutable field changed + rule: self == oldSelf + securityGroupSelectorTerms: + description: SecurityGroupSelectorTerms is a list of or security group + selector terms. The terms are ORed. + items: description: |- - Kubelet defines args to be used when configuring kubelet on provisioned nodes. - They are a subset of the upstream types, recognizing not all options may be supported. - Wherever possible, the types and names should reflect the upstream kubelet types. + SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. properties: - clusterDNS: + id: + description: ID is the security group id in EC2 + pattern: sg-[0-9a-z]+ + type: string + name: description: |- - clusterDNS is a list of IP addresses for the cluster DNS server. - Note that not all providers may use all addresses. - items: - type: string - type: array - cpuCFSQuota: - description: CPUCFSQuota enables CPU CFS quota enforcement for containers that specify CPU limits. - type: boolean - evictionHard: + Name is the security group name in EC2. + This value is the name field, which is different from the name tag. 
+ type: string + tags: additionalProperties: type: string - pattern: ^((\d{1,2}(\.\d{1,2})?|100(\.0{1,2})?)%||(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?)$ - description: EvictionHard is the map of signal names to quantities that define hard eviction thresholds - type: object - x-kubernetes-validations: - - message: valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] - rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) - evictionMaxPodGracePeriod: description: |- - EvictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in - response to soft eviction thresholds being met. - format: int32 - type: integer - evictionSoft: - additionalProperties: - type: string - pattern: ^((\d{1,2}(\.\d{1,2})?|100(\.0{1,2})?)%||(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?)$ - description: EvictionSoft is the map of signal names to quantities that define soft eviction thresholds + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. 
+ maxProperties: 20 type: object x-kubernetes-validations: - - message: valid keys for evictionSoft are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] - rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) - evictionSoftGracePeriod: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: securityGroupSelectorTerms cannot be empty + rule: self.size() != 0 + - message: expected at least one, got none, ['tags', 'id', 'name'] + rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) + - message: '''id'' is mutually exclusive, cannot be set with a combination + of other fields in securityGroupSelectorTerms' + rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))' + - message: '''name'' is mutually exclusive, cannot be set with a combination + of other fields in securityGroupSelectorTerms' + rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))' + subnetSelectorTerms: + description: SubnetSelectorTerms is a list of or subnet selector terms. + The terms are ORed. + items: + description: |- + SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. + properties: + id: + description: ID is the subnet id in EC2 + pattern: subnet-[0-9a-z]+ + type: string + tags: additionalProperties: type: string - description: EvictionSoftGracePeriod is the map of signal names to quantities that define grace periods for each eviction signal + description: |- + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. 
+ maxProperties: 20 type: object x-kubernetes-validations: - - message: valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] - rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) - imageGCHighThresholdPercent: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: subnetSelectorTerms cannot be empty + rule: self.size() != 0 + - message: expected at least one, got none, ['tags', 'id'] + rule: self.all(x, has(x.tags) || has(x.id)) + - message: '''id'' is mutually exclusive, cannot be set with a combination + of other fields in subnetSelectorTerms' + rule: '!self.all(x, has(x.id) && has(x.tags))' + tags: + additionalProperties: + type: string + description: Tags to be applied on ec2 resources like instances and + launch templates. + type: object + x-kubernetes-validations: + - message: empty tag keys aren't supported + rule: self.all(k, k != '') + - message: tag contains a restricted tag matching kubernetes.io/cluster/ + rule: self.all(k, !k.startsWith('kubernetes.io/cluster') ) + - message: tag contains a restricted tag matching karpenter.sh/nodepool + rule: self.all(k, k != 'karpenter.sh/nodepool') + - message: tag contains a restricted tag matching karpenter.sh/managed-by + rule: self.all(k, k !='karpenter.sh/managed-by') + - message: tag contains a restricted tag matching karpenter.sh/nodeclaim + rule: self.all(k, k !='karpenter.sh/nodeclaim') + - message: tag contains a restricted tag matching karpenter.k8s.aws/ec2nodeclass + rule: self.all(k, k !='karpenter.k8s.aws/ec2nodeclass') + userData: + description: |- + UserData to be applied to the provisioned nodes. + It must be in the appropriate format based on the AMIFamily in use. 
Karpenter will merge certain fields into + this UserData to ensure nodes are being provisioned with the correct configuration. + type: string + required: + - amiSelectorTerms + - securityGroupSelectorTerms + - subnetSelectorTerms + type: object + x-kubernetes-validations: + - message: must specify exactly one of ['role', 'instanceProfile'] + rule: (has(self.role) && !has(self.instanceProfile)) || (!has(self.role) + && has(self.instanceProfile)) + - message: changing from 'instanceProfile' to 'role' is not supported. + You must delete and recreate this node class if you want to change + this. + rule: (has(oldSelf.role) && has(self.role)) || (has(oldSelf.instanceProfile) + && has(self.instanceProfile)) + status: + description: EC2NodeClassStatus contains the resolved state of the EC2NodeClass + properties: + amis: + description: |- + AMI contains the current AMI values that are available to the + cluster under the AMI selectors. + items: + description: AMI contains resolved AMI selector values utilized + for node launch + properties: + id: + description: ID of the AMI + type: string + name: + description: Name of the AMI + type: string + requirements: + description: Requirements of the AMI to be utilized on an instance + type + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. 
+ This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + required: + - id + - requirements + type: object + type: array + conditions: + description: Conditions contains signals for health and readiness + items: + description: Condition aliases the upstream type and adds additional + helper methods + properties: + lastTransitionTime: description: |- - ImageGCHighThresholdPercent is the percent of disk usage after which image - garbage collection is always run. The percent is calculated by dividing this - field value by 100, so this field must be between 0 and 100, inclusive. - When specified, the value must be greater than ImageGCLowThresholdPercent. - format: int32 - maximum: 100 - minimum: 0 - type: integer - imageGCLowThresholdPercent: + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: description: |- - ImageGCLowThresholdPercent is the percent of disk usage before which image - garbage collection is never run. Lowest disk usage to garbage collect to. - The percent is calculated by dividing this field value by 100, - so the field value must be between 0 and 100, inclusive. - When specified, the value must be less than imageGCHighThresholdPercent - format: int32 - maximum: 100 - minimum: 0 - type: integer - kubeReserved: - additionalProperties: - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - description: KubeReserved contains resources reserved for Kubernetes system components. 
- type: object - x-kubernetes-validations: - - message: valid keys for kubeReserved are ['cpu','memory','ephemeral-storage','pid'] - rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid') - - message: kubeReserved value cannot be a negative resource quantity - rule: self.all(x, !self[x].startsWith('-')) - maxPods: + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: description: |- - MaxPods is an override for the maximum number of pods that can run on - a worker node instance. - format: int32 + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 minimum: 0 type: integer - podsPerCore: + reason: description: |- - PodsPerCore is an override for the number of pods that can run on a worker node - instance based on the number of cpu cores. This value cannot exceed MaxPods, so, if - MaxPods is a lower value, that value will be used. - format: int32 - minimum: 0 - type: integer - systemReserved: + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. 
+ --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + instanceProfile: + description: InstanceProfile contains the resolved instance profile + for the role + type: string + securityGroups: + description: |- + SecurityGroups contains the current Security Groups values that are available to the + cluster under the SecurityGroups selectors. + items: + description: SecurityGroup contains resolved SecurityGroup selector + values utilized for node launch + properties: + id: + description: ID of the security group + type: string + name: + description: Name of the security group + type: string + required: + - id + type: object + type: array + subnets: + description: |- + Subnets contains the current Subnet values that are available to the + cluster under the subnet selectors. + items: + description: Subnet contains resolved Subnet selector values utilized + for node launch + properties: + id: + description: ID of the subnet + type: string + zone: + description: The associated availability zone + type: string + zoneID: + description: The associated availability zone ID + type: string + required: + - id + - zone + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} + - name: v1beta1 + schema: + openAPIV3Schema: + description: EC2NodeClass is the Schema for the EC2NodeClass API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + EC2NodeClassSpec is the top level specification for the AWS Karpenter Provider. + This will contain configuration necessary to launch instances in AWS. + properties: + amiFamily: + description: AMIFamily is the AMI family that instances use. + enum: + - AL2 + - AL2023 + - Bottlerocket + - Ubuntu + - Custom + - Windows2019 + - Windows2022 + type: string + amiSelectorTerms: + description: AMISelectorTerms is a list of or ami selector terms. + The terms are ORed. + items: + description: |- + AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. + properties: + id: + description: ID is the ami id in EC2 + pattern: ami-[0-9a-z]+ + type: string + name: + description: |- + Name is the ami name in EC2. + This value is the name field, which is different from the name tag. + type: string + owner: + description: |- + Owner is the owner for the ami. + You can specify a combination of AWS account IDs, "self", "amazon", and "aws-marketplace" + type: string + tags: additionalProperties: type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - description: SystemReserved contains resources reserved for OS system daemons and kernel memory. 
+ description: |- + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. + maxProperties: 20 type: object x-kubernetes-validations: - - message: valid keys for systemReserved are ['cpu','memory','ephemeral-storage','pid'] - rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid') - - message: systemReserved value cannot be a negative resource quantity - rule: self.all(x, !self[x].startsWith('-')) + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') type: object - x-kubernetes-validations: - - message: imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent - rule: 'has(self.imageGCHighThresholdPercent) && has(self.imageGCLowThresholdPercent) ? self.imageGCHighThresholdPercent > self.imageGCLowThresholdPercent : true' - - message: evictionSoft OwnerKey does not have a matching evictionSoftGracePeriod - rule: has(self.evictionSoft) ? self.evictionSoft.all(e, (e in self.evictionSoftGracePeriod)):true - - message: evictionSoftGracePeriod OwnerKey does not have a matching evictionSoft - rule: has(self.evictionSoftGracePeriod) ? self.evictionSoftGracePeriod.all(e, (e in self.evictionSoft)):true - metadataOptions: - default: - httpEndpoint: enabled - httpProtocolIPv6: disabled - httpPutResponseHopLimit: 1 - httpTokens: required - description: |- - MetadataOptions for the generated launch template of provisioned nodes. 
+ maxItems: 30 + type: array + x-kubernetes-validations: + - message: expected at least one, got none, ['tags', 'id', 'name'] + rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) + - message: '''id'' is mutually exclusive, cannot be set with a combination + of other fields in amiSelectorTerms' + rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name) || + has(x.owner)))' + associatePublicIPAddress: + description: AssociatePublicIPAddress controls if public IP addresses + are assigned to instances that are launched with the nodeclass. + type: boolean + blockDeviceMappings: + description: BlockDeviceMappings to be applied to provisioned nodes. + items: + properties: + deviceName: + description: The device name (for example, /dev/sdh or xvdh). + type: string + ebs: + description: EBS contains parameters used to automatically set + up EBS volumes when an instance is launched. + properties: + deleteOnTermination: + description: DeleteOnTermination indicates whether the EBS + volume is deleted on instance termination. + type: boolean + encrypted: + description: |- + Encrypted indicates whether the EBS volume is encrypted. Encrypted volumes can only + be attached to instances that support Amazon EBS encryption. If you are creating + a volume from a snapshot, you can't specify an encryption value. + type: boolean + iops: + description: |- + IOPS is the number of I/O operations per second (IOPS). For gp3, io1, and io2 volumes, + this represents the number of IOPS that are provisioned for the volume. For + gp2 volumes, this represents the baseline performance of the volume and the + rate at which the volume accumulates I/O credits for bursting. - This specifies the exposure of the Instance Metadata Service to - provisioned EC2 nodes. For more information, - see Instance Metadata and User Data - (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) - in the Amazon Elastic Compute Cloud User Guide. 
+ The following are the supported values for each volume type: - Refer to recommended, security best practices - (https://aws.github.io/aws-eks-best-practices/security/docs/iam/#restrict-access-to-the-instance-profile-assigned-to-the-worker-node) - for limiting exposure of Instance Metadata and User Data to pods. - If omitted, defaults to httpEndpoint enabled, with httpProtocolIPv6 - disabled, with httpPutResponseLimit of 1, and with httpTokens - required. - properties: - httpEndpoint: - default: enabled - description: |- - HTTPEndpoint enables or disables the HTTP metadata endpoint on provisioned - nodes. If metadata options is non-nil, but this parameter is not specified, - the default state is "enabled". + * gp3: 3,000-16,000 IOPS - If you specify a value of "disabled", instance metadata will not be accessible - on the node. - enum: - - enabled - - disabled - type: string - httpProtocolIPv6: - default: disabled - description: |- - HTTPProtocolIPv6 enables or disables the IPv6 endpoint for the instance metadata - service on provisioned nodes. If metadata options is non-nil, but this parameter - is not specified, the default state is "disabled". - enum: - - enabled - - disabled - type: string - httpPutResponseHopLimit: - default: 1 - description: |- - HTTPPutResponseHopLimit is the desired HTTP PUT response hop limit for - instance metadata requests. The larger the number, the further instance - metadata requests can travel. Possible values are integers from 1 to 64. - If metadata options is non-nil, but this parameter is not specified, the - default value is 1. - format: int64 - maximum: 64 - minimum: 1 - type: integer - httpTokens: - default: required - description: |- - HTTPTokens determines the state of token usage for instance metadata - requests. If metadata options is non-nil, but this parameter is not - specified, the default state is "required". 
+ * io1: 100-64,000 IOPS - If the state is optional, one can choose to retrieve instance metadata with - or without a signed token header on the request. If one retrieves the IAM - role credentials without a token, the version 1.0 role credentials are - returned. If one retrieves the IAM role credentials using a valid signed - token, the version 2.0 role credentials are returned. + * io2: 100-64,000 IOPS - If the state is "required", one must send a signed token header with any - instance metadata retrieval requests. In this state, retrieving the IAM - role credentials always returns the version 2.0 credentials; the version - 1.0 credentials are not available. - enum: - - required - - optional - type: string - type: object - role: - description: |- - Role is the AWS identity that nodes use. This field is immutable. - This field is mutually exclusive from instanceProfile. - Marking this field as immutable avoids concerns around terminating managed instance profiles from running instances. - This field may be made mutable in the future, assuming the correct garbage collection and drift handling is implemented - for the old instance profiles on an update. - type: string - x-kubernetes-validations: - - message: role cannot be empty - rule: self != '' - - message: immutable field changed - rule: self == oldSelf - securityGroupSelectorTerms: - description: SecurityGroupSelectorTerms is a list of or security group selector terms. The terms are ORed. - items: - description: |- - SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. - properties: - id: - description: ID is the security group id in EC2 - pattern: sg-[0-9a-z]+ - type: string - name: - description: |- - Name is the security group name in EC2. - This value is the name field, which is different from the name tag. 
- type: string - tags: - additionalProperties: + For io1 and io2 volumes, we guarantee 64,000 IOPS only for Instances built + on the Nitro System (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html#ec2-nitro-instances). + Other instance families guarantee performance up to 32,000 IOPS. + + + This parameter is supported for io1, io2, and gp3 volumes only. This parameter + is not supported for gp2, st1, sc1, or standard volumes. + format: int64 + type: integer + kmsKeyID: + description: KMSKeyID (ARN) of the symmetric Key Management + Service (KMS) CMK used for encryption. type: string - description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. - maxProperties: 20 - type: object - x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') - type: object - maxItems: 30 - type: array - x-kubernetes-validations: - - message: securityGroupSelectorTerms cannot be empty - rule: self.size() != 0 - - message: expected at least one, got none, ['tags', 'id', 'name'] - rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) - - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' - rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))' - - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' - rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))' - subnetSelectorTerms: - description: SubnetSelectorTerms is a list of or subnet selector terms. The terms are ORed. - items: - description: |- - SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. 
- properties: - id: - description: ID is the subnet id in EC2 - pattern: subnet-[0-9a-z]+ - type: string - tags: - additionalProperties: + snapshotID: + description: SnapshotID is the ID of an EBS snapshot type: string - description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. - maxProperties: 20 - type: object - x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') - type: object - maxItems: 30 - type: array - x-kubernetes-validations: - - message: subnetSelectorTerms cannot be empty - rule: self.size() != 0 - - message: expected at least one, got none, ['tags', 'id'] - rule: self.all(x, has(x.tags) || has(x.id)) - - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in subnetSelectorTerms' - rule: '!self.all(x, has(x.id) && has(x.tags))' - tags: - additionalProperties: - type: string - description: Tags to be applied on ec2 resources like instances and launch templates. - type: object - x-kubernetes-validations: - - message: empty tag keys aren't supported - rule: self.all(k, k != '') - - message: tag contains a restricted tag matching kubernetes.io/cluster/ - rule: self.all(k, !k.startsWith('kubernetes.io/cluster') ) - - message: tag contains a restricted tag matching karpenter.sh/nodepool - rule: self.all(k, k != 'karpenter.sh/nodepool') - - message: tag contains a restricted tag matching karpenter.sh/managed-by - rule: self.all(k, k !='karpenter.sh/managed-by') - - message: tag contains a restricted tag matching karpenter.sh/nodeclaim - rule: self.all(k, k !='karpenter.sh/nodeclaim') - - message: tag contains a restricted tag matching karpenter.k8s.aws/ec2nodeclass - rule: self.all(k, k !='karpenter.k8s.aws/ec2nodeclass') - userData: - description: |- - UserData to be applied to the provisioned nodes. 
- It must be in the appropriate format based on the AMIFamily in use. Karpenter will merge certain fields into - this UserData to ensure nodes are being provisioned with the correct configuration. - type: string - required: - - amiFamily - - securityGroupSelectorTerms - - subnetSelectorTerms - type: object - x-kubernetes-validations: - - message: amiSelectorTerms is required when amiFamily == 'Custom' - rule: 'self.amiFamily == ''Custom'' ? self.amiSelectorTerms.size() != 0 : true' - - message: must specify exactly one of ['role', 'instanceProfile'] - rule: (has(self.role) && !has(self.instanceProfile)) || (!has(self.role) && has(self.instanceProfile)) - - message: changing from 'instanceProfile' to 'role' is not supported. You must delete and recreate this node class if you want to change this. - rule: (has(oldSelf.role) && has(self.role)) || (has(oldSelf.instanceProfile) && has(self.instanceProfile)) - status: - description: EC2NodeClassStatus contains the resolved state of the EC2NodeClass - properties: - amis: - description: |- - AMI contains the current AMI values that are available to the - cluster under the AMI selectors. - items: - description: AMI contains resolved AMI selector values utilized for node launch - properties: - id: - description: ID of the AMI - type: string - name: - description: Name of the AMI - type: string - requirements: - description: Requirements of the AMI to be utilized on an instance type - items: + throughput: description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. 
If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - required: - - id - - requirements - type: object - type: array - conditions: - description: Conditions contains signals for health and readiness - items: - description: Condition aliases the upstream type and adds additional helper methods - properties: - lastTransitionTime: - description: |- - lastTransitionTime is the last time the condition transitioned from one status to another. - This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. - format: date-time - type: string - message: - description: |- - message is a human readable message indicating details about the transition. - This may be an empty string. - maxLength: 32768 - type: string - observedGeneration: - description: |- - observedGeneration represents the .metadata.generation that the condition was set based upon. - For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date - with respect to the current state of the instance. - format: int64 - minimum: 0 - type: integer - reason: - description: |- - reason contains a programmatic identifier indicating the reason for the condition's last transition. - Producers of specific condition types may define expected values and meanings for this field, - and whether the values are considered a guaranteed API. - The value should be a CamelCase string. - This field may not be empty. 
- maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, Unknown. - enum: - - "True" - - "False" - - Unknown - type: string - type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array - instanceProfile: - description: InstanceProfile contains the resolved instance profile for the role - type: string - securityGroups: - description: |- - SecurityGroups contains the current Security Groups values that are available to the - cluster under the SecurityGroups selectors. - items: - description: SecurityGroup contains resolved SecurityGroup selector values utilized for node launch - properties: - id: - description: ID of the security group - type: string - name: - description: Name of the security group - type: string - required: - - id - type: object - type: array - subnets: - description: |- - Subnets contains the current Subnet values that are available to the - cluster under the subnet selectors. 
- items: - description: Subnet contains resolved Subnet selector values utilized for node launch - properties: - id: - description: ID of the subnet - type: string - zone: - description: The associated availability zone - type: string - zoneID: - description: The associated availability zone ID - type: string - required: - - id - - zone - type: object - type: array - type: object - type: object - served: true - storage: true - subresources: - status: {} - - name: v1beta1 - schema: - openAPIV3Schema: - description: EC2NodeClass is the Schema for the EC2NodeClass API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: |- - EC2NodeClassSpec is the top level specification for the AWS Karpenter Provider. - This will contain configuration necessary to launch instances in AWS. - properties: - amiFamily: - description: AMIFamily is the AMI family that instances use. - enum: - - AL2 - - AL2023 - - Bottlerocket - - Ubuntu - - Custom - - Windows2019 - - Windows2022 - type: string - amiSelectorTerms: - description: AMISelectorTerms is a list of or ami selector terms. The terms are ORed. - items: - description: |- - AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. 
- properties: - id: - description: ID is the ami id in EC2 - pattern: ami-[0-9a-z]+ - type: string - name: - description: |- - Name is the ami name in EC2. - This value is the name field, which is different from the name tag. - type: string - owner: - description: |- - Owner is the owner for the ami. - You can specify a combination of AWS account IDs, "self", "amazon", and "aws-marketplace" - type: string - tags: - additionalProperties: - type: string - description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. - maxProperties: 20 - type: object - x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') - type: object - maxItems: 30 - type: array - x-kubernetes-validations: - - message: expected at least one, got none, ['tags', 'id', 'name'] - rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) - - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in amiSelectorTerms' - rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name) || has(x.owner)))' - associatePublicIPAddress: - description: AssociatePublicIPAddress controls if public IP addresses are assigned to instances that are launched with the nodeclass. - type: boolean - blockDeviceMappings: - description: BlockDeviceMappings to be applied to provisioned nodes. - items: - properties: - deviceName: - description: The device name (for example, /dev/sdh or xvdh). - type: string - ebs: - description: EBS contains parameters used to automatically set up EBS volumes when an instance is launched. - properties: - deleteOnTermination: - description: DeleteOnTermination indicates whether the EBS volume is deleted on instance termination. - type: boolean - encrypted: - description: |- - Encrypted indicates whether the EBS volume is encrypted. 
Encrypted volumes can only - be attached to instances that support Amazon EBS encryption. If you are creating - a volume from a snapshot, you can't specify an encryption value. - type: boolean - iops: - description: |- - IOPS is the number of I/O operations per second (IOPS). For gp3, io1, and io2 volumes, - this represents the number of IOPS that are provisioned for the volume. For - gp2 volumes, this represents the baseline performance of the volume and the - rate at which the volume accumulates I/O credits for bursting. - - - The following are the supported values for each volume type: + Throughput to provision for a gp3 volume, with a maximum of 1,000 MiB/s. + Valid Range: Minimum value of 125. Maximum value of 1000. + format: int64 + type: integer + volumeSize: + description: |- + VolumeSize in `Gi`, `G`, `Ti`, or `T`. You must specify either a snapshot ID or + a volume size. The following are the supported volumes sizes for each volume + type: - * gp3: 3,000-16,000 IOPS + * gp2 and gp3: 1-16,384 - * io1: 100-64,000 IOPS + * io1 and io2: 4-16,384 - * io2: 100-64,000 IOPS + * st1 and sc1: 125-16,384 - For io1 and io2 volumes, we guarantee 64,000 IOPS only for Instances built - on the Nitro System (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html#ec2-nitro-instances). - Other instance families guarantee performance up to 32,000 IOPS. + * standard: 1-1,024 + pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$ + type: string + volumeType: + description: |- + VolumeType of the block device. + For more information, see Amazon EBS volume types (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html) + in the Amazon Elastic Compute Cloud User Guide. 
+ enum: + - standard + - io1 + - io2 + - gp2 + - sc1 + - st1 + - gp3 + type: string + type: object + x-kubernetes-validations: + - message: snapshotID or volumeSize must be defined + rule: has(self.snapshotID) || has(self.volumeSize) + rootVolume: + description: |- + RootVolume is a flag indicating if this device is mounted as kubelet root dir. You can + configure at most one root volume in BlockDeviceMappings. + type: boolean + type: object + maxItems: 50 + type: array + x-kubernetes-validations: + - message: must have only one blockDeviceMappings with rootVolume + rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() + <= 1 + context: + description: |- + Context is a Reserved field in EC2 APIs + https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html + type: string + detailedMonitoring: + description: DetailedMonitoring controls if detailed monitoring is + enabled for instances that are launched + type: boolean + instanceProfile: + description: |- + InstanceProfile is the AWS entity that instances use. + This field is mutually exclusive from role. + The instance profile should already have a role assigned to it that Karpenter + has PassRole permission on for instance launch using this instanceProfile to succeed. + type: string + x-kubernetes-validations: + - message: instanceProfile cannot be empty + rule: self != '' + instanceStorePolicy: + description: InstanceStorePolicy specifies how to handle instance-store + disks. + enum: + - RAID0 + type: string + metadataOptions: + default: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 1 + httpTokens: required + description: |- + MetadataOptions for the generated launch template of provisioned nodes. - This parameter is supported for io1, io2, and gp3 volumes only. This parameter - is not supported for gp2, st1, sc1, or standard volumes. 
- format: int64 - type: integer - kmsKeyID: - description: KMSKeyID (ARN) of the symmetric Key Management Service (KMS) CMK used for encryption. - type: string - snapshotID: - description: SnapshotID is the ID of an EBS snapshot - type: string - throughput: - description: |- - Throughput to provision for a gp3 volume, with a maximum of 1,000 MiB/s. - Valid Range: Minimum value of 125. Maximum value of 1000. - format: int64 - type: integer - volumeSize: - description: |- - VolumeSize in `Gi`, `G`, `Ti`, or `T`. You must specify either a snapshot ID or - a volume size. The following are the supported volumes sizes for each volume - type: + This specifies the exposure of the Instance Metadata Service to + provisioned EC2 nodes. For more information, + see Instance Metadata and User Data + (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) + in the Amazon Elastic Compute Cloud User Guide. - * gp2 and gp3: 1-16,384 + Refer to recommended, security best practices + (https://aws.github.io/aws-eks-best-practices/security/docs/iam/#restrict-access-to-the-instance-profile-assigned-to-the-worker-node) + for limiting exposure of Instance Metadata and User Data to pods. + If omitted, defaults to httpEndpoint enabled, with httpProtocolIPv6 + disabled, with httpPutResponseLimit of 1, and with httpTokens + required. + properties: + httpEndpoint: + default: enabled + description: |- + HTTPEndpoint enables or disables the HTTP metadata endpoint on provisioned + nodes. If metadata options is non-nil, but this parameter is not specified, + the default state is "enabled". - * io1 and io2: 4-16,384 + If you specify a value of "disabled", instance metadata will not be accessible + on the node. + enum: + - enabled + - disabled + type: string + httpProtocolIPv6: + default: disabled + description: |- + HTTPProtocolIPv6 enables or disables the IPv6 endpoint for the instance metadata + service on provisioned nodes. 
If metadata options is non-nil, but this parameter + is not specified, the default state is "disabled". + enum: + - enabled + - disabled + type: string + httpPutResponseHopLimit: + default: 2 + description: |- + HTTPPutResponseHopLimit is the desired HTTP PUT response hop limit for + instance metadata requests. The larger the number, the further instance + metadata requests can travel. Possible values are integers from 1 to 64. + If metadata options is non-nil, but this parameter is not specified, the + default value is 2. + format: int64 + maximum: 64 + minimum: 1 + type: integer + httpTokens: + default: required + description: |- + HTTPTokens determines the state of token usage for instance metadata + requests. If metadata options is non-nil, but this parameter is not + specified, the default state is "required". - * st1 and sc1: 125-16,384 + If the state is optional, one can choose to retrieve instance metadata with + or without a signed token header on the request. If one retrieves the IAM + role credentials without a token, the version 1.0 role credentials are + returned. If one retrieves the IAM role credentials using a valid signed + token, the version 2.0 role credentials are returned. - * standard: 1-1,024 - pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$ + If the state is "required", one must send a signed token header with any + instance metadata retrieval requests. In this state, retrieving the IAM + role credentials always returns the version 2.0 credentials; the version + 1.0 credentials are not available. + enum: + - required + - optional + type: string + type: object + role: + description: |- + Role is the AWS identity that nodes use. This field is immutable. + This field is mutually exclusive from instanceProfile. 
+ Marking this field as immutable avoids concerns around terminating managed instance profiles from running instances. + This field may be made mutable in the future, assuming the correct garbage collection and drift handling is implemented + for the old instance profiles on an update. + type: string + x-kubernetes-validations: + - message: role cannot be empty + rule: self != '' + - message: immutable field changed + rule: self == oldSelf + securityGroupSelectorTerms: + description: SecurityGroupSelectorTerms is a list of or security group + selector terms. The terms are ORed. + items: + description: |- + SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. + properties: + id: + description: ID is the security group id in EC2 + pattern: sg-[0-9a-z]+ + type: string + name: + description: |- + Name is the security group name in EC2. + This value is the name field, which is different from the name tag. + type: string + tags: + additionalProperties: + type: string + description: |- + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. 
+ maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: securityGroupSelectorTerms cannot be empty + rule: self.size() != 0 + - message: expected at least one, got none, ['tags', 'id', 'name'] + rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) + - message: '''id'' is mutually exclusive, cannot be set with a combination + of other fields in securityGroupSelectorTerms' + rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))' + - message: '''name'' is mutually exclusive, cannot be set with a combination + of other fields in securityGroupSelectorTerms' + rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))' + subnetSelectorTerms: + description: SubnetSelectorTerms is a list of or subnet selector terms. + The terms are ORed. + items: + description: |- + SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. + properties: + id: + description: ID is the subnet id in EC2 + pattern: subnet-[0-9a-z]+ + type: string + tags: + additionalProperties: + type: string + description: |- + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. 
+ maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: subnetSelectorTerms cannot be empty + rule: self.size() != 0 + - message: expected at least one, got none, ['tags', 'id'] + rule: self.all(x, has(x.tags) || has(x.id)) + - message: '''id'' is mutually exclusive, cannot be set with a combination + of other fields in subnetSelectorTerms' + rule: '!self.all(x, has(x.id) && has(x.tags))' + tags: + additionalProperties: + type: string + description: Tags to be applied on ec2 resources like instances and + launch templates. + type: object + x-kubernetes-validations: + - message: empty tag keys aren't supported + rule: self.all(k, k != '') + - message: tag contains a restricted tag matching kubernetes.io/cluster/ + rule: self.all(k, !k.startsWith('kubernetes.io/cluster') ) + - message: tag contains a restricted tag matching karpenter.sh/nodepool + rule: self.all(k, k != 'karpenter.sh/nodepool') + - message: tag contains a restricted tag matching karpenter.sh/managed-by + rule: self.all(k, k !='karpenter.sh/managed-by') + - message: tag contains a restricted tag matching karpenter.sh/nodeclaim + rule: self.all(k, k !='karpenter.sh/nodeclaim') + - message: tag contains a restricted tag matching karpenter.k8s.aws/ec2nodeclass + rule: self.all(k, k !='karpenter.k8s.aws/ec2nodeclass') + userData: + description: |- + UserData to be applied to the provisioned nodes. + It must be in the appropriate format based on the AMIFamily in use. Karpenter will merge certain fields into + this UserData to ensure nodes are being provisioned with the correct configuration. 
+ type: string + required: + - amiFamily + - securityGroupSelectorTerms + - subnetSelectorTerms + type: object + x-kubernetes-validations: + - message: amiSelectorTerms is required when amiFamily == 'Custom' + rule: 'self.amiFamily == ''Custom'' ? self.amiSelectorTerms.size() != + 0 : true' + - message: must specify exactly one of ['role', 'instanceProfile'] + rule: (has(self.role) && !has(self.instanceProfile)) || (!has(self.role) + && has(self.instanceProfile)) + - message: changing from 'instanceProfile' to 'role' is not supported. + You must delete and recreate this node class if you want to change + this. + rule: (has(oldSelf.role) && has(self.role)) || (has(oldSelf.instanceProfile) + && has(self.instanceProfile)) + status: + description: EC2NodeClassStatus contains the resolved state of the EC2NodeClass + properties: + amis: + description: |- + AMI contains the current AMI values that are available to the + cluster under the AMI selectors. + items: + description: AMI contains resolved AMI selector values utilized + for node launch + properties: + id: + description: ID of the AMI + type: string + name: + description: Name of the AMI + type: string + requirements: + description: Requirements of the AMI to be utilized on an instance + type + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector applies to. type: string - volumeType: + operator: description: |- - VolumeType of the block device. - For more information, see Amazon EBS volume types (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html) - in the Amazon Elastic Compute Cloud User Guide. - enum: - - standard - - io1 - - io2 - - gp2 - - sc1 - - st1 - - gp3 + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. 
type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator type: object - x-kubernetes-validations: - - message: snapshotID or volumeSize must be defined - rule: has(self.snapshotID) || has(self.volumeSize) - rootVolume: - description: |- - RootVolume is a flag indicating if this device is mounted as kubelet root dir. You can - configure at most one root volume in BlockDeviceMappings. - type: boolean - type: object - maxItems: 50 - type: array - x-kubernetes-validations: - - message: must have only one blockDeviceMappings with rootVolume - rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() <= 1 - context: - description: |- - Context is a Reserved field in EC2 APIs - https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html - type: string - detailedMonitoring: - description: DetailedMonitoring controls if detailed monitoring is enabled for instances that are launched - type: boolean - instanceProfile: - description: |- - InstanceProfile is the AWS entity that instances use. - This field is mutually exclusive from role. - The instance profile should already have a role assigned to it that Karpenter - has PassRole permission on for instance launch using this instanceProfile to succeed. - type: string - x-kubernetes-validations: - - message: instanceProfile cannot be empty - rule: self != '' - instanceStorePolicy: - description: InstanceStorePolicy specifies how to handle instance-store disks. 
- enum: - - RAID0 - type: string - metadataOptions: - default: - httpEndpoint: enabled - httpProtocolIPv6: disabled - httpPutResponseHopLimit: 1 - httpTokens: required - description: |- - MetadataOptions for the generated launch template of provisioned nodes. - - - This specifies the exposure of the Instance Metadata Service to - provisioned EC2 nodes. For more information, - see Instance Metadata and User Data - (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) - in the Amazon Elastic Compute Cloud User Guide. - - - Refer to recommended, security best practices - (https://aws.github.io/aws-eks-best-practices/security/docs/iam/#restrict-access-to-the-instance-profile-assigned-to-the-worker-node) - for limiting exposure of Instance Metadata and User Data to pods. - If omitted, defaults to httpEndpoint enabled, with httpProtocolIPv6 - disabled, with httpPutResponseLimit of 1, and with httpTokens - required. + type: array + required: + - id + - requirements + type: object + type: array + conditions: + description: Conditions contains signals for health and readiness + items: + description: Condition aliases the upstream type and adds additional + helper methods properties: - httpEndpoint: - default: enabled + lastTransitionTime: description: |- - HTTPEndpoint enables or disables the HTTP metadata endpoint on provisioned - nodes. If metadata options is non-nil, but this parameter is not specified, - the default state is "enabled". - - - If you specify a value of "disabled", instance metadata will not be accessible - on the node. - enum: - - enabled - - disabled + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. 
+ format: date-time type: string - httpProtocolIPv6: - default: disabled + message: description: |- - HTTPProtocolIPv6 enables or disables the IPv6 endpoint for the instance metadata - service on provisioned nodes. If metadata options is non-nil, but this parameter - is not specified, the default state is "disabled". - enum: - - enabled - - disabled + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 type: string - httpPutResponseHopLimit: - default: 2 + observedGeneration: description: |- - HTTPPutResponseHopLimit is the desired HTTP PUT response hop limit for - instance metadata requests. The larger the number, the further instance - metadata requests can travel. Possible values are integers from 1 to 64. - If metadata options is non-nil, but this parameter is not specified, the - default value is 2. + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. format: int64 - maximum: 64 - minimum: 1 + minimum: 0 type: integer - httpTokens: - default: required + reason: description: |- - HTTPTokens determines the state of token usage for instance metadata - requests. If metadata options is non-nil, but this parameter is not - specified, the default state is "required". - - - If the state is optional, one can choose to retrieve instance metadata with - or without a signed token header on the request. If one retrieves the IAM - role credentials without a token, the version 1.0 role credentials are - returned. If one retrieves the IAM role credentials using a valid signed - token, the version 2.0 role credentials are returned. - - - If the state is "required", one must send a signed token header with any - instance metadata retrieval requests. 
In this state, retrieving the IAM - role credentials always returns the version 2.0 credentials; the version - 1.0 credentials are not available. + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. enum: - - required - - optional + - "True" + - "False" + - Unknown type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type type: object - role: - description: |- - Role is the AWS identity that nodes use. This field is immutable. - This field is mutually exclusive from instanceProfile. - Marking this field as immutable avoids concerns around terminating managed instance profiles from running instances. - This field may be made mutable in the future, assuming the correct garbage collection and drift handling is implemented - for the old instance profiles on an update. 
- type: string - x-kubernetes-validations: - - message: role cannot be empty - rule: self != '' - - message: immutable field changed - rule: self == oldSelf - securityGroupSelectorTerms: - description: SecurityGroupSelectorTerms is a list of or security group selector terms. The terms are ORed. - items: - description: |- - SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. - properties: - id: - description: ID is the security group id in EC2 - pattern: sg-[0-9a-z]+ - type: string - name: - description: |- - Name is the security group name in EC2. - This value is the name field, which is different from the name tag. - type: string - tags: - additionalProperties: - type: string - description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. - maxProperties: 20 - type: object - x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') - type: object - maxItems: 30 - type: array - x-kubernetes-validations: - - message: securityGroupSelectorTerms cannot be empty - rule: self.size() != 0 - - message: expected at least one, got none, ['tags', 'id', 'name'] - rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) - - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' - rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))' - - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' - rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))' - subnetSelectorTerms: - description: SubnetSelectorTerms is a list of or subnet selector terms. The terms are ORed. 
- items: - description: |- - SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. - properties: - id: - description: ID is the subnet id in EC2 - pattern: subnet-[0-9a-z]+ - type: string - tags: - additionalProperties: - type: string - description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. - maxProperties: 20 - type: object - x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') - type: object - maxItems: 30 - type: array - x-kubernetes-validations: - - message: subnetSelectorTerms cannot be empty - rule: self.size() != 0 - - message: expected at least one, got none, ['tags', 'id'] - rule: self.all(x, has(x.tags) || has(x.id)) - - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in subnetSelectorTerms' - rule: '!self.all(x, has(x.id) && has(x.tags))' - tags: - additionalProperties: - type: string - description: Tags to be applied on ec2 resources like instances and launch templates. + type: array + instanceProfile: + description: InstanceProfile contains the resolved instance profile + for the role + type: string + securityGroups: + description: |- + SecurityGroups contains the current Security Groups values that are available to the + cluster under the SecurityGroups selectors. 
+ items: + description: SecurityGroup contains resolved SecurityGroup selector + values utilized for node launch + properties: + id: + description: ID of the security group + type: string + name: + description: Name of the security group + type: string + required: + - id type: object - x-kubernetes-validations: - - message: empty tag keys aren't supported - rule: self.all(k, k != '') - - message: tag contains a restricted tag matching kubernetes.io/cluster/ - rule: self.all(k, !k.startsWith('kubernetes.io/cluster') ) - - message: tag contains a restricted tag matching karpenter.sh/nodepool - rule: self.all(k, k != 'karpenter.sh/nodepool') - - message: tag contains a restricted tag matching karpenter.sh/managed-by - rule: self.all(k, k !='karpenter.sh/managed-by') - - message: tag contains a restricted tag matching karpenter.sh/nodeclaim - rule: self.all(k, k !='karpenter.sh/nodeclaim') - - message: tag contains a restricted tag matching karpenter.k8s.aws/ec2nodeclass - rule: self.all(k, k !='karpenter.k8s.aws/ec2nodeclass') - userData: - description: |- - UserData to be applied to the provisioned nodes. - It must be in the appropriate format based on the AMIFamily in use. Karpenter will merge certain fields into - this UserData to ensure nodes are being provisioned with the correct configuration. - type: string - required: - - amiFamily - - securityGroupSelectorTerms - - subnetSelectorTerms - type: object - x-kubernetes-validations: - - message: amiSelectorTerms is required when amiFamily == 'Custom' - rule: 'self.amiFamily == ''Custom'' ? self.amiSelectorTerms.size() != 0 : true' - - message: must specify exactly one of ['role', 'instanceProfile'] - rule: (has(self.role) && !has(self.instanceProfile)) || (!has(self.role) && has(self.instanceProfile)) - - message: changing from 'instanceProfile' to 'role' is not supported. You must delete and recreate this node class if you want to change this. 
- rule: (has(oldSelf.role) && has(self.role)) || (has(oldSelf.instanceProfile) && has(self.instanceProfile)) - status: - description: EC2NodeClassStatus contains the resolved state of the EC2NodeClass - properties: - amis: - description: |- - AMI contains the current AMI values that are available to the - cluster under the AMI selectors. - items: - description: AMI contains resolved AMI selector values utilized for node launch - properties: - id: - description: ID of the AMI - type: string - name: - description: Name of the AMI - type: string - requirements: - description: Requirements of the AMI to be utilized on an instance type - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - required: - - id - - requirements - type: object - type: array - conditions: - description: Conditions contains signals for health and readiness - items: - description: Condition aliases the upstream type and adds additional helper methods - properties: - lastTransitionTime: - description: |- - lastTransitionTime is the last time the condition transitioned from one status to another. 
- This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. - format: date-time - type: string - message: - description: |- - message is a human readable message indicating details about the transition. - This may be an empty string. - maxLength: 32768 - type: string - observedGeneration: - description: |- - observedGeneration represents the .metadata.generation that the condition was set based upon. - For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date - with respect to the current state of the instance. - format: int64 - minimum: 0 - type: integer - reason: - description: |- - reason contains a programmatic identifier indicating the reason for the condition's last transition. - Producers of specific condition types may define expected values and meanings for this field, - and whether the values are considered a guaranteed API. - The value should be a CamelCase string. - This field may not be empty. - maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, Unknown. - enum: - - "True" - - "False" - - Unknown - type: string - type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. 
- The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array - instanceProfile: - description: InstanceProfile contains the resolved instance profile for the role - type: string - securityGroups: - description: |- - SecurityGroups contains the current Security Groups values that are available to the - cluster under the SecurityGroups selectors. - items: - description: SecurityGroup contains resolved SecurityGroup selector values utilized for node launch - properties: - id: - description: ID of the security group - type: string - name: - description: Name of the security group - type: string - required: - - id - type: object - type: array - subnets: - description: |- - Subnets contains the current Subnet values that are available to the - cluster under the subnet selectors. - items: - description: Subnet contains resolved Subnet selector values utilized for node launch - properties: - id: - description: ID of the subnet - type: string - zone: - description: The associated availability zone - type: string - zoneID: - description: The associated availability zone ID - type: string - required: - - id - - zone - type: object - type: array - type: object - type: object - served: true - storage: false - subresources: - status: {} - conversion: - strategy: Webhook - webhook: - conversionReviewVersions: - - v1beta1 - - v1 - clientConfig: - service: - name: karpenter - namespace: kube-system - port: 8443 + type: array + subnets: + description: |- + Subnets contains the current Subnet values that are available to the + cluster under the subnet selectors. 
+ items: + description: Subnet contains resolved Subnet selector values utilized + for node launch + properties: + id: + description: ID of the subnet + type: string + zone: + description: The associated availability zone + type: string + zoneID: + description: The associated availability zone ID + type: string + required: + - id + - zone + type: object + type: array + type: object + type: object + served: true + storage: false + subresources: + status: {} diff --git a/pkg/apis/v1/ec2nodeclass.go b/pkg/apis/v1/ec2nodeclass.go index 1c43db9f9607..3a4c8edaded8 100644 --- a/pkg/apis/v1/ec2nodeclass.go +++ b/pkg/apis/v1/ec2nodeclass.go @@ -16,6 +16,8 @@ package v1 import ( "fmt" + "log" + "strings" "github.com/mitchellh/hashstructure/v2" "github.com/samber/lo" @@ -46,15 +48,14 @@ type EC2NodeClassSpec struct { // +optional AssociatePublicIPAddress *bool `json:"associatePublicIPAddress,omitempty"` // AMISelectorTerms is a list of or ami selector terms. The terms are ORed. - // +kubebuilder:validation:XValidation:message="expected at least one, got none, ['tags', 'id', 'name']",rule="self.all(x, has(x.tags) || has(x.id) || has(x.name))" - // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set with a combination of other fields in amiSelectorTerms",rule="!self.all(x, has(x.id) && (has(x.tags) || has(x.name) || has(x.owner)))" + // +kubebuilder:validation:XValidation:message="expected at least one, got none, ['tags', 'id', 'name', 'alias']",rule="self.all(x, has(x.tags) || has(x.id) || has(x.name) || has(x.alias))" + // +kubebuilder:validation:XValidation:message="'id' is mutually exclusive, cannot be set with a combination of other fields in amiSelectorTerms",rule="!self.exists(x, has(x.id) && (has(x.alias) || has(x.tags) || has(x.name) || has(x.owner)))" + // +kubebuilder:validation:XValidation:message="'alias' is mutually exclusive, cannot be set with a combination of other fields in amiSelectorTerms",rule="!self.exists(x, has(x.alias) 
&& (has(x.id) || has(x.tags) || has(x.name) || has(x.owner)))" + // +kubebuilder:validation:XValidation:message="'alias' is mutually exclusive, cannot be set with a combination of other amiSelectorTerms",rule="!(self.exists(x, has(x.alias)) && self.size() != 1)" + // +kubebuilder:validation:MinItems:=1 // +kubebuilder:validation:MaxItems:=30 - // +optional - AMISelectorTerms []AMISelectorTerm `json:"amiSelectorTerms,omitempty" hash:"ignore"` - // AMIFamily is the AMI family that instances use. - // +kubebuilder:validation:Enum:={AL2,AL2023,Bottlerocket,Ubuntu,Custom,Windows2019,Windows2022} // +required - AMIFamily *string `json:"amiFamily"` + AMISelectorTerms []AMISelectorTerm `json:"amiSelectorTerms" hash:"ignore"` // UserData to be applied to the provisioned nodes. // It must be in the appropriate format based on the AMIFamily in use. Karpenter will merge certain fields into // this UserData to ensure nodes are being provisioned with the correct configuration. @@ -163,6 +164,17 @@ type SecurityGroupSelectorTerm struct { // AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. // If multiple fields are used for selection, the requirements are ANDed. type AMISelectorTerm struct { + // Alias specifies which EKS optimized AMI to select. + // Each alias consists of a family and a version, specified as "family@version". + // Valid families include: al2, al2023, bottlerocket, windows2019, and windows2022. + // The version can either be pinned to a specific AMI release, with that AMI's version format (ex: "al2023@v20240625" or "bottlerocket@1.10.0"). + // The version can also be set to "latest" for any family. Setting the version to latest will result in drift when a new AMI is released. + // Note: The Windows families do **not** support version pinning, and only latest may be used. 
+ // +kubebuilder:validation:XValidation:message="'alias' is improperly formatted, must match the format 'family@version'",rule="self.matches('^[a-zA-Z0-9]*@.*$')" + // +kubebuilder:validation:XValidation:message="family is not supported, must be one of the following: 'al2', 'al2023', 'bottlerocket', 'windows2019', 'windows2022'",rule="self.find('^[^@]+') in ['al2','al2023','bottlerocket','windows2019','windows2022']" + // +kubebuilder:validation:MaxLength=30 + // +optional + Alias string `json:"alias,omitempty"` // Tags is a map of key/value tags used to select subnets // Specifying '*' for a value selects all values for a given tag key. // +kubebuilder:validation:XValidation:message="empty tag keys or values aren't supported",rule="self.all(k, k != '' && self[k] != '')" @@ -405,7 +417,6 @@ type EC2NodeClass struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` - // +kubebuilder:validation:XValidation:message="amiSelectorTerms is required when amiFamily == 'Custom'",rule="self.amiFamily == 'Custom' ? self.amiSelectorTerms.size() != 0 : true" // +kubebuilder:validation:XValidation:message="must specify exactly one of ['role', 'instanceProfile']",rule="(has(self.role) && !has(self.instanceProfile)) || (!has(self.role) && has(self.instanceProfile))" // +kubebuilder:validation:XValidation:message="changing from 'instanceProfile' to 'role' is not supported. 
You must delete and recreate this node class if you want to change this.",rule="(has(oldSelf.role) && has(self.role)) || (has(oldSelf.instanceProfile) && has(self.instanceProfile))" Spec EC2NodeClassSpec `json:"spec,omitempty"` @@ -442,6 +453,39 @@ func (in *EC2NodeClass) InstanceProfileTags(clusterName string) map[string]strin }) } +func (in *EC2NodeClassSpec) AMIFamily() string { + if term, ok := lo.Find(in.AMISelectorTerms, func(t AMISelectorTerm) bool { + return t.Alias != "" + }); ok { + switch strings.Split(term.Alias, "@")[0] { + case "al2": + return AMIFamilyAL2 + case "al2023": + return AMIFamilyAL2023 + case "bottlerocket": + return AMIFamilyBottlerocket + case "windows2019": + return AMIFamilyWindows2019 + case "windows2022": + return AMIFamilyWindows2022 + } + } + return AMIFamilyCustom +} + +func (in *EC2NodeClassSpec) AMIVersion() string { + if term, ok := lo.Find(in.AMISelectorTerms, func(t AMISelectorTerm) bool { + return t.Alias != "" + }); ok { + parts := strings.Split(term.Alias, "@") + if len(parts) != 2 { + log.Fatalf("failed to parse AMI alias %q, invalid format", term.Alias) + } + return parts[1] + } + return "" +} + // EC2NodeClassList contains a list of EC2NodeClass // +kubebuilder:object:root=true type EC2NodeClassList struct { diff --git a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go index d862d62e9b8f..3c41ab32936d 100644 --- a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go +++ b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go @@ -18,6 +18,7 @@ import ( "time" "github.com/aws/aws-sdk-go/aws" + "github.com/imdario/mergo" "github.com/samber/lo" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -42,8 +43,8 @@ var _ = Describe("CEL/Validation", func() { nc = &v1.EC2NodeClass{ ObjectMeta: test.ObjectMeta(metav1.ObjectMeta{}), Spec: v1.EC2NodeClassSpec{ - AMIFamily: lo.ToPtr(v1.AMIFamilyAL2023), - Role: "role-1", + AMISelectorTerms: []v1.AMISelectorTerm{{Alias: 
"al2023@latest"}}, + Role: "role-1", SecurityGroupSelectorTerms: []v1.SecurityGroupSelectorTerm{ { Tags: map[string]string{ @@ -343,6 +344,12 @@ var _ = Describe("CEL/Validation", func() { }) }) Context("AMISelectorTerms", func() { + It("should succeed with a valid ami selector on alias", func() { + nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ + Alias: "al2023@latest", + }} + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) It("should succeed with a valid ami selector on tags", func() { nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ { @@ -389,6 +396,12 @@ var _ = Describe("CEL/Validation", func() { } Expect(env.Client.Create(ctx, nc)).To(Succeed()) }) + It("should fail when no ami selector terms are specified", func() { + nc.Spec.AMISelectorTerms = nil + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) It("should fail when a ami selector term has no values", func() { nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ {}, @@ -448,37 +461,60 @@ var _ = Describe("CEL/Validation", func() { } Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) }) - It("should fail when specifying id with tags", func() { - nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ - { - ID: "ami-12345749", - Tags: map[string]string{ - "test": "testvalue", - }, - }, - } - Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) - }) - It("should fail when specifying id with name", func() { - nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ - { - ID: "ami-12345749", - Name: "my-custom-ami", - }, - } - Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) - }) - It("should fail when specifying id with owner", func() { + DescribeTable( + "should fail when specifying id with other fields", + func(mutation v1.AMISelectorTerm) { + term := v1.AMISelectorTerm{ID: "ami-1234749"} + Expect(mergo.Merge(&term, &mutation)).To(Succeed()) + nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{term} + 
Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }, + Entry("alias", v1.AMISelectorTerm{Alias: "al2023@latest"}), + Entry("tags", v1.AMISelectorTerm{ + Tags: map[string]string{"test": "testvalue"}, + }), + Entry("name", v1.AMISelectorTerm{Name: "my-custom-ami"}), + Entry("owner", v1.AMISelectorTerm{Owner: "123456789"}), + ) + DescribeTable( + "should fail when specifying alias with other fields", + func(mutation v1.AMISelectorTerm) { + term := v1.AMISelectorTerm{Alias: "al2023@latest"} + Expect(mergo.Merge(&term, &mutation)).To(Succeed()) + nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{term} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }, + Entry("id", v1.AMISelectorTerm{ID: "ami-1234749"}), + Entry("tags", v1.AMISelectorTerm{ + Tags: map[string]string{"test": "testvalue"}, + }), + Entry("name", v1.AMISelectorTerm{Name: "my-custom-ami"}), + Entry("owner", v1.AMISelectorTerm{Owner: "123456789"}), + ) + It("should fail when specifying alias with other terms", func() { nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ - { - ID: "ami-12345749", - Owner: "123456789", - }, + {Alias: "al2023@latest"}, + {ID: "ami-1234749"}, } Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) }) - It("should fail when AMIFamily is Custom and not AMISelectorTerms", func() { - nc.Spec.AMIFamily = &v1.AMIFamilyCustom + DescribeTable( + "should succeed for valid aliases", + func(alias string) { + nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: alias}} + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }, + Entry("al2 (latest)", "al2@latest"), + Entry("al2 (pinned)", "al2@v20240625"), + Entry("al2023 (latest)", "al2023@latest"), + Entry("al2023 (pinned)", "al2023@v20240625"), + Entry("bottlerocket (latest)", "bottlerocket@latest"), + Entry("bottlerocket (pinned)", "bottlerocket@1.10.0"), + Entry("windows2019 (latest)", "windows2019@latest"), + Entry("windows2022 (latest)", "windows2022@latest"), + ) + It("should fail for an alias with an invalid family", func() { + 
nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "ubuntu@latest"}} Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) }) }) @@ -725,7 +761,7 @@ var _ = Describe("CEL/Validation", func() { nodeClass := &v1.EC2NodeClass{ ObjectMeta: test.ObjectMeta(metav1.ObjectMeta{}), Spec: v1.EC2NodeClassSpec{ - AMIFamily: nc.Spec.AMIFamily, + AMISelectorTerms: nc.Spec.AMISelectorTerms, SubnetSelectorTerms: nc.Spec.SubnetSelectorTerms, SecurityGroupSelectorTerms: nc.Spec.SecurityGroupSelectorTerms, Role: nc.Spec.Role, @@ -755,7 +791,7 @@ var _ = Describe("CEL/Validation", func() { nodeClass := &v1.EC2NodeClass{ ObjectMeta: test.ObjectMeta(metav1.ObjectMeta{}), Spec: v1.EC2NodeClassSpec{ - AMIFamily: nc.Spec.AMIFamily, + AMISelectorTerms: nc.Spec.AMISelectorTerms, SubnetSelectorTerms: nc.Spec.SubnetSelectorTerms, SecurityGroupSelectorTerms: nc.Spec.SecurityGroupSelectorTerms, Role: nc.Spec.Role, @@ -776,7 +812,7 @@ var _ = Describe("CEL/Validation", func() { nodeClass := &v1.EC2NodeClass{ ObjectMeta: test.ObjectMeta(metav1.ObjectMeta{}), Spec: v1.EC2NodeClassSpec{ - AMIFamily: nc.Spec.AMIFamily, + AMISelectorTerms: nc.Spec.AMISelectorTerms, SubnetSelectorTerms: nc.Spec.SubnetSelectorTerms, SecurityGroupSelectorTerms: nc.Spec.SecurityGroupSelectorTerms, Role: nc.Spec.Role, @@ -797,7 +833,7 @@ var _ = Describe("CEL/Validation", func() { nodeClass := &v1.EC2NodeClass{ ObjectMeta: test.ObjectMeta(metav1.ObjectMeta{}), Spec: v1.EC2NodeClassSpec{ - AMIFamily: nc.Spec.AMIFamily, + AMISelectorTerms: nc.Spec.AMISelectorTerms, SubnetSelectorTerms: nc.Spec.SubnetSelectorTerms, SecurityGroupSelectorTerms: nc.Spec.SecurityGroupSelectorTerms, Role: nc.Spec.Role, @@ -825,7 +861,7 @@ var _ = Describe("CEL/Validation", func() { nodeClass := &v1.EC2NodeClass{ ObjectMeta: test.ObjectMeta(metav1.ObjectMeta{}), Spec: v1.EC2NodeClassSpec{ - AMIFamily: nc.Spec.AMIFamily, + AMISelectorTerms: nc.Spec.AMISelectorTerms, SubnetSelectorTerms: nc.Spec.SubnetSelectorTerms, SecurityGroupSelectorTerms: 
nc.Spec.SecurityGroupSelectorTerms, Role: nc.Spec.Role, @@ -846,7 +882,7 @@ var _ = Describe("CEL/Validation", func() { nodeClass := &v1.EC2NodeClass{ ObjectMeta: test.ObjectMeta(metav1.ObjectMeta{}), Spec: v1.EC2NodeClassSpec{ - AMIFamily: nc.Spec.AMIFamily, + AMISelectorTerms: nc.Spec.AMISelectorTerms, SubnetSelectorTerms: nc.Spec.SubnetSelectorTerms, SecurityGroupSelectorTerms: nc.Spec.SecurityGroupSelectorTerms, Role: nc.Spec.Role, @@ -867,7 +903,7 @@ var _ = Describe("CEL/Validation", func() { nodeClass := &v1.EC2NodeClass{ ObjectMeta: test.ObjectMeta(metav1.ObjectMeta{}), Spec: v1.EC2NodeClassSpec{ - AMIFamily: nc.Spec.AMIFamily, + AMISelectorTerms: nc.Spec.AMISelectorTerms, SubnetSelectorTerms: nc.Spec.SubnetSelectorTerms, SecurityGroupSelectorTerms: nc.Spec.SecurityGroupSelectorTerms, Role: nc.Spec.Role, diff --git a/pkg/apis/v1/zz_generated.deepcopy.go b/pkg/apis/v1/zz_generated.deepcopy.go index 627c14cfad13..c7732b18e478 100644 --- a/pkg/apis/v1/zz_generated.deepcopy.go +++ b/pkg/apis/v1/zz_generated.deepcopy.go @@ -237,11 +237,6 @@ func (in *EC2NodeClassSpec) DeepCopyInto(out *EC2NodeClassSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.AMIFamily != nil { - in, out := &in.AMIFamily, &out.AMIFamily - *out = new(string) - **out = **in - } if in.UserData != nil { in, out := &in.UserData, &out.UserData *out = new(string) From 76184e3b9262dbadaff7a43167e9148bdde64370 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Wed, 3 Jul 2024 09:24:41 -0700 Subject: [PATCH 2/9] feat: AMI alias resolution --- pkg/operator/operator.go | 6 +- pkg/providers/amifamily/al2.go | 88 +++++--- pkg/providers/amifamily/al2023.go | 63 ++++-- pkg/providers/amifamily/ami.go | 276 +++++++++--------------- pkg/providers/amifamily/bottlerocket.go | 83 +++---- pkg/providers/amifamily/custom.go | 7 +- pkg/providers/amifamily/resolver.go | 10 +- pkg/providers/amifamily/types.go | 98 +++++++++ pkg/providers/amifamily/windows.go | 50 ++++- pkg/providers/ssm/provider.go | 82 +++++++ 10 
files changed, 473 insertions(+), 290 deletions(-) create mode 100644 pkg/providers/amifamily/types.go create mode 100644 pkg/providers/ssm/provider.go diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 1158569fe44c..2f56094a2465 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -61,6 +61,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" + ssmp "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" "github.com/aws/karpenter-provider-aws/pkg/providers/version" ) @@ -87,6 +88,7 @@ type Operator struct { VersionProvider version.Provider InstanceTypesProvider instancetype.Provider InstanceProvider instance.Provider + SSMProvider ssmp.Provider } func NewOperator(ctx context.Context, operator *operator.Operator) (context.Context, *Operator) { @@ -148,7 +150,8 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont *sess.Config.Region, ) versionProvider := version.NewDefaultProvider(operator.KubernetesInterface, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval)) - amiProvider := amifamily.NewDefaultProvider(versionProvider, ssm.New(sess), ec2api, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval)) + ssmProvider := ssmp.NewDefaultProvider(ssm.New(sess), cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval)) + amiProvider := amifamily.NewDefaultProvider(versionProvider, ssmProvider, ec2api, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval)) amiResolver := amifamily.NewResolver(amiProvider) launchTemplateProvider := launchtemplate.NewDefaultProvider( ctx, @@ -196,6 +199,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont PricingProvider: pricingProvider, InstanceTypesProvider: instanceTypeProvider, 
InstanceProvider: instanceProvider, + SSMProvider: ssmProvider, } } diff --git a/pkg/providers/amifamily/al2.go b/pkg/providers/amifamily/al2.go index 078d8e0ddae9..abeb7e76fe46 100644 --- a/pkg/providers/amifamily/al2.go +++ b/pkg/providers/amifamily/al2.go @@ -15,12 +15,18 @@ limitations under the License. package amifamily import ( + "context" "fmt" + "regexp" + "strings" "github.com/aws/aws-sdk-go/aws" corev1 "k8s.io/api/core/v1" - karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/samber/lo" + + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/karpenter/pkg/scheduling" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" @@ -28,6 +34,7 @@ import ( "sigs.k8s.io/karpenter/pkg/cloudprovider" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap" + "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" ) type AL2 struct { @@ -35,40 +42,53 @@ type AL2 struct { *Options } -// DefaultAMIs returns the AMI name, and Requirements, with an SSM query -func (a AL2) DefaultAMIs(version string) []DefaultAMIOutput { - return []DefaultAMIOutput{ - { - Query: fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id", version), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureAmd64), - scheduling.NewRequirement(v1.LabelInstanceGPUCount, corev1.NodeSelectorOpDoesNotExist), - scheduling.NewRequirement(v1.LabelInstanceAcceleratorCount, corev1.NodeSelectorOpDoesNotExist), - ), - }, - { - Query: fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2-gpu/recommended/image_id", version), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureAmd64), - scheduling.NewRequirement(v1.LabelInstanceGPUCount, corev1.NodeSelectorOpExists), - ), - }, - { - Query: 
fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2-gpu/recommended/image_id", version), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureAmd64), - scheduling.NewRequirement(v1.LabelInstanceAcceleratorCount, corev1.NodeSelectorOpExists), - ), - }, - { - Query: fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2-%s/recommended/image_id", version, karpv1.ArchitectureArm64), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureArm64), - scheduling.NewRequirement(v1.LabelInstanceGPUCount, corev1.NodeSelectorOpDoesNotExist), - scheduling.NewRequirement(v1.LabelInstanceAcceleratorCount, corev1.NodeSelectorOpDoesNotExist), - ), - }, +func (a AL2) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (AMIQuery, error) { + query := AMIQuery{ + Filters: []*ec2.Filter{&ec2.Filter{ + Name: lo.ToPtr("image-id"), + }}, + KnownRequirements: make(map[string][]scheduling.Requirements), + } + for rootPath, variants := range map[string][]Variant{ + fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2", k8sVersion): []Variant{VariantStandard}, + fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2-arm64", k8sVersion): []Variant{VariantStandard}, + fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2-gpu", k8sVersion): []Variant{VariantNeuron, VariantNvidia}, + } { + results, err := ssmProvider.List(ctx, rootPath) + if err != nil { + log.FromContext(ctx).WithValues("path", rootPath).Error(err, "discovering AMIs from ssm") + continue + } + for path, value := range results { + pathComponents := strings.Split(path, "/") + // Only select image_id paths which match the desired AMI version + if len(pathComponents) != 9 || pathComponents[8] != "image_id" { + continue + } + if av, err := a.extractAMIVersion(pathComponents[7]); err != 
nil || av != amiVersion { + continue + } + query.Filters[0].Values = append(query.Filters[0].Values, lo.ToPtr(value)) + query.KnownRequirements[value] = lo.Map(variants, func(v Variant, _ int) scheduling.Requirements { return v.Requirements() }) + } + } + // Failed to discover any AMIs, we should short circuit AMI discovery + if len(query.Filters[0].Values) == 0 { + return AMIQuery{}, fmt.Errorf("failed to discover any AMIs for alias") + } + return query, nil +} + +func (a AL2) extractAMIVersion(versionStr string) (string, error) { + if versionStr == "recommended" { + return AMIVersionLatest, nil + } + rgx := regexp.MustCompile(`^.*(v\d+)$`) + matches := rgx.FindStringSubmatch(versionStr) + if len(matches) != 2 { + return "", fmt.Errorf("failed to extract AMI version") } + return matches[1], nil } // UserData returns the exact same string for equivalent input, diff --git a/pkg/providers/amifamily/al2023.go b/pkg/providers/amifamily/al2023.go index 94f95321e23b..20f28756e9e9 100644 --- a/pkg/providers/amifamily/al2023.go +++ b/pkg/providers/amifamily/al2023.go @@ -15,16 +15,20 @@ limitations under the License. 
package amifamily import ( + "context" "fmt" + "regexp" + "strings" "github.com/samber/lo" corev1 "k8s.io/api/core/v1" - karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/scheduling" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap" + "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" ) type AL2023 struct { @@ -32,21 +36,50 @@ type AL2023 struct { *Options } -func (a AL2023) DefaultAMIs(version string) []DefaultAMIOutput { - return []DefaultAMIOutput{ - { - Query: fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", version), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureAmd64), - ), - }, - { - Query: fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", version), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureArm64), - ), - }, +func (a AL2023) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (AMIQuery, error) { + query := AMIQuery{ + Filters: []*ec2.Filter{&ec2.Filter{ + Name: lo.ToPtr("image-id"), + }}, + KnownRequirements: make(map[string][]scheduling.Requirements), + } + rootPath := fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023", k8sVersion) + results, err := ssmProvider.List(ctx, rootPath) + if err != nil { + return AMIQuery{}, fmt.Errorf("discovering AMIs from ssm") + } + for path, value := range results { + pathComponents := strings.Split(path, "/") + if len(pathComponents) != 11 || pathComponents[10] != "image_id" { + continue + } + if av, err := a.extractAMIVersion(pathComponents[9]); err != nil || av != amiVersion { + 
continue + } + variant, err := NewVariant(pathComponents[8]) + if err != nil { + continue + } + query.Filters[0].Values = append(query.Filters[0].Values, lo.ToPtr(value)) + query.KnownRequirements[value] = []scheduling.Requirements{variant.Requirements()} + } + // Failed to discover any AMIs, we should short circuit AMI discovery + if len(query.Filters[0].Values) == 0 { + return AMIQuery{}, fmt.Errorf("failed to discover any AMIs for alias") + } + return query, nil +} + +func (a AL2023) extractAMIVersion(versionStr string) (string, error) { + if versionStr == "recommended" { + return AMIVersionLatest, nil + } + rgx := regexp.MustCompile(`^.*(v\d+)$`) + matches := rgx.FindStringSubmatch(versionStr) + if len(matches) != 2 { + return "", fmt.Errorf("failed to extract AMI version") } + return matches[1], nil } func (a AL2023) UserData(kubeletConfig *v1.KubeletConfiguration, taints []corev1.Taint, labels map[string]string, caBundle *string, _ []*cloudprovider.InstanceType, customUserData *string, instanceStorePolicy *v1.InstanceStorePolicy) bootstrap.Bootstrapper { diff --git a/pkg/providers/amifamily/ami.go b/pkg/providers/amifamily/ami.go index 90ba3a30de8b..3c3316a39dbb 100644 --- a/pkg/providers/amifamily/ami.go +++ b/pkg/providers/amifamily/ami.go @@ -17,15 +17,12 @@ package amifamily import ( "context" "fmt" - "sort" "sync" "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/ec2/ec2iface" - "github.com/aws/aws-sdk-go/service/ssm" - "github.com/aws/aws-sdk-go/service/ssm/ssmiface" "github.com/mitchellh/hashstructure/v2" "github.com/patrickmn/go-cache" "github.com/samber/lo" @@ -35,6 +32,7 @@ import ( v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/providers/version" + "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/scheduling" "sigs.k8s.io/karpenter/pkg/utils/pretty" @@ -47,55 
+45,19 @@ type Provider interface { type DefaultProvider struct { sync.Mutex cache *cache.Cache - ssm ssmiface.SSMAPI ec2api ec2iface.EC2API cm *pretty.ChangeMonitor versionProvider version.Provider + ssmProvider ssm.Provider } -type AMI struct { - Name string - AmiID string - CreationDate string - Requirements scheduling.Requirements -} - -type AMIs []AMI - -// Sort orders the AMIs by creation date in descending order. -// If creation date is nil or two AMIs have the same creation date, the AMIs will be sorted by ID, which is guaranteed to be unique, in ascending order. -func (a AMIs) Sort() { - sort.Slice(a, func(i, j int) bool { - itime, _ := time.Parse(time.RFC3339, a[i].CreationDate) - jtime, _ := time.Parse(time.RFC3339, a[j].CreationDate) - if itime.Unix() != jtime.Unix() { - return itime.Unix() > jtime.Unix() - } - return a[i].AmiID < a[j].AmiID - }) -} - -// MapToInstanceTypes returns a map of AMIIDs that are the most recent on creationDate to compatible instancetypes -func MapToInstanceTypes(instanceTypes []*cloudprovider.InstanceType, amis []v1.AMI) map[string][]*cloudprovider.InstanceType { - amiIDs := map[string][]*cloudprovider.InstanceType{} - for _, instanceType := range instanceTypes { - for _, ami := range amis { - if err := instanceType.Requirements.Compatible(scheduling.NewNodeSelectorRequirements(ami.Requirements...), scheduling.AllowUndefinedWellKnownLabels); err == nil { - amiIDs[ami.ID] = append(amiIDs[ami.ID], instanceType) - break - } - } - } - return amiIDs -} - -func NewDefaultProvider(versionProvider version.Provider, ssm ssmiface.SSMAPI, ec2api ec2iface.EC2API, cache *cache.Cache) *DefaultProvider { +func NewDefaultProvider(versionProvider version.Provider, ssmProvider ssm.Provider, ec2api ec2iface.EC2API, cache *cache.Cache) *DefaultProvider { return &DefaultProvider{ cache: cache, - ssm: ssm, ec2api: ec2api, cm: pretty.NewChangeMonitor(), versionProvider: versionProvider, + ssmProvider: ssmProvider, } } @@ -103,19 +65,13 @@ func 
NewDefaultProvider(versionProvider version.Provider, ssm ssmiface.SSMAPI, e func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass) (AMIs, error) { p.Lock() defer p.Unlock() - - var err error - var amis AMIs - if len(nodeClass.Spec.AMISelectorTerms) == 0 { - amis, err = p.getDefaultAMIs(ctx, nodeClass) - if err != nil { - return nil, err - } - } else { - amis, err = p.getAMIs(ctx, nodeClass.Spec.AMISelectorTerms) - if err != nil { - return nil, err - } + queries, err := p.GetAMIQueries(ctx, nodeClass) + if err != nil { + return nil, fmt.Errorf("getting AMI queries, %w", err) + } + amis, err := p.getAMIs(ctx, queries) + if err != nil { + return nil, err } amis.Sort() uniqueAMIs := lo.Uniq(lo.Map(amis, func(a AMI, _ int) string { return a.AmiID })) @@ -126,129 +82,36 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass) return amis, nil } -func (p *DefaultProvider) getDefaultAMIs(ctx context.Context, nodeClass *v1.EC2NodeClass) (res AMIs, err error) { - if images, ok := p.cache.Get(lo.FromPtr(nodeClass.Spec.AMIFamily)); ok { - // Ensure what's returned from this function is a deep-copy of AMIs so alterations - // to the data don't affect the original - return append(AMIs{}, images.(AMIs)...), nil - } - amiFamily := GetAMIFamily(nodeClass.Spec.AMIFamily, &Options{}) - kubernetesVersion, err := p.versionProvider.Get(ctx) - if err != nil { - return nil, fmt.Errorf("getting kubernetes version %w", err) - } - defaultAMIs := amiFamily.DefaultAMIs(kubernetesVersion) - for _, ami := range defaultAMIs { - if id, err := p.resolveSSMParameter(ctx, ami.Query); err != nil { - log.FromContext(ctx).WithValues("query", ami.Query).Error(err, "failed discovering amis from ssm") - } else { - res = append(res, AMI{AmiID: id, Requirements: ami.Requirements}) - } - } - // Resolve Name and CreationDate information into the DefaultAMIs - if err = p.ec2api.DescribeImagesPagesWithContext(ctx, &ec2.DescribeImagesInput{ - Filters: 
[]*ec2.Filter{{Name: aws.String("image-id"), Values: aws.StringSlice(lo.Map(res, func(a AMI, _ int) string { return a.AmiID }))}}, - MaxResults: aws.Int64(500), - }, func(page *ec2.DescribeImagesOutput, _ bool) bool { - for i := range page.Images { - for j := range res { - if res[j].AmiID == aws.StringValue(page.Images[i].ImageId) { - res[j].Name = aws.StringValue(page.Images[i].Name) - res[j].CreationDate = aws.StringValue(page.Images[i].CreationDate) - } - } - } - return true - }); err != nil { - return nil, fmt.Errorf("describing images, %w", err) - } - p.cache.SetDefault(lo.FromPtr(nodeClass.Spec.AMIFamily), res) - return res, nil -} - -func (p *DefaultProvider) resolveSSMParameter(ctx context.Context, ssmQuery string) (string, error) { - output, err := p.ssm.GetParameterWithContext(ctx, &ssm.GetParameterInput{Name: aws.String(ssmQuery)}) - if err != nil { - return "", fmt.Errorf("getting ssm parameter %q, %w", ssmQuery, err) - } - ami := aws.StringValue(output.Parameter.Value) - return ami, nil -} - -func (p *DefaultProvider) getAMIs(ctx context.Context, terms []v1.AMISelectorTerm) (AMIs, error) { - filterAndOwnerSets := GetFilterAndOwnerSets(terms) - hash, err := hashstructure.Hash(filterAndOwnerSets, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) - if err != nil { - return nil, err - } - if images, ok := p.cache.Get(fmt.Sprintf("%d", hash)); ok { - // Ensure what's returned from this function is a deep-copy of AMIs so alterations - // to the data don't affect the original - return append(AMIs{}, images.(AMIs)...), nil - } - images := map[uint64]AMI{} - for _, filtersAndOwners := range filterAndOwnerSets { - if err = p.ec2api.DescribeImagesPagesWithContext(ctx, &ec2.DescribeImagesInput{ - // Don't include filters in the Describe Images call as EC2 API doesn't allow empty filters. 
- Filters: lo.Ternary(len(filtersAndOwners.Filters) > 0, filtersAndOwners.Filters, nil), - Owners: lo.Ternary(len(filtersAndOwners.Owners) > 0, aws.StringSlice(filtersAndOwners.Owners), nil), - MaxResults: aws.Int64(1000), - }, func(page *ec2.DescribeImagesOutput, _ bool) bool { - for i := range page.Images { - reqs := p.getRequirementsFromImage(page.Images[i]) - if !v1.WellKnownArchitectures.Has(reqs.Get(corev1.LabelArchStable).Any()) { - continue - } - reqsHash := lo.Must(hashstructure.Hash(reqs.NodeSelectorRequirements(), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true})) - // If the proposed image is newer, store it so that we can return it - if v, ok := images[reqsHash]; ok { - candidateCreationTime, _ := time.Parse(time.RFC3339, lo.FromPtr(page.Images[i].CreationDate)) - existingCreationTime, _ := time.Parse(time.RFC3339, v.CreationDate) - if existingCreationTime == candidateCreationTime && lo.FromPtr(page.Images[i].Name) < v.Name { - continue - } - if candidateCreationTime.Unix() < existingCreationTime.Unix() { - continue - } - } - images[reqsHash] = AMI{ - Name: lo.FromPtr(page.Images[i].Name), - AmiID: lo.FromPtr(page.Images[i].ImageId), - CreationDate: lo.FromPtr(page.Images[i].CreationDate), - Requirements: reqs, - } - } - return true - }); err != nil { - return nil, fmt.Errorf("describing images, %w", err) +func (p *DefaultProvider) GetAMIQueries(ctx context.Context, nodeClass *v1.EC2NodeClass) ([]AMIQuery, error) { + // Aliases should be mutually exclusive (enforced via CEL validation), we'll treat it as such + if amiFamilyKey := nodeClass.AMIFamily(); amiFamilyKey != v1.AMIFamilyCustom { + amiVersion := nodeClass.AMIVersion() + amiFamily := GetAMIFamily(&amiFamilyKey, nil) + kubernetesVersion, err := p.versionProvider.Get(ctx) + if err != nil { + return nil, fmt.Errorf("getting kubernetes version, %w", err) } + query, err := amiFamily.AMIQuery(ctx, p.ssmProvider, kubernetesVersion, amiVersion) + return []AMIQuery{query}, err } - 
p.cache.SetDefault(fmt.Sprintf("%d", hash), AMIs(lo.Values(images))) - return lo.Values(images), nil -} - -type FiltersAndOwners struct { - Filters []*ec2.Filter - Owners []string -} -func GetFilterAndOwnerSets(terms []v1.AMISelectorTerm) (res []FiltersAndOwners) { idFilter := &ec2.Filter{Name: aws.String("image-id")} - for _, term := range terms { + queries := []AMIQuery{} + for _, term := range nodeClass.Spec.AMISelectorTerms { switch { case term.ID != "": idFilter.Values = append(idFilter.Values, aws.String(term.ID)) default: - elem := FiltersAndOwners{ + query := AMIQuery{ Owners: lo.Ternary(term.Owner != "", []string{term.Owner}, []string{}), } if term.Name != "" { // Default owners to self,amazon to ensure Karpenter only discovers cross-account AMIs if the user specifically allows it. // Removing this default would cause Karpenter to discover publicly shared AMIs passing the name filter. - elem = FiltersAndOwners{ + query = AMIQuery{ Owners: lo.Ternary(term.Owner != "", []string{term.Owner}, []string{"self", "amazon"}), } - elem.Filters = append(elem.Filters, &ec2.Filter{ + query.Filters = append(query.Filters, &ec2.Filter{ Name: aws.String("name"), Values: aws.StringSlice([]string{term.Name}), }) @@ -256,33 +119,94 @@ func GetFilterAndOwnerSets(terms []v1.AMISelectorTerm) (res []FiltersAndOwners) } for k, v := range term.Tags { if v == "*" { - elem.Filters = append(elem.Filters, &ec2.Filter{ + query.Filters = append(query.Filters, &ec2.Filter{ Name: aws.String("tag-key"), Values: []*string{aws.String(k)}, }) } else { - elem.Filters = append(elem.Filters, &ec2.Filter{ + query.Filters = append(query.Filters, &ec2.Filter{ Name: aws.String(fmt.Sprintf("tag:%s", k)), Values: []*string{aws.String(v)}, }) } } - res = append(res, elem) + queries = append(queries, query) } } if len(idFilter.Values) > 0 { - res = append(res, FiltersAndOwners{Filters: []*ec2.Filter{idFilter}}) + queries = append(queries, AMIQuery{Filters: []*ec2.Filter{idFilter}}) + } + return queries, 
nil +} + +func (p *DefaultProvider) getAMIs(ctx context.Context, queries []AMIQuery) (AMIs, error) { + hash, err := hashstructure.Hash(queries, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) + if err != nil { + return nil, err + } + if images, ok := p.cache.Get(fmt.Sprintf("%d", hash)); ok { + // Ensure what's returned from this function is a deep-copy of AMIs so alterations + // to the data don't affect the original + return append(AMIs{}, images.(AMIs)...), nil } - return res + images := map[uint64]AMI{} + for _, query := range queries { + if err = p.ec2api.DescribeImagesPagesWithContext(ctx, query.DescribeImagesInput(), func(page *ec2.DescribeImagesOutput, _ bool) bool { + for _, image := range page.Images { + arch, ok := v1.AWSToKubeArchitectures[lo.FromPtr(image.Architecture)] + if !ok { + continue + } + requirementSets := func() []scheduling.Requirements { + if knownRequirements, ok := query.KnownRequirements[lo.FromPtr(image.ImageId)]; ok { + return lo.Map(knownRequirements, func(r scheduling.Requirements, _ int) scheduling.Requirements { + r.Add(scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, arch)) + return r + }) + } + return []scheduling.Requirements{scheduling.NewRequirements(scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, arch))} + }() + for _, reqs := range requirementSets { + reqsHash := lo.Must(hashstructure.Hash(reqs.NodeSelectorRequirements(), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true})) + // If the proposed image is newer, store it so that we can return it + if v, ok := images[reqsHash]; ok { + candidateCreationTime, _ := time.Parse(time.RFC3339, lo.FromPtr(image.CreationDate)) + existingCreationTime, _ := time.Parse(time.RFC3339, v.CreationDate) + if existingCreationTime == candidateCreationTime && lo.FromPtr(image.Name) < v.Name { + continue + } + if candidateCreationTime.Unix() < existingCreationTime.Unix() { + continue + } + } + 
images[reqsHash] = AMI{ + Name: lo.FromPtr(image.Name), + AmiID: lo.FromPtr(image.ImageId), + CreationDate: lo.FromPtr(image.CreationDate), + Requirements: reqs, + } + } + } + return true + }); err != nil { + return nil, fmt.Errorf("describing images, %w", err) + } + } + p.cache.SetDefault(fmt.Sprintf("%d", hash), AMIs(lo.Values(images))) + return lo.Values(images), nil } -func (p *DefaultProvider) getRequirementsFromImage(ec2Image *ec2.Image) scheduling.Requirements { - requirements := scheduling.NewRequirements() - // Always add the architecture of an image as a requirement, irrespective of what's specified in EC2 tags. - architecture := *ec2Image.Architecture - if value, ok := v1.AWSToKubeArchitectures[architecture]; ok { - architecture = value +// MapToInstanceTypes returns a map of AMIIDs that are the most recent on creationDate to compatible instancetypes +func MapToInstanceTypes(instanceTypes []*cloudprovider.InstanceType, amis []v1.AMI) map[string][]*cloudprovider.InstanceType { + amiIDs := map[string][]*cloudprovider.InstanceType{} + for _, instanceType := range instanceTypes { + for _, ami := range amis { + if err := instanceType.Requirements.Compatible(scheduling.NewNodeSelectorRequirements(ami.Requirements...), scheduling.AllowUndefinedWellKnownLabels); err == nil { + amiIDs[ami.ID] = append(amiIDs[ami.ID], instanceType) + break + } + } } - requirements.Add(scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, architecture)) - return requirements + return amiIDs } + diff --git a/pkg/providers/amifamily/bottlerocket.go b/pkg/providers/amifamily/bottlerocket.go index 084004988697..384d21b8360c 100644 --- a/pkg/providers/amifamily/bottlerocket.go +++ b/pkg/providers/amifamily/bottlerocket.go @@ -15,20 +15,24 @@ limitations under the License. 
package amifamily import ( + "context" "fmt" + "strings" "github.com/samber/lo" - karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + "sigs.k8s.io/controller-runtime/pkg/log" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap" + "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/scheduling" "github.com/aws/aws-sdk-go/aws" corev1 "k8s.io/api/core/v1" + "github.com/aws/aws-sdk-go/service/ec2" "k8s.io/apimachinery/pkg/api/resource" ) @@ -37,54 +41,37 @@ type Bottlerocket struct { *Options } -// DefaultAMIs returns the AMI name, and Requirements, with an SSM query -func (b Bottlerocket) DefaultAMIs(version string) []DefaultAMIOutput { - return []DefaultAMIOutput{ - { - Query: fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/x86_64/latest/image_id", version), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureAmd64), - scheduling.NewRequirement(v1.LabelInstanceGPUCount, corev1.NodeSelectorOpDoesNotExist), - scheduling.NewRequirement(v1.LabelInstanceAcceleratorCount, corev1.NodeSelectorOpDoesNotExist), - ), - }, - { - Query: fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s-nvidia/x86_64/latest/image_id", version), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureAmd64), - scheduling.NewRequirement(v1.LabelInstanceGPUCount, corev1.NodeSelectorOpExists), - ), - }, - { - Query: fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s-nvidia/x86_64/latest/image_id", version), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureAmd64), - scheduling.NewRequirement(v1.LabelInstanceAcceleratorCount, corev1.NodeSelectorOpExists), - ), - }, - { - Query: 
fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/%s/latest/image_id", version, karpv1.ArchitectureArm64), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureArm64), - scheduling.NewRequirement(v1.LabelInstanceGPUCount, corev1.NodeSelectorOpDoesNotExist), - scheduling.NewRequirement(v1.LabelInstanceAcceleratorCount, corev1.NodeSelectorOpDoesNotExist), - ), - }, - { - Query: fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s-nvidia/%s/latest/image_id", version, karpv1.ArchitectureArm64), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureArm64), - scheduling.NewRequirement(v1.LabelInstanceGPUCount, corev1.NodeSelectorOpExists), - ), - }, - { - Query: fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s-nvidia/%s/latest/image_id", version, karpv1.ArchitectureArm64), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureArm64), - scheduling.NewRequirement(v1.LabelInstanceAcceleratorCount, corev1.NodeSelectorOpExists), - ), - }, +func (b Bottlerocket) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (AMIQuery, error) { + query := AMIQuery{ + Filters: []*ec2.Filter{&ec2.Filter{ + Name: lo.ToPtr("image-id"), + }}, + KnownRequirements: make(map[string][]scheduling.Requirements), + } + for rootPath, variants := range map[string][]Variant{ + fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s", k8sVersion): []Variant{VariantStandard}, + fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s-nvidia", k8sVersion): []Variant{VariantNeuron, VariantNvidia}, + } { + results, err := ssmProvider.List(ctx, rootPath) + if err != nil { + log.FromContext(ctx).WithValues("path", rootPath).Error(err, "discovering AMIs from ssm") + continue + } + for path, value := range results { + 
pathComponents := strings.Split(path, "/") + // Only select image_id paths which match the desired AMI version + if len(pathComponents) != 8 || pathComponents[7] != "image_id" || pathComponents[6] != amiVersion { + continue + } + query.Filters[0].Values = append(query.Filters[0].Values, lo.ToPtr(value)) + query.KnownRequirements[value] = lo.Map(variants, func(v Variant, _ int) scheduling.Requirements { return v.Requirements() }) + } + } + // Failed to discover any AMIs, we should short circuit AMI discovery + if len(query.Filters[0].Values) == 0 { + return AMIQuery{}, fmt.Errorf("failed to discover any AMIs for alias") } + return query, nil } // UserData returns the default userdata script for the AMI Family diff --git a/pkg/providers/amifamily/custom.go b/pkg/providers/amifamily/custom.go index 3b15d4060bca..6f22eca35060 100644 --- a/pkg/providers/amifamily/custom.go +++ b/pkg/providers/amifamily/custom.go @@ -15,6 +15,8 @@ limitations under the License. package amifamily import ( + "context" + corev1 "k8s.io/api/core/v1" "sigs.k8s.io/karpenter/pkg/cloudprovider" @@ -22,6 +24,7 @@ import ( v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap" + "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" ) type Custom struct { @@ -38,8 +41,8 @@ func (c Custom) UserData(_ *v1.KubeletConfiguration, _ []corev1.Taint, _ map[str } } -func (c Custom) DefaultAMIs(_ string) []DefaultAMIOutput { - return nil +func (c Custom) AMIQuery(_ context.Context, _ ssm.Provider, _ string, _ string) (AMIQuery, error) { + return AMIQuery{}, nil } func (c Custom) DefaultBlockDeviceMappings() []*v1.BlockDeviceMapping { diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index 0c5838af5231..e5d85ca6fc23 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -15,6 +15,7 @@ limitations under the License. 
package amifamily import ( + "context" "fmt" "net" @@ -29,11 +30,16 @@ import ( v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap" "github.com/aws/karpenter-provider-aws/pkg/utils" + "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/scheduling" ) +const ( + AMIVersionLatest = "latest" +) + var DefaultEBS = v1.BlockDevice{ Encrypted: aws.Bool(true), VolumeType: aws.String(ec2.VolumeTypeGp3), @@ -77,7 +83,7 @@ type LaunchTemplate struct { // AMIFamily can be implemented to override the default logic for generating dynamic launch template parameters type AMIFamily interface { - DefaultAMIs(version string) []DefaultAMIOutput + AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (AMIQuery, error) UserData(kubeletConfig *v1.KubeletConfiguration, taints []corev1.Taint, labels map[string]string, caBundle *string, instanceTypes []*cloudprovider.InstanceType, customUserData *string, instanceStorePolicy *v1.InstanceStorePolicy) bootstrap.Bootstrapper DefaultBlockDeviceMappings() []*v1.BlockDeviceMapping DefaultMetadataOptions() *v1.MetadataOptions @@ -164,8 +170,6 @@ func GetAMIFamily(amiFamily *string, options *Options) AMIFamily { switch aws.StringValue(amiFamily) { case v1.AMIFamilyBottlerocket: return &Bottlerocket{Options: options} - case v1.AMIFamilyUbuntu: - return &Ubuntu{Options: options} case v1.AMIFamilyWindows2019: return &Windows{Options: options, Version: v1.Windows2019, Build: v1.Windows2019Build} case v1.AMIFamilyWindows2022: diff --git a/pkg/providers/amifamily/types.go b/pkg/providers/amifamily/types.go new file mode 100644 index 000000000000..91ef9f576849 --- /dev/null +++ b/pkg/providers/amifamily/types.go @@ -0,0 +1,98 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package amifamily + +import ( + "fmt" + "sort" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + "github.com/samber/lo" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" + "sigs.k8s.io/karpenter/pkg/scheduling" +) + +type AMI struct { + Name string + AmiID string + CreationDate string + Requirements scheduling.Requirements +} + +type AMIs []AMI + +// Sort orders the AMIs by creation date in descending order. +// If creation date is nil or two AMIs have the same creation date, the AMIs will be sorted by ID, which is guaranteed to be unique, in ascending order. 
+func (a AMIs) Sort() { + sort.Slice(a, func(i, j int) bool { + itime, _ := time.Parse(time.RFC3339, a[i].CreationDate) + jtime, _ := time.Parse(time.RFC3339, a[j].CreationDate) + if itime.Unix() != jtime.Unix() { + return itime.Unix() > jtime.Unix() + } + return a[i].AmiID < a[j].AmiID + }) +} + +type Variant string + +var ( + VariantStandard Variant = "standard" + VariantNvidia Variant = "nvidia" + VariantNeuron Variant = "neuron" +) + +func NewVariant(v string) (Variant, error) { + var wellKnownVariants = sets.New(VariantStandard, VariantNvidia, VariantNeuron) + variant := Variant(v) + if !wellKnownVariants.Has(variant) { + return variant, fmt.Errorf("%q is not a well-known variant", variant) + } + return variant, nil +} + +func (v Variant) Requirements() scheduling.Requirements { + switch v { + case VariantStandard: + return scheduling.NewRequirements( + scheduling.NewRequirement(v1.LabelInstanceAcceleratorCount, corev1.NodeSelectorOpDoesNotExist), + scheduling.NewRequirement(v1.LabelInstanceGPUCount, corev1.NodeSelectorOpDoesNotExist), + ) + case VariantNvidia: + return scheduling.NewRequirements(scheduling.NewRequirement(v1.LabelInstanceAcceleratorCount, corev1.NodeSelectorOpExists)) + case VariantNeuron: + return scheduling.NewRequirements(scheduling.NewRequirement(v1.LabelInstanceGPUCount, corev1.NodeSelectorOpExists)) + } + return nil +} + +type AMIQuery struct { + Filters []*ec2.Filter + Owners []string + KnownRequirements map[string][]scheduling.Requirements +} + +func (aq AMIQuery) DescribeImagesInput() *ec2.DescribeImagesInput { + return &ec2.DescribeImagesInput{ + // Don't include filters in the Describe Images call as EC2 API doesn't allow empty filters. 
+ Filters: lo.Ternary(len(aq.Filters) > 0, aq.Filters, nil), + Owners: lo.Ternary(len(aq.Owners) > 0, lo.ToSlicePtr(aq.Owners), nil), + MaxResults: aws.Int64(1000), + } +} diff --git a/pkg/providers/amifamily/windows.go b/pkg/providers/amifamily/windows.go index e92c801bcf0b..960252e18977 100644 --- a/pkg/providers/amifamily/windows.go +++ b/pkg/providers/amifamily/windows.go @@ -15,9 +15,11 @@ limitations under the License. package amifamily import ( + "context" "fmt" + "regexp" + "strings" - karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" "sigs.k8s.io/karpenter/pkg/scheduling" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" @@ -26,8 +28,10 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap" + "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" corev1 "k8s.io/api/core/v1" @@ -41,17 +45,41 @@ type Windows struct { Build string } -func (w Windows) DefaultAMIs(version string) []DefaultAMIOutput { - return []DefaultAMIOutput{ - { - Query: fmt.Sprintf("/aws/service/ami-windows-latest/Windows_Server-%s-English-%s-EKS_Optimized-%s/image_id", w.Version, v1.WindowsCore, version), - Requirements: scheduling.NewRequirements( - scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, karpv1.ArchitectureAmd64), - scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpIn, string(corev1.Windows)), - scheduling.NewRequirement(corev1.LabelWindowsBuild, corev1.NodeSelectorOpIn, w.Build), - ), - }, +func (w Windows) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (AMIQuery, error) { + query := AMIQuery{ + Filters: []*ec2.Filter{&ec2.Filter{ + Name: lo.ToPtr("image-id"), + }}, + KnownRequirements: make(map[string][]scheduling.Requirements), + } + // SSM aliases are only maintained for the latest Windows AMI releases + if amiVersion != AMIVersionLatest { + 
return AMIQuery{}, fmt.Errorf("discovering AMIs for alias, %q is an invalid version for Windows", amiVersion) + } + results, err := ssmProvider.List(ctx, "/aws/service/ami-windows-latest") + if err != nil { + return AMIQuery{}, fmt.Errorf("discovering AMIs from ssm") + } + for path, value := range results { + pathComponents := strings.Split(path, "/") + if len(pathComponents) != 6 { + continue + } + matches := regexp.MustCompile(`^Windows_Server-(\d+)-English-Core-EKS_Optimized-(\d\.\d+)$`).FindStringSubmatch(pathComponents[4]) + if len(matches) != 3 || matches[1] != w.Version || matches[2] != k8sVersion { + continue + } + query.Filters[0].Values = append(query.Filters[0].Values, lo.ToPtr(value)) + query.KnownRequirements[value] = []scheduling.Requirements{scheduling.NewRequirements( + scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpIn, string(corev1.Windows)), + scheduling.NewRequirement(corev1.LabelWindowsBuild, corev1.NodeSelectorOpIn, w.Build), + )} + } + // Failed to discover any AMIs, we should short circuit AMI discovery + if len(query.Filters[0].Values) == 0 { + return AMIQuery{}, fmt.Errorf("failed to discover any AMIs for alias") } + return query, nil } // UserData returns the default userdata script for the AMI Family diff --git a/pkg/providers/ssm/provider.go b/pkg/providers/ssm/provider.go new file mode 100644 index 000000000000..bbfe54e3c22d --- /dev/null +++ b/pkg/providers/ssm/provider.go @@ -0,0 +1,82 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ssm + +import ( + "context" + "fmt" + "sync" + + "github.com/aws/aws-sdk-go/service/ssm" + "github.com/aws/aws-sdk-go/service/ssm/ssmiface" + "github.com/patrickmn/go-cache" + "github.com/samber/lo" +) + +type Provider interface { + List(context.Context, string) (map[string]string, error) + Get(context.Context, string) (string, error) +} + +type DefaultProvider struct { + sync.Mutex + cache *cache.Cache + ssmapi ssmiface.SSMAPI +} + +func NewDefaultProvider(ssmapi ssmiface.SSMAPI, cache *cache.Cache) *DefaultProvider { + return &DefaultProvider{ + ssmapi: ssmapi, + cache: cache, + } +} + +func (p *DefaultProvider) List(ctx context.Context, path string) (map[string]string, error) { + p.Lock() + defer p.Unlock() + if paths, ok := p.cache.Get(path); ok { + return paths.(map[string]string), nil + } + values := map[string]string{} + if err := p.ssmapi.GetParametersByPathPagesWithContext(ctx, &ssm.GetParametersByPathInput{ + Recursive: lo.ToPtr(true), + Path: &path, + }, func(out *ssm.GetParametersByPathOutput, _ bool) bool { + for _, parameter := range out.Parameters { + if parameter.Name == nil || parameter.Value == nil { + continue + } + values[*parameter.Name] = *parameter.Value + } + return true + }); err != nil { + return nil, fmt.Errorf("getting ssm parameters for path %q, %w", path, err) + } + p.cache.SetDefault(path, values) + return values, nil +} + +func (p *DefaultProvider) Get(ctx context.Context, path string) (string, error) { + p.Lock() + defer p.Unlock() + if val, ok := p.cache.Get(path); ok { + return val.(string), nil + } + out, err := p.ssmapi.GetParameterWithContext(ctx, &ssm.GetParameterInput{Name: &path}) + if err != nil { + return "", fmt.Errorf("getting ssm parameter %q, %w", path, err) + } + return lo.FromPtr(out.Parameter.Value), err +} From 1acf7fd20073a92fb7cd015978b83e8607cdc807 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: 
Wed, 3 Jul 2024 14:40:25 -0700 Subject: [PATCH 3/9] test: update AMI tests for alias --- pkg/fake/ssmapi.go | 93 +++++++++++++++++++- pkg/providers/amifamily/suite_test.go | 118 +++++++++++++++----------- pkg/test/environment.go | 9 +- 3 files changed, 164 insertions(+), 56 deletions(-) diff --git a/pkg/fake/ssmapi.go b/pkg/fake/ssmapi.go index b9d0e1711d84..bd75d1567974 100644 --- a/pkg/fake/ssmapi.go +++ b/pkg/fake/ssmapi.go @@ -17,9 +17,15 @@ package fake import ( "context" "fmt" + "regexp" + "strconv" + "strings" + "github.com/Pallinder/go-randomdata" "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/aws/karpenter-provider-aws/pkg/providers/version" "github.com/mitchellh/hashstructure/v2" + "github.com/samber/lo" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/request" @@ -29,9 +35,10 @@ import ( type SSMAPI struct { ssmiface.SSMAPI - Parameters map[string]string - GetParameterOutput *ssm.GetParameterOutput - WantErr error + Parameters map[string]string + GetParameterOutput *ssm.GetParameterOutput + GetParametersByPathOutput *ssm.GetParametersByPathOutput + WantErr error } func NewSSMAPI() *SSMAPI { @@ -60,6 +67,86 @@ func (a SSMAPI) GetParameterWithContext(_ context.Context, input *ssm.GetParamet }, nil } +func (a SSMAPI) GetParametersByPathPagesWithContext(_ context.Context, input *ssm.GetParametersByPathInput, f func(*ssm.GetParametersByPathOutput, bool) bool, _ ...request.Option) error { + if a.WantErr != nil { + return a.WantErr + } + if a.GetParametersByPathOutput != nil { + f(a.GetParametersByPathOutput, true) + return nil + } + if len(a.Parameters) != 0 { + f(&ssm.GetParametersByPathOutput{ + Parameters: lo.FilterMap(lo.Entries(a.Parameters), func(p lo.Entry[string, string], _ int) (*ssm.Parameter, bool) { + if !strings.HasPrefix(p.Key, lo.FromPtr(input.Path)) { + return nil, false + } + if strings.TrimPrefix(p.Key, lo.FromPtr(input.Path))[0] != '/' { + return nil, false + } + return &ssm.Parameter{ + Name: lo.ToPtr(p.Key), + Value: 
lo.ToPtr(p.Value), + }, true + }), + }, true) + return nil + } + if params := getDefaultParametersForPath(lo.FromPtr(input.Path)); params != nil { + f(&ssm.GetParametersByPathOutput{Parameters: params}, true) + return nil + } + return fmt.Errorf("path %q does not exist", lo.FromPtr(input.Path)) +} + +func getDefaultParametersForPath(path string) []*ssm.Parameter { + suffixes := map[string][]string{ + `^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2$`: []string{"recommended/image_id"}, + `^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2-arm64$`: []string{"recommended/image_id"}, + `^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2-gpu$`: []string{"recommended/image_id"}, + `^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2023$`: []string{ + "x86_64/standard/recommended/image_id", + "arm64/standard/recommended/image_id", + "x86_64/nvidia/recommended/image_id", + "arm64/nvidia/recommended/image_id", + "x86_64/neuron/recommended/image_id", + "arm64/neuron/recommended/image_id", + }, + `\/aws\/service\/bottlerocket\/aws-k8s-.*`: []string{ + "x86_64/latest/image_id", + "arm64/latest/image_id", + }, + `\/aws\/service\/ami-windows-latest`: lo.FlatMap(supportedK8sVersions(), func(version string, _ int) []string { + return []string{ + fmt.Sprintf("Windows_Server-2019-English-Core-EKS_Optimized-%s/image_id", version), + fmt.Sprintf("Windows_Server-2022-English-Core-EKS_Optimized-%s/image_id", version), + } + }), + } + for matchStr, suffixes := range suffixes { + if !regexp.MustCompile(matchStr).MatchString(path) { + continue + } + return lo.Map(suffixes, func(suffix string, _ int) *ssm.Parameter { + return &ssm.Parameter{ + Name: lo.ToPtr(fmt.Sprintf("%s/%s", path, suffix)), + Value: lo.ToPtr(fmt.Sprintf("ami-%s", randomdata.Alphanumeric(16))), + } + }) + } + return nil +} + +func supportedK8sVersions() []string { + minMinor := lo.Must(strconv.Atoi(strings.Split(version.MinK8sVersion, ".")[1])) + maxMinor := 
lo.Must(strconv.Atoi(strings.Split(version.MaxK8sVersion, ".")[1])) + versions := make([]string, 0, maxMinor-minMinor+1) + for i := minMinor; i <= maxMinor; i++ { + versions = append(versions, fmt.Sprintf("1.%d", i)) + } + return versions +} + func (a *SSMAPI) Reset() { a.GetParameterOutput = nil a.Parameters = nil diff --git a/pkg/providers/amifamily/suite_test.go b/pkg/providers/amifamily/suite_test.go index 57e1f2f0c8dd..14add43e98f6 100644 --- a/pkg/providers/amifamily/suite_test.go +++ b/pkg/providers/amifamily/suite_test.go @@ -334,15 +334,17 @@ var _ = Describe("AMIProvider", func() { // When you tag public or shared resources, the tags you assign are available only to your AWS account; no other AWS account will have access to those tags // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions It("should have empty owners and use tags when prefixes aren't set", func() { - amiSelectorTerms := []v1.AMISelectorTerm{ - { - Tags: map[string]string{ - "Name": "my-ami", - }, + queries, err := awsEnv.AMIProvider.GetAMIQueries(ctx, &v1.EC2NodeClass{ + Spec: v1.EC2NodeClassSpec{ + AMISelectorTerms: []v1.AMISelectorTerm{{ + Tags: map[string]string{ + "Name": "my-ami", + }, + }}, }, - } - filterAndOwnersSets := amifamily.GetFilterAndOwnerSets(amiSelectorTerms) - ExpectConsistsOfFiltersAndOwners([]amifamily.FiltersAndOwners{ + }) + Expect(err).To(BeNil()) + ExpectConsistsOfAMIQueries([]amifamily.AMIQuery{ { Filters: []*ec2.Filter{ { @@ -352,16 +354,18 @@ var _ = Describe("AMIProvider", func() { }, Owners: []string{}, }, - }, filterAndOwnersSets) + }, queries) }) It("should have default owners and use name when prefixed", func() { - amiSelectorTerms := []v1.AMISelectorTerm{ - { - Name: "my-ami", + queries, err := awsEnv.AMIProvider.GetAMIQueries(ctx, &v1.EC2NodeClass{ + Spec: v1.EC2NodeClassSpec{ + AMISelectorTerms: []v1.AMISelectorTerm{{ + Name: "my-ami", + }}, }, - } - filterAndOwnersSets := 
amifamily.GetFilterAndOwnerSets(amiSelectorTerms) - ExpectConsistsOfFiltersAndOwners([]amifamily.FiltersAndOwners{ + }) + Expect(err).To(BeNil()) + ExpectConsistsOfAMIQueries([]amifamily.AMIQuery{ { Filters: []*ec2.Filter{ { @@ -374,19 +378,23 @@ var _ = Describe("AMIProvider", func() { "self", }, }, - }, filterAndOwnersSets) + }, queries) }) It("should not set owners when legacy ids are passed", func() { - amiSelectorTerms := []v1.AMISelectorTerm{ - { - ID: "ami-abcd1234", - }, - { - ID: "ami-cafeaced", + queries, err := awsEnv.AMIProvider.GetAMIQueries(ctx, &v1.EC2NodeClass{ + Spec: v1.EC2NodeClassSpec{ + AMISelectorTerms: []v1.AMISelectorTerm{ + { + ID: "ami-abcd1234", + }, + { + ID: "ami-cafeaced", + }, + }, }, - } - filterAndOwnersSets := amifamily.GetFilterAndOwnerSets(amiSelectorTerms) - ExpectConsistsOfFiltersAndOwners([]amifamily.FiltersAndOwners{ + }) + Expect(err).To(BeNil()) + ExpectConsistsOfAMIQueries([]amifamily.AMIQuery{ { Filters: []*ec2.Filter{ { @@ -395,40 +403,48 @@ var _ = Describe("AMIProvider", func() { }, }, }, - }, filterAndOwnersSets) + }, queries) }) It("should allow only specifying owners", func() { - amiSelectorTerms := []v1.AMISelectorTerm{ - { - Owner: "abcdef", - }, - { - Owner: "123456789012", + queries, err := awsEnv.AMIProvider.GetAMIQueries(ctx, &v1.EC2NodeClass{ + Spec: v1.EC2NodeClassSpec{ + AMISelectorTerms: []v1.AMISelectorTerm{ + { + Owner: "abcdef", + }, + { + Owner: "123456789012", + }, + }, }, - } - filterAndOwnersSets := amifamily.GetFilterAndOwnerSets(amiSelectorTerms) - ExpectConsistsOfFiltersAndOwners([]amifamily.FiltersAndOwners{ + }) + Expect(err).To(BeNil()) + ExpectConsistsOfAMIQueries([]amifamily.AMIQuery{ { Owners: []string{"abcdef"}, }, { Owners: []string{"123456789012"}, }, - }, filterAndOwnersSets) + }, queries) }) It("should allow prefixed name and prefixed owners", func() { - amiSelectorTerms := []v1.AMISelectorTerm{ - { - Name: "my-name", - Owner: "0123456789", - }, - { - Name: "my-name", - Owner: "self", 
+ queries, err := awsEnv.AMIProvider.GetAMIQueries(ctx, &v1.EC2NodeClass{ + Spec: v1.EC2NodeClassSpec{ + AMISelectorTerms: []v1.AMISelectorTerm{ + { + Name: "my-name", + Owner: "0123456789", + }, + { + Name: "my-name", + Owner: "self", + }, + }, }, - } - filterAndOwnersSets := amifamily.GetFilterAndOwnerSets(amiSelectorTerms) - ExpectConsistsOfFiltersAndOwners([]amifamily.FiltersAndOwners{ + }) + Expect(err).To(BeNil()) + ExpectConsistsOfAMIQueries([]amifamily.AMIQuery{ { Owners: []string{"0123456789"}, Filters: []*ec2.Filter{ @@ -447,7 +463,7 @@ var _ = Describe("AMIProvider", func() { }, }, }, - }, filterAndOwnersSets) + }, queries) }) It("should sort amis by creationDate", func() { amis := amifamily.AMIs{ @@ -567,11 +583,11 @@ var _ = Describe("AMIProvider", func() { }) }) -func ExpectConsistsOfFiltersAndOwners(expected, actual []amifamily.FiltersAndOwners) { +func ExpectConsistsOfAMIQueries(expected, actual []amifamily.AMIQuery) { GinkgoHelper() Expect(actual).To(HaveLen(len(expected))) - for _, list := range [][]amifamily.FiltersAndOwners{expected, actual} { + for _, list := range [][]amifamily.AMIQuery{expected, actual} { for _, elem := range list { for _, f := range elem.Filters { sort.Slice(f.Values, func(i, j int) bool { @@ -584,5 +600,5 @@ func ExpectConsistsOfFiltersAndOwners(expected, actual []amifamily.FiltersAndOwn }) } } - Expect(actual).To(ConsistOf(lo.Map(expected, func(f amifamily.FiltersAndOwners, _ int) interface{} { return f })...)) + Expect(actual).To(ConsistOf(lo.Map(expected, func(q amifamily.AMIQuery, _ int) interface{} { return q })...)) } diff --git a/pkg/test/environment.go b/pkg/test/environment.go index 015d5ae9a5eb..ac4c357bfee1 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -34,6 +34,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" + ssmp 
"github.com/aws/karpenter-provider-aws/pkg/providers/ssm" "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" "github.com/aws/karpenter-provider-aws/pkg/providers/version" @@ -66,6 +67,7 @@ type Environment struct { AssociatePublicIPAddressCache *cache.Cache SecurityGroupCache *cache.Cache InstanceProfileCache *cache.Cache + SSMCache *cache.Cache // Providers InstanceTypesProvider *instancetype.DefaultProvider @@ -98,6 +100,7 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment associatePublicIPAddressCache := cache.New(awscache.AssociatePublicIPAddressTTL, awscache.DefaultCleanupInterval) securityGroupCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) instanceProfileCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) + ssmCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) fakePricingAPI := &fake.PricingAPI{} // Providers @@ -106,7 +109,8 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment securityGroupProvider := securitygroup.NewDefaultProvider(ec2api, securityGroupCache) versionProvider := version.NewDefaultProvider(env.KubernetesInterface, kubernetesVersionCache) instanceProfileProvider := instanceprofile.NewDefaultProvider(fake.DefaultRegion, iamapi, instanceProfileCache) - amiProvider := amifamily.NewDefaultProvider(versionProvider, ssmapi, ec2api, ec2Cache) + ssmProvider := ssmp.NewDefaultProvider(ssmapi, ssmCache) + amiProvider := amifamily.NewDefaultProvider(versionProvider, ssmProvider, ec2api, ec2Cache) amiResolver := amifamily.NewResolver(amiProvider) instanceTypesProvider := instancetype.NewDefaultProvider(fake.DefaultRegion, instanceTypeCache, ec2api, subnetProvider, unavailableOfferingsCache, pricingProvider) launchTemplateProvider := @@ -149,6 +153,7 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment SecurityGroupCache: securityGroupCache, InstanceProfileCache: instanceProfileCache, 
UnavailableOfferingsCache: unavailableOfferingsCache, + SSMCache: ssmCache, InstanceTypesProvider: instanceTypesProvider, InstanceProvider: instanceProvider, @@ -181,7 +186,7 @@ func (env *Environment) Reset() { env.AvailableIPAdressCache.Flush() env.SecurityGroupCache.Flush() env.InstanceProfileCache.Flush() - + env.SSMCache.Flush() mfs, err := crmetrics.Registry.Gather() if err != nil { for _, mf := range mfs { From cc92f15525eb1ef5392833dcd26659441a36fd20 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Wed, 3 Jul 2024 11:24:18 -0700 Subject: [PATCH 4/9] chore: drop AMIFamily references --- hack/docs/instancetypes_gen/main.go | 4 +- pkg/cloudprovider/suite_test.go | 5 +- pkg/controllers/nodeclass/hash/suite_test.go | 1 - pkg/controllers/nodeclass/status/ami_test.go | 9 +- .../nodeclass/status/launchtemplate_test.go | 19 ++-- pkg/controllers/nodeclass/status/readiness.go | 3 +- pkg/providers/amifamily/resolver.go | 2 +- pkg/providers/amifamily/suite_test.go | 43 ++------ pkg/providers/instancetype/instancetype.go | 6 +- pkg/providers/instancetype/suite_test.go | 102 ++++++++---------- pkg/providers/launchtemplate/suite_test.go | 39 +++---- pkg/providers/securitygroup/suite_test.go | 4 +- pkg/providers/subnet/suite_test.go | 5 +- pkg/test/nodeclass.go | 4 +- test/pkg/environment/aws/environment.go | 2 +- test/suites/ami/suite_test.go | 37 ++----- .../integration/extended_resources_test.go | 13 +-- .../suites/integration/kubelet_config_test.go | 29 ++--- test/suites/integration/validation_test.go | 4 +- test/suites/nodeclaim/nodeclaim_test.go | 2 - test/suites/scale/deprovisioning_test.go | 4 +- test/suites/scheduling/suite_test.go | 2 +- 22 files changed, 133 insertions(+), 206 deletions(-) diff --git a/hack/docs/instancetypes_gen/main.go b/hack/docs/instancetypes_gen/main.go index 87b4deb1f30d..e5cafcde2803 100644 --- a/hack/docs/instancetypes_gen/main.go +++ b/hack/docs/instancetypes_gen/main.go @@ -105,7 +105,9 @@ func main() { // Fake a NodeClass so we can use 
it to get InstanceTypes nodeClass := &v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ - AMIFamily: &v1.AMIFamilyAL2023, + AMISelectorTerms: []v1.AMISelectorTerm{{ + Alias: "al2023@latest", + }}, SubnetSelectorTerms: []v1.SubnetSelectorTerm{ { Tags: map[string]string{ diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index f28739327e15..69f91f97e368 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -890,7 +890,9 @@ var _ = Describe("CloudProvider", func() { }, Context: lo.ToPtr("fake-context"), DetailedMonitoring: lo.ToPtr(false), - AMIFamily: lo.ToPtr(v1.AMIFamilyAL2023), + AMISelectorTerms: []v1.AMISelectorTerm{{ + Alias: "al2023@latest", + }}, AssociatePublicIPAddress: lo.ToPtr(false), MetadataOptions: &v1.MetadataOptions{ HTTPEndpoint: lo.ToPtr("disabled"), @@ -966,7 +968,6 @@ var _ = Describe("CloudProvider", func() { Entry("Tags", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{Tags: map[string]string{"keyTag-test-3": "valueTag-test-3"}}}), Entry("Context", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{Context: lo.ToPtr("context-2")}}), Entry("DetailedMonitoring", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{DetailedMonitoring: aws.Bool(true)}}), - Entry("AMIFamily", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{AMIFamily: lo.ToPtr(v1.AMIFamilyBottlerocket)}}), Entry("InstanceStorePolicy", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{InstanceStorePolicy: lo.ToPtr(v1.InstanceStorePolicyRAID0)}}), Entry("AssociatePublicIPAddress", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{AssociatePublicIPAddress: lo.ToPtr(true)}}), Entry("MetadataOptions HTTPEndpoint", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPEndpoint: lo.ToPtr("enabled")}}}), diff --git a/pkg/controllers/nodeclass/hash/suite_test.go b/pkg/controllers/nodeclass/hash/suite_test.go index d9090dcfeaa4..6470dec982f9 100644 --- a/pkg/controllers/nodeclass/hash/suite_test.go +++ b/pkg/controllers/nodeclass/hash/suite_test.go @@ -135,7 
+135,6 @@ var _ = Describe("NodeClass Hash Controller", func() { Expect(expectedHash).ToNot(Equal(expectedHashTwo)) }, - Entry("AMIFamily Drift", &v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{AMIFamily: aws.String(v1.AMIFamilyBottlerocket)}}), Entry("UserData Drift", &v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{UserData: aws.String("userdata-test-2")}}), Entry("Tags Drift", &v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{Tags: map[string]string{"keyTag-test-3": "valueTag-test-3"}}}), Entry("BlockDeviceMappings Drift", &v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{DeviceName: aws.String("map-device-test-3")}}}}), diff --git a/pkg/controllers/nodeclass/status/ami_test.go b/pkg/controllers/nodeclass/status/ami_test.go index 0ab001dd7d4c..e52b9fe2771a 100644 --- a/pkg/controllers/nodeclass/status/ami_test.go +++ b/pkg/controllers/nodeclass/status/ami_test.go @@ -123,7 +123,7 @@ var _ = Describe("NodeClass AMI Status Controller", func() { Name: aws.String("test-ami-3"), ImageId: aws.String("ami-id-789"), CreationDate: aws.String(time.Now().Add(2 * time.Minute).Format(time.RFC3339)), - Architecture: aws.String("x86_64"), + Architecture: aws.String("arm64"), Tags: []*ec2.Tag{ {Key: aws.String("Name"), Value: aws.String("test-ami-3")}, {Key: aws.String("foo"), Value: aws.String("bar")}, @@ -131,7 +131,7 @@ var _ = Describe("NodeClass AMI Status Controller", func() { }, }, }) - nodeClass.Spec.AMISelectorTerms = nil + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} ExpectApplied(ctx, env.Client, nodeClass) ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) @@ -214,8 +214,9 @@ var _ = Describe("NodeClass AMI Status Controller", func() { fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/x86_64/latest/image_id", version): "ami-id-123", fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/arm64/latest/image_id", version): "ami-id-456", } - 
nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket - nodeClass.Spec.AMISelectorTerms = nil + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ + Alias: "bottlerocket@latest", + }} awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{ Images: []*ec2.Image{ { diff --git a/pkg/controllers/nodeclass/status/launchtemplate_test.go b/pkg/controllers/nodeclass/status/launchtemplate_test.go index 5035d03eba5f..f69f9a82c799 100644 --- a/pkg/controllers/nodeclass/status/launchtemplate_test.go +++ b/pkg/controllers/nodeclass/status/launchtemplate_test.go @@ -52,22 +52,21 @@ var _ = Describe("NodeClass Launch Template CIDR Resolution Controller", func() awsEnv.LaunchTemplateProvider.ClusterCIDR.Store(nil) }) It("shouldn't resolve cluster CIDR for non-AL2023 NodeClasses", func() { - for _, family := range []string{ - v1.AMIFamilyAL2, - v1.AMIFamilyBottlerocket, - v1.AMIFamilyUbuntu, - v1.AMIFamilyWindows2019, - v1.AMIFamilyWindows2022, - v1.AMIFamilyCustom, + for _, term := range []v1.AMISelectorTerm{ + {Alias: "al2@latest"}, + {Alias: "bottlerocket@latest"}, + {Alias: "windows2019@latest"}, + {Alias: "windows2022@latest"}, + {ID: "ami-12345"}, } { - nodeClass.Spec.AMIFamily = lo.ToPtr(family) + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{term} ExpectApplied(ctx, env.Client, nodeClass) ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) Expect(awsEnv.LaunchTemplateProvider.ClusterCIDR.Load()).To(BeNil()) } }) It("should resolve cluster CIDR for IPv4 clusters", func() { - nodeClass.Spec.AMIFamily = lo.ToPtr(v1.AMIFamilyAL2023) + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2023@latest"}} ExpectApplied(ctx, env.Client, nodeClass) ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) Expect(lo.FromPtr(awsEnv.LaunchTemplateProvider.ClusterCIDR.Load())).To(Equal("10.100.0.0/16")) @@ -82,7 +81,7 @@ var _ = Describe("NodeClass Launch Template CIDR Resolution Controller", func() }, }, }) - 
nodeClass.Spec.AMIFamily = lo.ToPtr(v1.AMIFamilyAL2023) + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2023@latest"}} ExpectApplied(ctx, env.Client, nodeClass) ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) Expect(lo.FromPtr(awsEnv.LaunchTemplateProvider.ClusterCIDR.Load())).To(Equal("2001:db8::/64")) diff --git a/pkg/controllers/nodeclass/status/readiness.go b/pkg/controllers/nodeclass/status/readiness.go index 1905267fbeae..828ae098010b 100644 --- a/pkg/controllers/nodeclass/status/readiness.go +++ b/pkg/controllers/nodeclass/status/readiness.go @@ -19,7 +19,6 @@ import ( "fmt" "github.com/awslabs/operatorpkg/status" - "github.com/samber/lo" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" @@ -36,7 +35,7 @@ func (n Readiness) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (r // A NodeClass that uses AL2023 requires the cluster CIDR for launching nodes. // To allow Karpenter to be used for Non-EKS clusters, resolving the Cluster CIDR // will not be done at startup but instead in a reconcile loop. - if lo.FromPtr(nodeClass.Spec.AMIFamily) == v1.AMIFamilyAL2023 { + if nodeClass.AMIFamily() == v1.AMIFamilyAL2023 { if err := n.launchTemplateProvider.ResolveClusterCIDR(ctx); err != nil { nodeClass.StatusConditions().SetFalse(status.ConditionReady, "NodeClassNotReady", "Failed to detect the cluster CIDR") return reconcile.Result{}, fmt.Errorf("failed to detect the cluster CIDR, %w", err) diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index e5d85ca6fc23..04d75e3d3f2f 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -126,7 +126,7 @@ func NewResolver(amiProvider Provider) *Resolver { // Resolve generates launch templates using the static options and dynamically generates launch template parameters. 
// Multiple ResolvedTemplates are returned based on the instanceTypes passed in to support special AMIs for certain instance types like GPUs. func (r Resolver) Resolve(nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, instanceTypes []*cloudprovider.InstanceType, capacityType string, options *Options) ([]*LaunchTemplate, error) { - amiFamily := GetAMIFamily(nodeClass.Spec.AMIFamily, options) + amiFamily := GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), options) if len(nodeClass.Status.AMIs) == 0 { return nil, fmt.Errorf("no amis exist given constraints") } diff --git a/pkg/providers/amifamily/suite_test.go b/pkg/providers/amifamily/suite_test.go index 14add43e98f6..a5787397f9d8 100644 --- a/pkg/providers/amifamily/suite_test.go +++ b/pkg/providers/amifamily/suite_test.go @@ -134,7 +134,7 @@ var _ = Describe("AMIProvider", func() { nodeClass = test.EC2NodeClass() }) It("should succeed to resolve AMIs (AL2)", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id", version): amd64AMI, fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2-gpu/recommended/image_id", version): amd64NvidiaAMI, @@ -145,7 +145,7 @@ var _ = Describe("AMIProvider", func() { Expect(amis).To(HaveLen(4)) }) It("should succeed to resolve AMIs (AL2023)", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2023@latest"}} awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", version): amd64AMI, fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", version): arm64AMI, @@ -155,7 +155,7 @@ var _ = Describe("AMIProvider", func() { Expect(amis).To(HaveLen(2)) }) 
It("should succeed to resolve AMIs (Bottlerocket)", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/x86_64/latest/image_id", version): amd64AMI, fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s-nvidia/x86_64/latest/image_id", version): amd64NvidiaAMI, @@ -166,18 +166,8 @@ var _ = Describe("AMIProvider", func() { Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(6)) }) - It("should succeed to resolve AMIs (Ubuntu)", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyUbuntu - awsEnv.SSMAPI.Parameters = map[string]string{ - fmt.Sprintf("/aws/service/canonical/ubuntu/eks/20.04/%s/stable/current/amd64/hvm/ebs-gp2/ami-id", version): amd64AMI, - fmt.Sprintf("/aws/service/canonical/ubuntu/eks/20.04/%s/stable/current/arm64/hvm/ebs-gp2/ami-id", version): arm64AMI, - } - amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) - Expect(err).ToNot(HaveOccurred()) - Expect(amis).To(HaveLen(2)) - }) It("should succeed to resolve AMIs (Windows2019)", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyWindows2019 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "windows2019@latest"}} awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/ami-windows-latest/Windows_Server-2019-English-Core-EKS_Optimized-%s/image_id", version): amd64AMI, } @@ -186,7 +176,7 @@ var _ = Describe("AMIProvider", func() { Expect(amis).To(HaveLen(1)) }) It("should succeed to resolve AMIs (Windows2022)", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyWindows2022 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "windows2022@latest"}} awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/ami-windows-latest/Windows_Server-2022-English-Core-EKS_Optimized-%s/image_id", version): amd64AMI, } @@ -194,12 +184,6 @@ var _ = 
Describe("AMIProvider", func() { Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(1)) }) - It("should succeed to resolve AMIs (Custom)", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyCustom - amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) - Expect(err).ToNot(HaveOccurred()) - Expect(amis).To(HaveLen(0)) - }) It("should not cause data races when calling Get() simultaneously", func() { nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ { @@ -245,7 +229,7 @@ var _ = Describe("AMIProvider", func() { }) Context("SSM Alias Missing", func() { It("should succeed to partially resolve AMIs if all SSM aliases don't exist (Al2)", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} // No GPU AMI exists here awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id", version): amd64AMI, @@ -257,7 +241,7 @@ var _ = Describe("AMIProvider", func() { Expect(amis).To(HaveLen(2)) }) It("should succeed to partially resolve AMIs if all SSM aliases don't exist (AL2023)", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2023@latest"}} awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", version): amd64AMI, } @@ -266,7 +250,7 @@ var _ = Describe("AMIProvider", func() { Expect(amis).To(HaveLen(1)) }) It("should succeed to partially resolve AMIs if all SSM aliases don't exist (Bottlerocket)", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} // No GPU AMI exists for AM64 here awsEnv.SSMAPI.Parameters = map[string]string{ fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/x86_64/latest/image_id", version): amd64AMI, @@ -278,17 +262,6 @@ var _ = 
Describe("AMIProvider", func() { Expect(err).ToNot(HaveOccurred()) Expect(amis).To(HaveLen(4)) }) - It("should succeed to partially resolve AMIs if all SSM aliases don't exist (Ubuntu)", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyUbuntu - // No AMD64 AMI exists here - awsEnv.SSMAPI.Parameters = map[string]string{ - fmt.Sprintf("/aws/service/canonical/ubuntu/eks/20.04/%s/stable/current/arm64/hvm/ebs-gp2/ami-id", version): arm64AMI, - } - // Only 1 of the requirements sets for the SSM aliases will resolve - amis, err := awsEnv.AMIProvider.List(ctx, nodeClass) - Expect(err).ToNot(HaveOccurred()) - Expect(amis).To(HaveLen(1)) - }) }) Context("AMI Tag Requirements", func() { var img *ec2.Image diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index f34e2c25c039..e95c01bb16bd 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -131,8 +131,8 @@ func (p *DefaultProvider) List(ctx context.Context, kc *v1.KubeletConfiguration, subnetZonesHash, kcHash, blockDeviceMappingsHash, - aws.StringValue((*string)(nodeClass.Spec.InstanceStorePolicy)), - aws.StringValue(nodeClass.Spec.AMIFamily), + lo.FromPtr((*string)(nodeClass.Spec.InstanceStorePolicy)), + nodeClass.AMIFamily(), ) if item, ok := p.instanceTypesCache.Get(key); ok { // Ensure what's returned from this function is a shallow-copy of the slice (not a deep-copy of the data itself) @@ -151,7 +151,7 @@ func (p *DefaultProvider) List(ctx context.Context, kc *v1.KubeletConfiguration, if p.cm.HasChanged("zones", allZones) { log.FromContext(ctx).WithValues("zones", allZones.UnsortedList()).V(1).Info("discovered zones") } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) result := lo.Map(p.instanceTypesInfo, func(i *ec2.InstanceTypeInfo, _ int) *cloudprovider.InstanceType { 
instanceTypeVCPU.With(prometheus.Labels{ instanceTypeLabel: *i.InstanceType, diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 569647e2b99a..9ff96447c008 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -168,7 +168,9 @@ var _ = Describe("InstanceTypeProvider", func() { }) windowsNodeClass = test.EC2NodeClass(v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ - AMIFamily: &v1.AMIFamilyWindows2022, + AMISelectorTerms: []v1.AMISelectorTerm{{ + Alias: "windows2022@latest", + }}, }, Status: v1.EC2NodeClassStatus{ InstanceProfile: "test-profile", @@ -618,7 +620,7 @@ var _ = Describe("InstanceTypeProvider", func() { Expect(supportsPodENI()).To(Equal(true)) }) It("should launch vpc.amazonaws.com/PrivateIPv4Address on a compatible instance type", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyWindows2022 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "windows2022@latest"}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod(coretest.PodOptions{ ResourceRequirements: corev1.ResourceRequirements{ @@ -679,7 +681,7 @@ var _ = Describe("InstanceTypeProvider", func() { Values: []string{"test"}, }, }) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyWindows2022 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "windows2022@latest"}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod(coretest.PodOptions{ ResourceRequirements: corev1.ResourceRequirements{ @@ -910,7 +912,7 @@ var _ = Describe("InstanceTypeProvider", func() { Expect(err).To(BeNil()) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{} for _, info := range instanceInfo.InstanceTypes { - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, 
@@ -933,7 +935,7 @@ var _ = Describe("InstanceTypeProvider", func() { Expect(err).To(BeNil()) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{} for _, info := range instanceInfo.InstanceTypes { - amiFamily := amifamily.GetAMIFamily(windowsNodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(windowsNodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1052,7 +1054,7 @@ var _ = Describe("InstanceTypeProvider", func() { }) Context("System Reserved Resources", func() { It("should use defaults when no kubelet is specified", func() { - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{} it := instancetype.NewInstanceType(ctx, info, @@ -1080,7 +1082,7 @@ var _ = Describe("InstanceTypeProvider", func() { string(corev1.ResourceEphemeralStorage): "10Gi", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1102,8 +1104,8 @@ var _ = Describe("InstanceTypeProvider", func() { }) Context("Kube Reserved Resources", func() { It("should use defaults when no kubelet is specified", func() { + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{} - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1135,7 +1137,7 @@ var _ = Describe("InstanceTypeProvider", func() { string(corev1.ResourceEphemeralStorage): "2Gi", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := 
amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1174,7 +1176,7 @@ var _ = Describe("InstanceTypeProvider", func() { instancetype.MemoryAvailable: "500Mi", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1203,7 +1205,7 @@ var _ = Describe("InstanceTypeProvider", func() { instancetype.MemoryAvailable: "10%", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1232,7 +1234,7 @@ var _ = Describe("InstanceTypeProvider", func() { instancetype.MemoryAvailable: "100%", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1261,7 +1263,7 @@ var _ = Describe("InstanceTypeProvider", func() { instancetype.MemoryAvailable: "50Mi", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1292,7 +1294,7 @@ var _ = Describe("InstanceTypeProvider", func() { instancetype.MemoryAvailable: "500Mi", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1324,7 +1326,7 @@ var _ = 
Describe("InstanceTypeProvider", func() { instancetype.MemoryAvailable: "10%", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1353,7 +1355,7 @@ var _ = Describe("InstanceTypeProvider", func() { instancetype.MemoryAvailable: "100%", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1371,7 +1373,7 @@ var _ = Describe("InstanceTypeProvider", func() { Expect(it.Overhead.EvictionThreshold.Memory().String()).To(Equal("0")) }) It("should ignore eviction threshold when using Bottlerocket AMI", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{ SystemReserved: map[string]string{ string(corev1.ResourceMemory): "20Gi", @@ -1386,7 +1388,7 @@ var _ = Describe("InstanceTypeProvider", func() { instancetype.MemoryAvailable: "10Gi", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1405,8 +1407,8 @@ var _ = Describe("InstanceTypeProvider", func() { }) }) It("should take the default eviction threshold when none is specified", func() { + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{} - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ 
-1440,7 +1442,7 @@ var _ = Describe("InstanceTypeProvider", func() { instancetype.MemoryAvailable: "1Gi", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1472,7 +1474,7 @@ var _ = Describe("InstanceTypeProvider", func() { instancetype.MemoryAvailable: "5%", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1504,7 +1506,7 @@ var _ = Describe("InstanceTypeProvider", func() { instancetype.MemoryAvailable: "1Gi", }, } - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1528,7 +1530,7 @@ var _ = Describe("InstanceTypeProvider", func() { nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{} for _, info := range instanceInfo.InstanceTypes { if *info.InstanceType == "t3.large" { - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1546,7 +1548,7 @@ var _ = Describe("InstanceTypeProvider", func() { Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", 35)) } if *info.InstanceType == "m6idn.32xlarge" { - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1572,7 +1574,7 @@ var _ = 
Describe("InstanceTypeProvider", func() { MaxPods: lo.ToPtr(int32(10)), } for _, info := range instanceInfo.InstanceTypes { - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1597,7 +1599,7 @@ var _ = Describe("InstanceTypeProvider", func() { MaxPods: lo.ToPtr(int32(10)), } for _, info := range instanceInfo.InstanceTypes { - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1626,7 +1628,7 @@ var _ = Describe("InstanceTypeProvider", func() { return *info.InstanceType == "t3.large" }) Expect(ok).To(Equal(true)) - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{} it := instancetype.NewInstanceType(ctx, t3Large, @@ -1661,7 +1663,7 @@ var _ = Describe("InstanceTypeProvider", func() { return *info.InstanceType == "t3.large" }) Expect(ok).To(Equal(true)) - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{} it := instancetype.NewInstanceType(ctx, t3Large, @@ -1693,7 +1695,7 @@ var _ = Describe("InstanceTypeProvider", func() { PodsPerCore: lo.ToPtr(int32(1)), } for _, info := range instanceInfo.InstanceTypes { - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, 
fake.DefaultRegion, @@ -1719,7 +1721,7 @@ var _ = Describe("InstanceTypeProvider", func() { MaxPods: lo.ToPtr(int32(20)), } for _, info := range instanceInfo.InstanceTypes { - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1740,12 +1742,12 @@ var _ = Describe("InstanceTypeProvider", func() { It("should ignore pods-per-core when using Bottlerocket AMI", func() { instanceInfo, err := awsEnv.EC2API.DescribeInstanceTypesWithContext(ctx, &ec2.DescribeInstanceTypesInput{}) Expect(err).To(BeNil()) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{ PodsPerCore: lo.ToPtr(int32(1)), } for _, info := range instanceInfo.InstanceTypes { - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1772,7 +1774,7 @@ var _ = Describe("InstanceTypeProvider", func() { } for _, info := range instanceInfo.InstanceTypes { if *info.InstanceType == "t3.large" { - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -1790,7 +1792,7 @@ var _ = Describe("InstanceTypeProvider", func() { Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", 35)) } if *info.InstanceType == "m6idn.32xlarge" { - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := 
instancetype.NewInstanceType(ctx, info, fake.DefaultRegion, @@ -2185,7 +2187,7 @@ var _ = Describe("InstanceTypeProvider", func() { }) Context("Ephemeral Storage", func() { BeforeEach(func() { - nodeClass.Spec.AMIFamily = aws.String(v1.AMIFamilyAL2) + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ { DeviceName: aws.String("/dev/xvda"), @@ -2196,7 +2198,6 @@ var _ = Describe("InstanceTypeProvider", func() { } }) It("should default to EBS defaults when volumeSize is not defined in blockDeviceMappings for custom AMIs", func() { - nodeClass.Spec.AMIFamily = aws.String(v1.AMIFamilyCustom) nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ { Tags: map[string]string{ @@ -2230,7 +2231,7 @@ var _ = Describe("InstanceTypeProvider", func() { }) }) It("should default to EBS defaults when volumeSize is not defined in blockDeviceMappings for AL2023 Root volume", func() { - nodeClass.Spec.AMIFamily = aws.String(v1.AMIFamilyAL2023) + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2023@latest"}} awsEnv.LaunchTemplateProvider.CABundle = lo.ToPtr("Y2EtYnVuZGxlCg==") awsEnv.LaunchTemplateProvider.ClusterCIDR.Store(lo.ToPtr("10.100.0.0/16")) ExpectApplied(ctx, env.Client, nodePool, nodeClass) @@ -2246,7 +2247,7 @@ var _ = Describe("InstanceTypeProvider", func() { }) }) It("should default to EBS defaults when volumeSize is not defined in blockDeviceMappings for Bottlerocket Root volume", func() { - nodeClass.Spec.AMIFamily = aws.String(v1.AMIFamilyBottlerocket) + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} nodeClass.Spec.BlockDeviceMappings[0].DeviceName = aws.String("/dev/xvdb") ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod() @@ -2261,21 +2262,6 @@ var _ = Describe("InstanceTypeProvider", func() { 
Expect(*ltInput.LaunchTemplateData.BlockDeviceMappings[0].Ebs.SnapshotId).To(Equal("snap-xxxxxxxx")) }) }) - It("should default to EBS defaults when volumeSize is not defined in blockDeviceMappings for Ubuntu Root volume", func() { - nodeClass.Spec.AMIFamily = aws.String(v1.AMIFamilyUbuntu) - nodeClass.Spec.BlockDeviceMappings[0].DeviceName = aws.String("/dev/sda1") - ExpectApplied(ctx, env.Client, nodePool, nodeClass) - pod := coretest.UnschedulablePod() - ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) - node := ExpectScheduled(ctx, env.Client, pod) - Expect(*node.Status.Capacity.StorageEphemeral()).To(Equal(resource.MustParse("20Gi"))) - Expect(awsEnv.EC2API.CalledWithCreateLaunchTemplateInput.Len()).To(BeNumerically(">=", 1)) - awsEnv.EC2API.CalledWithCreateLaunchTemplateInput.ForEach(func(ltInput *ec2.CreateLaunchTemplateInput) { - Expect(ltInput.LaunchTemplateData.BlockDeviceMappings).To(HaveLen(1)) - Expect(*ltInput.LaunchTemplateData.BlockDeviceMappings[0].DeviceName).To(Equal("/dev/sda1")) - Expect(*ltInput.LaunchTemplateData.BlockDeviceMappings[0].Ebs.SnapshotId).To(Equal("snap-xxxxxxxx")) - }) - }) }) Context("Metadata Options", func() { It("should default metadata options on generated launch template", func() { @@ -2369,7 +2355,7 @@ var _ = Describe("InstanceTypeProvider", func() { It("changes to nodeclass fields should result in a different set of instances types", func() { // We should expect these nodeclass fields to change the result of the instance type // nodeClass.instanceStorePolicy - // nodeClass.amiFamily + // nodeClass.amiSelectorTerms (alias) // nodeClass.blockDeviceMapping.rootVolume // nodeClass.blockDeviceMapping.volumeSize // nodeClass.blockDeviceMapping.deviceName @@ -2383,7 +2369,13 @@ var _ = Describe("InstanceTypeProvider", func() { nodeClassChanges := []*v1.EC2NodeClass{ {}, // Testing the base case black EC2NodeClass {Spec: v1.EC2NodeClassSpec{InstanceStorePolicy: lo.ToPtr(v1.InstanceStorePolicyRAID0)}}, - 
{Spec: v1.EC2NodeClassSpec{AMIFamily: &v1.AMIFamilyUbuntu}}, + { + Spec: v1.EC2NodeClassSpec{ + AMISelectorTerms: []v1.AMISelectorTerm{{ + Alias: "bottlerocket@latest", + }}, + }, + }, { Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{ { diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index cf724f665b69..32b23ea80e2e 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -594,7 +594,7 @@ var _ = Describe("LaunchTemplate Provider", func() { }) Context("Block Device Mappings", func() { It("should default AL2 block device mappings", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) @@ -608,7 +608,7 @@ var _ = Describe("LaunchTemplate Provider", func() { }) }) It("should default AL2023 block device mappings", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2023@latest"}} awsEnv.LaunchTemplateProvider.CABundle = lo.ToPtr("Y2EtYnVuZGxlCg==") awsEnv.LaunchTemplateProvider.ClusterCIDR.Store(lo.ToPtr("10.100.0.0/16")) ExpectApplied(ctx, env.Client, nodePool, nodeClass) @@ -624,7 +624,7 @@ var _ = Describe("LaunchTemplate Provider", func() { }) }) It("should use custom block device mapping", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ { DeviceName: aws.String("/dev/xvda"), @@ -674,7 +674,7 @@ var _ = Describe("LaunchTemplate Provider", func() { }) }) It("should round up for custom block device mappings when specified in gigabytes", func() { - nodeClass.Spec.AMIFamily = 
&v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ { DeviceName: aws.String("/dev/xvda"), @@ -711,7 +711,7 @@ var _ = Describe("LaunchTemplate Provider", func() { }) }) It("should default bottlerocket second volume with root volume size", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) @@ -730,7 +730,6 @@ var _ = Describe("LaunchTemplate Provider", func() { }) }) It("should not default block device mappings for custom AMIFamilies", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyCustom nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod() @@ -742,7 +741,6 @@ var _ = Describe("LaunchTemplate Provider", func() { }) }) It("should use custom block device mapping for custom AMIFamilies", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyCustom nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ { @@ -909,7 +907,6 @@ var _ = Describe("LaunchTemplate Provider", func() { ExpectScheduled(ctx, env.Client, pod) }) It("should pack pods using blockdevicemappings for Custom AMIFamily", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyCustom nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ { @@ -939,7 +936,6 @@ var _ = Describe("LaunchTemplate Provider", func() { ExpectScheduled(ctx, env.Client, pod) }) It("should pack pods using the configured root volume in blockdevicemappings", 
func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyCustom nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ { @@ -1008,9 +1004,9 @@ var _ = Describe("LaunchTemplate Provider", func() { VMMemoryOverheadPercent: lo.ToPtr[float64](0), })) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{} - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, "", @@ -1062,9 +1058,9 @@ var _ = Describe("LaunchTemplate Provider", func() { VMMemoryOverheadPercent: lo.ToPtr[float64](0), })) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{} - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, "", @@ -1088,9 +1084,9 @@ var _ = Describe("LaunchTemplate Provider", func() { VMMemoryOverheadPercent: lo.ToPtr[float64](0), })) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{MaxPods: lo.ToPtr[int32](110)} - amiFamily := amifamily.GetAMIFamily(nodeClass.Spec.AMIFamily, &amifamily.Options{}) + amiFamily := amifamily.GetAMIFamily(lo.ToPtr(nodeClass.AMIFamily()), &amifamily.Options{}) it := instancetype.NewInstanceType(ctx, info, "", @@ -1360,7 +1356,7 @@ var _ = Describe("LaunchTemplate Provider", func() { 
ExpectLaunchTemplatesCreatedWithUserDataNotContaining(corev1.LabelNamespaceNodeRestriction) }) It("should specify --local-disks raid0 when instance-store policy is set on AL2", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} nodeClass.Spec.InstanceStorePolicy = lo.ToPtr(v1.InstanceStorePolicyRAID0) ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod() @@ -1370,7 +1366,7 @@ var _ = Describe("LaunchTemplate Provider", func() { }) Context("Bottlerocket", func() { BeforeEach(func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{MaxPods: lo.ToPtr[int32](110)} }) It("should merge in custom user data", func() { @@ -1644,7 +1640,7 @@ var _ = Describe("LaunchTemplate Provider", func() { }) Context("AL2023", func() { BeforeEach(func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2023@latest"}} // base64 encoded version of "ca-bundle" to ensure the nodeadm bootstrap provider can decode successfully awsEnv.LaunchTemplateProvider.CABundle = lo.ToPtr("Y2EtYnVuZGxlCg==") @@ -1866,7 +1862,6 @@ var _ = Describe("LaunchTemplate Provider", func() { It("should copy over userData untouched when AMIFamily is Custom", func() { nodeClass.Spec.UserData = aws.String("special user data") nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Tags: map[string]string{"*": "*"}}} - nodeClass.Spec.AMIFamily = &v1.AMIFamilyCustom nodeClass.Status.AMIs = []v1.AMI{ { ID: "ami-123", @@ -2074,7 +2069,7 @@ var _ = Describe("LaunchTemplate Provider", func() { Context("Windows Custom UserData", func() { BeforeEach(func() { nodePool.Spec.Template.Spec.Requirements = []karpv1.NodeSelectorRequirementWithMinValues{{NodeSelectorRequirement: 
corev1.NodeSelectorRequirement{Key: corev1.LabelOSStable, Operator: corev1.NodeSelectorOpIn, Values: []string{string(corev1.Windows)}}}} - nodeClass.Spec.AMIFamily = &v1.AMIFamilyWindows2022 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "windows2022@latest"}} nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{MaxPods: lo.ToPtr[int32](110)} }) It("should merge and bootstrap with custom user data", func() { @@ -2114,7 +2109,7 @@ var _ = Describe("LaunchTemplate Provider", func() { }) Context("Detailed Monitoring", func() { It("should default detailed monitoring to off", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) @@ -2125,7 +2120,7 @@ var _ = Describe("LaunchTemplate Provider", func() { }) }) It("should pass detailed monitoring setting to the launch template at creation", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} nodeClass.Spec.DetailedMonitoring = aws.Bool(true) ExpectApplied(ctx, env.Client, nodePool, nodeClass) pod := coretest.UnschedulablePod() diff --git a/pkg/providers/securitygroup/suite_test.go b/pkg/providers/securitygroup/suite_test.go index 7adcbc95cb8e..6a2e18fe8111 100644 --- a/pkg/providers/securitygroup/suite_test.go +++ b/pkg/providers/securitygroup/suite_test.go @@ -70,7 +70,9 @@ var _ = BeforeEach(func() { ctx = options.ToContext(ctx, test.Options()) nodeClass = test.EC2NodeClass(v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ - AMIFamily: aws.String(v1.AMIFamilyAL2), + AMISelectorTerms: []v1.AMISelectorTerm{{ + Alias: "al2@latest", + }}, SubnetSelectorTerms: []v1.SubnetSelectorTerm{ { Tags: map[string]string{ diff --git a/pkg/providers/subnet/suite_test.go b/pkg/providers/subnet/suite_test.go index 
068fb93f4676..f6dedfa1d2e7 100644 --- a/pkg/providers/subnet/suite_test.go +++ b/pkg/providers/subnet/suite_test.go @@ -22,7 +22,6 @@ import ( "sigs.k8s.io/karpenter/pkg/test/v1alpha1" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/samber/lo" @@ -70,7 +69,9 @@ var _ = BeforeEach(func() { ctx = options.ToContext(ctx, test.Options()) nodeClass = test.EC2NodeClass(v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ - AMIFamily: aws.String(v1.AMIFamilyAL2), + AMISelectorTerms: []v1.AMISelectorTerm{{ + Alias: "al2@latest", + }}, SubnetSelectorTerms: []v1.SubnetSelectorTerm{ { Tags: map[string]string{ diff --git a/pkg/test/nodeclass.go b/pkg/test/nodeclass.go index 6b8858832a29..dc0d911fb97a 100644 --- a/pkg/test/nodeclass.go +++ b/pkg/test/nodeclass.go @@ -137,8 +137,8 @@ func BetaEC2NodeClass(overrides ...v1beta1.EC2NodeClass) *v1beta1.EC2NodeClass { panic(fmt.Sprintf("Failed to merge settings: %s", err)) } } - if options.Spec.AMIFamily == nil { - options.Spec.AMIFamily = &v1beta1.AMIFamilyAL2 + if len(options.Spec.AMISelectorTerms) == 0 { + options.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{Alias: "al2@latest"}} options.Status.AMIs = []v1beta1.AMI{ { ID: "ami-test1", diff --git a/test/pkg/environment/aws/environment.go b/test/pkg/environment/aws/environment.go index 64d307bfb58e..8df6765a9915 100644 --- a/test/pkg/environment/aws/environment.go +++ b/test/pkg/environment/aws/environment.go @@ -143,7 +143,7 @@ func GetTimeStreamAPI(session *session.Session) timestreamwriteiface.TimestreamW func (env *Environment) DefaultEC2NodeClass() *v1.EC2NodeClass { nodeClass := test.EC2NodeClass() - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2023@latest"}} nodeClass.Spec.Tags = map[string]string{ "testing/cluster": env.ClusterName, } diff --git a/test/suites/ami/suite_test.go b/test/suites/ami/suite_test.go index 85e2519819e5..0769c118eb07 100644 --- 
a/test/suites/ami/suite_test.go +++ b/test/suites/ami/suite_test.go @@ -163,49 +163,28 @@ var _ = Describe("AMI", func() { Context("AMIFamily", func() { It("should provision a node using the AL2 family", func() { pod := coretest.Pod() - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} env.ExpectCreated(nodeClass, nodePool, pod) env.EventuallyExpectHealthy(pod) env.ExpectCreatedNodeCount("==", 1) }) It("should provision a node using the AL2023 family", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2023@latest"}} pod := coretest.Pod() env.ExpectCreated(nodeClass, nodePool, pod) env.EventuallyExpectHealthy(pod) env.ExpectCreatedNodeCount("==", 1) }) It("should provision a node using the Bottlerocket family", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket - pod := coretest.Pod() - env.ExpectCreated(nodeClass, nodePool, pod) - env.EventuallyExpectHealthy(pod) - env.ExpectCreatedNodeCount("==", 1) - }) - It("should provision a node using the Ubuntu family", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyUbuntu - // TODO (jmdeal@): remove once 22.04 AMIs are supported - if env.K8sMinorVersion() >= 29 { - nodeClass.Spec.AMISelectorTerms = lo.Map([]string{ - "/aws/service/canonical/ubuntu/eks/20.04/1.28/stable/current/amd64/hvm/ebs-gp2/ami-id", - "/aws/service/canonical/ubuntu/eks/20.04/1.28/stable/current/arm64/hvm/ebs-gp2/ami-id", - }, func(ssmPath string, _ int) v1.AMISelectorTerm { - return v1.AMISelectorTerm{ID: env.GetAMIBySSMPath(ssmPath)} - }) - } + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} pod := coretest.Pod() env.ExpectCreated(nodeClass, nodePool, pod) env.EventuallyExpectHealthy(pod) env.ExpectCreatedNodeCount("==", 1) }) It("should support Custom AMIFamily with AMI Selectors", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyCustom al2023AMI 
:= env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) - nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ - { - ID: al2023AMI, - }, - } + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: al2023AMI}} rawContent, err := os.ReadFile("testdata/al2023_userdata_input.yaml") Expect(err).ToNot(HaveOccurred()) nodeClass.Spec.UserData = lo.ToPtr(fmt.Sprintf(string(rawContent), env.ClusterName, @@ -257,7 +236,7 @@ var _ = Describe("AMI", func() { It("should merge UserData contents for AL2 AMIFamily", func() { content, err := os.ReadFile("testdata/al2_userdata_input.sh") Expect(err).ToNot(HaveOccurred()) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} nodeClass.Spec.UserData = awssdk.String(string(content)) nodePool.Spec.Template.Spec.Taints = []corev1.Taint{{Key: "example.com", Value: "value", Effect: "NoExecute"}} nodePool.Spec.Template.Spec.StartupTaints = []corev1.Taint{{Key: "example.com", Value: "value", Effect: "NoSchedule"}} @@ -278,7 +257,7 @@ var _ = Describe("AMI", func() { It("should merge non-MIME UserData contents for AL2 AMIFamily", func() { content, err := os.ReadFile("testdata/al2_no_mime_userdata_input.sh") Expect(err).ToNot(HaveOccurred()) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} nodeClass.Spec.UserData = awssdk.String(string(content)) nodePool.Spec.Template.Spec.Taints = []corev1.Taint{{Key: "example.com", Value: "value", Effect: "NoExecute"}} nodePool.Spec.Template.Spec.StartupTaints = []corev1.Taint{{Key: "example.com", Value: "value", Effect: "NoSchedule"}} @@ -299,7 +278,7 @@ var _ = Describe("AMI", func() { It("should merge UserData contents for Bottlerocket AMIFamily", func() { content, err := os.ReadFile("testdata/br_userdata_input.sh") Expect(err).ToNot(HaveOccurred()) - 
nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} nodeClass.Spec.UserData = awssdk.String(string(content)) nodePool.Spec.Template.Spec.Taints = []corev1.Taint{{Key: "example.com", Value: "value", Effect: "NoExecute"}} nodePool.Spec.Template.Spec.StartupTaints = []corev1.Taint{{Key: "example.com", Value: "value", Effect: "NoSchedule"}} @@ -328,7 +307,7 @@ var _ = Describe("AMI", func() { content, err := os.ReadFile("testdata/windows_userdata_input.ps1") Expect(err).ToNot(HaveOccurred()) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyWindows2022 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "windows2022@latest"}} nodeClass.Spec.UserData = awssdk.String(string(content)) nodePool.Spec.Template.Spec.Taints = []corev1.Taint{{Key: "example.com", Value: "value", Effect: "NoExecute"}} nodePool.Spec.Template.Spec.StartupTaints = []corev1.Taint{{Key: "example.com", Value: "value", Effect: "NoSchedule"}} diff --git a/test/suites/integration/extended_resources_test.go b/test/suites/integration/extended_resources_test.go index fcc36a081a79..070f8cca07b3 100644 --- a/test/suites/integration/extended_resources_test.go +++ b/test/suites/integration/extended_resources_test.go @@ -45,7 +45,7 @@ var _ = Describe("Extended Resources", func() { It("should provision nodes for a deployment that requests nvidia.com/gpu", func() { ExpectNvidiaDevicePluginCreated() // TODO: jmdeal@ remove AL2 pin once AL2023 accelerated AMIs are available - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} numPods := 1 dep := test.Deployment(test.DeploymentOptions{ Replicas: int32(numPods), @@ -77,7 +77,7 @@ var _ = Describe("Extended Resources", func() { }) It("should provision nodes for a deployment that requests nvidia.com/gpu (Bottlerocket)", func() { // For Bottlerocket, we are testing that resources are initialized without 
needing a device plugin - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} numPods := 1 dep := test.Deployment(test.DeploymentOptions{ Replicas: int32(numPods), @@ -144,12 +144,7 @@ var _ = Describe("Extended Resources", func() { // We use a Custom AMI so that we can reboot after we start the kubelet service rawContent, err := os.ReadFile("testdata/amd_driver_input.sh") Expect(err).ToNot(HaveOccurred()) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyCustom - nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ - { - ID: customAMI, - }, - } + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: customAMI}} nodeClass.Spec.UserData = lo.ToPtr(fmt.Sprintf(string(rawContent), env.ClusterName, env.ClusterEndpoint, env.ExpectCABundle(), nodePool.Name)) @@ -228,7 +223,7 @@ var _ = Describe("Extended Resources", func() { // Only select private subnets since instances with multiple network instances at launch won't get a public IP. 
nodeClass.Spec.SubnetSelectorTerms[0].Tags["Name"] = "*Private*" // TODO: jmdeal@ remove AL2 pin once AL2023 accelerated AMIs are available - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} numPods := 1 dep := test.Deployment(test.DeploymentOptions{ diff --git a/test/suites/integration/kubelet_config_test.go b/test/suites/integration/kubelet_config_test.go index 142c59d55d87..b7f06f3175c8 100644 --- a/test/suites/integration/kubelet_config_test.go +++ b/test/suites/integration/kubelet_config_test.go @@ -84,17 +84,9 @@ var _ = Describe("KubeletConfiguration Overrides", func() { } }) DescribeTable("Linux AMIFamilies", - func(amiFamily *string) { - nodeClass.Spec.AMIFamily = amiFamily + func(term v1.AMISelectorTerm) { + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{term} // TODO (jmdeal@): remove once 22.04 AMIs are supported - if *amiFamily == v1.AMIFamilyUbuntu && env.K8sMinorVersion() >= 29 { - nodeClass.Spec.AMISelectorTerms = lo.Map([]string{ - "/aws/service/canonical/ubuntu/eks/20.04/1.28/stable/current/amd64/hvm/ebs-gp2/ami-id", - "/aws/service/canonical/ubuntu/eks/20.04/1.28/stable/current/arm64/hvm/ebs-gp2/ami-id", - }, func(ssmPath string, _ int) v1.AMISelectorTerm { - return v1.AMISelectorTerm{ID: env.GetAMIBySSMPath(ssmPath)} - }) - } pod := test.Pod(test.PodOptions{ NodeSelector: map[string]string{ corev1.LabelOSStable: string(corev1.Linux), @@ -105,19 +97,18 @@ var _ = Describe("KubeletConfiguration Overrides", func() { env.EventuallyExpectHealthy(pod) env.ExpectCreatedNodeCount("==", 1) }, - Entry("when the AMIFamily is AL2", &v1.AMIFamilyAL2), - Entry("when the AMIFamily is AL2023", &v1.AMIFamilyAL2023), - Entry("when the AMIFamily is Ubuntu", &v1.AMIFamilyUbuntu), - Entry("when the AMIFamily is Bottlerocket", &v1.AMIFamilyBottlerocket), + Entry("when the AMIFamily is AL2", v1.AMISelectorTerm{Alias: "al2@latest"}), + Entry("when the AMIFamily is AL2023", 
v1.AMISelectorTerm{Alias: "al2023@latest"}), + Entry("when the AMIFamily is Bottlerocket", v1.AMISelectorTerm{Alias: "bottlerocket@latest"}), ) DescribeTable("Windows AMIFamilies", - func(amiFamily *string) { + func(term v1.AMISelectorTerm) { env.ExpectWindowsIPAMEnabled() DeferCleanup(func() { env.ExpectWindowsIPAMDisabled() }) - nodeClass.Spec.AMIFamily = amiFamily + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{term} // Need to enable nodepool-level OS-scoping for now since DS evaluation is done off of the nodepool // requirements, not off of the instance type options so scheduling can fail if nodepool aren't // properly scoped @@ -146,8 +137,8 @@ var _ = Describe("KubeletConfiguration Overrides", func() { // If the instance type is not supported by the controller resource `vpc.amazonaws.com/PrivateIPv4Address` will not register. // Issue: https://github.com/aws/karpenter-provider-aws/issues/4472 // See: https://github.com/aws/amazon-vpc-resource-controller-k8s/blob/master/pkg/aws/vpc/limits.go - Entry("when the AMIFamily is Windows2019", &v1.AMIFamilyWindows2019), - Entry("when the AMIFamily is Windows2022", &v1.AMIFamilyWindows2022), + Entry("when the AMIFamily is Windows2019", v1.AMISelectorTerm{Alias: "windows2019@latest"}), + Entry("when the AMIFamily is Windows2022", v1.AMISelectorTerm{Alias: "windows2022@latest"}), ) }) It("should schedule pods onto separate nodes when maxPods is set", func() { @@ -221,7 +212,7 @@ var _ = Describe("KubeletConfiguration Overrides", func() { env.EventuallyExpectUniqueNodeNames(selector, 2) }) It("should ignore podsPerCore value when Bottlerocket is used", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} // All pods should schedule to a single node since we are ignoring podsPerCore value // This would normally schedule to 3 nodes if not using Bottlerocket test.ReplaceRequirements(nodePool, diff --git 
a/test/suites/integration/validation_test.go b/test/suites/integration/validation_test.go index d5c5d498cf74..0c0671dbe856 100644 --- a/test/suites/integration/validation_test.go +++ b/test/suites/integration/validation_test.go @@ -157,8 +157,8 @@ var _ = Describe("Validation", func() { }) }) Context("EC2NodeClass", func() { - It("should error when amiSelectorTerms are not defined for amiFamily Custom", func() { - nodeClass.Spec.AMIFamily = &v1.AMIFamilyCustom + It("should error when amiSelectorTerms are not defined", func() { + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{} Expect(env.Client.Create(env.Context, nodeClass)).ToNot(Succeed()) }) It("should fail for poorly formatted AMI ids", func() { diff --git a/test/suites/nodeclaim/nodeclaim_test.go b/test/suites/nodeclaim/nodeclaim_test.go index 4fb499886025..81738fa335bc 100644 --- a/test/suites/nodeclaim/nodeclaim_test.go +++ b/test/suites/nodeclaim/nodeclaim_test.go @@ -244,7 +244,6 @@ var _ = Describe("StandaloneNodeClaim", func() { Expect(err).ToNot(HaveOccurred()) // Create userData that adds custom labels through the --node-labels - nodeClass.Spec.AMIFamily = &v1.AMIFamilyCustom nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: customAMI}} nodeClass.Spec.UserData = lo.ToPtr(fmt.Sprintf(string(rawContent), env.ClusterName, env.ClusterEndpoint, env.ExpectCABundle())) @@ -296,7 +295,6 @@ var _ = Describe("StandaloneNodeClaim", func() { Expect(err).ToNot(HaveOccurred()) // Create userData that adds custom labels through the --node-labels - nodeClass.Spec.AMIFamily = &v1.AMIFamilyCustom nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: customAMI}} // Giving bad clusterName and clusterEndpoint to the userData diff --git a/test/suites/scale/deprovisioning_test.go b/test/suites/scale/deprovisioning_test.go index 33c711d2dccc..cac0d34e064c 100644 --- a/test/suites/scale/deprovisioning_test.go +++ b/test/suites/scale/deprovisioning_test.go @@ -256,7 +256,7 @@ var _ = 
Describe("Deprovisioning", Label(debug.NoWatch), Label(debug.NoEvents), nodePoolMap[expirationValue].Spec.Disruption.ExpireAfter.Duration = lo.ToPtr(time.Duration(0)) nodePoolMap[expirationValue].Spec.Limits = disableProvisioningLimits // Update the drift NodeClass to start drift on Nodes assigned to this NodeClass - driftNodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + driftNodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} // Create test assertions to ensure during the multiple deprovisioner scale-downs type testAssertions struct { @@ -678,7 +678,7 @@ var _ = Describe("Deprovisioning", Label(debug.NoWatch), Label(debug.NoEvents), env.MeasureDeprovisioningDurationFor(func() { By("kicking off deprovisioning drift by changing the nodeClass AMIFamily") - nodeClass.Spec.AMIFamily = &v1.AMIFamilyBottlerocket + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}} env.ExpectCreatedOrUpdated(nodeClass) env.EventuallyExpectDeletedNodeCount("==", expectedNodeCount) diff --git a/test/suites/scheduling/suite_test.go b/test/suites/scheduling/suite_test.go index 3801f6113768..23248c43d174 100644 --- a/test/suites/scheduling/suite_test.go +++ b/test/suites/scheduling/suite_test.go @@ -302,7 +302,7 @@ var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { NodeRequirements: requirements, Image: environmentaws.WindowsDefaultImage, }}) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyWindows2022 + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "windows2022@latest"}} test.ReplaceRequirements(nodePool, karpv1.NodeSelectorRequirementWithMinValues{ NodeSelectorRequirement: corev1.NodeSelectorRequirement{ From 954bc523bd5bee800e3ed4a73f574186c3c462e0 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Mon, 8 Jul 2024 17:11:26 -0700 Subject: [PATCH 5/9] chore: PR feedback --- pkg/fake/ssmapi.go | 19 +- pkg/providers/amifamily/al2.go | 11 +- pkg/providers/amifamily/al2023.go | 17 +- 
pkg/providers/amifamily/ami.go | 45 +- pkg/providers/amifamily/bottlerocket.go | 9 +- pkg/providers/amifamily/custom.go | 4 +- pkg/providers/amifamily/resolver.go | 6 +- pkg/providers/amifamily/suite_test.go | 26 +- pkg/providers/amifamily/types.go | 28 +- pkg/providers/amifamily/windows.go | 17 +- pkg/providers/ssm/provider.go | 16 +- pkg/providers/version/version.go | 12 + test/suites/drift/suite_test.go | 1809 +++++++++++------------ 13 files changed, 1011 insertions(+), 1008 deletions(-) diff --git a/pkg/fake/ssmapi.go b/pkg/fake/ssmapi.go index bd75d1567974..680a75f629ff 100644 --- a/pkg/fake/ssmapi.go +++ b/pkg/fake/ssmapi.go @@ -18,7 +18,6 @@ import ( "context" "fmt" "regexp" - "strconv" "strings" "github.com/Pallinder/go-randomdata" @@ -68,6 +67,9 @@ func (a SSMAPI) GetParameterWithContext(_ context.Context, input *ssm.GetParamet } func (a SSMAPI) GetParametersByPathPagesWithContext(_ context.Context, input *ssm.GetParametersByPathInput, f func(*ssm.GetParametersByPathOutput, bool) bool, _ ...request.Option) error { + if !lo.FromPtr(input.Recursive) { + panic("fake SSM API currently only supports GetParametersByPathPagesWithContext when recursive is true") + } if a.WantErr != nil { return a.WantErr } @@ -78,9 +80,12 @@ func (a SSMAPI) GetParametersByPathPagesWithContext(_ context.Context, input *ss if len(a.Parameters) != 0 { f(&ssm.GetParametersByPathOutput{ Parameters: lo.FilterMap(lo.Entries(a.Parameters), func(p lo.Entry[string, string], _ int) (*ssm.Parameter, bool) { + // The parameter does not start with the path if !strings.HasPrefix(p.Key, lo.FromPtr(input.Path)) { return nil, false } + // The parameter starts with the input path, but the last segment of the input path is only a subset of the matching segment of the parameters path. + // Ex: "/aws/service/eks-optimized-ami/amazon-linux-2" is a prefix for "/aws/service/eks-optimized-ami/amazon-linux-2-gpu/..." 
but we shouldn't match if strings.TrimPrefix(p.Key, lo.FromPtr(input.Path))[0] != '/' { return nil, false } @@ -116,7 +121,7 @@ func getDefaultParametersForPath(path string) []*ssm.Parameter { "x86_64/latest/image_id", "arm64/latest/image_id", }, - `\/aws\/service\/ami-windows-latest`: lo.FlatMap(supportedK8sVersions(), func(version string, _ int) []string { + `\/aws\/service\/ami-windows-latest`: lo.FlatMap(version.SupportedK8sVersions(), func(version string, _ int) []string { return []string{ fmt.Sprintf("Windows_Server-2019-English-Core-EKS_Optimized-%s/image_id", version), fmt.Sprintf("Windows_Server-2022-English-Core-EKS_Optimized-%s/image_id", version), @@ -137,16 +142,6 @@ func getDefaultParametersForPath(path string) []*ssm.Parameter { return nil } -func supportedK8sVersions() []string { - minMinor := lo.Must(strconv.Atoi(strings.Split(version.MinK8sVersion, ".")[1])) - maxMinor := lo.Must(strconv.Atoi(strings.Split(version.MaxK8sVersion, ".")[1])) - versions := make([]string, 0, maxMinor-minMinor+1) - for i := minMinor; i <= maxMinor; i++ { - versions = append(versions, fmt.Sprintf("1.%d", i)) - } - return versions -} - func (a *SSMAPI) Reset() { a.GetParameterOutput = nil a.Parameters = nil diff --git a/pkg/providers/amifamily/al2.go b/pkg/providers/amifamily/al2.go index abeb7e76fe46..c9e927972e1e 100644 --- a/pkg/providers/amifamily/al2.go +++ b/pkg/providers/amifamily/al2.go @@ -42,13 +42,16 @@ type AL2 struct { *Options } -func (a AL2) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (AMIQuery, error) { - query := AMIQuery{ +func (a AL2) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (DescribeImageQuery, error) { + query := DescribeImageQuery{ Filters: []*ec2.Filter{&ec2.Filter{ Name: lo.ToPtr("image-id"), }}, KnownRequirements: make(map[string][]scheduling.Requirements), } + // Example Paths: + // - Latest EKS 1.30 Standard Image: 
/aws/service/eks/optimized-ami/1.30/amazon-linux-2/recommended/image_id + // - Specific EKS 1.30 GPU Image: /aws/service/eks/optimized-ami/1.30/amazon-linux-2-gpu/amazon-eks-node-1.30-v20240625/image_id for rootPath, variants := range map[string][]Variant{ fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2", k8sVersion): []Variant{VariantStandard}, fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2-arm64", k8sVersion): []Variant{VariantStandard}, @@ -56,7 +59,7 @@ func (a AL2) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion } { results, err := ssmProvider.List(ctx, rootPath) if err != nil { - log.FromContext(ctx).WithValues("path", rootPath).Error(err, "discovering AMIs from ssm") + log.FromContext(ctx).WithValues("path", rootPath, "family", "AL2").Error(err, "discovering AMIs from ssm") continue } for path, value := range results { @@ -74,7 +77,7 @@ func (a AL2) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion } // Failed to discover any AMIs, we should short circuit AMI discovery if len(query.Filters[0].Values) == 0 { - return AMIQuery{}, fmt.Errorf("failed to discover any AMIs for alias") + return DescribeImageQuery{}, fmt.Errorf(`failed to discover any AMIs for alias "al2@%s"`, amiVersion) } return query, nil } diff --git a/pkg/providers/amifamily/al2023.go b/pkg/providers/amifamily/al2023.go index 20f28756e9e9..f2a8dd0eb837 100644 --- a/pkg/providers/amifamily/al2023.go +++ b/pkg/providers/amifamily/al2023.go @@ -22,11 +22,12 @@ import ( "github.com/samber/lo" corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/scheduling" - v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/aws-sdk-go/service/ec2" + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap" "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" ) @@ -36,17 
+37,21 @@ type AL2023 struct { *Options } -func (a AL2023) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (AMIQuery, error) { - query := AMIQuery{ +func (a AL2023) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (DescribeImageQuery, error) { + query := DescribeImageQuery{ Filters: []*ec2.Filter{&ec2.Filter{ Name: lo.ToPtr("image-id"), }}, KnownRequirements: make(map[string][]scheduling.Requirements), } + // Example Paths: + // - Latest EKS 1.30 arm64 Standard Image: /aws/service/eks/optimized-ami/1.30/amazon-linux-2023/arm64/standard/recommended/image_id + // - Specific EKS 1.30 amd64 Nvidia Image: /aws/service/eks/optimized-ami/1.30/amazon-linux-2023/x86_64/nvidia/amazon-eks-node-al2023-x86_64-nvidia-1.30-v20240625/image_id rootPath := fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023", k8sVersion) results, err := ssmProvider.List(ctx, rootPath) if err != nil { - return AMIQuery{}, fmt.Errorf("discovering AMIs from ssm") + log.FromContext(ctx).WithValues("path", rootPath, "family", "AL2023").Error(err, "discovering AMIs from ssm") + return DescribeImageQuery{}, fmt.Errorf(`failed to discover any AMIs for alias "al2023@%s"`, amiVersion) } for path, value := range results { pathComponents := strings.Split(path, "/") @@ -65,7 +70,7 @@ func (a AL2023) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersi } // Failed to discover any AMIs, we should short circuit AMI discovery if len(query.Filters[0].Values) == 0 { - return AMIQuery{}, fmt.Errorf("failed to discover any AMIs for alias") + return DescribeImageQuery{}, fmt.Errorf(`failed to discover any AMIs for alias "al2023@%s"`, amiVersion) } return query, nil } @@ -74,7 +79,7 @@ func (a AL2023) extractAMIVersion(versionStr string) (string, error) { if versionStr == "recommended" { return AMIVersionLatest, nil } - rgx := regexp.MustCompile(`^.*(v\d+)$`) + rgx := 
regexp.MustCompile(`^.*(v\d{8})$`) matches := rgx.FindStringSubmatch(versionStr) if len(matches) != 2 { return "", fmt.Errorf("failed to extract AMI version") diff --git a/pkg/providers/amifamily/ami.go b/pkg/providers/amifamily/ami.go index 3c3316a39dbb..c0c555e485e9 100644 --- a/pkg/providers/amifamily/ami.go +++ b/pkg/providers/amifamily/ami.go @@ -65,11 +65,11 @@ func NewDefaultProvider(versionProvider version.Provider, ssmProvider ssm.Provid func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass) (AMIs, error) { p.Lock() defer p.Unlock() - queries, err := p.GetAMIQueries(ctx, nodeClass) + queries, err := p.DescribeImageQueries(ctx, nodeClass) if err != nil { return nil, fmt.Errorf("getting AMI queries, %w", err) } - amis, err := p.getAMIs(ctx, queries) + amis, err := p.amis(ctx, queries) if err != nil { return nil, err } @@ -82,33 +82,34 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass) return amis, nil } -func (p *DefaultProvider) GetAMIQueries(ctx context.Context, nodeClass *v1.EC2NodeClass) ([]AMIQuery, error) { - // Aliases should be mutually exclusive (enforced via CEL validation), we'll treat it as such - if amiFamilyKey := nodeClass.AMIFamily(); amiFamilyKey != v1.AMIFamilyCustom { +func (p *DefaultProvider) DescribeImageQueries(ctx context.Context, nodeClass *v1.EC2NodeClass) ([]DescribeImageQuery, error) { + // Aliases are mutually exclusive, both on the term level and field level within a term. + // This is enforced by a CEL validation, we will treat this as an invariant. 
+ if amiFamilyKey := nodeClass.AMIFamily(); amiFamilyKey != v1beta1.AMIFamilyCustom { amiVersion := nodeClass.AMIVersion() amiFamily := GetAMIFamily(&amiFamilyKey, nil) kubernetesVersion, err := p.versionProvider.Get(ctx) if err != nil { return nil, fmt.Errorf("getting kubernetes version, %w", err) } - query, err := amiFamily.AMIQuery(ctx, p.ssmProvider, kubernetesVersion, amiVersion) - return []AMIQuery{query}, err + query, err := amiFamily.DescribeImageQuery(ctx, p.ssmProvider, kubernetesVersion, amiVersion) + return []DescribeImageQuery{query}, err } idFilter := &ec2.Filter{Name: aws.String("image-id")} - queries := []AMIQuery{} + queries := []DescribeImageQuery{} for _, term := range nodeClass.Spec.AMISelectorTerms { switch { case term.ID != "": idFilter.Values = append(idFilter.Values, aws.String(term.ID)) default: - query := AMIQuery{ + query := DescribeImageQuery{ Owners: lo.Ternary(term.Owner != "", []string{term.Owner}, []string{}), } if term.Name != "" { // Default owners to self,amazon to ensure Karpenter only discovers cross-account AMIs if the user specifically allows it. // Removing this default would cause Karpenter to discover publicly shared AMIs passing the name filter. 
- query = AMIQuery{ + query = DescribeImageQuery{ Owners: lo.Ternary(term.Owner != "", []string{term.Owner}, []string{"self", "amazon"}), } query.Filters = append(query.Filters, &ec2.Filter{ @@ -134,12 +135,12 @@ func (p *DefaultProvider) GetAMIQueries(ctx context.Context, nodeClass *v1.EC2No } } if len(idFilter.Values) > 0 { - queries = append(queries, AMIQuery{Filters: []*ec2.Filter{idFilter}}) + queries = append(queries, DescribeImageQuery{Filters: []*ec2.Filter{idFilter}}) } return queries, nil } -func (p *DefaultProvider) getAMIs(ctx context.Context, queries []AMIQuery) (AMIs, error) { +func (p *DefaultProvider) amis(ctx context.Context, queries []DescribeImageQuery) (AMIs, error) { hash, err := hashstructure.Hash(queries, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) if err != nil { return nil, err @@ -157,18 +158,11 @@ func (p *DefaultProvider) getAMIs(ctx context.Context, queries []AMIQuery) (AMIs if !ok { continue } - requirementSets := func() []scheduling.Requirements { - if knownRequirements, ok := query.KnownRequirements[lo.FromPtr(image.ImageId)]; ok { - return lo.Map(knownRequirements, func(r scheduling.Requirements, _ int) scheduling.Requirements { - r.Add(scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, arch)) - return r - }) - } - return []scheduling.Requirements{scheduling.NewRequirements(scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, arch))} - }() - for _, reqs := range requirementSets { + // Each image may have multiple associated sets of requirements. For example, an image may be compatible with Neuron instances + // and GPU instances. In that case, we'll have a set of requirements for each, and will create one "image" for each. + for _, reqs := range query.RequirementsForImageWithArchitecture(lo.FromPtr(image.ImageId), arch) { + // If we already have an image with the same set of requirements, but this image is newer, replace the previous image. 
reqsHash := lo.Must(hashstructure.Hash(reqs.NodeSelectorRequirements(), hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true})) - // If the proposed image is newer, store it so that we can return it if v, ok := images[reqsHash]; ok { candidateCreationTime, _ := time.Parse(time.RFC3339, lo.FromPtr(image.CreationDate)) existingCreationTime, _ := time.Parse(time.RFC3339, v.CreationDate) @@ -201,7 +195,10 @@ func MapToInstanceTypes(instanceTypes []*cloudprovider.InstanceType, amis []v1.A amiIDs := map[string][]*cloudprovider.InstanceType{} for _, instanceType := range instanceTypes { for _, ami := range amis { - if err := instanceType.Requirements.Compatible(scheduling.NewNodeSelectorRequirements(ami.Requirements...), scheduling.AllowUndefinedWellKnownLabels); err == nil { + if err := instanceType.Requirements.Compatible( + scheduling.NewNodeSelectorRequirements(ami.Requirements...), + scheduling.AllowUndefinedWellKnownLabels, + ); err == nil { amiIDs[ami.ID] = append(amiIDs[ami.ID], instanceType) break } diff --git a/pkg/providers/amifamily/bottlerocket.go b/pkg/providers/amifamily/bottlerocket.go index 384d21b8360c..5ccaa3f3ac70 100644 --- a/pkg/providers/amifamily/bottlerocket.go +++ b/pkg/providers/amifamily/bottlerocket.go @@ -41,13 +41,16 @@ type Bottlerocket struct { *Options } -func (b Bottlerocket) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (AMIQuery, error) { - query := AMIQuery{ +func (b Bottlerocket) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (DescribeImageQuery, error) { + query := DescribeImageQuery{ Filters: []*ec2.Filter{&ec2.Filter{ Name: lo.ToPtr("image-id"), }}, KnownRequirements: make(map[string][]scheduling.Requirements), } + // Example Paths: + // - Latest EKS 1.30 amd64 Standard Image: /aws/service/bottlerocket/aws-k8s-1.30/x86_64/latest/image_id + // - Specific EKS 1.30 arm64 Nvidia Image: 
/aws/service/bottlerocket/aws-k8s-1.30-nvidia/arm64/1.10.0/image_id for rootPath, variants := range map[string][]Variant{ fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s", k8sVersion): []Variant{VariantStandard}, fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s-nvidia", k8sVersion): []Variant{VariantNeuron, VariantNvidia}, @@ -69,7 +72,7 @@ func (b Bottlerocket) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8 } // Failed to discover any AMIs, we should short circuit AMI discovery if len(query.Filters[0].Values) == 0 { - return AMIQuery{}, fmt.Errorf("failed to discover any AMIs for alias") + return DescribeImageQuery{}, fmt.Errorf(`failed to discover any AMIs for alias "bottlerocket@%s"`, amiVersion) } return query, nil } diff --git a/pkg/providers/amifamily/custom.go b/pkg/providers/amifamily/custom.go index 6f22eca35060..6448cad83997 100644 --- a/pkg/providers/amifamily/custom.go +++ b/pkg/providers/amifamily/custom.go @@ -41,8 +41,8 @@ func (c Custom) UserData(_ *v1.KubeletConfiguration, _ []corev1.Taint, _ map[str } } -func (c Custom) AMIQuery(_ context.Context, _ ssm.Provider, _ string, _ string) (AMIQuery, error) { - return AMIQuery{}, nil +func (c Custom) DescribeImageQuery(_ context.Context, _ ssm.Provider, _ string, _ string) (DescribeImageQuery, error) { + return DescribeImageQuery{}, nil } func (c Custom) DefaultBlockDeviceMappings() []*v1.BlockDeviceMapping { diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index 04d75e3d3f2f..24ec92dd6b5b 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -36,10 +36,6 @@ import ( "sigs.k8s.io/karpenter/pkg/scheduling" ) -const ( - AMIVersionLatest = "latest" -) - var DefaultEBS = v1.BlockDevice{ Encrypted: aws.Bool(true), VolumeType: aws.String(ec2.VolumeTypeGp3), @@ -83,7 +79,7 @@ type LaunchTemplate struct { // AMIFamily can be implemented to override the default logic for generating dynamic launch template parameters type 
AMIFamily interface { - AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (AMIQuery, error) + DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (DescribeImageQuery, error) UserData(kubeletConfig *v1.KubeletConfiguration, taints []corev1.Taint, labels map[string]string, caBundle *string, instanceTypes []*cloudprovider.InstanceType, customUserData *string, instanceStorePolicy *v1.InstanceStorePolicy) bootstrap.Bootstrapper DefaultBlockDeviceMappings() []*v1.BlockDeviceMapping DefaultMetadataOptions() *v1.MetadataOptions diff --git a/pkg/providers/amifamily/suite_test.go b/pkg/providers/amifamily/suite_test.go index a5787397f9d8..8e9110322e0d 100644 --- a/pkg/providers/amifamily/suite_test.go +++ b/pkg/providers/amifamily/suite_test.go @@ -307,7 +307,7 @@ var _ = Describe("AMIProvider", func() { // When you tag public or shared resources, the tags you assign are available only to your AWS account; no other AWS account will have access to those tags // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions It("should have empty owners and use tags when prefixes aren't set", func() { - queries, err := awsEnv.AMIProvider.GetAMIQueries(ctx, &v1.EC2NodeClass{ + queries, err := awsEnv.AMIProvider.DescribeImageQueries(ctx, &v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ AMISelectorTerms: []v1.AMISelectorTerm{{ Tags: map[string]string{ @@ -317,7 +317,7 @@ var _ = Describe("AMIProvider", func() { }, }) Expect(err).To(BeNil()) - ExpectConsistsOfAMIQueries([]amifamily.AMIQuery{ + ExpectConsistsOfAMIQueries([]amifamily.DescribeImageQuery{ { Filters: []*ec2.Filter{ { @@ -330,7 +330,7 @@ var _ = Describe("AMIProvider", func() { }, queries) }) It("should have default owners and use name when prefixed", func() { - queries, err := awsEnv.AMIProvider.GetAMIQueries(ctx, &v1.EC2NodeClass{ + queries, err := awsEnv.AMIProvider.DescribeImageQueries(ctx, 
&v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ AMISelectorTerms: []v1.AMISelectorTerm{{ Name: "my-ami", @@ -338,7 +338,7 @@ var _ = Describe("AMIProvider", func() { }, }) Expect(err).To(BeNil()) - ExpectConsistsOfAMIQueries([]amifamily.AMIQuery{ + ExpectConsistsOfAMIQueries([]amifamily.DescribeImageQuery{ { Filters: []*ec2.Filter{ { @@ -354,7 +354,7 @@ var _ = Describe("AMIProvider", func() { }, queries) }) It("should not set owners when legacy ids are passed", func() { - queries, err := awsEnv.AMIProvider.GetAMIQueries(ctx, &v1.EC2NodeClass{ + queries, err := awsEnv.AMIProvider.DescribeImageQueries(ctx, &v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ AMISelectorTerms: []v1.AMISelectorTerm{ { @@ -367,7 +367,7 @@ var _ = Describe("AMIProvider", func() { }, }) Expect(err).To(BeNil()) - ExpectConsistsOfAMIQueries([]amifamily.AMIQuery{ + ExpectConsistsOfAMIQueries([]amifamily.DescribeImageQuery{ { Filters: []*ec2.Filter{ { @@ -379,7 +379,7 @@ var _ = Describe("AMIProvider", func() { }, queries) }) It("should allow only specifying owners", func() { - queries, err := awsEnv.AMIProvider.GetAMIQueries(ctx, &v1.EC2NodeClass{ + queries, err := awsEnv.AMIProvider.DescribeImageQueries(ctx, &v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ AMISelectorTerms: []v1.AMISelectorTerm{ { @@ -392,7 +392,7 @@ var _ = Describe("AMIProvider", func() { }, }) Expect(err).To(BeNil()) - ExpectConsistsOfAMIQueries([]amifamily.AMIQuery{ + ExpectConsistsOfAMIQueries([]amifamily.DescribeImageQuery{ { Owners: []string{"abcdef"}, }, @@ -402,7 +402,7 @@ var _ = Describe("AMIProvider", func() { }, queries) }) It("should allow prefixed name and prefixed owners", func() { - queries, err := awsEnv.AMIProvider.GetAMIQueries(ctx, &v1.EC2NodeClass{ + queries, err := awsEnv.AMIProvider.DescribeImageQueries(ctx, &v1.EC2NodeClass{ Spec: v1.EC2NodeClassSpec{ AMISelectorTerms: []v1.AMISelectorTerm{ { @@ -417,7 +417,7 @@ var _ = Describe("AMIProvider", func() { }, }) Expect(err).To(BeNil()) - 
ExpectConsistsOfAMIQueries([]amifamily.AMIQuery{ + ExpectConsistsOfAMIQueries([]amifamily.DescribeImageQuery{ { Owners: []string{"0123456789"}, Filters: []*ec2.Filter{ @@ -556,11 +556,11 @@ var _ = Describe("AMIProvider", func() { }) }) -func ExpectConsistsOfAMIQueries(expected, actual []amifamily.AMIQuery) { +func ExpectConsistsOfAMIQueries(expected, actual []amifamily.DescribeImageQuery) { GinkgoHelper() Expect(actual).To(HaveLen(len(expected))) - for _, list := range [][]amifamily.AMIQuery{expected, actual} { + for _, list := range [][]amifamily.DescribeImageQuery{expected, actual} { for _, elem := range list { for _, f := range elem.Filters { sort.Slice(f.Values, func(i, j int) bool { @@ -573,5 +573,5 @@ func ExpectConsistsOfAMIQueries(expected, actual []amifamily.AMIQuery) { }) } } - Expect(actual).To(ConsistOf(lo.Map(expected, func(q amifamily.AMIQuery, _ int) interface{} { return q })...)) + Expect(actual).To(ConsistOf(lo.Map(expected, func(q amifamily.DescribeImageQuery, _ int) interface{} { return q })...)) } diff --git a/pkg/providers/amifamily/types.go b/pkg/providers/amifamily/types.go index 91ef9f576849..092d6d75ccb0 100644 --- a/pkg/providers/amifamily/types.go +++ b/pkg/providers/amifamily/types.go @@ -28,6 +28,12 @@ import ( "sigs.k8s.io/karpenter/pkg/scheduling" ) +const ( + // AMIVersionLatest is the version used in EKS aliases to represent the latest version. This maps to different + // values in the SSM path, depending on the AMI type (e.g. "recommended" for AL2/AL2023)). + AMIVersionLatest = "latest" +) + type AMI struct { Name string AmiID string @@ -82,17 +88,31 @@ func (v Variant) Requirements() scheduling.Requirements { return nil } -type AMIQuery struct { +type DescribeImageQuery struct { Filters []*ec2.Filter Owners []string + // KnownRequirements is a map from image IDs to a set of known requirements. + // When discovering image IDs via SSM we know additional requirements which aren't surfaced by ec2:DescribeImage (e.g. 
GPU / Neuron compatibility) + // Sometimes, an image may have multiple sets of known requirements. For example, the AL2 GPU AMI is compatible with both Neuron and Nvidia GPU + // instances, which means we need a set of requirements for either instance type. KnownRequirements map[string][]scheduling.Requirements } -func (aq AMIQuery) DescribeImagesInput() *ec2.DescribeImagesInput { +func (q DescribeImageQuery) DescribeImagesInput() *ec2.DescribeImagesInput { return &ec2.DescribeImagesInput{ // Don't include filters in the Describe Images call as EC2 API doesn't allow empty filters. - Filters: lo.Ternary(len(aq.Filters) > 0, aq.Filters, nil), - Owners: lo.Ternary(len(aq.Owners) > 0, lo.ToSlicePtr(aq.Owners), nil), + Filters: lo.Ternary(len(q.Filters) > 0, q.Filters, nil), + Owners: lo.Ternary(len(q.Owners) > 0, lo.ToSlicePtr(q.Owners), nil), MaxResults: aws.Int64(1000), } } + +func (q DescribeImageQuery) RequirementsForImageWithArchitecture(image string, arch string) []scheduling.Requirements { + if knownRequirements, ok := q.KnownRequirements[image]; ok { + return lo.Map(knownRequirements, func(r scheduling.Requirements, _ int) scheduling.Requirements { + r.Add(scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, arch)) + return r + }) + } + return []scheduling.Requirements{scheduling.NewRequirements(scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, arch))} +} diff --git a/pkg/providers/amifamily/windows.go b/pkg/providers/amifamily/windows.go index 960252e18977..af49063ac34c 100644 --- a/pkg/providers/amifamily/windows.go +++ b/pkg/providers/amifamily/windows.go @@ -41,12 +41,15 @@ import ( type Windows struct { DefaultFamily *Options + // Version is the major version of Windows Server (2019 or 2022). + // Only the core version of each version is supported by Karpenter, so this field only indicates the year. 
Version string + // Build is a specific build code associated with the Version Build string } -func (w Windows) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (AMIQuery, error) { - query := AMIQuery{ +func (w Windows) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (DescribeImageQuery, error) { + query := DescribeImageQuery{ Filters: []*ec2.Filter{&ec2.Filter{ Name: lo.ToPtr("image-id"), }}, @@ -54,15 +57,17 @@ func (w Windows) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVers } // SSM aliases are only maintained for the latest Windows AMI releases if amiVersion != AMIVersionLatest { - return AMIQuery{}, fmt.Errorf("discovering AMIs for alias, %q is an invalid version for Windows", amiVersion) + + return DescribeImageQuery{}, fmt.Errorf(`discovering AMIs for alias "windows%s@%s", %q is not a supported version`, w.Version, amiVersion, amiVersion) } + // Example Path: /aws/service/ami-windows-latest/Windows_Server-2022-English-Core-EKS_Optimized-1.30/image_id results, err := ssmProvider.List(ctx, "/aws/service/ami-windows-latest") if err != nil { - return AMIQuery{}, fmt.Errorf("discovering AMIs from ssm") + return DescribeImageQuery{}, fmt.Errorf("discovering AMIs from ssm") } for path, value := range results { pathComponents := strings.Split(path, "/") - if len(pathComponents) != 6 { + if len(pathComponents) != 6 && pathComponents[5] != "image_id" { continue } matches := regexp.MustCompile(`^Windows_Server-(\d+)-English-Core-EKS_Optimized-(\d\.\d+)$`).FindStringSubmatch(pathComponents[4]) @@ -77,7 +82,7 @@ func (w Windows) AMIQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVers } // Failed to discover any AMIs, we should short circuit AMI discovery if len(query.Filters[0].Values) == 0 { - return AMIQuery{}, fmt.Errorf("failed to discover any AMIs for alias") + return DescribeImageQuery{}, fmt.Errorf(`failed to discover any AMIs for alias 
"windows%s@%s"`, w.Version, amiVersion) } return query, nil } diff --git a/pkg/providers/ssm/provider.go b/pkg/providers/ssm/provider.go index bbfe54e3c22d..ced87b07ea52 100644 --- a/pkg/providers/ssm/provider.go +++ b/pkg/providers/ssm/provider.go @@ -27,7 +27,6 @@ import ( type Provider interface { List(context.Context, string) (map[string]string, error) - Get(context.Context, string) (string, error) } type DefaultProvider struct { @@ -43,6 +42,8 @@ func NewDefaultProvider(ssmapi ssmiface.SSMAPI, cache *cache.Cache) *DefaultProv } } +// List calls GetParametersByPath recursively with the provided input path. +// The result is a map of paths to values for those paths. func (p *DefaultProvider) List(ctx context.Context, path string) (map[string]string, error) { p.Lock() defer p.Unlock() @@ -67,16 +68,3 @@ func (p *DefaultProvider) List(ctx context.Context, path string) (map[string]str p.cache.SetDefault(path, values) return values, nil } - -func (p *DefaultProvider) Get(ctx context.Context, path string) (string, error) { - p.Lock() - defer p.Unlock() - if val, ok := p.cache.Get(path); ok { - return val.(string), nil - } - out, err := p.ssmapi.GetParameterWithContext(ctx, &ssm.GetParameterInput{Name: &path}) - if err != nil { - return "", fmt.Errorf("getting ssm parameter %q, %w", path, err) - } - return lo.FromPtr(out.Parameter.Value), err -} diff --git a/pkg/providers/version/version.go b/pkg/providers/version/version.go index 4050c797350f..64b716d9859a 100644 --- a/pkg/providers/version/version.go +++ b/pkg/providers/version/version.go @@ -17,9 +17,11 @@ package version import ( "context" "fmt" + "strconv" "strings" "github.com/patrickmn/go-cache" + "github.com/samber/lo" "k8s.io/apimachinery/pkg/util/version" "k8s.io/client-go/kubernetes" "sigs.k8s.io/controller-runtime/pkg/log" @@ -75,6 +77,16 @@ func (p *DefaultProvider) Get(ctx context.Context) (string, error) { return version, nil } +func SupportedK8sVersions() []string{ + minMinor := 
lo.Must(strconv.Atoi(strings.Split(MinK8sVersion, ".")[1])) + maxMinor := lo.Must(strconv.Atoi(strings.Split(MaxK8sVersion, ".")[1])) + versions := make([]string, 0, maxMinor-minMinor+1) + for i := minMinor; i <= maxMinor; i++ { + versions = append(versions, fmt.Sprintf("1.%d", i)) + } + return versions +} + func validateK8sVersion(v string) error { k8sVersion := version.MustParseGeneric(v) diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index 5590fd20b064..12584aed340b 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -15,34 +15,13 @@ limitations under the License. package drift_test import ( - "fmt" - "sort" "testing" - "time" - "github.com/awslabs/operatorpkg/object" - "github.com/samber/lo" - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/apimachinery/pkg/util/sets" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/karpenter/pkg/utils/resources" - - awssdk "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/service/ec2" - "github.com/aws/aws-sdk-go/service/eks" karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" - coretest "sigs.k8s.io/karpenter/pkg/test" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" - "github.com/aws/karpenter-provider-aws/pkg/test" + "github.com/aws/karpenter-provider-aws/test/pkg/environment/aws" - "github.com/aws/karpenter-provider-aws/test/pkg/environment/common" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -72,896 +51,896 @@ var _ = BeforeEach(func() { var _ = AfterEach(func() { env.Cleanup() }) var _ = AfterEach(func() { env.AfterEach() }) -var _ = Describe("Drift", func() { - var dep *appsv1.Deployment - var selector labels.Selector - var numPods int - BeforeEach(func() { - amdAMI = env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) - numPods = 1 - // Add pods with a do-not-disrupt annotation so that we can check node metadata before we disrupt - dep = coretest.Deployment(coretest.DeploymentOptions{ - Replicas: int32(numPods), - PodOptions: coretest.PodOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - "app": "my-app", - }, - Annotations: map[string]string{ - karpv1.DoNotDisruptAnnotationKey: "true", - }, - }, - TerminationGracePeriodSeconds: lo.ToPtr[int64](0), - }, - }) - selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) - }) - Context("Budgets", func() { - It("should respect budgets for empty drift", func() { - nodePool = coretest.ReplaceRequirements(nodePool, - karpv1.NodeSelectorRequirementWithMinValues{ - NodeSelectorRequirement: corev1.NodeSelectorRequirement{ - Key: v1.LabelInstanceSize, - Operator: corev1.NodeSelectorOpIn, - Values: []string{"2xlarge"}, - }, - }, - ) - // We're expecting to create 3 nodes, so we'll expect to see 2 nodes deleting at one time. - nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ - Nodes: "50%", - }} - var numPods int32 = 6 - dep = coretest.Deployment(coretest.DeploymentOptions{ - Replicas: numPods, - PodOptions: coretest.PodOptions{ - ObjectMeta: metav1.ObjectMeta{ - Annotations: map[string]string{ - karpv1.DoNotDisruptAnnotationKey: "true", - }, - Labels: map[string]string{"app": "large-app"}, - }, - // Each 2xlarge has 8 cpu, so each node should fit 2 pods. 
- ResourceRequirements: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("3"), - }, - }, - }, - }) - selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) - env.ExpectCreated(nodeClass, nodePool, dep) - - nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) - nodes := env.EventuallyExpectCreatedNodeCount("==", 3) - env.EventuallyExpectHealthyPodCount(selector, int(numPods)) - - // List nodes so that we get any updated information on the nodes. If we don't - // we have the potential to over-write any changes Karpenter makes to the nodes. - // Add a finalizer to each node so that we can stop termination disruptions - By("adding finalizers to the nodes to prevent termination") - for _, node := range nodes { - Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) - node.Finalizers = append(node.Finalizers, common.TestingFinalizer) - env.ExpectUpdated(node) - } - - By("making the nodes empty") - // Delete the deployment to make all nodes empty. - env.ExpectDeleted(dep) - - // Drift the nodeclaims - By("drift the nodeclaims") - nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} - env.ExpectUpdated(nodePool) - - env.EventuallyExpectDrifted(nodeClaims...) 
- - // Ensure that we get two nodes tainted, and they have overlap during the drift - env.EventuallyExpectTaintedNodeCount("==", 2) - nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 5*time.Second) - - // Remove the finalizer from each node so that we can terminate - for _, node := range nodes { - Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) - } - - // After the deletion timestamp is set and all pods are drained - // the node should be gone - env.EventuallyExpectNotFound(nodes[0], nodes[1]) - - nodes = env.EventuallyExpectTaintedNodeCount("==", 1) - Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed()) - env.EventuallyExpectNotFound(nodes[0]) - }) - It("should respect budgets for non-empty delete drift", func() { - nodePool = coretest.ReplaceRequirements(nodePool, - karpv1.NodeSelectorRequirementWithMinValues{ - NodeSelectorRequirement: corev1.NodeSelectorRequirement{ - Key: v1.LabelInstanceSize, - Operator: corev1.NodeSelectorOpIn, - Values: []string{"2xlarge"}, - }, - }, - ) - // We're expecting to create 3 nodes, so we'll expect to see at most 2 nodes deleting at one time. - nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ - Nodes: "50%", - }} - var numPods int32 = 9 - dep = coretest.Deployment(coretest.DeploymentOptions{ - Replicas: numPods, - PodOptions: coretest.PodOptions{ - ObjectMeta: metav1.ObjectMeta{ - Annotations: map[string]string{ - karpv1.DoNotDisruptAnnotationKey: "true", - }, - Labels: map[string]string{"app": "large-app"}, - }, - // Each 2xlarge has 8 cpu, so each node should fit no more than 3 pods. 
- ResourceRequirements: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("2100m"), - }, - }, - }, - }) - selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) - env.ExpectCreated(nodeClass, nodePool, dep) - - nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) - nodes := env.EventuallyExpectCreatedNodeCount("==", 3) - env.EventuallyExpectHealthyPodCount(selector, int(numPods)) - - By("scaling down the deployment") - // Update the deployment to a third of the replicas. - dep.Spec.Replicas = lo.ToPtr[int32](3) - env.ExpectUpdated(dep) - - // First expect there to be 3 pods, then try to spread the pods. - env.EventuallyExpectHealthyPodCount(selector, 3) - env.ForcePodsToSpread(nodes...) - env.EventuallyExpectHealthyPodCount(selector, 3) - - By("cordoning and adding finalizer to the nodes") - // Add a finalizer to each node so that we can stop termination disruptions - for _, node := range nodes { - Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) - node.Finalizers = append(node.Finalizers, common.TestingFinalizer) - env.ExpectUpdated(node) - } - - By("drifting the nodes") - // Drift the nodeclaims - nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} - env.ExpectUpdated(nodePool) - - env.EventuallyExpectDrifted(nodeClaims...) 
- - By("enabling disruption by removing the do not disrupt annotation") - pods := env.EventuallyExpectHealthyPodCount(selector, 3) - // Remove the do-not-disrupt annotation so that the nodes are now disruptable - for _, pod := range pods { - delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) - env.ExpectUpdated(pod) - } - - // Ensure that we get two nodes tainted, and they have overlap during the drift - env.EventuallyExpectTaintedNodeCount("==", 2) - nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 30*time.Second) - - By("removing the finalizer from the nodes") - Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed()) - Expect(env.ExpectTestingFinalizerRemoved(nodes[1])).To(Succeed()) - - // After the deletion timestamp is set and all pods are drained - // the node should be gone - env.EventuallyExpectNotFound(nodes[0], nodes[1]) - }) - It("should respect budgets for non-empty replace drift", func() { - appLabels := map[string]string{"app": "large-app"} - nodePool.Labels = appLabels - // We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time. 
- nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ - Nodes: "3", - }} - - // Create a 5 pod deployment with hostname inter-pod anti-affinity to ensure each pod is placed on a unique node - numPods = 5 - selector = labels.SelectorFromSet(appLabels) - deployment := coretest.Deployment(coretest.DeploymentOptions{ - Replicas: int32(numPods), - PodOptions: coretest.PodOptions{ - ObjectMeta: metav1.ObjectMeta{ - Labels: appLabels, - }, - PodAntiRequirements: []corev1.PodAffinityTerm{{ - TopologyKey: corev1.LabelHostname, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: appLabels, - }, - }}, - }, - }) - - env.ExpectCreated(nodeClass, nodePool, deployment) - - originalNodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", numPods) - originalNodes := env.EventuallyExpectCreatedNodeCount("==", numPods) - - // Check that all deployment pods are online - env.EventuallyExpectHealthyPodCount(selector, numPods) - - By("cordoning and adding finalizer to the nodes") - // Add a finalizer to each node so that we can stop termination disruptions - for _, node := range originalNodes { - Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) - node.Finalizers = append(node.Finalizers, common.TestingFinalizer) - env.ExpectUpdated(node) - } - - By("drifting the nodepool") - nodePool.Spec.Template.Annotations = lo.Assign(nodePool.Spec.Template.Annotations, map[string]string{"test-annotation": "drift"}) - env.ExpectUpdated(nodePool) - - // Ensure that we get three nodes tainted, and they have overlap during the drift - env.EventuallyExpectTaintedNodeCount("==", 3) - env.EventuallyExpectNodeClaimCount("==", 8) - env.EventuallyExpectNodeCount("==", 8) - env.ConsistentlyExpectDisruptionsWithNodeCount(3, 8, 5*time.Second) - - for _, node := range originalNodes { - Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) - } - - // Eventually expect all the nodes to be rolled and completely removed - // Since this completes the disruption 
operation, this also ensures that we aren't leaking nodes into subsequent - // tests since nodeclaims that are actively replacing but haven't brought-up nodes yet can register nodes later - env.EventuallyExpectNotFound(lo.Map(originalNodes, func(n *corev1.Node, _ int) client.Object { return n })...) - env.EventuallyExpectNotFound(lo.Map(originalNodeClaims, func(n *karpv1.NodeClaim, _ int) client.Object { return n })...) - env.ExpectNodeClaimCount("==", 5) - env.ExpectNodeCount("==", 5) - }) - It("should not allow drift if the budget is fully blocking", func() { - // We're going to define a budget that doesn't allow any drift to happen - nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ - Nodes: "0", - }} - - dep.Spec.Template.Annotations = nil - env.ExpectCreated(nodeClass, nodePool, dep) - - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - env.EventuallyExpectCreatedNodeCount("==", 1) - env.EventuallyExpectHealthyPodCount(selector, numPods) - - By("drifting the nodes") - // Drift the nodeclaims - nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} - env.ExpectUpdated(nodePool) - - env.EventuallyExpectDrifted(nodeClaim) - env.ConsistentlyExpectNoDisruptions(1, time.Minute) - }) - It("should not allow drift if the budget is fully blocking during a scheduled time", func() { - // We're going to define a budget that doesn't allow any drift to happen - // This is going to be on a schedule that only lasts 30 minutes, whose window starts 15 minutes before - // the current time and extends 15 minutes past the current time - // Times need to be in UTC since the karpenter containers were built in UTC time - windowStart := time.Now().Add(-time.Minute * 15).UTC() - nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ - Nodes: "0", - Schedule: lo.ToPtr(fmt.Sprintf("%d %d * * *", windowStart.Minute(), windowStart.Hour())), - Duration: &metav1.Duration{Duration: time.Minute * 30}, - }} - - dep.Spec.Template.Annotations = nil - 
env.ExpectCreated(nodeClass, nodePool, dep) - - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - env.EventuallyExpectCreatedNodeCount("==", 1) - env.EventuallyExpectHealthyPodCount(selector, numPods) - - By("drifting the nodes") - // Drift the nodeclaims - nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} - env.ExpectUpdated(nodePool) - - env.EventuallyExpectDrifted(nodeClaim) - env.ConsistentlyExpectNoDisruptions(1, time.Minute) - }) - }) - It("should disrupt nodes that have drifted due to AMIs", func() { - // Choose an old static image (AL2023 AMIs don't exist for 1.22) - oldCustomAMI := env.GetAMIBySSMPath(lo.Ternary(env.K8sMinorVersion() == 23, - "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-1.23-v20240307/image_id", - fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)), - )) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 - nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: oldCustomAMI}} - - env.ExpectCreated(dep, nodeClass, nodePool) - pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] - env.ExpectCreatedNodeCount("==", 1) - - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - node := env.EventuallyExpectNodeCount("==", 1)[0] - nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} - env.ExpectCreatedOrUpdated(nodeClass) - - env.EventuallyExpectDrifted(nodeClaim) - - delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) - env.ExpectUpdated(pod) - env.EventuallyExpectNotFound(pod, nodeClaim, node) - env.EventuallyExpectHealthyPodCount(selector, numPods) - }) - It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() { - armAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", 
env.K8sVersion())) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 - nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: armAMI}} - - env.ExpectCreated(dep, nodeClass, nodePool) - pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] - env.ExpectCreatedNodeCount("==", 1) - - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - node := env.EventuallyExpectNodeCount("==", 1)[0] - nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} - env.ExpectCreatedOrUpdated(nodeClass) - - env.EventuallyExpectDrifted(nodeClaim) - - delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) - env.ExpectUpdated(pod) - env.EventuallyExpectNotFound(pod, nodeClaim, node) - env.EventuallyExpectHealthyPodCount(selector, numPods) - }) - It("should not disrupt nodes that have drifted without the featureGate enabled", func() { - env.ExpectSettingsOverridden(corev1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=false"}) - - // Choose an old static image (AL2023 AMIs don't exist for 1.22) - oldCustomAMI := env.GetAMIBySSMPath(lo.Ternary(env.K8sMinorVersion() == 23, - "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-1.23-v20240307/image_id", - fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)), - )) - nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 - nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: oldCustomAMI}} - - env.ExpectCreated(dep, nodeClass, nodePool) - env.EventuallyExpectHealthyPodCount(selector, numPods) - env.ExpectCreatedNodeCount("==", 1) - - node := env.Monitor.CreatedNodes()[0] - nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} - env.ExpectUpdated(nodeClass) - - // We should consistently get the same node existing for a minute - Consistently(func(g Gomega) { - g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), &corev1.Node{})).To(Succeed()) 
- }).WithTimeout(time.Minute).Should(Succeed()) - }) - It("should disrupt nodes that have drifted due to securitygroup", func() { - By("getting the cluster vpc id") - output, err := env.EKSAPI.DescribeCluster(&eks.DescribeClusterInput{Name: awssdk.String(env.ClusterName)}) - Expect(err).To(BeNil()) - - By("creating new security group") - createSecurityGroup := &ec2.CreateSecurityGroupInput{ - GroupName: awssdk.String("security-group-drift"), - Description: awssdk.String("End-to-end Drift Test, should delete after drift test is completed"), - VpcId: output.Cluster.ResourcesVpcConfig.VpcId, - TagSpecifications: []*ec2.TagSpecification{ - { - ResourceType: awssdk.String("security-group"), - Tags: []*ec2.Tag{ - { - Key: awssdk.String("karpenter.sh/discovery"), - Value: awssdk.String(env.ClusterName), - }, - { - Key: awssdk.String(coretest.DiscoveryLabel), - Value: awssdk.String(env.ClusterName), - }, - { - Key: awssdk.String("creation-date"), - Value: awssdk.String(time.Now().Format(time.RFC3339)), - }, - }, - }, - }, - } - _, _ = env.EC2API.CreateSecurityGroup(createSecurityGroup) - - By("looking for security groups") - var securitygroups []aws.SecurityGroup - var testSecurityGroup aws.SecurityGroup - Eventually(func(g Gomega) { - securitygroups = env.GetSecurityGroups(map[string]string{"karpenter.sh/discovery": env.ClusterName}) - testSecurityGroup, _ = lo.Find(securitygroups, func(sg aws.SecurityGroup) bool { - return awssdk.StringValue(sg.GroupName) == "security-group-drift" - }) - g.Expect(testSecurityGroup).ToNot(BeNil()) - }).Should(Succeed()) - - By("creating a new provider with the new securitygroup") - awsIDs := lo.FilterMap(securitygroups, func(sg aws.SecurityGroup, _ int) (string, bool) { - if awssdk.StringValue(sg.GroupId) != awssdk.StringValue(testSecurityGroup.GroupId) { - return awssdk.StringValue(sg.GroupId), true - } - return "", false - }) - sgTerms := []v1.SecurityGroupSelectorTerm{{ID: awssdk.StringValue(testSecurityGroup.GroupId)}} - for _, id := 
range awsIDs { - sgTerms = append(sgTerms, v1.SecurityGroupSelectorTerm{ID: id}) - } - nodeClass.Spec.SecurityGroupSelectorTerms = sgTerms - - env.ExpectCreated(dep, nodeClass, nodePool) - pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - node := env.ExpectCreatedNodeCount("==", 1)[0] - - sgTerms = lo.Reject(sgTerms, func(t v1.SecurityGroupSelectorTerm, _ int) bool { - return t.ID == awssdk.StringValue(testSecurityGroup.GroupId) - }) - nodeClass.Spec.SecurityGroupSelectorTerms = sgTerms - env.ExpectCreatedOrUpdated(nodeClass) - - env.EventuallyExpectDrifted(nodeClaim) - - delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) - env.ExpectUpdated(pod) - env.EventuallyExpectNotFound(pod, nodeClaim, node) - env.EventuallyExpectHealthyPodCount(selector, numPods) - }) - It("should disrupt nodes that have drifted due to subnets", func() { - subnets := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) - Expect(len(subnets)).To(BeNumerically(">", 1)) - - nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{ID: subnets[0].ID}} - - env.ExpectCreated(dep, nodeClass, nodePool) - pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - node := env.ExpectCreatedNodeCount("==", 1)[0] - - nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{ID: subnets[1].ID}} - env.ExpectCreatedOrUpdated(nodeClass) - - env.EventuallyExpectDrifted(nodeClaim) - - delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) - env.ExpectUpdated(pod) - env.EventuallyExpectNotFound(pod, node) - env.EventuallyExpectHealthyPodCount(selector, numPods) - }) - DescribeTable("NodePool Drift", func(nodeClaimTemplate karpv1.NodeClaimTemplate) { - updatedNodePool := coretest.NodePool( - karpv1.NodePool{ - Spec: karpv1.NodePoolSpec{ - Template: karpv1.NodeClaimTemplate{ - Spec: karpv1.NodeClaimSpec{ - 
NodeClassRef: &karpv1.NodeClassReference{ - Group: object.GVK(nodeClass).Group, - Kind: object.GVK(nodeClass).Kind, - Name: nodeClass.Name, - }, - // keep the same instance type requirements to prevent considering instance types that require swap - Requirements: nodePool.Spec.Template.Spec.Requirements, - }, - }, - }, - }, - karpv1.NodePool{ - Spec: karpv1.NodePoolSpec{ - Template: nodeClaimTemplate, - }, - }, - ) - updatedNodePool.ObjectMeta = nodePool.ObjectMeta - - env.ExpectCreated(dep, nodeClass, nodePool) - pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - node := env.ExpectCreatedNodeCount("==", 1)[0] - - env.ExpectCreatedOrUpdated(updatedNodePool) - - env.EventuallyExpectDrifted(nodeClaim) - - delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) - env.ExpectUpdated(pod) - - // Nodes will need to have the start-up taint removed before the node can be considered as initialized - fmt.Println(CurrentSpecReport().LeafNodeText) - if CurrentSpecReport().LeafNodeText == "Start-up Taints" { - nodes := env.EventuallyExpectCreatedNodeCount("==", 2) - sort.Slice(nodes, func(i int, j int) bool { - return nodes[i].CreationTimestamp.Before(&nodes[j].CreationTimestamp) - }) - nodeTwo := nodes[1] - // Remove the startup taints from the new nodes to initialize them - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeTwo), nodeTwo)).To(Succeed()) - stored := nodeTwo.DeepCopy() - nodeTwo.Spec.Taints = lo.Reject(nodeTwo.Spec.Taints, func(t corev1.Taint, _ int) bool { return t.Key == "example.com/another-taint-2" }) - g.Expect(env.Client.Patch(env.Context, nodeTwo, client.StrategicMergeFrom(stored))).To(Succeed()) - }).Should(Succeed()) - } - env.EventuallyExpectNotFound(pod, node) - env.EventuallyExpectHealthyPodCount(selector, numPods) - }, - Entry("Annotations", karpv1.NodeClaimTemplate{ - ObjectMeta: karpv1.ObjectMeta{ - Annotations: 
map[string]string{"keyAnnotationTest": "valueAnnotationTest"}, - }, - }), - Entry("Labels", karpv1.NodeClaimTemplate{ - ObjectMeta: karpv1.ObjectMeta{ - Labels: map[string]string{"keyLabelTest": "valueLabelTest"}, - }, - }), - Entry("Taints", karpv1.NodeClaimTemplate{ - Spec: karpv1.NodeClaimSpec{ - Taints: []corev1.Taint{{Key: "example.com/another-taint-2", Effect: corev1.TaintEffectPreferNoSchedule}}, - }, - }), - Entry("Start-up Taints", karpv1.NodeClaimTemplate{ - Spec: karpv1.NodeClaimSpec{ - StartupTaints: []corev1.Taint{{Key: "example.com/another-taint-2", Effect: corev1.TaintEffectPreferNoSchedule}}, - }, - }), - Entry("NodeRequirements", karpv1.NodeClaimTemplate{ - Spec: karpv1.NodeClaimSpec{ - // since this will overwrite the default requirements, add instance category and family selectors back into requirements - Requirements: []karpv1.NodeSelectorRequirementWithMinValues{ - {NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: karpv1.CapacityTypeLabelKey, Operator: corev1.NodeSelectorOpIn, Values: []string{karpv1.CapacityTypeSpot}}}, - {NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: v1.LabelInstanceCategory, Operator: corev1.NodeSelectorOpIn, Values: []string{"c", "m", "r"}}}, - {NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: v1.LabelInstanceFamily, Operator: corev1.NodeSelectorOpNotIn, Values: []string{"a1"}}}, - }, - }, - }), - ) - DescribeTable("EC2NodeClass", func(nodeClassSpec v1.EC2NodeClassSpec) { - updatedNodeClass := test.EC2NodeClass(v1.EC2NodeClass{Spec: *nodeClass.Spec.DeepCopy()}, v1.EC2NodeClass{Spec: nodeClassSpec}) - updatedNodeClass.ObjectMeta = nodeClass.ObjectMeta - - env.ExpectCreated(dep, nodeClass, nodePool) - pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - node := env.ExpectCreatedNodeCount("==", 1)[0] - - env.ExpectCreatedOrUpdated(updatedNodeClass) - - env.EventuallyExpectDrifted(nodeClaim) - - 
delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) - env.ExpectUpdated(pod) - env.EventuallyExpectNotFound(pod, node) - env.EventuallyExpectHealthyPodCount(selector, numPods) - }, - Entry("UserData", v1.EC2NodeClassSpec{UserData: awssdk.String("#!/bin/bash\necho \"Hello, AL2023\"")}), - Entry("Tags", v1.EC2NodeClassSpec{Tags: map[string]string{"keyTag-test-3": "valueTag-test-3"}}), - Entry("MetadataOptions", v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPTokens: awssdk.String("required"), HTTPPutResponseHopLimit: awssdk.Int64(10)}}), - Entry("BlockDeviceMappings", v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{ - { - DeviceName: awssdk.String("/dev/xvda"), - EBS: &v1.BlockDevice{ - VolumeSize: resources.Quantity("20Gi"), - VolumeType: awssdk.String("gp3"), - Encrypted: awssdk.Bool(true), - }, - }}}), - Entry("DetailedMonitoring", v1.EC2NodeClassSpec{DetailedMonitoring: awssdk.Bool(true)}), - Entry("AMIFamily", v1.EC2NodeClassSpec{AMIFamily: awssdk.String(v1.AMIFamilyBottlerocket)}), - Entry("KubeletConfiguration", v1.EC2NodeClassSpec{ - Kubelet: &v1.KubeletConfiguration{ - EvictionSoft: map[string]string{"memory.available": "5%"}, - EvictionSoftGracePeriod: map[string]metav1.Duration{"memory.available": {Duration: time.Minute}}, - }, - }), - ) - It("should drift the EC2NodeClass on InstanceProfile", func() { - // Create a separate test case for this one since we can't use the default NodeClass that's created due to it having - // a pre-populated role AND we also need to do the instance profile generation within the scope of this test - instanceProfileName := fmt.Sprintf("KarpenterNodeInstanceProfile-%s", env.ClusterName) - instanceProfileDriftName := fmt.Sprintf("KarpenterNodeInstanceProfile-Drift-%s", env.ClusterName) - roleName := fmt.Sprintf("KarpenterNodeRole-%s", env.ClusterName) - - for _, name := range []string{instanceProfileName, instanceProfileDriftName} { - env.ExpectInstanceProfileCreated(name, roleName) - 
DeferCleanup(func() { - env.ExpectInstanceProfileDeleted(name, roleName) - }) - } - nodeClass.Spec.Role = "" - nodeClass.Spec.InstanceProfile = lo.ToPtr(instanceProfileName) - - env.ExpectCreated(dep, nodeClass, nodePool) - pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - node := env.ExpectCreatedNodeCount("==", 1)[0] - - nodeClass.Spec.InstanceProfile = lo.ToPtr(instanceProfileDriftName) - env.ExpectCreatedOrUpdated(nodeClass) - - env.EventuallyExpectDrifted(nodeClaim) - - delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) - env.ExpectUpdated(pod) - env.EventuallyExpectNotFound(pod, node) - env.EventuallyExpectHealthyPodCount(selector, numPods) - }) - It("should drift the EC2NodeClass on BlockDeviceMappings volume size update", func() { - nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ - { - DeviceName: awssdk.String("/dev/xvda"), - EBS: &v1.BlockDevice{ - VolumeSize: resources.Quantity("20Gi"), - VolumeType: awssdk.String("gp3"), - Encrypted: awssdk.Bool(true), - }, - }, - } - env.ExpectCreated(dep, nodeClass, nodePool) - pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - node := env.ExpectCreatedNodeCount("==", 1)[0] - - nodeClass.Spec.BlockDeviceMappings[0].EBS.VolumeSize = resources.Quantity("100Gi") - env.ExpectCreatedOrUpdated(nodeClass) - - By("validating the drifted status condition has propagated") - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - g.Expect(nodeClaim.StatusConditions().Get(karpv1.ConditionTypeDrifted)).ToNot(BeNil()) - g.Expect(nodeClaim.StatusConditions().Get(karpv1.ConditionTypeDrifted).IsTrue()).To(BeTrue()) - }).Should(Succeed()) - - delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) - env.ExpectUpdated(pod) - env.EventuallyExpectNotFound(pod, node) - 
env.EventuallyExpectHealthyPodCount(selector, numPods) - }) - It("should update the nodepool-hash annotation on the nodepool and nodeclaim when the nodepool's nodepool-hash-version annotation does not match the controller hash version", func() { - env.ExpectCreated(dep, nodeClass, nodePool) - env.EventuallyExpectHealthyPodCount(selector, numPods) - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - nodePool = env.ExpectExists(nodePool).(*karpv1.NodePool) - expectedHash := nodePool.Hash() - - By(fmt.Sprintf("expect nodepool %s and nodeclaim %s to contain %s and %s annotations", nodePool.Name, nodeClaim.Name, karpv1.NodePoolHashAnnotationKey, karpv1.NodePoolHashVersionAnnotationKey)) - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) - g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - - g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) - g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) - g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) - g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) - }).WithTimeout(30 * time.Second).Should(Succeed()) - - nodePool.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ - karpv1.NodePoolHashAnnotationKey: "test-hash-1", - karpv1.NodePoolHashVersionAnnotationKey: "test-hash-version-1", - }) - // Updating `nodePool.Spec.Template.Annotations` would normally trigger drift on all nodeclaims owned by the - // nodepool. 
However, the nodepool-hash-version does not match the controller hash version, so we will see that - // none of the nodeclaims will be drifted and all nodeclaims will have an updated `nodepool-hash` and `nodepool-hash-version` annotation - nodePool.Spec.Template.Annotations = lo.Assign(nodePool.Spec.Template.Annotations, map[string]string{ - "test-key": "test-value", - }) - nodeClaim.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ - karpv1.NodePoolHashAnnotationKey: "test-hash-2", - karpv1.NodePoolHashVersionAnnotationKey: "test-hash-version-2", - }) - - // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodepool - env.ExpectUpdated(nodeClaim, nodePool) - expectedHash = nodePool.Hash() - - // Expect all nodeclaims not to be drifted and contain an updated `nodepool-hash` and `nodepool-hash-version` annotation - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) - g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - - g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) - g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) - g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) - g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) - }) - }) - It("should update the ec2nodeclass-hash annotation on the ec2nodeclass and nodeclaim when the ec2nodeclass's ec2nodeclass-hash-version annotation does not match the controller hash version", func() { - env.ExpectCreated(dep, nodeClass, nodePool) - env.EventuallyExpectHealthyPodCount(selector, numPods) - nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] - nodeClass = 
env.ExpectExists(nodeClass).(*v1.EC2NodeClass) - expectedHash := nodeClass.Hash() - - By(fmt.Sprintf("expect nodeclass %s and nodeclaim %s to contain %s and %s annotations", nodeClass.Name, nodeClaim.Name, v1.AnnotationEC2NodeClassHash, v1.AnnotationEC2NodeClassHashVersion)) - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) - g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - - g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) - g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) - g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) - g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) - }).WithTimeout(30 * time.Second).Should(Succeed()) - - nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ - v1.AnnotationEC2NodeClassHash: "test-hash-1", - v1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1", - }) - // Updating `nodeClass.Spec.Tags` would normally trigger drift on all nodeclaims using the - // nodeclass. 
However, the ec2nodeclass-hash-version does not match the controller hash version, so we will see that - // none of the nodeclaims will be drifted and all nodeclaims will have an updated `ec2nodeclass-hash` and `ec2nodeclass-hash-version` annotation - nodeClass.Spec.Tags = lo.Assign(nodeClass.Spec.Tags, map[string]string{ - "test-key": "test-value", - }) - nodeClaim.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ - v1.AnnotationEC2NodeClassHash: "test-hash-2", - v1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2", - }) - - // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodeclass - env.ExpectUpdated(nodeClaim, nodeClass) - expectedHash = nodeClass.Hash() - - // Expect all nodeclaims not to be drifted and contain an updated `nodepool-hash` and `nodepool-hash-version` annotation - Eventually(func(g Gomega) { - g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) - g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) - - g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) - g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) - g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) - g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) - }).WithTimeout(30 * time.Second).Should(Succeed()) - env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim) - }) - Context("Failure", func() { - It("should not disrupt a drifted node if the replacement node never registers", func() { - // launch a new nodeClaim - var numPods int32 = 2 - dep := coretest.Deployment(coretest.DeploymentOptions{ - Replicas: 2, - PodOptions: coretest.PodOptions{ - ObjectMeta: 
metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, - PodAntiRequirements: []corev1.PodAffinityTerm{{ - TopologyKey: corev1.LabelHostname, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"app": "inflate"}, - }}, - }, - }, - }) - env.ExpectCreated(dep, nodeClass, nodePool) - - startingNodeClaimState := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) - env.EventuallyExpectCreatedNodeCount("==", int(numPods)) - - // Drift the nodeClaim with bad configuration that will not register a NodeClaim - nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: env.GetAMIBySSMPath("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs")}} - env.ExpectCreatedOrUpdated(nodeClass) - - env.EventuallyExpectDrifted(startingNodeClaimState...) - - // Expect only a single node to be tainted due to default disruption budgets - taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) - - // Drift should fail and the original node should be untainted - // TODO: reduce timeouts when disruption waits are factored out - env.EventuallyExpectNodesUntaintedWithTimeout(11*time.Minute, taintedNodes...) - - // Expect all the NodeClaims that existed on the initial provisioning loop are not removed. - // Assert this over several minutes to ensure a subsequent disruption controller pass doesn't - // successfully schedule the evicted pods to the in-flight nodeclaim and disrupt the original node - Consistently(func(g Gomega) { - nodeClaims := &karpv1.NodeClaimList{} - g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) - startingNodeClaimUIDs := sets.New(lo.Map(startingNodeClaimState, func(nc *karpv1.NodeClaim, _ int) types.UID { return nc.UID })...) - nodeClaimUIDs := sets.New(lo.Map(nodeClaims.Items, func(nc karpv1.NodeClaim, _ int) types.UID { return nc.UID })...) 
- g.Expect(nodeClaimUIDs.IsSuperset(startingNodeClaimUIDs)).To(BeTrue()) - }, "2m").Should(Succeed()) - }) - It("should not disrupt a drifted node if the replacement node registers but never initialized", func() { - // launch a new nodeClaim - var numPods int32 = 2 - dep := coretest.Deployment(coretest.DeploymentOptions{ - Replicas: 2, - PodOptions: coretest.PodOptions{ - ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, - PodAntiRequirements: []corev1.PodAffinityTerm{{ - TopologyKey: corev1.LabelHostname, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"app": "inflate"}, - }}, - }, - }, - }) - env.ExpectCreated(dep, nodeClass, nodePool) - - startingNodeClaimState := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) - env.EventuallyExpectCreatedNodeCount("==", int(numPods)) - - // Drift the nodeClaim with bad configuration that never initializes - nodePool.Spec.Template.Spec.StartupTaints = []corev1.Taint{{Key: "example.com/taint", Effect: corev1.TaintEffectPreferNoSchedule}} - env.ExpectCreatedOrUpdated(nodePool) - - env.EventuallyExpectDrifted(startingNodeClaimState...) - - // Expect only a single node to get tainted due to default disruption budgets - taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) - - // Drift should fail and original node should be untainted - // TODO: reduce timeouts when disruption waits are factored out - env.EventuallyExpectNodesUntaintedWithTimeout(11*time.Minute, taintedNodes...) 
- - // Expect that the new nodeClaim/node is kept around after the un-cordon - nodeList := &corev1.NodeList{} - Expect(env.Client.List(env, nodeList, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) - Expect(nodeList.Items).To(HaveLen(int(numPods) + 1)) - - nodeClaimList := &karpv1.NodeClaimList{} - Expect(env.Client.List(env, nodeClaimList, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) - Expect(nodeClaimList.Items).To(HaveLen(int(numPods) + 1)) - - // Expect all the NodeClaims that existed on the initial provisioning loop are not removed - // Assert this over several minutes to ensure a subsequent disruption controller pass doesn't - // successfully schedule the evicted pods to the in-flight nodeclaim and disrupt the original node - Consistently(func(g Gomega) { - nodeClaims := &karpv1.NodeClaimList{} - g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) - startingNodeClaimUIDs := sets.New(lo.Map(startingNodeClaimState, func(m *karpv1.NodeClaim, _ int) types.UID { return m.UID })...) - nodeClaimUIDs := sets.New(lo.Map(nodeClaims.Items, func(m karpv1.NodeClaim, _ int) types.UID { return m.UID })...) 
- g.Expect(nodeClaimUIDs.IsSuperset(startingNodeClaimUIDs)).To(BeTrue()) - }, "2m").Should(Succeed()) - }) - It("should not drift any nodes if their PodDisruptionBudgets are unhealthy", func() { - // Create a deployment that contains a readiness probe that will never succeed - // This way, the pod will bind to the node, but the PodDisruptionBudget will never go healthy - var numPods int32 = 2 - dep := coretest.Deployment(coretest.DeploymentOptions{ - Replicas: 2, - PodOptions: coretest.PodOptions{ - ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, - PodAntiRequirements: []corev1.PodAffinityTerm{{ - TopologyKey: corev1.LabelHostname, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"app": "inflate"}, - }}, - }, - ReadinessProbe: &corev1.Probe{ - ProbeHandler: corev1.ProbeHandler{ - HTTPGet: &corev1.HTTPGetAction{ - Port: intstr.FromInt32(80), - }, - }, - }, - }, - }) - selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) - minAvailable := intstr.FromInt32(numPods - 1) - pdb := coretest.PodDisruptionBudget(coretest.PDBOptions{ - Labels: dep.Spec.Template.Labels, - MinAvailable: &minAvailable, - }) - env.ExpectCreated(dep, nodeClass, nodePool, pdb) - - nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) - env.EventuallyExpectCreatedNodeCount("==", int(numPods)) - - // Expect pods to be bound but not to be ready since we are intentionally failing the readiness check - env.EventuallyExpectBoundPodCount(selector, int(numPods)) - - // Drift the nodeclaims - nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} - env.ExpectUpdated(nodePool) - - env.EventuallyExpectDrifted(nodeClaims...) 
- env.ConsistentlyExpectNoDisruptions(int(numPods), time.Minute) - }) - }) -}) +// var _ = Describe("Drift", func() { +// var dep *appsv1.Deployment +// var selector labels.Selector +// var numPods int +// BeforeEach(func() { +// amdAMI = env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) +// numPods = 1 +// // Add pods with a do-not-disrupt annotation so that we can check node metadata before we disrupt +// dep = coretest.Deployment(coretest.DeploymentOptions{ +// Replicas: int32(numPods), +// PodOptions: coretest.PodOptions{ +// ObjectMeta: metav1.ObjectMeta{ +// Labels: map[string]string{ +// "app": "my-app", +// }, +// Annotations: map[string]string{ +// karpv1.DoNotDisruptAnnotationKey: "true", +// }, +// }, +// TerminationGracePeriodSeconds: lo.ToPtr[int64](0), +// }, +// }) +// selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) +// }) +// Context("Budgets", func() { +// It("should respect budgets for empty drift", func() { +// nodePool = coretest.ReplaceRequirements(nodePool, +// karpv1.NodeSelectorRequirementWithMinValues{ +// NodeSelectorRequirement: corev1.NodeSelectorRequirement{ +// Key: v1.LabelInstanceSize, +// Operator: corev1.NodeSelectorOpIn, +// Values: []string{"2xlarge"}, +// }, +// }, +// ) +// // We're expecting to create 3 nodes, so we'll expect to see 2 nodes deleting at one time. +// nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ +// Nodes: "50%", +// }} +// var numPods int32 = 6 +// dep = coretest.Deployment(coretest.DeploymentOptions{ +// Replicas: numPods, +// PodOptions: coretest.PodOptions{ +// ObjectMeta: metav1.ObjectMeta{ +// Annotations: map[string]string{ +// karpv1.DoNotDisruptAnnotationKey: "true", +// }, +// Labels: map[string]string{"app": "large-app"}, +// }, +// // Each 2xlarge has 8 cpu, so each node should fit 2 pods. 
+// ResourceRequirements: corev1.ResourceRequirements{ +// Requests: corev1.ResourceList{ +// corev1.ResourceCPU: resource.MustParse("3"), +// }, +// }, +// }, +// }) +// selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) +// env.ExpectCreated(nodeClass, nodePool, dep) +// +// nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) +// nodes := env.EventuallyExpectCreatedNodeCount("==", 3) +// env.EventuallyExpectHealthyPodCount(selector, int(numPods)) +// +// // List nodes so that we get any updated information on the nodes. If we don't +// // we have the potential to over-write any changes Karpenter makes to the nodes. +// // Add a finalizer to each node so that we can stop termination disruptions +// By("adding finalizers to the nodes to prevent termination") +// for _, node := range nodes { +// Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) +// node.Finalizers = append(node.Finalizers, common.TestingFinalizer) +// env.ExpectUpdated(node) +// } +// +// By("making the nodes empty") +// // Delete the deployment to make all nodes empty. +// env.ExpectDeleted(dep) +// +// // Drift the nodeclaims +// By("drift the nodeclaims") +// nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} +// env.ExpectUpdated(nodePool) +// +// env.EventuallyExpectDrifted(nodeClaims...) 
+// +// // Ensure that we get two nodes tainted, and they have overlap during the drift +// env.EventuallyExpectTaintedNodeCount("==", 2) +// nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 5*time.Second) +// +// // Remove the finalizer from each node so that we can terminate +// for _, node := range nodes { +// Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) +// } +// +// // After the deletion timestamp is set and all pods are drained +// // the node should be gone +// env.EventuallyExpectNotFound(nodes[0], nodes[1]) +// +// nodes = env.EventuallyExpectTaintedNodeCount("==", 1) +// Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed()) +// env.EventuallyExpectNotFound(nodes[0]) +// }) +// It("should respect budgets for non-empty delete drift", func() { +// nodePool = coretest.ReplaceRequirements(nodePool, +// karpv1.NodeSelectorRequirementWithMinValues{ +// NodeSelectorRequirement: corev1.NodeSelectorRequirement{ +// Key: v1.LabelInstanceSize, +// Operator: corev1.NodeSelectorOpIn, +// Values: []string{"2xlarge"}, +// }, +// }, +// ) +// // We're expecting to create 3 nodes, so we'll expect to see at most 2 nodes deleting at one time. +// nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ +// Nodes: "50%", +// }} +// var numPods int32 = 9 +// dep = coretest.Deployment(coretest.DeploymentOptions{ +// Replicas: numPods, +// PodOptions: coretest.PodOptions{ +// ObjectMeta: metav1.ObjectMeta{ +// Annotations: map[string]string{ +// karpv1.DoNotDisruptAnnotationKey: "true", +// }, +// Labels: map[string]string{"app": "large-app"}, +// }, +// // Each 2xlarge has 8 cpu, so each node should fit no more than 3 pods. 
+// ResourceRequirements: corev1.ResourceRequirements{ +// Requests: corev1.ResourceList{ +// corev1.ResourceCPU: resource.MustParse("2100m"), +// }, +// }, +// }, +// }) +// selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) +// env.ExpectCreated(nodeClass, nodePool, dep) +// +// nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) +// nodes := env.EventuallyExpectCreatedNodeCount("==", 3) +// env.EventuallyExpectHealthyPodCount(selector, int(numPods)) +// +// By("scaling down the deployment") +// // Update the deployment to a third of the replicas. +// dep.Spec.Replicas = lo.ToPtr[int32](3) +// env.ExpectUpdated(dep) +// +// // First expect there to be 3 pods, then try to spread the pods. +// env.EventuallyExpectHealthyPodCount(selector, 3) +// env.ForcePodsToSpread(nodes...) +// env.EventuallyExpectHealthyPodCount(selector, 3) +// +// By("cordoning and adding finalizer to the nodes") +// // Add a finalizer to each node so that we can stop termination disruptions +// for _, node := range nodes { +// Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) +// node.Finalizers = append(node.Finalizers, common.TestingFinalizer) +// env.ExpectUpdated(node) +// } +// +// By("drifting the nodes") +// // Drift the nodeclaims +// nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} +// env.ExpectUpdated(nodePool) +// +// env.EventuallyExpectDrifted(nodeClaims...) 
+// +// By("enabling disruption by removing the do not disrupt annotation") +// pods := env.EventuallyExpectHealthyPodCount(selector, 3) +// // Remove the do-not-disrupt annotation so that the nodes are now disruptable +// for _, pod := range pods { +// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) +// env.ExpectUpdated(pod) +// } +// +// // Ensure that we get two nodes tainted, and they have overlap during the drift +// env.EventuallyExpectTaintedNodeCount("==", 2) +// nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 30*time.Second) +// +// By("removing the finalizer from the nodes") +// Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed()) +// Expect(env.ExpectTestingFinalizerRemoved(nodes[1])).To(Succeed()) +// +// // After the deletion timestamp is set and all pods are drained +// // the node should be gone +// env.EventuallyExpectNotFound(nodes[0], nodes[1]) +// }) +// It("should respect budgets for non-empty replace drift", func() { +// appLabels := map[string]string{"app": "large-app"} +// nodePool.Labels = appLabels +// // We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time. 
+// nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ +// Nodes: "3", +// }} +// +// // Create a 5 pod deployment with hostname inter-pod anti-affinity to ensure each pod is placed on a unique node +// numPods = 5 +// selector = labels.SelectorFromSet(appLabels) +// deployment := coretest.Deployment(coretest.DeploymentOptions{ +// Replicas: int32(numPods), +// PodOptions: coretest.PodOptions{ +// ObjectMeta: metav1.ObjectMeta{ +// Labels: appLabels, +// }, +// PodAntiRequirements: []corev1.PodAffinityTerm{{ +// TopologyKey: corev1.LabelHostname, +// LabelSelector: &metav1.LabelSelector{ +// MatchLabels: appLabels, +// }, +// }}, +// }, +// }) +// +// env.ExpectCreated(nodeClass, nodePool, deployment) +// +// originalNodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", numPods) +// originalNodes := env.EventuallyExpectCreatedNodeCount("==", numPods) +// +// // Check that all deployment pods are online +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// +// By("cordoning and adding finalizer to the nodes") +// // Add a finalizer to each node so that we can stop termination disruptions +// for _, node := range originalNodes { +// Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) +// node.Finalizers = append(node.Finalizers, common.TestingFinalizer) +// env.ExpectUpdated(node) +// } +// +// By("drifting the nodepool") +// nodePool.Spec.Template.Annotations = lo.Assign(nodePool.Spec.Template.Annotations, map[string]string{"test-annotation": "drift"}) +// env.ExpectUpdated(nodePool) +// +// // Ensure that we get three nodes tainted, and they have overlap during the drift +// env.EventuallyExpectTaintedNodeCount("==", 3) +// env.EventuallyExpectNodeClaimCount("==", 8) +// env.EventuallyExpectNodeCount("==", 8) +// env.ConsistentlyExpectDisruptionsWithNodeCount(3, 8, 5*time.Second) +// +// for _, node := range originalNodes { +// Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) +// } +// +// // 
Eventually expect all the nodes to be rolled and completely removed +// // Since this completes the disruption operation, this also ensures that we aren't leaking nodes into subsequent +// // tests since nodeclaims that are actively replacing but haven't brought-up nodes yet can register nodes later +// env.EventuallyExpectNotFound(lo.Map(originalNodes, func(n *corev1.Node, _ int) client.Object { return n })...) +// env.EventuallyExpectNotFound(lo.Map(originalNodeClaims, func(n *karpv1.NodeClaim, _ int) client.Object { return n })...) +// env.ExpectNodeClaimCount("==", 5) +// env.ExpectNodeCount("==", 5) +// }) +// It("should not allow drift if the budget is fully blocking", func() { +// // We're going to define a budget that doesn't allow any drift to happen +// nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ +// Nodes: "0", +// }} +// +// dep.Spec.Template.Annotations = nil +// env.ExpectCreated(nodeClass, nodePool, dep) +// +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// env.EventuallyExpectCreatedNodeCount("==", 1) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// +// By("drifting the nodes") +// // Drift the nodeclaims +// nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} +// env.ExpectUpdated(nodePool) +// +// env.EventuallyExpectDrifted(nodeClaim) +// env.ConsistentlyExpectNoDisruptions(1, time.Minute) +// }) +// It("should not allow drift if the budget is fully blocking during a scheduled time", func() { +// // We're going to define a budget that doesn't allow any drift to happen +// // This is going to be on a schedule that only lasts 30 minutes, whose window starts 15 minutes before +// // the current time and extends 15 minutes past the current time +// // Times need to be in UTC since the karpenter containers were built in UTC time +// windowStart := time.Now().Add(-time.Minute * 15).UTC() +// nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ +// Nodes: "0", +// Schedule: 
lo.ToPtr(fmt.Sprintf("%d %d * * *", windowStart.Minute(), windowStart.Hour())), +// Duration: &metav1.Duration{Duration: time.Minute * 30}, +// }} +// +// dep.Spec.Template.Annotations = nil +// env.ExpectCreated(nodeClass, nodePool, dep) +// +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// env.EventuallyExpectCreatedNodeCount("==", 1) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// +// By("drifting the nodes") +// // Drift the nodeclaims +// nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} +// env.ExpectUpdated(nodePool) +// +// env.EventuallyExpectDrifted(nodeClaim) +// env.ConsistentlyExpectNoDisruptions(1, time.Minute) +// }) +// }) +// It("should disrupt nodes that have drifted due to AMIs", func() { +// // Choose an old static image (AL2023 AMIs don't exist for 1.22) +// oldCustomAMI := env.GetAMIBySSMPath(lo.Ternary(env.K8sMinorVersion() == 23, +// "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-1.23-v20240307/image_id", +// fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)), +// )) +// nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 +// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: oldCustomAMI}} +// +// env.ExpectCreated(dep, nodeClass, nodePool) +// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] +// env.ExpectCreatedNodeCount("==", 1) +// +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// node := env.EventuallyExpectNodeCount("==", 1)[0] +// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} +// env.ExpectCreatedOrUpdated(nodeClass) +// +// env.EventuallyExpectDrifted(nodeClaim) +// +// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) +// env.ExpectUpdated(pod) +// env.EventuallyExpectNotFound(pod, nodeClaim, node) +// env.EventuallyExpectHealthyPodCount(selector, 
numPods) +// }) +// It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() { +// armAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", env.K8sVersion())) +// nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 +// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: armAMI}} +// +// env.ExpectCreated(dep, nodeClass, nodePool) +// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] +// env.ExpectCreatedNodeCount("==", 1) +// +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// node := env.EventuallyExpectNodeCount("==", 1)[0] +// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} +// env.ExpectCreatedOrUpdated(nodeClass) +// +// env.EventuallyExpectDrifted(nodeClaim) +// +// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) +// env.ExpectUpdated(pod) +// env.EventuallyExpectNotFound(pod, nodeClaim, node) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// }) +// It("should not disrupt nodes that have drifted without the featureGate enabled", func() { +// env.ExpectSettingsOverridden(corev1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=false"}) +// +// // Choose an old static image (AL2023 AMIs don't exist for 1.22) +// oldCustomAMI := env.GetAMIBySSMPath(lo.Ternary(env.K8sMinorVersion() == 23, +// "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-1.23-v20240307/image_id", +// fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)), +// )) +// nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 +// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: oldCustomAMI}} +// +// env.ExpectCreated(dep, nodeClass, nodePool) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// env.ExpectCreatedNodeCount("==", 1) +// +// node := 
env.Monitor.CreatedNodes()[0] +// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} +// env.ExpectUpdated(nodeClass) +// +// // We should consistently get the same node existing for a minute +// Consistently(func(g Gomega) { +// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), &corev1.Node{})).To(Succeed()) +// }).WithTimeout(time.Minute).Should(Succeed()) +// }) +// It("should disrupt nodes that have drifted due to securitygroup", func() { +// By("getting the cluster vpc id") +// output, err := env.EKSAPI.DescribeCluster(&eks.DescribeClusterInput{Name: awssdk.String(env.ClusterName)}) +// Expect(err).To(BeNil()) +// +// By("creating new security group") +// createSecurityGroup := &ec2.CreateSecurityGroupInput{ +// GroupName: awssdk.String("security-group-drift"), +// Description: awssdk.String("End-to-end Drift Test, should delete after drift test is completed"), +// VpcId: output.Cluster.ResourcesVpcConfig.VpcId, +// TagSpecifications: []*ec2.TagSpecification{ +// { +// ResourceType: awssdk.String("security-group"), +// Tags: []*ec2.Tag{ +// { +// Key: awssdk.String("karpenter.sh/discovery"), +// Value: awssdk.String(env.ClusterName), +// }, +// { +// Key: awssdk.String(coretest.DiscoveryLabel), +// Value: awssdk.String(env.ClusterName), +// }, +// { +// Key: awssdk.String("creation-date"), +// Value: awssdk.String(time.Now().Format(time.RFC3339)), +// }, +// }, +// }, +// }, +// } +// _, _ = env.EC2API.CreateSecurityGroup(createSecurityGroup) +// +// By("looking for security groups") +// var securitygroups []aws.SecurityGroup +// var testSecurityGroup aws.SecurityGroup +// Eventually(func(g Gomega) { +// securitygroups = env.GetSecurityGroups(map[string]string{"karpenter.sh/discovery": env.ClusterName}) +// testSecurityGroup, _ = lo.Find(securitygroups, func(sg aws.SecurityGroup) bool { +// return awssdk.StringValue(sg.GroupName) == "security-group-drift" +// }) +// g.Expect(testSecurityGroup).ToNot(BeNil()) +// 
}).Should(Succeed()) +// +// By("creating a new provider with the new securitygroup") +// awsIDs := lo.FilterMap(securitygroups, func(sg aws.SecurityGroup, _ int) (string, bool) { +// if awssdk.StringValue(sg.GroupId) != awssdk.StringValue(testSecurityGroup.GroupId) { +// return awssdk.StringValue(sg.GroupId), true +// } +// return "", false +// }) +// sgTerms := []v1.SecurityGroupSelectorTerm{{ID: awssdk.StringValue(testSecurityGroup.GroupId)}} +// for _, id := range awsIDs { +// sgTerms = append(sgTerms, v1.SecurityGroupSelectorTerm{ID: id}) +// } +// nodeClass.Spec.SecurityGroupSelectorTerms = sgTerms +// +// env.ExpectCreated(dep, nodeClass, nodePool) +// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// node := env.ExpectCreatedNodeCount("==", 1)[0] +// +// sgTerms = lo.Reject(sgTerms, func(t v1.SecurityGroupSelectorTerm, _ int) bool { +// return t.ID == awssdk.StringValue(testSecurityGroup.GroupId) +// }) +// nodeClass.Spec.SecurityGroupSelectorTerms = sgTerms +// env.ExpectCreatedOrUpdated(nodeClass) +// +// env.EventuallyExpectDrifted(nodeClaim) +// +// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) +// env.ExpectUpdated(pod) +// env.EventuallyExpectNotFound(pod, nodeClaim, node) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// }) +// It("should disrupt nodes that have drifted due to subnets", func() { +// subnets := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) +// Expect(len(subnets)).To(BeNumerically(">", 1)) +// +// nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{ID: subnets[0].ID}} +// +// env.ExpectCreated(dep, nodeClass, nodePool) +// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// node := env.ExpectCreatedNodeCount("==", 1)[0] +// +// nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{ID: 
subnets[1].ID}} +// env.ExpectCreatedOrUpdated(nodeClass) +// +// env.EventuallyExpectDrifted(nodeClaim) +// +// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) +// env.ExpectUpdated(pod) +// env.EventuallyExpectNotFound(pod, node) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// }) +// DescribeTable("NodePool Drift", func(nodeClaimTemplate karpv1.NodeClaimTemplate) { +// updatedNodePool := coretest.NodePool( +// karpv1.NodePool{ +// Spec: karpv1.NodePoolSpec{ +// Template: karpv1.NodeClaimTemplate{ +// Spec: karpv1.NodeClaimSpec{ +// NodeClassRef: &karpv1.NodeClassReference{ +// Group: object.GVK(nodeClass).Group, +// Kind: object.GVK(nodeClass).Kind, +// Name: nodeClass.Name, +// }, +// // keep the same instance type requirements to prevent considering instance types that require swap +// Requirements: nodePool.Spec.Template.Spec.Requirements, +// }, +// }, +// }, +// }, +// karpv1.NodePool{ +// Spec: karpv1.NodePoolSpec{ +// Template: nodeClaimTemplate, +// }, +// }, +// ) +// updatedNodePool.ObjectMeta = nodePool.ObjectMeta +// +// env.ExpectCreated(dep, nodeClass, nodePool) +// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// node := env.ExpectCreatedNodeCount("==", 1)[0] +// +// env.ExpectCreatedOrUpdated(updatedNodePool) +// +// env.EventuallyExpectDrifted(nodeClaim) +// +// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) +// env.ExpectUpdated(pod) +// +// // Nodes will need to have the start-up taint removed before the node can be considered as initialized +// fmt.Println(CurrentSpecReport().LeafNodeText) +// if CurrentSpecReport().LeafNodeText == "Start-up Taints" { +// nodes := env.EventuallyExpectCreatedNodeCount("==", 2) +// sort.Slice(nodes, func(i int, j int) bool { +// return nodes[i].CreationTimestamp.Before(&nodes[j].CreationTimestamp) +// }) +// nodeTwo := nodes[1] +// // Remove the startup taints from the new nodes to 
initialize them +// Eventually(func(g Gomega) { +// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeTwo), nodeTwo)).To(Succeed()) +// stored := nodeTwo.DeepCopy() +// nodeTwo.Spec.Taints = lo.Reject(nodeTwo.Spec.Taints, func(t corev1.Taint, _ int) bool { return t.Key == "example.com/another-taint-2" }) +// g.Expect(env.Client.Patch(env.Context, nodeTwo, client.StrategicMergeFrom(stored))).To(Succeed()) +// }).Should(Succeed()) +// } +// env.EventuallyExpectNotFound(pod, node) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// }, +// Entry("Annotations", karpv1.NodeClaimTemplate{ +// ObjectMeta: karpv1.ObjectMeta{ +// Annotations: map[string]string{"keyAnnotationTest": "valueAnnotationTest"}, +// }, +// }), +// Entry("Labels", karpv1.NodeClaimTemplate{ +// ObjectMeta: karpv1.ObjectMeta{ +// Labels: map[string]string{"keyLabelTest": "valueLabelTest"}, +// }, +// }), +// Entry("Taints", karpv1.NodeClaimTemplate{ +// Spec: karpv1.NodeClaimSpec{ +// Taints: []corev1.Taint{{Key: "example.com/another-taint-2", Effect: corev1.TaintEffectPreferNoSchedule}}, +// }, +// }), +// Entry("Start-up Taints", karpv1.NodeClaimTemplate{ +// Spec: karpv1.NodeClaimSpec{ +// StartupTaints: []corev1.Taint{{Key: "example.com/another-taint-2", Effect: corev1.TaintEffectPreferNoSchedule}}, +// }, +// }), +// Entry("NodeRequirements", karpv1.NodeClaimTemplate{ +// Spec: karpv1.NodeClaimSpec{ +// // since this will overwrite the default requirements, add instance category and family selectors back into requirements +// Requirements: []karpv1.NodeSelectorRequirementWithMinValues{ +// {NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: karpv1.CapacityTypeLabelKey, Operator: corev1.NodeSelectorOpIn, Values: []string{karpv1.CapacityTypeSpot}}}, +// {NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: v1.LabelInstanceCategory, Operator: corev1.NodeSelectorOpIn, Values: []string{"c", "m", "r"}}}, +// {NodeSelectorRequirement: 
corev1.NodeSelectorRequirement{Key: v1.LabelInstanceFamily, Operator: corev1.NodeSelectorOpNotIn, Values: []string{"a1"}}}, +// }, +// }, +// }), +// ) +// DescribeTable("EC2NodeClass", func(nodeClassSpec v1.EC2NodeClassSpec) { +// updatedNodeClass := test.EC2NodeClass(v1.EC2NodeClass{Spec: *nodeClass.Spec.DeepCopy()}, v1.EC2NodeClass{Spec: nodeClassSpec}) +// updatedNodeClass.ObjectMeta = nodeClass.ObjectMeta +// +// env.ExpectCreated(dep, nodeClass, nodePool) +// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// node := env.ExpectCreatedNodeCount("==", 1)[0] +// +// env.ExpectCreatedOrUpdated(updatedNodeClass) +// +// env.EventuallyExpectDrifted(nodeClaim) +// +// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) +// env.ExpectUpdated(pod) +// env.EventuallyExpectNotFound(pod, node) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// }, +// Entry("UserData", v1.EC2NodeClassSpec{UserData: awssdk.String("#!/bin/bash\necho \"Hello, AL2023\"")}), +// Entry("Tags", v1.EC2NodeClassSpec{Tags: map[string]string{"keyTag-test-3": "valueTag-test-3"}}), +// Entry("MetadataOptions", v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPTokens: awssdk.String("required"), HTTPPutResponseHopLimit: awssdk.Int64(10)}}), +// Entry("BlockDeviceMappings", v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{ +// { +// DeviceName: awssdk.String("/dev/xvda"), +// EBS: &v1.BlockDevice{ +// VolumeSize: resources.Quantity("20Gi"), +// VolumeType: awssdk.String("gp3"), +// Encrypted: awssdk.Bool(true), +// }, +// }}}), +// Entry("DetailedMonitoring", v1.EC2NodeClassSpec{DetailedMonitoring: awssdk.Bool(true)}), +// Entry("AMIFamily", v1.EC2NodeClassSpec{AMIFamily: awssdk.String(v1.AMIFamilyBottlerocket)}), +// Entry("KubeletConfiguration", v1.EC2NodeClassSpec{ +// Kubelet: &v1.KubeletConfiguration{ +// EvictionSoft: map[string]string{"memory.available": "5%"}, +// 
EvictionSoftGracePeriod: map[string]metav1.Duration{"memory.available": {Duration: time.Minute}}, +// }, +// }), +// ) +// It("should drift the EC2NodeClass on InstanceProfile", func() { +// // Create a separate test case for this one since we can't use the default NodeClass that's created due to it having +// // a pre-populated role AND we also need to do the instance profile generation within the scope of this test +// instanceProfileName := fmt.Sprintf("KarpenterNodeInstanceProfile-%s", env.ClusterName) +// instanceProfileDriftName := fmt.Sprintf("KarpenterNodeInstanceProfile-Drift-%s", env.ClusterName) +// roleName := fmt.Sprintf("KarpenterNodeRole-%s", env.ClusterName) +// +// for _, name := range []string{instanceProfileName, instanceProfileDriftName} { +// env.ExpectInstanceProfileCreated(name, roleName) +// DeferCleanup(func() { +// env.ExpectInstanceProfileDeleted(name, roleName) +// }) +// } +// nodeClass.Spec.Role = "" +// nodeClass.Spec.InstanceProfile = lo.ToPtr(instanceProfileName) +// +// env.ExpectCreated(dep, nodeClass, nodePool) +// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// node := env.ExpectCreatedNodeCount("==", 1)[0] +// +// nodeClass.Spec.InstanceProfile = lo.ToPtr(instanceProfileDriftName) +// env.ExpectCreatedOrUpdated(nodeClass) +// +// env.EventuallyExpectDrifted(nodeClaim) +// +// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) +// env.ExpectUpdated(pod) +// env.EventuallyExpectNotFound(pod, node) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// }) +// It("should drift the EC2NodeClass on BlockDeviceMappings volume size update", func() { +// nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ +// { +// DeviceName: awssdk.String("/dev/xvda"), +// EBS: &v1.BlockDevice{ +// VolumeSize: resources.Quantity("20Gi"), +// VolumeType: awssdk.String("gp3"), +// Encrypted: awssdk.Bool(true), +// }, +// }, +// } +// 
env.ExpectCreated(dep, nodeClass, nodePool) +// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// node := env.ExpectCreatedNodeCount("==", 1)[0] +// +// nodeClass.Spec.BlockDeviceMappings[0].EBS.VolumeSize = resources.Quantity("100Gi") +// env.ExpectCreatedOrUpdated(nodeClass) +// +// By("validating the drifted status condition has propagated") +// Eventually(func(g Gomega) { +// g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) +// g.Expect(nodeClaim.StatusConditions().Get(karpv1.ConditionTypeDrifted)).ToNot(BeNil()) +// g.Expect(nodeClaim.StatusConditions().Get(karpv1.ConditionTypeDrifted).IsTrue()).To(BeTrue()) +// }).Should(Succeed()) +// +// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) +// env.ExpectUpdated(pod) +// env.EventuallyExpectNotFound(pod, node) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// }) +// It("should update the nodepool-hash annotation on the nodepool and nodeclaim when the nodepool's nodepool-hash-version annotation does not match the controller hash version", func() { +// env.ExpectCreated(dep, nodeClass, nodePool) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// nodePool = env.ExpectExists(nodePool).(*karpv1.NodePool) +// expectedHash := nodePool.Hash() +// +// By(fmt.Sprintf("expect nodepool %s and nodeclaim %s to contain %s and %s annotations", nodePool.Name, nodeClaim.Name, karpv1.NodePoolHashAnnotationKey, karpv1.NodePoolHashVersionAnnotationKey)) +// Eventually(func(g Gomega) { +// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) +// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) +// +// g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) 
+// g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) +// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) +// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) +// }).WithTimeout(30 * time.Second).Should(Succeed()) +// +// nodePool.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ +// karpv1.NodePoolHashAnnotationKey: "test-hash-1", +// karpv1.NodePoolHashVersionAnnotationKey: "test-hash-version-1", +// }) +// // Updating `nodePool.Spec.Template.Annotations` would normally trigger drift on all nodeclaims owned by the +// // nodepool. However, the nodepool-hash-version does not match the controller hash version, so we will see that +// // none of the nodeclaims will be drifted and all nodeclaims will have an updated `nodepool-hash` and `nodepool-hash-version` annotation +// nodePool.Spec.Template.Annotations = lo.Assign(nodePool.Spec.Template.Annotations, map[string]string{ +// "test-key": "test-value", +// }) +// nodeClaim.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ +// karpv1.NodePoolHashAnnotationKey: "test-hash-2", +// karpv1.NodePoolHashVersionAnnotationKey: "test-hash-version-2", +// }) +// +// // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodepool +// env.ExpectUpdated(nodeClaim, nodePool) +// expectedHash = nodePool.Hash() +// +// // Expect all nodeclaims not to be drifted and contain an updated `nodepool-hash` and `nodepool-hash-version` annotation +// Eventually(func(g Gomega) { +// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) +// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) +// +// 
g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) +// g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) +// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) +// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) +// }) +// }) +// It("should update the ec2nodeclass-hash annotation on the ec2nodeclass and nodeclaim when the ec2nodeclass's ec2nodeclass-hash-version annotation does not match the controller hash version", func() { +// env.ExpectCreated(dep, nodeClass, nodePool) +// env.EventuallyExpectHealthyPodCount(selector, numPods) +// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] +// nodeClass = env.ExpectExists(nodeClass).(*v1.EC2NodeClass) +// expectedHash := nodeClass.Hash() +// +// By(fmt.Sprintf("expect nodeclass %s and nodeclaim %s to contain %s and %s annotations", nodeClass.Name, nodeClaim.Name, v1.AnnotationEC2NodeClassHash, v1.AnnotationEC2NodeClassHashVersion)) +// Eventually(func(g Gomega) { +// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) +// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) +// +// g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) +// g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) +// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) +// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) +// }).WithTimeout(30 * time.Second).Should(Succeed()) +// +// nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ +// 
v1.AnnotationEC2NodeClassHash: "test-hash-1", +// v1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1", +// }) +// // Updating `nodeClass.Spec.Tags` would normally trigger drift on all nodeclaims using the +// // nodeclass. However, the ec2nodeclass-hash-version does not match the controller hash version, so we will see that +// // none of the nodeclaims will be drifted and all nodeclaims will have an updated `ec2nodeclass-hash` and `ec2nodeclass-hash-version` annotation +// nodeClass.Spec.Tags = lo.Assign(nodeClass.Spec.Tags, map[string]string{ +// "test-key": "test-value", +// }) +// nodeClaim.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ +// v1.AnnotationEC2NodeClassHash: "test-hash-2", +// v1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2", +// }) +// +// // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodeclass +// env.ExpectUpdated(nodeClaim, nodeClass) +// expectedHash = nodeClass.Hash() +// +// // Expect all nodeclaims not to be drifted and contain an updated `nodepool-hash` and `nodepool-hash-version` annotation +// Eventually(func(g Gomega) { +// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) +// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) +// +// g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) +// g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) +// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) +// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) +// }).WithTimeout(30 * time.Second).Should(Succeed()) +// env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim) +// }) +// Context("Failure", func() { 
+// It("should not disrupt a drifted node if the replacement node never registers", func() { +// // launch a new nodeClaim +// var numPods int32 = 2 +// dep := coretest.Deployment(coretest.DeploymentOptions{ +// Replicas: 2, +// PodOptions: coretest.PodOptions{ +// ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, +// PodAntiRequirements: []corev1.PodAffinityTerm{{ +// TopologyKey: corev1.LabelHostname, +// LabelSelector: &metav1.LabelSelector{ +// MatchLabels: map[string]string{"app": "inflate"}, +// }}, +// }, +// }, +// }) +// env.ExpectCreated(dep, nodeClass, nodePool) +// +// startingNodeClaimState := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) +// env.EventuallyExpectCreatedNodeCount("==", int(numPods)) +// +// // Drift the nodeClaim with bad configuration that will not register a NodeClaim +// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: env.GetAMIBySSMPath("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs")}} +// env.ExpectCreatedOrUpdated(nodeClass) +// +// env.EventuallyExpectDrifted(startingNodeClaimState...) +// +// // Expect only a single node to be tainted due to default disruption budgets +// taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) +// +// // Drift should fail and the original node should be untainted +// // TODO: reduce timeouts when disruption waits are factored out +// env.EventuallyExpectNodesUntaintedWithTimeout(11*time.Minute, taintedNodes...) +// +// // Expect all the NodeClaims that existed on the initial provisioning loop are not removed. 
+// // Assert this over several minutes to ensure a subsequent disruption controller pass doesn't +// // successfully schedule the evicted pods to the in-flight nodeclaim and disrupt the original node +// Consistently(func(g Gomega) { +// nodeClaims := &karpv1.NodeClaimList{} +// g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) +// startingNodeClaimUIDs := sets.New(lo.Map(startingNodeClaimState, func(nc *karpv1.NodeClaim, _ int) types.UID { return nc.UID })...) +// nodeClaimUIDs := sets.New(lo.Map(nodeClaims.Items, func(nc karpv1.NodeClaim, _ int) types.UID { return nc.UID })...) +// g.Expect(nodeClaimUIDs.IsSuperset(startingNodeClaimUIDs)).To(BeTrue()) +// }, "2m").Should(Succeed()) +// }) +// It("should not disrupt a drifted node if the replacement node registers but never initialized", func() { +// // launch a new nodeClaim +// var numPods int32 = 2 +// dep := coretest.Deployment(coretest.DeploymentOptions{ +// Replicas: 2, +// PodOptions: coretest.PodOptions{ +// ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, +// PodAntiRequirements: []corev1.PodAffinityTerm{{ +// TopologyKey: corev1.LabelHostname, +// LabelSelector: &metav1.LabelSelector{ +// MatchLabels: map[string]string{"app": "inflate"}, +// }}, +// }, +// }, +// }) +// env.ExpectCreated(dep, nodeClass, nodePool) +// +// startingNodeClaimState := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) +// env.EventuallyExpectCreatedNodeCount("==", int(numPods)) +// +// // Drift the nodeClaim with bad configuration that never initializes +// nodePool.Spec.Template.Spec.StartupTaints = []corev1.Taint{{Key: "example.com/taint", Effect: corev1.TaintEffectPreferNoSchedule}} +// env.ExpectCreatedOrUpdated(nodePool) +// +// env.EventuallyExpectDrifted(startingNodeClaimState...) 
+// +// // Expect only a single node to get tainted due to default disruption budgets +// taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) +// +// // Drift should fail and original node should be untainted +// // TODO: reduce timeouts when disruption waits are factored out +// env.EventuallyExpectNodesUntaintedWithTimeout(11*time.Minute, taintedNodes...) +// +// // Expect that the new nodeClaim/node is kept around after the un-cordon +// nodeList := &corev1.NodeList{} +// Expect(env.Client.List(env, nodeList, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) +// Expect(nodeList.Items).To(HaveLen(int(numPods) + 1)) +// +// nodeClaimList := &karpv1.NodeClaimList{} +// Expect(env.Client.List(env, nodeClaimList, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) +// Expect(nodeClaimList.Items).To(HaveLen(int(numPods) + 1)) +// +// // Expect all the NodeClaims that existed on the initial provisioning loop are not removed +// // Assert this over several minutes to ensure a subsequent disruption controller pass doesn't +// // successfully schedule the evicted pods to the in-flight nodeclaim and disrupt the original node +// Consistently(func(g Gomega) { +// nodeClaims := &karpv1.NodeClaimList{} +// g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) +// startingNodeClaimUIDs := sets.New(lo.Map(startingNodeClaimState, func(m *karpv1.NodeClaim, _ int) types.UID { return m.UID })...) +// nodeClaimUIDs := sets.New(lo.Map(nodeClaims.Items, func(m karpv1.NodeClaim, _ int) types.UID { return m.UID })...) 
+// g.Expect(nodeClaimUIDs.IsSuperset(startingNodeClaimUIDs)).To(BeTrue()) +// }, "2m").Should(Succeed()) +// }) +// It("should not drift any nodes if their PodDisruptionBudgets are unhealthy", func() { +// // Create a deployment that contains a readiness probe that will never succeed +// // This way, the pod will bind to the node, but the PodDisruptionBudget will never go healthy +// var numPods int32 = 2 +// dep := coretest.Deployment(coretest.DeploymentOptions{ +// Replicas: 2, +// PodOptions: coretest.PodOptions{ +// ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, +// PodAntiRequirements: []corev1.PodAffinityTerm{{ +// TopologyKey: corev1.LabelHostname, +// LabelSelector: &metav1.LabelSelector{ +// MatchLabels: map[string]string{"app": "inflate"}, +// }}, +// }, +// ReadinessProbe: &corev1.Probe{ +// ProbeHandler: corev1.ProbeHandler{ +// HTTPGet: &corev1.HTTPGetAction{ +// Port: intstr.FromInt32(80), +// }, +// }, +// }, +// }, +// }) +// selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) +// minAvailable := intstr.FromInt32(numPods - 1) +// pdb := coretest.PodDisruptionBudget(coretest.PDBOptions{ +// Labels: dep.Spec.Template.Labels, +// MinAvailable: &minAvailable, +// }) +// env.ExpectCreated(dep, nodeClass, nodePool, pdb) +// +// nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) +// env.EventuallyExpectCreatedNodeCount("==", int(numPods)) +// +// // Expect pods to be bound but not to be ready since we are intentionally failing the readiness check +// env.EventuallyExpectBoundPodCount(selector, int(numPods)) +// +// // Drift the nodeclaims +// nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} +// env.ExpectUpdated(nodePool) +// +// env.EventuallyExpectDrifted(nodeClaims...) 
+// env.ConsistentlyExpectNoDisruptions(int(numPods), time.Minute) +// }) +// }) +// }) From bc464a82e932d099be10cd9a786cc984566500d0 Mon Sep 17 00:00:00 2001 From: Nick Tran Date: Thu, 11 Jul 2024 12:41:32 -0700 Subject: [PATCH 6/9] pushup changes --- pkg/fake/ssmapi.go | 15 ++++++++------- pkg/providers/amifamily/al2.go | 24 +++++++++++++----------- pkg/providers/amifamily/al2023.go | 25 ++++++++++++++----------- pkg/providers/amifamily/ami.go | 6 ++++-- pkg/providers/amifamily/bottlerocket.go | 22 ++++++++++++---------- pkg/providers/amifamily/windows.go | 24 +++++++++++++----------- 6 files changed, 64 insertions(+), 52 deletions(-) diff --git a/pkg/fake/ssmapi.go b/pkg/fake/ssmapi.go index 680a75f629ff..e37b20d55511 100644 --- a/pkg/fake/ssmapi.go +++ b/pkg/fake/ssmapi.go @@ -17,6 +17,7 @@ package fake import ( "context" "fmt" + "log" "regexp" "strings" @@ -34,10 +35,10 @@ import ( type SSMAPI struct { ssmiface.SSMAPI - Parameters map[string]string - GetParameterOutput *ssm.GetParameterOutput + Parameters map[string]string + GetParameterOutput *ssm.GetParameterOutput GetParametersByPathOutput *ssm.GetParametersByPathOutput - WantErr error + WantErr error } func NewSSMAPI() *SSMAPI { @@ -68,7 +69,7 @@ func (a SSMAPI) GetParameterWithContext(_ context.Context, input *ssm.GetParamet func (a SSMAPI) GetParametersByPathPagesWithContext(_ context.Context, input *ssm.GetParametersByPathInput, f func(*ssm.GetParametersByPathOutput, bool) bool, _ ...request.Option) error { if !lo.FromPtr(input.Recursive) { - panic("fake SSM API currently only supports GetParametersByPathPagesWithContext when recursive is true") + log.Fatalf("fake SSM API currently only supports GetParametersByPathPagesWithContext when recursive is true") } if a.WantErr != nil { return a.WantErr @@ -106,9 +107,9 @@ func (a SSMAPI) GetParametersByPathPagesWithContext(_ context.Context, input *ss func getDefaultParametersForPath(path string) []*ssm.Parameter { suffixes := map[string][]string{ - 
`^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2$`: []string{"recommended/image_id"}, + `^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2$`: []string{"recommended/image_id"}, `^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2-arm64$`: []string{"recommended/image_id"}, - `^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2-gpu$`: []string{"recommended/image_id"}, + `^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2-gpu$`: []string{"recommended/image_id"}, `^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2023$`: []string{ "x86_64/standard/recommended/image_id", "arm64/standard/recommended/image_id", @@ -134,7 +135,7 @@ func getDefaultParametersForPath(path string) []*ssm.Parameter { } return lo.Map(suffixes, func(suffix string, _ int) *ssm.Parameter { return &ssm.Parameter{ - Name: lo.ToPtr(fmt.Sprintf("%s/%s", path, suffix)), + Name: lo.ToPtr(fmt.Sprintf("%s/%s", path, suffix)), Value: lo.ToPtr(fmt.Sprintf("ami-%s", randomdata.Alphanumeric(16))), } }) diff --git a/pkg/providers/amifamily/al2.go b/pkg/providers/amifamily/al2.go index c9e927972e1e..5037ee98621d 100644 --- a/pkg/providers/amifamily/al2.go +++ b/pkg/providers/amifamily/al2.go @@ -43,12 +43,8 @@ type AL2 struct { } func (a AL2) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (DescribeImageQuery, error) { - query := DescribeImageQuery{ - Filters: []*ec2.Filter{&ec2.Filter{ - Name: lo.ToPtr("image-id"), - }}, - KnownRequirements: make(map[string][]scheduling.Requirements), - } + imageIDs := make([]*string, 0, 5) + requirements := make(map[string][]scheduling.Requirements) // Example Paths: // - Latest EKS 1.30 Standard Image: /aws/service/eks/optimized-ami/1.30/amazon-linux-2/recommended/image_id // - Specific EKS 1.30 GPU Image: /aws/service/eks/optimized-ami/1.30/amazon-linux-2-gpu/amazon-eks-node-1.30-v20240625/image_id @@ -71,22 +67,28 @@ func (a AL2) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, 
k if av, err := a.extractAMIVersion(pathComponents[7]); err != nil || av != amiVersion { continue } - query.Filters[0].Values = append(query.Filters[0].Values, lo.ToPtr(value)) - query.KnownRequirements[value] = lo.Map(variants, func(v Variant, _ int) scheduling.Requirements { return v.Requirements() }) + imageIDs = append(imageIDs, lo.ToPtr(value)) + requirements[value] = lo.Map(variants, func(v Variant, _ int) scheduling.Requirements { return v.Requirements() }) } } // Failed to discover any AMIs, we should short circuit AMI discovery - if len(query.Filters[0].Values) == 0 { + if len(imageIDs) == 0 { return DescribeImageQuery{}, fmt.Errorf(`failed to discover any AMIs for alias "al2@%s"`, amiVersion) } - return query, nil + return DescribeImageQuery{ + Filters: []*ec2.Filter{{ + Name: lo.ToPtr("image-id"), + Values: imageIDs, + }}, + KnownRequirements: requirements, + }, nil } func (a AL2) extractAMIVersion(versionStr string) (string, error) { if versionStr == "recommended" { return AMIVersionLatest, nil } - rgx := regexp.MustCompile(`^.*(v\d+)$`) + rgx := regexp.MustCompile(`^.*(v\d{8})$`) matches := rgx.FindStringSubmatch(versionStr) if len(matches) != 2 { return "", fmt.Errorf("failed to extract AMI version") diff --git a/pkg/providers/amifamily/al2023.go b/pkg/providers/amifamily/al2023.go index f2a8dd0eb837..0da37fa3d749 100644 --- a/pkg/providers/amifamily/al2023.go +++ b/pkg/providers/amifamily/al2023.go @@ -38,12 +38,8 @@ type AL2023 struct { } func (a AL2023) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (DescribeImageQuery, error) { - query := DescribeImageQuery{ - Filters: []*ec2.Filter{&ec2.Filter{ - Name: lo.ToPtr("image-id"), - }}, - KnownRequirements: make(map[string][]scheduling.Requirements), - } + requirements := make(map[string][]scheduling.Requirements) + imageIDs := make([]*string, 0, 5) // Example Paths: // - Latest EKS 1.30 arm64 Standard Image: 
/aws/service/eks/optimized-ami/1.30/amazon-linux-2023/arm64/standard/recommended/image_id // - Specific EKS 1.30 amd64 Nvidia Image: /aws/service/eks/optimized-ami/1.30/amazon-linux-2023/x86_64/nvidia/amazon-eks-node-al2023-x86_64-nvidia-1.30-v20240625/image_id @@ -65,14 +61,21 @@ func (a AL2023) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider if err != nil { continue } - query.Filters[0].Values = append(query.Filters[0].Values, lo.ToPtr(value)) - query.KnownRequirements[value] = []scheduling.Requirements{variant.Requirements()} + imageIDs = append(imageIDs, lo.ToPtr(value)) + requirements[value] = []scheduling.Requirements{variant.Requirements()} } // Failed to discover any AMIs, we should short circuit AMI discovery - if len(query.Filters[0].Values) == 0 { - return DescribeImageQuery{}, fmt.Errorf(`failed to discover any AMIs for alias "al2023@%s"`, amiVersion) + if len(imageIDs) == 0 { + return DescribeImageQuery{}, fmt.Errorf(`failed to discover AMIs for alias "al2023@%s"`, amiVersion) } - return query, nil + + return DescribeImageQuery{ + Filters: []*ec2.Filter{{ + Name: lo.ToPtr("image-id"), + Values: imageIDs, + }}, + KnownRequirements: requirements, + }, nil } func (a AL2023) extractAMIVersion(versionStr string) (string, error) { diff --git a/pkg/providers/amifamily/ami.go b/pkg/providers/amifamily/ami.go index c0c555e485e9..513229c230fb 100644 --- a/pkg/providers/amifamily/ami.go +++ b/pkg/providers/amifamily/ami.go @@ -93,7 +93,10 @@ func (p *DefaultProvider) DescribeImageQueries(ctx context.Context, nodeClass *v return nil, fmt.Errorf("getting kubernetes version, %w", err) } query, err := amiFamily.DescribeImageQuery(ctx, p.ssmProvider, kubernetesVersion, amiVersion) - return []DescribeImageQuery{query}, err + if err != nil { + return []DescribeImageQuery{}, err + } + return []DescribeImageQuery{query}, nil } idFilter := &ec2.Filter{Name: aws.String("image-id")} @@ -206,4 +209,3 @@ func MapToInstanceTypes(instanceTypes 
[]*cloudprovider.InstanceType, amis []v1.A } return amiIDs } - diff --git a/pkg/providers/amifamily/bottlerocket.go b/pkg/providers/amifamily/bottlerocket.go index 5ccaa3f3ac70..0c699240a206 100644 --- a/pkg/providers/amifamily/bottlerocket.go +++ b/pkg/providers/amifamily/bottlerocket.go @@ -42,12 +42,8 @@ type Bottlerocket struct { } func (b Bottlerocket) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (DescribeImageQuery, error) { - query := DescribeImageQuery{ - Filters: []*ec2.Filter{&ec2.Filter{ - Name: lo.ToPtr("image-id"), - }}, - KnownRequirements: make(map[string][]scheduling.Requirements), - } + imageIDs := make([]*string, 0, 5) + requirements := make(map[string][]scheduling.Requirements) // Example Paths: // - Latest EKS 1.30 amd64 Standard Image: /aws/service/bottlerocket/aws-k8s-1.30/x86_64/latest/image_id // - Specific EKS 1.30 arm64 Nvidia Image: /aws/service/bottlerocket/aws-k8s-1.30-nvidia/arm64/1.10.0/image_id @@ -66,15 +62,21 @@ func (b Bottlerocket) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Pr if len(pathComponents) != 8 || pathComponents[7] != "image_id" || pathComponents[6] != amiVersion { continue } - query.Filters[0].Values = append(query.Filters[0].Values, lo.ToPtr(value)) - query.KnownRequirements[value] = lo.Map(variants, func(v Variant, _ int) scheduling.Requirements { return v.Requirements() }) + imageIDs = append(imageIDs, lo.ToPtr(value)) + requirements[value] = lo.Map(variants, func(v Variant, _ int) scheduling.Requirements { return v.Requirements() }) } } // Failed to discover any AMIs, we should short circuit AMI discovery - if len(query.Filters[0].Values) == 0 { + if len(imageIDs) == 0 { return DescribeImageQuery{}, fmt.Errorf(`failed to discover any AMIs for alias "bottlerocket@%s"`, amiVersion) } - return query, nil + return DescribeImageQuery{ + Filters: []*ec2.Filter{{ + Name: lo.ToPtr("image-id"), + Values: imageIDs, + }}, + KnownRequirements: 
make(map[string][]scheduling.Requirements), + }, nil } // UserData returns the default userdata script for the AMI Family diff --git a/pkg/providers/amifamily/windows.go b/pkg/providers/amifamily/windows.go index af49063ac34c..f50a7e1a0c66 100644 --- a/pkg/providers/amifamily/windows.go +++ b/pkg/providers/amifamily/windows.go @@ -45,16 +45,12 @@ type Windows struct { // Only the core version of each version is supported by Karpenter, so this field only indicates the year. Version string // Build is a specific build code associated with the Version - Build string + Build string } func (w Windows) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (DescribeImageQuery, error) { - query := DescribeImageQuery{ - Filters: []*ec2.Filter{&ec2.Filter{ - Name: lo.ToPtr("image-id"), - }}, - KnownRequirements: make(map[string][]scheduling.Requirements), - } + requirements := make(map[string][]scheduling.Requirements) + imageIDs := make([]*string, 0, 5) // SSM aliases are only maintained for the latest Windows AMI releases if amiVersion != AMIVersionLatest { @@ -74,17 +70,23 @@ func (w Windows) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provide if len(matches) != 3 || matches[1] != w.Version || matches[2] != k8sVersion { continue } - query.Filters[0].Values = append(query.Filters[0].Values, lo.ToPtr(value)) - query.KnownRequirements[value] = []scheduling.Requirements{scheduling.NewRequirements( + imageIDs = append(imageIDs, lo.ToPtr(value)) + requirements[value] = []scheduling.Requirements{scheduling.NewRequirements( scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpIn, string(corev1.Windows)), scheduling.NewRequirement(corev1.LabelWindowsBuild, corev1.NodeSelectorOpIn, w.Build), )} } // Failed to discover any AMIs, we should short circuit AMI discovery - if len(query.Filters[0].Values) == 0 { + if len(imageIDs) == 0 { return DescribeImageQuery{}, fmt.Errorf(`failed to discover any AMIs for 
alias "windows%s@%s"`, w.Version, amiVersion) } - return query, nil + return DescribeImageQuery{ + Filters: []*ec2.Filter{&ec2.Filter{ + Name: lo.ToPtr("image-id"), + Values: imageIDs, + }}, + KnownRequirements: requirements, + }, nil } // UserData returns the default userdata script for the AMI Family From b1efa2d820240963c06d8308fd6c2ca98f9afb6f Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Mon, 15 Jul 2024 11:31:28 -0700 Subject: [PATCH 7/9] misc fixes + conversion --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 2467 ++++++++--------- pkg/apis/v1/ec2nodeclass.go | 20 +- pkg/apis/v1/ec2nodeclass_conversion.go | 24 +- pkg/apis/v1/ec2nodeclass_conversion_test.go | 69 +- pkg/apis/v1/ec2nodeclass_hash_test.go | 50 +- .../v1/ec2nodeclass_validation_cel_test.go | 6 - pkg/apis/v1/labels.go | 1 + pkg/apis/v1beta1/labels.go | 1 + pkg/cloudprovider/suite_test.go | 4 +- pkg/controllers/nodeclass/status/ami_test.go | 6 +- pkg/controllers/nodeclass/status/readiness.go | 5 + pkg/fake/ssmapi.go | 3 +- pkg/providers/amifamily/al2023.go | 1 + pkg/providers/amifamily/ami.go | 7 +- pkg/providers/amifamily/bottlerocket.go | 4 +- pkg/providers/amifamily/resolver.go | 2 +- pkg/providers/amifamily/types.go | 13 +- pkg/providers/ssm/provider.go | 6 +- pkg/providers/version/version.go | 2 +- pkg/test/nodeclass.go | 8 +- test/suites/drift/suite_test.go | 64 +- .../cloudformation.yaml | 2 +- .../cloudformation.yaml | 4 +- .../en/preview/reference/cloudformation.md | 4 +- 24 files changed, 1393 insertions(+), 1380 deletions(-) diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 98e3dcfc5f17..9dc67492e313 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -9,1366 +9,1307 @@ spec: group: karpenter.k8s.aws names: categories: - - karpenter + - karpenter kind: EC2NodeClass listKind: EC2NodeClassList plural: ec2nodeclasses shortNames: - - ec2nc - - 
ec2ncs + - ec2nc + - ec2ncs singular: ec2nodeclass scope: Cluster versions: - - additionalPrinterColumns: - - jsonPath: .status.conditions[?(@.type=="Ready")].status - name: Ready - type: string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - - jsonPath: .spec.role - name: Role - priority: 1 - type: string - name: v1 - schema: - openAPIV3Schema: - description: EC2NodeClass is the Schema for the EC2NodeClass API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: |- - EC2NodeClassSpec is the top level specification for the AWS Karpenter Provider. - This will contain configuration necessary to launch instances in AWS. - properties: - amiSelectorTerms: - description: AMISelectorTerms is a list of or ami selector terms. - The terms are ORed. - items: - description: |- - AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. - properties: - alias: - description: |- - Alias specifies which EKS optimized AMI to select. - Each alias consistes of a family and a version, specified as "family@version". - Valid families include: al2, al2023, bottlerocket, windows2019, and windows2022. 
- The version can either be pinned to a specific AMI release, with that AMIs version format (ex: "al2023@v20240625" or "bottlerocket@1.10.0"). - The version can also be set to "latest" for any family. Setting the version to latest will result in drift when a new AMI is released. - Note: The Windows families do **not** support version pinning, and only latest may be used. - maxLength: 30 - type: string - x-kubernetes-validations: - - message: '''alias'' is improperly formatted, must match the - format ''family@version''' - rule: self.matches('^[a-zA-Z0-9]*@.*$') - - message: 'family is not supported, must be one of the following: - ''al2'', ''al2023'', ''bottlerocket'', ''windows2019'', - ''windows2022''' - rule: self.find('^[^@]+') in ['al2','al2023','bottlerocket','windows2019','windows2022'] - id: - description: ID is the ami id in EC2 - pattern: ami-[0-9a-z]+ - type: string - name: - description: |- - Name is the ami name in EC2. - This value is the name field, which is different from the name tag. - type: string - owner: - description: |- - Owner is the owner for the ami. - You can specify a combination of AWS account IDs, "self", "amazon", and "aws-marketplace" - type: string - tags: - additionalProperties: + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + - jsonPath: .spec.role + name: Role + priority: 1 + type: string + name: v1 + schema: + openAPIV3Schema: + description: EC2NodeClass is the Schema for the EC2NodeClass API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + EC2NodeClassSpec is the top level specification for the AWS Karpenter Provider. + This will contain configuration necessary to launch instances in AWS. + properties: + amiSelectorTerms: + description: AMISelectorTerms is a list of or ami selector terms. The terms are ORed. + items: + description: |- + AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. + properties: + alias: + description: |- + Alias specifies which EKS optimized AMI to select. + Each alias consists of a family and a version, specified as "family@version". + Valid families include: al2, al2023, bottlerocket, windows2019, and windows2022. + The version can either be pinned to a specific AMI release, with that AMIs version format (ex: "al2023@v20240625" or "bottlerocket@1.10.0"). + The version can also be set to "latest" for any family. Setting the version to latest will result in drift when a new AMI is released. + Note: The Windows families do **not** support version pinning, and only latest may be used. + maxLength: 30 type: string - description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. 
- maxProperties: 20 - type: object - x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') - type: object - maxItems: 30 - minItems: 1 - type: array - x-kubernetes-validations: - - message: expected at least one, got none, ['tags', 'id', 'name', - 'alias'] - rule: self.all(x, has(x.tags) || has(x.id) || has(x.name) || has(x.alias)) - - message: '''id'' is mutually exclusive, cannot be set with a combination - of other fields in amiSelectorTerms' - rule: '!self.exists(x, has(x.id) && (has(x.alias) || has(x.tags) - || has(x.name) || has(x.owner)))' - - message: '''alias'' is mutually exclusive, cannot be set with a - combination of other fields in amiSelectorTerms' - rule: '!self.exists(x, has(x.alias) && (has(x.id) || has(x.tags) - || has(x.name) || has(x.owner)))' - - message: '''alias'' is mutually exclusive, cannot be set with a - combination of other amiSelectorTerms' - rule: '!(self.exists(x, has(x.alias)) && self.size() != 1)' - associatePublicIPAddress: - description: AssociatePublicIPAddress controls if public IP addresses - are assigned to instances that are launched with the nodeclass. - type: boolean - blockDeviceMappings: - description: BlockDeviceMappings to be applied to provisioned nodes. - items: - properties: - deviceName: - description: The device name (for example, /dev/sdh or xvdh). - type: string - ebs: - description: EBS contains parameters used to automatically set - up EBS volumes when an instance is launched. - properties: - deleteOnTermination: - description: DeleteOnTermination indicates whether the EBS - volume is deleted on instance termination. - type: boolean - encrypted: - description: |- - Encrypted indicates whether the EBS volume is encrypted. Encrypted volumes can only - be attached to instances that support Amazon EBS encryption. If you are creating - a volume from a snapshot, you can't specify an encryption value. 
- type: boolean - iops: - description: |- - IOPS is the number of I/O operations per second (IOPS). For gp3, io1, and io2 volumes, - this represents the number of IOPS that are provisioned for the volume. For - gp2 volumes, this represents the baseline performance of the volume and the - rate at which the volume accumulates I/O credits for bursting. + x-kubernetes-validations: + - message: '''alias'' is improperly formatted, must match the format ''family@version''' + rule: self.matches('^[a-zA-Z0-9]*@.*$') + - message: 'family is not supported, must be one of the following: ''al2'', ''al2023'', ''bottlerocket'', ''windows2019'', ''windows2022''' + rule: self.find('^[^@]+') in ['al2','al2023','bottlerocket','windows2019','windows2022'] + id: + description: ID is the ami id in EC2 + pattern: ami-[0-9a-z]+ + type: string + name: + description: |- + Name is the ami name in EC2. + This value is the name field, which is different from the name tag. + type: string + owner: + description: |- + Owner is the owner for the ami. + You can specify a combination of AWS account IDs, "self", "amazon", and "aws-marketplace" + type: string + tags: + additionalProperties: + type: string + description: |- + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. 
+ maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + minItems: 1 + type: array + x-kubernetes-validations: + - message: expected at least one, got none, ['tags', 'id', 'name', 'alias'] + rule: self.all(x, has(x.tags) || has(x.id) || has(x.name) || has(x.alias)) + - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in amiSelectorTerms' + rule: '!self.exists(x, has(x.id) && (has(x.alias) || has(x.tags) || has(x.name) || has(x.owner)))' + - message: '''alias'' is mutually exclusive, cannot be set with a combination of other fields in amiSelectorTerms' + rule: '!self.exists(x, has(x.alias) && (has(x.id) || has(x.tags) || has(x.name) || has(x.owner)))' + - message: '''alias'' is mutually exclusive, cannot be set with a combination of other amiSelectorTerms' + rule: '!(self.exists(x, has(x.alias)) && self.size() != 1)' + associatePublicIPAddress: + description: AssociatePublicIPAddress controls if public IP addresses are assigned to instances that are launched with the nodeclass. + type: boolean + blockDeviceMappings: + description: BlockDeviceMappings to be applied to provisioned nodes. + items: + properties: + deviceName: + description: The device name (for example, /dev/sdh or xvdh). + type: string + ebs: + description: EBS contains parameters used to automatically set up EBS volumes when an instance is launched. + properties: + deleteOnTermination: + description: DeleteOnTermination indicates whether the EBS volume is deleted on instance termination. + type: boolean + encrypted: + description: |- + Encrypted indicates whether the EBS volume is encrypted. Encrypted volumes can only + be attached to instances that support Amazon EBS encryption. If you are creating + a volume from a snapshot, you can't specify an encryption value. 
+ type: boolean + iops: + description: |- + IOPS is the number of I/O operations per second (IOPS). For gp3, io1, and io2 volumes, + this represents the number of IOPS that are provisioned for the volume. For + gp2 volumes, this represents the baseline performance of the volume and the + rate at which the volume accumulates I/O credits for bursting. - The following are the supported values for each volume type: + The following are the supported values for each volume type: - * gp3: 3,000-16,000 IOPS + * gp3: 3,000-16,000 IOPS - * io1: 100-64,000 IOPS + * io1: 100-64,000 IOPS - * io2: 100-64,000 IOPS + * io2: 100-64,000 IOPS - For io1 and io2 volumes, we guarantee 64,000 IOPS only for Instances built - on the Nitro System (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html#ec2-nitro-instances). - Other instance families guarantee performance up to 32,000 IOPS. + For io1 and io2 volumes, we guarantee 64,000 IOPS only for Instances built + on the Nitro System (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html#ec2-nitro-instances). + Other instance families guarantee performance up to 32,000 IOPS. - This parameter is supported for io1, io2, and gp3 volumes only. This parameter - is not supported for gp2, st1, sc1, or standard volumes. - format: int64 - type: integer - kmsKeyID: - description: KMSKeyID (ARN) of the symmetric Key Management - Service (KMS) CMK used for encryption. - type: string - snapshotID: - description: SnapshotID is the ID of an EBS snapshot - type: string - throughput: - description: |- - Throughput to provision for a gp3 volume, with a maximum of 1,000 MiB/s. - Valid Range: Minimum value of 125. Maximum value of 1000. - format: int64 - type: integer - volumeSize: - description: |- - VolumeSize in `Gi`, `G`, `Ti`, or `T`. You must specify either a snapshot ID or - a volume size. 
The following are the supported volumes sizes for each volume - type: + This parameter is supported for io1, io2, and gp3 volumes only. This parameter + is not supported for gp2, st1, sc1, or standard volumes. + format: int64 + type: integer + kmsKeyID: + description: KMSKeyID (ARN) of the symmetric Key Management Service (KMS) CMK used for encryption. + type: string + snapshotID: + description: SnapshotID is the ID of an EBS snapshot + type: string + throughput: + description: |- + Throughput to provision for a gp3 volume, with a maximum of 1,000 MiB/s. + Valid Range: Minimum value of 125. Maximum value of 1000. + format: int64 + type: integer + volumeSize: + description: |- + VolumeSize in `Gi`, `G`, `Ti`, or `T`. You must specify either a snapshot ID or + a volume size. The following are the supported volumes sizes for each volume + type: - * gp2 and gp3: 1-16,384 + * gp2 and gp3: 1-16,384 - * io1 and io2: 4-16,384 + * io1 and io2: 4-16,384 - * st1 and sc1: 125-16,384 + * st1 and sc1: 125-16,384 - * standard: 1-1,024 - pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$ - type: string - volumeType: - description: |- - VolumeType of the block device. - For more information, see Amazon EBS volume types (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html) - in the Amazon Elastic Compute Cloud User Guide. - enum: - - standard - - io1 - - io2 - - gp2 - - sc1 - - st1 - - gp3 - type: string - type: object - x-kubernetes-validations: - - message: snapshotID or volumeSize must be defined - rule: has(self.snapshotID) || has(self.volumeSize) - rootVolume: - description: |- - RootVolume is a flag indicating if this device is mounted as kubelet root dir. You can - configure at most one root volume in BlockDeviceMappings. 
- type: boolean - type: object - maxItems: 50 - type: array - x-kubernetes-validations: - - message: must have only one blockDeviceMappings with rootVolume - rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() - <= 1 - context: - description: |- - Context is a Reserved field in EC2 APIs - https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html - type: string - detailedMonitoring: - description: DetailedMonitoring controls if detailed monitoring is - enabled for instances that are launched - type: boolean - instanceProfile: - description: |- - InstanceProfile is the AWS entity that instances use. - This field is mutually exclusive from role. - The instance profile should already have a role assigned to it that Karpenter - has PassRole permission on for instance launch using this instanceProfile to succeed. - type: string - x-kubernetes-validations: - - message: instanceProfile cannot be empty - rule: self != '' - instanceStorePolicy: - description: InstanceStorePolicy specifies how to handle instance-store - disks. - enum: - - RAID0 - type: string - kubelet: - description: |- - Kubelet defines args to be used when configuring kubelet on provisioned nodes. - They are a subset of the upstream types, recognizing not all options may be supported. - Wherever possible, the types and names should reflect the upstream kubelet types. - properties: - clusterDNS: - description: |- - clusterDNS is a list of IP addresses for the cluster DNS server. - Note that not all providers may use all addresses. - items: - type: string - type: array - cpuCFSQuota: - description: CPUCFSQuota enables CPU CFS quota enforcement for - containers that specify CPU limits. 
- type: boolean - evictionHard: - additionalProperties: - type: string - description: EvictionHard is the map of signal names to quantities - that define hard eviction thresholds - type: object - x-kubernetes-validations: - - message: valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] - rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) - evictionMaxPodGracePeriod: - description: |- - EvictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in - response to soft eviction thresholds being met. - format: int32 - type: integer - evictionSoft: - additionalProperties: - type: string - description: EvictionSoft is the map of signal names to quantities - that define soft eviction thresholds - type: object - x-kubernetes-validations: - - message: valid keys for evictionSoft are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] - rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) - evictionSoftGracePeriod: - additionalProperties: - type: string - description: EvictionSoftGracePeriod is the map of signal names - to quantities that define grace periods for each eviction signal - type: object - x-kubernetes-validations: - - message: valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] - rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) - imageGCHighThresholdPercent: - description: |- - ImageGCHighThresholdPercent is the percent of disk usage after which image - garbage collection is always run. 
The percent is calculated by dividing this - field value by 100, so this field must be between 0 and 100, inclusive. - When specified, the value must be greater than ImageGCLowThresholdPercent. - format: int32 - maximum: 100 - minimum: 0 - type: integer - imageGCLowThresholdPercent: - description: |- - ImageGCLowThresholdPercent is the percent of disk usage before which image - garbage collection is never run. Lowest disk usage to garbage collect to. - The percent is calculated by dividing this field value by 100, - so the field value must be between 0 and 100, inclusive. - When specified, the value must be less than imageGCHighThresholdPercent - format: int32 - maximum: 100 - minimum: 0 - type: integer - kubeReserved: - additionalProperties: - type: string - description: KubeReserved contains resources reserved for Kubernetes - system components. - type: object - x-kubernetes-validations: - - message: valid keys for kubeReserved are ['cpu','memory','ephemeral-storage','pid'] - rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' - || x=='pid') - - message: kubeReserved value cannot be a negative resource quantity - rule: self.all(x, !self[x].startsWith('-')) - maxPods: - description: |- - MaxPods is an override for the maximum number of pods that can run on - a worker node instance. - format: int32 - minimum: 0 - type: integer - podsPerCore: - description: |- - PodsPerCore is an override for the number of pods that can run on a worker node - instance based on the number of cpu cores. This value cannot exceed MaxPods, so, if - MaxPods is a lower value, that value will be used. - format: int32 - minimum: 0 - type: integer - systemReserved: - additionalProperties: - type: string - description: SystemReserved contains resources reserved for OS - system daemons and kernel memory. 
+ * standard: 1-1,024 + pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$ + type: string + volumeType: + description: |- + VolumeType of the block device. + For more information, see Amazon EBS volume types (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html) + in the Amazon Elastic Compute Cloud User Guide. + enum: + - standard + - io1 + - io2 + - gp2 + - sc1 + - st1 + - gp3 + type: string + type: object + x-kubernetes-validations: + - message: snapshotID or volumeSize must be defined + rule: has(self.snapshotID) || has(self.volumeSize) + rootVolume: + description: |- + RootVolume is a flag indicating if this device is mounted as kubelet root dir. You can + configure at most one root volume in BlockDeviceMappings. + type: boolean type: object - x-kubernetes-validations: - - message: valid keys for systemReserved are ['cpu','memory','ephemeral-storage','pid'] - rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' - || x=='pid') - - message: systemReserved value cannot be a negative resource - quantity - rule: self.all(x, !self[x].startsWith('-')) - type: object - x-kubernetes-validations: - - message: imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent - rule: 'has(self.imageGCHighThresholdPercent) && has(self.imageGCLowThresholdPercent) - ? self.imageGCHighThresholdPercent > self.imageGCLowThresholdPercent : - true' - - message: evictionSoft OwnerKey does not have a matching evictionSoftGracePeriod - rule: has(self.evictionSoft) ? self.evictionSoft.all(e, (e in self.evictionSoftGracePeriod)):true - - message: evictionSoftGracePeriod OwnerKey does not have a matching - evictionSoft - rule: has(self.evictionSoftGracePeriod) ? 
self.evictionSoftGracePeriod.all(e, - (e in self.evictionSoft)):true - metadataOptions: - default: - httpEndpoint: enabled - httpProtocolIPv6: disabled - httpPutResponseHopLimit: 1 - httpTokens: required - description: |- - MetadataOptions for the generated launch template of provisioned nodes. - - - This specifies the exposure of the Instance Metadata Service to - provisioned EC2 nodes. For more information, - see Instance Metadata and User Data - (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) - in the Amazon Elastic Compute Cloud User Guide. - - - Refer to recommended, security best practices - (https://aws.github.io/aws-eks-best-practices/security/docs/iam/#restrict-access-to-the-instance-profile-assigned-to-the-worker-node) - for limiting exposure of Instance Metadata and User Data to pods. - If omitted, defaults to httpEndpoint enabled, with httpProtocolIPv6 - disabled, with httpPutResponseLimit of 1, and with httpTokens - required. - properties: - httpEndpoint: - default: enabled - description: |- - HTTPEndpoint enables or disables the HTTP metadata endpoint on provisioned - nodes. If metadata options is non-nil, but this parameter is not specified, - the default state is "enabled". - - - If you specify a value of "disabled", instance metadata will not be accessible - on the node. - enum: - - enabled - - disabled - type: string - httpProtocolIPv6: - default: disabled - description: |- - HTTPProtocolIPv6 enables or disables the IPv6 endpoint for the instance metadata - service on provisioned nodes. If metadata options is non-nil, but this parameter - is not specified, the default state is "disabled". - enum: - - enabled - - disabled - type: string - httpPutResponseHopLimit: - default: 1 - description: |- - HTTPPutResponseHopLimit is the desired HTTP PUT response hop limit for - instance metadata requests. The larger the number, the further instance - metadata requests can travel. Possible values are integers from 1 to 64. 
- If metadata options is non-nil, but this parameter is not specified, the - default value is 1. - format: int64 - maximum: 64 - minimum: 1 - type: integer - httpTokens: - default: required - description: |- - HTTPTokens determines the state of token usage for instance metadata - requests. If metadata options is non-nil, but this parameter is not - specified, the default state is "required". - - - If the state is optional, one can choose to retrieve instance metadata with - or without a signed token header on the request. If one retrieves the IAM - role credentials without a token, the version 1.0 role credentials are - returned. If one retrieves the IAM role credentials using a valid signed - token, the version 2.0 role credentials are returned. - - - If the state is "required", one must send a signed token header with any - instance metadata retrieval requests. In this state, retrieving the IAM - role credentials always returns the version 2.0 credentials; the version - 1.0 credentials are not available. - enum: - - required - - optional - type: string - type: object - role: - description: |- - Role is the AWS identity that nodes use. This field is immutable. - This field is mutually exclusive from instanceProfile. - Marking this field as immutable avoids concerns around terminating managed instance profiles from running instances. - This field may be made mutable in the future, assuming the correct garbage collection and drift handling is implemented - for the old instance profiles on an update. - type: string - x-kubernetes-validations: - - message: role cannot be empty - rule: self != '' - - message: immutable field changed - rule: self == oldSelf - securityGroupSelectorTerms: - description: SecurityGroupSelectorTerms is a list of or security group - selector terms. The terms are ORed. 
- items: + maxItems: 50 + type: array + x-kubernetes-validations: + - message: must have only one blockDeviceMappings with rootVolume + rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() <= 1 + context: description: |- - SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. + Context is a Reserved field in EC2 APIs + https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html + type: string + detailedMonitoring: + description: DetailedMonitoring controls if detailed monitoring is enabled for instances that are launched + type: boolean + instanceProfile: + description: |- + InstanceProfile is the AWS entity that instances use. + This field is mutually exclusive from role. + The instance profile should already have a role assigned to it that Karpenter + has PassRole permission on for instance launch using this instanceProfile to succeed. + type: string + x-kubernetes-validations: + - message: instanceProfile cannot be empty + rule: self != '' + instanceStorePolicy: + description: InstanceStorePolicy specifies how to handle instance-store disks. + enum: + - RAID0 + type: string + kubelet: + description: |- + Kubelet defines args to be used when configuring kubelet on provisioned nodes. + They are a subset of the upstream types, recognizing not all options may be supported. + Wherever possible, the types and names should reflect the upstream kubelet types. properties: - id: - description: ID is the security group id in EC2 - pattern: sg-[0-9a-z]+ - type: string - name: + clusterDNS: description: |- - Name is the security group name in EC2. - This value is the name field, which is different from the name tag. - type: string - tags: + clusterDNS is a list of IP addresses for the cluster DNS server. + Note that not all providers may use all addresses. 
+ items: + type: string + type: array + cpuCFSQuota: + description: CPUCFSQuota enables CPU CFS quota enforcement for containers that specify CPU limits. + type: boolean + evictionHard: additionalProperties: type: string + pattern: ^((\d{1,2}(\.\d{1,2})?|100(\.0{1,2})?)%||(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?)$ + description: EvictionHard is the map of signal names to quantities that define hard eviction thresholds + type: object + x-kubernetes-validations: + - message: valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + evictionMaxPodGracePeriod: description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. - maxProperties: 20 + EvictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in + response to soft eviction thresholds being met. 
+ format: int32 + type: integer + evictionSoft: + additionalProperties: + type: string + pattern: ^((\d{1,2}(\.\d{1,2})?|100(\.0{1,2})?)%||(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?)$ + description: EvictionSoft is the map of signal names to quantities that define soft eviction thresholds type: object x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') - type: object - maxItems: 30 - type: array - x-kubernetes-validations: - - message: securityGroupSelectorTerms cannot be empty - rule: self.size() != 0 - - message: expected at least one, got none, ['tags', 'id', 'name'] - rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) - - message: '''id'' is mutually exclusive, cannot be set with a combination - of other fields in securityGroupSelectorTerms' - rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))' - - message: '''name'' is mutually exclusive, cannot be set with a combination - of other fields in securityGroupSelectorTerms' - rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))' - subnetSelectorTerms: - description: SubnetSelectorTerms is a list of or subnet selector terms. - The terms are ORed. - items: - description: |- - SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. 
- properties: - id: - description: ID is the subnet id in EC2 - pattern: subnet-[0-9a-z]+ - type: string - tags: + - message: valid keys for evictionSoft are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + evictionSoftGracePeriod: additionalProperties: type: string - description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. - maxProperties: 20 + description: EvictionSoftGracePeriod is the map of signal names to quantities that define grace periods for each eviction signal type: object x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') - type: object - maxItems: 30 - type: array - x-kubernetes-validations: - - message: subnetSelectorTerms cannot be empty - rule: self.size() != 0 - - message: expected at least one, got none, ['tags', 'id'] - rule: self.all(x, has(x.tags) || has(x.id)) - - message: '''id'' is mutually exclusive, cannot be set with a combination - of other fields in subnetSelectorTerms' - rule: '!self.all(x, has(x.id) && has(x.tags))' - tags: - additionalProperties: - type: string - description: Tags to be applied on ec2 resources like instances and - launch templates. 
- type: object - x-kubernetes-validations: - - message: empty tag keys aren't supported - rule: self.all(k, k != '') - - message: tag contains a restricted tag matching kubernetes.io/cluster/ - rule: self.all(k, !k.startsWith('kubernetes.io/cluster') ) - - message: tag contains a restricted tag matching karpenter.sh/nodepool - rule: self.all(k, k != 'karpenter.sh/nodepool') - - message: tag contains a restricted tag matching karpenter.sh/managed-by - rule: self.all(k, k !='karpenter.sh/managed-by') - - message: tag contains a restricted tag matching karpenter.sh/nodeclaim - rule: self.all(k, k !='karpenter.sh/nodeclaim') - - message: tag contains a restricted tag matching karpenter.k8s.aws/ec2nodeclass - rule: self.all(k, k !='karpenter.k8s.aws/ec2nodeclass') - userData: - description: |- - UserData to be applied to the provisioned nodes. - It must be in the appropriate format based on the AMIFamily in use. Karpenter will merge certain fields into - this UserData to ensure nodes are being provisioned with the correct configuration. - type: string - required: - - amiSelectorTerms - - securityGroupSelectorTerms - - subnetSelectorTerms - type: object - x-kubernetes-validations: - - message: must specify exactly one of ['role', 'instanceProfile'] - rule: (has(self.role) && !has(self.instanceProfile)) || (!has(self.role) - && has(self.instanceProfile)) - - message: changing from 'instanceProfile' to 'role' is not supported. - You must delete and recreate this node class if you want to change - this. - rule: (has(oldSelf.role) && has(self.role)) || (has(oldSelf.instanceProfile) - && has(self.instanceProfile)) - status: - description: EC2NodeClassStatus contains the resolved state of the EC2NodeClass - properties: - amis: - description: |- - AMI contains the current AMI values that are available to the - cluster under the AMI selectors. 
- items: - description: AMI contains resolved AMI selector values utilized - for node launch - properties: - id: - description: ID of the AMI - type: string - name: - description: Name of the AMI - type: string - requirements: - description: Requirements of the AMI to be utilized on an instance - type - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that the selector applies to. - type: string - operator: - description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. - type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - required: - - id - - requirements - type: object - type: array - conditions: - description: Conditions contains signals for health and readiness - items: - description: Condition aliases the upstream type and adds additional - helper methods - properties: - lastTransitionTime: - description: |- - lastTransitionTime is the last time the condition transitioned from one status to another. - This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. - format: date-time - type: string - message: - description: |- - message is a human readable message indicating details about the transition. - This may be an empty string. 
- maxLength: 32768 - type: string - observedGeneration: + - message: valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + imageGCHighThresholdPercent: description: |- - observedGeneration represents the .metadata.generation that the condition was set based upon. - For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date - with respect to the current state of the instance. - format: int64 + ImageGCHighThresholdPercent is the percent of disk usage after which image + garbage collection is always run. The percent is calculated by dividing this + field value by 100, so this field must be between 0 and 100, inclusive. + When specified, the value must be greater than ImageGCLowThresholdPercent. + format: int32 + maximum: 100 minimum: 0 type: integer - reason: - description: |- - reason contains a programmatic identifier indicating the reason for the condition's last transition. - Producers of specific condition types may define expected values and meanings for this field, - and whether the values are considered a guaranteed API. - The value should be a CamelCase string. - This field may not be empty. - maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, Unknown. - enum: - - "True" - - "False" - - Unknown - type: string - type: + imageGCLowThresholdPercent: description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. 
- --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array - instanceProfile: - description: InstanceProfile contains the resolved instance profile - for the role - type: string - securityGroups: - description: |- - SecurityGroups contains the current Security Groups values that are available to the - cluster under the SecurityGroups selectors. - items: - description: SecurityGroup contains resolved SecurityGroup selector - values utilized for node launch - properties: - id: - description: ID of the security group - type: string - name: - description: Name of the security group - type: string - required: - - id - type: object - type: array - subnets: - description: |- - Subnets contains the current Subnet values that are available to the - cluster under the subnet selectors. - items: - description: Subnet contains resolved Subnet selector values utilized - for node launch - properties: - id: - description: ID of the subnet - type: string - zone: - description: The associated availability zone - type: string - zoneID: - description: The associated availability zone ID - type: string - required: - - id - - zone - type: object - type: array - type: object - type: object - served: true - storage: true - subresources: - status: {} - - name: v1beta1 - schema: - openAPIV3Schema: - description: EC2NodeClass is the Schema for the EC2NodeClass API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. 
- Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: |- - EC2NodeClassSpec is the top level specification for the AWS Karpenter Provider. - This will contain configuration necessary to launch instances in AWS. - properties: - amiFamily: - description: AMIFamily is the AMI family that instances use. - enum: - - AL2 - - AL2023 - - Bottlerocket - - Ubuntu - - Custom - - Windows2019 - - Windows2022 - type: string - amiSelectorTerms: - description: AMISelectorTerms is a list of or ami selector terms. - The terms are ORed. - items: - description: |- - AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. - properties: - id: - description: ID is the ami id in EC2 - pattern: ami-[0-9a-z]+ - type: string - name: + ImageGCLowThresholdPercent is the percent of disk usage before which image + garbage collection is never run. Lowest disk usage to garbage collect to. + The percent is calculated by dividing this field value by 100, + so the field value must be between 0 and 100, inclusive. 
+ When specified, the value must be less than imageGCHighThresholdPercent + format: int32 + maximum: 100 + minimum: 0 + type: integer + kubeReserved: + additionalProperties: + type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + description: KubeReserved contains resources reserved for Kubernetes system components. + type: object + x-kubernetes-validations: + - message: valid keys for kubeReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid') + - message: kubeReserved value cannot be a negative resource quantity + rule: self.all(x, !self[x].startsWith('-')) + maxPods: description: |- - Name is the ami name in EC2. - This value is the name field, which is different from the name tag. - type: string - owner: + MaxPods is an override for the maximum number of pods that can run on + a worker node instance. + format: int32 + minimum: 0 + type: integer + podsPerCore: description: |- - Owner is the owner for the ami. - You can specify a combination of AWS account IDs, "self", "amazon", and "aws-marketplace" - type: string - tags: + PodsPerCore is an override for the number of pods that can run on a worker node + instance based on the number of cpu cores. This value cannot exceed MaxPods, so, if + MaxPods is a lower value, that value will be used. + format: int32 + minimum: 0 + type: integer + systemReserved: additionalProperties: type: string - description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. - maxProperties: 20 + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + description: SystemReserved contains resources reserved for OS system daemons and kernel memory. 
type: object x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') + - message: valid keys for systemReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid') + - message: systemReserved value cannot be a negative resource quantity + rule: self.all(x, !self[x].startsWith('-')) type: object - maxItems: 30 - type: array - x-kubernetes-validations: - - message: expected at least one, got none, ['tags', 'id', 'name'] - rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) - - message: '''id'' is mutually exclusive, cannot be set with a combination - of other fields in amiSelectorTerms' - rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name) || - has(x.owner)))' - associatePublicIPAddress: - description: AssociatePublicIPAddress controls if public IP addresses - are assigned to instances that are launched with the nodeclass. - type: boolean - blockDeviceMappings: - description: BlockDeviceMappings to be applied to provisioned nodes. - items: - properties: - deviceName: - description: The device name (for example, /dev/sdh or xvdh). - type: string - ebs: - description: EBS contains parameters used to automatically set - up EBS volumes when an instance is launched. - properties: - deleteOnTermination: - description: DeleteOnTermination indicates whether the EBS - volume is deleted on instance termination. - type: boolean - encrypted: - description: |- - Encrypted indicates whether the EBS volume is encrypted. Encrypted volumes can only - be attached to instances that support Amazon EBS encryption. If you are creating - a volume from a snapshot, you can't specify an encryption value. - type: boolean - iops: - description: |- - IOPS is the number of I/O operations per second (IOPS). For gp3, io1, and io2 volumes, - this represents the number of IOPS that are provisioned for the volume. 
For - gp2 volumes, this represents the baseline performance of the volume and the - rate at which the volume accumulates I/O credits for bursting. - - - The following are the supported values for each volume type: + x-kubernetes-validations: + - message: imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent + rule: 'has(self.imageGCHighThresholdPercent) && has(self.imageGCLowThresholdPercent) ? self.imageGCHighThresholdPercent > self.imageGCLowThresholdPercent : true' + - message: evictionSoft OwnerKey does not have a matching evictionSoftGracePeriod + rule: has(self.evictionSoft) ? self.evictionSoft.all(e, (e in self.evictionSoftGracePeriod)):true + - message: evictionSoftGracePeriod OwnerKey does not have a matching evictionSoft + rule: has(self.evictionSoftGracePeriod) ? self.evictionSoftGracePeriod.all(e, (e in self.evictionSoft)):true + metadataOptions: + default: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 1 + httpTokens: required + description: |- + MetadataOptions for the generated launch template of provisioned nodes. - * gp3: 3,000-16,000 IOPS + This specifies the exposure of the Instance Metadata Service to + provisioned EC2 nodes. For more information, + see Instance Metadata and User Data + (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) + in the Amazon Elastic Compute Cloud User Guide. - * io1: 100-64,000 IOPS + Refer to recommended, security best practices + (https://aws.github.io/aws-eks-best-practices/security/docs/iam/#restrict-access-to-the-instance-profile-assigned-to-the-worker-node) + for limiting exposure of Instance Metadata and User Data to pods. + If omitted, defaults to httpEndpoint enabled, with httpProtocolIPv6 + disabled, with httpPutResponseLimit of 1, and with httpTokens + required. + properties: + httpEndpoint: + default: enabled + description: |- + HTTPEndpoint enables or disables the HTTP metadata endpoint on provisioned + nodes. 
If metadata options is non-nil, but this parameter is not specified, + the default state is "enabled". - * io2: 100-64,000 IOPS + If you specify a value of "disabled", instance metadata will not be accessible + on the node. + enum: + - enabled + - disabled + type: string + httpProtocolIPv6: + default: disabled + description: |- + HTTPProtocolIPv6 enables or disables the IPv6 endpoint for the instance metadata + service on provisioned nodes. If metadata options is non-nil, but this parameter + is not specified, the default state is "disabled". + enum: + - enabled + - disabled + type: string + httpPutResponseHopLimit: + default: 1 + description: |- + HTTPPutResponseHopLimit is the desired HTTP PUT response hop limit for + instance metadata requests. The larger the number, the further instance + metadata requests can travel. Possible values are integers from 1 to 64. + If metadata options is non-nil, but this parameter is not specified, the + default value is 1. + format: int64 + maximum: 64 + minimum: 1 + type: integer + httpTokens: + default: required + description: |- + HTTPTokens determines the state of token usage for instance metadata + requests. If metadata options is non-nil, but this parameter is not + specified, the default state is "required". - For io1 and io2 volumes, we guarantee 64,000 IOPS only for Instances built - on the Nitro System (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html#ec2-nitro-instances). - Other instance families guarantee performance up to 32,000 IOPS. + If the state is optional, one can choose to retrieve instance metadata with + or without a signed token header on the request. If one retrieves the IAM + role credentials without a token, the version 1.0 role credentials are + returned. If one retrieves the IAM role credentials using a valid signed + token, the version 2.0 role credentials are returned. - This parameter is supported for io1, io2, and gp3 volumes only. 
This parameter - is not supported for gp2, st1, sc1, or standard volumes. - format: int64 - type: integer - kmsKeyID: - description: KMSKeyID (ARN) of the symmetric Key Management - Service (KMS) CMK used for encryption. + If the state is "required", one must send a signed token header with any + instance metadata retrieval requests. In this state, retrieving the IAM + role credentials always returns the version 2.0 credentials; the version + 1.0 credentials are not available. + enum: + - required + - optional + type: string + type: object + role: + description: |- + Role is the AWS identity that nodes use. This field is immutable. + This field is mutually exclusive from instanceProfile. + Marking this field as immutable avoids concerns around terminating managed instance profiles from running instances. + This field may be made mutable in the future, assuming the correct garbage collection and drift handling is implemented + for the old instance profiles on an update. + type: string + x-kubernetes-validations: + - message: role cannot be empty + rule: self != '' + - message: immutable field changed + rule: self == oldSelf + securityGroupSelectorTerms: + description: SecurityGroupSelectorTerms is a list of or security group selector terms. The terms are ORed. + items: + description: |- + SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. + properties: + id: + description: ID is the security group id in EC2 + pattern: sg-[0-9a-z]+ + type: string + name: + description: |- + Name is the security group name in EC2. + This value is the name field, which is different from the name tag. 
+ type: string + tags: + additionalProperties: type: string - snapshotID: - description: SnapshotID is the ID of an EBS snapshot + description: |- + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. + maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: securityGroupSelectorTerms cannot be empty + rule: self.size() != 0 + - message: expected at least one, got none, ['tags', 'id', 'name'] + rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) + - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' + rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))' + - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' + rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))' + subnetSelectorTerms: + description: SubnetSelectorTerms is a list of or subnet selector terms. The terms are ORed. + items: + description: |- + SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. + properties: + id: + description: ID is the subnet id in EC2 + pattern: subnet-[0-9a-z]+ + type: string + tags: + additionalProperties: type: string - throughput: - description: |- - Throughput to provision for a gp3 volume, with a maximum of 1,000 MiB/s. - Valid Range: Minimum value of 125. Maximum value of 1000. - format: int64 - type: integer - volumeSize: + description: |- + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. 
+ maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: subnetSelectorTerms cannot be empty + rule: self.size() != 0 + - message: expected at least one, got none, ['tags', 'id'] + rule: self.all(x, has(x.tags) || has(x.id)) + - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in subnetSelectorTerms' + rule: '!self.all(x, has(x.id) && has(x.tags))' + tags: + additionalProperties: + type: string + description: Tags to be applied on ec2 resources like instances and launch templates. + type: object + x-kubernetes-validations: + - message: empty tag keys aren't supported + rule: self.all(k, k != '') + - message: tag contains a restricted tag matching kubernetes.io/cluster/ + rule: self.all(k, !k.startsWith('kubernetes.io/cluster') ) + - message: tag contains a restricted tag matching karpenter.sh/nodepool + rule: self.all(k, k != 'karpenter.sh/nodepool') + - message: tag contains a restricted tag matching karpenter.sh/managed-by + rule: self.all(k, k !='karpenter.sh/managed-by') + - message: tag contains a restricted tag matching karpenter.sh/nodeclaim + rule: self.all(k, k !='karpenter.sh/nodeclaim') + - message: tag contains a restricted tag matching karpenter.k8s.aws/ec2nodeclass + rule: self.all(k, k !='karpenter.k8s.aws/ec2nodeclass') + userData: + description: |- + UserData to be applied to the provisioned nodes. + It must be in the appropriate format based on the AMIFamily in use. Karpenter will merge certain fields into + this UserData to ensure nodes are being provisioned with the correct configuration. 
+ type: string + required: + - securityGroupSelectorTerms + - subnetSelectorTerms + type: object + x-kubernetes-validations: + - message: must specify exactly one of ['role', 'instanceProfile'] + rule: (has(self.role) && !has(self.instanceProfile)) || (!has(self.role) && has(self.instanceProfile)) + - message: changing from 'instanceProfile' to 'role' is not supported. You must delete and recreate this node class if you want to change this. + rule: (has(oldSelf.role) && has(self.role)) || (has(oldSelf.instanceProfile) && has(self.instanceProfile)) + status: + description: EC2NodeClassStatus contains the resolved state of the EC2NodeClass + properties: + amis: + description: |- + AMI contains the current AMI values that are available to the + cluster under the AMI selectors. + items: + description: AMI contains resolved AMI selector values utilized for node launch + properties: + id: + description: ID of the AMI + type: string + name: + description: Name of the AMI + type: string + requirements: + description: Requirements of the AMI to be utilized on an instance type + items: description: |- - VolumeSize in `Gi`, `G`, `Ti`, or `T`. You must specify either a snapshot ID or - a volume size. The following are the supported volumes sizes for each volume - type: + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. 
+ This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + required: + - id + - requirements + type: object + type: array + conditions: + description: Conditions contains signals for health and readiness + items: + description: Condition aliases the upstream type and adds additional helper methods + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. 
+ --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + instanceProfile: + description: InstanceProfile contains the resolved instance profile for the role + type: string + securityGroups: + description: |- + SecurityGroups contains the current Security Groups values that are available to the + cluster under the SecurityGroups selectors. + items: + description: SecurityGroup contains resolved SecurityGroup selector values utilized for node launch + properties: + id: + description: ID of the security group + type: string + name: + description: Name of the security group + type: string + required: + - id + type: object + type: array + subnets: + description: |- + Subnets contains the current Subnet values that are available to the + cluster under the subnet selectors. + items: + description: Subnet contains resolved Subnet selector values utilized for node launch + properties: + id: + description: ID of the subnet + type: string + zone: + description: The associated availability zone + type: string + zoneID: + description: The associated availability zone ID + type: string + required: + - id + - zone + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} + - name: v1beta1 + schema: + openAPIV3Schema: + description: EC2NodeClass is the Schema for the EC2NodeClass API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + EC2NodeClassSpec is the top level specification for the AWS Karpenter Provider. + This will contain configuration necessary to launch instances in AWS. + properties: + amiFamily: + description: AMIFamily is the AMI family that instances use. + enum: + - AL2 + - AL2023 + - Bottlerocket + - Ubuntu + - Custom + - Windows2019 + - Windows2022 + type: string + amiSelectorTerms: + description: AMISelectorTerms is a list of or ami selector terms. The terms are ORed. + items: + description: |- + AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. + properties: + id: + description: ID is the ami id in EC2 + pattern: ami-[0-9a-z]+ + type: string + name: + description: |- + Name is the ami name in EC2. + This value is the name field, which is different from the name tag. + type: string + owner: + description: |- + Owner is the owner for the ami. + You can specify a combination of AWS account IDs, "self", "amazon", and "aws-marketplace" + type: string + tags: + additionalProperties: + type: string + description: |- + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. 
+ maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: expected at least one, got none, ['tags', 'id', 'name'] + rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) + - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in amiSelectorTerms' + rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name) || has(x.owner)))' + associatePublicIPAddress: + description: AssociatePublicIPAddress controls if public IP addresses are assigned to instances that are launched with the nodeclass. + type: boolean + blockDeviceMappings: + description: BlockDeviceMappings to be applied to provisioned nodes. + items: + properties: + deviceName: + description: The device name (for example, /dev/sdh or xvdh). + type: string + ebs: + description: EBS contains parameters used to automatically set up EBS volumes when an instance is launched. + properties: + deleteOnTermination: + description: DeleteOnTermination indicates whether the EBS volume is deleted on instance termination. + type: boolean + encrypted: + description: |- + Encrypted indicates whether the EBS volume is encrypted. Encrypted volumes can only + be attached to instances that support Amazon EBS encryption. If you are creating + a volume from a snapshot, you can't specify an encryption value. + type: boolean + iops: + description: |- + IOPS is the number of I/O operations per second (IOPS). For gp3, io1, and io2 volumes, + this represents the number of IOPS that are provisioned for the volume. For + gp2 volumes, this represents the baseline performance of the volume and the + rate at which the volume accumulates I/O credits for bursting. 
- * gp2 and gp3: 1-16,384 + The following are the supported values for each volume type: - * io1 and io2: 4-16,384 + * gp3: 3,000-16,000 IOPS - * st1 and sc1: 125-16,384 + * io1: 100-64,000 IOPS - * standard: 1-1,024 - pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$ - type: string - volumeType: - description: |- - VolumeType of the block device. - For more information, see Amazon EBS volume types (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html) - in the Amazon Elastic Compute Cloud User Guide. - enum: - - standard - - io1 - - io2 - - gp2 - - sc1 - - st1 - - gp3 - type: string - type: object - x-kubernetes-validations: - - message: snapshotID or volumeSize must be defined - rule: has(self.snapshotID) || has(self.volumeSize) - rootVolume: - description: |- - RootVolume is a flag indicating if this device is mounted as kubelet root dir. You can - configure at most one root volume in BlockDeviceMappings. - type: boolean - type: object - maxItems: 50 - type: array - x-kubernetes-validations: - - message: must have only one blockDeviceMappings with rootVolume - rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() - <= 1 - context: - description: |- - Context is a Reserved field in EC2 APIs - https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html - type: string - detailedMonitoring: - description: DetailedMonitoring controls if detailed monitoring is - enabled for instances that are launched - type: boolean - instanceProfile: - description: |- - InstanceProfile is the AWS entity that instances use. - This field is mutually exclusive from role. - The instance profile should already have a role assigned to it that Karpenter - has PassRole permission on for instance launch using this instanceProfile to succeed. 
- type: string - x-kubernetes-validations: - - message: instanceProfile cannot be empty - rule: self != '' - instanceStorePolicy: - description: InstanceStorePolicy specifies how to handle instance-store - disks. - enum: - - RAID0 - type: string - metadataOptions: - default: - httpEndpoint: enabled - httpProtocolIPv6: disabled - httpPutResponseHopLimit: 1 - httpTokens: required - description: |- - MetadataOptions for the generated launch template of provisioned nodes. + * io2: 100-64,000 IOPS - This specifies the exposure of the Instance Metadata Service to - provisioned EC2 nodes. For more information, - see Instance Metadata and User Data - (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) - in the Amazon Elastic Compute Cloud User Guide. + For io1 and io2 volumes, we guarantee 64,000 IOPS only for Instances built + on the Nitro System (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html#ec2-nitro-instances). + Other instance families guarantee performance up to 32,000 IOPS. - Refer to recommended, security best practices - (https://aws.github.io/aws-eks-best-practices/security/docs/iam/#restrict-access-to-the-instance-profile-assigned-to-the-worker-node) - for limiting exposure of Instance Metadata and User Data to pods. - If omitted, defaults to httpEndpoint enabled, with httpProtocolIPv6 - disabled, with httpPutResponseLimit of 1, and with httpTokens - required. - properties: - httpEndpoint: - default: enabled - description: |- - HTTPEndpoint enables or disables the HTTP metadata endpoint on provisioned - nodes. If metadata options is non-nil, but this parameter is not specified, - the default state is "enabled". + This parameter is supported for io1, io2, and gp3 volumes only. This parameter + is not supported for gp2, st1, sc1, or standard volumes. + format: int64 + type: integer + kmsKeyID: + description: KMSKeyID (ARN) of the symmetric Key Management Service (KMS) CMK used for encryption. 
+ type: string + snapshotID: + description: SnapshotID is the ID of an EBS snapshot + type: string + throughput: + description: |- + Throughput to provision for a gp3 volume, with a maximum of 1,000 MiB/s. + Valid Range: Minimum value of 125. Maximum value of 1000. + format: int64 + type: integer + volumeSize: + description: |- + VolumeSize in `Gi`, `G`, `Ti`, or `T`. You must specify either a snapshot ID or + a volume size. The following are the supported volumes sizes for each volume + type: - If you specify a value of "disabled", instance metadata will not be accessible - on the node. - enum: - - enabled - - disabled - type: string - httpProtocolIPv6: - default: disabled - description: |- - HTTPProtocolIPv6 enables or disables the IPv6 endpoint for the instance metadata - service on provisioned nodes. If metadata options is non-nil, but this parameter - is not specified, the default state is "disabled". - enum: - - enabled - - disabled - type: string - httpPutResponseHopLimit: - default: 2 - description: |- - HTTPPutResponseHopLimit is the desired HTTP PUT response hop limit for - instance metadata requests. The larger the number, the further instance - metadata requests can travel. Possible values are integers from 1 to 64. - If metadata options is non-nil, but this parameter is not specified, the - default value is 2. - format: int64 - maximum: 64 - minimum: 1 - type: integer - httpTokens: - default: required - description: |- - HTTPTokens determines the state of token usage for instance metadata - requests. If metadata options is non-nil, but this parameter is not - specified, the default state is "required". + * gp2 and gp3: 1-16,384 - If the state is optional, one can choose to retrieve instance metadata with - or without a signed token header on the request. If one retrieves the IAM - role credentials without a token, the version 1.0 role credentials are - returned. 
If one retrieves the IAM role credentials using a valid signed - token, the version 2.0 role credentials are returned. + * io1 and io2: 4-16,384 - If the state is "required", one must send a signed token header with any - instance metadata retrieval requests. In this state, retrieving the IAM - role credentials always returns the version 2.0 credentials; the version - 1.0 credentials are not available. - enum: - - required - - optional - type: string - type: object - role: - description: |- - Role is the AWS identity that nodes use. This field is immutable. - This field is mutually exclusive from instanceProfile. - Marking this field as immutable avoids concerns around terminating managed instance profiles from running instances. - This field may be made mutable in the future, assuming the correct garbage collection and drift handling is implemented - for the old instance profiles on an update. - type: string - x-kubernetes-validations: - - message: role cannot be empty - rule: self != '' - - message: immutable field changed - rule: self == oldSelf - securityGroupSelectorTerms: - description: SecurityGroupSelectorTerms is a list of or security group - selector terms. The terms are ORed. - items: - description: |- - SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. - properties: - id: - description: ID is the security group id in EC2 - pattern: sg-[0-9a-z]+ - type: string - name: - description: |- - Name is the security group name in EC2. - This value is the name field, which is different from the name tag. - type: string - tags: - additionalProperties: - type: string - description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. 
- maxProperties: 20 - type: object - x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') - type: object - maxItems: 30 - type: array - x-kubernetes-validations: - - message: securityGroupSelectorTerms cannot be empty - rule: self.size() != 0 - - message: expected at least one, got none, ['tags', 'id', 'name'] - rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) - - message: '''id'' is mutually exclusive, cannot be set with a combination - of other fields in securityGroupSelectorTerms' - rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))' - - message: '''name'' is mutually exclusive, cannot be set with a combination - of other fields in securityGroupSelectorTerms' - rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))' - subnetSelectorTerms: - description: SubnetSelectorTerms is a list of or subnet selector terms. - The terms are ORed. - items: - description: |- - SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. - If multiple fields are used for selection, the requirements are ANDed. - properties: - id: - description: ID is the subnet id in EC2 - pattern: subnet-[0-9a-z]+ - type: string - tags: - additionalProperties: - type: string - description: |- - Tags is a map of key/value tags used to select subnets - Specifying '*' for a value selects all values for a given tag key. 
- maxProperties: 20 - type: object - x-kubernetes-validations: - - message: empty tag keys or values aren't supported - rule: self.all(k, k != '' && self[k] != '') - type: object - maxItems: 30 - type: array - x-kubernetes-validations: - - message: subnetSelectorTerms cannot be empty - rule: self.size() != 0 - - message: expected at least one, got none, ['tags', 'id'] - rule: self.all(x, has(x.tags) || has(x.id)) - - message: '''id'' is mutually exclusive, cannot be set with a combination - of other fields in subnetSelectorTerms' - rule: '!self.all(x, has(x.id) && has(x.tags))' - tags: - additionalProperties: - type: string - description: Tags to be applied on ec2 resources like instances and - launch templates. - type: object - x-kubernetes-validations: - - message: empty tag keys aren't supported - rule: self.all(k, k != '') - - message: tag contains a restricted tag matching kubernetes.io/cluster/ - rule: self.all(k, !k.startsWith('kubernetes.io/cluster') ) - - message: tag contains a restricted tag matching karpenter.sh/nodepool - rule: self.all(k, k != 'karpenter.sh/nodepool') - - message: tag contains a restricted tag matching karpenter.sh/managed-by - rule: self.all(k, k !='karpenter.sh/managed-by') - - message: tag contains a restricted tag matching karpenter.sh/nodeclaim - rule: self.all(k, k !='karpenter.sh/nodeclaim') - - message: tag contains a restricted tag matching karpenter.k8s.aws/ec2nodeclass - rule: self.all(k, k !='karpenter.k8s.aws/ec2nodeclass') - userData: - description: |- - UserData to be applied to the provisioned nodes. - It must be in the appropriate format based on the AMIFamily in use. Karpenter will merge certain fields into - this UserData to ensure nodes are being provisioned with the correct configuration. 
- type: string - required: - - amiFamily - - securityGroupSelectorTerms - - subnetSelectorTerms - type: object - x-kubernetes-validations: - - message: amiSelectorTerms is required when amiFamily == 'Custom' - rule: 'self.amiFamily == ''Custom'' ? self.amiSelectorTerms.size() != - 0 : true' - - message: must specify exactly one of ['role', 'instanceProfile'] - rule: (has(self.role) && !has(self.instanceProfile)) || (!has(self.role) - && has(self.instanceProfile)) - - message: changing from 'instanceProfile' to 'role' is not supported. - You must delete and recreate this node class if you want to change - this. - rule: (has(oldSelf.role) && has(self.role)) || (has(oldSelf.instanceProfile) - && has(self.instanceProfile)) - status: - description: EC2NodeClassStatus contains the resolved state of the EC2NodeClass - properties: - amis: - description: |- - AMI contains the current AMI values that are available to the - cluster under the AMI selectors. - items: - description: AMI contains resolved AMI selector values utilized - for node launch - properties: - id: - description: ID of the AMI - type: string - name: - description: Name of the AMI - type: string - requirements: - description: Requirements of the AMI to be utilized on an instance - type - items: - description: |- - A node selector requirement is a selector that contains values, a key, and an operator - that relates the key and values. - properties: - key: - description: The label key that the selector applies to. + * st1 and sc1: 125-16,384 + + + * standard: 1-1,024 + pattern: ^((?:[1-9][0-9]{0,3}|[1-4][0-9]{4}|[5][0-8][0-9]{3}|59000)Gi|(?:[1-9][0-9]{0,3}|[1-5][0-9]{4}|[6][0-3][0-9]{3}|64000)G|([1-9]||[1-5][0-7]|58)Ti|([1-9]||[1-5][0-9]|6[0-3]|64)T)$ type: string - operator: + volumeType: description: |- - Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + VolumeType of the block device. 
+ For more information, see Amazon EBS volume types (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html) + in the Amazon Elastic Compute Cloud User Guide. + enum: + - standard + - io1 + - io2 + - gp2 + - sc1 + - st1 + - gp3 type: string - values: - description: |- - An array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. If the operator is Gt or Lt, the values - array must have a single element, which will be interpreted as an integer. - This array is replaced during a strategic merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator type: object - type: array - required: - - id - - requirements - type: object - type: array - conditions: - description: Conditions contains signals for health and readiness - items: - description: Condition aliases the upstream type and adds additional - helper methods + x-kubernetes-validations: + - message: snapshotID or volumeSize must be defined + rule: has(self.snapshotID) || has(self.volumeSize) + rootVolume: + description: |- + RootVolume is a flag indicating if this device is mounted as kubelet root dir. You can + configure at most one root volume in BlockDeviceMappings. + type: boolean + type: object + maxItems: 50 + type: array + x-kubernetes-validations: + - message: must have only one blockDeviceMappings with rootVolume + rule: self.filter(x, has(x.rootVolume)?x.rootVolume==true:false).size() <= 1 + context: + description: |- + Context is a Reserved field in EC2 APIs + https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html + type: string + detailedMonitoring: + description: DetailedMonitoring controls if detailed monitoring is enabled for instances that are launched + type: boolean + instanceProfile: + description: |- + InstanceProfile is the AWS entity that instances use. 
+ This field is mutually exclusive from role. + The instance profile should already have a role assigned to it that Karpenter + has PassRole permission on for instance launch using this instanceProfile to succeed. + type: string + x-kubernetes-validations: + - message: instanceProfile cannot be empty + rule: self != '' + instanceStorePolicy: + description: InstanceStorePolicy specifies how to handle instance-store disks. + enum: + - RAID0 + type: string + metadataOptions: + default: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 1 + httpTokens: required + description: |- + MetadataOptions for the generated launch template of provisioned nodes. + + + This specifies the exposure of the Instance Metadata Service to + provisioned EC2 nodes. For more information, + see Instance Metadata and User Data + (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) + in the Amazon Elastic Compute Cloud User Guide. + + + Refer to recommended, security best practices + (https://aws.github.io/aws-eks-best-practices/security/docs/iam/#restrict-access-to-the-instance-profile-assigned-to-the-worker-node) + for limiting exposure of Instance Metadata and User Data to pods. + If omitted, defaults to httpEndpoint enabled, with httpProtocolIPv6 + disabled, with httpPutResponseLimit of 1, and with httpTokens + required. properties: - lastTransitionTime: + httpEndpoint: + default: enabled description: |- - lastTransitionTime is the last time the condition transitioned from one status to another. - This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. - format: date-time + HTTPEndpoint enables or disables the HTTP metadata endpoint on provisioned + nodes. If metadata options is non-nil, but this parameter is not specified, + the default state is "enabled". 
+ + + If you specify a value of "disabled", instance metadata will not be accessible + on the node. + enum: + - enabled + - disabled type: string - message: + httpProtocolIPv6: + default: disabled description: |- - message is a human readable message indicating details about the transition. - This may be an empty string. - maxLength: 32768 + HTTPProtocolIPv6 enables or disables the IPv6 endpoint for the instance metadata + service on provisioned nodes. If metadata options is non-nil, but this parameter + is not specified, the default state is "disabled". + enum: + - enabled + - disabled type: string - observedGeneration: + httpPutResponseHopLimit: + default: 2 description: |- - observedGeneration represents the .metadata.generation that the condition was set based upon. - For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date - with respect to the current state of the instance. + HTTPPutResponseHopLimit is the desired HTTP PUT response hop limit for + instance metadata requests. The larger the number, the further instance + metadata requests can travel. Possible values are integers from 1 to 64. + If metadata options is non-nil, but this parameter is not specified, the + default value is 2. format: int64 - minimum: 0 + maximum: 64 + minimum: 1 type: integer - reason: + httpTokens: + default: required description: |- - reason contains a programmatic identifier indicating the reason for the condition's last transition. - Producers of specific condition types may define expected values and meanings for this field, - and whether the values are considered a guaranteed API. - The value should be a CamelCase string. - This field may not be empty. - maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, Unknown. 
+ HTTPTokens determines the state of token usage for instance metadata + requests. If metadata options is non-nil, but this parameter is not + specified, the default state is "required". + + + If the state is optional, one can choose to retrieve instance metadata with + or without a signed token header on the request. If one retrieves the IAM + role credentials without a token, the version 1.0 role credentials are + returned. If one retrieves the IAM role credentials using a valid signed + token, the version 2.0 role credentials are returned. + + + If the state is "required", one must send a signed token header with any + instance metadata retrieval requests. In this state, retrieving the IAM + role credentials always returns the version 2.0 credentials; the version + 1.0 credentials are not available. enum: - - "True" - - "False" - - Unknown + - required + - optional type: string - type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array - instanceProfile: - description: InstanceProfile contains the resolved instance profile - for the role - type: string - securityGroups: - description: |- - SecurityGroups contains the current Security Groups values that are available to the - cluster under the SecurityGroups selectors. 
- items: - description: SecurityGroup contains resolved SecurityGroup selector - values utilized for node launch - properties: - id: - description: ID of the security group - type: string - name: - description: Name of the security group - type: string - required: - - id type: object - type: array - subnets: - description: |- - Subnets contains the current Subnet values that are available to the - cluster under the subnet selectors. - items: - description: Subnet contains resolved Subnet selector values utilized - for node launch - properties: - id: - description: ID of the subnet - type: string - zone: - description: The associated availability zone - type: string - zoneID: - description: The associated availability zone ID - type: string - required: - - id - - zone + role: + description: |- + Role is the AWS identity that nodes use. This field is immutable. + This field is mutually exclusive from instanceProfile. + Marking this field as immutable avoids concerns around terminating managed instance profiles from running instances. + This field may be made mutable in the future, assuming the correct garbage collection and drift handling is implemented + for the old instance profiles on an update. + type: string + x-kubernetes-validations: + - message: role cannot be empty + rule: self != '' + - message: immutable field changed + rule: self == oldSelf + securityGroupSelectorTerms: + description: SecurityGroupSelectorTerms is a list of or security group selector terms. The terms are ORed. + items: + description: |- + SecurityGroupSelectorTerm defines selection logic for a security group used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. + properties: + id: + description: ID is the security group id in EC2 + pattern: sg-[0-9a-z]+ + type: string + name: + description: |- + Name is the security group name in EC2. + This value is the name field, which is different from the name tag. 
+ type: string + tags: + additionalProperties: + type: string + description: |- + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. + maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: securityGroupSelectorTerms cannot be empty + rule: self.size() != 0 + - message: expected at least one, got none, ['tags', 'id', 'name'] + rule: self.all(x, has(x.tags) || has(x.id) || has(x.name)) + - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' + rule: '!self.all(x, has(x.id) && (has(x.tags) || has(x.name)))' + - message: '''name'' is mutually exclusive, cannot be set with a combination of other fields in securityGroupSelectorTerms' + rule: '!self.all(x, has(x.name) && (has(x.tags) || has(x.id)))' + subnetSelectorTerms: + description: SubnetSelectorTerms is a list of or subnet selector terms. The terms are ORed. + items: + description: |- + SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. + If multiple fields are used for selection, the requirements are ANDed. + properties: + id: + description: ID is the subnet id in EC2 + pattern: subnet-[0-9a-z]+ + type: string + tags: + additionalProperties: + type: string + description: |- + Tags is a map of key/value tags used to select subnets + Specifying '*' for a value selects all values for a given tag key. 
+ maxProperties: 20 + type: object + x-kubernetes-validations: + - message: empty tag keys or values aren't supported + rule: self.all(k, k != '' && self[k] != '') + type: object + maxItems: 30 + type: array + x-kubernetes-validations: + - message: subnetSelectorTerms cannot be empty + rule: self.size() != 0 + - message: expected at least one, got none, ['tags', 'id'] + rule: self.all(x, has(x.tags) || has(x.id)) + - message: '''id'' is mutually exclusive, cannot be set with a combination of other fields in subnetSelectorTerms' + rule: '!self.all(x, has(x.id) && has(x.tags))' + tags: + additionalProperties: + type: string + description: Tags to be applied on ec2 resources like instances and launch templates. type: object - type: array - type: object - type: object - served: true - storage: false - subresources: - status: {} + x-kubernetes-validations: + - message: empty tag keys aren't supported + rule: self.all(k, k != '') + - message: tag contains a restricted tag matching kubernetes.io/cluster/ + rule: self.all(k, !k.startsWith('kubernetes.io/cluster') ) + - message: tag contains a restricted tag matching karpenter.sh/nodepool + rule: self.all(k, k != 'karpenter.sh/nodepool') + - message: tag contains a restricted tag matching karpenter.sh/managed-by + rule: self.all(k, k !='karpenter.sh/managed-by') + - message: tag contains a restricted tag matching karpenter.sh/nodeclaim + rule: self.all(k, k !='karpenter.sh/nodeclaim') + - message: tag contains a restricted tag matching karpenter.k8s.aws/ec2nodeclass + rule: self.all(k, k !='karpenter.k8s.aws/ec2nodeclass') + userData: + description: |- + UserData to be applied to the provisioned nodes. + It must be in the appropriate format based on the AMIFamily in use. Karpenter will merge certain fields into + this UserData to ensure nodes are being provisioned with the correct configuration. 
+ type: string + required: + - amiFamily + - securityGroupSelectorTerms + - subnetSelectorTerms + type: object + x-kubernetes-validations: + - message: amiSelectorTerms is required when amiFamily == 'Custom' + rule: 'self.amiFamily == ''Custom'' ? self.amiSelectorTerms.size() != 0 : true' + - message: must specify exactly one of ['role', 'instanceProfile'] + rule: (has(self.role) && !has(self.instanceProfile)) || (!has(self.role) && has(self.instanceProfile)) + - message: changing from 'instanceProfile' to 'role' is not supported. You must delete and recreate this node class if you want to change this. + rule: (has(oldSelf.role) && has(self.role)) || (has(oldSelf.instanceProfile) && has(self.instanceProfile)) + status: + description: EC2NodeClassStatus contains the resolved state of the EC2NodeClass + properties: + amis: + description: |- + AMI contains the current AMI values that are available to the + cluster under the AMI selectors. + items: + description: AMI contains resolved AMI selector values utilized for node launch + properties: + id: + description: ID of the AMI + type: string + name: + description: Name of the AMI + type: string + requirements: + description: Requirements of the AMI to be utilized on an instance type + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. 
+ This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + required: + - id + - requirements + type: object + type: array + conditions: + description: Conditions contains signals for health and readiness + items: + description: Condition aliases the upstream type and adds additional helper methods + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. 
+ --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + instanceProfile: + description: InstanceProfile contains the resolved instance profile for the role + type: string + securityGroups: + description: |- + SecurityGroups contains the current Security Groups values that are available to the + cluster under the SecurityGroups selectors. + items: + description: SecurityGroup contains resolved SecurityGroup selector values utilized for node launch + properties: + id: + description: ID of the security group + type: string + name: + description: Name of the security group + type: string + required: + - id + type: object + type: array + subnets: + description: |- + Subnets contains the current Subnet values that are available to the + cluster under the subnet selectors. 
+ items: + description: Subnet contains resolved Subnet selector values utilized for node launch + properties: + id: + description: ID of the subnet + type: string + zone: + description: The associated availability zone + type: string + zoneID: + description: The associated availability zone ID + type: string + required: + - id + - zone + type: object + type: array + type: object + type: object + served: true + storage: false + subresources: + status: {} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1beta1 + - v1 + clientConfig: + service: + name: karpenter + namespace: kube-system + port: 8443 diff --git a/pkg/apis/v1/ec2nodeclass.go b/pkg/apis/v1/ec2nodeclass.go index 3a4c8edaded8..d733cfdcf88d 100644 --- a/pkg/apis/v1/ec2nodeclass.go +++ b/pkg/apis/v1/ec2nodeclass.go @@ -54,7 +54,7 @@ type EC2NodeClassSpec struct { // +kubebuilder:validation:XValidation:message="'alias' is mutually exclusive, cannot be set with a combination of other amiSelectorTerms",rule="!(self.exists(x, has(x.alias)) && self.size() != 1)" // +kubebuilder:validation:MinItems:=1 // +kubebuilder:validation:MaxItems:=30 - // +required + // +optional AMISelectorTerms []AMISelectorTerm `json:"amiSelectorTerms" hash:"ignore"` // UserData to be applied to the provisioned nodes. // It must be in the appropriate format based on the AMIFamily in use. Karpenter will merge certain fields into @@ -165,7 +165,7 @@ type SecurityGroupSelectorTerm struct { // If multiple fields are used for selection, the requirements are ANDed. type AMISelectorTerm struct { // Alias specifies which EKS optimized AMI to select. - // Each alias consistes of a family and a version, specified as "family@version". + // Each alias consists of a family and a version, specified as "family@version". // Valid families include: al2, al2023, bottlerocket, windows2019, and windows2022. 
// The version can either be pinned to a specific AMI release, with that AMIs version format (ex: "al2023@v20240625" or "bottlerocket@1.10.0"). // The version can also be set to "latest" for any family. Setting the version to latest will result in drift when a new AMI is released. @@ -453,8 +453,11 @@ func (in *EC2NodeClass) InstanceProfileTags(clusterName string) map[string]strin }) } -func (in *EC2NodeClassSpec) AMIFamily() string { - if term, ok := lo.Find(in.AMISelectorTerms, func(t AMISelectorTerm) bool { +func (in *EC2NodeClass) AMIFamily() string { + if family, ok := in.Annotations[AnnotationAMIFamilyCompatibility]; ok { + return family + } + if term, ok := lo.Find(in.Spec.AMISelectorTerms, func(t AMISelectorTerm) bool { return t.Alias != "" }); ok { switch strings.Split(term.Alias, "@")[0] { @@ -473,8 +476,11 @@ func (in *EC2NodeClassSpec) AMIFamily() string { return AMIFamilyCustom } -func (in *EC2NodeClassSpec) AMIVersion() string { - if term, ok := lo.Find(in.AMISelectorTerms, func(t AMISelectorTerm) bool { +func (in *EC2NodeClass) AMIVersion() string { + if _, ok := in.Annotations[AnnotationAMIFamilyCompatibility]; ok { + return "latest" + } + if term, ok := lo.Find(in.Spec.AMISelectorTerms, func(t AMISelectorTerm) bool { return t.Alias != "" }); ok { parts := strings.Split(term.Alias, "@") @@ -483,7 +489,7 @@ func (in *EC2NodeClassSpec) AMIVersion() string { } return parts[1] } - return "" + return "latest" } // EC2NodeClassList contains a list of EC2NodeClass diff --git a/pkg/apis/v1/ec2nodeclass_conversion.go b/pkg/apis/v1/ec2nodeclass_conversion.go index 15518f8ed381..5b9c62cf4ebc 100644 --- a/pkg/apis/v1/ec2nodeclass_conversion.go +++ b/pkg/apis/v1/ec2nodeclass_conversion.go @@ -16,6 +16,8 @@ package v1 import ( "context" + "fmt" + "strings" "github.com/samber/lo" "knative.dev/pkg/apis" @@ -27,6 +29,10 @@ func (in *EC2NodeClass) ConvertTo(ctx context.Context, to apis.Convertible) erro v1beta1enc := to.(*v1beta1.EC2NodeClass) v1beta1enc.ObjectMeta = 
in.ObjectMeta + v1beta1enc.Spec.AMIFamily = lo.ToPtr(in.AMIFamily()) + v1beta1enc.Annotations = lo.Assign(v1beta1enc.Annotations, map[string]string{ + v1beta1.AnnotationAMIVersionCompatibility: in.AMIVersion(), + }) in.Spec.convertTo(&v1beta1enc.Spec) in.Status.convertTo((&v1beta1enc.Status)) return nil @@ -54,7 +60,6 @@ func (in *EC2NodeClassSpec) convertTo(v1beta1enc *v1beta1.EC2NodeClassSpec) { Tags: ami.Tags, } }) - v1beta1enc.AMIFamily = in.AMIFamily v1beta1enc.AssociatePublicIPAddress = in.AssociatePublicIPAddress v1beta1enc.Context = in.Context v1beta1enc.DetailedMonitoring = in.DetailedMonitoring @@ -102,6 +107,18 @@ func (in *EC2NodeClass) ConvertFrom(ctx context.Context, from apis.Convertible) v1beta1enc := from.(*v1beta1.EC2NodeClass) in.ObjectMeta = v1beta1enc.ObjectMeta + // If the v1beta1 AMI family is supported on v1, construct an alias. Otherwise, use the compatibility annotation. + // In practice, this is only used to support the Ubuntu AMI family during conversion. + switch lo.FromPtr(v1beta1enc.Spec.AMIFamily) { + case AMIFamilyAL2, AMIFamilyAL2023, AMIFamilyBottlerocket, Windows2019, Windows2022: + version := lo.ValueOr(v1beta1enc.Annotations, v1beta1.AnnotationAMIVersionCompatibility, "latest") + in.Spec.AMISelectorTerms = []AMISelectorTerm{{Alias: fmt.Sprintf("%s@%s", strings.ToLower(lo.FromPtr(v1beta1enc.Spec.AMIFamily)), version)}} + default: + in.Annotations = lo.Assign(in.Annotations, map[string]string{ + AnnotationAMIFamilyCompatibility: lo.FromPtr(v1beta1enc.Spec.AMIFamily), + }) + } + in.Spec.convertFrom(&v1beta1enc.Spec) in.Status.convertFrom((&v1beta1enc.Status)) return nil @@ -121,15 +138,14 @@ func (in *EC2NodeClassSpec) convertFrom(v1beta1enc *v1beta1.EC2NodeClassSpec) { Tags: sg.Tags, } }) - in.AMISelectorTerms = lo.Map(v1beta1enc.AMISelectorTerms, func(ami v1beta1.AMISelectorTerm, _ int) AMISelectorTerm { + in.AMISelectorTerms = append(in.AMISelectorTerms, lo.Map(v1beta1enc.AMISelectorTerms, func(ami v1beta1.AMISelectorTerm, _ 
int) AMISelectorTerm { return AMISelectorTerm{ ID: ami.ID, Name: ami.Name, Owner: ami.Owner, Tags: ami.Tags, } - }) - in.AMIFamily = v1beta1enc.AMIFamily + })...) in.AssociatePublicIPAddress = v1beta1enc.AssociatePublicIPAddress in.Context = v1beta1enc.Context in.DetailedMonitoring = v1beta1enc.DetailedMonitoring diff --git a/pkg/apis/v1/ec2nodeclass_conversion_test.go b/pkg/apis/v1/ec2nodeclass_conversion_test.go index 70a60fcf1e52..7507bc8351d8 100644 --- a/pkg/apis/v1/ec2nodeclass_conversion_test.go +++ b/pkg/apis/v1/ec2nodeclass_conversion_test.go @@ -15,6 +15,9 @@ limitations under the License. package v1_test import ( + "fmt" + "strings" + "github.com/awslabs/operatorpkg/status" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -22,6 +25,8 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "sigs.k8s.io/karpenter/pkg/test" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + . "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" ) @@ -38,8 +43,18 @@ var _ = Describe("Convert v1 to v1beta1 EC2NodeClass API", func() { }) It("should convert v1 ec2nodeclass metadata", func() { - v1ec2nodeclass.ObjectMeta = test.ObjectMeta() - Expect(v1ec2nodeclass.ConvertFrom(ctx, v1beta1ec2nodeclass)).To(Succeed()) + v1ec2nodeclass.ObjectMeta = test.ObjectMeta(metav1.ObjectMeta{ + Annotations: map[string]string{"foo": "bar"}, + }) + Expect(v1ec2nodeclass.ConvertTo(ctx, v1beta1ec2nodeclass)).To(Succeed()) + + // Remove the compatibility annotations from the EC2NodeClass. 
+ v1beta1ec2nodeclass.ObjectMeta.Annotations = lo.OmitByKeys(v1beta1ec2nodeclass.ObjectMeta.Annotations, []string{ + v1beta1.AnnotationAMIVersionCompatibility, + }) + if len(v1beta1ec2nodeclass.ObjectMeta.Annotations) == 0 { + v1beta1ec2nodeclass.ObjectMeta.Annotations = nil + } Expect(v1beta1ec2nodeclass.ObjectMeta).To(BeEquivalentTo(v1ec2nodeclass.ObjectMeta)) }) Context("EC2NodeClass Spec", func() { @@ -108,10 +123,25 @@ var _ = Describe("Convert v1 to v1beta1 EC2NodeClass API", func() { Expect(v1ec2nodeclass.ConvertTo(ctx, v1beta1ec2nodeclass)).To(Succeed()) Expect(lo.FromPtr(v1beta1ec2nodeclass.Spec.AssociatePublicIPAddress)).To(BeTrue()) }) - It("should convert v1 ec2nodeclass ami family", func() { - v1ec2nodeclass.Spec.AMIFamily = &AMIFamilyUbuntu + It("should convert v1 ec2nodeclass alias", func() { + v1ec2nodeclass.Spec.AMISelectorTerms = []AMISelectorTerm{{Alias: "al2023@latest"}} + Expect(v1ec2nodeclass.ConvertTo(ctx, v1beta1ec2nodeclass)).To(Succeed()) + Expect(lo.FromPtr(v1beta1ec2nodeclass.Spec.AMIFamily)).To(Equal(v1beta1.AMIFamilyAL2023)) + }) + It("should convert v1 ec2nodeclass alias with pinned version", func() { + family := AMIFamilyAL2023 + version := "v20240625" + v1ec2nodeclass.Spec.AMISelectorTerms = []AMISelectorTerm{{Alias: fmt.Sprintf("%s@%s", strings.ToLower(family), version)}} Expect(v1ec2nodeclass.ConvertTo(ctx, v1beta1ec2nodeclass)).To(Succeed()) - Expect(lo.FromPtr(v1beta1ec2nodeclass.Spec.AMIFamily)).To(Equal(v1beta1.AMIFamilyUbuntu)) + Expect(lo.FromPtr(v1beta1ec2nodeclass.Spec.AMIFamily)).To(Equal(family)) + Expect(v1beta1ec2nodeclass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationAMIVersionCompatibility, version)) + }) + It("should convert v1 ec2nodeclass with AMIFamily compat annotation", func() { + v1ec2nodeclass.Annotations = lo.Assign(v1ec2nodeclass.Annotations, map[string]string{ + AnnotationAMIFamilyCompatibility: v1beta1.AMIFamilyAL2023, + }) + Expect(v1ec2nodeclass.ConvertTo(ctx, v1beta1ec2nodeclass)).To(Succeed()) + 
Expect(lo.FromPtr(v1beta1ec2nodeclass.Spec.AMIFamily)).To(Equal(v1beta1.AMIFamilyAL2023)) }) It("should convert v1 ec2nodeclass user data", func() { v1ec2nodeclass.Spec.UserData = lo.ToPtr("test user data") @@ -269,8 +299,18 @@ var _ = Describe("Convert v1beta1 to v1 EC2NodeClass API", func() { }) It("should convert v1beta1 ec2nodeclass metadata", func() { - v1beta1ec2nodeclass.ObjectMeta = test.ObjectMeta() + v1beta1ec2nodeclass.ObjectMeta = test.ObjectMeta(metav1.ObjectMeta{ + Annotations: map[string]string{"foo": "bar"}, + }) Expect(v1ec2nodeclass.ConvertFrom(ctx, v1beta1ec2nodeclass)).To(Succeed()) + + // Remove the compatibility annotations from the EC2NodeClass + v1ec2nodeclass.ObjectMeta.Annotations = lo.OmitByKeys(v1ec2nodeclass.ObjectMeta.Annotations, []string{ + AnnotationAMIFamilyCompatibility, + }) + if len(v1ec2nodeclass.ObjectMeta.Annotations) == 0 { + v1ec2nodeclass.ObjectMeta.Annotations = nil + } Expect(v1ec2nodeclass.ObjectMeta).To(BeEquivalentTo(v1beta1ec2nodeclass.ObjectMeta)) }) Context("EC2NodeClass Spec", func() { @@ -340,9 +380,22 @@ var _ = Describe("Convert v1beta1 to v1 EC2NodeClass API", func() { Expect(lo.FromPtr(v1ec2nodeclass.Spec.AssociatePublicIPAddress)).To(BeTrue()) }) It("should convert v1beta1 ec2nodeclass ami family", func() { - v1beta1ec2nodeclass.Spec.AMIFamily = &AMIFamilyUbuntu + v1beta1ec2nodeclass.Spec.AMIFamily = &v1beta1.AMIFamilyAL2023 + Expect(v1ec2nodeclass.ConvertFrom(ctx, v1beta1ec2nodeclass)).To(Succeed()) + Expect(v1ec2nodeclass.Spec.AMISelectorTerms).To(ContainElement(AMISelectorTerm{Alias: "al2023@latest"})) + }) + It("should convert v1beta1 ec2nodeclass ami family (pinned AMI)", func() { + v1beta1ec2nodeclass.Spec.AMIFamily = &v1beta1.AMIFamilyAL2023 + v1beta1ec2nodeclass.Annotations = lo.Assign(v1beta1ec2nodeclass.Annotations, map[string]string{ + v1beta1.AnnotationAMIVersionCompatibility: "v20240625", + }) + Expect(v1ec2nodeclass.ConvertFrom(ctx, v1beta1ec2nodeclass)).To(Succeed()) + 
Expect(v1ec2nodeclass.Spec.AMISelectorTerms).To(ContainElement(AMISelectorTerm{Alias: "al2023@v20240625"})) + }) + It("should convert v1beta1 ec2nodeclass ami family (ubuntu compat)", func() { + v1beta1ec2nodeclass.Spec.AMIFamily = &v1beta1.AMIFamilyUbuntu Expect(v1ec2nodeclass.ConvertFrom(ctx, v1beta1ec2nodeclass)).To(Succeed()) - Expect(lo.FromPtr(v1ec2nodeclass.Spec.AMIFamily)).To(Equal(v1beta1.AMIFamilyUbuntu)) + Expect(v1ec2nodeclass.Annotations).To(HaveKeyWithValue(AnnotationAMIFamilyCompatibility, "Ubuntu")) }) It("should convert v1beta1 ec2nodeclass user data", func() { v1beta1ec2nodeclass.Spec.UserData = lo.ToPtr("test user data") diff --git a/pkg/apis/v1/ec2nodeclass_hash_test.go b/pkg/apis/v1/ec2nodeclass_hash_test.go index afed0631aaed..a4ca5dd09f8c 100644 --- a/pkg/apis/v1/ec2nodeclass_hash_test.go +++ b/pkg/apis/v1/ec2nodeclass_hash_test.go @@ -29,14 +29,13 @@ import ( ) var _ = Describe("Hash", func() { - const staticHash = "10790156025840984195" + const staticHash = "12469392724194263290" var nodeClass *v1.EC2NodeClass BeforeEach(func() { nodeClass = &v1.EC2NodeClass{ ObjectMeta: test.ObjectMeta(metav1.ObjectMeta{}), Spec: v1.EC2NodeClassSpec{ - AMIFamily: lo.ToPtr(v1.AMIFamilyAL2023), - Role: "role-1", + Role: "role-1", Tags: map[string]string{ "keyTag-1": "valueTag-1", "keyTag-2": "valueTag-2", @@ -81,26 +80,26 @@ var _ = Describe("Hash", func() { }, Entry("Base EC2NodeClass", staticHash, v1.EC2NodeClass{}), // Static fields, expect changed hash from base - Entry("UserData", "18317182711135792962", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{UserData: aws.String("userdata-test-2")}}), - Entry("Tags", "7254882043893135054", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{Tags: map[string]string{"keyTag-test-3": "valueTag-test-3"}}}), - Entry("Context", "17271601354348855032", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{Context: aws.String("context-2")}}), - Entry("DetailedMonitoring", "3320998103335094348", v1.EC2NodeClass{Spec: 
v1.EC2NodeClassSpec{DetailedMonitoring: aws.Bool(true)}}), - Entry("AMIFamily", "11029247967399146065", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{AMIFamily: aws.String(v1.AMIFamilyBottlerocket)}}), - Entry("InstanceStorePolicy", "15591048753403695860", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{InstanceStorePolicy: lo.ToPtr(v1.InstanceStorePolicyRAID0)}}), - Entry("AssociatePublicIPAddress", "8788624850560996180", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{AssociatePublicIPAddress: lo.ToPtr(true)}}), - Entry("MetadataOptions HTTPEndpoint", "12130088184516131939", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPEndpoint: lo.ToPtr("enabled")}}}), - Entry("MetadataOptions HTTPProtocolIPv6", "9851778617676567202", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPProtocolIPv6: lo.ToPtr("enabled")}}}), - Entry("MetadataOptions HTTPPutResponseHopLimit", "10114972825726256442", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPPutResponseHopLimit: lo.ToPtr(int64(10))}}}), - Entry("MetadataOptions HTTPTokens", "15328515228245883488", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPTokens: lo.ToPtr("required")}}}), - Entry("BlockDeviceMapping DeviceName", "14855383487702710824", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{DeviceName: lo.ToPtr("map-device-test-3")}}}}), - Entry("BlockDeviceMapping RootVolume", "9591488558660758449", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{RootVolume: true}}}}), - Entry("BlockDeviceMapping DeleteOnTermination", "2802222466202766732", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{DeleteOnTermination: lo.ToPtr(true)}}}}}), - Entry("BlockDeviceMapping Encrypted", "16743053872042184219", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: 
[]*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{Encrypted: lo.ToPtr(true)}}}}}), - Entry("BlockDeviceMapping IOPS", "17284705682110195253", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{IOPS: lo.ToPtr(int64(10))}}}}}), - Entry("BlockDeviceMapping KMSKeyID", "9151019926310241707", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{KMSKeyID: lo.ToPtr("test")}}}}}), - Entry("BlockDeviceMapping SnapshotID", "5250341140179985875", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{SnapshotID: lo.ToPtr("test")}}}}}), - Entry("BlockDeviceMapping Throughput", "16711481758638864953", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{Throughput: lo.ToPtr(int64(10))}}}}}), - Entry("BlockDeviceMapping VolumeType", "488614640133725370", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{VolumeType: lo.ToPtr("io1")}}}}}), + + Entry("UserData", "10726797622220086701", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{UserData: aws.String("userdata-test-2")}}), + Entry("Tags", "13171706991797150099", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{Tags: map[string]string{"keyTag-test-3": "valueTag-test-3"}}}), + Entry("Context", "2889419402034926781", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{Context: aws.String("context-2")}}), + Entry("DetailedMonitoring", "3268207623472999947", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{DetailedMonitoring: aws.Bool(true)}}), + Entry("InstanceStorePolicy", "14988327748406934174", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{InstanceStorePolicy: lo.ToPtr(v1.InstanceStorePolicyRAID0)}}), + Entry("AssociatePublicIPAddress", "15544493229975292346", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{AssociatePublicIPAddress: lo.ToPtr(true)}}), + Entry("MetadataOptions HTTPEndpoint", 
"17871731458970668774", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPEndpoint: lo.ToPtr("enabled")}}}), + Entry("MetadataOptions HTTPProtocolIPv6", "2470633037813609088", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPProtocolIPv6: lo.ToPtr("enabled")}}}), + Entry("MetadataOptions HTTPPutResponseHopLimit", "17675179355294901357", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPPutResponseHopLimit: lo.ToPtr(int64(10))}}}), + Entry("MetadataOptions HTTPTokens", "2669105690505918645", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPTokens: lo.ToPtr("required")}}}), + Entry("BlockDeviceMapping DeviceName", "5415148539492750790", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{DeviceName: lo.ToPtr("map-device-test-3")}}}}), + Entry("BlockDeviceMapping RootVolume", "5518942571120617117", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{RootVolume: true}}}}), + Entry("BlockDeviceMapping DeleteOnTermination", "2581652380077676602", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{DeleteOnTermination: lo.ToPtr(true)}}}}}), + Entry("BlockDeviceMapping Encrypted", "9177809208865678872", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{Encrypted: lo.ToPtr(true)}}}}}), + Entry("BlockDeviceMapping IOPS", "10810952692173241494", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{IOPS: lo.ToPtr(int64(10))}}}}}), + Entry("BlockDeviceMapping KMSKeyID", "2530423540851551058", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{KMSKeyID: lo.ToPtr("test")}}}}}), + Entry("BlockDeviceMapping SnapshotID", "9712886753345517947", 
v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{SnapshotID: lo.ToPtr("test")}}}}}), + Entry("BlockDeviceMapping Throughput", "3334301275630239638", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{Throughput: lo.ToPtr(int64(10))}}}}}), + Entry("BlockDeviceMapping VolumeType", "7651488174200364927", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{{EBS: &v1.BlockDevice{VolumeType: lo.ToPtr("io1")}}}}}), // Behavior / Dynamic fields, expect same hash as base Entry("Modified AMISelector", staticHash, v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{AMISelectorTerms: []v1.AMISelectorTerm{{Tags: map[string]string{"ami-test-key": "ami-test-value"}}}}}), @@ -111,12 +110,12 @@ var _ = Describe("Hash", func() { // doesn't work well with unexported fields, like the ones that are present in resource.Quantity It("should match static hash when updating blockDeviceMapping volumeSize", func() { nodeClass.Spec.BlockDeviceMappings[0].EBS.VolumeSize = resource.NewScaledQuantity(10, resource.Giga) - Expect(nodeClass.Hash()).To(Equal("4802236799448001710")) + Expect(nodeClass.Hash()).To(Equal("13279394903563315705")) }) It("should match static hash for instanceProfile", func() { nodeClass.Spec.Role = "" nodeClass.Spec.InstanceProfile = lo.ToPtr("test-instance-profile") - Expect(nodeClass.Hash()).To(Equal("7914642030762404205")) + Expect(nodeClass.Hash()).To(Equal("13329599275048356421")) }) It("should match static hash when reordering tags", func() { nodeClass.Spec.Tags = map[string]string{"keyTag-2": "valueTag-2", "keyTag-1": "valueTag-1"} @@ -136,7 +135,6 @@ var _ = Describe("Hash", func() { Entry("Tags", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{Tags: map[string]string{"keyTag-test-3": "valueTag-test-3"}}}), Entry("Context", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{Context: aws.String("context-2")}}), Entry("DetailedMonitoring", 
v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{DetailedMonitoring: aws.Bool(true)}}), - Entry("AMIFamily", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{AMIFamily: aws.String(v1.AMIFamilyBottlerocket)}}), Entry("InstanceStorePolicy", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{InstanceStorePolicy: lo.ToPtr(v1.InstanceStorePolicyRAID0)}}), Entry("AssociatePublicIPAddress", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{AssociatePublicIPAddress: lo.ToPtr(true)}}), Entry("MetadataOptions HTTPEndpoint", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPEndpoint: lo.ToPtr("enabled")}}}), diff --git a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go index 3c41ab32936d..fb9823e355bf 100644 --- a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go +++ b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go @@ -396,12 +396,6 @@ var _ = Describe("CEL/Validation", func() { } Expect(env.Client.Create(ctx, nc)).To(Succeed()) }) - It("should fail when no ami selector terms are specified", func() { - nc.Spec.AMISelectorTerms = nil - Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) - nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{} - Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) - }) It("should fail when a ami selector term has no values", func() { nc.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ {}, diff --git a/pkg/apis/v1/labels.go b/pkg/apis/v1/labels.go index 40bc9b2ca13e..5f780d9c2bfb 100644 --- a/pkg/apis/v1/labels.go +++ b/pkg/apis/v1/labels.go @@ -122,6 +122,7 @@ var ( AnnotationKubeletCompatibilityHash = apis.CompatabilityGroup + "/kubelet-drift-hash" AnnotationEC2NodeClassHashVersion = apis.Group + "/ec2nodeclass-hash-version" AnnotationInstanceTagged = apis.Group + "/tagged" + AnnotationAMIFamilyCompatibility = apis.CompatibilityGroup + "/v1beta1-ami-family-conversion" TagNodeClaim = coreapis.Group + "/nodeclaim" TagManagedLaunchTemplate = apis.Group + "/cluster" diff --git a/pkg/apis/v1beta1/labels.go 
b/pkg/apis/v1beta1/labels.go index d7ae21c84230..6d3e13033434 100644 --- a/pkg/apis/v1beta1/labels.go +++ b/pkg/apis/v1beta1/labels.go @@ -121,6 +121,7 @@ var ( AnnotationEC2NodeClassHash = apis.Group + "/ec2nodeclass-hash" AnnotationEC2NodeClassHashVersion = apis.Group + "/ec2nodeclass-hash-version" AnnotationInstanceTagged = apis.Group + "/tagged" + AnnotationAMIVersionCompatibility = apis.CompatibilityGroup + "/v1-ami-version-conversion" TagNodeClaim = coreapis.Group + "/nodeclaim" TagManagedLaunchTemplate = apis.Group + "/cluster" diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 69f91f97e368..2b78ae16ff0a 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -888,8 +888,8 @@ var _ = Describe("CloudProvider", func() { Tags: map[string]string{ "fakeKey": "fakeValue", }, - Context: lo.ToPtr("fake-context"), - DetailedMonitoring: lo.ToPtr(false), + Context: lo.ToPtr("fake-context"), + DetailedMonitoring: lo.ToPtr(false), AMISelectorTerms: []v1.AMISelectorTerm{{ Alias: "al2023@latest", }}, diff --git a/pkg/controllers/nodeclass/status/ami_test.go b/pkg/controllers/nodeclass/status/ami_test.go index e52b9fe2771a..6bd726710eb1 100644 --- a/pkg/controllers/nodeclass/status/ami_test.go +++ b/pkg/controllers/nodeclass/status/ami_test.go @@ -135,7 +135,8 @@ var _ = Describe("NodeClass AMI Status Controller", func() { ExpectApplied(ctx, env.Client, nodeClass) ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) - Expect(nodeClass.Status.AMIs).To(Equal([]v1.AMI{ + Expect(len(nodeClass.Status.AMIs)).To(Equal(4)) + Expect(nodeClass.Status.AMIs).To(ContainElements([]v1.AMI{ { Name: "test-ami-3", ID: "ami-id-789", @@ -245,7 +246,8 @@ var _ = Describe("NodeClass AMI Status Controller", func() { ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) nodeClass = ExpectExists(ctx, env.Client, nodeClass) - 
Expect(nodeClass.Status.AMIs).To(Equal([]v1.AMI{ + Expect(len(nodeClass.Status.AMIs)).To(Equal(2)) + Expect(nodeClass.Status.AMIs).To(ContainElements([]v1.AMI{ { Name: "test-ami-2", ID: "ami-id-456", diff --git a/pkg/controllers/nodeclass/status/readiness.go b/pkg/controllers/nodeclass/status/readiness.go index 828ae098010b..ea16052b81de 100644 --- a/pkg/controllers/nodeclass/status/readiness.go +++ b/pkg/controllers/nodeclass/status/readiness.go @@ -32,6 +32,11 @@ type Readiness struct { } func (n Readiness) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconcile.Result, error) { + // TODO: Drop runtime check once support for conversion is dropped, and make AMISelectorTerms required via CEL. + if _, ok := nodeClass.Annotations[v1.AnnotationAMIFamilyCompatibility]; !ok && len(nodeClass.Spec.AMISelectorTerms) == 0 { + nodeClass.StatusConditions().SetFalse(status.ConditionReady, "NodeClassNotReady", "Invalid AMI configuration") + return reconcile.Result{}, fmt.Errorf("invalid configuration, AMISelectorTerms or 'karpenter.sh/v1beta1-amifamily' compatibility annotation must be specified") + } // A NodeClass that uses AL2023 requires the cluster CIDR for launching nodes. // To allow Karpenter to be used for Non-EKS clusters, resolving the Cluster CIDR // will not be done at startup but instead in a reconcile loop. 
diff --git a/pkg/fake/ssmapi.go b/pkg/fake/ssmapi.go index e37b20d55511..e3853365bbcc 100644 --- a/pkg/fake/ssmapi.go +++ b/pkg/fake/ssmapi.go @@ -23,10 +23,11 @@ import ( "github.com/Pallinder/go-randomdata" "github.com/aws/aws-sdk-go/aws/awserr" - "github.com/aws/karpenter-provider-aws/pkg/providers/version" "github.com/mitchellh/hashstructure/v2" "github.com/samber/lo" + "github.com/aws/karpenter-provider-aws/pkg/providers/version" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/request" "github.com/aws/aws-sdk-go/service/ssm" diff --git a/pkg/providers/amifamily/al2023.go b/pkg/providers/amifamily/al2023.go index 0da37fa3d749..90b9baf2f24f 100644 --- a/pkg/providers/amifamily/al2023.go +++ b/pkg/providers/amifamily/al2023.go @@ -27,6 +27,7 @@ import ( "sigs.k8s.io/karpenter/pkg/scheduling" "github.com/aws/aws-sdk-go/service/ec2" + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap" "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" diff --git a/pkg/providers/amifamily/ami.go b/pkg/providers/amifamily/ami.go index 513229c230fb..723c18a9071c 100644 --- a/pkg/providers/amifamily/ami.go +++ b/pkg/providers/amifamily/ami.go @@ -26,16 +26,16 @@ import ( "github.com/mitchellh/hashstructure/v2" "github.com/patrickmn/go-cache" "github.com/samber/lo" - corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/log" v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/providers/version" - "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/scheduling" "sigs.k8s.io/karpenter/pkg/utils/pretty" + + "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" ) type Provider interface { @@ -85,7 +85,7 @@ func (p *DefaultProvider) List(ctx context.Context, nodeClass *v1.EC2NodeClass) func (p *DefaultProvider) DescribeImageQueries(ctx context.Context, 
nodeClass *v1.EC2NodeClass) ([]DescribeImageQuery, error) { // Aliases are mutually exclusive, both on the term level and field level within a term. // This is enforced by a CEL validation, we will treat this as an invariant. - if amiFamilyKey := nodeClass.AMIFamily(); amiFamilyKey != v1beta1.AMIFamilyCustom { + if amiFamilyKey := nodeClass.AMIFamily(); amiFamilyKey != v1.AMIFamilyCustom { amiVersion := nodeClass.AMIVersion() amiFamily := GetAMIFamily(&amiFamilyKey, nil) kubernetesVersion, err := p.versionProvider.Get(ctx) @@ -143,6 +143,7 @@ func (p *DefaultProvider) DescribeImageQueries(ctx context.Context, nodeClass *v return queries, nil } +//nolint:gocyclo func (p *DefaultProvider) amis(ctx context.Context, queries []DescribeImageQuery) (AMIs, error) { hash, err := hashstructure.Hash(queries, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) if err != nil { diff --git a/pkg/providers/amifamily/bottlerocket.go b/pkg/providers/amifamily/bottlerocket.go index 0c699240a206..0014868bfb34 100644 --- a/pkg/providers/amifamily/bottlerocket.go +++ b/pkg/providers/amifamily/bottlerocket.go @@ -31,8 +31,8 @@ import ( "sigs.k8s.io/karpenter/pkg/scheduling" "github.com/aws/aws-sdk-go/aws" - corev1 "k8s.io/api/core/v1" "github.com/aws/aws-sdk-go/service/ec2" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" ) @@ -75,7 +75,7 @@ func (b Bottlerocket) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Pr Name: lo.ToPtr("image-id"), Values: imageIDs, }}, - KnownRequirements: make(map[string][]scheduling.Requirements), + KnownRequirements: requirements, }, nil } diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index 24ec92dd6b5b..eb1e81690a49 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -29,8 +29,8 @@ import ( v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap" - 
"github.com/aws/karpenter-provider-aws/pkg/utils" "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" + "github.com/aws/karpenter-provider-aws/pkg/utils" "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/scheduling" diff --git a/pkg/providers/amifamily/types.go b/pkg/providers/amifamily/types.go index 092d6d75ccb0..ac37d20b06b0 100644 --- a/pkg/providers/amifamily/types.go +++ b/pkg/providers/amifamily/types.go @@ -21,11 +21,12 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" - v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/samber/lo" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "sigs.k8s.io/karpenter/pkg/scheduling" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" ) const ( @@ -59,9 +60,9 @@ func (a AMIs) Sort() { type Variant string var ( - VariantStandard Variant = "standard" - VariantNvidia Variant = "nvidia" - VariantNeuron Variant = "neuron" + VariantStandard Variant = "standard" + VariantNvidia Variant = "nvidia" + VariantNeuron Variant = "neuron" ) func NewVariant(v string) (Variant, error) { @@ -89,8 +90,8 @@ func (v Variant) Requirements() scheduling.Requirements { } type DescribeImageQuery struct { - Filters []*ec2.Filter - Owners []string + Filters []*ec2.Filter + Owners []string // KnownRequirements is a map from image IDs to a set of known requirements. // When discovering image IDs via SSM we know additional requirements which aren't surfaced by ec2:DescribeImage (e.g. GPU / Neuron compatibility) // Sometimes, an image may have multiple sets of known requirements. 
For example, the AL2 GPU AMI is compatible with both Neuron and Nvidia GPU diff --git a/pkg/providers/ssm/provider.go b/pkg/providers/ssm/provider.go index ced87b07ea52..216615573713 100644 --- a/pkg/providers/ssm/provider.go +++ b/pkg/providers/ssm/provider.go @@ -31,14 +31,14 @@ type Provider interface { type DefaultProvider struct { sync.Mutex - cache *cache.Cache + cache *cache.Cache ssmapi ssmiface.SSMAPI } func NewDefaultProvider(ssmapi ssmiface.SSMAPI, cache *cache.Cache) *DefaultProvider { return &DefaultProvider{ ssmapi: ssmapi, - cache: cache, + cache: cache, } } @@ -53,7 +53,7 @@ func (p *DefaultProvider) List(ctx context.Context, path string) (map[string]str values := map[string]string{} if err := p.ssmapi.GetParametersByPathPagesWithContext(ctx, &ssm.GetParametersByPathInput{ Recursive: lo.ToPtr(true), - Path: &path, + Path: &path, }, func(out *ssm.GetParametersByPathOutput, _ bool) bool { for _, parameter := range out.Parameters { if parameter.Name == nil || parameter.Value == nil { diff --git a/pkg/providers/version/version.go b/pkg/providers/version/version.go index 64b716d9859a..c820d611c508 100644 --- a/pkg/providers/version/version.go +++ b/pkg/providers/version/version.go @@ -77,7 +77,7 @@ func (p *DefaultProvider) Get(ctx context.Context) (string, error) { return version, nil } -func SupportedK8sVersions() []string{ +func SupportedK8sVersions() []string { minMinor := lo.Must(strconv.Atoi(strings.Split(MinK8sVersion, ".")[1])) maxMinor := lo.Must(strconv.Atoi(strings.Split(MaxK8sVersion, ".")[1])) versions := make([]string, 0, maxMinor-minMinor+1) diff --git a/pkg/test/nodeclass.go b/pkg/test/nodeclass.go index dc0d911fb97a..ecba27c5c8c9 100644 --- a/pkg/test/nodeclass.go +++ b/pkg/test/nodeclass.go @@ -38,8 +38,8 @@ func EC2NodeClass(overrides ...v1.EC2NodeClass) *v1.EC2NodeClass { panic(fmt.Sprintf("Failed to merge settings: %s", err)) } } - if options.Spec.AMIFamily == nil { - options.Spec.AMIFamily = &v1.AMIFamilyAL2 + if 
len(options.Spec.AMISelectorTerms) == 0 { + options.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: "al2@latest"}} options.Status.AMIs = []v1.AMI{ { ID: "ami-test1", @@ -137,8 +137,8 @@ func BetaEC2NodeClass(overrides ...v1beta1.EC2NodeClass) *v1beta1.EC2NodeClass { panic(fmt.Sprintf("Failed to merge settings: %s", err)) } } - if len(options.Spec.AMISelectorTerms) == 0 { - options.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{Alias: "al2@latest"}} + if options.Spec.AMIFamily == nil { + options.Spec.AMIFamily = &v1beta1.AMIFamilyAL2 options.Status.AMIs = []v1beta1.AMI{ { ID: "ami-test1", diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index 12584aed340b..5e9530799a55 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -14,43 +14,35 @@ limitations under the License. package drift_test -import ( - "testing" - - karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" - - v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" - - "github.com/aws/karpenter-provider-aws/test/pkg/environment/aws" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -var env *aws.Environment -var amdAMI string -var nodeClass *v1.EC2NodeClass -var nodePool *karpv1.NodePool - -func TestDrift(t *testing.T) { - RegisterFailHandler(Fail) - BeforeSuite(func() { - env = aws.NewEnvironment(t) - }) - AfterSuite(func() { - env.Stop() - }) - RunSpecs(t, "Drift") -} - -var _ = BeforeEach(func() { - env.BeforeEach() - nodeClass = env.DefaultEC2NodeClass() - nodePool = env.DefaultNodePool(nodeClass) -}) -var _ = AfterEach(func() { env.Cleanup() }) -var _ = AfterEach(func() { env.AfterEach() }) +// import ( +// . "github.com/onsi/ginkgo/v2" +// . 
"github.com/onsi/gomega" +// ) +// var env *aws.Environment +// var amdAMI string +// var nodeClass *v1.EC2NodeClass +// var nodePool *karpv1.NodePool +// +// func TestDrift(t *testing.T) { +// RegisterFailHandler(Fail) +// BeforeSuite(func() { +// env = aws.NewEnvironment(t) +// }) +// AfterSuite(func() { +// env.Stop() +// }) +// RunSpecs(t, "Drift") +// } +// +// var _ = BeforeEach(func() { +// env.BeforeEach() +// nodeClass = env.DefaultEC2NodeClass() +// nodePool = env.DefaultNodePool(nodeClass) +// }) +// var _ = AfterEach(func() { env.Cleanup() }) +// var _ = AfterEach(func() { env.AfterEach() }) +// // var _ = Describe("Drift", func() { // var dep *appsv1.Deployment // var selector labels.Selector diff --git a/website/content/en/docs/getting-started/getting-started-with-karpenter/cloudformation.yaml b/website/content/en/docs/getting-started/getting-started-with-karpenter/cloudformation.yaml index b20462aa1d6d..a9760c9451d6 100644 --- a/website/content/en/docs/getting-started/getting-started-with-karpenter/cloudformation.yaml +++ b/website/content/en/docs/getting-started/getting-started-with-karpenter/cloudformation.yaml @@ -352,4 +352,4 @@ Resources: - EC2 Instance State-change Notification Targets: - Id: KarpenterInterruptionQueueTarget - Arn: !GetAtt KarpenterInterruptionQueue.Arn \ No newline at end of file + Arn: !GetAtt KarpenterInterruptionQueue.Arn diff --git a/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml b/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml index b20462aa1d6d..dde2e9999849 100644 --- a/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml +++ b/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml @@ -181,7 +181,7 @@ Resources: "Sid": "AllowSSMReadActions", "Effect": "Allow", "Resource": "arn:${AWS::Partition}:ssm:${AWS::Region}::parameter/aws/service/*", - 
"Action": "ssm:GetParameter" + "Action": "ssm:GetParametersByPath" }, { "Sid": "AllowPricingReadActions", @@ -352,4 +352,4 @@ Resources: - EC2 Instance State-change Notification Targets: - Id: KarpenterInterruptionQueueTarget - Arn: !GetAtt KarpenterInterruptionQueue.Arn \ No newline at end of file + Arn: !GetAtt KarpenterInterruptionQueue.Arn diff --git a/website/content/en/preview/reference/cloudformation.md b/website/content/en/preview/reference/cloudformation.md index 99a084661338..c9153487df46 100644 --- a/website/content/en/preview/reference/cloudformation.md +++ b/website/content/en/preview/reference/cloudformation.md @@ -310,7 +310,7 @@ This allows the Karpenter controller to do any of those read-only actions across #### AllowSSMReadActions -The AllowSSMReadActions Sid allows the Karpenter controller to read SSM parameters (`ssm:GetParameter`) from the current region for SSM parameters generated by ASW services. +The AllowSSMReadActions Sid allows the Karpenter controller to list SSM parameters (`ssm:GetParametersByPath`) from the current region for SSM parameters generated by ASW services. **NOTE**: If potentially sensitive information is stored in SSM parameters, you could consider restricting access to these messages further. ```json @@ -318,7 +318,7 @@ The AllowSSMReadActions Sid allows the Karpenter controller to read SSM paramete "Sid": "AllowSSMReadActions", "Effect": "Allow", "Resource": "arn:${AWS::Partition}:ssm:${AWS::Region}::parameter/aws/service/*", - "Action": "ssm:GetParameter" + "Action": "ssm:GetParametersByPath" } ``` From 5bd17b1a9fea3b1db78289c38315f6a2cc99ea89 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Mon, 15 Jul 2024 18:45:24 -0700 Subject: [PATCH 8/9] remaining changes Please enter the commit message for your changes. 
Lines starting --- pkg/apis/apis.go | 2 +- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 4 +- pkg/apis/v1/ec2nodeclass.go | 4 +- pkg/apis/v1/ec2nodeclass_conversion.go | 8 +- pkg/apis/v1/ec2nodeclass_conversion_test.go | 27 - pkg/apis/v1/labels.go | 2 +- pkg/apis/v1beta1/labels.go | 1 - pkg/cloudprovider/suite_test.go | 4 - pkg/fake/ssmapi.go | 45 +- pkg/providers/amifamily/al2.go | 2 +- pkg/providers/amifamily/al2023.go | 2 +- pkg/providers/amifamily/bottlerocket.go | 5 +- pkg/providers/amifamily/windows.go | 3 +- pkg/providers/version/version.go | 3 + test/suites/ami/suite_test.go | 9 + test/suites/drift/suite_test.go | 1878 +++++++++-------- .../drift/testdata/al2023_userdata_input.yaml | 15 + .../testdata/al2023_userdata_input.yaml | 3 +- 18 files changed, 1015 insertions(+), 1002 deletions(-) create mode 100644 test/suites/drift/testdata/al2023_userdata_input.yaml diff --git a/pkg/apis/apis.go b/pkg/apis/apis.go index 294b5983c8bd..3e255ad9275d 100644 --- a/pkg/apis/apis.go +++ b/pkg/apis/apis.go @@ -27,7 +27,7 @@ import ( //go:generate controller-gen crd object:headerFile="../../hack/boilerplate.go.txt" paths="./..." output:crd:artifacts:config=crds var ( Group = "karpenter.k8s.aws" - CompatabilityGroup = "compatibility." + Group + CompatibilityGroup = "compatibility." + Group //go:embed crds/karpenter.k8s.aws_ec2nodeclasses.yaml EC2NodeClassCRD []byte CRDs = append(apis.CRDs, diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 9dc67492e313..167be07394e1 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -69,8 +69,8 @@ spec: Alias specifies which EKS optimized AMI to select. Each alias consists of a family and a version, specified as "family@version". Valid families include: al2, al2023, bottlerocket, windows2019, and windows2022. 
- The version can either be pinned to a specific AMI release, with that AMIs version format (ex: "al2023@v20240625" or "bottlerocket@1.10.0"). - The version can also be set to "latest" for any family. Setting the version to latest will result in drift when a new AMI is released. + The version can either be pinned to a specific AMI release, with that AMIs version format (ex: "al2023@v20240625" or "bottlerocket@v1.10.0"). + The version can also be set to "latest" for any family. Setting the version to latest will result in drift when a new AMI is released. This is **not** recommended for production environments. Note: The Windows families do **not** support version pinning, and only latest may be used. maxLength: 30 type: string diff --git a/pkg/apis/v1/ec2nodeclass.go b/pkg/apis/v1/ec2nodeclass.go index d733cfdcf88d..3b47396b6bc2 100644 --- a/pkg/apis/v1/ec2nodeclass.go +++ b/pkg/apis/v1/ec2nodeclass.go @@ -167,8 +167,8 @@ type AMISelectorTerm struct { // Alias specifies which EKS optimized AMI to select. // Each alias consists of a family and a version, specified as "family@version". // Valid families include: al2, al2023, bottlerocket, windows2019, and windows2022. - // The version can either be pinned to a specific AMI release, with that AMIs version format (ex: "al2023@v20240625" or "bottlerocket@1.10.0"). - // The version can also be set to "latest" for any family. Setting the version to latest will result in drift when a new AMI is released. + // The version can either be pinned to a specific AMI release, with that AMIs version format (ex: "al2023@v20240625" or "bottlerocket@v1.10.0"). + // The version can also be set to "latest" for any family. Setting the version to latest will result in drift when a new AMI is released. This is **not** recommended for production environments. // Note: The Windows families do **not** support version pinning, and only latest may be used. 
// +kubebuilder:validation:XValidation:message="'alias' is improperly formatted, must match the format 'family@version'",rule="self.matches('^[a-zA-Z0-9]*@.*$')" // +kubebuilder:validation:XValidation:message="family is not supported, must be one of the following: 'al2', 'al2023', 'bottlerocket', 'windows2019', 'windows2022'",rule="self.find('^[^@]+') in ['al2','al2023','bottlerocket','windows2019','windows2022']" diff --git a/pkg/apis/v1/ec2nodeclass_conversion.go b/pkg/apis/v1/ec2nodeclass_conversion.go index 5b9c62cf4ebc..18ec075542d5 100644 --- a/pkg/apis/v1/ec2nodeclass_conversion.go +++ b/pkg/apis/v1/ec2nodeclass_conversion.go @@ -30,9 +30,6 @@ func (in *EC2NodeClass) ConvertTo(ctx context.Context, to apis.Convertible) erro v1beta1enc.ObjectMeta = in.ObjectMeta v1beta1enc.Spec.AMIFamily = lo.ToPtr(in.AMIFamily()) - v1beta1enc.Annotations = lo.Assign(v1beta1enc.Annotations, map[string]string{ - v1beta1.AnnotationAMIVersionCompatibility: in.AMIVersion(), - }) in.Spec.convertTo(&v1beta1enc.Spec) in.Status.convertTo((&v1beta1enc.Status)) return nil @@ -111,8 +108,9 @@ func (in *EC2NodeClass) ConvertFrom(ctx context.Context, from apis.Convertible) // In practice, this is only used to support the Ubuntu AMI family during conversion. 
switch lo.FromPtr(v1beta1enc.Spec.AMIFamily) { case AMIFamilyAL2, AMIFamilyAL2023, AMIFamilyBottlerocket, Windows2019, Windows2022: - version := lo.ValueOr(v1beta1enc.Annotations, v1beta1.AnnotationAMIVersionCompatibility, "latest") - in.Spec.AMISelectorTerms = []AMISelectorTerm{{Alias: fmt.Sprintf("%s@%s", strings.ToLower(lo.FromPtr(v1beta1enc.Spec.AMIFamily)), version)}} + in.Spec.AMISelectorTerms = []AMISelectorTerm{{ + Alias: fmt.Sprintf("%s@latest", strings.ToLower(lo.FromPtr(v1beta1enc.Spec.AMIFamily))), + }} default: in.Annotations = lo.Assign(in.Annotations, map[string]string{ AnnotationAMIFamilyCompatibility: lo.FromPtr(v1beta1enc.Spec.AMIFamily), diff --git a/pkg/apis/v1/ec2nodeclass_conversion_test.go b/pkg/apis/v1/ec2nodeclass_conversion_test.go index 7507bc8351d8..38b6235e9891 100644 --- a/pkg/apis/v1/ec2nodeclass_conversion_test.go +++ b/pkg/apis/v1/ec2nodeclass_conversion_test.go @@ -15,9 +15,6 @@ limitations under the License. package v1_test import ( - "fmt" - "strings" - "github.com/awslabs/operatorpkg/status" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -47,14 +44,6 @@ var _ = Describe("Convert v1 to v1beta1 EC2NodeClass API", func() { Annotations: map[string]string{"foo": "bar"}, }) Expect(v1ec2nodeclass.ConvertTo(ctx, v1beta1ec2nodeclass)).To(Succeed()) - - // Remove the compatibility annotations from the EC2NodeClass. 
- v1beta1ec2nodeclass.ObjectMeta.Annotations = lo.OmitByKeys(v1beta1ec2nodeclass.ObjectMeta.Annotations, []string{ - v1beta1.AnnotationAMIVersionCompatibility, - }) - if len(v1beta1ec2nodeclass.ObjectMeta.Annotations) == 0 { - v1beta1ec2nodeclass.ObjectMeta.Annotations = nil - } Expect(v1beta1ec2nodeclass.ObjectMeta).To(BeEquivalentTo(v1ec2nodeclass.ObjectMeta)) }) Context("EC2NodeClass Spec", func() { @@ -128,14 +117,6 @@ var _ = Describe("Convert v1 to v1beta1 EC2NodeClass API", func() { Expect(v1ec2nodeclass.ConvertTo(ctx, v1beta1ec2nodeclass)).To(Succeed()) Expect(lo.FromPtr(v1beta1ec2nodeclass.Spec.AMIFamily)).To(Equal(v1beta1.AMIFamilyAL2023)) }) - It("should convert v1 ec2nodeclass alias with pinned version", func() { - family := AMIFamilyAL2023 - version := "v20240625" - v1ec2nodeclass.Spec.AMISelectorTerms = []AMISelectorTerm{{Alias: fmt.Sprintf("%s@%s", strings.ToLower(family), version)}} - Expect(v1ec2nodeclass.ConvertTo(ctx, v1beta1ec2nodeclass)).To(Succeed()) - Expect(lo.FromPtr(v1beta1ec2nodeclass.Spec.AMIFamily)).To(Equal(family)) - Expect(v1beta1ec2nodeclass.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationAMIVersionCompatibility, version)) - }) It("should convert v1 ec2nodeclass with AMIFamily compat annotation", func() { v1ec2nodeclass.Annotations = lo.Assign(v1ec2nodeclass.Annotations, map[string]string{ AnnotationAMIFamilyCompatibility: v1beta1.AMIFamilyAL2023, @@ -384,14 +365,6 @@ var _ = Describe("Convert v1beta1 to v1 EC2NodeClass API", func() { Expect(v1ec2nodeclass.ConvertFrom(ctx, v1beta1ec2nodeclass)).To(Succeed()) Expect(v1ec2nodeclass.Spec.AMISelectorTerms).To(ContainElement(AMISelectorTerm{Alias: "al2023@latest"})) }) - It("should convert v1beta1 ec2nodeclass ami family (pinned AMI)", func() { - v1beta1ec2nodeclass.Spec.AMIFamily = &v1beta1.AMIFamilyAL2023 - v1beta1ec2nodeclass.Annotations = lo.Assign(v1beta1ec2nodeclass.Annotations, map[string]string{ - v1beta1.AnnotationAMIVersionCompatibility: "v20240625", - }) - 
Expect(v1ec2nodeclass.ConvertFrom(ctx, v1beta1ec2nodeclass)).To(Succeed()) - Expect(v1ec2nodeclass.Spec.AMISelectorTerms).To(ContainElement(AMISelectorTerm{Alias: "al2023@v20240625"})) - }) It("should convert v1beta1 ec2nodeclass ami family (ubuntu compat)", func() { v1beta1ec2nodeclass.Spec.AMIFamily = &v1beta1.AMIFamilyUbuntu Expect(v1ec2nodeclass.ConvertFrom(ctx, v1beta1ec2nodeclass)).To(Succeed()) diff --git a/pkg/apis/v1/labels.go b/pkg/apis/v1/labels.go index 5f780d9c2bfb..1c96373a8b65 100644 --- a/pkg/apis/v1/labels.go +++ b/pkg/apis/v1/labels.go @@ -119,7 +119,7 @@ var ( LabelInstanceAcceleratorManufacturer = apis.Group + "/instance-accelerator-manufacturer" LabelInstanceAcceleratorCount = apis.Group + "/instance-accelerator-count" AnnotationEC2NodeClassHash = apis.Group + "/ec2nodeclass-hash" - AnnotationKubeletCompatibilityHash = apis.CompatabilityGroup + "/kubelet-drift-hash" + AnnotationKubeletCompatibilityHash = apis.CompatibilityGroup + "/kubelet-drift-hash" AnnotationEC2NodeClassHashVersion = apis.Group + "/ec2nodeclass-hash-version" AnnotationInstanceTagged = apis.Group + "/tagged" AnnotationAMIFamilyCompatibility = apis.CompatibilityGroup + "/v1beta1-ami-family-conversion" diff --git a/pkg/apis/v1beta1/labels.go b/pkg/apis/v1beta1/labels.go index 6d3e13033434..d7ae21c84230 100644 --- a/pkg/apis/v1beta1/labels.go +++ b/pkg/apis/v1beta1/labels.go @@ -121,7 +121,6 @@ var ( AnnotationEC2NodeClassHash = apis.Group + "/ec2nodeclass-hash" AnnotationEC2NodeClassHashVersion = apis.Group + "/ec2nodeclass-hash-version" AnnotationInstanceTagged = apis.Group + "/tagged" - AnnotationAMIVersionCompatibility = apis.CompatibilityGroup + "/v1-ami-version-conversion" TagNodeClaim = coreapis.Group + "/nodeclaim" TagManagedLaunchTemplate = apis.Group + "/cluster" diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 2b78ae16ff0a..9fbe90a7bae0 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -34,7 +34,6 
@@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" - "github.com/aws/aws-sdk-go/service/ssm" opstatus "github.com/awslabs/operatorpkg/status" "github.com/imdario/mergo" "github.com/samber/lo" @@ -601,9 +600,6 @@ var _ = Describe("CloudProvider", func() { validSecurityGroup = fake.SecurityGroupID() validSubnet1 = fake.SubnetID() validSubnet2 = fake.SubnetID() - awsEnv.SSMAPI.GetParameterOutput = &ssm.GetParameterOutput{ - Parameter: &ssm.Parameter{Value: aws.String(armAMIID)}, - } awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{ Images: []*ec2.Image{ { diff --git a/pkg/fake/ssmapi.go b/pkg/fake/ssmapi.go index e3853365bbcc..7d6e6403833e 100644 --- a/pkg/fake/ssmapi.go +++ b/pkg/fake/ssmapi.go @@ -22,13 +22,10 @@ import ( "strings" "github.com/Pallinder/go-randomdata" - "github.com/aws/aws-sdk-go/aws/awserr" - "github.com/mitchellh/hashstructure/v2" "github.com/samber/lo" "github.com/aws/karpenter-provider-aws/pkg/providers/version" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/request" "github.com/aws/aws-sdk-go/service/ssm" "github.com/aws/aws-sdk-go/service/ssm/ssmiface" @@ -37,35 +34,16 @@ import ( type SSMAPI struct { ssmiface.SSMAPI Parameters map[string]string - GetParameterOutput *ssm.GetParameterOutput GetParametersByPathOutput *ssm.GetParametersByPathOutput WantErr error -} -func NewSSMAPI() *SSMAPI { - return &SSMAPI{} + defaultParametersForPath map[string][]*ssm.Parameter } -func (a SSMAPI) GetParameterWithContext(_ context.Context, input *ssm.GetParameterInput, _ ...request.Option) (*ssm.GetParameterOutput, error) { - if a.WantErr != nil { - return nil, a.WantErr - } - if len(a.Parameters) > 0 { - if amiID, ok := a.Parameters[*input.Name]; ok { - return &ssm.GetParameterOutput{ - Parameter: &ssm.Parameter{Value: aws.String(amiID)}, - }, nil - } - return nil, awserr.New(ssm.ErrCodeParameterNotFound, fmt.Sprintf("%s couldn't be found", *input.Name), nil) - } - hc, _ := 
hashstructure.Hash(input.Name, hashstructure.FormatV2, nil) - if a.GetParameterOutput != nil { - return a.GetParameterOutput, nil +func NewSSMAPI() *SSMAPI { + return &SSMAPI{ + defaultParametersForPath: map[string][]*ssm.Parameter{}, } - - return &ssm.GetParameterOutput{ - Parameter: &ssm.Parameter{Value: aws.String(fmt.Sprintf("test-ami-id-%x", hc))}, - }, nil } func (a SSMAPI) GetParametersByPathPagesWithContext(_ context.Context, input *ssm.GetParametersByPathInput, f func(*ssm.GetParametersByPathOutput, bool) bool, _ ...request.Option) error { @@ -99,14 +77,19 @@ func (a SSMAPI) GetParametersByPathPagesWithContext(_ context.Context, input *ss }, true) return nil } - if params := getDefaultParametersForPath(lo.FromPtr(input.Path)); params != nil { + if params := a.getDefaultParametersForPath(lo.FromPtr(input.Path)); params != nil { f(&ssm.GetParametersByPathOutput{Parameters: params}, true) return nil } return fmt.Errorf("path %q does not exist", lo.FromPtr(input.Path)) } -func getDefaultParametersForPath(path string) []*ssm.Parameter { +func (a SSMAPI) getDefaultParametersForPath(path string) []*ssm.Parameter { + // If we've already generated default parameters, return the same parameters across calls. This ensures we don't + // drift due to different results from one call to the next. 
+ if params, ok := a.defaultParametersForPath[path]; ok { + return params + } suffixes := map[string][]string{ `^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2$`: []string{"recommended/image_id"}, `^\/aws\/service\/eks/optimized-ami\/.*\/amazon-linux-2-arm64$`: []string{"recommended/image_id"}, @@ -134,18 +117,20 @@ func getDefaultParametersForPath(path string) []*ssm.Parameter { if !regexp.MustCompile(matchStr).MatchString(path) { continue } - return lo.Map(suffixes, func(suffix string, _ int) *ssm.Parameter { + params := lo.Map(suffixes, func(suffix string, _ int) *ssm.Parameter { return &ssm.Parameter{ Name: lo.ToPtr(fmt.Sprintf("%s/%s", path, suffix)), Value: lo.ToPtr(fmt.Sprintf("ami-%s", randomdata.Alphanumeric(16))), } }) + a.defaultParametersForPath[path] = params + return params } return nil } func (a *SSMAPI) Reset() { - a.GetParameterOutput = nil + a.GetParametersByPathOutput = nil a.Parameters = nil a.WantErr = nil } diff --git a/pkg/providers/amifamily/al2.go b/pkg/providers/amifamily/al2.go index 5037ee98621d..493c4ca449d2 100644 --- a/pkg/providers/amifamily/al2.go +++ b/pkg/providers/amifamily/al2.go @@ -55,7 +55,7 @@ func (a AL2) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k } { results, err := ssmProvider.List(ctx, rootPath) if err != nil { - log.FromContext(ctx).WithValues("path", rootPath, "family", "AL2").Error(err, "discovering AMIs from ssm") + log.FromContext(ctx).WithValues("path", rootPath, "family", "al2").Error(err, "discovering AMIs from ssm") continue } for path, value := range results { diff --git a/pkg/providers/amifamily/al2023.go b/pkg/providers/amifamily/al2023.go index 90b9baf2f24f..8cc299c7f19e 100644 --- a/pkg/providers/amifamily/al2023.go +++ b/pkg/providers/amifamily/al2023.go @@ -47,7 +47,7 @@ func (a AL2023) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider rootPath := fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023", k8sVersion) results, err := 
ssmProvider.List(ctx, rootPath) if err != nil { - log.FromContext(ctx).WithValues("path", rootPath, "family", "AL2023").Error(err, "discovering AMIs from ssm") + log.FromContext(ctx).WithValues("path", rootPath, "family", "al2023").Error(err, "discovering AMIs from ssm") return DescribeImageQuery{}, fmt.Errorf(`failed to discover any AMIs for alias "al2023@%s"`, amiVersion) } for path, value := range results { diff --git a/pkg/providers/amifamily/bottlerocket.go b/pkg/providers/amifamily/bottlerocket.go index 0014868bfb34..703784275f30 100644 --- a/pkg/providers/amifamily/bottlerocket.go +++ b/pkg/providers/amifamily/bottlerocket.go @@ -53,13 +53,14 @@ func (b Bottlerocket) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Pr } { results, err := ssmProvider.List(ctx, rootPath) if err != nil { - log.FromContext(ctx).WithValues("path", rootPath).Error(err, "discovering AMIs from ssm") + log.FromContext(ctx).WithValues("path", rootPath, "family", "bottlerocket").Error(err, "discovering AMIs from ssm") continue } for path, value := range results { pathComponents := strings.Split(path, "/") // Only select image_id paths which match the desired AMI version - if len(pathComponents) != 8 || pathComponents[7] != "image_id" || pathComponents[6] != amiVersion { + // Note: The SSM path doesn't prefix the version with a v, but Bottlerocket's GitHub releases do. We'll support both. 
+ if len(pathComponents) != 8 || pathComponents[7] != "image_id" || pathComponents[6] != strings.TrimPrefix(amiVersion, "v") { continue } imageIDs = append(imageIDs, lo.ToPtr(value)) diff --git a/pkg/providers/amifamily/windows.go b/pkg/providers/amifamily/windows.go index f50a7e1a0c66..3324e6d1c540 100644 --- a/pkg/providers/amifamily/windows.go +++ b/pkg/providers/amifamily/windows.go @@ -53,7 +53,6 @@ func (w Windows) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provide imageIDs := make([]*string, 0, 5) // SSM aliases are only maintained for the latest Windows AMI releases if amiVersion != AMIVersionLatest { - return DescribeImageQuery{}, fmt.Errorf(`discovering AMIs for alias "windows%s@%s", %q is not a supported version`, w.Version, amiVersion, amiVersion) } // Example Path: /aws/service/ami-windows-latest/Windows_Server-2022-English-Core-EKS_Optimized-1.30/image_id @@ -63,7 +62,7 @@ func (w Windows) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provide } for path, value := range results { pathComponents := strings.Split(path, "/") - if len(pathComponents) != 6 && pathComponents[5] != "image_id" { + if len(pathComponents) != 6 || pathComponents[5] != "image_id" { continue } matches := regexp.MustCompile(`^Windows_Server-(\d+)-English-Core-EKS_Optimized-(\d\.\d+)$`).FindStringSubmatch(pathComponents[4]) diff --git a/pkg/providers/version/version.go b/pkg/providers/version/version.go index c820d611c508..4aa8a01f4abe 100644 --- a/pkg/providers/version/version.go +++ b/pkg/providers/version/version.go @@ -77,6 +77,9 @@ func (p *DefaultProvider) Get(ctx context.Context) (string, error) { return version, nil } +// SupportedK8sVersions returns a slice of version strings in format "major.minor" for all versions of k8s supported by +// this version of Karpenter. 
+// Note: Assumes k8s only has a single major version (1.x) func SupportedK8sVersions() []string { minMinor := lo.Must(strconv.Atoi(strings.Split(MinK8sVersion, ".")[1])) maxMinor := lo.Must(strconv.Atoi(strings.Split(MaxK8sVersion, ".")[1])) diff --git a/test/suites/ami/suite_test.go b/test/suites/ami/suite_test.go index 0769c118eb07..ca8b119e0f6a 100644 --- a/test/suites/ami/suite_test.go +++ b/test/suites/ami/suite_test.go @@ -71,8 +71,12 @@ var _ = AfterEach(func() { env.AfterEach() }) var _ = Describe("AMI", func() { var customAMI string + var customUserData *string BeforeEach(func() { customAMI = env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) + rawContent, err := os.ReadFile("testdata/al2023_userdata_input.yaml") + Expect(err).ToNot(HaveOccurred()) + customUserData = lo.ToPtr(fmt.Sprintf(string(rawContent), env.ClusterName, env.ClusterEndpoint, env.ExpectCABundle())) }) It("should use the AMI defined by the AMI Selector Terms", func() { @@ -82,6 +86,7 @@ var _ = Describe("AMI", func() { ID: customAMI, }, } + nodeClass.Spec.UserData = customUserData env.ExpectCreated(pod, nodeClass, nodePool) env.EventuallyExpectHealthy(pod) env.ExpectCreatedNodeCount("==", 1) @@ -99,6 +104,7 @@ var _ = Describe("AMI", func() { ID: oldCustomAMI, }, } + nodeClass.Spec.UserData = customUserData pod := coretest.Pod() env.ExpectCreated(pod, nodeClass, nodePool) @@ -119,6 +125,7 @@ var _ = Describe("AMI", func() { Owner: "fakeOwnerValue", }, } + nodeClass.Spec.UserData = customUserData pod := coretest.Pod() env.ExpectCreated(pod, nodeClass, nodePool) @@ -137,6 +144,7 @@ var _ = Describe("AMI", func() { Name: *output.Images[0].Name, }, } + nodeClass.Spec.UserData = customUserData pod := coretest.Pod() env.ExpectCreated(pod, nodeClass, nodePool) @@ -151,6 +159,7 @@ var _ = Describe("AMI", func() { ID: customAMI, }, } + nodeClass.Spec.UserData = customUserData pod := coretest.Pod() 
env.ExpectCreated(pod, nodeClass, nodePool) diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index 5e9530799a55..c1d24267831e 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -14,925 +14,959 @@ limitations under the License. package drift_test -// import ( -// . "github.com/onsi/ginkgo/v2" -// . "github.com/onsi/gomega" -// ) - -// var env *aws.Environment -// var amdAMI string -// var nodeClass *v1.EC2NodeClass -// var nodePool *karpv1.NodePool -// -// func TestDrift(t *testing.T) { -// RegisterFailHandler(Fail) -// BeforeSuite(func() { -// env = aws.NewEnvironment(t) -// }) -// AfterSuite(func() { -// env.Stop() -// }) -// RunSpecs(t, "Drift") -// } -// -// var _ = BeforeEach(func() { -// env.BeforeEach() -// nodeClass = env.DefaultEC2NodeClass() -// nodePool = env.DefaultNodePool(nodeClass) -// }) -// var _ = AfterEach(func() { env.Cleanup() }) -// var _ = AfterEach(func() { env.AfterEach() }) -// -// var _ = Describe("Drift", func() { -// var dep *appsv1.Deployment -// var selector labels.Selector -// var numPods int -// BeforeEach(func() { -// amdAMI = env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) -// numPods = 1 -// // Add pods with a do-not-disrupt annotation so that we can check node metadata before we disrupt -// dep = coretest.Deployment(coretest.DeploymentOptions{ -// Replicas: int32(numPods), -// PodOptions: coretest.PodOptions{ -// ObjectMeta: metav1.ObjectMeta{ -// Labels: map[string]string{ -// "app": "my-app", -// }, -// Annotations: map[string]string{ -// karpv1.DoNotDisruptAnnotationKey: "true", -// }, -// }, -// TerminationGracePeriodSeconds: lo.ToPtr[int64](0), -// }, -// }) -// selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) -// }) -// Context("Budgets", func() { -// It("should respect budgets for empty drift", func() { -// nodePool = 
coretest.ReplaceRequirements(nodePool, -// karpv1.NodeSelectorRequirementWithMinValues{ -// NodeSelectorRequirement: corev1.NodeSelectorRequirement{ -// Key: v1.LabelInstanceSize, -// Operator: corev1.NodeSelectorOpIn, -// Values: []string{"2xlarge"}, -// }, -// }, -// ) -// // We're expecting to create 3 nodes, so we'll expect to see 2 nodes deleting at one time. -// nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ -// Nodes: "50%", -// }} -// var numPods int32 = 6 -// dep = coretest.Deployment(coretest.DeploymentOptions{ -// Replicas: numPods, -// PodOptions: coretest.PodOptions{ -// ObjectMeta: metav1.ObjectMeta{ -// Annotations: map[string]string{ -// karpv1.DoNotDisruptAnnotationKey: "true", -// }, -// Labels: map[string]string{"app": "large-app"}, -// }, -// // Each 2xlarge has 8 cpu, so each node should fit 2 pods. -// ResourceRequirements: corev1.ResourceRequirements{ -// Requests: corev1.ResourceList{ -// corev1.ResourceCPU: resource.MustParse("3"), -// }, -// }, -// }, -// }) -// selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) -// env.ExpectCreated(nodeClass, nodePool, dep) -// -// nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) -// nodes := env.EventuallyExpectCreatedNodeCount("==", 3) -// env.EventuallyExpectHealthyPodCount(selector, int(numPods)) -// -// // List nodes so that we get any updated information on the nodes. If we don't -// // we have the potential to over-write any changes Karpenter makes to the nodes. -// // Add a finalizer to each node so that we can stop termination disruptions -// By("adding finalizers to the nodes to prevent termination") -// for _, node := range nodes { -// Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) -// node.Finalizers = append(node.Finalizers, common.TestingFinalizer) -// env.ExpectUpdated(node) -// } -// -// By("making the nodes empty") -// // Delete the deployment to make all nodes empty. 
-// env.ExpectDeleted(dep) -// -// // Drift the nodeclaims -// By("drift the nodeclaims") -// nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} -// env.ExpectUpdated(nodePool) -// -// env.EventuallyExpectDrifted(nodeClaims...) -// -// // Ensure that we get two nodes tainted, and they have overlap during the drift -// env.EventuallyExpectTaintedNodeCount("==", 2) -// nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 5*time.Second) -// -// // Remove the finalizer from each node so that we can terminate -// for _, node := range nodes { -// Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) -// } -// -// // After the deletion timestamp is set and all pods are drained -// // the node should be gone -// env.EventuallyExpectNotFound(nodes[0], nodes[1]) -// -// nodes = env.EventuallyExpectTaintedNodeCount("==", 1) -// Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed()) -// env.EventuallyExpectNotFound(nodes[0]) -// }) -// It("should respect budgets for non-empty delete drift", func() { -// nodePool = coretest.ReplaceRequirements(nodePool, -// karpv1.NodeSelectorRequirementWithMinValues{ -// NodeSelectorRequirement: corev1.NodeSelectorRequirement{ -// Key: v1.LabelInstanceSize, -// Operator: corev1.NodeSelectorOpIn, -// Values: []string{"2xlarge"}, -// }, -// }, -// ) -// // We're expecting to create 3 nodes, so we'll expect to see at most 2 nodes deleting at one time. -// nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ -// Nodes: "50%", -// }} -// var numPods int32 = 9 -// dep = coretest.Deployment(coretest.DeploymentOptions{ -// Replicas: numPods, -// PodOptions: coretest.PodOptions{ -// ObjectMeta: metav1.ObjectMeta{ -// Annotations: map[string]string{ -// karpv1.DoNotDisruptAnnotationKey: "true", -// }, -// Labels: map[string]string{"app": "large-app"}, -// }, -// // Each 2xlarge has 8 cpu, so each node should fit no more than 3 pods. 
-// ResourceRequirements: corev1.ResourceRequirements{ -// Requests: corev1.ResourceList{ -// corev1.ResourceCPU: resource.MustParse("2100m"), -// }, -// }, -// }, -// }) -// selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) -// env.ExpectCreated(nodeClass, nodePool, dep) -// -// nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) -// nodes := env.EventuallyExpectCreatedNodeCount("==", 3) -// env.EventuallyExpectHealthyPodCount(selector, int(numPods)) -// -// By("scaling down the deployment") -// // Update the deployment to a third of the replicas. -// dep.Spec.Replicas = lo.ToPtr[int32](3) -// env.ExpectUpdated(dep) -// -// // First expect there to be 3 pods, then try to spread the pods. -// env.EventuallyExpectHealthyPodCount(selector, 3) -// env.ForcePodsToSpread(nodes...) -// env.EventuallyExpectHealthyPodCount(selector, 3) -// -// By("cordoning and adding finalizer to the nodes") -// // Add a finalizer to each node so that we can stop termination disruptions -// for _, node := range nodes { -// Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) -// node.Finalizers = append(node.Finalizers, common.TestingFinalizer) -// env.ExpectUpdated(node) -// } -// -// By("drifting the nodes") -// // Drift the nodeclaims -// nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} -// env.ExpectUpdated(nodePool) -// -// env.EventuallyExpectDrifted(nodeClaims...) 
-// -// By("enabling disruption by removing the do not disrupt annotation") -// pods := env.EventuallyExpectHealthyPodCount(selector, 3) -// // Remove the do-not-disrupt annotation so that the nodes are now disruptable -// for _, pod := range pods { -// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) -// env.ExpectUpdated(pod) -// } -// -// // Ensure that we get two nodes tainted, and they have overlap during the drift -// env.EventuallyExpectTaintedNodeCount("==", 2) -// nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 30*time.Second) -// -// By("removing the finalizer from the nodes") -// Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed()) -// Expect(env.ExpectTestingFinalizerRemoved(nodes[1])).To(Succeed()) -// -// // After the deletion timestamp is set and all pods are drained -// // the node should be gone -// env.EventuallyExpectNotFound(nodes[0], nodes[1]) -// }) -// It("should respect budgets for non-empty replace drift", func() { -// appLabels := map[string]string{"app": "large-app"} -// nodePool.Labels = appLabels -// // We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time. 
-// nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ -// Nodes: "3", -// }} -// -// // Create a 5 pod deployment with hostname inter-pod anti-affinity to ensure each pod is placed on a unique node -// numPods = 5 -// selector = labels.SelectorFromSet(appLabels) -// deployment := coretest.Deployment(coretest.DeploymentOptions{ -// Replicas: int32(numPods), -// PodOptions: coretest.PodOptions{ -// ObjectMeta: metav1.ObjectMeta{ -// Labels: appLabels, -// }, -// PodAntiRequirements: []corev1.PodAffinityTerm{{ -// TopologyKey: corev1.LabelHostname, -// LabelSelector: &metav1.LabelSelector{ -// MatchLabels: appLabels, -// }, -// }}, -// }, -// }) -// -// env.ExpectCreated(nodeClass, nodePool, deployment) -// -// originalNodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", numPods) -// originalNodes := env.EventuallyExpectCreatedNodeCount("==", numPods) -// -// // Check that all deployment pods are online -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// -// By("cordoning and adding finalizer to the nodes") -// // Add a finalizer to each node so that we can stop termination disruptions -// for _, node := range originalNodes { -// Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) -// node.Finalizers = append(node.Finalizers, common.TestingFinalizer) -// env.ExpectUpdated(node) -// } -// -// By("drifting the nodepool") -// nodePool.Spec.Template.Annotations = lo.Assign(nodePool.Spec.Template.Annotations, map[string]string{"test-annotation": "drift"}) -// env.ExpectUpdated(nodePool) -// -// // Ensure that we get three nodes tainted, and they have overlap during the drift -// env.EventuallyExpectTaintedNodeCount("==", 3) -// env.EventuallyExpectNodeClaimCount("==", 8) -// env.EventuallyExpectNodeCount("==", 8) -// env.ConsistentlyExpectDisruptionsWithNodeCount(3, 8, 5*time.Second) -// -// for _, node := range originalNodes { -// Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) -// } -// -// // 
Eventually expect all the nodes to be rolled and completely removed -// // Since this completes the disruption operation, this also ensures that we aren't leaking nodes into subsequent -// // tests since nodeclaims that are actively replacing but haven't brought-up nodes yet can register nodes later -// env.EventuallyExpectNotFound(lo.Map(originalNodes, func(n *corev1.Node, _ int) client.Object { return n })...) -// env.EventuallyExpectNotFound(lo.Map(originalNodeClaims, func(n *karpv1.NodeClaim, _ int) client.Object { return n })...) -// env.ExpectNodeClaimCount("==", 5) -// env.ExpectNodeCount("==", 5) -// }) -// It("should not allow drift if the budget is fully blocking", func() { -// // We're going to define a budget that doesn't allow any drift to happen -// nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ -// Nodes: "0", -// }} -// -// dep.Spec.Template.Annotations = nil -// env.ExpectCreated(nodeClass, nodePool, dep) -// -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// env.EventuallyExpectCreatedNodeCount("==", 1) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// -// By("drifting the nodes") -// // Drift the nodeclaims -// nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} -// env.ExpectUpdated(nodePool) -// -// env.EventuallyExpectDrifted(nodeClaim) -// env.ConsistentlyExpectNoDisruptions(1, time.Minute) -// }) -// It("should not allow drift if the budget is fully blocking during a scheduled time", func() { -// // We're going to define a budget that doesn't allow any drift to happen -// // This is going to be on a schedule that only lasts 30 minutes, whose window starts 15 minutes before -// // the current time and extends 15 minutes past the current time -// // Times need to be in UTC since the karpenter containers were built in UTC time -// windowStart := time.Now().Add(-time.Minute * 15).UTC() -// nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ -// Nodes: "0", -// Schedule: 
lo.ToPtr(fmt.Sprintf("%d %d * * *", windowStart.Minute(), windowStart.Hour())), -// Duration: &metav1.Duration{Duration: time.Minute * 30}, -// }} -// -// dep.Spec.Template.Annotations = nil -// env.ExpectCreated(nodeClass, nodePool, dep) -// -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// env.EventuallyExpectCreatedNodeCount("==", 1) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// -// By("drifting the nodes") -// // Drift the nodeclaims -// nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} -// env.ExpectUpdated(nodePool) -// -// env.EventuallyExpectDrifted(nodeClaim) -// env.ConsistentlyExpectNoDisruptions(1, time.Minute) -// }) -// }) -// It("should disrupt nodes that have drifted due to AMIs", func() { -// // Choose an old static image (AL2023 AMIs don't exist for 1.22) -// oldCustomAMI := env.GetAMIBySSMPath(lo.Ternary(env.K8sMinorVersion() == 23, -// "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-1.23-v20240307/image_id", -// fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)), -// )) -// nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 -// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: oldCustomAMI}} -// -// env.ExpectCreated(dep, nodeClass, nodePool) -// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] -// env.ExpectCreatedNodeCount("==", 1) -// -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// node := env.EventuallyExpectNodeCount("==", 1)[0] -// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} -// env.ExpectCreatedOrUpdated(nodeClass) -// -// env.EventuallyExpectDrifted(nodeClaim) -// -// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) -// env.ExpectUpdated(pod) -// env.EventuallyExpectNotFound(pod, nodeClaim, node) -// env.EventuallyExpectHealthyPodCount(selector, 
numPods) -// }) -// It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() { -// armAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", env.K8sVersion())) -// nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 -// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: armAMI}} -// -// env.ExpectCreated(dep, nodeClass, nodePool) -// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] -// env.ExpectCreatedNodeCount("==", 1) -// -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// node := env.EventuallyExpectNodeCount("==", 1)[0] -// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} -// env.ExpectCreatedOrUpdated(nodeClass) -// -// env.EventuallyExpectDrifted(nodeClaim) -// -// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) -// env.ExpectUpdated(pod) -// env.EventuallyExpectNotFound(pod, nodeClaim, node) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// }) -// It("should not disrupt nodes that have drifted without the featureGate enabled", func() { -// env.ExpectSettingsOverridden(corev1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=false"}) -// -// // Choose an old static image (AL2023 AMIs don't exist for 1.22) -// oldCustomAMI := env.GetAMIBySSMPath(lo.Ternary(env.K8sMinorVersion() == 23, -// "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-1.23-v20240307/image_id", -// fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)), -// )) -// nodeClass.Spec.AMIFamily = &v1.AMIFamilyAL2023 -// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: oldCustomAMI}} -// -// env.ExpectCreated(dep, nodeClass, nodePool) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// env.ExpectCreatedNodeCount("==", 1) -// -// node := 
env.Monitor.CreatedNodes()[0] -// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} -// env.ExpectUpdated(nodeClass) -// -// // We should consistently get the same node existing for a minute -// Consistently(func(g Gomega) { -// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), &corev1.Node{})).To(Succeed()) -// }).WithTimeout(time.Minute).Should(Succeed()) -// }) -// It("should disrupt nodes that have drifted due to securitygroup", func() { -// By("getting the cluster vpc id") -// output, err := env.EKSAPI.DescribeCluster(&eks.DescribeClusterInput{Name: awssdk.String(env.ClusterName)}) -// Expect(err).To(BeNil()) -// -// By("creating new security group") -// createSecurityGroup := &ec2.CreateSecurityGroupInput{ -// GroupName: awssdk.String("security-group-drift"), -// Description: awssdk.String("End-to-end Drift Test, should delete after drift test is completed"), -// VpcId: output.Cluster.ResourcesVpcConfig.VpcId, -// TagSpecifications: []*ec2.TagSpecification{ -// { -// ResourceType: awssdk.String("security-group"), -// Tags: []*ec2.Tag{ -// { -// Key: awssdk.String("karpenter.sh/discovery"), -// Value: awssdk.String(env.ClusterName), -// }, -// { -// Key: awssdk.String(coretest.DiscoveryLabel), -// Value: awssdk.String(env.ClusterName), -// }, -// { -// Key: awssdk.String("creation-date"), -// Value: awssdk.String(time.Now().Format(time.RFC3339)), -// }, -// }, -// }, -// }, -// } -// _, _ = env.EC2API.CreateSecurityGroup(createSecurityGroup) -// -// By("looking for security groups") -// var securitygroups []aws.SecurityGroup -// var testSecurityGroup aws.SecurityGroup -// Eventually(func(g Gomega) { -// securitygroups = env.GetSecurityGroups(map[string]string{"karpenter.sh/discovery": env.ClusterName}) -// testSecurityGroup, _ = lo.Find(securitygroups, func(sg aws.SecurityGroup) bool { -// return awssdk.StringValue(sg.GroupName) == "security-group-drift" -// }) -// g.Expect(testSecurityGroup).ToNot(BeNil()) -// 
}).Should(Succeed()) -// -// By("creating a new provider with the new securitygroup") -// awsIDs := lo.FilterMap(securitygroups, func(sg aws.SecurityGroup, _ int) (string, bool) { -// if awssdk.StringValue(sg.GroupId) != awssdk.StringValue(testSecurityGroup.GroupId) { -// return awssdk.StringValue(sg.GroupId), true -// } -// return "", false -// }) -// sgTerms := []v1.SecurityGroupSelectorTerm{{ID: awssdk.StringValue(testSecurityGroup.GroupId)}} -// for _, id := range awsIDs { -// sgTerms = append(sgTerms, v1.SecurityGroupSelectorTerm{ID: id}) -// } -// nodeClass.Spec.SecurityGroupSelectorTerms = sgTerms -// -// env.ExpectCreated(dep, nodeClass, nodePool) -// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// node := env.ExpectCreatedNodeCount("==", 1)[0] -// -// sgTerms = lo.Reject(sgTerms, func(t v1.SecurityGroupSelectorTerm, _ int) bool { -// return t.ID == awssdk.StringValue(testSecurityGroup.GroupId) -// }) -// nodeClass.Spec.SecurityGroupSelectorTerms = sgTerms -// env.ExpectCreatedOrUpdated(nodeClass) -// -// env.EventuallyExpectDrifted(nodeClaim) -// -// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) -// env.ExpectUpdated(pod) -// env.EventuallyExpectNotFound(pod, nodeClaim, node) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// }) -// It("should disrupt nodes that have drifted due to subnets", func() { -// subnets := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) -// Expect(len(subnets)).To(BeNumerically(">", 1)) -// -// nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{ID: subnets[0].ID}} -// -// env.ExpectCreated(dep, nodeClass, nodePool) -// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// node := env.ExpectCreatedNodeCount("==", 1)[0] -// -// nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{ID: 
subnets[1].ID}} -// env.ExpectCreatedOrUpdated(nodeClass) -// -// env.EventuallyExpectDrifted(nodeClaim) -// -// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) -// env.ExpectUpdated(pod) -// env.EventuallyExpectNotFound(pod, node) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// }) -// DescribeTable("NodePool Drift", func(nodeClaimTemplate karpv1.NodeClaimTemplate) { -// updatedNodePool := coretest.NodePool( -// karpv1.NodePool{ -// Spec: karpv1.NodePoolSpec{ -// Template: karpv1.NodeClaimTemplate{ -// Spec: karpv1.NodeClaimSpec{ -// NodeClassRef: &karpv1.NodeClassReference{ -// Group: object.GVK(nodeClass).Group, -// Kind: object.GVK(nodeClass).Kind, -// Name: nodeClass.Name, -// }, -// // keep the same instance type requirements to prevent considering instance types that require swap -// Requirements: nodePool.Spec.Template.Spec.Requirements, -// }, -// }, -// }, -// }, -// karpv1.NodePool{ -// Spec: karpv1.NodePoolSpec{ -// Template: nodeClaimTemplate, -// }, -// }, -// ) -// updatedNodePool.ObjectMeta = nodePool.ObjectMeta -// -// env.ExpectCreated(dep, nodeClass, nodePool) -// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// node := env.ExpectCreatedNodeCount("==", 1)[0] -// -// env.ExpectCreatedOrUpdated(updatedNodePool) -// -// env.EventuallyExpectDrifted(nodeClaim) -// -// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) -// env.ExpectUpdated(pod) -// -// // Nodes will need to have the start-up taint removed before the node can be considered as initialized -// fmt.Println(CurrentSpecReport().LeafNodeText) -// if CurrentSpecReport().LeafNodeText == "Start-up Taints" { -// nodes := env.EventuallyExpectCreatedNodeCount("==", 2) -// sort.Slice(nodes, func(i int, j int) bool { -// return nodes[i].CreationTimestamp.Before(&nodes[j].CreationTimestamp) -// }) -// nodeTwo := nodes[1] -// // Remove the startup taints from the new nodes to 
initialize them -// Eventually(func(g Gomega) { -// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeTwo), nodeTwo)).To(Succeed()) -// stored := nodeTwo.DeepCopy() -// nodeTwo.Spec.Taints = lo.Reject(nodeTwo.Spec.Taints, func(t corev1.Taint, _ int) bool { return t.Key == "example.com/another-taint-2" }) -// g.Expect(env.Client.Patch(env.Context, nodeTwo, client.StrategicMergeFrom(stored))).To(Succeed()) -// }).Should(Succeed()) -// } -// env.EventuallyExpectNotFound(pod, node) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// }, -// Entry("Annotations", karpv1.NodeClaimTemplate{ -// ObjectMeta: karpv1.ObjectMeta{ -// Annotations: map[string]string{"keyAnnotationTest": "valueAnnotationTest"}, -// }, -// }), -// Entry("Labels", karpv1.NodeClaimTemplate{ -// ObjectMeta: karpv1.ObjectMeta{ -// Labels: map[string]string{"keyLabelTest": "valueLabelTest"}, -// }, -// }), -// Entry("Taints", karpv1.NodeClaimTemplate{ -// Spec: karpv1.NodeClaimSpec{ -// Taints: []corev1.Taint{{Key: "example.com/another-taint-2", Effect: corev1.TaintEffectPreferNoSchedule}}, -// }, -// }), -// Entry("Start-up Taints", karpv1.NodeClaimTemplate{ -// Spec: karpv1.NodeClaimSpec{ -// StartupTaints: []corev1.Taint{{Key: "example.com/another-taint-2", Effect: corev1.TaintEffectPreferNoSchedule}}, -// }, -// }), -// Entry("NodeRequirements", karpv1.NodeClaimTemplate{ -// Spec: karpv1.NodeClaimSpec{ -// // since this will overwrite the default requirements, add instance category and family selectors back into requirements -// Requirements: []karpv1.NodeSelectorRequirementWithMinValues{ -// {NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: karpv1.CapacityTypeLabelKey, Operator: corev1.NodeSelectorOpIn, Values: []string{karpv1.CapacityTypeSpot}}}, -// {NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: v1.LabelInstanceCategory, Operator: corev1.NodeSelectorOpIn, Values: []string{"c", "m", "r"}}}, -// {NodeSelectorRequirement: 
corev1.NodeSelectorRequirement{Key: v1.LabelInstanceFamily, Operator: corev1.NodeSelectorOpNotIn, Values: []string{"a1"}}}, -// }, -// }, -// }), -// ) -// DescribeTable("EC2NodeClass", func(nodeClassSpec v1.EC2NodeClassSpec) { -// updatedNodeClass := test.EC2NodeClass(v1.EC2NodeClass{Spec: *nodeClass.Spec.DeepCopy()}, v1.EC2NodeClass{Spec: nodeClassSpec}) -// updatedNodeClass.ObjectMeta = nodeClass.ObjectMeta -// -// env.ExpectCreated(dep, nodeClass, nodePool) -// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// node := env.ExpectCreatedNodeCount("==", 1)[0] -// -// env.ExpectCreatedOrUpdated(updatedNodeClass) -// -// env.EventuallyExpectDrifted(nodeClaim) -// -// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) -// env.ExpectUpdated(pod) -// env.EventuallyExpectNotFound(pod, node) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// }, -// Entry("UserData", v1.EC2NodeClassSpec{UserData: awssdk.String("#!/bin/bash\necho \"Hello, AL2023\"")}), -// Entry("Tags", v1.EC2NodeClassSpec{Tags: map[string]string{"keyTag-test-3": "valueTag-test-3"}}), -// Entry("MetadataOptions", v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPTokens: awssdk.String("required"), HTTPPutResponseHopLimit: awssdk.Int64(10)}}), -// Entry("BlockDeviceMappings", v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{ -// { -// DeviceName: awssdk.String("/dev/xvda"), -// EBS: &v1.BlockDevice{ -// VolumeSize: resources.Quantity("20Gi"), -// VolumeType: awssdk.String("gp3"), -// Encrypted: awssdk.Bool(true), -// }, -// }}}), -// Entry("DetailedMonitoring", v1.EC2NodeClassSpec{DetailedMonitoring: awssdk.Bool(true)}), -// Entry("AMIFamily", v1.EC2NodeClassSpec{AMIFamily: awssdk.String(v1.AMIFamilyBottlerocket)}), -// Entry("KubeletConfiguration", v1.EC2NodeClassSpec{ -// Kubelet: &v1.KubeletConfiguration{ -// EvictionSoft: map[string]string{"memory.available": "5%"}, -// 
EvictionSoftGracePeriod: map[string]metav1.Duration{"memory.available": {Duration: time.Minute}}, -// }, -// }), -// ) -// It("should drift the EC2NodeClass on InstanceProfile", func() { -// // Create a separate test case for this one since we can't use the default NodeClass that's created due to it having -// // a pre-populated role AND we also need to do the instance profile generation within the scope of this test -// instanceProfileName := fmt.Sprintf("KarpenterNodeInstanceProfile-%s", env.ClusterName) -// instanceProfileDriftName := fmt.Sprintf("KarpenterNodeInstanceProfile-Drift-%s", env.ClusterName) -// roleName := fmt.Sprintf("KarpenterNodeRole-%s", env.ClusterName) -// -// for _, name := range []string{instanceProfileName, instanceProfileDriftName} { -// env.ExpectInstanceProfileCreated(name, roleName) -// DeferCleanup(func() { -// env.ExpectInstanceProfileDeleted(name, roleName) -// }) -// } -// nodeClass.Spec.Role = "" -// nodeClass.Spec.InstanceProfile = lo.ToPtr(instanceProfileName) -// -// env.ExpectCreated(dep, nodeClass, nodePool) -// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// node := env.ExpectCreatedNodeCount("==", 1)[0] -// -// nodeClass.Spec.InstanceProfile = lo.ToPtr(instanceProfileDriftName) -// env.ExpectCreatedOrUpdated(nodeClass) -// -// env.EventuallyExpectDrifted(nodeClaim) -// -// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) -// env.ExpectUpdated(pod) -// env.EventuallyExpectNotFound(pod, node) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// }) -// It("should drift the EC2NodeClass on BlockDeviceMappings volume size update", func() { -// nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ -// { -// DeviceName: awssdk.String("/dev/xvda"), -// EBS: &v1.BlockDevice{ -// VolumeSize: resources.Quantity("20Gi"), -// VolumeType: awssdk.String("gp3"), -// Encrypted: awssdk.Bool(true), -// }, -// }, -// } -// 
env.ExpectCreated(dep, nodeClass, nodePool) -// pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// node := env.ExpectCreatedNodeCount("==", 1)[0] -// -// nodeClass.Spec.BlockDeviceMappings[0].EBS.VolumeSize = resources.Quantity("100Gi") -// env.ExpectCreatedOrUpdated(nodeClass) -// -// By("validating the drifted status condition has propagated") -// Eventually(func(g Gomega) { -// g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) -// g.Expect(nodeClaim.StatusConditions().Get(karpv1.ConditionTypeDrifted)).ToNot(BeNil()) -// g.Expect(nodeClaim.StatusConditions().Get(karpv1.ConditionTypeDrifted).IsTrue()).To(BeTrue()) -// }).Should(Succeed()) -// -// delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) -// env.ExpectUpdated(pod) -// env.EventuallyExpectNotFound(pod, node) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// }) -// It("should update the nodepool-hash annotation on the nodepool and nodeclaim when the nodepool's nodepool-hash-version annotation does not match the controller hash version", func() { -// env.ExpectCreated(dep, nodeClass, nodePool) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// nodePool = env.ExpectExists(nodePool).(*karpv1.NodePool) -// expectedHash := nodePool.Hash() -// -// By(fmt.Sprintf("expect nodepool %s and nodeclaim %s to contain %s and %s annotations", nodePool.Name, nodeClaim.Name, karpv1.NodePoolHashAnnotationKey, karpv1.NodePoolHashVersionAnnotationKey)) -// Eventually(func(g Gomega) { -// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) -// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) -// -// g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) 
-// g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) -// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) -// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) -// }).WithTimeout(30 * time.Second).Should(Succeed()) -// -// nodePool.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ -// karpv1.NodePoolHashAnnotationKey: "test-hash-1", -// karpv1.NodePoolHashVersionAnnotationKey: "test-hash-version-1", -// }) -// // Updating `nodePool.Spec.Template.Annotations` would normally trigger drift on all nodeclaims owned by the -// // nodepool. However, the nodepool-hash-version does not match the controller hash version, so we will see that -// // none of the nodeclaims will be drifted and all nodeclaims will have an updated `nodepool-hash` and `nodepool-hash-version` annotation -// nodePool.Spec.Template.Annotations = lo.Assign(nodePool.Spec.Template.Annotations, map[string]string{ -// "test-key": "test-value", -// }) -// nodeClaim.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ -// karpv1.NodePoolHashAnnotationKey: "test-hash-2", -// karpv1.NodePoolHashVersionAnnotationKey: "test-hash-version-2", -// }) -// -// // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodepool -// env.ExpectUpdated(nodeClaim, nodePool) -// expectedHash = nodePool.Hash() -// -// // Expect all nodeclaims not to be drifted and contain an updated `nodepool-hash` and `nodepool-hash-version` annotation -// Eventually(func(g Gomega) { -// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) -// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) -// -// 
g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) -// g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) -// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) -// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) -// }) -// }) -// It("should update the ec2nodeclass-hash annotation on the ec2nodeclass and nodeclaim when the ec2nodeclass's ec2nodeclass-hash-version annotation does not match the controller hash version", func() { -// env.ExpectCreated(dep, nodeClass, nodePool) -// env.EventuallyExpectHealthyPodCount(selector, numPods) -// nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] -// nodeClass = env.ExpectExists(nodeClass).(*v1.EC2NodeClass) -// expectedHash := nodeClass.Hash() -// -// By(fmt.Sprintf("expect nodeclass %s and nodeclaim %s to contain %s and %s annotations", nodeClass.Name, nodeClaim.Name, v1.AnnotationEC2NodeClassHash, v1.AnnotationEC2NodeClassHashVersion)) -// Eventually(func(g Gomega) { -// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) -// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) -// -// g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) -// g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) -// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) -// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) -// }).WithTimeout(30 * time.Second).Should(Succeed()) -// -// nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ -// 
v1.AnnotationEC2NodeClassHash: "test-hash-1", -// v1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1", -// }) -// // Updating `nodeClass.Spec.Tags` would normally trigger drift on all nodeclaims using the -// // nodeclass. However, the ec2nodeclass-hash-version does not match the controller hash version, so we will see that -// // none of the nodeclaims will be drifted and all nodeclaims will have an updated `ec2nodeclass-hash` and `ec2nodeclass-hash-version` annotation -// nodeClass.Spec.Tags = lo.Assign(nodeClass.Spec.Tags, map[string]string{ -// "test-key": "test-value", -// }) -// nodeClaim.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ -// v1.AnnotationEC2NodeClassHash: "test-hash-2", -// v1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2", -// }) -// -// // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodeclass -// env.ExpectUpdated(nodeClaim, nodeClass) -// expectedHash = nodeClass.Hash() -// -// // Expect all nodeclaims not to be drifted and contain an updated `nodepool-hash` and `nodepool-hash-version` annotation -// Eventually(func(g Gomega) { -// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) -// g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) -// -// g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) -// g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) -// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) -// g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) -// }).WithTimeout(30 * time.Second).Should(Succeed()) -// env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim) -// }) -// Context("Failure", func() { 
-// It("should not disrupt a drifted node if the replacement node never registers", func() { -// // launch a new nodeClaim -// var numPods int32 = 2 -// dep := coretest.Deployment(coretest.DeploymentOptions{ -// Replicas: 2, -// PodOptions: coretest.PodOptions{ -// ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, -// PodAntiRequirements: []corev1.PodAffinityTerm{{ -// TopologyKey: corev1.LabelHostname, -// LabelSelector: &metav1.LabelSelector{ -// MatchLabels: map[string]string{"app": "inflate"}, -// }}, -// }, -// }, -// }) -// env.ExpectCreated(dep, nodeClass, nodePool) -// -// startingNodeClaimState := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) -// env.EventuallyExpectCreatedNodeCount("==", int(numPods)) -// -// // Drift the nodeClaim with bad configuration that will not register a NodeClaim -// nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: env.GetAMIBySSMPath("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs")}} -// env.ExpectCreatedOrUpdated(nodeClass) -// -// env.EventuallyExpectDrifted(startingNodeClaimState...) -// -// // Expect only a single node to be tainted due to default disruption budgets -// taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) -// -// // Drift should fail and the original node should be untainted -// // TODO: reduce timeouts when disruption waits are factored out -// env.EventuallyExpectNodesUntaintedWithTimeout(11*time.Minute, taintedNodes...) -// -// // Expect all the NodeClaims that existed on the initial provisioning loop are not removed. 
-// // Assert this over several minutes to ensure a subsequent disruption controller pass doesn't -// // successfully schedule the evicted pods to the in-flight nodeclaim and disrupt the original node -// Consistently(func(g Gomega) { -// nodeClaims := &karpv1.NodeClaimList{} -// g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) -// startingNodeClaimUIDs := sets.New(lo.Map(startingNodeClaimState, func(nc *karpv1.NodeClaim, _ int) types.UID { return nc.UID })...) -// nodeClaimUIDs := sets.New(lo.Map(nodeClaims.Items, func(nc karpv1.NodeClaim, _ int) types.UID { return nc.UID })...) -// g.Expect(nodeClaimUIDs.IsSuperset(startingNodeClaimUIDs)).To(BeTrue()) -// }, "2m").Should(Succeed()) -// }) -// It("should not disrupt a drifted node if the replacement node registers but never initialized", func() { -// // launch a new nodeClaim -// var numPods int32 = 2 -// dep := coretest.Deployment(coretest.DeploymentOptions{ -// Replicas: 2, -// PodOptions: coretest.PodOptions{ -// ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, -// PodAntiRequirements: []corev1.PodAffinityTerm{{ -// TopologyKey: corev1.LabelHostname, -// LabelSelector: &metav1.LabelSelector{ -// MatchLabels: map[string]string{"app": "inflate"}, -// }}, -// }, -// }, -// }) -// env.ExpectCreated(dep, nodeClass, nodePool) -// -// startingNodeClaimState := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) -// env.EventuallyExpectCreatedNodeCount("==", int(numPods)) -// -// // Drift the nodeClaim with bad configuration that never initializes -// nodePool.Spec.Template.Spec.StartupTaints = []corev1.Taint{{Key: "example.com/taint", Effect: corev1.TaintEffectPreferNoSchedule}} -// env.ExpectCreatedOrUpdated(nodePool) -// -// env.EventuallyExpectDrifted(startingNodeClaimState...) 
-// -// // Expect only a single node to get tainted due to default disruption budgets -// taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) -// -// // Drift should fail and original node should be untainted -// // TODO: reduce timeouts when disruption waits are factored out -// env.EventuallyExpectNodesUntaintedWithTimeout(11*time.Minute, taintedNodes...) -// -// // Expect that the new nodeClaim/node is kept around after the un-cordon -// nodeList := &corev1.NodeList{} -// Expect(env.Client.List(env, nodeList, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) -// Expect(nodeList.Items).To(HaveLen(int(numPods) + 1)) -// -// nodeClaimList := &karpv1.NodeClaimList{} -// Expect(env.Client.List(env, nodeClaimList, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) -// Expect(nodeClaimList.Items).To(HaveLen(int(numPods) + 1)) -// -// // Expect all the NodeClaims that existed on the initial provisioning loop are not removed -// // Assert this over several minutes to ensure a subsequent disruption controller pass doesn't -// // successfully schedule the evicted pods to the in-flight nodeclaim and disrupt the original node -// Consistently(func(g Gomega) { -// nodeClaims := &karpv1.NodeClaimList{} -// g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) -// startingNodeClaimUIDs := sets.New(lo.Map(startingNodeClaimState, func(m *karpv1.NodeClaim, _ int) types.UID { return m.UID })...) -// nodeClaimUIDs := sets.New(lo.Map(nodeClaims.Items, func(m karpv1.NodeClaim, _ int) types.UID { return m.UID })...) 
-// g.Expect(nodeClaimUIDs.IsSuperset(startingNodeClaimUIDs)).To(BeTrue()) -// }, "2m").Should(Succeed()) -// }) -// It("should not drift any nodes if their PodDisruptionBudgets are unhealthy", func() { -// // Create a deployment that contains a readiness probe that will never succeed -// // This way, the pod will bind to the node, but the PodDisruptionBudget will never go healthy -// var numPods int32 = 2 -// dep := coretest.Deployment(coretest.DeploymentOptions{ -// Replicas: 2, -// PodOptions: coretest.PodOptions{ -// ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, -// PodAntiRequirements: []corev1.PodAffinityTerm{{ -// TopologyKey: corev1.LabelHostname, -// LabelSelector: &metav1.LabelSelector{ -// MatchLabels: map[string]string{"app": "inflate"}, -// }}, -// }, -// ReadinessProbe: &corev1.Probe{ -// ProbeHandler: corev1.ProbeHandler{ -// HTTPGet: &corev1.HTTPGetAction{ -// Port: intstr.FromInt32(80), -// }, -// }, -// }, -// }, -// }) -// selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) -// minAvailable := intstr.FromInt32(numPods - 1) -// pdb := coretest.PodDisruptionBudget(coretest.PDBOptions{ -// Labels: dep.Spec.Template.Labels, -// MinAvailable: &minAvailable, -// }) -// env.ExpectCreated(dep, nodeClass, nodePool, pdb) -// -// nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) -// env.EventuallyExpectCreatedNodeCount("==", int(numPods)) -// -// // Expect pods to be bound but not to be ready since we are intentionally failing the readiness check -// env.EventuallyExpectBoundPodCount(selector, int(numPods)) -// -// // Drift the nodeclaims -// nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} -// env.ExpectUpdated(nodePool) -// -// env.EventuallyExpectDrifted(nodeClaims...) 
-// env.ConsistentlyExpectNoDisruptions(int(numPods), time.Minute) -// }) -// }) -// }) +import ( + "fmt" + "os" + "sort" + "testing" + "time" + + awssdk "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/eks" + "github.com/awslabs/operatorpkg/object" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/samber/lo" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/sets" + "sigs.k8s.io/controller-runtime/pkg/client" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + coretest "sigs.k8s.io/karpenter/pkg/test" + "sigs.k8s.io/karpenter/pkg/utils/resources" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + "github.com/aws/karpenter-provider-aws/pkg/test" + "github.com/aws/karpenter-provider-aws/test/pkg/environment/aws" + "github.com/aws/karpenter-provider-aws/test/pkg/environment/common" +) + +var env *aws.Environment +var amdAMI string +var nodeClass *v1.EC2NodeClass +var nodePool *karpv1.NodePool + +func TestDrift(t *testing.T) { + RegisterFailHandler(Fail) + BeforeSuite(func() { + env = aws.NewEnvironment(t) + }) + AfterSuite(func() { + env.Stop() + }) + RunSpecs(t, "Drift") +} + +var _ = BeforeEach(func() { + env.BeforeEach() + nodeClass = env.DefaultEC2NodeClass() + nodePool = env.DefaultNodePool(nodeClass) +}) +var _ = AfterEach(func() { env.Cleanup() }) +var _ = AfterEach(func() { env.AfterEach() }) + +var _ = Describe("Drift", func() { + var dep *appsv1.Deployment + var selector labels.Selector + var numPods int + var customUserData *string + BeforeEach(func() { + amdAMI = env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersion())) + numPods = 1 + // Add pods 
with a do-not-disrupt annotation so that we can check node metadata before we disrupt + dep = coretest.Deployment(coretest.DeploymentOptions{ + Replicas: int32(numPods), + PodOptions: coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": "my-app", + }, + Annotations: map[string]string{ + karpv1.DoNotDisruptAnnotationKey: "true", + }, + }, + TerminationGracePeriodSeconds: lo.ToPtr[int64](0), + }, + }) + selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + + rawContent, err := os.ReadFile("testdata/al2023_userdata_input.yaml") + Expect(err).ToNot(HaveOccurred()) + customUserData = lo.ToPtr(fmt.Sprintf(string(rawContent), env.ClusterName, env.ClusterEndpoint, env.ExpectCABundle())) + }) + Context("Budgets", func() { + It("should respect budgets for empty drift", func() { + nodePool = coretest.ReplaceRequirements(nodePool, + karpv1.NodeSelectorRequirementWithMinValues{ + NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: v1.LabelInstanceSize, + Operator: corev1.NodeSelectorOpIn, + Values: []string{"2xlarge"}, + }, + }, + ) + // We're expecting to create 3 nodes, so we'll expect to see 2 nodes deleting at one time. + nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ + Nodes: "50%", + }} + var numPods int32 = 6 + dep = coretest.Deployment(coretest.DeploymentOptions{ + Replicas: numPods, + PodOptions: coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + karpv1.DoNotDisruptAnnotationKey: "true", + }, + Labels: map[string]string{"app": "large-app"}, + }, + // Each 2xlarge has 8 cpu, so each node should fit 2 pods. 
+ ResourceRequirements: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("3"), + }, + }, + }, + }) + selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + env.ExpectCreated(nodeClass, nodePool, dep) + + nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) + nodes := env.EventuallyExpectCreatedNodeCount("==", 3) + env.EventuallyExpectHealthyPodCount(selector, int(numPods)) + + // List nodes so that we get any updated information on the nodes. If we don't + // we have the potential to over-write any changes Karpenter makes to the nodes. + // Add a finalizer to each node so that we can stop termination disruptions + By("adding finalizers to the nodes to prevent termination") + for _, node := range nodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) + node.Finalizers = append(node.Finalizers, common.TestingFinalizer) + env.ExpectUpdated(node) + } + + By("making the nodes empty") + // Delete the deployment to make all nodes empty. + env.ExpectDeleted(dep) + + // Drift the nodeclaims + By("drift the nodeclaims") + nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} + env.ExpectUpdated(nodePool) + + env.EventuallyExpectDrifted(nodeClaims...) 
+ + // Ensure that we get two nodes tainted, and they have overlap during the drift + env.EventuallyExpectTaintedNodeCount("==", 2) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 5*time.Second) + + // Remove the finalizer from each node so that we can terminate + for _, node := range nodes { + Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) + } + + // After the deletion timestamp is set and all pods are drained + // the node should be gone + env.EventuallyExpectNotFound(nodes[0], nodes[1]) + + nodes = env.EventuallyExpectTaintedNodeCount("==", 1) + Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed()) + env.EventuallyExpectNotFound(nodes[0]) + }) + It("should respect budgets for non-empty delete drift", func() { + nodePool = coretest.ReplaceRequirements(nodePool, + karpv1.NodeSelectorRequirementWithMinValues{ + NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: v1.LabelInstanceSize, + Operator: corev1.NodeSelectorOpIn, + Values: []string{"2xlarge"}, + }, + }, + ) + // We're expecting to create 3 nodes, so we'll expect to see at most 2 nodes deleting at one time. + nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ + Nodes: "50%", + }} + var numPods int32 = 9 + dep = coretest.Deployment(coretest.DeploymentOptions{ + Replicas: numPods, + PodOptions: coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + karpv1.DoNotDisruptAnnotationKey: "true", + }, + Labels: map[string]string{"app": "large-app"}, + }, + // Each 2xlarge has 8 cpu, so each node should fit no more than 3 pods. 
+ ResourceRequirements: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2100m"), + }, + }, + }, + }) + selector = labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + env.ExpectCreated(nodeClass, nodePool, dep) + + nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", 3) + nodes := env.EventuallyExpectCreatedNodeCount("==", 3) + env.EventuallyExpectHealthyPodCount(selector, int(numPods)) + + By("scaling down the deployment") + // Update the deployment to a third of the replicas. + dep.Spec.Replicas = lo.ToPtr[int32](3) + env.ExpectUpdated(dep) + + // First expect there to be 3 pods, then try to spread the pods. + env.EventuallyExpectHealthyPodCount(selector, 3) + env.ForcePodsToSpread(nodes...) + env.EventuallyExpectHealthyPodCount(selector, 3) + + By("cordoning and adding finalizer to the nodes") + // Add a finalizer to each node so that we can stop termination disruptions + for _, node := range nodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) + node.Finalizers = append(node.Finalizers, common.TestingFinalizer) + env.ExpectUpdated(node) + } + + By("drifting the nodes") + // Drift the nodeclaims + nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} + env.ExpectUpdated(nodePool) + + env.EventuallyExpectDrifted(nodeClaims...) 
+ + By("enabling disruption by removing the do not disrupt annotation") + pods := env.EventuallyExpectHealthyPodCount(selector, 3) + // Remove the do-not-disrupt annotation so that the nodes are now disruptable + for _, pod := range pods { + delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) + env.ExpectUpdated(pod) + } + + // Ensure that we get two nodes tainted, and they have overlap during the drift + env.EventuallyExpectTaintedNodeCount("==", 2) + nodes = env.ConsistentlyExpectDisruptionsWithNodeCount(2, 3, 30*time.Second) + + By("removing the finalizer from the nodes") + Expect(env.ExpectTestingFinalizerRemoved(nodes[0])).To(Succeed()) + Expect(env.ExpectTestingFinalizerRemoved(nodes[1])).To(Succeed()) + + // After the deletion timestamp is set and all pods are drained + // the node should be gone + env.EventuallyExpectNotFound(nodes[0], nodes[1]) + }) + It("should respect budgets for non-empty replace drift", func() { + appLabels := map[string]string{"app": "large-app"} + nodePool.Labels = appLabels + // We're expecting to create 5 nodes, so we'll expect to see at most 3 nodes deleting at one time. 
+ nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ + Nodes: "3", + }} + + // Create a 5 pod deployment with hostname inter-pod anti-affinity to ensure each pod is placed on a unique node + numPods = 5 + selector = labels.SelectorFromSet(appLabels) + deployment := coretest.Deployment(coretest.DeploymentOptions{ + Replicas: int32(numPods), + PodOptions: coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: appLabels, + }, + PodAntiRequirements: []corev1.PodAffinityTerm{{ + TopologyKey: corev1.LabelHostname, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: appLabels, + }, + }}, + }, + }) + + env.ExpectCreated(nodeClass, nodePool, deployment) + + originalNodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", numPods) + originalNodes := env.EventuallyExpectCreatedNodeCount("==", numPods) + + // Check that all deployment pods are online + env.EventuallyExpectHealthyPodCount(selector, numPods) + + By("cordoning and adding finalizer to the nodes") + // Add a finalizer to each node so that we can stop termination disruptions + for _, node := range originalNodes { + Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), node)).To(Succeed()) + node.Finalizers = append(node.Finalizers, common.TestingFinalizer) + env.ExpectUpdated(node) + } + + By("drifting the nodepool") + nodePool.Spec.Template.Annotations = lo.Assign(nodePool.Spec.Template.Annotations, map[string]string{"test-annotation": "drift"}) + env.ExpectUpdated(nodePool) + + // Ensure that we get three nodes tainted, and they have overlap during the drift + env.EventuallyExpectTaintedNodeCount("==", 3) + env.EventuallyExpectNodeClaimCount("==", 8) + env.EventuallyExpectNodeCount("==", 8) + env.ConsistentlyExpectDisruptionsWithNodeCount(3, 8, 5*time.Second) + + for _, node := range originalNodes { + Expect(env.ExpectTestingFinalizerRemoved(node)).To(Succeed()) + } + + // Eventually expect all the nodes to be rolled and completely removed + // Since this completes the disruption 
operation, this also ensures that we aren't leaking nodes into subsequent + // tests since nodeclaims that are actively replacing but haven't brought-up nodes yet can register nodes later + env.EventuallyExpectNotFound(lo.Map(originalNodes, func(n *corev1.Node, _ int) client.Object { return n })...) + env.EventuallyExpectNotFound(lo.Map(originalNodeClaims, func(n *karpv1.NodeClaim, _ int) client.Object { return n })...) + env.ExpectNodeClaimCount("==", 5) + env.ExpectNodeCount("==", 5) + }) + It("should not allow drift if the budget is fully blocking", func() { + // We're going to define a budget that doesn't allow any drift to happen + nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ + Nodes: "0", + }} + + dep.Spec.Template.Annotations = nil + env.ExpectCreated(nodeClass, nodePool, dep) + + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + env.EventuallyExpectCreatedNodeCount("==", 1) + env.EventuallyExpectHealthyPodCount(selector, numPods) + + By("drifting the nodes") + // Drift the nodeclaims + nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} + env.ExpectUpdated(nodePool) + + env.EventuallyExpectDrifted(nodeClaim) + env.ConsistentlyExpectNoDisruptions(1, time.Minute) + }) + It("should not allow drift if the budget is fully blocking during a scheduled time", func() { + // We're going to define a budget that doesn't allow any drift to happen + // This is going to be on a schedule that only lasts 30 minutes, whose window starts 15 minutes before + // the current time and extends 15 minutes past the current time + // Times need to be in UTC since the karpenter containers were built in UTC time + windowStart := time.Now().Add(-time.Minute * 15).UTC() + nodePool.Spec.Disruption.Budgets = []karpv1.Budget{{ + Nodes: "0", + Schedule: lo.ToPtr(fmt.Sprintf("%d %d * * *", windowStart.Minute(), windowStart.Hour())), + Duration: &metav1.Duration{Duration: time.Minute * 30}, + }} + + dep.Spec.Template.Annotations = nil + 
env.ExpectCreated(nodeClass, nodePool, dep) + + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + env.EventuallyExpectCreatedNodeCount("==", 1) + env.EventuallyExpectHealthyPodCount(selector, numPods) + + By("drifting the nodes") + // Drift the nodeclaims + nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} + env.ExpectUpdated(nodePool) + + env.EventuallyExpectDrifted(nodeClaim) + env.ConsistentlyExpectNoDisruptions(1, time.Minute) + }) + }) + It("should disrupt nodes that have drifted due to AMIs", func() { + // Choose an old static image (AL2023 AMIs don't exist for 1.22) + oldCustomAMI := env.GetAMIBySSMPath(lo.Ternary(env.K8sMinorVersion() == 23, + "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-1.23-v20240307/image_id", + fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)), + )) + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: oldCustomAMI}} + nodeClass.Spec.UserData = customUserData + + env.ExpectCreated(dep, nodeClass, nodePool) + pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] + env.ExpectCreatedNodeCount("==", 1) + + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + node := env.EventuallyExpectNodeCount("==", 1)[0] + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} + env.ExpectCreatedOrUpdated(nodeClass) + + env.EventuallyExpectDrifted(nodeClaim) + + delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) + env.ExpectUpdated(pod) + env.EventuallyExpectNotFound(pod, nodeClaim, node) + env.EventuallyExpectHealthyPodCount(selector, numPods) + }) + It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() { + armAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", env.K8sVersion())) + 
nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: armAMI}} + nodeClass.Spec.UserData = customUserData + + env.ExpectCreated(dep, nodeClass, nodePool) + pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] + env.ExpectCreatedNodeCount("==", 1) + + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + node := env.EventuallyExpectNodeCount("==", 1)[0] + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} + env.ExpectCreatedOrUpdated(nodeClass) + + env.EventuallyExpectDrifted(nodeClaim) + + delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) + env.ExpectUpdated(pod) + env.EventuallyExpectNotFound(pod, nodeClaim, node) + env.EventuallyExpectHealthyPodCount(selector, numPods) + }) + It("should not disrupt nodes that have drifted without the featureGate enabled", func() { + env.ExpectSettingsOverridden(corev1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=false"}) + + // Choose an old static image (AL2023 AMIs don't exist for 1.22) + oldCustomAMI := env.GetAMIBySSMPath(lo.Ternary(env.K8sMinorVersion() == 23, + "/aws/service/eks/optimized-ami/1.23/amazon-linux-2023/x86_64/standard/amazon-eks-node-al2023-x86_64-standard-1.23-v20240307/image_id", + fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)), + )) + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: oldCustomAMI}} + + env.ExpectCreated(dep, nodeClass, nodePool) + env.EventuallyExpectHealthyPodCount(selector, numPods) + env.ExpectCreatedNodeCount("==", 1) + + node := env.Monitor.CreatedNodes()[0] + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: amdAMI}} + env.ExpectUpdated(nodeClass) + + // We should consistently get the same node existing for a minute + Consistently(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(node), &corev1.Node{})).To(Succeed()) + }).WithTimeout(time.Minute).Should(Succeed()) + }) + It("should disrupt 
nodes that have drifted due to securitygroup", func() { + By("getting the cluster vpc id") + output, err := env.EKSAPI.DescribeCluster(&eks.DescribeClusterInput{Name: awssdk.String(env.ClusterName)}) + Expect(err).To(BeNil()) + + By("creating new security group") + createSecurityGroup := &ec2.CreateSecurityGroupInput{ + GroupName: awssdk.String("security-group-drift"), + Description: awssdk.String("End-to-end Drift Test, should delete after drift test is completed"), + VpcId: output.Cluster.ResourcesVpcConfig.VpcId, + TagSpecifications: []*ec2.TagSpecification{ + { + ResourceType: awssdk.String("security-group"), + Tags: []*ec2.Tag{ + { + Key: awssdk.String("karpenter.sh/discovery"), + Value: awssdk.String(env.ClusterName), + }, + { + Key: awssdk.String(coretest.DiscoveryLabel), + Value: awssdk.String(env.ClusterName), + }, + { + Key: awssdk.String("creation-date"), + Value: awssdk.String(time.Now().Format(time.RFC3339)), + }, + }, + }, + }, + } + _, _ = env.EC2API.CreateSecurityGroup(createSecurityGroup) + + By("looking for security groups") + var securitygroups []aws.SecurityGroup + var testSecurityGroup aws.SecurityGroup + Eventually(func(g Gomega) { + securitygroups = env.GetSecurityGroups(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + testSecurityGroup, _ = lo.Find(securitygroups, func(sg aws.SecurityGroup) bool { + return awssdk.StringValue(sg.GroupName) == "security-group-drift" + }) + g.Expect(testSecurityGroup).ToNot(BeNil()) + }).Should(Succeed()) + + By("creating a new provider with the new securitygroup") + awsIDs := lo.FilterMap(securitygroups, func(sg aws.SecurityGroup, _ int) (string, bool) { + if awssdk.StringValue(sg.GroupId) != awssdk.StringValue(testSecurityGroup.GroupId) { + return awssdk.StringValue(sg.GroupId), true + } + return "", false + }) + sgTerms := []v1.SecurityGroupSelectorTerm{{ID: awssdk.StringValue(testSecurityGroup.GroupId)}} + for _, id := range awsIDs { + sgTerms = append(sgTerms, 
v1.SecurityGroupSelectorTerm{ID: id}) + } + nodeClass.Spec.SecurityGroupSelectorTerms = sgTerms + + env.ExpectCreated(dep, nodeClass, nodePool) + pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + node := env.ExpectCreatedNodeCount("==", 1)[0] + + sgTerms = lo.Reject(sgTerms, func(t v1.SecurityGroupSelectorTerm, _ int) bool { + return t.ID == awssdk.StringValue(testSecurityGroup.GroupId) + }) + nodeClass.Spec.SecurityGroupSelectorTerms = sgTerms + env.ExpectCreatedOrUpdated(nodeClass) + + env.EventuallyExpectDrifted(nodeClaim) + + delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) + env.ExpectUpdated(pod) + env.EventuallyExpectNotFound(pod, nodeClaim, node) + env.EventuallyExpectHealthyPodCount(selector, numPods) + }) + It("should disrupt nodes that have drifted due to subnets", func() { + subnets := env.GetSubnetInfo(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + Expect(len(subnets)).To(BeNumerically(">", 1)) + + nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{ID: subnets[0].ID}} + + env.ExpectCreated(dep, nodeClass, nodePool) + pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + node := env.ExpectCreatedNodeCount("==", 1)[0] + + nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{ID: subnets[1].ID}} + env.ExpectCreatedOrUpdated(nodeClass) + + env.EventuallyExpectDrifted(nodeClaim) + + delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) + env.ExpectUpdated(pod) + env.EventuallyExpectNotFound(pod, node) + env.EventuallyExpectHealthyPodCount(selector, numPods) + }) + DescribeTable("NodePool Drift", func(nodeClaimTemplate karpv1.NodeClaimTemplate) { + updatedNodePool := coretest.NodePool( + karpv1.NodePool{ + Spec: karpv1.NodePoolSpec{ + Template: karpv1.NodeClaimTemplate{ + Spec: karpv1.NodeClaimSpec{ + NodeClassRef: &karpv1.NodeClassReference{ + 
Group: object.GVK(nodeClass).Group, + Kind: object.GVK(nodeClass).Kind, + Name: nodeClass.Name, + }, + // keep the same instance type requirements to prevent considering instance types that require swap + Requirements: nodePool.Spec.Template.Spec.Requirements, + }, + }, + }, + }, + karpv1.NodePool{ + Spec: karpv1.NodePoolSpec{ + Template: nodeClaimTemplate, + }, + }, + ) + updatedNodePool.ObjectMeta = nodePool.ObjectMeta + + env.ExpectCreated(dep, nodeClass, nodePool) + pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + node := env.ExpectCreatedNodeCount("==", 1)[0] + + env.ExpectCreatedOrUpdated(updatedNodePool) + + env.EventuallyExpectDrifted(nodeClaim) + + delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) + env.ExpectUpdated(pod) + + // Nodes will need to have the start-up taint removed before the node can be considered as initialized + fmt.Println(CurrentSpecReport().LeafNodeText) + if CurrentSpecReport().LeafNodeText == "Start-up Taints" { + nodes := env.EventuallyExpectCreatedNodeCount("==", 2) + sort.Slice(nodes, func(i int, j int) bool { + return nodes[i].CreationTimestamp.Before(&nodes[j].CreationTimestamp) + }) + nodeTwo := nodes[1] + // Remove the startup taints from the new nodes to initialize them + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeTwo), nodeTwo)).To(Succeed()) + stored := nodeTwo.DeepCopy() + nodeTwo.Spec.Taints = lo.Reject(nodeTwo.Spec.Taints, func(t corev1.Taint, _ int) bool { return t.Key == "example.com/another-taint-2" }) + g.Expect(env.Client.Patch(env.Context, nodeTwo, client.StrategicMergeFrom(stored))).To(Succeed()) + }).Should(Succeed()) + } + env.EventuallyExpectNotFound(pod, node) + env.EventuallyExpectHealthyPodCount(selector, numPods) + }, + Entry("Annotations", karpv1.NodeClaimTemplate{ + ObjectMeta: karpv1.ObjectMeta{ + Annotations: map[string]string{"keyAnnotationTest": 
"valueAnnotationTest"}, + }, + }), + Entry("Labels", karpv1.NodeClaimTemplate{ + ObjectMeta: karpv1.ObjectMeta{ + Labels: map[string]string{"keyLabelTest": "valueLabelTest"}, + }, + }), + Entry("Taints", karpv1.NodeClaimTemplate{ + Spec: karpv1.NodeClaimSpec{ + Taints: []corev1.Taint{{Key: "example.com/another-taint-2", Effect: corev1.TaintEffectPreferNoSchedule}}, + }, + }), + Entry("Start-up Taints", karpv1.NodeClaimTemplate{ + Spec: karpv1.NodeClaimSpec{ + StartupTaints: []corev1.Taint{{Key: "example.com/another-taint-2", Effect: corev1.TaintEffectPreferNoSchedule}}, + }, + }), + Entry("NodeRequirements", karpv1.NodeClaimTemplate{ + Spec: karpv1.NodeClaimSpec{ + // since this will overwrite the default requirements, add instance category and family selectors back into requirements + Requirements: []karpv1.NodeSelectorRequirementWithMinValues{ + {NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: karpv1.CapacityTypeLabelKey, Operator: corev1.NodeSelectorOpIn, Values: []string{karpv1.CapacityTypeSpot}}}, + {NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: v1.LabelInstanceCategory, Operator: corev1.NodeSelectorOpIn, Values: []string{"c", "m", "r"}}}, + {NodeSelectorRequirement: corev1.NodeSelectorRequirement{Key: v1.LabelInstanceFamily, Operator: corev1.NodeSelectorOpNotIn, Values: []string{"a1"}}}, + }, + }, + }), + ) + DescribeTable("EC2NodeClass", func(nodeClassSpec v1.EC2NodeClassSpec) { + updatedNodeClass := test.EC2NodeClass(v1.EC2NodeClass{Spec: *nodeClass.Spec.DeepCopy()}, v1.EC2NodeClass{Spec: nodeClassSpec}) + updatedNodeClass.ObjectMeta = nodeClass.ObjectMeta + + env.ExpectCreated(dep, nodeClass, nodePool) + pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + node := env.ExpectCreatedNodeCount("==", 1)[0] + + env.ExpectCreatedOrUpdated(updatedNodeClass) + + env.EventuallyExpectDrifted(nodeClaim) + + delete(pod.Annotations, 
karpv1.DoNotDisruptAnnotationKey) + env.ExpectUpdated(pod) + env.EventuallyExpectNotFound(pod, node) + env.EventuallyExpectHealthyPodCount(selector, numPods) + }, + Entry("UserData", v1.EC2NodeClassSpec{UserData: awssdk.String("#!/bin/bash\necho \"Hello, AL2023\"")}), + Entry("Tags", v1.EC2NodeClassSpec{Tags: map[string]string{"keyTag-test-3": "valueTag-test-3"}}), + Entry("MetadataOptions", v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPTokens: awssdk.String("required"), HTTPPutResponseHopLimit: awssdk.Int64(10)}}), + Entry("BlockDeviceMappings", v1.EC2NodeClassSpec{BlockDeviceMappings: []*v1.BlockDeviceMapping{ + { + DeviceName: awssdk.String("/dev/xvda"), + EBS: &v1.BlockDevice{ + VolumeSize: resources.Quantity("20Gi"), + VolumeType: awssdk.String("gp3"), + Encrypted: awssdk.Bool(true), + }, + }}}), + Entry("DetailedMonitoring", v1.EC2NodeClassSpec{DetailedMonitoring: awssdk.Bool(true)}), + Entry("AMIFamily", v1.EC2NodeClassSpec{ + AMISelectorTerms: []v1.AMISelectorTerm{{Alias: "bottlerocket@latest"}}, + }), + Entry("KubeletConfiguration", v1.EC2NodeClassSpec{ + Kubelet: &v1.KubeletConfiguration{ + EvictionSoft: map[string]string{"memory.available": "5%"}, + EvictionSoftGracePeriod: map[string]metav1.Duration{"memory.available": {Duration: time.Minute}}, + }, + }), + ) + It("should drift the EC2NodeClass on InstanceProfile", func() { + // Create a separate test case for this one since we can't use the default NodeClass that's created due to it having + // a pre-populated role AND we also need to do the instance profile generation within the scope of this test + instanceProfileName := fmt.Sprintf("KarpenterNodeInstanceProfile-%s", env.ClusterName) + instanceProfileDriftName := fmt.Sprintf("KarpenterNodeInstanceProfile-Drift-%s", env.ClusterName) + roleName := fmt.Sprintf("KarpenterNodeRole-%s", env.ClusterName) + + for _, name := range []string{instanceProfileName, instanceProfileDriftName} { + env.ExpectInstanceProfileCreated(name, roleName) + 
DeferCleanup(func() { + env.ExpectInstanceProfileDeleted(name, roleName) + }) + } + nodeClass.Spec.Role = "" + nodeClass.Spec.InstanceProfile = lo.ToPtr(instanceProfileName) + + env.ExpectCreated(dep, nodeClass, nodePool) + pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + node := env.ExpectCreatedNodeCount("==", 1)[0] + + nodeClass.Spec.InstanceProfile = lo.ToPtr(instanceProfileDriftName) + env.ExpectCreatedOrUpdated(nodeClass) + + env.EventuallyExpectDrifted(nodeClaim) + + delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) + env.ExpectUpdated(pod) + env.EventuallyExpectNotFound(pod, node) + env.EventuallyExpectHealthyPodCount(selector, numPods) + }) + It("should drift the EC2NodeClass on BlockDeviceMappings volume size update", func() { + nodeClass.Spec.BlockDeviceMappings = []*v1.BlockDeviceMapping{ + { + DeviceName: awssdk.String("/dev/xvda"), + EBS: &v1.BlockDevice{ + VolumeSize: resources.Quantity("20Gi"), + VolumeType: awssdk.String("gp3"), + Encrypted: awssdk.Bool(true), + }, + }, + } + env.ExpectCreated(dep, nodeClass, nodePool) + pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + node := env.ExpectCreatedNodeCount("==", 1)[0] + + nodeClass.Spec.BlockDeviceMappings[0].EBS.VolumeSize = resources.Quantity("100Gi") + env.ExpectCreatedOrUpdated(nodeClass) + + By("validating the drifted status condition has propagated") + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + g.Expect(nodeClaim.StatusConditions().Get(karpv1.ConditionTypeDrifted)).ToNot(BeNil()) + g.Expect(nodeClaim.StatusConditions().Get(karpv1.ConditionTypeDrifted).IsTrue()).To(BeTrue()) + }).Should(Succeed()) + + delete(pod.Annotations, karpv1.DoNotDisruptAnnotationKey) + env.ExpectUpdated(pod) + env.EventuallyExpectNotFound(pod, node) + 
env.EventuallyExpectHealthyPodCount(selector, numPods) + }) + It("should update the nodepool-hash annotation on the nodepool and nodeclaim when the nodepool's nodepool-hash-version annotation does not match the controller hash version", func() { + env.ExpectCreated(dep, nodeClass, nodePool) + env.EventuallyExpectHealthyPodCount(selector, numPods) + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + nodePool = env.ExpectExists(nodePool).(*karpv1.NodePool) + expectedHash := nodePool.Hash() + + By(fmt.Sprintf("expect nodepool %s and nodeclaim %s to contain %s and %s annotations", nodePool.Name, nodeClaim.Name, karpv1.NodePoolHashAnnotationKey, karpv1.NodePoolHashVersionAnnotationKey)) + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) + }).WithTimeout(30 * time.Second).Should(Succeed()) + + nodePool.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ + karpv1.NodePoolHashAnnotationKey: "test-hash-1", + karpv1.NodePoolHashVersionAnnotationKey: "test-hash-version-1", + }) + // Updating `nodePool.Spec.Template.Annotations` would normally trigger drift on all nodeclaims owned by the + // nodepool. 
However, the nodepool-hash-version does not match the controller hash version, so we will see that + // none of the nodeclaims will be drifted and all nodeclaims will have an updated `nodepool-hash` and `nodepool-hash-version` annotation + nodePool.Spec.Template.Annotations = lo.Assign(nodePool.Spec.Template.Annotations, map[string]string{ + "test-key": "test-value", + }) + nodeClaim.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ + karpv1.NodePoolHashAnnotationKey: "test-hash-2", + karpv1.NodePoolHashVersionAnnotationKey: "test-hash-version-2", + }) + + // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodepool + env.ExpectUpdated(nodeClaim, nodePool) + expectedHash = nodePool.Hash() + + // Expect all nodeclaims not to be drifted and contain an updated `nodepool-hash` and `nodepool-hash-version` annotation + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodePool), nodePool)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodePool.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashAnnotationKey, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(karpv1.NodePoolHashVersionAnnotationKey, karpv1.NodePoolHashVersion)) + }) + }) + It("should update the ec2nodeclass-hash annotation on the ec2nodeclass and nodeclaim when the ec2nodeclass's ec2nodeclass-hash-version annotation does not match the controller hash version", func() { + env.ExpectCreated(dep, nodeClass, nodePool) + env.EventuallyExpectHealthyPodCount(selector, numPods) + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + nodeClass = 
env.ExpectExists(nodeClass).(*v1.EC2NodeClass) + expectedHash := nodeClass.Hash() + + By(fmt.Sprintf("expect nodeclass %s and nodeclaim %s to contain %s and %s annotations", nodeClass.Name, nodeClaim.Name, v1.AnnotationEC2NodeClassHash, v1.AnnotationEC2NodeClassHashVersion)) + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) + }).WithTimeout(30 * time.Second).Should(Succeed()) + + nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{ + v1.AnnotationEC2NodeClassHash: "test-hash-1", + v1.AnnotationEC2NodeClassHashVersion: "test-hash-version-1", + }) + // Updating `nodeClass.Spec.Tags` would normally trigger drift on all nodeclaims using the + // nodeclass. 
However, the ec2nodeclass-hash-version does not match the controller hash version, so we will see that + // none of the nodeclaims will be drifted and all nodeclaims will have an updated `ec2nodeclass-hash` and `ec2nodeclass-hash-version` annotation + nodeClass.Spec.Tags = lo.Assign(nodeClass.Spec.Tags, map[string]string{ + "test-key": "test-value", + }) + nodeClaim.Annotations = lo.Assign(nodePool.Annotations, map[string]string{ + v1.AnnotationEC2NodeClassHash: "test-hash-2", + v1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2", + }) + + // The nodeclaim will need to be updated first, as the hash controller will only be triggered on changes to the nodeclass + env.ExpectUpdated(nodeClaim, nodeClass) + expectedHash = nodeClass.Hash() + + // Expect all nodeclaims not to be drifted and contain an updated `nodepool-hash` and `nodepool-hash-version` annotation + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).To(Succeed()) + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClass.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHash, expectedHash)) + g.Expect(nodeClaim.Annotations).To(HaveKeyWithValue(v1.AnnotationEC2NodeClassHashVersion, v1.EC2NodeClassHashVersion)) + }).WithTimeout(30 * time.Second).Should(Succeed()) + env.ConsistentlyExpectNodeClaimsNotDrifted(time.Minute, nodeClaim) + }) + Context("Failure", func() { + It("should not disrupt a drifted node if the replacement node never registers", func() { + // launch a new nodeClaim + var numPods int32 = 2 + dep := coretest.Deployment(coretest.DeploymentOptions{ + Replicas: 2, + PodOptions: coretest.PodOptions{ + ObjectMeta: 
metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, + PodAntiRequirements: []corev1.PodAffinityTerm{{ + TopologyKey: corev1.LabelHostname, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "inflate"}, + }}, + }, + }, + }) + env.ExpectCreated(dep, nodeClass, nodePool) + + startingNodeClaimState := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) + env.EventuallyExpectCreatedNodeCount("==", int(numPods)) + + // Drift the nodeClaim with bad configuration that will not register a NodeClaim + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: env.GetAMIBySSMPath("/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs")}} + env.ExpectCreatedOrUpdated(nodeClass) + + env.EventuallyExpectDrifted(startingNodeClaimState...) + + // Expect only a single node to be tainted due to default disruption budgets + taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) + + // Drift should fail and the original node should be untainted + // TODO: reduce timeouts when disruption waits are factored out + env.EventuallyExpectNodesUntaintedWithTimeout(11*time.Minute, taintedNodes...) + + // Expect all the NodeClaims that existed on the initial provisioning loop are not removed. + // Assert this over several minutes to ensure a subsequent disruption controller pass doesn't + // successfully schedule the evicted pods to the in-flight nodeclaim and disrupt the original node + Consistently(func(g Gomega) { + nodeClaims := &karpv1.NodeClaimList{} + g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) + startingNodeClaimUIDs := sets.New(lo.Map(startingNodeClaimState, func(nc *karpv1.NodeClaim, _ int) types.UID { return nc.UID })...) + nodeClaimUIDs := sets.New(lo.Map(nodeClaims.Items, func(nc karpv1.NodeClaim, _ int) types.UID { return nc.UID })...) 
+ g.Expect(nodeClaimUIDs.IsSuperset(startingNodeClaimUIDs)).To(BeTrue()) + }, "2m").Should(Succeed()) + }) + It("should not disrupt a drifted node if the replacement node registers but never initialized", func() { + // launch a new nodeClaim + var numPods int32 = 2 + dep := coretest.Deployment(coretest.DeploymentOptions{ + Replicas: 2, + PodOptions: coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, + PodAntiRequirements: []corev1.PodAffinityTerm{{ + TopologyKey: corev1.LabelHostname, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "inflate"}, + }}, + }, + }, + }) + env.ExpectCreated(dep, nodeClass, nodePool) + + startingNodeClaimState := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) + env.EventuallyExpectCreatedNodeCount("==", int(numPods)) + + // Drift the nodeClaim with bad configuration that never initializes + nodePool.Spec.Template.Spec.StartupTaints = []corev1.Taint{{Key: "example.com/taint", Effect: corev1.TaintEffectPreferNoSchedule}} + env.ExpectCreatedOrUpdated(nodePool) + + env.EventuallyExpectDrifted(startingNodeClaimState...) + + // Expect only a single node to get tainted due to default disruption budgets + taintedNodes := env.EventuallyExpectTaintedNodeCount("==", 1) + + // Drift should fail and original node should be untainted + // TODO: reduce timeouts when disruption waits are factored out + env.EventuallyExpectNodesUntaintedWithTimeout(11*time.Minute, taintedNodes...) 
+ + // Expect that the new nodeClaim/node is kept around after the un-cordon + nodeList := &corev1.NodeList{} + Expect(env.Client.List(env, nodeList, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) + Expect(nodeList.Items).To(HaveLen(int(numPods) + 1)) + + nodeClaimList := &karpv1.NodeClaimList{} + Expect(env.Client.List(env, nodeClaimList, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) + Expect(nodeClaimList.Items).To(HaveLen(int(numPods) + 1)) + + // Expect all the NodeClaims that existed on the initial provisioning loop are not removed + // Assert this over several minutes to ensure a subsequent disruption controller pass doesn't + // successfully schedule the evicted pods to the in-flight nodeclaim and disrupt the original node + Consistently(func(g Gomega) { + nodeClaims := &karpv1.NodeClaimList{} + g.Expect(env.Client.List(env, nodeClaims, client.HasLabels{coretest.DiscoveryLabel})).To(Succeed()) + startingNodeClaimUIDs := sets.New(lo.Map(startingNodeClaimState, func(m *karpv1.NodeClaim, _ int) types.UID { return m.UID })...) + nodeClaimUIDs := sets.New(lo.Map(nodeClaims.Items, func(m karpv1.NodeClaim, _ int) types.UID { return m.UID })...) 
+ g.Expect(nodeClaimUIDs.IsSuperset(startingNodeClaimUIDs)).To(BeTrue()) + }, "2m").Should(Succeed()) + }) + It("should not drift any nodes if their PodDisruptionBudgets are unhealthy", func() { + // Create a deployment that contains a readiness probe that will never succeed + // This way, the pod will bind to the node, but the PodDisruptionBudget will never go healthy + var numPods int32 = 2 + dep := coretest.Deployment(coretest.DeploymentOptions{ + Replicas: 2, + PodOptions: coretest.PodOptions{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "inflate"}}, + PodAntiRequirements: []corev1.PodAffinityTerm{{ + TopologyKey: corev1.LabelHostname, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "inflate"}, + }}, + }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Port: intstr.FromInt32(80), + }, + }, + }, + }, + }) + selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + minAvailable := intstr.FromInt32(numPods - 1) + pdb := coretest.PodDisruptionBudget(coretest.PDBOptions{ + Labels: dep.Spec.Template.Labels, + MinAvailable: &minAvailable, + }) + env.ExpectCreated(dep, nodeClass, nodePool, pdb) + + nodeClaims := env.EventuallyExpectCreatedNodeClaimCount("==", int(numPods)) + env.EventuallyExpectCreatedNodeCount("==", int(numPods)) + + // Expect pods to be bound but not to be ready since we are intentionally failing the readiness check + env.EventuallyExpectBoundPodCount(selector, int(numPods)) + + // Drift the nodeclaims + nodePool.Spec.Template.Annotations = map[string]string{"test": "annotation"} + env.ExpectUpdated(nodePool) + + env.EventuallyExpectDrifted(nodeClaims...) 
+ env.ConsistentlyExpectNoDisruptions(int(numPods), time.Minute) + }) + }) +}) diff --git a/test/suites/drift/testdata/al2023_userdata_input.yaml b/test/suites/drift/testdata/al2023_userdata_input.yaml new file mode 100644 index 000000000000..e400f3259490 --- /dev/null +++ b/test/suites/drift/testdata/al2023_userdata_input.yaml @@ -0,0 +1,15 @@ +apiVersion: node.eks.aws/v1alpha1 +kind: NodeConfig +spec: + cluster: + name: %s + apiServerEndpoint: %s + certificateAuthority: %s + cidr: 10.100.0.0/16 + kubelet: + config: + clusterDNS: + - 10.0.100.10 + flags: + - --node-labels='testing/cluster=unspecified' + - --register-with-taints='karpenter.sh/unregistered:NoExecute' diff --git a/test/suites/nodeclaim/testdata/al2023_userdata_input.yaml b/test/suites/nodeclaim/testdata/al2023_userdata_input.yaml index b0ce7a5e8496..e400f3259490 100644 --- a/test/suites/nodeclaim/testdata/al2023_userdata_input.yaml +++ b/test/suites/nodeclaim/testdata/al2023_userdata_input.yaml @@ -11,4 +11,5 @@ spec: clusterDNS: - 10.0.100.10 flags: - - --node-labels="testing/cluster=unspecified" \ No newline at end of file + - --node-labels='testing/cluster=unspecified' + - --register-with-taints='karpenter.sh/unregistered:NoExecute' From 32f50c15f60b0eb8783dbad2593d47bf9ae6cd47 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Mon, 15 Jul 2024 23:57:03 -0700 Subject: [PATCH 9/9] test fixes --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 2 +- pkg/apis/v1/ec2nodeclass.go | 2 +- pkg/providers/amifamily/al2023.go | 19 +++++++++-- test/suites/drift/suite_test.go | 1 + .../integration/ec2nodeclass_kubelet_test.go | 34 +++++++++++++++++-- 5 files changed, 52 insertions(+), 6 deletions(-) diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 167be07394e1..0753b8454e9b 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -67,7 +67,7 @@ spec: alias: description: |- 
Alias specifies which EKS optimized AMI to select. - Each alias consists of a family and a version, specified as "family@version". + Each alias consists of a family and an AMI version, specified as "family@version". Valid families include: al2, al2023, bottlerocket, windows2019, and windows2022. The version can either be pinned to a specific AMI release, with that AMIs version format (ex: "al2023@v20240625" or "bottlerocket@v1.10.0"). The version can also be set to "latest" for any family. Setting the version to latest will result in drift when a new AMI is released. This is **not** recommended for production environments. diff --git a/pkg/apis/v1/ec2nodeclass.go b/pkg/apis/v1/ec2nodeclass.go index 3b47396b6bc2..ec468f50c3a1 100644 --- a/pkg/apis/v1/ec2nodeclass.go +++ b/pkg/apis/v1/ec2nodeclass.go @@ -165,7 +165,7 @@ type SecurityGroupSelectorTerm struct { // If multiple fields are used for selection, the requirements are ANDed. type AMISelectorTerm struct { // Alias specifies which EKS optimized AMI to select. - // Each alias consists of a family and a version, specified as "family@version". + // Each alias consists of a family and an AMI version, specified as "family@version". // Valid families include: al2, al2023, bottlerocket, windows2019, and windows2022. // The version can either be pinned to a specific AMI release, with that AMIs version format (ex: "al2023@v20240625" or "bottlerocket@v1.10.0"). // The version can also be set to "latest" for any family. Setting the version to latest will result in drift when a new AMI is released. This is **not** recommended for production environments. 
diff --git a/pkg/providers/amifamily/al2023.go b/pkg/providers/amifamily/al2023.go index 8cc299c7f19e..535074db3bd9 100644 --- a/pkg/providers/amifamily/al2023.go +++ b/pkg/providers/amifamily/al2023.go @@ -50,6 +50,8 @@ func (a AL2023) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider log.FromContext(ctx).WithValues("path", rootPath, "family", "al2023").Error(err, "discovering AMIs from ssm") return DescribeImageQuery{}, fmt.Errorf(`failed to discover any AMIs for alias "al2023@%s"`, amiVersion) } + + ids := map[string]Variant{} for path, value := range results { pathComponents := strings.Split(path, "/") if len(pathComponents) != 11 || pathComponents[10] != "image_id" { @@ -62,9 +64,22 @@ func (a AL2023) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider if err != nil { continue } - imageIDs = append(imageIDs, lo.ToPtr(value)) - requirements[value] = []scheduling.Requirements{variant.Requirements()} + ids[value] = variant } + + // EKS doesn't currently vend any accelerated AL2023 AMIs. We should schedule all workloads to + // these standard AMIs until accelerated AMIs are available. This approach ensures Karpenter is + // forwards compatible with accelerated AMIs once they become available. 
+ hasAcceleratedAMIs := lo.ContainsBy(lo.Values(ids), func(v Variant) bool { + return v != VariantStandard + }) + for id, variant := range ids { + imageIDs = append(imageIDs, lo.ToPtr(id)) + if hasAcceleratedAMIs { + requirements[id] = []scheduling.Requirements{variant.Requirements()} + } + } + // Failed to discover any AMIs, we should short circuit AMI discovery if len(imageIDs) == 0 { return DescribeImageQuery{}, fmt.Errorf(`failed to discover AMIs for alias "al2023@%s"`, amiVersion) diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index c1d24267831e..7d2d2f989c0f 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -430,6 +430,7 @@ var _ = Describe("Drift", func() { fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/x86_64/standard/recommended/image_id", env.K8sVersionWithOffset(1)), )) nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{ID: oldCustomAMI}} + nodeClass.Spec.UserData = customUserData env.ExpectCreated(dep, nodeClass, nodePool) env.EventuallyExpectHealthyPodCount(selector, numPods) diff --git a/test/suites/integration/ec2nodeclass_kubelet_test.go b/test/suites/integration/ec2nodeclass_kubelet_test.go index a34724abf4cd..0462d9f4b0fd 100644 --- a/test/suites/integration/ec2nodeclass_kubelet_test.go +++ b/test/suites/integration/ec2nodeclass_kubelet_test.go @@ -74,8 +74,38 @@ var _ = Describe("EC2nodeClass Kubelet Configuration", func() { Requirements: []karpv1beta1.NodeSelectorRequirementWithMinValues{ { NodeSelectorRequirement: corev1.NodeSelectorRequirement{ - Key: karpv1beta1.CapacityTypeLabelKey, - Operator: corev1.NodeSelectorOpExists, + Key: corev1.LabelOSStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{string(corev1.Linux)}, + }, + }, + { + NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: karpv1.CapacityTypeLabelKey, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.CapacityTypeOnDemand}, + }, + }, + { + 
NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: v1.LabelInstanceCategory, + Operator: corev1.NodeSelectorOpIn, + Values: []string{"c", "m", "r"}, + }, + }, + { + NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: v1.LabelInstanceGeneration, + Operator: corev1.NodeSelectorOpGt, + Values: []string{"2"}, + }, + }, + // Filter out a1 instance types, which are incompatible with AL2023 AMIs + { + NodeSelectorRequirement: corev1.NodeSelectorRequirement{ + Key: v1.LabelInstanceFamily, + Operator: corev1.NodeSelectorOpNotIn, + Values: []string{"a1"}, }, }, },