diff --git a/designs/deprecated-ami-observability.md b/designs/deprecated-ami-observability.md new file mode 100644 index 000000000000..ab8d2d91234d --- /dev/null +++ b/designs/deprecated-ami-observability.md @@ -0,0 +1,192 @@ +# Observability for Deprecated AMIs + +## Background + +With the recent introduction of a significant feature through [PR #6500](https://github.com/aws/karpenter-provider-aws/pull/6500), Karpenter has enhanced its capability to identify and utilize deprecated Amazon Machine Images (AMIs). Karpenter remains effective in provisioning new nodes within production environments where specific AMI IDs are mandated, adhering to these [guidelines](https://karpenter.sh/docs/tasks/managing-amis/#option-2-lock-down-which-amis-are-selected), or when discovering AMIs based on `amiSelectorTerms`. Previously, if an AMI designated in an EC2NodeClass was deprecated, Karpenter faced challenges in launching new nodes, which could lead to potential service interruptions, especially in cases necessitating auto-scaling driven by Horizontal Pod Autoscalers (HPAs). + +This new feature would also benefit from enhanced observability, allowing cluster admins to identify which EC2NodeClasses are utilizing deprecated AMIs and take action accordingly. + +## Options + +### Option 1: Update the EC2NodeClass CRD + +This approach will modify the current CRD for the EC2NodeClass by adding a new `deprecated` field to the `status.amis` section, providing a clear and immediate indication of AMI deprecation directly within the resource configuration. 
+ +#### Code Definition + +[`pkg/apis/v1/ec2nodeclass_status.go`](../pkg/apis/v1/ec2nodeclass_status.go#L53) + +```go +type AMI struct { + // ID of the AMI + // +required + ID string `json:"id"` + // Deprecation status of the AMI + // +optional + Deprecated bool `json:"deprecated,omitempty"` + // Name of the AMI + // +optional + Name string `json:"name,omitempty"` + // Requirements of the AMI to be utilized on an instance type + // +required + Requirements []corev1.NodeSelectorRequirement `json:"requirements"` +} +``` + +#### Proposed Spec + +``` yaml +status: + amis: + - id: ami-01234567890654321 + name: custom-ami-amd64 + deprecated: true + requirements: + - key: kubernetes.io/arch + operator: In + values: + - amd64 + - id: ami-01234567890123456 + name: custom-ami-arm64 + requirements: + - key: kubernetes.io/arch + operator: In + values: + - arm64 +``` + +#### Pros + Cons + +* 👍 This will provide cluster admins a clear, concise indication of which AMIs being utilized by the EC2NodeClass are deprecated. +* 👎 This will add a dependency to require a CRD update for the EC2NodeClass along with the version bump for Karpenter. + +### Option 2: Add new status conditions to the EC2NodeClass CRD + +This is an alternate approach to update the status conditions for the EC2NodeClass to provide information to cluster admins that deprecated AMIs were discovered as part of the `amiSelectorTerms`. 
+ +#### Code Definition + +[`pkg/apis/v1/ec2nodeclass_status.go`](../pkg/apis/v1/ec2nodeclass_status.go#L22) + +``` go + +const ( + ConditionTypeSubnetsReady = "SubnetsReady" + ConditionTypeSecurityGroupsReady = "SecurityGroupsReady" + ConditionTypeAMIsReady = "AMIsReady" + ConditionTypeAMIsDeprecated = "AMIsDeprecated" + ConditionTypeInstanceProfileReady = "InstanceProfileReady" +) + +``` + +#### Proposed Spec + +``` yaml +status: + amis: + - id: ami-01234567890654321 + name: amazon-eks-node-1.29 + requirements: + - key: kubernetes.io/arch + operator: In + values: + - amd64 + - id: ami-01234567890123456 + name: amazon-eks-arm64-node-1.29 + requirements: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + conditions: + - lastTransitionTime: "2024-09-09T04:32:55Z" + message: "" + reason: AMIsDeprecated + status: "True" + type: AMIsDeprecated + - lastTransitionTime: "2024-09-09T04:32:55Z" + message: "" + reason: InstanceProfileReady + status: "True" + type: InstanceProfileReady + - lastTransitionTime: "2024-09-09T04:32:55Z" + message: "" + reason: Ready + status: "True" + type: Ready + - lastTransitionTime: "2024-09-09T04:32:55Z" + message: "" + reason: SecurityGroupsReady + status: "True" + type: SecurityGroupsReady + - lastTransitionTime: "2024-09-09T04:32:55Z" + message: "" + reason: SubnetsReady + status: "True" + type: SubnetsReady +``` + +#### Pros + Cons + +* 👍 This change will not warrant a change to the CRD for the EC2NodeClass, making the Karpenter upgrade seamless without CRD update dependencies +* 👍 This can provide a metric out of the box, `operator_status_condition_count`, which can be used to check which `EC2NodeClass` resources are using deprecated AMIs +* 👎 This may cause confusion and indicate to cluster admins that an EC2NodeClass is using deprecated AMIs, when in reality it could be that "one of the AMIs discovered is deprecated". 
+* 👎 Users will have to do the legwork to figure out which AMIs from the discovered AMIs for the EC2NodeClass are deprecated. + + +### Option 3: Best of both worlds + +The final approach would be a combination of both, i.e., updates to the EC2NodeClass CRD as well as updates to the status conditions, combining both of the approaches mentioned above. + +#### Proposed Spec + +``` yaml +status: + amis: + - id: ami-01234567890654321 + name: amazon-eks-node-1.29 + deprecated: true + requirements: + - key: kubernetes.io/arch + operator: In + values: + - amd64 + - id: ami-01234567890123456 + name: amazon-eks-arm64-node-1.29 + requirements: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + conditions: + - lastTransitionTime: "2024-09-09T04:32:55Z" + message: "" + reason: AMIsDeprecated + status: "True" + type: AMIsDeprecated + - lastTransitionTime: "2024-09-09T04:32:55Z" + message: "" + reason: InstanceProfileReady + status: "True" + type: InstanceProfileReady + - lastTransitionTime: "2024-09-09T04:32:55Z" + message: "" + reason: Ready + status: "True" + type: Ready + - lastTransitionTime: "2024-09-09T04:32:55Z" + message: "" + reason: SecurityGroupsReady + status: "True" + type: SecurityGroupsReady + - lastTransitionTime: "2024-09-09T04:32:55Z" + message: "" + reason: SubnetsReady + status: "True" + type: SubnetsReady +``` + +## Recommendation + +Based on the above approaches, the preferred solution would be to leverage [Option 3](#option-3-best-of-both-worlds): the `AMIsDeprecated` status condition (and its out-of-the-box metric) flags which EC2NodeClasses are affected, while the per-AMI `deprecated` field shows exactly which of the discovered AMIs are deprecated. diff --git a/pkg/providers/pricing/zz_generated.pricing_aws_cn.go b/pkg/providers/pricing/zz_generated.pricing_aws_cn.go index 345a96b84f57..a3f2ec8f3ec6 100644 --- a/pkg/providers/pricing/zz_generated.pricing_aws_cn.go +++ b/pkg/providers/pricing/zz_generated.pricing_aws_cn.go @@ -16,9 +16,10 @@ limitations under the License. 
package pricing -// generated at 2023-09-18T13:06:44Z for cn-north-1 +// generated at 2024-11-06T01:00:11Z for cn-north-1 var InitialOnDemandPricesCN = map[string]map[string]float64{ + // cn-north-1 "cn-north-1": { // c3 family "c3.2xlarge": 4.217000, "c3.4xlarge": 8.434000, "c3.8xlarge": 16.869000, "c3.large": 1.054000, @@ -41,10 +42,17 @@ var InitialOnDemandPricesCN = map[string]map[string]float64{ "c6g.12xlarge": 14.064400, "c6g.16xlarge": 18.752600, "c6g.2xlarge": 2.344100, "c6g.4xlarge": 4.688100, "c6g.8xlarge": 9.376300, "c6g.large": 0.586000, "c6g.medium": 0.293000, "c6g.metal": 19.390900, "c6g.xlarge": 1.172000, + // c6gn family + "c6gn.12xlarge": 17.869700, "c6gn.16xlarge": 23.826270, "c6gn.2xlarge": 2.978280, "c6gn.4xlarge": 5.956570, + "c6gn.8xlarge": 11.913140, "c6gn.large": 0.744570, "c6gn.medium": 0.372290, "c6gn.xlarge": 1.489140, // c6i family "c6i.12xlarge": 17.744830, "c6i.16xlarge": 23.659780, "c6i.24xlarge": 35.489660, "c6i.2xlarge": 2.957470, "c6i.32xlarge": 47.319550, "c6i.4xlarge": 5.914940, "c6i.8xlarge": 11.829890, "c6i.large": 0.739370, "c6i.metal": 47.319550, "c6i.xlarge": 1.478740, + // c7g family + "c7g.12xlarge": 15.083100, "c7g.16xlarge": 20.110800, "c7g.2xlarge": 2.513900, "c7g.4xlarge": 5.027700, + "c7g.8xlarge": 10.055400, "c7g.large": 0.628500, "c7g.medium": 0.314200, "c7g.metal": 20.110800, + "c7g.xlarge": 1.256900, // d2 family "d2.2xlarge": 13.345000, "d2.4xlarge": 26.690000, "d2.8xlarge": 53.380000, "d2.xlarge": 6.673000, // g3 family @@ -54,6 +62,9 @@ var InitialOnDemandPricesCN = map[string]map[string]float64{ // g4dn family "g4dn.12xlarge": 38.849000, "g4dn.16xlarge": 43.218000, "g4dn.2xlarge": 7.468000, "g4dn.4xlarge": 11.956000, "g4dn.8xlarge": 21.609000, "g4dn.xlarge": 5.223000, + // g5 family + "g5.12xlarge": 53.640920, "g5.16xlarge": 38.736460, "g5.24xlarge": 77.018980, "g5.2xlarge": 11.462060, + "g5.48xlarge": 154.037950, "g5.4xlarge": 15.358400, "g5.8xlarge": 23.151090, "g5.xlarge": 9.513890, // i2 family 
"i2.2xlarge": 20.407000, "i2.4xlarge": 40.815000, "i2.8xlarge": 81.630000, "i2.xlarge": 10.204000, // i3 family @@ -62,6 +73,10 @@ var InitialOnDemandPricesCN = map[string]map[string]float64{ // i3en family "i3en.12xlarge": 54.302000, "i3en.24xlarge": 108.605000, "i3en.2xlarge": 9.050000, "i3en.3xlarge": 13.576000, "i3en.6xlarge": 27.151000, "i3en.large": 2.263000, "i3en.xlarge": 4.525000, + // i4i family + "i4i.12xlarge": 43.665000, "i4i.16xlarge": 58.221000, "i4i.24xlarge": 87.330860, "i4i.2xlarge": 7.278000, + "i4i.32xlarge": 116.441150, "i4i.4xlarge": 14.555000, "i4i.8xlarge": 29.110000, "i4i.large": 1.819000, + "i4i.xlarge": 3.639000, // inf1 family "inf1.24xlarge": 47.342000, "inf1.2xlarge": 3.630000, "inf1.6xlarge": 11.835000, "inf1.xlarge": 2.288000, // m1 family @@ -90,6 +105,10 @@ var InitialOnDemandPricesCN = map[string]map[string]float64{ "m6i.12xlarge": 24.316990, "m6i.16xlarge": 32.422660, "m6i.24xlarge": 48.633980, "m6i.2xlarge": 4.052830, "m6i.32xlarge": 64.845310, "m6i.4xlarge": 8.105660, "m6i.8xlarge": 16.211330, "m6i.large": 1.013210, "m6i.metal": 64.845310, "m6i.xlarge": 2.026420, + // m7g family + "m7g.12xlarge": 20.669400, "m7g.16xlarge": 27.559300, "m7g.2xlarge": 3.444900, "m7g.4xlarge": 6.889800, + "m7g.8xlarge": 13.779600, "m7g.large": 0.861200, "m7g.medium": 0.430600, "m7g.metal": 27.559300, + "m7g.xlarge": 1.722500, // p2 family "p2.16xlarge": 169.792000, "p2.8xlarge": 84.896000, "p2.xlarge": 10.612000, // p3 family @@ -123,6 +142,12 @@ var InitialOnDemandPricesCN = map[string]map[string]float64{ "r6i.12xlarge": 29.246110, "r6i.16xlarge": 38.994820, "r6i.24xlarge": 58.492220, "r6i.2xlarge": 4.874350, "r6i.32xlarge": 77.989630, "r6i.4xlarge": 9.748700, "r6i.8xlarge": 19.497410, "r6i.large": 1.218590, "r6i.metal": 77.989630, "r6i.xlarge": 2.437180, + // r7g family + "r7g.12xlarge": 24.875600, "r7g.16xlarge": 33.167500, "r7g.2xlarge": 4.145900, "r7g.4xlarge": 8.291900, + "r7g.8xlarge": 16.583800, "r7g.large": 1.036500, "r7g.medium": 
0.518200, "r7g.metal": 33.167500, + "r7g.xlarge": 2.073000, + // t1 family + "t1.micro": 0.221000, // t2 family "t2.2xlarge": 3.392000, "t2.large": 0.851000, "t2.medium": 0.426000, "t2.micro": 0.106000, "t2.nano": 0.060600, "t2.small": 0.212000, "t2.xlarge": 1.696000,