chore: Release v0.34.0 (#5614)

jmdeal authored Feb 6, 2024
1 parent 17d6c05 commit ab3d58f
Showing 101 changed files with 5,860 additions and 3,743 deletions.
4 changes: 2 additions & 2 deletions charts/karpenter-crd/Chart.yaml
@@ -2,8 +2,8 @@ apiVersion: v2
name: karpenter-crd
description: A Helm chart for Karpenter Custom Resource Definitions (CRDs)
type: application
-version: 0.33.0
-appVersion: 0.33.0
+version: 0.34.0
+appVersion: 0.34.0
keywords:
- cluster
- node
4 changes: 2 additions & 2 deletions charts/karpenter/Chart.yaml
@@ -2,8 +2,8 @@ apiVersion: v2
name: karpenter
description: A Helm chart for Karpenter, an open-source node provisioning project built for Kubernetes.
type: application
-version: 0.33.0
-appVersion: 0.33.0
+version: 0.34.0
+appVersion: 0.34.0
keywords:
- cluster
- node
8 changes: 4 additions & 4 deletions charts/karpenter/README.md
@@ -2,7 +2,7 @@

A Helm chart for Karpenter, an open-source node provisioning project built for Kubernetes.

-![Version: 0.33.0](https://img.shields.io/badge/Version-0.33.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.33.0](https://img.shields.io/badge/AppVersion-0.33.0-informational?style=flat-square)
+![Version: 0.34.0](https://img.shields.io/badge/Version-0.34.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.34.0](https://img.shields.io/badge/AppVersion-0.34.0-informational?style=flat-square)

## Documentation

@@ -15,7 +15,7 @@ You can follow the detailed installation instruction in the [documentation](http
```bash
helm upgrade --install --namespace karpenter --create-namespace \
karpenter oci://public.ecr.aws/karpenter/karpenter \
-  --version v0.33.0 \
+  --version v0.34.0 \
--set "serviceAccount.annotations.eks\.amazonaws\.com/role-arn=${KARPENTER_IAM_ROLE_ARN}" \
--set settings.clusterName=${CLUSTER_NAME} \
--set settings.interruptionQueue=${CLUSTER_NAME} \
@@ -34,9 +34,9 @@ helm upgrade --install --namespace karpenter --create-namespace \
| controller.envFrom | list | `[]` | |
| controller.extraVolumeMounts | list | `[]` | Additional volumeMounts for the controller pod. |
| controller.healthProbe.port | int | `8081` | The container port to use for http health probe. |
-| controller.image.digest | string | `"sha256:5e5f59f74d86ff7f13d7d80b89afff8c661cb4e3265f2fdda95b76dd9c838cc1"` | SHA256 digest of the controller image. |
+| controller.image.digest | string | `"sha256:1ec788c4358106d728a352426462014b7ee4734e9d5ec932d2f37a7b15f9be65"` | SHA256 digest of the controller image. |
| controller.image.repository | string | `"public.ecr.aws/karpenter/controller"` | Repository path to the controller image. |
-| controller.image.tag | string | `"v0.33.0"` | Tag of the controller image. |
+| controller.image.tag | string | `"v0.34.0"` | Tag of the controller image. |
| controller.metrics.port | int | `8000` | The container port to use for metrics. |
| controller.resources | object | `{}` | Resources for the controller pod. |
| controller.sidecarContainer | list | `[]` | Additional sidecarContainer config |
4 changes: 2 additions & 2 deletions charts/karpenter/values.yaml
@@ -99,9 +99,9 @@ controller:
# -- Repository path to the controller image.
repository: public.ecr.aws/karpenter/controller
# -- Tag of the controller image.
-tag: v0.33.0
+tag: v0.34.0
# -- SHA256 digest of the controller image.
-digest: sha256:5e5f59f74d86ff7f13d7d80b89afff8c661cb4e3265f2fdda95b76dd9c838cc1
+digest: sha256:1ec788c4358106d728a352426462014b7ee4734e9d5ec932d2f37a7b15f9be65
# -- Additional environment variables for the controller pod.
env: []
# - name: AWS_REGION
90 changes: 80 additions & 10 deletions website/content/en/docs/concepts/disruption.md
@@ -13,11 +13,13 @@ The finalizer blocks deletion of the node object while the Termination Controlle

### Disruption Controller

-Karpenter automatically discovers disruptable nodes and spins up replacements when needed. Karpenter disrupts nodes by executing one [automatic method](#automatic-methods) at a time, in order of Expiration, Drift, and then Consolidation. Each method varies slightly, but they all follow the standard disruption process:
+Karpenter automatically discovers disruptable nodes and spins up replacements when needed. Karpenter disrupts nodes by executing one [automatic method](#automatic-methods) at a time, in order of Expiration, Drift, and then Consolidation. Each method varies slightly, but they all follow the standard disruption process. Karpenter uses [disruption budgets]({{<ref "#disruption-budgets" >}}) to control the speed of disruption.
1. Identify a list of prioritized candidates for the disruption method.
* If there are [pods that cannot be evicted](#pod-eviction) on the node, Karpenter will ignore the node and try disrupting it later.
* If there are no disruptable nodes, continue to the next disruption method.
-2. For each disruptable node, execute a scheduling simulation with the pods on the node to find if any replacement nodes are needed.
+2. For each disruptable node:
+   1. Check if disrupting it would violate its NodePool's disruption budget.
+   2. Execute a scheduling simulation with the pods on the node to find if any replacement nodes are needed.
3. Add the `karpenter.sh/disruption:NoSchedule` taint to the node(s) to prevent pods from scheduling to it.
4. Pre-spin any replacement nodes needed as calculated in Step (2), and wait for them to become ready.
* If a replacement node fails to initialize, un-taint the node(s), and restart from Step (1), starting at the first disruption method again.
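To make the ordering concrete, here is a minimal, self-contained Go sketch of steps 1-4; every type and helper below is an illustrative stand-in, not Karpenter's actual controller code:

```go
package main

import "fmt"

// Illustrative stand-ins; Karpenter's real disruption controller is far more involved.
type Node struct {
	Name            string
	HasBlockingPods bool // pods on the node that cannot be evicted
}

type Method struct {
	Name       string
	Candidates func() []Node // prioritized candidates for this method
}

// disrupt tries each automatic method in its fixed order (Expiration, Drift,
// Consolidation) and acts on the first eligible candidate, mirroring the
// "one method at a time" process described above.
func disrupt(methods []Method, budgetAllows func(Node) bool) {
	for _, m := range methods {
		for _, n := range m.Candidates() {
			if n.HasBlockingPods || !budgetAllows(n) {
				continue // ignore for now; the node is retried on a later pass
			}
			// Steps 2-4: simulate scheduling, taint with
			// karpenter.sh/disruption:NoSchedule, pre-spin any replacements.
			fmt.Printf("%s: disrupting %s\n", m.Name, n.Name)
			return
		}
		// No disruptable nodes: continue to the next disruption method.
	}
}

func main() {
	methods := []Method{
		{Name: "Expiration", Candidates: func() []Node { return nil }},
		{Name: "Drift", Candidates: func() []Node { return []Node{{Name: "node-a", HasBlockingPods: true}} }},
		{Name: "Consolidation", Candidates: func() []Node { return []Node{{Name: "node-b"}} }},
	}
	disrupt(methods, func(Node) bool { return true })
}
```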
@@ -61,6 +63,8 @@ When you run `kubectl delete node` on a node without a finalizer, the node is de

## Automated Methods

Automated methods can be rate limited through [NodePool Disruption Budgets]({{<ref "#disruption-budgets" >}}).

* **Expiration**: Karpenter will mark nodes as expired and disrupt them after they have lived a set number of seconds, based on the NodePool's `spec.disruption.expireAfter` value. You can use node expiry to periodically recycle nodes due to security concerns.
* [**Consolidation**]({{<ref "#consolidation" >}}): Karpenter works to actively reduce cluster cost by identifying when:
* Nodes can be removed because the node is empty
@@ -113,9 +117,17 @@ Events:
Using preferred anti-affinity and topology spreads can reduce the effectiveness of consolidation. At node launch, Karpenter attempts to satisfy affinity and topology spread preferences. In order to reduce node churn, consolidation must also attempt to satisfy these constraints to avoid immediately consolidating nodes after they launch. This means that consolidation may not disrupt nodes in order to avoid violating preferences, even if kube-scheduler can fit the host pods elsewhere. Karpenter reports these pods via logging to bring awareness to the possible issues they can cause (e.g. `pod default/inflate-anti-self-55894c5d8b-522jd has a preferred Anti-Affinity which can prevent consolidation`).
{{% /alert %}}
-{{% alert title="Note" color="primary" %}}
-For spot nodes, Karpenter only uses the deletion consolidation mechanism. It will not replace a spot node with a cheaper spot node. Spot instance types are selected with the `price-capacity-optimized` strategy and often the cheapest spot instance type is not launched due to the likelihood of interruption. Consolidation would then replace the spot instance with a cheaper instance negating the `price-capacity-optimized` strategy entirely and increasing interruption rate.
-{{% /alert %}}

#### Spot consolidation

For spot nodes, Karpenter has deletion consolidation enabled by default. If you would like to enable replacement with spot consolidation, you need to enable the feature through the [`SpotToSpotConsolidation` feature flag]({{<ref "../reference/settings#features-gates" >}}).

Cheaper spot instance types are selected with the [`price-capacity-optimized` strategy](https://aws.amazon.com/blogs/compute/introducing-price-capacity-optimized-allocation-strategy-for-ec2-spot-instances/). Often, the cheapest spot instance type is not launched due to the likelihood of interruption. As a result, Karpenter uses the number of available instance type options cheaper than the currently launched spot instance as a heuristic for evaluating whether it should launch a replacement for the current spot node.

We refer to the number of instance types that Karpenter has within its launch decision as a launch's "instance type flexibility." When Karpenter is considering performing a spot-to-spot consolidation replacement, it will check whether replacing the instance type will lead to enough instance type flexibility in the subsequent launch request. This gives consolidation the following properties:
1) We shouldn't continually consolidate down to the cheapest spot instance, which might have very high rates of interruption.
2) We launch with enough instance types that there's a high likelihood that our replacement instance has comparable availability to our current one.

Karpenter requires a minimum instance type flexibility of 15 instance types when performing single-node spot-to-spot consolidations (1 node to 1 node). It does not have the same flexibility requirement for multi-node spot-to-spot consolidations (many nodes to 1 node), since consolidating many nodes into one without requiring flexibility does not create the same "race to the bottom" scenario.
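A minimal sketch of that guardrail, assuming a caller that has already listed the instance types cheaper than the current spot node; the constant mirrors the 15-type minimum above, and none of these names are Karpenter's actual API:

```go
package main

import "fmt"

// Minimum instance type flexibility for a single-node (1 -> 1) spot-to-spot
// consolidation, per the documentation above.
const minSpotFlexibility = 15

// shouldReplaceSpotNode reports whether a 1-to-1 spot replacement leaves the
// launch request with enough cheaper instance type options.
func shouldReplaceSpotNode(cheaperInstanceTypes []string) bool {
	return len(cheaperInstanceTypes) >= minSpotFlexibility
}

func main() {
	// Only 3 cheaper options: too little flexibility, so keep the current node.
	cheaper := []string{"m5.large", "m5a.large", "m6i.large"}
	fmt.Println(shouldReplaceSpotNode(cheaper)) // false
}
```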
### Drift
Drift handles changes to the NodePool/EC2NodeClass. For Drift, values in the NodePool/EC2NodeClass are reflected in the NodeClaimTemplateSpec/EC2NodeClassSpec in the same way that they’re set. A NodeClaim will be detected as drifted if the values in its owning NodePool/EC2NodeClass do not match the values in the NodeClaim. Similar to the upstream `deployment.spec.template` relationship to pods, Karpenter will annotate the owning NodePool and EC2NodeClass with a hash of the NodeClaimTemplateSpec to check for drift. Some special cases will be discovered either from Karpenter or through the CloudProvider interface, triggered by NodeClaim/Instance/NodePool/EC2NodeClass changes.
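As a rough illustration of that hash comparison, consider the following sketch; the struct, its fields, and the hashing scheme are invented for the example and do not match Karpenter's actual annotation keys or hash inputs:

```go
package main

import (
	"crypto/sha256"
	"encoding/json"
	"fmt"
)

// TemplateSpec stands in for the drift-relevant fields of a NodeClaimTemplateSpec.
type TemplateSpec struct {
	AMIFamily string
	Tags      map[string]string
}

// hash mimics annotating the owning NodePool with a digest of its template spec.
func hash(spec TemplateSpec) string {
	b, _ := json.Marshal(spec)
	return fmt.Sprintf("%x", sha256.Sum256(b))
}

func main() {
	launched := hash(TemplateSpec{AMIFamily: "AL2"}) // recorded when the NodeClaim launched

	current := TemplateSpec{AMIFamily: "Bottlerocket"} // the NodePool was edited since
	fmt.Println("drifted:", hash(current) != launched) // true: NodeClaim no longer matches
}
```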
@@ -139,11 +151,11 @@ In special cases, drift can correspond to multiple values and must be handled di
Behavioral Fields are treated as over-arching settings on the NodePool to dictate how Karpenter behaves. These fields don’t correspond to settings on the NodeClaim or instance. They’re set by the user to control Karpenter’s Provisioning and disruption logic. Since these don’t map to a desired state of NodeClaims, __behavioral fields are not considered for Drift__.
##### NodePool
| Fields              |
|---------------------|
| spec.weight         |
| spec.limits         |
| spec.disruption.*   |

Read the [Drift Design](https://github.com/aws/karpenter-core/blob/main/designs/drift.md) for more.
@@ -178,6 +190,64 @@ To enable interruption handling, configure the `--interruption-queue-name` CLI a
## Controls
### Disruption Budgets
You can rate limit Karpenter's disruption through the NodePool's `spec.disruption.budgets`. If undefined, Karpenter will default to one budget with `nodes: 10%`. Budgets will consider nodes that are actively being deleted for any reason, and will only block Karpenter from disrupting nodes voluntarily through expiration, drift, emptiness, and consolidation.
#### Nodes
When calculating whether a budget will block a node from disruption, Karpenter counts the total number of nodes owned by the NodePool, then subtracts the nodes owned by that NodePool that are currently being deleted and the nodes that are NotReady. If the number of nodes being deleted, by Karpenter or any other process, meets or exceeds the number of allowed disruptions, disruption of this node will not proceed.

If the budget is configured with a percentage value, such as `20%`, Karpenter will calculate the number of allowed disruptions as `allowed_disruptions = roundup(total * percentage) - total_deleting - total_notready`. If the budget is instead configured with a static value, that value is used in place of the percentage calculation: `allowed_disruptions = nodes - total_deleting - total_notready`. For multiple budgets in a NodePool, Karpenter will take the minimum (most restrictive) value across the budgets.
For example, the following NodePool with three budgets defines the following requirements:
- The first budget will only allow 20% of nodes owned by that NodePool to be disrupted. For instance, if there were 19 nodes owned by the NodePool, 4 disruptions would be allowed, rounding up from `19 * .2 = 3.8`.
- The second budget acts as a ceiling to the previous budget, only allowing 5 disruptions when there are more than 25 nodes.
- The last budget only blocks disruptions during the first 10 minutes of the day, where 0 disruptions are allowed.
```yaml
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
name: default
spec:
disruption:
consolidationPolicy: WhenUnderutilized
expireAfter: 720h # 30 * 24h = 720h
budgets:
- nodes: "20%"
- nodes: "5"
- nodes: "0"
schedule: "@daily"
duration: 10m
```
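To see the arithmetic end to end, here is a toy Go reimplementation of the budget math for the NodePool above, assuming 19 nodes with 1 already being deleted; this illustrates the formulas, not Karpenter's code:

```go
package main

import (
	"fmt"
	"math"
	"strconv"
	"strings"
)

// allowedDisruptions evaluates one budget: percentages round up against the
// total node count; static values are taken as-is; nodes already deleting or
// NotReady count against the allowance. Illustrative only.
func allowedDisruptions(budget string, total, deleting, notReady int) int {
	var nodes int
	if strings.HasSuffix(budget, "%") {
		pct, _ := strconv.Atoi(strings.TrimSuffix(budget, "%"))
		nodes = int(math.Ceil(float64(total) * float64(pct) / 100))
	} else {
		nodes, _ = strconv.Atoi(budget)
	}
	if allowed := nodes - deleting - notReady; allowed > 0 {
		return allowed
	}
	return 0
}

func main() {
	// The two always-active budgets from the NodePool above, evaluated for a
	// NodePool with 19 nodes, 1 of which is already being deleted.
	minAllowed := math.MaxInt
	for _, b := range []string{"20%", "5"} {
		if a := allowedDisruptions(b, 19, 1, 0); a < minAllowed {
			minAllowed = a // Karpenter takes the most restrictive budget
		}
	}
	fmt.Println(minAllowed) // 3: the 20% budget allows roundup(3.8)=4 minus 1 deleting; the static budget allows 5-1=4
}
```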
#### Schedule
Schedule follows standard cron syntax: five space-delimited fields, with the options shown below, plus special macros such as `@yearly`, `@monthly`, `@weekly`, `@daily`, and `@hourly`.
See the [Kubernetes documentation](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#writing-a-cronjob-spec) for more information on cron syntax.
```bash
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12)
# │ │ │ │ ┌───────────── day of the week (0 - 6) (Sunday to Saturday;
# │ │ │ │ │ 7 is also Sunday on some systems)
# │ │ │ │ │ OR sun, mon, tue, wed, thu, fri, sat
# │ │ │ │ │
# * * * * *
```
{{% alert title="Note" color="primary" %}}
Timezones are not supported. Most images default to UTC, but it is best practice to ensure this is the case when considering how to define your budgets.
{{% /alert %}}
#### Duration
Duration allows compound durations with minutes and hours values, such as `10h5m`, `30m`, or `160h`. Since cron syntax does not accept units smaller than minutes, durations can only be defined in minutes or hours.
{{% alert title="Note" color="primary" %}}
Duration and Schedule must be defined together. When omitted, the budget is always active. When defined, the schedule determines a starting point where the budget will begin being enforced, and the duration determines how long from that starting point the budget will be enforced.
{{% /alert %}}
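One way to express the "schedule starts a window, duration ends it" semantics is sketched below with the robfig/cron parser; this is a reading of the rule above, not Karpenter's implementation:

```go
package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3"
)

// isActive reports whether a budget with the given schedule and duration is in
// effect at t: the budget is active if the most recent scheduled start is
// within `duration` of t.
func isActive(schedule string, duration time.Duration, t time.Time) (bool, error) {
	s, err := cron.ParseStandard(schedule)
	if err != nil {
		return false, err
	}
	// The next activation measured from (t - duration) falls at or before t
	// exactly when some window [start, start+duration] still covers t.
	return !s.Next(t.Add(-duration)).After(t), nil
}

func main() {
	t := time.Date(2024, 2, 6, 9, 30, 0, 0, time.UTC) // a Tuesday, 09:30 UTC
	active, _ := isActive("0 9 * * mon-fri", 8*time.Hour, t)
	fmt.Println(active) // true: inside the weekday business-hours window
}
```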
### Pod-Level Controls
You can block Karpenter from voluntarily choosing to disrupt certain pods by setting the `karpenter.sh/do-not-disrupt: "true"` annotation on the pod. This is useful for pods that you want to run from start to finish without disruption. By opting pods out of this disruption, you are telling Karpenter that it should not voluntarily remove a node containing this pod.
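As a sketch, the annotation can be applied to every pod in a Deployment with client-go; the `inflate` deployment and `default` namespace here are placeholders:

```go
package main

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(config)

	// "inflate" and "default" are placeholder names for this example.
	deploy, err := client.AppsV1().Deployments("default").Get(context.TODO(), "inflate", metav1.GetOptions{})
	if err != nil {
		panic(err)
	}
	if deploy.Spec.Template.Annotations == nil {
		deploy.Spec.Template.Annotations = map[string]string{}
	}
	// Karpenter will not voluntarily disrupt a node while it runs pods carrying
	// this annotation.
	deploy.Spec.Template.Annotations["karpenter.sh/do-not-disrupt"] = "true"

	if _, err := client.AppsV1().Deployments("default").Update(context.TODO(), deploy, metav1.UpdateOptions{}); err != nil {
		panic(err)
	}
}
```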
34 changes: 32 additions & 2 deletions website/content/en/docs/concepts/nodeclasses.md
@@ -78,6 +78,9 @@ spec:
environment: test
- name: my-ami
- id: ami-123

# Optional, use instance-store volumes for node ephemeral-storage
instanceStorePolicy: RAID0

# Optional, overrides autogenerated userdata with a merge semantic
userData: |
@@ -173,7 +176,6 @@ exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
/etc/eks/bootstrap.sh 'test-cluster' --apiserver-endpoint 'https://test-cluster' --b64-cluster-ca 'ca-bundle' \
--dns-cluster-ip '10.100.0.10' \
--use-max-pods false \
-  --container-runtime containerd \
--kubelet-extra-args '--node-labels=karpenter.sh/capacity-type=on-demand,karpenter.sh/nodepool=test --max-pods=110'
--//--
```
@@ -616,6 +618,35 @@ spec:

The `Custom` AMIFamily ships without any default `blockDeviceMappings`.

## spec.instanceStorePolicy

The `instanceStorePolicy` field controls how [instance-store](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html) volumes are handled. By default, Karpenter and Kubernetes will simply ignore them.

### RAID0

If you intend to use these volumes for faster node ephemeral-storage, set `instanceStorePolicy` to `RAID0`:

```yaml
spec:
instanceStorePolicy: RAID0
```

This will set the allocatable ephemeral-storage of each node to the total size of the instance-store volume(s).

The disks must be formatted and mounted in a RAID0 array, and that array must be the underlying filesystem for the Kubelet and Containerd. Instructions for each AMI family are listed below:

#### AL2

On AL2, Karpenter automatically configures the disks through an additional bootstrap argument (`--local-disks raid0`). The device name is `/dev/md/0` and its mount point is `/mnt/k8s-disks/0`. You should ensure any additional disk setup does not interfere with these.

#### Others

For all other AMI families, you must configure the disks yourself. Check out the [`setup-local-disks`](https://github.com/awslabs/amazon-eks-ami/blob/master/files/bin/setup-local-disks) script in [amazon-eks-ami](https://github.com/awslabs/amazon-eks-ami) to see how this is done for AL2.

{{% alert title="Tip" color="secondary" %}}
Since the Kubelet and Containerd will be using the instance-store filesystem, you may consider using a more minimal root volume size.
{{% /alert %}}

## spec.userData

You can control the UserData that is applied to your worker nodes via this field. This allows you to run custom scripts or pass-through custom configuration to Karpenter instances on start-up.
@@ -716,7 +747,6 @@ Content-Type: text/x-shellscript; charset="us-ascii"
exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
/etc/eks/bootstrap.sh 'test-cluster' --apiserver-endpoint 'https://test-cluster' --b64-cluster-ca 'ca-bundle' \
--use-max-pods false \
-  --container-runtime containerd \
--kubelet-extra-args '--node-labels=karpenter.sh/capacity-type=on-demand,karpenter.sh/nodepool=test --max-pods=110'
--//--
```
10 changes: 10 additions & 0 deletions website/content/en/docs/concepts/nodepools.md
@@ -138,6 +138,16 @@ spec:
# You can choose to disable expiration entirely by setting the string value 'Never' here
expireAfter: 720h

# Budgets control the speed at which Karpenter can scale down nodes.
# Karpenter will respect the minimum of the currently active budgets, and will round up
# when considering percentages. Duration and Schedule must be set together.
budgets:
- nodes: 10%
# On weekdays during business hours, don't do any deprovisioning.
- schedule: "0 9 * * mon-fri"
duration: 8h
nodes: "0"

# Resource limits constrain the total size of the cluster.
# Limits prevent Karpenter from creating new instances once the limit is exceeded.
limits:
1 change: 0 additions & 1 deletion website/content/en/docs/concepts/scheduling.md
@@ -349,7 +349,6 @@ The three supported `topologyKey` values that Karpenter supports are:
- `kubernetes.io/hostname`
- `karpenter.sh/capacity-type`


See [Pod Topology Spread Constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/) for details.

{{% alert title="Note" color="primary" %}}