Skip to content

Commit

Permalink
Merge branch 'main' into feat-3182-dedicated-hosts-selectors
Browse files Browse the repository at this point in the history
  • Loading branch information
preflightsiren authored Oct 22, 2023
2 parents 03ba9be + af6387d commit 56d74e7
Show file tree
Hide file tree
Showing 125 changed files with 4,517 additions and 263 deletions.
4 changes: 2 additions & 2 deletions .github/actions/e2e/install-karpenter/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ runs:
-n karpenter \
--version "v0-$(git rev-parse HEAD)" \
--set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"="arn:aws:iam::${{ inputs.account_id }}:role/karpenter-irsa-${{ inputs.cluster_name }}" \
--set settings.aws.clusterName="${{ inputs.cluster_name }}" \
--set settings.clusterName="${{ inputs.cluster_name }}" \
--set settings.aws.defaultInstanceProfile="KarpenterNodeInstanceProfile-${{ inputs.cluster_name }}" \
--set settings.aws.interruptionQueueName="${{ inputs.cluster_name }}" \
--set settings.interruptionQueue="${{ inputs.cluster_name }}" \
--set controller.resources.requests.cpu=3 \
--set controller.resources.requests.memory=3Gi \
--set controller.resources.limits.cpu=3 \
Expand Down
2 changes: 2 additions & 0 deletions .github/pull-request-template.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ docs: <-- Documentation changes that do not impact code
test: <-- Test changes that do not impact behavior
ci: <-- Changes that affect test or rollout automation
!${type}: <-- Include ! if your change includes a backwards incompatible change.
Please review the Karpenter contribution docs at https://karpenter.sh/docs/contributing/ before submitting your pull request.
-->

Fixes #N/A <!-- issue number -->
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e-matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
strategy:
fail-fast: false
matrix:
suite: [Integration, Machine, Consolidation, Utilization, Interruption, Drift, Expiration, Chaos, IPv6]
suite: [Beta/Integration, Beta/Drift, Beta/Consolidation, Alpha/Integration, Alpha/Machine, Alpha/Consolidation, Alpha/Utilization, Alpha/Interruption, Alpha/Drift, Alpha/Expiration, Alpha/Chaos, Alpha/IPv6]
uses: ./.github/workflows/e2e.yaml
with:
suite: ${{ matrix.suite }}
Expand Down
27 changes: 15 additions & 12 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,19 @@ on:
type: choice
required: true
options:
- Integration
- Machine
- Consolidation
- Utilization
- Interruption
- Drift
- Expiration
- Chaos
- IPv6
- Scale
- Beta/Integration
- Beta/Drift
- Beta/Consolidation
- Alpha/Integration
- Alpha/Machine
- Alpha/Consolidation
- Alpha/Utilization
- Alpha/Interruption
- Alpha/Drift
- Alpha/Expiration
- Alpha/Chaos
- Alpha/IPv6
- Alpha/Scale
k8s_version:
type: choice
options:
Expand Down Expand Up @@ -95,7 +98,7 @@ jobs:
sleep $(( $RANDOM % 300 + 1 ))
- name: generate cluster name
run: |
CLUSTER_NAME=$(echo ${{ inputs.suite }}-$RANDOM$RANDOM | awk '{print tolower($0)}')
CLUSTER_NAME=$(echo ${{ inputs.suite }}-$RANDOM$RANDOM | awk '{print tolower($0)}' | tr / -)
echo "Using cluster name \"$CLUSTER_NAME\""
echo CLUSTER_NAME=$CLUSTER_NAME >> $GITHUB_ENV
- name: create eks cluster '${{ env.CLUSTER_NAME }}'
Expand All @@ -107,7 +110,7 @@ jobs:
cluster_name: ${{ env.CLUSTER_NAME }}
k8s_version: ${{ inputs.k8s_version }}
eksctl_version: ${{ inputs.eksctl_version }}
ip_family: ${{ inputs.suite == 'IPv6' && 'IPv6' || 'IPv4' }} # Set the value to IPv6 if IPv6 suite, else IPv4
ip_family: ${{ contains(inputs.suite, 'IPv6') && 'IPv6' || 'IPv4' }} # Set the value to IPv6 if IPv6 suite, else IPv4
git_ref: ${{ inputs.git_ref }}
- name: install prometheus
uses: ./.github/actions/e2e/install-prometheus
Expand Down
26 changes: 16 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ CLUSTER_ENDPOINT ?= $(shell kubectl config view --minify -o jsonpath='{.clusters
AWS_ACCOUNT_ID ?= $(shell aws sts get-caller-identity --query Account --output text)
KARPENTER_IAM_ROLE_ARN ?= arn:aws:iam::${AWS_ACCOUNT_ID}:role/${CLUSTER_NAME}-karpenter
HELM_OPTS ?= --set serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn=${KARPENTER_IAM_ROLE_ARN} \
--set settings.aws.clusterName=${CLUSTER_NAME} \
--set settings.aws.clusterEndpoint=${CLUSTER_ENDPOINT} \
--set settings.clusterName=${CLUSTER_NAME} \
--set settings.clusterEndpoint=${CLUSTER_ENDPOINT} \
--set settings.aws.defaultInstanceProfile=KarpenterNodeInstanceProfile-${CLUSTER_NAME} \
--set settings.aws.interruptionQueueName=${CLUSTER_NAME} \
--set settings.featureGates.driftEnabled=true \
--set settings.interruptionQueue=${CLUSTER_NAME} \
--set settings.featureGates.drift=true \
--set controller.resources.requests.cpu=1 \
--set controller.resources.requests.memory=1Gi \
--set controller.resources.limits.cpu=1 \
Expand Down Expand Up @@ -48,15 +48,18 @@ ci-non-test: verify licenses vulncheck ## Runs checks other than tests

run: ## Run Karpenter controller binary against your local cluster
kubectl create configmap -n ${SYSTEM_NAMESPACE} karpenter-global-settings \
--from-literal=aws.clusterName=${CLUSTER_NAME} \
--from-literal=aws.clusterEndpoint=${CLUSTER_ENDPOINT} \
--from-literal=aws.defaultInstanceProfile=KarpenterNodeInstanceProfile-${CLUSTER_NAME} \
--from-literal=aws.interruptionQueueName=${CLUSTER_NAME} \
--from-literal=featureGates.driftEnabled=true \
--dry-run=client -o yaml | kubectl apply -f -


SYSTEM_NAMESPACE=${SYSTEM_NAMESPACE} KUBERNETES_MIN_VERSION="1.19.0-0" LEADER_ELECT=false DISABLE_WEBHOOK=true \
SYSTEM_NAMESPACE=${SYSTEM_NAMESPACE} \
KUBERNETES_MIN_VERSION="1.19.0-0" \
LEADER_ELECT=false \
DISABLE_WEBHOOK=true \
CLUSTER_NAME=${CLUSTER_NAME} \
CLUSTER_ENDPOINT=${CLUSTER_ENDPOINT} \
INTERRUPTION_QUEUE=${CLUSTER_NAME} \
FEATURE_GATES="Drift=true" \
go run ./cmd/controller/main.go

clean-run: ## Clean resources deployed by the run target
Expand All @@ -76,7 +79,10 @@ battletest: ## Run randomized, racing, code-covered tests
-tags random_test_delay

e2etests: ## Run the e2e suite against your local cluster
cd test && CLUSTER_NAME=${CLUSTER_NAME} go test \
cd test && CLUSTER_ENDPOINT=${CLUSTER_ENDPOINT} \
CLUSTER_NAME=${CLUSTER_NAME} \
INTERRUPTION_QUEUE=${CLUSTER_NAME} \
go test \
-p 1 \
-count 1 \
-timeout ${TEST_TIMEOUT} \
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Karpenter improves the efficiency and cost of running workloads on Kubernetes cl
* **Provisioning** nodes that meet the requirements of the pods
* **Removing** the nodes when the nodes are no longer needed

Come discuss Karpenter in the [#karpenter](https://kubernetes.slack.com/archives/C02SFFZSA2K) channel, in the [Kubernetes slack](https://slack.k8s.io/) or join the [Karpenter working group](https://karpenter.sh/docs/contributing/working-group/) bi-weekly calls.
Come discuss Karpenter in the [#karpenter](https://kubernetes.slack.com/archives/C02SFFZSA2K) channel, in the [Kubernetes slack](https://slack.k8s.io/) or join the [Karpenter working group](https://karpenter.sh/docs/contributing/working-group/) bi-weekly calls. If you want to contribute to the Karpenter project, please refer to the Karpenter docs.

Check out the [Docs](https://karpenter.sh/docs/) to learn more.

Expand Down
23 changes: 16 additions & 7 deletions charts/karpenter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ helm upgrade --install --namespace karpenter --create-namespace \
karpenter oci://public.ecr.aws/karpenter/karpenter \
--version v0.31.0 \
--set serviceAccount.annotations.eks\.amazonaws\.com/role-arn=${KARPENTER_IAM_ROLE_ARN} \
--set settings.aws.clusterName=${CLUSTER_NAME} \
--set settings.aws.clusterEndpoint=${CLUSTER_ENDPOINT} \
--set settings.aws.defaultInstanceProfile=KarpenterNodeInstanceProfile-${CLUSTER_NAME} \
--set settings.aws.interruptionQueueName=${CLUSTER_NAME} \
--set settings.clusterName=${CLUSTER_NAME} \
--set settings.clusterEndpoint=${CLUSTER_ENDPOINT} \
--set settings.interruptionQueue=${CLUSTER_NAME} \
--wait
```

Expand Down Expand Up @@ -53,7 +52,7 @@ helm upgrade --install --namespace karpenter --create-namespace \
| hostNetwork | bool | `false` | Bind the pod to the host network. This is required when using a custom CNI. |
| imagePullPolicy | string | `"IfNotPresent"` | Image pull policy for Docker images. |
| imagePullSecrets | list | `[]` | Image pull secrets for Docker images. |
| logConfig | object | `{"enabled":true,"errorOutputPaths":["stderr"],"logEncoding":"console","logLevel":{"controller":"debug","global":"debug","webhook":"error"},"outputPaths":["stdout"]}` | Log configuration |
| logConfig | object | `{"enabled":true,"errorOutputPaths":["stderr"],"logEncoding":"console","logLevel":{"controller":"debug","global":"debug","webhook":"error"},"outputPaths":["stdout"]}` | Log configuration (Deprecated: Logging configuration will be dropped by v1, use logLevel instead) |
| logConfig.enabled | bool | `true` | Whether to enable provisioning and mounting the log ConfigMap |
| logConfig.errorOutputPaths | list | `["stderr"]` | Log errorOutputPaths - defaults to stderr only |
| logConfig.logEncoding | string | `"console"` | Log encoding - defaults to console - must be one of 'json', 'console' |
Expand All @@ -79,8 +78,10 @@ helm upgrade --install --namespace karpenter --create-namespace \
| serviceMonitor.additionalLabels | object | `{}` | Additional labels for the ServiceMonitor. |
| serviceMonitor.enabled | bool | `false` | Specifies whether a ServiceMonitor should be created. |
| serviceMonitor.endpointConfig | object | `{}` | Endpoint configuration for the ServiceMonitor. |
| settings | object | `{"aws":{"assumeRoleARN":"","assumeRoleDuration":"15m","clusterCABundle":"","clusterEndpoint":"","clusterName":"","defaultInstanceProfile":"","enableENILimitedPodDensity":true,"enablePodENI":false,"interruptionQueueName":"","isolatedVPC":false,"tags":null,"vmMemoryOverheadPercent":0.075},"batchIdleDuration":"1s","batchMaxDuration":"10s","featureGates":{"driftEnabled":false}}` | Global Settings to configure Karpenter |
| settings.aws | object | `{"assumeRoleARN":"","assumeRoleDuration":"15m","clusterCABundle":"","clusterEndpoint":"","clusterName":"","defaultInstanceProfile":"","enableENILimitedPodDensity":true,"enablePodENI":false,"interruptionQueueName":"","isolatedVPC":false,"tags":null,"vmMemoryOverheadPercent":0.075}` | AWS-specific configuration values |
| settings | object | `{"assumeRoleARN":"","assumeRoleDuration":"15m","aws":{"assumeRoleARN":"","assumeRoleDuration":"15m","clusterCABundle":"","clusterEndpoint":"","clusterName":"","defaultInstanceProfile":"","enableENILimitedPodDensity":true,"enablePodENI":false,"interruptionQueueName":"","isolatedVPC":false,"reservedENIs":"0","tags":null,"vmMemoryOverheadPercent":0.075},"batchIdleDuration":"1s","batchMaxDuration":"10s","clusterCABundle":"","clusterEndpoint":"","clusterName":"","featureGates":{"driftEnabled":false},"interruptionQueue":"","isolatedVPC":false,"reservedENIs":"0","vmMemoryOverheadPercent":0.075}` | Global Settings to configure Karpenter |
| settings.assumeRoleARN | string | `""` | Role to assume for calling AWS services. |
| settings.assumeRoleDuration | string | `"15m"` | Duration of assumed credentials in minutes. Default value is 15 minutes. Not used unless assumeRoleARN is set. |
| settings.aws | object | `{"assumeRoleARN":"","assumeRoleDuration":"15m","clusterCABundle":"","clusterEndpoint":"","clusterName":"","defaultInstanceProfile":"","enableENILimitedPodDensity":true,"enablePodENI":false,"interruptionQueueName":"","isolatedVPC":false,"reservedENIs":"0","tags":null,"vmMemoryOverheadPercent":0.075}` | AWS-specific configuration values (Deprecated: Use values without the "aws" prefix instead) |
| settings.aws.assumeRoleARN | string | `""` | Role to assume for calling AWS services. |
| settings.aws.assumeRoleDuration | string | `"15m"` | Duration of assumed credentials in minutes. Default value is 15 minutes. Not used unless aws.assumeRoleARN set. |
| settings.aws.clusterCABundle | string | `""` | Cluster CA bundle for TLS configuration of provisioned nodes. If not set, this is taken from the controller's TLS configuration for the API server. |
Expand All @@ -91,12 +92,20 @@ helm upgrade --install --namespace karpenter --create-namespace \
| settings.aws.enablePodENI | bool | `false` | If true then instances that support pod ENI will report a vpc.amazonaws.com/pod-eni resource |
| settings.aws.interruptionQueueName | string | `""` | interruptionQueueName is disabled if not specified. Enabling interruption handling may require additional permissions on the controller service account. Additional permissions are outlined in the docs. |
| settings.aws.isolatedVPC | bool | `false` | If true then assume we can't reach AWS services which don't have a VPC endpoint. This also has the effect of disabling look-ups to the AWS pricing endpoint. |
| settings.aws.reservedENIs | string | `"0"` | Reserved ENIs are not included in the calculations for max-pods or kube-reserved. This is most often used in the VPC CNI custom networking setup: https://docs.aws.amazon.com/eks/latest/userguide/cni-custom-network.html |
| settings.aws.tags | string | `nil` | The global tags to use on all AWS infrastructure resources (launch templates, instances, etc.) across node templates |
| settings.aws.vmMemoryOverheadPercent | float | `0.075` | The VM memory overhead as a percent that will be subtracted from the total memory for all instance types |
| settings.batchIdleDuration | string | `"1s"` | The maximum amount of time with no new pending pods that if exceeded ends the current batching window. If pods arrive faster than this time, the batching window will be extended up to the maxDuration. If they arrive slower, the pods will be batched separately. |
| settings.batchMaxDuration | string | `"10s"` | The maximum length of a batch window. The longer this is, the more pods we can consider for provisioning at one time which usually results in fewer but larger nodes. |
| settings.clusterCABundle | string | `""` | Cluster CA bundle for TLS configuration of provisioned nodes. If not set, this is taken from the controller's TLS configuration for the API server. |
| settings.clusterEndpoint | string | `""` | Cluster endpoint. If not set, will be discovered during startup (EKS only) |
| settings.clusterName | string | `""` | Cluster name. |
| settings.featureGates | object | `{"driftEnabled":false}` | Feature Gate configuration values. Feature Gates will follow the same graduation process and requirements as feature gates in Kubernetes. More information here https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/#feature-gates-for-alpha-or-beta-features |
| settings.featureGates.driftEnabled | bool | `false` | driftEnabled is in ALPHA and is disabled by default. Setting driftEnabled to true enables the drift deprovisioner to watch for drift between currently deployed nodes and the desired state of nodes set in provisioners and node templates |
| settings.interruptionQueue | string | `""` | interruptionQueue is disabled if not specified. Enabling interruption handling may require additional permissions on the controller service account. Additional permissions are outlined in the docs. |
| settings.isolatedVPC | bool | `false` | If true then assume we can't reach AWS services which don't have a VPC endpoint. This also has the effect of disabling look-ups to the AWS pricing endpoint. |
| settings.reservedENIs | string | `"0"` | Reserved ENIs are not included in the calculations for max-pods or kube-reserved. This is most often used in the VPC CNI custom networking setup: https://docs.aws.amazon.com/eks/latest/userguide/cni-custom-network.html |
| settings.vmMemoryOverheadPercent | float | `0.075` | The VM memory overhead as a percent that will be subtracted from the total memory for all instance types |
| strategy | object | `{"rollingUpdate":{"maxUnavailable":1}}` | Strategy for updating the pod. |
| terminationGracePeriodSeconds | string | `nil` | Override the default termination grace period for the pod. |
| tolerations | list | `[{"key":"CriticalAddonsOnly","operator":"Exists"}]` | Tolerations to allow the pod to be scheduled to nodes with taints. |
Expand Down
7 changes: 3 additions & 4 deletions charts/karpenter/README.md.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ helm upgrade --install --namespace karpenter --create-namespace \
karpenter oci://public.ecr.aws/karpenter/{{ template "chart.name" . }} \
--version v{{ template "chart.version" . }} \
--set serviceAccount.annotations.eks\.amazonaws\.com/role-arn=${KARPENTER_IAM_ROLE_ARN} \
--set settings.aws.clusterName=${CLUSTER_NAME} \
--set settings.aws.clusterEndpoint=${CLUSTER_ENDPOINT} \
--set settings.aws.defaultInstanceProfile=KarpenterNodeInstanceProfile-${CLUSTER_NAME} \
--set settings.aws.interruptionQueueName=${CLUSTER_NAME} \
--set settings.clusterName=${CLUSTER_NAME} \
--set settings.clusterEndpoint=${CLUSTER_ENDPOINT} \
--set settings.interruptionQueue=${CLUSTER_NAME} \
--wait
```

Expand Down
27 changes: 0 additions & 27 deletions charts/karpenter/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -141,33 +141,6 @@ This works because Helm treats dictionaries as mutable objects and allows passin
{{- end }}
{{- end }}

{{/*
Flatten Settings Map using "." syntax

Expects a two-element list: the settings map first, then the label prefix
accumulated so far (an empty string at the top level). Keys are de-duplicated
and visited in alphabetical order; below the top level each key is joined to
the prefix with ".". Nested maps are flattened by calling this template
recursively. Every other value is emitted on its own line, indented two
spaces, as a quoted key and a quoted value. Values whose kind is "invalid"
(unset/nil) are skipped.
*/}}
{{- define "flattenSettings" -}}
{{- $map := first . -}}
{{- $label := last . -}}
{{- range $key := (keys $map | uniq | sortAlpha) }}
{{- $sublabel := $key -}}
{{- $val := (get $map $key) -}}
{{- if $label -}}
{{- $sublabel = list $label $key | join "." -}}
{{- end -}}
{{/* Special-case "tags" since we want this to be a JSON object */}}
{{- if eq $key "tags" -}}
{{- if not (kindIs "invalid" $val) -}}
{{- $sublabel | quote | nindent 2 }}: {{ $val | toJson | quote }}
{{- end -}}
{{- else if kindOf $val | eq "map" -}}
{{- list $val $sublabel | include "flattenSettings" -}}
{{- else -}}
{{- if not (kindIs "invalid" $val) -}}
{{- $sublabel | quote | nindent 2 -}}: {{ $val | quote }}
{{- end -}}
{{- end -}}
{{- end -}}
{{- end -}}

{{/*
Flatten the stdout logging outputs from args provided
*/}}
Expand Down
50 changes: 49 additions & 1 deletion charts/karpenter/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,52 @@ metadata:
{{- toYaml . | nindent 4 }}
{{- end }}
data:
{{- list .Values.settings "" | include "flattenSettings" | indent 2 }}
{{- /*
Settings entries for the ConfigMap's data mapping. Each key is emitted only
when the corresponding chart value is supplied; values are always rendered as
quoted strings because ConfigMap data values must be strings.
*/}}
{{- with .Values.settings.batchMaxDuration }}
  batchMaxDuration: "{{ . }}"
{{- end }}
{{- with .Values.settings.batchIdleDuration }}
  batchIdleDuration: "{{ . }}"
{{- end }}
{{- with .Values.settings.aws.assumeRoleARN }}
  aws.assumeRoleARN: "{{ . }}"
{{- end }}
{{- with .Values.settings.aws.assumeRoleDuration }}
  aws.assumeRoleDuration: "{{ . }}"
{{- end }}
{{- with .Values.settings.aws.clusterCABundle }}
  aws.clusterCABundle: "{{ . }}"
{{- end }}
{{- with .Values.settings.aws.clusterName }}
  aws.clusterName: "{{ . }}"
{{- end }}
{{- with .Values.settings.aws.clusterEndpoint }}
  aws.clusterEndpoint: "{{ . }}"
{{- end }}
{{- with .Values.settings.aws.defaultInstanceProfile }}
  aws.defaultInstanceProfile: "{{ . }}"
{{- end }}
{{- with .Values.settings.aws.enablePodENI }}
  aws.enablePodENI: "{{ . }}"
{{- end }}
{{- /* `with` skips false, so an explicit `false` for this setting (chart default: true) would be dropped; test for "unset" instead. */}}
{{- if not (kindIs "invalid" .Values.settings.aws.enableENILimitedPodDensity) }}
  aws.enableENILimitedPodDensity: "{{ .Values.settings.aws.enableENILimitedPodDensity }}"
{{- end }}
{{- with .Values.settings.aws.isolatedVPC }}
  aws.isolatedVPC: "{{ . }}"
{{- end }}
{{- /* `with` also skips 0, which would silently fall back to the non-zero default; test for "unset" instead. */}}
{{- if not (kindIs "invalid" .Values.settings.aws.vmMemoryOverheadPercent) }}
  aws.vmMemoryOverheadPercent: "{{ .Values.settings.aws.vmMemoryOverheadPercent }}"
{{- end }}
{{- with .Values.settings.aws.interruptionQueueName }}
  aws.interruptionQueueName: "{{ . }}"
{{- end }}
{{- /* tags is a map: serialize it as a JSON object instead of Go's map[...] formatting. */}}
{{- with .Values.settings.aws.tags }}
  aws.tags: {{ toJson . | quote }}
{{- end }}
{{- with .Values.settings.aws.reservedENIs }}
  aws.reservedENIs: "{{ . }}"
{{- end }}
{{- /* No "$" before the action: it is literal text and would render the value as "$true". */}}
{{- with .Values.settings.featureGates.driftEnabled }}
  featureGates.driftEnabled: "{{ . }}"
{{- end }}

Loading

0 comments on commit 56d74e7

Please sign in to comment.