From 9c5e801d030c791531480a33422de9ad458ebb36 Mon Sep 17 00:00:00 2001 From: Jonathan Innis Date: Thu, 12 Oct 2023 12:36:13 -0700 Subject: [PATCH] Add the NodeClaim indexer --- go.mod | 2 +- go.sum | 7 +- hack/docs/configuration_gen_docs.go | 5 +- hack/docs/instancetypes_gen_docs.go | 10 + hack/docs/metrics_gen_docs.go | 5 +- pkg/apis/crds/karpenter.sh_nodeclaims.yaml | 621 ++++++++--------- pkg/apis/crds/karpenter.sh_nodepools.yaml | 656 ++++++++---------- .../garbagecollection/nodeclaim_test.go | 14 - .../nodeclaim/garbagecollection/suite_test.go | 4 - pkg/controllers/nodeclass/controller.go | 5 +- pkg/operator/operator.go | 10 + pkg/providers/instance/instance.go | 7 +- pkg/providers/instance/suite_test.go | 105 --- pkg/test/nodeclass.go | 2 +- .../content/en/preview/reference/metrics.md | 54 ++ .../content/en/preview/reference/settings.md | 3 + 16 files changed, 676 insertions(+), 834 deletions(-) diff --git a/go.mod b/go.mod index 72ebfd4405a8..992fa127449a 100644 --- a/go.mod +++ b/go.mod @@ -30,7 +30,7 @@ require ( sigs.k8s.io/controller-runtime v0.16.2 ) -replace github.com/aws/karpenter-core => github.com/jonathan-innis/karpenter-core v0.0.0-20231012191332-1e368cfd9c25 +replace github.com/aws/karpenter-core => github.com/jonathan-innis/karpenter-core v0.0.0-20231017081815-4e8c54e9d993 require ( contrib.go.opencensus.io/exporter/ocagent v0.7.1-0.20200907061046-05415f1de66d // indirect diff --git a/go.sum b/go.sum index b845778d4ff1..0091a0e3cbc0 100644 --- a/go.sum +++ b/go.sum @@ -57,8 +57,6 @@ github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHS github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= github.com/aws/aws-sdk-go v1.45.25 h1:c4fLlh5sLdK2DCRTY1z0hyuJZU4ygxX8m1FswL6/nF4= github.com/aws/aws-sdk-go v1.45.25/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= -github.com/aws/karpenter-core v0.31.1-0.20231013203304-4239902b18b9 
h1:j0iZuhoAKHrt0oqfSiKDqvHMnV/t45wi0loG1lEqdUw= -github.com/aws/karpenter-core v0.31.1-0.20231013203304-4239902b18b9/go.mod h1:rb3kp/3cj38tACF6udfpmIvKoQMwirSVoHNlrd66LyE= github.com/aws/karpenter/tools/kompat v0.0.0-20231010173459-62c25a3ea85c h1:oXWwIttmjYLbBKhLazG21aQvpJ3NOOr8IXhCJ/p6e/M= github.com/aws/karpenter/tools/kompat v0.0.0-20231010173459-62c25a3ea85c/go.mod h1:l/TIBsaCx/IrOr0Xvlj/cHLOf05QzuQKEZ1hx2XWmfU= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= @@ -168,6 +166,7 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= @@ -224,8 +223,8 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/jonathan-innis/karpenter-core v0.0.0-20231012191332-1e368cfd9c25 h1:ouaAwe6U5+LIgvgnAfwa7gMnFKtpvgrRMc6ATJlCiP4= -github.com/jonathan-innis/karpenter-core v0.0.0-20231012191332-1e368cfd9c25/go.mod h1:rb3kp/3cj38tACF6udfpmIvKoQMwirSVoHNlrd66LyE= +github.com/jonathan-innis/karpenter-core v0.0.0-20231017081815-4e8c54e9d993 
h1:EglPHmaL2cGCrf7UTji0Hqe+o3dSP5E4ZBA3YfXpdz4= +github.com/jonathan-innis/karpenter-core v0.0.0-20231017081815-4e8c54e9d993/go.mod h1:rb3kp/3cj38tACF6udfpmIvKoQMwirSVoHNlrd66LyE= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= diff --git a/hack/docs/configuration_gen_docs.go b/hack/docs/configuration_gen_docs.go index 844ce0bd87c7..c9ee017f34ac 100644 --- a/hack/docs/configuration_gen_docs.go +++ b/hack/docs/configuration_gen_docs.go @@ -48,11 +48,12 @@ func main() { topDoc := fmt.Sprintf("%s%s\n\n", startDocSections[0], genStart) bottomDoc := fmt.Sprintf("\n%s%s", genEnd, endDocSections[1]) - opts := options.New() + fs := flag.NewFlagSet("karpenter", flag.ContinueOnError) + (&options.Options{}).AddFlags(fs) envVarsBlock := "| Environment Variable | CLI Flag | Description |\n" envVarsBlock += "|--|--|--|\n" - opts.FlagSet.VisitAll(func(f *flag.Flag) { + fs.VisitAll(func(f *flag.Flag) { if f.DefValue == "" { envVarsBlock += fmt.Sprintf("| %s | %s | %s|\n", strings.ReplaceAll(strings.ToUpper(f.Name), "-", "_"), "\\-\\-"+f.Name, f.Usage) } else { diff --git a/hack/docs/instancetypes_gen_docs.go b/hack/docs/instancetypes_gen_docs.go index dcbbc9f1eb83..64bc679fc405 100644 --- a/hack/docs/instancetypes_gen_docs.go +++ b/hack/docs/instancetypes_gen_docs.go @@ -63,10 +63,20 @@ func (m *FakeManager) GetConfig() *rest.Config { return &rest.Config{} } +func (m *FakeManager) GetFieldIndexer() client.FieldIndexer { + return &FakeFieldIndexer{} +} + func (m *FakeManager) Elected() <-chan struct{} { return make(chan struct{}, 1) } +type FakeFieldIndexer struct{} + +func (f *FakeFieldIndexer) IndexField(_ context.Context, _ client.Object, _ string, _ client.IndexerFunc) error { + return nil +} + func main() { flag.Parse() if flag.NArg() != 1 { diff --git 
a/hack/docs/metrics_gen_docs.go b/hack/docs/metrics_gen_docs.go index af8642a197ed..07deb83c94f1 100644 --- a/hack/docs/metrics_gen_docs.go +++ b/hack/docs/metrics_gen_docs.go @@ -79,10 +79,6 @@ description: > "These metrics are available by default at `karpenter.karpenter.svc.cluster.local:8000/metrics` configurable via the `METRICS_PORT` environment variable documented [here](../settings)\n") previousSubsystem := "" - // Ignore nodeClaimSubsystem and nodePoolSubsystem metrics until NodeClaims are released - allMetrics = lo.Reject(allMetrics, func(m metricInfo, _ int) bool { - return m.subsystem == "nodeclaims" || m.subsystem == "nodepools" - }) for _, metric := range allMetrics { // Controller Runtime naming is different in that they don't specify a namespace or subsystem // Getting the metrics requires special parsing logic @@ -275,6 +271,7 @@ func getIdentMapping(identName string) (string, error) { "interruptionSubsystem": "interruption", "nodeTemplateSubsystem": "nodetemplate", "deprovisioningSubsystem": "deprovisioning", + "disruptionSubsystem": "disruption", "consistencySubsystem": "consistency", "batcherSubsystem": "cloudprovider_batcher", "cloudProviderSubsystem": "cloudprovider", diff --git a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml index de7af73fb226..3ceff377c01d 100644 --- a/pkg/apis/crds/karpenter.sh_nodeclaims.yaml +++ b/pkg/apis/crds/karpenter.sh_nodeclaims.yaml @@ -9,351 +9,316 @@ spec: group: karpenter.sh names: categories: - - karpenter + - karpenter kind: NodeClaim listKind: NodeClaimList plural: nodeclaims singular: nodeclaim scope: Cluster versions: - - additionalPrinterColumns: - - jsonPath: .metadata.labels.node\.kubernetes\.io/instance-type - name: Type - type: string - - jsonPath: .metadata.labels.topology\.kubernetes\.io/zone - name: Zone - type: string - - jsonPath: .status.nodeName - name: Node - type: string - - jsonPath: .status.conditions[?(@.type=="Ready")].status - name: Ready - type: 
string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - - jsonPath: .metadata.labels.karpenter\.sh/capacity-type - name: Capacity - priority: 1 - type: string - - jsonPath: .metadata.labels.karpenter\.sh/nodepool - name: NodePool - priority: 1 - type: string - - jsonPath: .spec.nodeClassRef.name - name: NodeClass - priority: 1 - type: string - name: v1beta1 - schema: - openAPIV3Schema: - description: NodeClaim is the Schema for the NodeClaims API - properties: - apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' - type: string - kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' - type: string - metadata: - type: object - spec: - description: NodeClaimSpec describes the desired state of the NodeClaim - properties: - kubelet: - description: Kubelet defines args to be used when configuring kubelet - on provisioned nodes. They are a subset of the upstream types, recognizing - not all options may be supported. Wherever possible, the types and - names should reflect the upstream kubelet types. - properties: - clusterDNS: - description: clusterDNS is a list of IP addresses for the cluster - DNS server. Note that not all providers may use all addresses. - items: - type: string - type: array - cpuCFSQuota: - description: CPUCFSQuota enables CPU CFS quota enforcement for - containers that specify CPU limits. 
- type: boolean - evictionHard: - additionalProperties: - type: string - description: EvictionHard is the map of signal names to quantities - that define hard eviction thresholds - type: object - evictionMaxPodGracePeriod: - description: EvictionMaxPodGracePeriod is the maximum allowed - grace period (in seconds) to use when terminating pods in response - to soft eviction thresholds being met. - format: int32 - type: integer - evictionSoft: - additionalProperties: - type: string - description: EvictionSoft is the map of signal names to quantities - that define soft eviction thresholds - type: object - evictionSoftGracePeriod: - additionalProperties: - type: string - description: EvictionSoftGracePeriod is the map of signal names - to quantities that define grace periods for each eviction signal - type: object - imageGCHighThresholdPercent: - description: ImageGCHighThresholdPercent is the percent of disk - usage after which image garbage collection is always run. The - percent is calculated by dividing this field value by 100, so - this field must be between 0 and 100, inclusive. When specified, - the value must be greater than ImageGCLowThresholdPercent. - format: int32 - maximum: 100 - minimum: 0 - type: integer - imageGCLowThresholdPercent: - description: ImageGCLowThresholdPercent is the percent of disk - usage before which image garbage collection is never run. Lowest - disk usage to garbage collect to. The percent is calculated - by dividing this field value by 100, so the field value must - be between 0 and 100, inclusive. 
When specified, the value must - be less than imageGCHighThresholdPercent - format: int32 - maximum: 100 - minimum: 0 - type: integer - kubeReserved: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: KubeReserved contains resources reserved for Kubernetes - system components. - type: object - maxPods: - description: MaxPods is an override for the maximum number of - pods that can run on a worker node instance. - format: int32 - minimum: 0 - type: integer - podsPerCore: - description: PodsPerCore is an override for the number of pods - that can run on a worker node instance based on the number of - cpu cores. This value cannot exceed MaxPods, so, if MaxPods - is a lower value, that value will be used. - format: int32 - minimum: 0 - type: integer - systemReserved: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: SystemReserved contains resources reserved for OS - system daemons and kernel memory. - type: object - type: object - nodeClassRef: - description: NodeClassRef is a reference to an object that defines - provider specific configuration - properties: - apiVersion: - description: API version of the referent - type: string - kind: - description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"' - type: string - name: - description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names' - type: string - required: - - name - type: object - requirements: - description: Requirements are layered with GetLabels and applied to - every node. 
- items: - description: A node selector requirement is a selector that contains - values, a key, and an operator that relates the key and values. + - additionalPrinterColumns: + - jsonPath: .metadata.labels.node\.kubernetes\.io/instance-type + name: Type + type: string + - jsonPath: .metadata.labels.topology\.kubernetes\.io/zone + name: Zone + type: string + - jsonPath: .status.nodeName + name: Node + type: string + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + - jsonPath: .metadata.labels.karpenter\.sh/capacity-type + name: Capacity + priority: 1 + type: string + - jsonPath: .metadata.labels.karpenter\.sh/nodepool + name: NodePool + priority: 1 + type: string + - jsonPath: .spec.nodeClassRef.name + name: NodeClass + priority: 1 + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: NodeClaim is the Schema for the NodeClaims API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: NodeClaimSpec describes the desired state of the NodeClaim + properties: + kubelet: + description: Kubelet defines args to be used when configuring kubelet on provisioned nodes. They are a subset of the upstream types, recognizing not all options may be supported. 
Wherever possible, the types and names should reflect the upstream kubelet types. properties: - key: - description: The label key that the selector applies to. - type: string - operator: - description: Represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and - Lt. - type: string - values: - description: An array of string values. If the operator is In - or NotIn, the values array must be non-empty. If the operator - is Exists or DoesNotExist, the values array must be empty. - If the operator is Gt or Lt, the values array must have a - single element, which will be interpreted as an integer. This - array is replaced during a strategic merge patch. + clusterDNS: + description: clusterDNS is a list of IP addresses for the cluster DNS server. Note that not all providers may use all addresses. items: type: string type: array - required: - - key - - operator + cpuCFSQuota: + description: CPUCFSQuota enables CPU CFS quota enforcement for containers that specify CPU limits. + type: boolean + evictionHard: + additionalProperties: + type: string + pattern: ^((\d{1,2}(\.\d{1,2})?|100(\.0{1,2})?)%||(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?)$ + description: EvictionHard is the map of signal names to quantities that define hard eviction thresholds + type: object + x-kubernetes-validations: + - message: valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + evictionMaxPodGracePeriod: + description: EvictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in response to soft eviction thresholds being met. 
+ format: int32 + type: integer + evictionSoft: + additionalProperties: + type: string + pattern: ^((\d{1,2}(\.\d{1,2})?|100(\.0{1,2})?)%||(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?)$ + description: EvictionSoft is the map of signal names to quantities that define soft eviction thresholds + type: object + x-kubernetes-validations: + - message: valid keys for evictionSoft are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + evictionSoftGracePeriod: + additionalProperties: + type: string + description: EvictionSoftGracePeriod is the map of signal names to quantities that define grace periods for each eviction signal + type: object + x-kubernetes-validations: + - message: valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + imageGCHighThresholdPercent: + description: ImageGCHighThresholdPercent is the percent of disk usage after which image garbage collection is always run. The percent is calculated by dividing this field value by 100, so this field must be between 0 and 100, inclusive. When specified, the value must be greater than ImageGCLowThresholdPercent. + format: int32 + maximum: 100 + minimum: 0 + type: integer + imageGCLowThresholdPercent: + description: ImageGCLowThresholdPercent is the percent of disk usage before which image garbage collection is never run. Lowest disk usage to garbage collect to. The percent is calculated by dividing this field value by 100, so the field value must be between 0 and 100, inclusive. 
When specified, the value must be less than imageGCHighThresholdPercent + format: int32 + maximum: 100 + minimum: 0 + type: integer + kubeReserved: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: KubeReserved contains resources reserved for Kubernetes system components. + type: object + x-kubernetes-validations: + - message: valid keys for kubeReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid') + - message: kubeReserved value cannot be a negative resource quantity + rule: self.all(x, !self[x].startsWith('-')) + maxPods: + description: MaxPods is an override for the maximum number of pods that can run on a worker node instance. + format: int32 + minimum: 0 + type: integer + podsPerCore: + description: PodsPerCore is an override for the number of pods that can run on a worker node instance based on the number of cpu cores. This value cannot exceed MaxPods, so, if MaxPods is a lower value, that value will be used. + format: int32 + minimum: 0 + type: integer + systemReserved: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: SystemReserved contains resources reserved for OS system daemons and kernel memory. 
+ type: object + x-kubernetes-validations: + - message: valid keys for systemReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid') + - message: systemReserved value cannot be a negative resource quantity + rule: self.all(x, !self[x].startsWith('-')) type: object - type: array - resources: - description: Resources models the resource requirements for the NodeClaim - to launch - properties: - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: Requests describes the minimum required resources - for the NodeClaim to launch - type: object - type: object - startupTaints: - description: StartupTaints are taints that are applied to nodes upon - startup which are expected to be removed automatically within a - short period of time, typically by a DaemonSet that tolerates the - taint. These are commonly used by daemonsets to allow initialization - and enforce startup ordering. StartupTaints are ignored for provisioning - purposes in that pods are not required to tolerate a StartupTaint - in order to have nodes provisioned for them. - items: - description: The node this Taint is attached to has the "effect" - on any pod that does not tolerate the Taint. + x-kubernetes-validations: + - message: imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent + rule: 'has(self.imageGCHighThresholdPercent) && has(self.imageGCLowThresholdPercent) ? self.imageGCHighThresholdPercent > self.imageGCLowThresholdPercent : true' + - message: evictionSoft OwnerKey does not have a matching evictionSoftGracePeriod + rule: has(self.evictionSoft) ? 
self.evictionSoft.all(e, (e in self.evictionSoftGracePeriod)):true + - message: evictionSoftGracePeriod OwnerKey does not have a matching evictionSoft + rule: has(self.evictionSoftGracePeriod) ? self.evictionSoftGracePeriod.all(e, (e in self.evictionSoft)):true + nodeClassRef: + description: NodeClassRef is a reference to an object that defines provider specific configuration properties: - effect: - description: Required. The effect of the taint on pods that - do not tolerate the taint. Valid effects are NoSchedule, PreferNoSchedule - and NoExecute. + apiVersion: + description: API version of the referent type: string - key: - description: Required. The taint key to be applied to a node. + kind: + description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"' type: string - timeAdded: - description: TimeAdded represents the time at which the taint - was added. It is only written for NoExecute taints. - format: date-time - type: string - value: - description: The taint value corresponding to the taint key. + name: + description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names' type: string required: - - effect - - key + - name type: object - type: array - taints: - description: Taints will be applied to the NodeClaim's node. - items: - description: The node this Taint is attached to has the "effect" - on any pod that does not tolerate the Taint. + requirements: + description: Requirements are layered with GetLabels and applied to every node. + items: + description: A node selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: The label key that the selector applies to. + type: string + operator: + description: Represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. 
+ type: string + values: + description: An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + resources: + description: Resources models the resource requirements for the NodeClaim to launch properties: - effect: - description: Required. The effect of the taint on pods that - do not tolerate the taint. Valid effects are NoSchedule, PreferNoSchedule - and NoExecute. - type: string - key: - description: Required. The taint key to be applied to a node. - type: string - timeAdded: - description: TimeAdded represents the time at which the taint - was added. It is only written for NoExecute taints. - format: date-time - type: string - value: - description: The taint value corresponding to the taint key. 
- type: string - required: - - effect - - key + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: Requests describes the minimum required resources for the NodeClaim to launch + type: object type: object - type: array - required: - - nodeClassRef - - requirements - type: object - status: - description: NodeClaimStatus defines the observed state of NodeClaim - properties: - allocatable: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: Allocatable is the estimated allocatable capacity of - the node - type: object - capacity: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: Capacity is the estimated full capacity of the node - type: object - conditions: - description: Conditions contains signals for health and readiness - items: - description: 'Condition defines a readiness condition for a Knative - resource. See: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties' - properties: - lastTransitionTime: - description: LastTransitionTime is the last time the condition - transitioned from one status to another. We use VolatileTime - in place of metav1.Time to exclude this from creating equality.Semantic - differences (all other things held constant). - type: string - message: - description: A human readable message indicating details about - the transition. 
- type: string - reason: - description: The reason for the condition's last transition. - type: string - severity: - description: Severity with which to treat failures of this type - of condition. When this is not specified, it defaults to Error. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: Type of condition. - type: string - required: - - status - - type + startupTaints: + description: StartupTaints are taints that are applied to nodes upon startup which are expected to be removed automatically within a short period of time, typically by a DaemonSet that tolerates the taint. These are commonly used by daemonsets to allow initialization and enforce startup ordering. StartupTaints are ignored for provisioning purposes in that pods are not required to tolerate a StartupTaint in order to have nodes provisioned for them. + items: + description: The node this Taint is attached to has the "effect" on any pod that does not tolerate the Taint. + properties: + effect: + description: Required. The effect of the taint on pods that do not tolerate the taint. Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to a node. + type: string + timeAdded: + description: TimeAdded represents the time at which the taint was added. It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint key. + type: string + required: + - effect + - key + type: object + type: array + taints: + description: Taints will be applied to the NodeClaim's node. + items: + description: The node this Taint is attached to has the "effect" on any pod that does not tolerate the Taint. + properties: + effect: + description: Required. The effect of the taint on pods that do not tolerate the taint. Valid effects are NoSchedule, PreferNoSchedule and NoExecute. 
+ type: string + key: + description: Required. The taint key to be applied to a node. + type: string + timeAdded: + description: TimeAdded represents the time at which the taint was added. It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint key. + type: string + required: + - effect + - key + type: object + type: array + required: + - nodeClassRef + - requirements + type: object + status: + description: NodeClaimStatus defines the observed state of NodeClaim + properties: + allocatable: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: Allocatable is the estimated allocatable capacity of the node + type: object + capacity: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: Capacity is the estimated full capacity of the node type: object - type: array - imageID: - description: ImageID is an identifier for the image that runs on the - node - type: string - nodeName: - description: NodeName is the name of the corresponding node object - type: string - providerID: - description: ProviderID of the corresponding node object - type: string - type: object - type: object - served: true - storage: true - subresources: - status: {} + conditions: + description: Conditions contains signals for health and readiness + items: + description: 'Condition defines a readiness condition for a Knative resource. 
See: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties' + properties: + lastTransitionTime: + description: LastTransitionTime is the last time the condition transitioned from one status to another. We use VolatileTime in place of metav1.Time to exclude this from creating equality.Semantic differences (all other things held constant). + type: string + message: + description: A human readable message indicating details about the transition. + type: string + reason: + description: The reason for the condition's last transition. + type: string + severity: + description: Severity with which to treat failures of this type of condition. When this is not specified, it defaults to Error. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of condition. + type: string + required: + - status + - type + type: object + type: array + imageID: + description: ImageID is an identifier for the image that runs on the node + type: string + nodeName: + description: NodeName is the name of the corresponding node object + type: string + providerID: + description: ProviderID of the corresponding node object + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/pkg/apis/crds/karpenter.sh_nodepools.yaml b/pkg/apis/crds/karpenter.sh_nodepools.yaml index b40c7f9a4bb0..5f0083a751ef 100644 --- a/pkg/apis/crds/karpenter.sh_nodepools.yaml +++ b/pkg/apis/crds/karpenter.sh_nodepools.yaml @@ -9,386 +9,320 @@ spec: group: karpenter.sh names: categories: - - karpenter + - karpenter kind: NodePool listKind: NodePoolList plural: nodepools singular: nodepool scope: Cluster versions: - - additionalPrinterColumns: - - jsonPath: .spec.template.spec.nodeClassRef.name - name: NodeClass - type: string - - jsonPath: .spec.weight - name: Weight - priority: 1 - type: string - name: 
v1beta1 - schema: - openAPIV3Schema: - description: NodePool is the Schema for the NodePools API - properties: - apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' - type: string - kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' - type: string - metadata: - type: object - spec: - description: NodePoolSpec is the top level provisioner specification. - Provisioners launch nodes in response to pods that are unschedulable. - A single provisioner is capable of managing a diverse set of nodes. - Node properties are determined from a combination of provisioner and - pod scheduling constraints. - properties: - disruption: - default: - consolidationPolicy: WhenUnderutilized - expireAfter: 720h - description: Disruption contains the parameters that relate to Karpenter's - disruption logic - properties: - consolidateAfter: - description: ConsolidateAfter is the duration the controller will - wait before attempting to terminate nodes that are underutilized. - Refer to ConsolidationPolicy for how underutilization is considered. - pattern: ^(([0-9]+(s|m|h))+)|(Never)$ - type: string - consolidationPolicy: - default: WhenUnderutilized - description: ConsolidationPolicy describes which nodes Karpenter - can disrupt through its consolidation algorithm. 
This policy - defaults to "WhenUnderutilized" if not specified - enum: - - WhenEmpty - - WhenUnderutilized - type: string - expireAfter: - default: 720h - description: ExpireAfter is the duration the controller will wait - before terminating a node, measured from when the node is created. - This is useful to implement features like eventually consistent - node upgrade, memory leak protection, and disruption testing. - pattern: ^(([0-9]+(s|m|h))+)|(Never)$ - type: string - type: object - x-kubernetes-validations: - - message: consolidateAfter cannot be combined with consolidationPolicy=WhenUnderutilized - rule: 'has(self.consolidateAfter) ? self.consolidationPolicy != - ''WhenUnderutilized'' || self.consolidateAfter == ''Never'' : - true' - - message: consolidateAfter must be specified with consolidationPolicy=WhenEmpty - rule: 'self.consolidationPolicy == ''WhenEmpty'' ? has(self.consolidateAfter) - : true' - limits: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: Limits define a set of bounds for provisioning capacity. - type: object - template: - description: Template contains the template of possibilities for the - provisioning logic to launch a NodeClaim with. NodeClaims launched - from this NodePool will often be further constrained than the template - specifies. - properties: - metadata: - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: NodeClaimSpec describes the desired state of the - NodeClaim - properties: - kubelet: - description: Kubelet defines args to be used when configuring - kubelet on provisioned nodes. 
They are a subset of the upstream - types, recognizing not all options may be supported. Wherever - possible, the types and names should reflect the upstream - kubelet types. - properties: - clusterDNS: - description: clusterDNS is a list of IP addresses for - the cluster DNS server. Note that not all providers - may use all addresses. - items: - type: string - type: array - cpuCFSQuota: - description: CPUCFSQuota enables CPU CFS quota enforcement - for containers that specify CPU limits. - type: boolean - evictionHard: - additionalProperties: - type: string - description: EvictionHard is the map of signal names to - quantities that define hard eviction thresholds - type: object - evictionMaxPodGracePeriod: - description: EvictionMaxPodGracePeriod is the maximum - allowed grace period (in seconds) to use when terminating - pods in response to soft eviction thresholds being met. - format: int32 - type: integer - evictionSoft: - additionalProperties: - type: string - description: EvictionSoft is the map of signal names to - quantities that define soft eviction thresholds - type: object - evictionSoftGracePeriod: - additionalProperties: - type: string - description: EvictionSoftGracePeriod is the map of signal - names to quantities that define grace periods for each - eviction signal - type: object - imageGCHighThresholdPercent: - description: ImageGCHighThresholdPercent is the percent - of disk usage after which image garbage collection is - always run. The percent is calculated by dividing this - field value by 100, so this field must be between 0 - and 100, inclusive. When specified, the value must be - greater than ImageGCLowThresholdPercent. - format: int32 - maximum: 100 - minimum: 0 - type: integer - imageGCLowThresholdPercent: - description: ImageGCLowThresholdPercent is the percent - of disk usage before which image garbage collection - is never run. Lowest disk usage to garbage collect to. 
- The percent is calculated by dividing this field value - by 100, so the field value must be between 0 and 100, - inclusive. When specified, the value must be less than - imageGCHighThresholdPercent - format: int32 - maximum: 100 - minimum: 0 - type: integer - kubeReserved: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: KubeReserved contains resources reserved - for Kubernetes system components. - type: object - maxPods: - description: MaxPods is an override for the maximum number - of pods that can run on a worker node instance. - format: int32 - minimum: 0 - type: integer - podsPerCore: - description: PodsPerCore is an override for the number - of pods that can run on a worker node instance based - on the number of cpu cores. This value cannot exceed - MaxPods, so, if MaxPods is a lower value, that value - will be used. - format: int32 - minimum: 0 - type: integer - systemReserved: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: SystemReserved contains resources reserved - for OS system daemons and kernel memory. 
- type: object - type: object - nodeClassRef: - description: NodeClassRef is a reference to an object that - defines provider specific configuration - properties: - apiVersion: - description: API version of the referent + - additionalPrinterColumns: + - jsonPath: .spec.template.spec.nodeClassRef.name + name: NodeClass + type: string + - jsonPath: .spec.weight + name: Weight + priority: 1 + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: NodePool is the Schema for the NodePools API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: NodePoolSpec is the top level provisioner specification. Provisioners launch nodes in response to pods that are unschedulable. A single provisioner is capable of managing a diverse set of nodes. Node properties are determined from a combination of provisioner and pod scheduling constraints. + properties: + disruption: + default: + consolidationPolicy: WhenUnderutilized + expireAfter: 720h + description: Disruption contains the parameters that relate to Karpenter's disruption logic + properties: + consolidateAfter: + description: ConsolidateAfter is the duration the controller will wait before attempting to terminate nodes that are underutilized. Refer to ConsolidationPolicy for how underutilization is considered. 
+ pattern: ^(([0-9]+(s|m|h))+)|(Never)$ + type: string + consolidationPolicy: + default: WhenUnderutilized + description: ConsolidationPolicy describes which nodes Karpenter can disrupt through its consolidation algorithm. This policy defaults to "WhenUnderutilized" if not specified + enum: + - WhenEmpty + - WhenUnderutilized + type: string + expireAfter: + default: 720h + description: ExpireAfter is the duration the controller will wait before terminating a node, measured from when the node is created. This is useful to implement features like eventually consistent node upgrade, memory leak protection, and disruption testing. + pattern: ^(([0-9]+(s|m|h))+)|(Never)$ + type: string + type: object + x-kubernetes-validations: + - message: consolidateAfter cannot be combined with consolidationPolicy=WhenUnderutilized + rule: 'has(self.consolidateAfter) ? self.consolidationPolicy != ''WhenUnderutilized'' || self.consolidateAfter == ''Never'' : true' + - message: consolidateAfter must be specified with consolidationPolicy=WhenEmpty + rule: 'self.consolidationPolicy == ''WhenEmpty'' ? has(self.consolidateAfter) : true' + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: Limits define a set of bounds for provisioning capacity. + type: object + template: + description: Template contains the template of possibilities for the provisioning logic to launch a NodeClaim with. NodeClaims launched from this NodePool will often be further constrained than the template specifies. 
+ properties: + metadata: + properties: + annotations: + additionalProperties: type: string - kind: - description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"' + type: object + finalizers: + items: type: string - name: - description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: array + labels: + additionalProperties: type: string - required: - - name - type: object - requirements: - description: Requirements are layered with GetLabels and applied - to every node. - items: - description: A node selector requirement is a selector that - contains values, a key, and an operator that relates the - key and values. + type: object + name: + type: string + namespace: + type: string + type: object + spec: + description: NodeClaimSpec describes the desired state of the NodeClaim + properties: + kubelet: + description: Kubelet defines args to be used when configuring kubelet on provisioned nodes. They are a subset of the upstream types, recognizing not all options may be supported. Wherever possible, the types and names should reflect the upstream kubelet types. properties: - key: - description: The label key that the selector applies - to. - type: string - operator: - description: Represents a key's relationship to a set - of values. Valid operators are In, NotIn, Exists, - DoesNotExist. Gt, and Lt. - type: string - values: - description: An array of string values. If the operator - is In or NotIn, the values array must be non-empty. - If the operator is Exists or DoesNotExist, the values - array must be empty. If the operator is Gt or Lt, - the values array must have a single element, which - will be interpreted as an integer. This array is replaced - during a strategic merge patch. + clusterDNS: + description: clusterDNS is a list of IP addresses for the cluster DNS server. Note that not all providers may use all addresses. 
items: type: string type: array - required: - - key - - operator + cpuCFSQuota: + description: CPUCFSQuota enables CPU CFS quota enforcement for containers that specify CPU limits. + type: boolean + evictionHard: + additionalProperties: + type: string + pattern: ^((\d{1,2}(\.\d{1,2})?|100(\.0{1,2})?)%||(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?)$ + description: EvictionHard is the map of signal names to quantities that define hard eviction thresholds + type: object + x-kubernetes-validations: + - message: valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + evictionMaxPodGracePeriod: + description: EvictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use when terminating pods in response to soft eviction thresholds being met. 
+ format: int32 + type: integer + evictionSoft: + additionalProperties: + type: string + pattern: ^((\d{1,2}(\.\d{1,2})?|100(\.0{1,2})?)%||(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?)$ + description: EvictionSoft is the map of signal names to quantities that define soft eviction thresholds + type: object + x-kubernetes-validations: + - message: valid keys for evictionSoft are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + evictionSoftGracePeriod: + additionalProperties: + type: string + description: EvictionSoftGracePeriod is the map of signal names to quantities that define grace periods for each eviction signal + type: object + x-kubernetes-validations: + - message: valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + imageGCHighThresholdPercent: + description: ImageGCHighThresholdPercent is the percent of disk usage after which image garbage collection is always run. The percent is calculated by dividing this field value by 100, so this field must be between 0 and 100, inclusive. When specified, the value must be greater than ImageGCLowThresholdPercent. + format: int32 + maximum: 100 + minimum: 0 + type: integer + imageGCLowThresholdPercent: + description: ImageGCLowThresholdPercent is the percent of disk usage before which image garbage collection is never run. Lowest disk usage to garbage collect to. The percent is calculated by dividing this field value by 100, so the field value must be between 0 and 100, inclusive. 
When specified, the value must be less than imageGCHighThresholdPercent + format: int32 + maximum: 100 + minimum: 0 + type: integer + kubeReserved: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: KubeReserved contains resources reserved for Kubernetes system components. + type: object + x-kubernetes-validations: + - message: valid keys for kubeReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid') + - message: kubeReserved value cannot be a negative resource quantity + rule: self.all(x, !self[x].startsWith('-')) + maxPods: + description: MaxPods is an override for the maximum number of pods that can run on a worker node instance. + format: int32 + minimum: 0 + type: integer + podsPerCore: + description: PodsPerCore is an override for the number of pods that can run on a worker node instance based on the number of cpu cores. This value cannot exceed MaxPods, so, if MaxPods is a lower value, that value will be used. + format: int32 + minimum: 0 + type: integer + systemReserved: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: SystemReserved contains resources reserved for OS system daemons and kernel memory. 
+ type: object + x-kubernetes-validations: + - message: valid keys for systemReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid') + - message: systemReserved value cannot be a negative resource quantity + rule: self.all(x, !self[x].startsWith('-')) type: object - type: array - resources: - description: Resources models the resource requirements for - the NodeClaim to launch - properties: - requests: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: Requests describes the minimum required resources - for the NodeClaim to launch - type: object - type: object - startupTaints: - description: StartupTaints are taints that are applied to - nodes upon startup which are expected to be removed automatically - within a short period of time, typically by a DaemonSet - that tolerates the taint. These are commonly used by daemonsets - to allow initialization and enforce startup ordering. StartupTaints - are ignored for provisioning purposes in that pods are not - required to tolerate a StartupTaint in order to have nodes - provisioned for them. - items: - description: The node this Taint is attached to has the - "effect" on any pod that does not tolerate the Taint. + x-kubernetes-validations: + - message: imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent + rule: 'has(self.imageGCHighThresholdPercent) && has(self.imageGCLowThresholdPercent) ? self.imageGCHighThresholdPercent > self.imageGCLowThresholdPercent : true' + - message: evictionSoft OwnerKey does not have a matching evictionSoftGracePeriod + rule: has(self.evictionSoft) ? 
self.evictionSoft.all(e, (e in self.evictionSoftGracePeriod)):true + - message: evictionSoftGracePeriod OwnerKey does not have a matching evictionSoft + rule: has(self.evictionSoftGracePeriod) ? self.evictionSoftGracePeriod.all(e, (e in self.evictionSoft)):true + nodeClassRef: + description: NodeClassRef is a reference to an object that defines provider specific configuration properties: - effect: - description: Required. The effect of the taint on pods - that do not tolerate the taint. Valid effects are - NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: Required. The taint key to be applied to - a node. + apiVersion: + description: API version of the referent type: string - timeAdded: - description: TimeAdded represents the time at which - the taint was added. It is only written for NoExecute - taints. - format: date-time + kind: + description: 'Kind of the referent; More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"' type: string - value: - description: The taint value corresponding to the taint - key. + name: + description: 'Name of the referent; More info: http://kubernetes.io/docs/user-guide/identifiers#names' type: string required: - - effect - - key + - name type: object - type: array - taints: - description: Taints will be applied to the NodeClaim's node. - items: - description: The node this Taint is attached to has the - "effect" on any pod that does not tolerate the Taint. + requirements: + description: Requirements are layered with GetLabels and applied to every node. + items: + description: A node selector requirement is a selector that contains values, a key, and an operator that relates the key and values. + properties: + key: + description: The label key that the selector applies to. + type: string + operator: + description: Represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. 
+ type: string + values: + description: An array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. If the operator is Gt or Lt, the values array must have a single element, which will be interpreted as an integer. This array is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + resources: + description: Resources models the resource requirements for the NodeClaim to launch properties: - effect: - description: Required. The effect of the taint on pods - that do not tolerate the taint. Valid effects are - NoSchedule, PreferNoSchedule and NoExecute. - type: string - key: - description: Required. The taint key to be applied to - a node. - type: string - timeAdded: - description: TimeAdded represents the time at which - the taint was added. It is only written for NoExecute - taints. - format: date-time - type: string - value: - description: The taint value corresponding to the taint - key. - type: string - required: - - effect - - key + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: Requests describes the minimum required resources for the NodeClaim to launch + type: object type: object - type: array - required: - - nodeClassRef - - requirements - type: object - type: object - weight: - description: Weight is the priority given to the provisioner during - scheduling. A higher numerical weight indicates that this provisioner - will be ordered ahead of other provisioners with lower weights. - A provisioner with no weight will be treated as if it is a provisioner - with a weight of 0. 
- format: int32 - maximum: 100 - minimum: 1 - type: integer - type: object - status: - description: NodePoolStatus defines the observed state of NodePool - properties: - resources: - additionalProperties: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - description: Resources is the list of resources that have been provisioned. - type: object - type: object - type: object - served: true - storage: true - subresources: - status: {} + startupTaints: + description: StartupTaints are taints that are applied to nodes upon startup which are expected to be removed automatically within a short period of time, typically by a DaemonSet that tolerates the taint. These are commonly used by daemonsets to allow initialization and enforce startup ordering. StartupTaints are ignored for provisioning purposes in that pods are not required to tolerate a StartupTaint in order to have nodes provisioned for them. + items: + description: The node this Taint is attached to has the "effect" on any pod that does not tolerate the Taint. + properties: + effect: + description: Required. The effect of the taint on pods that do not tolerate the taint. Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to a node. + type: string + timeAdded: + description: TimeAdded represents the time at which the taint was added. It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint key. + type: string + required: + - effect + - key + type: object + type: array + taints: + description: Taints will be applied to the NodeClaim's node. + items: + description: The node this Taint is attached to has the "effect" on any pod that does not tolerate the Taint. 
+ properties: + effect: + description: Required. The effect of the taint on pods that do not tolerate the taint. Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to a node. + type: string + timeAdded: + description: TimeAdded represents the time at which the taint was added. It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint key. + type: string + required: + - effect + - key + type: object + type: array + required: + - nodeClassRef + - requirements + type: object + type: object + weight: + description: Weight is the priority given to the provisioner during scheduling. A higher numerical weight indicates that this provisioner will be ordered ahead of other provisioners with lower weights. A provisioner with no weight will be treated as if it is a provisioner with a weight of 0. + format: int32 + maximum: 100 + minimum: 1 + type: integer + type: object + status: + description: NodePoolStatus defines the observed state of NodePool + properties: + resources: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: Resources is the list of resources that have been provisioned. 
+ type: object + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/pkg/controllers/nodeclaim/garbagecollection/nodeclaim_test.go b/pkg/controllers/nodeclaim/garbagecollection/nodeclaim_test.go index fd098c57e989..ef1241a7278d 100644 --- a/pkg/controllers/nodeclaim/garbagecollection/nodeclaim_test.go +++ b/pkg/controllers/nodeclaim/garbagecollection/nodeclaim_test.go @@ -32,8 +32,6 @@ import ( corecloudprovider "github.com/aws/karpenter-core/pkg/cloudprovider" coretest "github.com/aws/karpenter-core/pkg/test" . "github.com/aws/karpenter-core/pkg/test/expectations" - nodeclaimutil "github.com/aws/karpenter-core/pkg/utils/nodeclaim" - nodepoolutil "github.com/aws/karpenter-core/pkg/utils/nodepool" "github.com/aws/karpenter/pkg/apis/v1beta1" "github.com/aws/karpenter/pkg/apis/settings" @@ -358,16 +356,4 @@ var _ = Describe("NodeClaim/GarbageCollection", func() { } wg.Wait() }) - It("should not delete an instance if EnableNodePools/EnableNodeClaims isn't enabled", func() { - nodepoolutil.EnableNodePools = false - nodeclaimutil.EnableNodeClaims = false - - // Launch time was 1m ago - instance.LaunchTime = aws.Time(time.Now().Add(-time.Minute)) - awsEnv.EC2API.Instances.Store(aws.StringValue(instance.InstanceId), instance) - - ExpectReconcileSucceeded(ctx, garbageCollectionController, client.ObjectKey{}) - _, err := cloudProvider.Get(ctx, providerID) - Expect(err).ToNot(HaveOccurred()) - }) }) diff --git a/pkg/controllers/nodeclaim/garbagecollection/suite_test.go b/pkg/controllers/nodeclaim/garbagecollection/suite_test.go index 40dd367df7ea..5a9384a3f1fb 100644 --- a/pkg/controllers/nodeclaim/garbagecollection/suite_test.go +++ b/pkg/controllers/nodeclaim/garbagecollection/suite_test.go @@ -41,8 +41,6 @@ import ( "github.com/aws/karpenter-core/pkg/operator/scheme" coretest "github.com/aws/karpenter-core/pkg/test" . 
"github.com/aws/karpenter-core/pkg/test/expectations" - nodeclaimutil "github.com/aws/karpenter-core/pkg/utils/nodeclaim" - nodepoolutil "github.com/aws/karpenter-core/pkg/utils/nodepool" "github.com/aws/karpenter/pkg/apis" "github.com/aws/karpenter/pkg/apis/settings" "github.com/aws/karpenter/pkg/apis/v1beta1" @@ -85,8 +83,6 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - nodepoolutil.EnableNodePools = true - nodeclaimutil.EnableNodeClaims = true awsEnv.Reset() }) diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index 9fb9796bd597..22eaa711d9a6 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -38,9 +38,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" - "github.com/aws/aws-sdk-go/service/ec2" - "github.com/samber/lo" - corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" "github.com/aws/karpenter-core/pkg/events" corecontroller "github.com/aws/karpenter-core/pkg/operator/controller" @@ -107,7 +104,7 @@ func (c *Controller) Finalize(ctx context.Context, nodeClass *v1beta1.EC2NodeCla return reconcile.Result{}, nil } nodeClaimList := &corev1beta1.NodeClaimList{} - if err := c.kubeClient.List(ctx, nodeClaimList, client.MatchingFields{"spec.nodeClass.name": nodeClass.Name}); err != nil { + if err := c.kubeClient.List(ctx, nodeClaimList, client.MatchingFields{"spec.nodeClassRef.name": nodeClass.Name}); err != nil { return reconcile.Result{}, fmt.Errorf("listing nodeclaims that are using nodeclass, %w", err) } if len(nodeClaimList.Items) > 0 { diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index d07f0dd98f7a..3e430aafa32c 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -37,6 +37,7 @@ import ( "github.com/aws/aws-sdk-go/service/iam" "github.com/aws/aws-sdk-go/service/ssm" "github.com/patrickmn/go-cache" + "sigs.k8s.io/controller-runtime/pkg/client" 
"github.com/samber/lo" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -46,6 +47,7 @@ import ( "knative.dev/pkg/logging" "knative.dev/pkg/ptr" + corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" "github.com/aws/karpenter-core/pkg/operator" "github.com/aws/karpenter/pkg/apis/settings" awscache "github.com/aws/karpenter/pkg/cache" @@ -167,6 +169,14 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont launchTemplateProvider, ) + lo.Must0(operator.Manager.GetFieldIndexer().IndexField(ctx, &corev1beta1.NodeClaim{}, "spec.nodeClassRef.name", func(o client.Object) []string { + nc := o.(*corev1beta1.NodeClaim) + if nc.Spec.NodeClassRef == nil { + return []string{} + } + return []string{nc.Spec.NodeClassRef.Name} + }), "failed to setup nodeclaim indexer") + return ctx, &Operator{ Operator: operator, Session: sess, diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index c8b555f20e29..d44f33ac659e 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -32,7 +32,6 @@ import ( "knative.dev/pkg/logging" corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" - nodepoolutil "github.com/aws/karpenter-core/pkg/utils/nodepool" "github.com/aws/karpenter-core/pkg/utils/resources" "github.com/aws/karpenter/pkg/apis/settings" "github.com/aws/karpenter/pkg/apis/v1alpha1" @@ -136,15 +135,11 @@ func (p *Provider) Get(ctx context.Context, id string) (*Instance, error) { func (p *Provider) List(ctx context.Context) ([]*Instance, error) { var out = &ec2.DescribeInstancesOutput{} - tagKeys := []string{v1alpha5.ProvisionerNameLabelKey} - if nodepoolutil.EnableNodePools { - tagKeys = append(tagKeys, corev1beta1.NodePoolLabelKey) - } err := p.ec2api.DescribeInstancesPagesWithContext(ctx, &ec2.DescribeInstancesInput{ Filters: []*ec2.Filter{ { Name: aws.String("tag-key"), - Values: aws.StringSlice(tagKeys), + Values: aws.StringSlice([]string{v1alpha5.ProvisionerNameLabelKey, 
corev1beta1.NodePoolLabelKey}), }, { Name: aws.String("tag-key"), diff --git a/pkg/providers/instance/suite_test.go b/pkg/providers/instance/suite_test.go index e7239953bf06..9a46f6393b26 100644 --- a/pkg/providers/instance/suite_test.go +++ b/pkg/providers/instance/suite_test.go @@ -36,8 +36,6 @@ import ( "github.com/aws/karpenter-core/pkg/operator/options" "github.com/aws/karpenter-core/pkg/operator/scheme" coretest "github.com/aws/karpenter-core/pkg/test" - nodeclaimutil "github.com/aws/karpenter-core/pkg/utils/nodeclaim" - nodepoolutil "github.com/aws/karpenter-core/pkg/utils/nodepool" "github.com/aws/karpenter/pkg/apis" "github.com/aws/karpenter/pkg/apis/settings" "github.com/aws/karpenter/pkg/cloudprovider" @@ -72,8 +70,6 @@ var _ = AfterSuite(func() { }) var _ = BeforeEach(func() { - nodepoolutil.EnableNodePools = true - nodeclaimutil.EnableNodeClaims = true ctx = options.ToContext(ctx, opts) ctx = coresettings.ToContext(ctx, coretest.Settings()) ctx = settings.ToContext(ctx, test.Settings()) @@ -177,107 +173,6 @@ var _ = Describe("Combined/InstanceProvider", func() { Expect(err).To(BeNil()) Expect(instances).To(HaveLen(40)) - retrievedIDs := sets.New[string](lo.Map(instances, func(i *instance.Instance, _ int) string { return i.ID })...) 
- Expect(ids.Equal(retrievedIDs)).To(BeTrue()) - }) - It("should only return Provisioner-owned instances and not NodePool-owned instances if EnableNodePools/EnableNodeClaims isn't enabled", func() { - nodepoolutil.EnableNodePools = false - nodeclaimutil.EnableNodeClaims = false - - ids := sets.New[string]() - // Provision instances that have the karpenter.sh/provisioner-name key - for i := 0; i < 20; i++ { - instanceID := fake.InstanceID() - awsEnv.EC2API.Instances.Store( - instanceID, - &ec2.Instance{ - State: &ec2.InstanceState{ - Name: aws.String(ec2.InstanceStateNameRunning), - }, - Tags: []*ec2.Tag{ - { - Key: aws.String(fmt.Sprintf("kubernetes.io/cluster/%s", settings.FromContext(ctx).ClusterName)), - Value: aws.String("owned"), - }, - { - Key: aws.String(v1alpha5.ProvisionerNameLabelKey), - Value: aws.String("default"), - }, - { - Key: aws.String(v1alpha5.MachineManagedByAnnotationKey), - Value: aws.String(settings.FromContext(ctx).ClusterName), - }, - }, - PrivateDnsName: aws.String(fake.PrivateDNSName()), - Placement: &ec2.Placement{ - AvailabilityZone: aws.String(fake.DefaultRegion), - }, - // Launch time was 1m ago - LaunchTime: aws.Time(time.Now().Add(-time.Minute)), - InstanceId: aws.String(instanceID), - InstanceType: aws.String("m5.large"), - }, - ) - ids.Insert(instanceID) - } - // Provision instances that have the karpenter.sh/nodepool key - for i := 0; i < 20; i++ { - instanceID := fake.InstanceID() - awsEnv.EC2API.Instances.Store( - instanceID, - &ec2.Instance{ - State: &ec2.InstanceState{ - Name: aws.String(ec2.InstanceStateNameRunning), - }, - Tags: []*ec2.Tag{ - { - Key: aws.String(fmt.Sprintf("kubernetes.io/cluster/%s", settings.FromContext(ctx).ClusterName)), - Value: aws.String("owned"), - }, - { - Key: aws.String(corev1beta1.NodePoolLabelKey), - Value: aws.String("default"), - }, - { - Key: aws.String(corev1beta1.ManagedByAnnotationKey), - Value: aws.String(settings.FromContext(ctx).ClusterName), - }, - }, - PrivateDnsName: 
aws.String(fake.PrivateDNSName()), - Placement: &ec2.Placement{ - AvailabilityZone: aws.String(fake.DefaultRegion), - }, - // Launch time was 1m ago - LaunchTime: aws.Time(time.Now().Add(-time.Minute)), - InstanceId: aws.String(instanceID), - InstanceType: aws.String("m5.large"), - }, - ) - } - // Provision instances that do not have this tag key - for i := 0; i < 20; i++ { - instanceID := fake.InstanceID() - awsEnv.EC2API.Instances.Store( - instanceID, - &ec2.Instance{ - State: &ec2.InstanceState{ - Name: aws.String(ec2.InstanceStateNameRunning), - }, - PrivateDnsName: aws.String(fake.PrivateDNSName()), - Placement: &ec2.Placement{ - AvailabilityZone: aws.String(fake.DefaultRegion), - }, - // Launch time was 1m ago - LaunchTime: aws.Time(time.Now().Add(-time.Minute)), - InstanceId: aws.String(instanceID), - InstanceType: aws.String("m5.large"), - }, - ) - } - instances, err := awsEnv.InstanceProvider.List(ctx) - Expect(err).To(BeNil()) - Expect(instances).To(HaveLen(20)) - retrievedIDs := sets.New[string](lo.Map(instances, func(i *instance.Instance, _ int) string { return i.ID })...) 
Expect(ids.Equal(retrievedIDs)).To(BeTrue()) }) diff --git a/pkg/test/nodeclass.go b/pkg/test/nodeclass.go index 8c780f497a8c..54b7f49b1e49 100644 --- a/pkg/test/nodeclass.go +++ b/pkg/test/nodeclass.go @@ -68,7 +68,7 @@ func EC2NodeClass(overrides ...v1beta1.EC2NodeClass) *v1beta1.EC2NodeClass { func EC2NodeClassFieldIndexer(ctx context.Context) func(cache.Cache) error { return func(c cache.Cache) error { - return c.IndexField(ctx, &corev1beta1.NodeClaim{}, "spec.nodeClass.name", func(obj client.Object) []string { + return c.IndexField(ctx, &corev1beta1.NodeClaim{}, "spec.nodeClassRef.name", func(obj client.Object) []string { nc := obj.(*corev1beta1.NodeClaim) if nc.Spec.NodeClassRef == nil { return []string{""} diff --git a/website/content/en/preview/reference/metrics.md b/website/content/en/preview/reference/metrics.md index 112f63b0fe9c..c323fb2ca94f 100644 --- a/website/content/en/preview/reference/metrics.md +++ b/website/content/en/preview/reference/metrics.md @@ -50,6 +50,26 @@ Amount of time required for a replacement machine to become initialized. ### `karpenter_deprovisioning_replacement_machine_launch_failure_counter` The number of times that Karpenter failed to launch a replacement node for deprovisioning. Labeled by deprovisioner. +## Disruption Metrics + +### `karpenter_disruption_actions_performed_total` +Number of disruption methods performed. Labeled by disruption type. + +### `karpenter_disruption_consolidation_timeouts_total` +Number of times the Consolidation algorithm has reached a timeout. Labeled by consolidation type. + +### `karpenter_disruption_eligible_nodes` +Number of nodes eligible for disruption by Karpenter. Labeled by disruption type. + +### `karpenter_disruption_evaluation_duration_seconds` +Duration of the disruption evaluation process in seconds. + +### `karpenter_disruption_replacement_nodeclaim_failures_total` +The number of times that Karpenter failed to launch a replacement node for disruption. Labeled by disruption type. 
+ +### `karpenter_disruption_replacement_nodeclaim_initialized_seconds` +Amount of time required for a replacement nodeclaim to become initialized. + ## Interruption Metrics ### `karpenter_interruption_actions_performed` @@ -87,6 +107,40 @@ Number of machines registered in total by Karpenter. Labeled by the owning provi ### `karpenter_machines_terminated` Number of machines terminated in total by Karpenter. Labeled by reason the machine was terminated and the owning provisioner. +## Nodeclaims Metrics + +### `karpenter_nodeclaims_created` +Number of nodeclaims created in total by Karpenter. Labeled by reason the nodeclaim was created and the owning nodepool. + +### `karpenter_nodeclaims_disrupted` +Number of nodeclaims disrupted in total by Karpenter. Labeled by disruption type of the nodeclaim and the owning nodepool. + +### `karpenter_nodeclaims_drifted` +Number of nodeclaim drift reasons in total by Karpenter. Labeled by drift type of the nodeclaim and the owning nodepool. + +### `karpenter_nodeclaims_initialized` +Number of nodeclaims initialized in total by Karpenter. Labeled by the owning nodepool. + +### `karpenter_nodeclaims_launched` +Number of nodeclaims launched in total by Karpenter. Labeled by the owning nodepool. + +### `karpenter_nodeclaims_registered` +Number of nodeclaims registered in total by Karpenter. Labeled by the owning nodepool. + +### `karpenter_nodeclaims_terminated` +Number of nodeclaims terminated in total by Karpenter. Labeled by reason the nodeclaim was terminated and the owning nodepool. + +## Nodepools Metrics + +### `karpenter_nodepools_limit` +The nodepool limits are the limits specified on the nodepool that restrict the quantity of resources provisioned. Labeled by nodepool name and resource type. + +### `karpenter_nodepools_usage` +The nodepool usage is the amount of resources that have been provisioned by a particular nodepool. Labeled by nodepool name and resource type.
+ +### `karpenter_nodepools_usage_pct` +The nodepool usage percentage is the percentage of each resource used based on the resources provisioned and the limits that have been configured. Labeled by nodepool name and resource type. + ## Provisioner Metrics ### `karpenter_provisioner_limit` diff --git a/website/content/en/preview/reference/settings.md b/website/content/en/preview/reference/settings.md index 800cf75c168a..f16ed6c3d0b2 100644 --- a/website/content/en/preview/reference/settings.md +++ b/website/content/en/preview/reference/settings.md @@ -12,8 +12,11 @@ Karpenter surfaces environment variables and CLI parameters to allow you to conf | Environment Variable | CLI Flag | Description | |--|--|--| +| BATCH_IDLE_DURATION | \-\-batch-idle-duration | The maximum amount of time with no new pending pods that if exceeded ends the current batching window. If pods arrive faster than this time, the batching window will be extended up to the maxDuration. If they arrive slower, the pods will be batched separately. (default = 1s)| +| BATCH_MAX_DURATION | \-\-batch-max-duration | The maximum length of a batch window. The longer this is, the more pods we can consider for provisioning at one time which usually results in fewer but larger nodes. (default = 10s)| | DISABLE_WEBHOOK | \-\-disable-webhook | Disable the admission and validation webhooks (default = false)| | ENABLE_PROFILING | \-\-enable-profiling | Enable the profiling on the metric endpoint (default = false)| +| FEATURE_GATES | \-\-feature-gates | Optional features can be enabled / disabled using feature gates. 
Current options are: Drift (default = Drift=false)| | HEALTH_PROBE_PORT | \-\-health-probe-port | The port the health probe endpoint binds to for reporting controller health (default = 8081)| | KARPENTER_SERVICE | \-\-karpenter-service | The Karpenter Service name for the dynamic webhook certificate| | KUBE_CLIENT_BURST | \-\-kube-client-burst | The maximum allowed burst of queries to the kube-apiserver (default = 300)|