From e4ff4ad9323f9224b8a1912a5bd79c17715cc41b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cgkazanci=E2=80=9D?= Date: Wed, 16 Oct 2024 11:43:02 +0100 Subject: [PATCH 1/8] node-group-auto-discovery support for oci --- cluster-autoscaler/FAQ.md | 134 ++++++++-------- .../oci/instancepools/oci_cloud_provider.go | 4 +- .../oci/nodepools/oci_manager.go | 149 +++++++++++++++++- .../oci/nodepools/oci_manager_test.go | 71 +++++++++ .../oci/nodepools/oci_node_pool.go | 11 ++ 5 files changed, 299 insertions(+), 70 deletions(-) diff --git a/cluster-autoscaler/FAQ.md b/cluster-autoscaler/FAQ.md index cc0c6bfbb7b2..a38d4d85a689 100644 --- a/cluster-autoscaler/FAQ.md +++ b/cluster-autoscaler/FAQ.md @@ -925,73 +925,73 @@ However, CA does not consider "soft" constraints like `preferredDuringScheduling The following startup parameters are supported for cluster autoscaler: -| Parameter | Description | Default | -| --- | --- | --- | -| `cluster-name` | Autoscaled cluster name, if available | "" -| `address` | The address to expose prometheus metrics | :8085 -| `kubernetes` | Kubernetes API Server location. Leave blank for default | "" -| `kubeconfig` | Path to kubeconfig file with authorization and API Server location information | "" -| `cloud-config` | The path to the cloud provider configuration file. Empty string for no configuration file | "" -| `namespace` | Namespace in which cluster-autoscaler run | "kube-system" -| `enforce-node-group-min-size` | Should CA scale up the node group to the configured min size if needed | false -| `scale-down-enabled` | Should CA scale down the cluster | true -| `scale-down-delay-after-add` | How long after scale up that scale down evaluation resumes | 10 minutes -| `scale-down-delay-after-delete` | How long after node deletion that scale down evaluation resumes, defaults to scan-interval | scan-interval -| `scale-down-delay-after-failure` | How long after scale down failure that scale down evaluation resumes | 3 minutes -| `scale-down-unneeded-time` | How long a node should be unneeded before it is eligible for scale down | 10 minutes -| `scale-down-unready-time` | How long an unready node should be unneeded before it is eligible for scale down | 20 minutes -| `scale-down-utilization-threshold` | The maximum value between the sum of cpu requests and sum of memory requests of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down. This value is a floating point number that can range between zero and one. | 0.5 -| `scale-down-non-empty-candidates-count` | Maximum number of non empty nodes considered in one iteration as candidates for scale down with drain
Lower value means better CA responsiveness but possible slower scale down latency
Higher value can affect CA performance with big clusters (hundreds of nodes)
Set to non positive value to turn this heuristic off - CA will not limit the number of nodes it considers." | 30 -| `scale-down-candidates-pool-ratio` | A ratio of nodes that are considered as additional non empty candidates for
scale down when some candidates from previous iteration are no longer valid
Lower value means better CA responsiveness but possible slower scale down latency
Higher value can affect CA performance with big clusters (hundreds of nodes)
Set to 1.0 to turn this heuristics off - CA will take all nodes as additional candidates. | 0.1 -| `scale-down-candidates-pool-min-count` | Minimum number of nodes that are considered as additional non empty candidates
for scale down when some candidates from previous iteration are no longer valid.
When calculating the pool size for additional candidates we take
`max(#nodes * scale-down-candidates-pool-ratio, scale-down-candidates-pool-min-count)` | 50 -| `scan-interval` | How often cluster is reevaluated for scale up or down | 10 seconds -| `max-nodes-total` | Maximum number of nodes in all node groups. Cluster autoscaler will not grow the cluster beyond this number. | 0 -| `cores-total` | Minimum and maximum number of cores in cluster, in the format \:\. Cluster autoscaler will not scale the cluster beyond these numbers. | 320000 -| `memory-total` | Minimum and maximum number of gigabytes of memory in cluster, in the format \:\. Cluster autoscaler will not scale the cluster beyond these numbers. | 6400000 -| `gpu-total` | Minimum and maximum number of different GPUs in cluster, in the format :\:\. Cluster autoscaler will not scale the cluster beyond these numbers. Can be passed multiple times. CURRENTLY THIS FLAG ONLY WORKS ON GKE. | "" -| `cloud-provider` | Cloud provider type. | gce -| `max-empty-bulk-delete` | Maximum number of empty nodes that can be deleted at the same time. | 10 -| `max-graceful-termination-sec` | Maximum number of seconds CA waits for pod termination when trying to scale down a node. | 600 -| `max-total-unready-percentage` | Maximum percentage of unready nodes in the cluster. After this is exceeded, CA halts operations | 45 -| `ok-total-unready-count` | Number of allowed unready nodes, irrespective of max-total-unready-percentage | 3 -| `max-node-provision-time` | Maximum time CA waits for node to be provisioned | 15 minutes -| `nodes` | sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: \:\: | "" -| `node-group-auto-discovery` | One or more definition(s) of node group auto-discovery.
A definition is expressed `<name of discoverer>:[<key>[=<value>]]`<br>
The `aws`, `gce`, and `azure` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`
GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10`
Azure matches by VMSS tags, similar to AWS. And you can optionally specify a default min and max size for VMSSs, e.g. `label:tag=tagKey,anotherTagKey=bar,min=0,max=600`.
Can be used multiple times | "" -| `emit-per-nodegroup-metrics` | If true, emit per node group metrics. | false -| `estimator` | Type of resource estimator to be used in scale up | binpacking -| `expander` | Type of node group expander to be used in scale up. | random -| `ignore-daemonsets-utilization` | Whether DaemonSet pods will be ignored when calculating resource utilization for scaling down | false -| `ignore-mirror-pods-utilization` | Whether [Mirror pods](https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/) will be ignored when calculating resource utilization for scaling down | false -| `write-status-configmap` | Should CA write status information to a configmap | true -| `status-config-map-name` | The name of the status ConfigMap that CA writes | cluster-autoscaler-status -| `max-inactivity` | Maximum time from last recorded autoscaler activity before automatic restart | 10 minutes -| `max-failing-time` | Maximum time from last recorded successful autoscaler run before automatic restart | 15 minutes -| `balance-similar-node-groups` | Detect similar node groups and balance the number of nodes between them | false -| `balancing-ignore-label` | Define a node label that should be ignored when considering node group similarity. One label per flag occurrence. | "" -| `balancing-label` | Define a node label to use when comparing node group similarity. If set, all other comparison logic is disabled, and only labels are considered when comparing groups. One label per flag occurrence. | "" -| `node-autoprovisioning-enabled` | Should CA autoprovision node groups when needed | false -| `max-autoprovisioned-node-group-count` | The maximum number of autoprovisioned groups in the cluster | 15 -| `unremovable-node-recheck-timeout` | The timeout before we check again a node that couldn't be removed before | 5 minutes -| `expendable-pods-priority-cutoff` | Pods with priority below cutoff will be expendable. They can be killed without any consideration during scale down and they don't cause scale up. Pods with null priority (PodPriority disabled) are non expendable | -10 -| `regional` | Cluster is regional | false -| `leader-elect` | Start a leader election client and gain leadership before executing the main loop.
Enable this when running replicated components for high availability | true -| `leader-elect-lease-duration` | The duration that non-leader candidates will wait after observing a leadership
renewal until attempting to acquire leadership of a led but unrenewed leader slot.
This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate.
This is only applicable if leader election is enabled | 15 seconds -| `leader-elect-renew-deadline` | The interval between attempts by the active cluster-autoscaler to renew a leadership slot before it stops leading.
This must be less than or equal to the lease duration.
This is only applicable if leader election is enabled | 10 seconds -| `leader-elect-retry-period` | The duration the clients should wait between attempting acquisition and renewal of a leadership.
This is only applicable if leader election is enabled | 2 seconds -| `leader-elect-resource-lock` | The type of resource object that is used for locking during leader election.
Supported options are `leases` (default), `endpoints`, `endpointsleases`, `configmaps`, and `configmapsleases` | "leases" -| `aws-use-static-instance-list` | Should CA fetch instance types in runtime or use a static list. AWS only | false -| `skip-nodes-with-system-pods` | If true cluster autoscaler will never delete nodes with pods from kube-system (except for [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) or [mirror pods](https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/)) | true -| `skip-nodes-with-local-storage`| If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath | true -| `skip-nodes-with-custom-controller-pods` | If true cluster autoscaler will never delete nodes with pods owned by custom controllers | true -| `min-replica-count` | Minimum number or replicas that a replica set or replication controller should have to allow their pods deletion in scale down | 0 -| `daemonset-eviction-for-empty-nodes` | Whether DaemonSet pods will be gracefully terminated from empty nodes | false -| `daemonset-eviction-for-occupied-nodes` | Whether DaemonSet pods will be gracefully terminated from non-empty nodes | true -| `feature-gates` | A set of key=value pairs that describe feature gates for alpha/experimental features. | "" -| `cordon-node-before-terminating` | Should CA cordon nodes before terminating during downscale process | false -| `record-duplicated-events` | Enable the autoscaler to print duplicated events within a 5 minute window. | false -| `debugging-snapshot-enabled` | Whether the debugging snapshot of cluster autoscaler feature is enabled. | false -| `node-delete-delay-after-taint` | How long to wait before deleting a node after tainting it. | 5 seconds -| `enable-provisioning-requests` | Whether the clusterautoscaler will be handling the ProvisioningRequest CRs. | false +| Parameter | Description | Default | +| --- |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --- | +| `cluster-name` | Autoscaled cluster name, if available | "" +| `address` | The address to expose prometheus metrics | :8085 +| `kubernetes` | Kubernetes API Server location. Leave blank for default | "" +| `kubeconfig` | Path to kubeconfig file with authorization and API Server location information | "" +| `cloud-config` | The path to the cloud provider configuration file. 
Empty string for no configuration file | "" +| `namespace` | Namespace in which cluster-autoscaler run | "kube-system" +| `enforce-node-group-min-size` | Should CA scale up the node group to the configured min size if needed | false +| `scale-down-enabled` | Should CA scale down the cluster | true +| `scale-down-delay-after-add` | How long after scale up that scale down evaluation resumes | 10 minutes +| `scale-down-delay-after-delete` | How long after node deletion that scale down evaluation resumes, defaults to scan-interval | scan-interval +| `scale-down-delay-after-failure` | How long after scale down failure that scale down evaluation resumes | 3 minutes +| `scale-down-unneeded-time` | How long a node should be unneeded before it is eligible for scale down | 10 minutes +| `scale-down-unready-time` | How long an unready node should be unneeded before it is eligible for scale down | 20 minutes +| `scale-down-utilization-threshold` | The maximum value between the sum of cpu requests and sum of memory requests of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down. This value is a floating point number that can range between zero and one. | 0.5 +| `scale-down-non-empty-candidates-count` | Maximum number of non empty nodes considered in one iteration as candidates for scale down with drain
Lower value means better CA responsiveness but possible slower scale down latency
Higher value can affect CA performance with big clusters (hundreds of nodes)
Set to non positive value to turn this heuristic off - CA will not limit the number of nodes it considers." | 30 +| `scale-down-candidates-pool-ratio` | A ratio of nodes that are considered as additional non empty candidates for
scale down when some candidates from previous iteration are no longer valid
Lower value means better CA responsiveness but possible slower scale down latency
Higher value can affect CA performance with big clusters (hundreds of nodes)
Set to 1.0 to turn this heuristics off - CA will take all nodes as additional candidates. | 0.1 +| `scale-down-candidates-pool-min-count` | Minimum number of nodes that are considered as additional non empty candidates
for scale down when some candidates from previous iteration are no longer valid.
When calculating the pool size for additional candidates we take
`max(#nodes * scale-down-candidates-pool-ratio, scale-down-candidates-pool-min-count)` | 50 +| `scan-interval` | How often cluster is reevaluated for scale up or down | 10 seconds +| `max-nodes-total` | Maximum number of nodes in all node groups. Cluster autoscaler will not grow the cluster beyond this number. | 0 +| `cores-total` | Minimum and maximum number of cores in cluster, in the format \:\. Cluster autoscaler will not scale the cluster beyond these numbers. | 320000 +| `memory-total` | Minimum and maximum number of gigabytes of memory in cluster, in the format \:\. Cluster autoscaler will not scale the cluster beyond these numbers. | 6400000 +| `gpu-total` | Minimum and maximum number of different GPUs in cluster, in the format :\:\. Cluster autoscaler will not scale the cluster beyond these numbers. Can be passed multiple times. CURRENTLY THIS FLAG ONLY WORKS ON GKE. | "" +| `cloud-provider` | Cloud provider type. | gce +| `max-empty-bulk-delete` | Maximum number of empty nodes that can be deleted at the same time. | 10 +| `max-graceful-termination-sec` | Maximum number of seconds CA waits for pod termination when trying to scale down a node. | 600 +| `max-total-unready-percentage` | Maximum percentage of unready nodes in the cluster. After this is exceeded, CA halts operations | 45 +| `ok-total-unready-count` | Number of allowed unready nodes, irrespective of max-total-unready-percentage | 3 +| `max-node-provision-time` | Maximum time CA waits for node to be provisioned | 15 minutes +| `nodes` | sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: \:\: | "" +| `node-group-auto-discovery` | One or more definition(s) of node group auto-discovery.
A definition is expressed `<name of discoverer>:[<key>[=<value>]]`<br>
The `aws`, `gce`, `azure`, and `oci` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`<br>
GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10`
Azure matches by VMSS tags, similar to AWS. And you can optionally specify a default min and max size for VMSSs, e.g. `label:tag=tagKey,anotherTagKey=bar,min=0,max=600`.
OCI matches nodepools by Defined or Freeform tags in the given compartment, e.g. `clusterId:<clusterId>,compartmentId:<compartmentId>,nodepoolTags:<tagKey1>=<tagValue1>&<tagKey2>=<tagValue2>,min:<min>,max:<max>`. Defined tag keys should be given with a namespace, in the format `namespace.tagKey`.<br>
Can be used multiple times | "" +| `emit-per-nodegroup-metrics` | If true, emit per node group metrics. | false +| `estimator` | Type of resource estimator to be used in scale up | binpacking +| `expander` | Type of node group expander to be used in scale up. | random +| `ignore-daemonsets-utilization` | Whether DaemonSet pods will be ignored when calculating resource utilization for scaling down | false +| `ignore-mirror-pods-utilization` | Whether [Mirror pods](https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/) will be ignored when calculating resource utilization for scaling down | false +| `write-status-configmap` | Should CA write status information to a configmap | true +| `status-config-map-name` | The name of the status ConfigMap that CA writes | cluster-autoscaler-status +| `max-inactivity` | Maximum time from last recorded autoscaler activity before automatic restart | 10 minutes +| `max-failing-time` | Maximum time from last recorded successful autoscaler run before automatic restart | 15 minutes +| `balance-similar-node-groups` | Detect similar node groups and balance the number of nodes between them | false +| `balancing-ignore-label` | Define a node label that should be ignored when considering node group similarity. One label per flag occurrence. | "" +| `balancing-label` | Define a node label to use when comparing node group similarity. If set, all other comparison logic is disabled, and only labels are considered when comparing groups. One label per flag occurrence. | "" +| `node-autoprovisioning-enabled` | Should CA autoprovision node groups when needed | false +| `max-autoprovisioned-node-group-count` | The maximum number of autoprovisioned groups in the cluster | 15 +| `unremovable-node-recheck-timeout` | The timeout before we check again a node that couldn't be removed before | 5 minutes +| `expendable-pods-priority-cutoff` | Pods with priority below cutoff will be expendable. They can be killed without any consideration during scale down and they don't cause scale up. Pods with null priority (PodPriority disabled) are non expendable | -10 +| `regional` | Cluster is regional | false +| `leader-elect` | Start a leader election client and gain leadership before executing the main loop.
Enable this when running replicated components for high availability | true +| `leader-elect-lease-duration` | The duration that non-leader candidates will wait after observing a leadership
renewal until attempting to acquire leadership of a led but unrenewed leader slot.
This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate.
This is only applicable if leader election is enabled | 15 seconds +| `leader-elect-renew-deadline` | The interval between attempts by the active cluster-autoscaler to renew a leadership slot before it stops leading.
This must be less than or equal to the lease duration.
This is only applicable if leader election is enabled | 10 seconds +| `leader-elect-retry-period` | The duration the clients should wait between attempting acquisition and renewal of a leadership.
This is only applicable if leader election is enabled | 2 seconds +| `leader-elect-resource-lock` | The type of resource object that is used for locking during leader election.
Supported options are `leases` (default), `endpoints`, `endpointsleases`, `configmaps`, and `configmapsleases` | "leases" +| `aws-use-static-instance-list` | Should CA fetch instance types in runtime or use a static list. AWS only | false +| `skip-nodes-with-system-pods` | If true cluster autoscaler will never delete nodes with pods from kube-system (except for [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) or [mirror pods](https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/)) | true +| `skip-nodes-with-local-storage`| If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath | true +| `skip-nodes-with-custom-controller-pods` | If true cluster autoscaler will never delete nodes with pods owned by custom controllers | true +| `min-replica-count` | Minimum number or replicas that a replica set or replication controller should have to allow their pods deletion in scale down | 0 +| `daemonset-eviction-for-empty-nodes` | Whether DaemonSet pods will be gracefully terminated from empty nodes | false +| `daemonset-eviction-for-occupied-nodes` | Whether DaemonSet pods will be gracefully terminated from non-empty nodes | true +| `feature-gates` | A set of key=value pairs that describe feature gates for alpha/experimental features. | "" +| `cordon-node-before-terminating` | Should CA cordon nodes before terminating during downscale process | false +| `record-duplicated-events` | Enable the autoscaler to print duplicated events within a 5 minute window. | false +| `debugging-snapshot-enabled` | Whether the debugging snapshot of cluster autoscaler feature is enabled. | false +| `node-delete-delay-after-taint` | How long to wait before deleting a node after tainting it. | 5 seconds +| `enable-provisioning-requests` | Whether the clusterautoscaler will be handling the ProvisioningRequest CRs. 
| false # Troubleshooting diff --git a/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go b/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go index dfcc1728f7f2..2038d88b8285 100644 --- a/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go @@ -153,8 +153,8 @@ func BuildOCI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover if err != nil { klog.Fatalf("Failed to get pool type: %v", err) } - if strings.HasPrefix(ocidType, npconsts.OciNodePoolResourceIdent) { - manager, err := nodepools.CreateNodePoolManager(opts.CloudConfig, do, createKubeClient(opts)) + if strings.HasPrefix(ocidType, npconsts.OciNodePoolResourceIdent) || opts.NodeGroupAutoDiscovery != nil { + manager, err := nodepools.CreateNodePoolManager(opts.CloudConfig, opts.NodeGroupAutoDiscovery, do, createKubeClient(opts)) if err != nil { klog.Fatalf("Could not create OCI OKE cloud provider: %v", err) } diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go index 6f04b563c3ba..ad0b32e24095 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go @@ -9,6 +9,7 @@ import ( "fmt" "math" "os" + "regexp" "strconv" "strings" "time" @@ -34,6 +35,11 @@ import ( const ( maxAddTaintRetries = 5 maxGetNodepoolRetries = 3 + clusterId = "clusterId" + compartmentId = "compartmentId" + nodepoolTags = "nodepoolTags" + min = "min" + max = "max" ) var ( @@ -75,10 +81,11 @@ type okeClient interface { GetNodePool(context.Context, oke.GetNodePoolRequest) (oke.GetNodePoolResponse, error) UpdateNodePool(context.Context, oke.UpdateNodePoolRequest) (oke.UpdateNodePoolResponse, error) DeleteNode(context.Context, oke.DeleteNodeRequest) (oke.DeleteNodeResponse, error) + ListNodePools(ctx context.Context, request oke.ListNodePoolsRequest) (oke.ListNodePoolsResponse, error) } // CreateNodePoolManager creates an NodePoolManager that can manage autoscaling node pools -func CreateNodePoolManager(cloudConfigPath string, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, kubeClient kubernetes.Interface) (NodePoolManager, error) { +func CreateNodePoolManager(cloudConfigPath string, nodeGroupAutoDiscoveryList []string, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, kubeClient kubernetes.Interface) (NodePoolManager, error) { var err error var configProvider common.ConfigurationProvider @@ -151,6 +158,20 @@ func CreateNodePoolManager(cloudConfigPath string, discoveryOpts cloudprovider.N nodePoolCache: newNodePoolCache(&okeClient), } + // auto discover nodepools from compartments with nodeGroupAutoDiscovery parameter + klog.Infof("checking node groups for autodiscovery ... ") + for _, arg := range nodeGroupAutoDiscoveryList { + nodeGroup, err := nodeGroupFromArg(arg) + if err != nil { + return nil, fmt.Errorf("unable to construct node group auto discovery from argument: %v", err) + } + nodeGroup.manager = manager + nodeGroup.kubeClient = kubeClient + + manager.nodeGroups = append(manager.nodeGroups, *nodeGroup) + autoDiscoverNodeGroups(manager, manager.okeClient, *nodeGroup) + } + // Contains all the specs from the args that give us the pools. 
for _, arg := range discoveryOpts.NodeGroupSpecs { np, err := nodePoolFromArg(arg) @@ -180,6 +201,50 @@ func CreateNodePoolManager(cloudConfigPath string, discoveryOpts cloudprovider.N return manager, nil } +func autoDiscoverNodeGroups(m *ociManagerImpl, okeClient okeClient, nodeGroup nodeGroupAutoDiscovery) (bool, error) { + var resp, reqErr = okeClient.ListNodePools(context.Background(), oke.ListNodePoolsRequest{ + ClusterId: common.String(nodeGroup.clusterId), + CompartmentId: common.String(nodeGroup.compartmentId), + }) + if reqErr != nil { + klog.Errorf("failed to fetch the nodepool list with clusterId: %s, compartmentId: %s. Error: %v", nodeGroup.clusterId, nodeGroup.compartmentId, reqErr) + return false, reqErr + } else { + for _, nodePoolSummary := range resp.Items { + klog.V(5).Infof("found nodepool %v", nodePoolSummary) + if validateNodepoolTags(nodeGroup.tags, nodePoolSummary.FreeformTags, nodePoolSummary.DefinedTags) { + nodepool := &nodePool{} + nodepool.id = *nodePoolSummary.Id + nodepool.minSize = nodeGroup.minSize + nodepool.maxSize = nodeGroup.maxSize + + nodepool.manager = nodeGroup.manager + nodepool.kubeClient = nodeGroup.kubeClient + + m.staticNodePools[nodepool.id] = nodepool + klog.V(5).Infof("auto discovered nodepool in compartment : %s , nodepoolid: %s", nodeGroup.compartmentId, nodepool.id) + } else { + klog.Warningf("nodepool ignored as the tags do not satisfy the requirement : %s , %v, %v", *nodePoolSummary.Id, nodePoolSummary.FreeformTags, nodePoolSummary.DefinedTags) + } + } + } + return true, nil +} + +func validateNodepoolTags(nodeGroupTags map[string]string, freeFormTags map[string]string, definedTags map[string]map[string]interface{}) bool { + if nodeGroupTags != nil { + for tagKey, tagValue := range nodeGroupTags { + namespacedTagKey := strings.Split(tagKey, ".") + if len(namespacedTagKey) == 2 && tagValue != definedTags[namespacedTagKey[0]][namespacedTagKey[1]] { + return false + } else if len(namespacedTagKey) != 2 && tagValue != freeFormTags[tagKey] { + return false + } + } + } + return true +} + // nodePoolFromArg parses a node group spec represented in the form of `::` and produces a node group spec object func nodePoolFromArg(value string) (*nodePool, error) { tokens := strings.SplitN(value, ":", 3) @@ -207,6 +272,78 @@ func nodePoolFromArg(value string) (*nodePool, error) { return spec, nil } +// nodeGroupFromArg parses a node group spec represented in the form of +// `clusterId:,compartmentId:,nodepoolTags:=&=,min:,max:` +// and produces a node group auto discovery object, +// nodepoolTags are optional and CA will capture all nodes if no tags are provided. 
+func nodeGroupFromArg(value string) (*nodeGroupAutoDiscovery, error) { + // this regex will find the key-value pairs in any given order if separated with a colon + regexPattern := `(?:` + compartmentId + `:(?P<` + compartmentId + `>[^,]+)` + regexPattern = regexPattern + `|` + nodepoolTags + `:(?P<` + nodepoolTags + `>[^,]+)` + regexPattern = regexPattern + `|` + max + `:(?P<` + max + `>[^,]+)` + regexPattern = regexPattern + `|` + min + `:(?P<` + min + `>[^,]+)` + regexPattern = regexPattern + `|` + clusterId + `:(?P<` + clusterId + `>[^,]+)` + regexPattern = regexPattern + `)(?:,|$)` + + re := regexp.MustCompile(regexPattern) + + parametersMap := make(map[string]string) + + // push key-value pairs into a map + for _, match := range re.FindAllStringSubmatch(value, -1) { + for i, name := range re.SubexpNames() { + if i != 0 && match[i] != "" { + parametersMap[name] = match[i] + } + } + } + + spec := &nodeGroupAutoDiscovery{} + + if parametersMap[clusterId] != "" { + spec.clusterId = parametersMap[clusterId] + } else { + return nil, fmt.Errorf("failed to set %s, it is missing in node-group-auto-discovery parameter", clusterId) + } + + if parametersMap[compartmentId] != "" { + spec.compartmentId = parametersMap[compartmentId] + } else { + return nil, fmt.Errorf("failed to set %s, it is missing in node-group-auto-discovery parameter", compartmentId) + } + + if size, err := strconv.Atoi(parametersMap[min]); err == nil { + spec.minSize = size + } else { + return nil, fmt.Errorf("failed to set %s size: %s, expected integer", min, parametersMap[min]) + } + + if size, err := strconv.Atoi(parametersMap[max]); err == nil { + spec.maxSize = size + } else { + return nil, fmt.Errorf("failed to set %s size: %s, expected integer", max, parametersMap[max]) + } + + if parametersMap[nodepoolTags] != "" { + nodepoolTags := parametersMap[nodepoolTags] + + spec.tags = make(map[string]string) + + pairs := strings.Split(nodepoolTags, "&") + + for _, pair := range pairs { + parts := strings.Split(pair, "=") + if len(parts) == 2 { + spec.tags[parts[0]] = parts[1] + } + } + } + + klog.Infof("node group auto discovery spec constructed: %+v", spec) + + return spec, nil +} + type ociManagerImpl struct { cfg *ocicommon.CloudConfig okeClient okeClient @@ -215,6 +352,7 @@ type ociManagerImpl struct { ociTagsGetter ocicommon.TagsGetter registeredTaintsGetter RegisteredTaintsGetter staticNodePools map[string]NodePool + nodeGroups []nodeGroupAutoDiscovery lastRefresh time.Time @@ -253,6 +391,15 @@ func (m *ociManagerImpl) TaintToPreventFurtherSchedulingOnRestart(nodes []*apiv1 } func (m *ociManagerImpl) forceRefresh() error { + // auto discover node groups + if m.nodeGroups != nil { + // empty previous nodepool map to do an auto discovery + m.staticNodePools = make(map[string]NodePool) + for _, nodeGroup := range m.nodeGroups { + autoDiscoverNodeGroups(m, m.okeClient, nodeGroup) + } + } + // rebuild nodepool cache err := m.nodePoolCache.rebuild(m.staticNodePools, maxGetNodepoolRetries) if err != nil { return err diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go index 838d0ce199d1..2f5bac21f824 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go @@ -336,6 +336,10 @@ func (c mockOKEClient) DeleteNode(context.Context, oke.DeleteNodeRequest) (oke.D }, nil } +func (c mockOKEClient) ListNodePools(context.Context, 
oke.ListNodePoolsRequest) (oke.ListNodePoolsResponse, error) { + return oke.ListNodePoolsResponse{}, nil +} + func TestRemoveInstance(t *testing.T) { instanceId1 := "instance1" instanceId2 := "instance2" @@ -384,3 +388,70 @@ func TestRemoveInstance(t *testing.T) { } } } + +func TestNodeGroupFromArg(t *testing.T) { + var nodeGroupArg = "clusterId:testClusterId,compartmentId:testCompartmentId,nodepoolTags:ca-managed=true&namespace.foo=bar,min:1,max:5" + nodeGroupAutoDiscovery, err := nodeGroupFromArg(nodeGroupArg) + if err != nil { + t.Errorf("Error: #{err}") + } + if nodeGroupAutoDiscovery.clusterId != "testClusterId" { + t.Errorf("Error: clusterId should be testClusterId") + } + if nodeGroupAutoDiscovery.compartmentId != "testCompartmentId" { + t.Errorf("Error: compartmentId should be testCompartmentId") + } + if nodeGroupAutoDiscovery.minSize != 1 { + t.Errorf("Error: minSize should be 1") + } + if nodeGroupAutoDiscovery.maxSize != 5 { + t.Errorf("Error: maxSize should be 5") + } + if nodeGroupAutoDiscovery.tags["ca-managed"] != "true" { + t.Errorf("Error: ca-managed:true is missing in tags.") + } + if nodeGroupAutoDiscovery.tags["namespace.foo"] != "bar" { + t.Errorf("Error: namespace.foo:bar is missing in tags.") + } +} + +func TestValidateNodePoolTags(t *testing.T) { + + var nodeGroupTags map[string]string = nil + var nodePoolTags map[string]string = nil + var definedTags map[string]map[string]interface{} = nil + + if validateNodepoolTags(nodeGroupTags, nodePoolTags, definedTags) == false { + t.Errorf("validateNodepoolTags shouldn't return false for empty tags map") + } + + nodeGroupTags = make(map[string]string) + nodeGroupTags["test"] = "test" + + if validateNodepoolTags(nodeGroupTags, nodePoolTags, definedTags) == true { + t.Errorf("validateNodepoolTags shouldn't return true for tags missing") + } + + nodePoolTags = make(map[string]string) + nodePoolTags["foo"] = "bar" + + if validateNodepoolTags(nodeGroupTags, nodePoolTags, definedTags) == true { + t.Errorf("validateNodepoolTags shouldn't return true for not matching tags") + } + + nodePoolTags["test"] = "test" + + if validateNodepoolTags(nodeGroupTags, nodePoolTags, definedTags) == false { + t.Errorf("validateNodepoolTags shouldn't return false for matching tags") + } + + nodeGroupTags["ns.tag1"] = "tag2" + definedTagsMap := make(map[string]interface{}) + definedTagsMap["tag1"] = "tag2" + definedTags = make(map[string]map[string]interface{}) + definedTags["ns"] = definedTagsMap + + if validateNodepoolTags(nodeGroupTags, nodePoolTags, definedTags) == false { + t.Errorf("validateNodepoolTags shouldn't return false for namespaced tags") + } +} diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool.go index 74780d0f2633..43d656e0f76c 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_node_pool.go @@ -46,6 +46,17 @@ type nodePool struct { maxSize int } +type nodeGroupAutoDiscovery struct { + manager NodePoolManager + kubeClient kubernetes.Interface + + clusterId string + compartmentId string + tags map[string]string + minSize int + maxSize int +} + // MaxSize returns maximum size of the node group. 
func (np *nodePool) MaxSize() int { return np.maxSize From 5ae6fa4eed3cc2cb91040bc1fd83f498818d2bdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cgkazanci=E2=80=9D?= Date: Wed, 16 Oct 2024 12:13:14 +0100 Subject: [PATCH 2/8] fixing golint issues --- .../oci/nodepools/oci_manager.go | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go index ad0b32e24095..3a2056cb0326 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go @@ -209,23 +209,22 @@ func autoDiscoverNodeGroups(m *ociManagerImpl, okeClient okeClient, nodeGroup no if reqErr != nil { klog.Errorf("failed to fetch the nodepool list with clusterId: %s, compartmentId: %s. Error: %v", nodeGroup.clusterId, nodeGroup.compartmentId, reqErr) return false, reqErr - } else { - for _, nodePoolSummary := range resp.Items { - klog.V(5).Infof("found nodepool %v", nodePoolSummary) - if validateNodepoolTags(nodeGroup.tags, nodePoolSummary.FreeformTags, nodePoolSummary.DefinedTags) { - nodepool := &nodePool{} - nodepool.id = *nodePoolSummary.Id - nodepool.minSize = nodeGroup.minSize - nodepool.maxSize = nodeGroup.maxSize - - nodepool.manager = nodeGroup.manager - nodepool.kubeClient = nodeGroup.kubeClient - - m.staticNodePools[nodepool.id] = nodepool - klog.V(5).Infof("auto discovered nodepool in compartment : %s , nodepoolid: %s", nodeGroup.compartmentId, nodepool.id) - } else { - klog.Warningf("nodepool ignored as the tags do not satisfy the requirement : %s , %v, %v", *nodePoolSummary.Id, nodePoolSummary.FreeformTags, nodePoolSummary.DefinedTags) - } + } + for _, nodePoolSummary := range resp.Items { + klog.V(5).Infof("found nodepool %v", nodePoolSummary) + if validateNodepoolTags(nodeGroup.tags, nodePoolSummary.FreeformTags, nodePoolSummary.DefinedTags) { + nodepool := &nodePool{} + nodepool.id = *nodePoolSummary.Id + nodepool.minSize = nodeGroup.minSize + nodepool.maxSize = nodeGroup.maxSize + + nodepool.manager = nodeGroup.manager + nodepool.kubeClient = nodeGroup.kubeClient + + m.staticNodePools[nodepool.id] = nodepool + klog.V(5).Infof("auto discovered nodepool in compartment : %s , nodepoolid: %s", nodeGroup.compartmentId, nodepool.id) + } else { + klog.Warningf("nodepool ignored as the tags do not satisfy the requirement : %s , %v, %v", *nodePoolSummary.Id, nodePoolSummary.FreeformTags, nodePoolSummary.DefinedTags) } } return true, nil From ee70dcea137c33282a36b57e735501292d71e8bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cgkazanci=E2=80=9D?= Date: Thu, 17 Oct 2024 13:08:39 +0100 Subject: [PATCH 3/8] removed the log line to close a review item --- cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go index 3a2056cb0326..fb44410c2a8e 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go @@ -211,7 +211,6 @@ func autoDiscoverNodeGroups(m *ociManagerImpl, okeClient okeClient, nodeGroup no return false, reqErr } for _, nodePoolSummary := range resp.Items { - klog.V(5).Infof("found nodepool %v", nodePoolSummary) if validateNodepoolTags(nodeGroup.tags, nodePoolSummary.FreeformTags, nodePoolSummary.DefinedTags) { nodepool := 
&nodePool{} nodepool.id = *nodePoolSummary.Id From 02c1e0447dc8c897549670174c41efd635c9e4af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cgkazanci=E2=80=9D?= Date: Fri, 18 Oct 2024 11:16:06 +0100 Subject: [PATCH 4/8] validations added for nodeGroupAutoDiscovery parameter --- cluster-autoscaler/FAQ.md | 134 +++++++++--------- .../cloudprovider/oci/README.md | 8 ++ .../cloudprovider/oci/common/oci_util.go | 18 +++ .../oci/instancepools/oci_cloud_provider.go | 8 +- 4 files changed, 100 insertions(+), 68 deletions(-) diff --git a/cluster-autoscaler/FAQ.md b/cluster-autoscaler/FAQ.md index a38d4d85a689..cc0c6bfbb7b2 100644 --- a/cluster-autoscaler/FAQ.md +++ b/cluster-autoscaler/FAQ.md @@ -925,73 +925,73 @@ However, CA does not consider "soft" constraints like `preferredDuringScheduling The following startup parameters are supported for cluster autoscaler: -| Parameter | Description | Default | -| --- |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --- | -| `cluster-name` | Autoscaled cluster name, if available | "" -| `address` | The address to expose prometheus metrics | :8085 -| `kubernetes` | Kubernetes API Server location. Leave blank for default | "" -| `kubeconfig` | Path to kubeconfig file with authorization and API Server location information | "" -| `cloud-config` | The path to the cloud provider configuration file. Empty string for no configuration file | "" -| `namespace` | Namespace in which cluster-autoscaler run | "kube-system" -| `enforce-node-group-min-size` | Should CA scale up the node group to the configured min size if needed | false -| `scale-down-enabled` | Should CA scale down the cluster | true -| `scale-down-delay-after-add` | How long after scale up that scale down evaluation resumes | 10 minutes -| `scale-down-delay-after-delete` | How long after node deletion that scale down evaluation resumes, defaults to scan-interval | scan-interval -| `scale-down-delay-after-failure` | How long after scale down failure that scale down evaluation resumes | 3 minutes -| `scale-down-unneeded-time` | How long a node should be unneeded before it is eligible for scale down | 10 minutes -| `scale-down-unready-time` | How long an unready node should be unneeded before it is eligible for scale down | 20 minutes -| `scale-down-utilization-threshold` | The maximum value between the sum of cpu requests and sum of memory requests of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down. This value is a floating point number that can range between zero and one. | 0.5 -| `scale-down-non-empty-candidates-count` | Maximum number of non empty nodes considered in one iteration as candidates for scale down with drain
Lower value means better CA responsiveness but possible slower scale down latency
Higher value can affect CA performance with big clusters (hundreds of nodes)
Set to non positive value to turn this heuristic off - CA will not limit the number of nodes it considers." | 30 -| `scale-down-candidates-pool-ratio` | A ratio of nodes that are considered as additional non empty candidates for
scale down when some candidates from previous iteration are no longer valid
Lower value means better CA responsiveness but possible slower scale down latency
Higher value can affect CA performance with big clusters (hundreds of nodes)
Set to 1.0 to turn this heuristics off - CA will take all nodes as additional candidates. | 0.1 -| `scale-down-candidates-pool-min-count` | Minimum number of nodes that are considered as additional non empty candidates
for scale down when some candidates from previous iteration are no longer valid.
When calculating the pool size for additional candidates we take
`max(#nodes * scale-down-candidates-pool-ratio, scale-down-candidates-pool-min-count)` | 50 -| `scan-interval` | How often cluster is reevaluated for scale up or down | 10 seconds -| `max-nodes-total` | Maximum number of nodes in all node groups. Cluster autoscaler will not grow the cluster beyond this number. | 0 -| `cores-total` | Minimum and maximum number of cores in cluster, in the format \:\. Cluster autoscaler will not scale the cluster beyond these numbers. | 320000 -| `memory-total` | Minimum and maximum number of gigabytes of memory in cluster, in the format \:\. Cluster autoscaler will not scale the cluster beyond these numbers. | 6400000 -| `gpu-total` | Minimum and maximum number of different GPUs in cluster, in the format :\:\. Cluster autoscaler will not scale the cluster beyond these numbers. Can be passed multiple times. CURRENTLY THIS FLAG ONLY WORKS ON GKE. | "" -| `cloud-provider` | Cloud provider type. | gce -| `max-empty-bulk-delete` | Maximum number of empty nodes that can be deleted at the same time. | 10 -| `max-graceful-termination-sec` | Maximum number of seconds CA waits for pod termination when trying to scale down a node. | 600 -| `max-total-unready-percentage` | Maximum percentage of unready nodes in the cluster. After this is exceeded, CA halts operations | 45 -| `ok-total-unready-count` | Number of allowed unready nodes, irrespective of max-total-unready-percentage | 3 -| `max-node-provision-time` | Maximum time CA waits for node to be provisioned | 15 minutes -| `nodes` | sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: \:\: | "" -| `node-group-auto-discovery` | One or more definition(s) of node group auto-discovery.
A definition is expressed `<name of discoverer>:[<key>[=<value>]]`<br>
The `aws`, `gce`, `azure`, and `oci` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`<br>
GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10`
Azure matches by VMSS tags, similar to AWS. And you can optionally specify a default min and max size for VMSSs, e.g. `label:tag=tagKey,anotherTagKey=bar,min=0,max=600`.
OCI matches nodepools by Defined or Freeform tags in the given compartment, e.g. `clusterId:<clusterId>,compartmentId:<compartmentId>,nodepoolTags:<tagKey1>=<tagValue1>&<tagKey2>=<tagValue2>,min:<min>,max:<max>`. Defined tag keys should be given with a namespace, in the format `namespace.tagKey`.<br>
Can be used multiple times | "" -| `emit-per-nodegroup-metrics` | If true, emit per node group metrics. | false -| `estimator` | Type of resource estimator to be used in scale up | binpacking -| `expander` | Type of node group expander to be used in scale up. | random -| `ignore-daemonsets-utilization` | Whether DaemonSet pods will be ignored when calculating resource utilization for scaling down | false -| `ignore-mirror-pods-utilization` | Whether [Mirror pods](https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/) will be ignored when calculating resource utilization for scaling down | false -| `write-status-configmap` | Should CA write status information to a configmap | true -| `status-config-map-name` | The name of the status ConfigMap that CA writes | cluster-autoscaler-status -| `max-inactivity` | Maximum time from last recorded autoscaler activity before automatic restart | 10 minutes -| `max-failing-time` | Maximum time from last recorded successful autoscaler run before automatic restart | 15 minutes -| `balance-similar-node-groups` | Detect similar node groups and balance the number of nodes between them | false -| `balancing-ignore-label` | Define a node label that should be ignored when considering node group similarity. One label per flag occurrence. | "" -| `balancing-label` | Define a node label to use when comparing node group similarity. If set, all other comparison logic is disabled, and only labels are considered when comparing groups. One label per flag occurrence. | "" -| `node-autoprovisioning-enabled` | Should CA autoprovision node groups when needed | false -| `max-autoprovisioned-node-group-count` | The maximum number of autoprovisioned groups in the cluster | 15 -| `unremovable-node-recheck-timeout` | The timeout before we check again a node that couldn't be removed before | 5 minutes -| `expendable-pods-priority-cutoff` | Pods with priority below cutoff will be expendable. They can be killed without any consideration during scale down and they don't cause scale up. Pods with null priority (PodPriority disabled) are non expendable | -10 -| `regional` | Cluster is regional | false -| `leader-elect` | Start a leader election client and gain leadership before executing the main loop.
Enable this when running replicated components for high availability | true -| `leader-elect-lease-duration` | The duration that non-leader candidates will wait after observing a leadership
renewal until attempting to acquire leadership of a led but unrenewed leader slot.
This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate.
This is only applicable if leader election is enabled | 15 seconds -| `leader-elect-renew-deadline` | The interval between attempts by the active cluster-autoscaler to renew a leadership slot before it stops leading.
This must be less than or equal to the lease duration.
This is only applicable if leader election is enabled | 10 seconds -| `leader-elect-retry-period` | The duration the clients should wait between attempting acquisition and renewal of a leadership.
This is only applicable if leader election is enabled | 2 seconds -| `leader-elect-resource-lock` | The type of resource object that is used for locking during leader election.
Supported options are `leases` (default), `endpoints`, `endpointsleases`, `configmaps`, and `configmapsleases` | "leases" -| `aws-use-static-instance-list` | Should CA fetch instance types in runtime or use a static list. AWS only | false -| `skip-nodes-with-system-pods` | If true cluster autoscaler will never delete nodes with pods from kube-system (except for [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) or [mirror pods](https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/)) | true -| `skip-nodes-with-local-storage`| If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath | true -| `skip-nodes-with-custom-controller-pods` | If true cluster autoscaler will never delete nodes with pods owned by custom controllers | true -| `min-replica-count` | Minimum number or replicas that a replica set or replication controller should have to allow their pods deletion in scale down | 0 -| `daemonset-eviction-for-empty-nodes` | Whether DaemonSet pods will be gracefully terminated from empty nodes | false -| `daemonset-eviction-for-occupied-nodes` | Whether DaemonSet pods will be gracefully terminated from non-empty nodes | true -| `feature-gates` | A set of key=value pairs that describe feature gates for alpha/experimental features. | "" -| `cordon-node-before-terminating` | Should CA cordon nodes before terminating during downscale process | false -| `record-duplicated-events` | Enable the autoscaler to print duplicated events within a 5 minute window. | false -| `debugging-snapshot-enabled` | Whether the debugging snapshot of cluster autoscaler feature is enabled. | false -| `node-delete-delay-after-taint` | How long to wait before deleting a node after tainting it. | 5 seconds -| `enable-provisioning-requests` | Whether the clusterautoscaler will be handling the ProvisioningRequest CRs. | false +| Parameter | Description | Default | +| --- | --- | --- | +| `cluster-name` | Autoscaled cluster name, if available | "" +| `address` | The address to expose prometheus metrics | :8085 +| `kubernetes` | Kubernetes API Server location. Leave blank for default | "" +| `kubeconfig` | Path to kubeconfig file with authorization and API Server location information | "" +| `cloud-config` | The path to the cloud provider configuration file. Empty string for no configuration file | "" +| `namespace` | Namespace in which cluster-autoscaler run | "kube-system" +| `enforce-node-group-min-size` | Should CA scale up the node group to the configured min size if needed | false +| `scale-down-enabled` | Should CA scale down the cluster | true +| `scale-down-delay-after-add` | How long after scale up that scale down evaluation resumes | 10 minutes +| `scale-down-delay-after-delete` | How long after node deletion that scale down evaluation resumes, defaults to scan-interval | scan-interval +| `scale-down-delay-after-failure` | How long after scale down failure that scale down evaluation resumes | 3 minutes +| `scale-down-unneeded-time` | How long a node should be unneeded before it is eligible for scale down | 10 minutes +| `scale-down-unready-time` | How long an unready node should be unneeded before it is eligible for scale down | 20 minutes +| `scale-down-utilization-threshold` | The maximum value between the sum of cpu requests and sum of memory requests of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down. 
This value is a floating point number that can range between zero and one. | 0.5 +| `scale-down-non-empty-candidates-count` | Maximum number of non empty nodes considered in one iteration as candidates for scale down with drain
Lower value means better CA responsiveness but possible slower scale down latency
Higher value can affect CA performance with big clusters (hundreds of nodes)
Set to non positive value to turn this heuristic off - CA will not limit the number of nodes it considers." | 30 +| `scale-down-candidates-pool-ratio` | A ratio of nodes that are considered as additional non empty candidates for
scale down when some candidates from previous iteration are no longer valid
Lower value means better CA responsiveness but possible slower scale down latency
Higher value can affect CA performance with big clusters (hundreds of nodes)
Set to 1.0 to turn this heuristics off - CA will take all nodes as additional candidates. | 0.1 +| `scale-down-candidates-pool-min-count` | Minimum number of nodes that are considered as additional non empty candidates
for scale down when some candidates from previous iteration are no longer valid.
When calculating the pool size for additional candidates we take
`max(#nodes * scale-down-candidates-pool-ratio, scale-down-candidates-pool-min-count)` | 50 +| `scan-interval` | How often cluster is reevaluated for scale up or down | 10 seconds +| `max-nodes-total` | Maximum number of nodes in all node groups. Cluster autoscaler will not grow the cluster beyond this number. | 0 +| `cores-total` | Minimum and maximum number of cores in cluster, in the format \:\. Cluster autoscaler will not scale the cluster beyond these numbers. | 320000 +| `memory-total` | Minimum and maximum number of gigabytes of memory in cluster, in the format \:\. Cluster autoscaler will not scale the cluster beyond these numbers. | 6400000 +| `gpu-total` | Minimum and maximum number of different GPUs in cluster, in the format :\:\. Cluster autoscaler will not scale the cluster beyond these numbers. Can be passed multiple times. CURRENTLY THIS FLAG ONLY WORKS ON GKE. | "" +| `cloud-provider` | Cloud provider type. | gce +| `max-empty-bulk-delete` | Maximum number of empty nodes that can be deleted at the same time. | 10 +| `max-graceful-termination-sec` | Maximum number of seconds CA waits for pod termination when trying to scale down a node. | 600 +| `max-total-unready-percentage` | Maximum percentage of unready nodes in the cluster. After this is exceeded, CA halts operations | 45 +| `ok-total-unready-count` | Number of allowed unready nodes, irrespective of max-total-unready-percentage | 3 +| `max-node-provision-time` | Maximum time CA waits for node to be provisioned | 15 minutes +| `nodes` | sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: \:\: | "" +| `node-group-auto-discovery` | One or more definition(s) of node group auto-discovery.
A definition is expressed `<name of discoverer>:[<key>[=<value>]]`<br>
The `aws`, `gce`, and `azure` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`
GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10`
Azure matches by VMSS tags, similar to AWS. And you can optionally specify a default min and max size for VMSSs, e.g. `label:tag=tagKey,anotherTagKey=bar,min=0,max=600`.
Can be used multiple times | "" +| `emit-per-nodegroup-metrics` | If true, emit per node group metrics. | false +| `estimator` | Type of resource estimator to be used in scale up | binpacking +| `expander` | Type of node group expander to be used in scale up. | random +| `ignore-daemonsets-utilization` | Whether DaemonSet pods will be ignored when calculating resource utilization for scaling down | false +| `ignore-mirror-pods-utilization` | Whether [Mirror pods](https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/) will be ignored when calculating resource utilization for scaling down | false +| `write-status-configmap` | Should CA write status information to a configmap | true +| `status-config-map-name` | The name of the status ConfigMap that CA writes | cluster-autoscaler-status +| `max-inactivity` | Maximum time from last recorded autoscaler activity before automatic restart | 10 minutes +| `max-failing-time` | Maximum time from last recorded successful autoscaler run before automatic restart | 15 minutes +| `balance-similar-node-groups` | Detect similar node groups and balance the number of nodes between them | false +| `balancing-ignore-label` | Define a node label that should be ignored when considering node group similarity. One label per flag occurrence. | "" +| `balancing-label` | Define a node label to use when comparing node group similarity. If set, all other comparison logic is disabled, and only labels are considered when comparing groups. One label per flag occurrence. | "" +| `node-autoprovisioning-enabled` | Should CA autoprovision node groups when needed | false +| `max-autoprovisioned-node-group-count` | The maximum number of autoprovisioned groups in the cluster | 15 +| `unremovable-node-recheck-timeout` | The timeout before we check again a node that couldn't be removed before | 5 minutes +| `expendable-pods-priority-cutoff` | Pods with priority below cutoff will be expendable. They can be killed without any consideration during scale down and they don't cause scale up. Pods with null priority (PodPriority disabled) are non expendable | -10 +| `regional` | Cluster is regional | false +| `leader-elect` | Start a leader election client and gain leadership before executing the main loop.
Enable this when running replicated components for high availability | true +| `leader-elect-lease-duration` | The duration that non-leader candidates will wait after observing a leadership
renewal until attempting to acquire leadership of a led but unrenewed leader slot.
This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate.
This is only applicable if leader election is enabled | 15 seconds +| `leader-elect-renew-deadline` | The interval between attempts by the active cluster-autoscaler to renew a leadership slot before it stops leading.
This must be less than or equal to the lease duration.
This is only applicable if leader election is enabled | 10 seconds +| `leader-elect-retry-period` | The duration the clients should wait between attempting acquisition and renewal of a leadership.
This is only applicable if leader election is enabled | 2 seconds +| `leader-elect-resource-lock` | The type of resource object that is used for locking during leader election.
Supported options are `leases` (default), `endpoints`, `endpointsleases`, `configmaps`, and `configmapsleases` | "leases"
+| `aws-use-static-instance-list` | Should CA fetch instance types at runtime or use a static list. AWS only | false
+| `skip-nodes-with-system-pods` | If true cluster autoscaler will never delete nodes with pods from kube-system (except for [DaemonSet](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) or [mirror pods](https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/)) | true
+| `skip-nodes-with-local-storage`| If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath | true
+| `skip-nodes-with-custom-controller-pods` | If true cluster autoscaler will never delete nodes with pods owned by custom controllers | true
+| `min-replica-count` | Minimum number of replicas that a replica set or replication controller should have to allow their pods deletion in scale down | 0
+| `daemonset-eviction-for-empty-nodes` | Whether DaemonSet pods will be gracefully terminated from empty nodes | false
+| `daemonset-eviction-for-occupied-nodes` | Whether DaemonSet pods will be gracefully terminated from non-empty nodes | true
+| `feature-gates` | A set of key=value pairs that describe feature gates for alpha/experimental features. | ""
+| `cordon-node-before-terminating` | Should CA cordon nodes before terminating during downscale process | false
+| `record-duplicated-events` | Enable the autoscaler to print duplicated events within a 5 minute window. | false
+| `debugging-snapshot-enabled` | Whether the debugging snapshot of cluster autoscaler feature is enabled. | false
+| `node-delete-delay-after-taint` | How long to wait before deleting a node after tainting it. | 5 seconds
+| `enable-provisioning-requests` | Whether the cluster autoscaler will be handling the ProvisioningRequest CRs. | false
 
 # Troubleshooting
 
diff --git a/cluster-autoscaler/cloudprovider/oci/README.md b/cluster-autoscaler/cloudprovider/oci/README.md
index e0d883ce5074..83bd40cf552b 100644
--- a/cluster-autoscaler/cloudprovider/oci/README.md
+++ b/cluster-autoscaler/cloudprovider/oci/README.md
@@ -166,6 +166,13 @@ use-instance-principals = true
 n/a
 
+### Node Group Auto Discovery
+`--node-group-auto-discovery` could be given in below pattern. It would discover the nodepools under given compartment by matching the nodepool tags (either they are Freeform or Defined tags)
+```
+clusterId:<clusterId>,compartmentId:<compartmentId>,nodepoolTags:<tagKey1>=<tagValue1>&<tagKey2>=<tagValue2>,min:<min>,max:<max>
+```
+Auto discovery cannot be used together with static discovery (the `nodes` parameter), in order to prevent conflicts.
+
 ## Deployment
 
 ### Create OCI config secret (only if _not_ using Instance Principals)
@@ -272,6 +279,7 @@ kubectl apply -f ./cloudprovider/oci/examples/oci-nodepool-cluster-autoscaler-w-
 - Avoid manually changing pools that are managed by the Cluster Autoscaler. For example, do not add or remove nodes using kubectl, or using the Console (or the Oracle Cloud Infrastructure CLI or API).
 - `--node-group-auto-discovery` and `--node-autoprovisioning-enabled=true` are not supported.
+- `--node-group-auto-discovery` and the `nodes` parameter cannot be used together, as this can cause conflicts.
 - We set a `nvidia.com/gpu:NoSchedule` taint on nodes in a GPU enabled pools. 
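For reference, the `--node-group-auto-discovery` pattern documented above, filled in with the fixture values used by this series' tests (the OCIDs below are test placeholders, not real resources), might look like:

```
--node-group-auto-discovery=clusterId:ocid1.cluster.oc1.test-region.test,compartmentId:ocid1.compartment.oc1.test-region.test,nodepoolTags:ca-managed=true&namespace.foo=bar,min:1,max:5
```

The flag can be repeated once per node group definition, and each key in a definition is mandatory.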
## Helpful links diff --git a/cluster-autoscaler/cloudprovider/oci/common/oci_util.go b/cluster-autoscaler/cloudprovider/oci/common/oci_util.go index 781cc01c5337..7ebd84dc5f64 100644 --- a/cluster-autoscaler/cloudprovider/oci/common/oci_util.go +++ b/cluster-autoscaler/cloudprovider/oci/common/oci_util.go @@ -205,3 +205,21 @@ func GetAllPoolTypes(groups []string) (string, error) { } return ocidType, nil } + +// HasNodePoolTags checks if nodepoolTags is provided +func HasNodePoolTags(nodeGroupAutoDiscoveryList []string) (bool, error) { + instancePoolTagsFound := false + nodePoolTagsFound := false + for _, arg := range nodeGroupAutoDiscoveryList { + if strings.Contains(arg, "nodepoolTags") { + nodePoolTagsFound = true + } + if strings.Contains(arg, "instancepoolTags") { + instancePoolTagsFound = true + } + } + if instancePoolTagsFound == true && nodePoolTagsFound == true { + return nodePoolTagsFound, fmt.Errorf("can not use both instancepoolTags and nodepoolTags") + } + return nodePoolTagsFound, nil +} diff --git a/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go b/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go index 2038d88b8285..0257d3e19452 100644 --- a/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go @@ -153,7 +153,13 @@ func BuildOCI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover if err != nil { klog.Fatalf("Failed to get pool type: %v", err) } - if strings.HasPrefix(ocidType, npconsts.OciNodePoolResourceIdent) || opts.NodeGroupAutoDiscovery != nil { + nodepoolTagsFound, err := ocicommon.HasNodePoolTags(opts.NodeGroupAutoDiscovery) + if err != nil { + klog.Fatalf("Failed to get auto discovery tags: %v", err) + } + if strings.HasPrefix(ocidType, npconsts.OciNodePoolResourceIdent) && nodepoolTagsFound == true { + klog.Fatalf("-nodes and -node-group-auto-discovery parameters can not be used together.") + } else if strings.HasPrefix(ocidType, npconsts.OciNodePoolResourceIdent) || nodepoolTagsFound == true { manager, err := nodepools.CreateNodePoolManager(opts.CloudConfig, opts.NodeGroupAutoDiscovery, do, createKubeClient(opts)) if err != nil { klog.Fatalf("Could not create OCI OKE cloud provider: %v", err) From e79bffe3602012563f4518096a792d3a5056b5a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cgkazanci=E2=80=9D?= Date: Fri, 18 Oct 2024 11:18:30 +0100 Subject: [PATCH 5/8] small amendment in document --- cluster-autoscaler/cloudprovider/oci/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster-autoscaler/cloudprovider/oci/README.md b/cluster-autoscaler/cloudprovider/oci/README.md index 83bd40cf552b..01bab30c6328 100644 --- a/cluster-autoscaler/cloudprovider/oci/README.md +++ b/cluster-autoscaler/cloudprovider/oci/README.md @@ -278,7 +278,7 @@ kubectl apply -f ./cloudprovider/oci/examples/oci-nodepool-cluster-autoscaler-w- correctly (`oci-cloud-controller-manager`). - Avoid manually changing pools that are managed by the Cluster Autoscaler. For example, do not add or remove nodes using kubectl, or using the Console (or the Oracle Cloud Infrastructure CLI or API). -- `--node-group-auto-discovery` and `--node-autoprovisioning-enabled=true` are not supported. +- `--node-autoprovisioning-enabled=true` are not supported. - `--node-group-auto-discovery` and `node` parameters can not be used together as it can cause conflicts. 
- We set a `nvidia.com/gpu:NoSchedule` taint on nodes in a GPU enabled pools. From cf061f3194c09e5792fde22119efd3863d01d953 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cgkazanci=E2=80=9D?= Date: Fri, 18 Oct 2024 11:43:49 +0100 Subject: [PATCH 6/8] renaming validation method and returning instancepoolTagsFound variable as well. --- cluster-autoscaler/cloudprovider/oci/common/oci_util.go | 8 ++++---- .../cloudprovider/oci/instancepools/oci_cloud_provider.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/oci/common/oci_util.go b/cluster-autoscaler/cloudprovider/oci/common/oci_util.go index 7ebd84dc5f64..650bcb50f80c 100644 --- a/cluster-autoscaler/cloudprovider/oci/common/oci_util.go +++ b/cluster-autoscaler/cloudprovider/oci/common/oci_util.go @@ -206,8 +206,8 @@ func GetAllPoolTypes(groups []string) (string, error) { return ocidType, nil } -// HasNodePoolTags checks if nodepoolTags is provided -func HasNodePoolTags(nodeGroupAutoDiscoveryList []string) (bool, error) { +// HasNodeGroupTags checks if nodepoolTags is provided +func HasNodeGroupTags(nodeGroupAutoDiscoveryList []string) (bool, bool, error) { instancePoolTagsFound := false nodePoolTagsFound := false for _, arg := range nodeGroupAutoDiscoveryList { @@ -219,7 +219,7 @@ func HasNodePoolTags(nodeGroupAutoDiscoveryList []string) (bool, error) { } } if instancePoolTagsFound == true && nodePoolTagsFound == true { - return nodePoolTagsFound, fmt.Errorf("can not use both instancepoolTags and nodepoolTags") + return instancePoolTagsFound, nodePoolTagsFound, fmt.Errorf("can not use both instancepoolTags and nodepoolTags") } - return nodePoolTagsFound, nil + return instancePoolTagsFound, nodePoolTagsFound, nil } diff --git a/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go b/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go index 0257d3e19452..425f4350331c 100644 --- a/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go @@ -153,7 +153,7 @@ func BuildOCI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover if err != nil { klog.Fatalf("Failed to get pool type: %v", err) } - nodepoolTagsFound, err := ocicommon.HasNodePoolTags(opts.NodeGroupAutoDiscovery) + _, nodepoolTagsFound, err := ocicommon.HasNodeGroupTags(opts.NodeGroupAutoDiscovery) if err != nil { klog.Fatalf("Failed to get auto discovery tags: %v", err) } From 6396e63dcfa61f2a32ce6111b3098ca7cbb08bc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cgkazanci=E2=80=9D?= Date: Thu, 24 Oct 2024 16:46:48 +0100 Subject: [PATCH 7/8] addressed review feedback items --- .../cloudprovider/oci/common/oci_util.go | 5 +- .../oci/nodepools/oci_manager.go | 7 +- .../oci/nodepools/oci_manager_test.go | 104 +++++++++++------- 3 files changed, 74 insertions(+), 42 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/oci/common/oci_util.go b/cluster-autoscaler/cloudprovider/oci/common/oci_util.go index 650bcb50f80c..d1b6c91aa4b5 100644 --- a/cluster-autoscaler/cloudprovider/oci/common/oci_util.go +++ b/cluster-autoscaler/cloudprovider/oci/common/oci_util.go @@ -219,7 +219,10 @@ func HasNodeGroupTags(nodeGroupAutoDiscoveryList []string) (bool, bool, error) { } } if instancePoolTagsFound == true && nodePoolTagsFound == true { - return instancePoolTagsFound, nodePoolTagsFound, fmt.Errorf("can not use both instancepoolTags and nodepoolTags") + return instancePoolTagsFound, 
nodePoolTagsFound, fmt.Errorf("can not use both instancepoolTags and nodepoolTags in node-group-auto-discovery") + } + if len(nodeGroupAutoDiscoveryList) > 0 && instancePoolTagsFound == false && nodePoolTagsFound == false { + return instancePoolTagsFound, nodePoolTagsFound, fmt.Errorf("either instancepoolTags or nodepoolTags should be provided in node-group-auto-discovery") } return instancePoolTagsFound, nodePoolTagsFound, nil } diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go index fb44410c2a8e..8fdd35b38bf9 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go @@ -272,8 +272,7 @@ func nodePoolFromArg(value string) (*nodePool, error) { // nodeGroupFromArg parses a node group spec represented in the form of // `clusterId:,compartmentId:,nodepoolTags:=&=,min:,max:` -// and produces a node group auto discovery object, -// nodepoolTags are optional and CA will capture all nodes if no tags are provided. +// and produces a node group auto discovery object func nodeGroupFromArg(value string) (*nodeGroupAutoDiscovery, error) { // this regex will find the key-value pairs in any given order if separated with a colon regexPattern := `(?:` + compartmentId + `:(?P<` + compartmentId + `>[^,]+)` @@ -333,8 +332,12 @@ func nodeGroupFromArg(value string) (*nodeGroupAutoDiscovery, error) { parts := strings.Split(pair, "=") if len(parts) == 2 { spec.tags[parts[0]] = parts[1] + } else { + return nil, fmt.Errorf("nodepoolTags should be given in tagKey=tagValue format, this is not valid: %s", pair) } } + } else { + return nil, fmt.Errorf("failed to set %s, it is missing in node-group-auto-discovery parameter", nodepoolTags) } klog.Infof("node group auto discovery spec constructed: %+v", spec) diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go index 2f5bac21f824..80b7b1a64b66 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go @@ -390,16 +390,16 @@ func TestRemoveInstance(t *testing.T) { } func TestNodeGroupFromArg(t *testing.T) { - var nodeGroupArg = "clusterId:testClusterId,compartmentId:testCompartmentId,nodepoolTags:ca-managed=true&namespace.foo=bar,min:1,max:5" + var nodeGroupArg = "clusterId:ocid1.cluster.oc1.test-region.test,compartmentId:ocid1.compartment.oc1.test-region.test,nodepoolTags:ca-managed=true&namespace.foo=bar,min:1,max:5" nodeGroupAutoDiscovery, err := nodeGroupFromArg(nodeGroupArg) if err != nil { t.Errorf("Error: #{err}") } - if nodeGroupAutoDiscovery.clusterId != "testClusterId" { - t.Errorf("Error: clusterId should be testClusterId") + if nodeGroupAutoDiscovery.clusterId != "ocid1.cluster.oc1.test-region.test" { + t.Errorf("Error: clusterId should be ocid1.cluster.oc1.test-region.test") } - if nodeGroupAutoDiscovery.compartmentId != "testCompartmentId" { - t.Errorf("Error: compartmentId should be testCompartmentId") + if nodeGroupAutoDiscovery.compartmentId != "ocid1.compartment.oc1.test-region.test" { + t.Errorf("Error: compartmentId should be ocid1.compartment.oc1.test-region.test") } if nodeGroupAutoDiscovery.minSize != 1 { t.Errorf("Error: minSize should be 1") @@ -417,41 +417,67 @@ func TestNodeGroupFromArg(t *testing.T) { func TestValidateNodePoolTags(t *testing.T) { - var nodeGroupTags map[string]string = nil 
- var nodePoolTags map[string]string = nil - var definedTags map[string]map[string]interface{} = nil - - if validateNodepoolTags(nodeGroupTags, nodePoolTags, definedTags) == false { - t.Errorf("validateNodepoolTags shouldn't return false for empty tags map") - } - - nodeGroupTags = make(map[string]string) - nodeGroupTags["test"] = "test" - - if validateNodepoolTags(nodeGroupTags, nodePoolTags, definedTags) == true { - t.Errorf("validateNodepoolTags shouldn't return true for tags missing") - } - - nodePoolTags = make(map[string]string) - nodePoolTags["foo"] = "bar" - - if validateNodepoolTags(nodeGroupTags, nodePoolTags, definedTags) == true { - t.Errorf("validateNodepoolTags shouldn't return true for not matching tags") - } - - nodePoolTags["test"] = "test" - - if validateNodepoolTags(nodeGroupTags, nodePoolTags, definedTags) == false { - t.Errorf("validateNodepoolTags shouldn't return false for matching tags") + testCases := map[string]struct { + nodeGroupTags map[string]string + freeFormTags map[string]string + definedTags map[string]map[string]interface{} + expectedResult bool + }{ + "no-tags": { + nodeGroupTags: nil, + freeFormTags: nil, + definedTags: nil, + expectedResult: true, + }, + "node-group tags provided but no tags on nodepool": { + nodeGroupTags: map[string]string{ + "testTag": "testTagValue", + }, + freeFormTags: nil, + definedTags: nil, + expectedResult: false, + }, + "node-group tags and free-form tags do not match": { + nodeGroupTags: map[string]string{ + "testTag": "testTagValue", + }, + freeFormTags: map[string]string{ + "foo": "bar", + }, + definedTags: nil, + expectedResult: false, + }, + "free-form tags have required node-group tags": { + nodeGroupTags: map[string]string{ + "testTag": "testTagValue", + }, + freeFormTags: map[string]string{ + "foo": "bar", + "testTag": "testTagValue", + }, + definedTags: nil, + expectedResult: true, + }, + "defined tags have required node-group tags": { + nodeGroupTags: map[string]string{ + "ns.testTag": "testTagValue", + }, + freeFormTags: nil, + definedTags: map[string]map[string]interface{}{ + "ns": { + "testTag": "testTagValue", + }, + }, + expectedResult: true, + }, } - nodeGroupTags["ns.tag1"] = "tag2" - definedTagsMap := make(map[string]interface{}) - definedTagsMap["tag1"] = "tag2" - definedTags = make(map[string]map[string]interface{}) - definedTags["ns"] = definedTagsMap - - if validateNodepoolTags(nodeGroupTags, nodePoolTags, definedTags) == false { - t.Errorf("validateNodepoolTags shouldn't return false for namespaced tags") + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + result := validateNodepoolTags(tc.nodeGroupTags, tc.freeFormTags, tc.definedTags) + if result != tc.expectedResult { + t.Errorf("Testcase '%s' failed: got %t ; expected %t", name, result, tc.expectedResult) + } + }) } } From 1bf5f72b2bcd4f5da21300744fb2a7498748272e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cgkazanci=E2=80=9D?= Date: Thu, 24 Oct 2024 17:54:43 +0100 Subject: [PATCH 8/8] addressed review feedback items --- cluster-autoscaler/cloudprovider/oci/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster-autoscaler/cloudprovider/oci/README.md b/cluster-autoscaler/cloudprovider/oci/README.md index 01bab30c6328..640e70607d86 100644 --- a/cluster-autoscaler/cloudprovider/oci/README.md +++ b/cluster-autoscaler/cloudprovider/oci/README.md @@ -167,7 +167,7 @@ use-instance-principals = true n/a ### Node Group Auto Discovery -`--node-group-auto-discovery` could be given in below pattern. 
It would discover the nodepools under given compartment by matching the nodepool tags (either they are Freeform or Defined tags)
+`--node-group-auto-discovery` can be given in the pattern below. It discovers the nodepools under the given compartment by matching the nodepool tags (either Freeform or Defined tags). All of the parameters are mandatory.
 ```
clusterId:<clusterId>,compartmentId:<compartmentId>,nodepoolTags:<tagKey1>=<tagValue1>&<tagKey2>=<tagValue2>,min:<min>,max:<max>
```
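To make the tag-matching rule encoded by the table-driven test above easier to follow, here is a minimal, self-contained Go sketch of that rule; the function name `matchesNodeGroupTags` and its signature are illustrative only and are not taken from this patch. A node pool qualifies for auto discovery when every tag required by the spec is present either among its freeform tags or, addressed as `namespace.key`, among its defined tags.

```go
package main

import (
	"fmt"
	"strings"
)

// matchesNodeGroupTags restates the rule exercised by TestValidateNodePoolTags:
// every tag required by the node-group-auto-discovery spec must be present on
// the node pool, either as a freeform tag (key=value) or as a defined tag
// addressed with the namespaced "namespace.key" form. Illustrative sketch only.
func matchesNodeGroupTags(required, freeform map[string]string, defined map[string]map[string]interface{}) bool {
	for key, want := range required {
		// Freeform tags are a flat key/value map on the node pool.
		if got, ok := freeform[key]; ok && got == want {
			continue
		}
		// Defined tags live under a tag namespace; the spec refers to them as "ns.key".
		if parts := strings.SplitN(key, ".", 2); len(parts) == 2 {
			if got, ok := defined[parts[0]][parts[1]]; ok && fmt.Sprintf("%v", got) == want {
				continue
			}
		}
		// A single missing or mismatched tag disqualifies the node pool.
		return false
	}
	// With no required tags, every node pool is considered a match.
	return true
}

func main() {
	required := map[string]string{"ca-managed": "true", "namespace.foo": "bar"}
	freeform := map[string]string{"ca-managed": "true"}
	defined := map[string]map[string]interface{}{"namespace": {"foo": "bar"}}
	fmt.Println(matchesNodeGroupTags(required, freeform, defined)) // prints: true
}
```

Matching on tags rather than on an explicit list of pool OCIDs is what separates auto discovery from the static `nodes` parameter; as noted in the README changes above, the two modes cannot be combined.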