From 91de60cf7bed23a4dc68fa6e35f9f0c0ae9ae449 Mon Sep 17 00:00:00 2001 From: Enrique Llorente Date: Wed, 13 Sep 2023 15:33:45 +0200 Subject: [PATCH] lb: Add creation polling timeout Add a polling timeout at infra service creation to fail if services do not receive IPs after a period of time. This will fail in cases where the lb provider has no more IPs or if there are failures like AWS not supporting multi protocol. If the loop is not broken kccm is stuck and no more services will be created. Signed-off-by: Enrique Llorente --- README.md | 3 ++- config/default/cloud-config | 3 ++- config/isolated/cloud-config | 3 ++- pkg/provider/cloud.go | 12 ++++++--- pkg/provider/cloud_test.go | 19 ++++++------- pkg/provider/loadbalancer.go | 32 ++++++++++++++++------ pkg/provider/loadbalancer_test.go | 45 +++++++++++++++++++++++++++---- 7 files changed, 89 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 8897e52aa..f3d366266 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,8 @@ Output: ```yaml kubeconfig: loadBalancer: - creationPollInterval: 30 + creationPollInterval: 5 + creationPollTimeout: 60 ``` ## How to build a Docker image diff --git a/config/default/cloud-config b/config/default/cloud-config index a453c5d2b..2e7169167 100755 --- a/config/default/cloud-config +++ b/config/default/cloud-config @@ -1,3 +1,4 @@ loadBalancer: - creationPollInterval: 30 + creationPollInterval: 5 + creationPollTimeout: 60 namespace: kvcluster diff --git a/config/isolated/cloud-config b/config/isolated/cloud-config index bf3b5cf58..101a1751a 100755 --- a/config/isolated/cloud-config +++ b/config/isolated/cloud-config @@ -1,5 +1,6 @@ loadBalancer: - creationPollInterval: 30 + creationPollInterval: 5 + creationPollTimeout: 60 namespace: kvcluster instancesV2: enabled: true diff --git a/pkg/provider/cloud.go b/pkg/provider/cloud.go index a51e65238..fab7da0ba 100644 --- a/pkg/provider/cloud.go +++ b/pkg/provider/cloud.go @@ -13,6 +13,7 @@ import ( "k8s.io/client-go/tools/clientcmd" cloudprovider "k8s.io/cloud-provider" "k8s.io/klog/v2" + "k8s.io/utils/pointer" kubevirtv1 "kubevirt.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -51,8 +52,12 @@ type CloudConfig struct { type LoadBalancerConfig struct { // Enabled activates the load balancer interface of the CCM Enabled bool `yaml:"enabled"` - // CreationPollInterval determines how many seconds to wait for the load balancer creation - CreationPollInterval int `yaml:"creationPollInterval"` + + // CreationPollInterval determines how many seconds to wait for the load balancer creation between retries + CreationPollInterval *int `yaml:"creationPollInterval,omitempty"` + + // CreationPollTimeout determines how many seconds to wait for the load balancer creation + CreationPollTimeout *int `yaml:"creationPollTimeout,omitempty"` } type InstancesV2Config struct { @@ -68,7 +73,8 @@ func createDefaultCloudConfig() CloudConfig { return CloudConfig{ LoadBalancer: LoadBalancerConfig{ Enabled: true, - CreationPollInterval: defaultLoadBalancerCreatePollInterval, + CreationPollInterval: pointer.Int(int(defaultLoadBalancerCreatePollInterval.Seconds())), + CreationPollTimeout: pointer.Int(int(defaultLoadBalancerCreatePollTimeout.Seconds())), }, InstancesV2: InstancesV2Config{ Enabled: true, diff --git a/pkg/provider/cloud_test.go b/pkg/provider/cloud_test.go index d2330963e..53d88d33d 100644 --- a/pkg/provider/cloud_test.go +++ b/pkg/provider/cloud_test.go @@ -14,7 +14,7 @@ var ( ns = "aNamespace" infraKubeConfigPath = "infraKubeConfig" minimalConf = fmt.Sprintf("kubeconfig: %s", infraKubeConfigPath) - loadbalancerConf = fmt.Sprintf("kubeconfig: %s\nloadBalancer:\n enabled: %t\n creationPollInterval: %d", infraKubeConfigPath, false, 3) + loadbalancerConf = fmt.Sprintf("kubeconfig: %s\nloadBalancer:\n enabled: %t\n creationPollInterval: %d\n creationPollTimeout: %d", infraKubeConfigPath, false, 3, 100) instancesConf = fmt.Sprintf("kubeconfig: %s\ninstancesV2:\n enabled: %t\n enableInstanceTypes: %t", infraKubeConfigPath, false, true) zoneAndRegionConf = fmt.Sprintf("kubeconfig: %s\ninstancesV2:\n zoneAndRegionEnabled: %t\n enableInstanceTypes: %t", infraKubeConfigPath, false, true) namespaceConf = fmt.Sprintf("kubeconfig: %s\nnamespace: %s", infraKubeConfigPath, ns) @@ -22,12 +22,13 @@ var ( invalidKubeconf = "bla" ) -func makeCloudConfig(kubeconfig, namespace string, loadbalancerEnabled, instancesEnabled bool, zoneAndRegionEnabled bool, lbCreationPollInterval int) CloudConfig { +func makeCloudConfig(kubeconfig, namespace string, loadbalancerEnabled, instancesEnabled bool, zoneAndRegionEnabled bool, lbCreationPollInterval int, lbCreationPollTimeout int) CloudConfig { return CloudConfig{ Kubeconfig: kubeconfig, LoadBalancer: LoadBalancerConfig{ Enabled: loadbalancerEnabled, - CreationPollInterval: lbCreationPollInterval, + CreationPollInterval: &lbCreationPollInterval, + CreationPollTimeout: &lbCreationPollTimeout, }, InstancesV2: InstancesV2Config{ Enabled: instancesEnabled, @@ -44,12 +45,12 @@ var _ = Describe("Cloud config", func() { Expect(config).To(Equal(expectedCloudConfig)) Expect(err).To(BeNil()) }, - Entry("With minimal configuration", minimalConf, makeCloudConfig(infraKubeConfigPath, "", true, true, true, 5), nil), - Entry("With loadBalancer configuration", loadbalancerConf, makeCloudConfig(infraKubeConfigPath, "", false, true, true, 3), nil), - Entry("With instance configuration", instancesConf, makeCloudConfig(infraKubeConfigPath, "", true, false, true, 5), nil), - Entry("With zone and region configuration", zoneAndRegionConf, makeCloudConfig(infraKubeConfigPath, "", true, true, false, 5), nil), - Entry("With namespace configuration", namespaceConf, makeCloudConfig(infraKubeConfigPath, ns, true, true, true, 5), nil), - Entry("With full configuration", allConf, makeCloudConfig(infraKubeConfigPath, ns, false, false, true, 5), nil), + Entry("With minimal configuration", minimalConf, makeCloudConfig(infraKubeConfigPath, "", true, true, true, 5, 300), nil), + Entry("With loadBalancer configuration", loadbalancerConf, makeCloudConfig(infraKubeConfigPath, "", false, true, true, 3, 100), nil), + Entry("With instance configuration", instancesConf, makeCloudConfig(infraKubeConfigPath, "", true, false, true, 5, 300), nil), + Entry("With zone and region configuration", zoneAndRegionConf, makeCloudConfig(infraKubeConfigPath, "", true, true, false, 5, 300), nil), + Entry("With namespace configuration", namespaceConf, makeCloudConfig(infraKubeConfigPath, ns, true, true, true, 5, 300), nil), + Entry("With full configuration", allConf, makeCloudConfig(infraKubeConfigPath, ns, false, false, true, 5, 300), nil), ) Describe("KubeVirt Cloud Factory", func() { diff --git a/pkg/provider/loadbalancer.go b/pkg/provider/loadbalancer.go index fbe3d6e31..702bb3845 100644 --- a/pkg/provider/loadbalancer.go +++ b/pkg/provider/loadbalancer.go @@ -16,8 +16,11 @@ import ( ) const ( - // Default interval in seconds between polling the service after creation - defaultLoadBalancerCreatePollInterval = 5 + // Default interval between polling the service after creation + defaultLoadBalancerCreatePollInterval = 5 * time.Second + + // Default timeout between polling the service after creation + defaultLoadBalancerCreatePollTimeout = 5 * time.Minute ) type loadbalancer struct { @@ -92,7 +95,7 @@ func (lb *loadbalancer) EnsureLoadBalancer(ctx context.Context, clusterName stri return nil, err } - err = wait.PollUntil(lb.getLoadBalancerCreatePollInterval()*time.Second, func() (bool, error) { + err = wait.PollWithContext(ctx, lb.getLoadBalancerCreatePollInterval(), lb.getLoadBalancerCreatePollTimeout(), func(ctx context.Context) (bool, error) { if len(lbService.Status.LoadBalancer.Ingress) != 0 { return true, nil } @@ -107,7 +110,7 @@ func (lb *loadbalancer) EnsureLoadBalancer(ctx context.Context, clusterName stri return true, nil } return false, nil - }, ctx.Done()) + }) if err != nil { klog.Errorf("Failed to poll LoadBalancer service: %v", err) return nil, err @@ -232,9 +235,22 @@ func (lb *loadbalancer) createLoadBalancerServicePorts(service *corev1.Service) } func (lb *loadbalancer) getLoadBalancerCreatePollInterval() time.Duration { - if lb.config.CreationPollInterval > 0 { - return time.Duration(lb.config.CreationPollInterval) + return convertLoadBalancerCreatePollConfig(lb.config.CreationPollInterval, defaultLoadBalancerCreatePollInterval, "interval") +} + +func (lb *loadbalancer) getLoadBalancerCreatePollTimeout() time.Duration { + return convertLoadBalancerCreatePollConfig(lb.config.CreationPollTimeout, defaultLoadBalancerCreatePollTimeout, "timeout") +} + +func convertLoadBalancerCreatePollConfig(configValue *int, defaultValue time.Duration, name string) time.Duration { + if configValue == nil { + klog.Infof("Setting creation poll %s to default value '%d'", name, defaultValue) + return defaultValue + } + if *configValue <= 0 { + klog.Infof("Creation poll %s %d' must be > 0. Setting to '%d'", name, *configValue, defaultValue) + return defaultValue } - klog.Infof("Creation poll interval '%d' must be > 0. Setting to '%d'", lb.config.CreationPollInterval, defaultLoadBalancerCreatePollInterval) - return defaultLoadBalancerCreatePollInterval + return time.Duration(*configValue) * time.Second + } diff --git a/pkg/provider/loadbalancer_test.go b/pkg/provider/loadbalancer_test.go index 4ba676308..ccb73e3bc 100644 --- a/pkg/provider/loadbalancer_test.go +++ b/pkg/provider/loadbalancer_test.go @@ -16,6 +16,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/pointer" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -251,7 +252,8 @@ var _ = Describe("LoadBalancer", func() { namespace: "test", client: c, config: LoadBalancerConfig{ - CreationPollInterval: 1, + CreationPollInterval: pointer.Int(1), + CreationPollTimeout: pointer.Int(5), }, } @@ -673,9 +675,9 @@ var _ = Describe("LoadBalancer", func() { Expect(err).Should(Equal(expectedError)) }) - It("Should return an error if polling LoadBalancer service 20-time fails", func() { + It("Should return an error if polling LoadBalancer service fails but success before timeout", func() { expectedError := errors.New("Test error - poll Service") - getCount := 20 + getCount := *lb.config.CreationPollTimeout / *lb.config.CreationPollInterval port := 30001 infraServiceExist := generateInfraService( tenantService, @@ -735,6 +737,39 @@ var _ = Describe("LoadBalancer", func() { ctrl.Finish() }) + It("Should return an error if polling LoadBalancer returns no IPs after some time", func() { + expectedError := errors.New("timed out waiting for the condition") + + c.EXPECT(). + Get(ctx, client.ObjectKey{Name: "af6ebf1722bb111e9b210d663bd873d9", Namespace: "test"}, gomock.AssignableToTypeOf(&corev1.Service{})). + Return(notFoundErr) + + infraService1 := generateInfraService( + tenantService, + []corev1.ServicePort{ + {Name: "port1", Protocol: corev1.ProtocolTCP, Port: 80, TargetPort: intstr.IntOrString{Type: intstr.Int, IntVal: 30001}}, + }, + ) + + c.EXPECT().Create(ctx, infraService1) + + infraService2 := infraService1.DeepCopy() + c.EXPECT().Get( + ctx, + client.ObjectKey{Name: "af6ebf1722bb111e9b210d663bd873d9", Namespace: "test"}, + gomock.AssignableToTypeOf(&corev1.Service{}), + ).Do(func(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) { + infraService2.DeepCopyInto(obj.(*corev1.Service)) + }).AnyTimes() + + _, err := lb.EnsureLoadBalancer(ctx, clusterName, tenantService, nodes) + Expect(err).Should(MatchError(expectedError)) + }) + + AfterAll(func() { + ctrl.Finish() + }) + }) Context("With updating loadbalancer", Ordered, func() { @@ -757,7 +792,7 @@ var _ = Describe("LoadBalancer", func() { namespace: "test", client: c, config: LoadBalancerConfig{ - CreationPollInterval: 1, + CreationPollInterval: pointer.Int(1), }, } @@ -1011,7 +1046,7 @@ var _ = Describe("LoadBalancer", func() { namespace: "test", client: c, config: LoadBalancerConfig{ - CreationPollInterval: 1, + CreationPollInterval: pointer.Int(1), }, }