From 6fe0dde9393bc144be3980f68e4316e127e8aa9a Mon Sep 17 00:00:00 2001 From: "Lubomir I. Ivanov" Date: Thu, 9 Dec 2021 00:34:30 +0200 Subject: [PATCH] kinder: retry the 'etcd --version' during 'cluster-info' During CI the cluster-info fails with 'etcd --version' exiting with status 1, but locally this cannot be reproduced. Retry the command 10 times to try to avoid flakes. --- .../cluster/manager/actions/cluster-info.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kinder/pkg/cluster/manager/actions/cluster-info.go b/kinder/pkg/cluster/manager/actions/cluster-info.go index 599122f8..d166413b 100644 --- a/kinder/pkg/cluster/manager/actions/cluster-info.go +++ b/kinder/pkg/cluster/manager/actions/cluster-info.go @@ -18,9 +18,10 @@ package actions import ( "fmt" - "github.com/pkg/errors" "strings" + "github.com/pkg/errors" + versionutils "k8s.io/apimachinery/pkg/util/version" "k8s.io/kubeadm/kinder/pkg/cluster/status" ) @@ -66,12 +67,25 @@ func CluterInfo(c *status.Cluster) error { "--", } + var lines []string + var err error + // Get the version of etcdctl from the etcd binary + // Retry the version command for a while to avoid "exec" flakes versionArgs := append(etcdArgs, "etcd", "--version") - lines, err := cp1.Command("kubectl", versionArgs...).RunAndCapture() + versionArgs = append([]string{"--request-timeout=2"}, versionArgs...) // Ensure shorter timeout + for i := 0; i < 10; i++ { + lines, err = cp1.Command("kubectl", versionArgs...).RunAndCapture() + if err == nil { + break + } + cp1.Infof("Could not execute 'etcd --version' inside %q (attempt %d/%d): %v\n", cp1.Name(), i+1, 10, + errors.Wrap(err, strings.Join(lines, "\n"))) + } if err != nil { return err } + etcdctlVersion, err := parseEtcdctlVersion(lines) if err != nil { return err