From 8be198468a66bd7f949423897c2da38b4b61785b Mon Sep 17 00:00:00 2001 From: Bhaskarjyoti Bora Date: Mon, 28 Oct 2024 12:46:44 +0530 Subject: [PATCH] roachprod: fix issue with gcloud not able to handle concurrency The gcloud commands that update disk labels are run concurrently. However, we saw that gcloud is not able to handle that much concurrency when we try to run a cluster of 150 nodes with 4 disks each. So, instead of running the command concurrently for all disks per node, this change runs the command for only 2 disks per node concurrently (1 boot disk and 1 PD). Epic: None Release note: None --- pkg/roachprod/vm/gce/gcloud.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pkg/roachprod/vm/gce/gcloud.go b/pkg/roachprod/vm/gce/gcloud.go index 61744f256946..114eafbaca53 100644 --- a/pkg/roachprod/vm/gce/gcloud.go +++ b/pkg/roachprod/vm/gce/gcloud.go @@ -2323,8 +2323,11 @@ func propagateDiskLabels( if !useLocalSSD { // The persistent disks are already created. The disks are suffixed with an offset // which starts from 1. A total of "pdVolumeCount" disks are created. - for offset := 1; offset <= pdVolumeCount; offset++ { - g.Go(func() error { + g.Go(func() error { + // the loop is run inside the go-routine to ensure that we do not run all the gcloud commands. + // For a 150 node with 4 disks, we have seen that the gcloud command cannot handle so many concurrent + // commands. + for offset := 1; offset <= pdVolumeCount; offset++ { persistentDiskArgs := append([]string(nil), argsPrefix...) persistentDiskArgs = append(persistentDiskArgs, zoneArg...) // N.B. additional persistent disks are suffixed with the offset, starting at 1. @@ -2335,9 +2338,9 @@ func propagateDiskLabels( if err != nil { return errors.Wrapf(err, "Command: gcloud %s\nOutput: %s", persistentDiskArgs, output) } - return nil - }) - } + } + return nil + }) } } }