From 554d7f493eb2593fa5a25656f344fece8616fd72 Mon Sep 17 00:00:00 2001 From: Richard Burnison Date: Thu, 9 Jan 2025 15:21:38 -0500 Subject: [PATCH] Prevent exponential backoff from overflowing. When using `-cut-over-exponential-backoff` with `-default-retries` greater than `64`, the sleep interval will overflow and immediately begin retrying in rapid succession. This commit corrects the backoff algorithm and adds test for both "retry operation" functions. --- go/logic/migrator.go | 15 +++++---- go/logic/migrator_test.go | 67 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 7 deletions(-) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 751b54a08..707f167ed 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -24,6 +24,7 @@ import ( var ( ErrMigratorUnsupportedRenameAlter = errors.New("ALTER statement seems to RENAME the table. This is not supported, and you should run your RENAME outside gh-ost.") + RetrySleepFn = time.Sleep ) type ChangelogState string @@ -135,7 +136,7 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo for i := 0; i < maxRetries; i++ { if i != 0 { // sleep after previous iteration - time.Sleep(1 * time.Second) + RetrySleepFn(1 * time.Second) } err = operation() if err == nil { @@ -155,16 +156,16 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo // attempts are reached. Wait intervals between attempts obey a maximum of // `ExponentialBackoffMaxInterval`. func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error, notFatalHint ...bool) (err error) { - var interval int64 maxRetries := int(this.migrationContext.MaxRetries()) maxInterval := this.migrationContext.ExponentialBackoffMaxInterval for i := 0; i < maxRetries; i++ { - newInterval := int64(math.Exp2(float64(i - 1))) - if newInterval <= maxInterval { - interval = newInterval - } + interval := math.Min( + float64(maxInterval), + math.Max(1, math.Exp2(float64(i-1))), + ) + if i != 0 { - time.Sleep(time.Duration(interval) * time.Second) + RetrySleepFn(time.Duration(interval) * time.Second) } err = operation() if err == nil { diff --git a/go/logic/migrator_test.go b/go/logic/migrator_test.go index dfdfe5390..813909208 100644 --- a/go/logic/migrator_test.go +++ b/go/logic/migrator_test.go @@ -18,6 +18,7 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "github.com/testcontainers/testcontainers-go" @@ -378,6 +379,72 @@ func (suite *MigratorTestSuite) TestFoo() { suite.Require().Equal("_testing_del", tableName) } +func TestMigratorRetry(t *testing.T) { + oldRetrySleepFn := RetrySleepFn + defer func() { RetrySleepFn = oldRetrySleepFn }() + + migrationContext := base.NewMigrationContext() + migrationContext.SetDefaultNumRetries(100) + migrator := NewMigrator(migrationContext, "1.2.3") + + var sleeps = 0 + RetrySleepFn = func(duration time.Duration) { + assert.Equal(t, 1*time.Second, duration) + sleeps++ + } + + var tries = 0 + retryable := func() error { + tries++ + if tries < int(migrationContext.MaxRetries()) { + return errors.New("Backoff") + } + return nil + } + + result := migrator.retryOperation(retryable, false) + assert.NoError(t, result) + assert.Equal(t, sleeps, 99) + assert.Equal(t, tries, 100) +} + +func TestMigratorRetryWithExponentialBackoff(t *testing.T) { + oldRetrySleepFn := RetrySleepFn + defer func() { RetrySleepFn = oldRetrySleepFn }() + + migrationContext := base.NewMigrationContext() + migrationContext.SetDefaultNumRetries(100) + migrationContext.SetExponentialBackoffMaxInterval(42) + migrator := NewMigrator(migrationContext, "1.2.3") + + var sleeps = 0 + expected := []int{ + 1, 2, 4, 8, 16, 32, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, + } + RetrySleepFn = func(duration time.Duration) { + assert.Equal(t, time.Duration(expected[sleeps])*time.Second, duration) + sleeps++ + } + + var tries = 0 + retryable := func() error { + tries++ + if tries < int(migrationContext.MaxRetries()) { + return errors.New("Backoff") + } + return nil + } + + result := migrator.retryOperationWithExponentialBackoff(retryable, false) + assert.NoError(t, result) + assert.Equal(t, sleeps, 99) + assert.Equal(t, tries, 100) +} + func TestMigrator(t *testing.T) { suite.Run(t, new(MigratorTestSuite)) }