Skip to content

Commit

Permalink
kv: add a backoff to the retry loop in db.Txn
Browse files Browse the repository at this point in the history
In rare cases (e.g. #77376), two transactions can get repeatedly
deadlocked while trying to write to the same key(s): one aborts the other,
but before it can proceed, the other transaction has restarted and
acquired a lock on the key again. This can result in the max
transaction retries being exceeded without either transaction
succeeding.

This commit adds a backoff to the transaction retry loop in `db.Txn`,
which will hopefully help one transaction slow down and let the other
one commit.

Fixes: #77376

Release note: None
  • Loading branch information
miraradeva committed Nov 14, 2024
1 parent 39e43b8 commit f003a9b
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions pkg/kv/txn.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"math"
"time"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/isolation"
Expand All @@ -23,6 +24,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/retry"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
Expand Down Expand Up @@ -1045,7 +1047,7 @@ func (e *AutoCommitError) Error() string {
func (txn *Txn) exec(ctx context.Context, fn func(context.Context, *Txn) error) (err error) {
// Run fn in a retry loop until we encounter a success or
// error condition this loop isn't capable of handling.
for attempt := 1; ; attempt++ {
for r := retry.Start(base.DefaultRetryOptions()); r.Next(); {
if err := ctx.Err(); err != nil {
return errors.Wrap(err, "txn exec")
}
Expand Down Expand Up @@ -1115,7 +1117,8 @@ func (txn *Txn) exec(ctx context.Context, fn func(context.Context, *Txn) error)
// txn.db.ctx.Settings == nil is only expected in tests.
maxRetries = int(MaxInternalTxnAutoRetries.Get(&txn.db.ctx.Settings.SV))
}
if attempt > maxRetries {
attempt := r.CurrentAttempt()
if attempt >= maxRetries {
// If the retries limit has been exceeded, rollback and return an error.
rollbackErr := txn.Rollback(ctx)
// NOTE: we don't errors.Wrap the most recent retry error because we want
Expand Down

0 comments on commit f003a9b

Please sign in to comment.