From 4471f23915e331bdc26b2aed988e46145b189ec3 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 16 Oct 2024 01:29:18 -0400 Subject: [PATCH 1/3] raft: rename confchange_v2_add_double_implicit to _single_ The test was only adding a single node, so the name was misleading. Epic: None Release note: None --- ..._double_implicit.txt => confchange_v2_add_single_implicit.txt} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pkg/raft/testdata/{confchange_v2_add_double_implicit.txt => confchange_v2_add_single_implicit.txt} (100%) diff --git a/pkg/raft/testdata/confchange_v2_add_double_implicit.txt b/pkg/raft/testdata/confchange_v2_add_single_implicit.txt similarity index 100% rename from pkg/raft/testdata/confchange_v2_add_double_implicit.txt rename to pkg/raft/testdata/confchange_v2_add_single_implicit.txt From 415787d9c87712fcc031916a76d13ca9e1ee671c Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 16 Oct 2024 01:39:16 -0400 Subject: [PATCH 2/3] raft: use 3x replication in confchange_v2_add_single_implicit This commit updates the confchange_v2_add_single_implicit test to start with 2 nodes so that the addition of a third voter does not regress the lead support and trip up the automatic transition out of the joint configuration. Epic: None Release note: None --- .../async_storage_writes_append_aba_race.txt | 2 +- .../confchange_v2_add_single_implicit.txt | 194 ++++++++++-------- .../testdata/fortification_checkquorum.txt | 1 - .../snapshot_succeed_via_app_resp.txt | 2 +- 4 files changed, 116 insertions(+), 83 deletions(-) diff --git a/pkg/raft/testdata/async_storage_writes_append_aba_race.txt b/pkg/raft/testdata/async_storage_writes_append_aba_race.txt index 99900880af2c..dbccbae9d633 100644 --- a/pkg/raft/testdata/async_storage_writes_append_aba_race.txt +++ b/pkg/raft/testdata/async_storage_writes_append_aba_race.txt @@ -497,7 +497,7 @@ stabilize 1 AppendThread->1 MsgStorageAppendResp Term:0 Log:2/12 INFO mark (term,index)=(2,12) mismatched the last accepted term 3 in unstable log; ignoring AppendThread->1 MsgStorageAppendResp Term:0 Log:3/12 - + raft-log 1 ---- 1/11 EntryNormal "" diff --git a/pkg/raft/testdata/confchange_v2_add_single_implicit.txt b/pkg/raft/testdata/confchange_v2_add_single_implicit.txt index c81fd58b3f3e..3a155354823c 100644 --- a/pkg/raft/testdata/confchange_v2_add_single_implicit.txt +++ b/pkg/raft/testdata/confchange_v2_add_single_implicit.txt @@ -4,132 +4,119 @@ # TODO(tbg): also verify that if the leader changes while in the joint state, the # new leader will auto-transition out of the joint state just the same. -# Bootstrap n1. -add-nodes 1 voters=(1) index=2 +# Bootstrap n1 and n2. We start with 2 nodes in this test so that the addition +# of a third voter does not regress the lead support and trip up the automatic +# transition out of the joint configuration. 
+add-nodes 2 voters=(1, 2) index=2 ---- -INFO 1 switched to configuration voters=(1) +INFO 1 switched to configuration voters=(1 2) INFO 1 became follower at term 0 -INFO newRaft 1 [peers: [1], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] +INFO newRaft 1 [peers: [1,2], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] +INFO 2 switched to configuration voters=(1 2) +INFO 2 became follower at term 0 +INFO newRaft 2 [peers: [1,2], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] campaign 1 ---- INFO 1 is starting a new election at term 0 INFO 1 became candidate at term 1 +INFO 1 [logterm: 1, index: 2] sent MsgVote request to 2 at term 1 -process-ready 1 +stabilize log-level=none ---- -Ready MustSync=true: -State:StateCandidate -HardState Term:1 Vote:1 Commit:2 Lead:0 LeadEpoch:0 -INFO 1 received MsgVoteResp from 1 at term 1 -INFO 1 has received 1 MsgVoteResp votes and 0 vote rejections -INFO 1 became leader at term 1 +ok +# Add v3 (with a joint configuration and an automatic transition out of joint). propose-conf-change 1 transition=implicit -v2 +v3 ---- ok -# Add n2. +# Add n3. add-nodes 1 ---- -INFO 2 switched to configuration voters=() -INFO 2 became follower at term 0 -INFO newRaft 2 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0] +INFO 3 switched to configuration voters=() +INFO 3 became follower at term 0 +INFO newRaft 3 [peers: [], term: 0, commit: 0, applied: 0, lastindex: 0, lastterm: 0] # n1 commits the conf change using itself as commit quorum, then starts catching up n2. # When that's done, it starts auto-transitioning out. Note that the snapshots propagating # the joint config have the AutoLeave flag set in their config. -stabilize 1 2 +stabilize ---- > 1 handling Ready Ready MustSync=true: - State:StateLeader - HardState Term:1 Vote:1 Commit:2 Lead:1 LeadEpoch:1 Entries: - 1/3 EntryNormal "" - 1/4 EntryConfChangeV2 v2 -> 1 handling Ready - Ready MustSync=true: - HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 - CommittedEntries: - 1/3 EntryNormal "" - 1/4 EntryConfChangeV2 v2 - INFO 1 switched to configuration voters=(1 2)&&(1) autoleave - INFO initiating automatic transition out of joint configuration voters=(1 2)&&(1) autoleave -> 1 handling Ready - Ready MustSync=true: - Entries: - 1/5 EntryConfChangeV2 + 1/4 EntryConfChangeV2 v3 Messages: - 1->2 MsgFortifyLeader Term:1 Log:0/0 - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2] + 1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v3] > 2 receiving messages - 1->2 MsgFortifyLeader Term:1 Log:0/0 - INFO 2 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1] - INFO 2 became follower at term 1 - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v2] - DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 + 1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v3] > 2 handling Ready Ready MustSync=true: - HardState Term:1 Commit:0 Lead:1 LeadEpoch:1 + Entries: + 1/4 EntryConfChangeV2 v3 Messages: - 2->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) + 2->1 MsgAppResp Term:1 Log:0/4 Commit:3 > 1 receiving messages - 2->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) - DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3 - DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1 sentCommit=0 matchCommit=0] - DEBUG 1 
[firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1 sentCommit=0 matchCommit=0] - DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=5 sentCommit=4 matchCommit=0 paused pendingSnap=4] + 2->1 MsgAppResp Term:1 Log:0/4 Commit:3 > 1 handling Ready - Ready MustSync=false: + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/4 EntryConfChangeV2 v3 Messages: - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[ - 1/4 EntryConfChangeV2 v2 - 1/5 EntryConfChangeV2 - ] - 1->2 MsgSnap Term:1 Log:0/0 - Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true + 1->2 MsgApp Term:1 Log:1/4 Commit:4 + INFO 1 switched to configuration voters=(1 2 3)&&(1 2) autoleave + INFO initiating automatic transition out of joint configuration voters=(1 2 3)&&(1 2) autoleave > 2 receiving messages - 1->2 MsgApp Term:1 Log:1/3 Commit:4 Entries:[ - 1/4 EntryConfChangeV2 v2 - 1/5 EntryConfChangeV2 - ] - DEBUG 2 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 - 1->2 MsgSnap Term:1 Log:0/0 - Snapshot: Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true - INFO log [committed=0, applied=0, applying=0, unstable.offset=1, unstable.offsetInProgress=1, len(unstable.Entries)=0] starts to restore snapshot [index: 4, term: 1] - INFO 2 switched to configuration voters=(1 2)&&(1) autoleave - INFO 2 [commit: 4, lastindex: 4, lastterm: 1] restored snapshot [index: 4, term: 1] - INFO 2 [commit: 4] restored snapshot [index: 4, term: 1] + 1->2 MsgApp Term:1 Log:1/4 Commit:4 +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/5 EntryConfChangeV2 + Messages: + 1->3 MsgFortifyLeader Term:1 Log:0/0 + 1->3 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v3] + 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryConfChangeV2] > 2 handling Ready Ready MustSync=true: - HardState Term:1 Commit:4 Lead:1 LeadEpoch:1 - Snapshot Index:4 Term:1 ConfState:Voters:[1 2] VotersOutgoing:[1] Learners:[] LearnersNext:[] AutoLeave:true + HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/4 EntryConfChangeV2 v3 Messages: - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) 2->1 MsgAppResp Term:1 Log:0/4 Commit:4 + INFO 2 switched to configuration voters=(1 2 3)&&(1 2) autoleave > 1 receiving messages - 2->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) - DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3 2->1 MsgAppResp Term:1 Log:0/4 Commit:4 - DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 2 [StateSnapshot match=4 next=5 sentCommit=4 matchCommit=4 paused pendingSnap=4] -> 1 handling Ready - Ready MustSync=false: - Messages: - 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryConfChangeV2] > 2 receiving messages 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryConfChangeV2] +> 3 receiving messages + 1->3 MsgFortifyLeader Term:1 Log:0/0 + INFO 3 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1] + INFO 3 became follower at term 1 + 1->3 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v3] + DEBUG 3 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 > 2 handling Ready Ready MustSync=true: Entries: 1/5 EntryConfChangeV2 Messages: 2->1 MsgAppResp Term:1 Log:0/5 Commit:4 +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:0 Lead:1 LeadEpoch:1 + 
Messages: + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 + 3->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) > 1 receiving messages 2->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 3->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 + 3->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) + DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 3 for index 3 + DEBUG 1 decreased progress of 3 to [StateProbe match=0 next=1 sentCommit=0 matchCommit=0] + DEBUG 1 [firstindex: 3, commit: 5] sent snapshot[index: 4, term: 1] to 3 [StateProbe match=0 next=1 sentCommit=0 matchCommit=0] + DEBUG 1 paused sending replication messages to 3 [StateSnapshot match=0 next=5 sentCommit=4 matchCommit=0 paused pendingSnap=4] > 1 handling Ready Ready MustSync=true: HardState Term:1 Vote:1 Commit:5 Lead:1 LeadEpoch:1 @@ -137,16 +124,63 @@ stabilize 1 2 1/5 EntryConfChangeV2 Messages: 1->2 MsgApp Term:1 Log:1/5 Commit:5 - INFO 1 switched to configuration voters=(1 2) + 1->3 MsgApp Term:1 Log:1/3 Commit:5 Entries:[ + 1/4 EntryConfChangeV2 v3 + 1/5 EntryConfChangeV2 + ] + 1->3 MsgSnap Term:1 Log:0/0 + Snapshot: Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1 2] Learners:[] LearnersNext:[] AutoLeave:true + INFO 1 switched to configuration voters=(1 2 3) > 2 receiving messages 1->2 MsgApp Term:1 Log:1/5 Commit:5 +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/3 Commit:5 Entries:[ + 1/4 EntryConfChangeV2 v3 + 1/5 EntryConfChangeV2 + ] + DEBUG 3 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 + 1->3 MsgSnap Term:1 Log:0/0 + Snapshot: Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1 2] Learners:[] LearnersNext:[] AutoLeave:true + INFO log [committed=0, applied=0, applying=0, unstable.offset=1, unstable.offsetInProgress=1, len(unstable.Entries)=0] starts to restore snapshot [index: 4, term: 1] + INFO 3 switched to configuration voters=(1 2 3)&&(1 2) autoleave + INFO 3 [commit: 4, lastindex: 4, lastterm: 1] restored snapshot [index: 4, term: 1] + INFO 3 [commit: 4] restored snapshot [index: 4, term: 1] > 2 handling Ready Ready MustSync=true: - HardState Term:1 Commit:5 Lead:1 LeadEpoch:1 + HardState Term:1 Vote:1 Commit:5 Lead:1 LeadEpoch:1 CommittedEntries: 1/5 EntryConfChangeV2 Messages: 2->1 MsgAppResp Term:1 Log:0/5 Commit:5 - INFO 2 switched to configuration voters=(1 2) + INFO 2 switched to configuration voters=(1 2 3) +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:4 Lead:1 LeadEpoch:1 + Snapshot Index:4 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[1 2] Learners:[] LearnersNext:[] AutoLeave:true + Messages: + 3->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) + 3->1 MsgAppResp Term:1 Log:0/4 Commit:4 > 1 receiving messages 2->1 MsgAppResp Term:1 Log:0/5 Commit:5 + 3->1 MsgAppResp Term:1 Log:0/3 Rejected (Hint: 0) + DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 3 for index 3 + 3->1 MsgAppResp Term:1 Log:0/4 Commit:4 + DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 3 [StateSnapshot match=4 next=5 sentCommit=4 matchCommit=4 paused pendingSnap=4] +> 1 handling Ready + Ready MustSync=false: + Messages: + 1->3 MsgApp Term:1 Log:1/4 Commit:5 Entries:[1/5 EntryConfChangeV2] +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/4 Commit:5 Entries:[1/5 EntryConfChangeV2] +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:5 Lead:1 LeadEpoch:1 + Entries: + 1/5 EntryConfChangeV2 + CommittedEntries: + 1/5 EntryConfChangeV2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/5 Commit:5 + INFO 3 
switched to configuration voters=(1 2 3)
+> 1 receiving messages
+  3->1 MsgAppResp Term:1 Log:0/5 Commit:5
diff --git a/pkg/raft/testdata/fortification_checkquorum.txt b/pkg/raft/testdata/fortification_checkquorum.txt
index 15daff97895a..80c249bf5505 100644
--- a/pkg/raft/testdata/fortification_checkquorum.txt
+++ b/pkg/raft/testdata/fortification_checkquorum.txt
@@ -86,4 +86,3 @@ DEBUG 1 has not received messages from a quorum of peers in the last election ti
 DEBUG 1 does not have store liveness support from a quorum of peers
 WARN 1 stepped down to follower since quorum is not active
 INFO 1 became follower at term 1
-
diff --git a/pkg/raft/testdata/snapshot_succeed_via_app_resp.txt b/pkg/raft/testdata/snapshot_succeed_via_app_resp.txt
index a965721d63f3..116f3cbaf8e2 100644
--- a/pkg/raft/testdata/snapshot_succeed_via_app_resp.txt
+++ b/pkg/raft/testdata/snapshot_succeed_via_app_resp.txt
@@ -91,7 +91,7 @@ stabilize 1
   Messages:
   1->3 MsgSnap Term:1 Log:0/0
     Snapshot: Index:11 Term:1 ConfState:Voters:[1 2 3] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false
- 
+
 status 1
 ----
 1: StateReplicate match=11 next=12 sentCommit=10 matchCommit=10

From 8105ddbf232c2065944e7f3b721bc52b0a1bb2ea Mon Sep 17 00:00:00 2001
From: Nathan VanBenschoten
Date: Mon, 7 Oct 2024 22:55:43 -0400
Subject: [PATCH 3/3] raft: don't propose config changes with regressed lead
 support

Fixes #125355.

If the lead support has not caught up from the previous configuration,
we must not propose another configuration change. Doing so would
compromise the lead support promise made by the previous configuration
and used as an expiration of a leader lease. Instead, we wait for the
lead support under the current configuration to catch up to the maximum
lead support reached under the previous config. If the lead support is
never able to catch up, the leader will eventually step down due to
CheckQuorum.

As a reminder, if we reject a config change, it will be reproposed
shortly, at which point the lead support will likely have caught up.

As a second reminder, we fortify both voters and learners. We also
always add new replicas as learners before promoting them to voters.
The combination here means that a new replica will commonly already be
supporting the leader by the time it is promoted to a voter and added
to the lead support computation.

Release note: None
---
 pkg/raft/confchange/validate.go               |  12 +
 pkg/raft/confchange/validate_test.go          |  24 +
 pkg/raft/raft.go                              |   1 +
 .../confchange_fortification_safety.txt       | 410 ++++++++++++++++++
 pkg/raft/tracker/fortificationtracker.go      |  76 +++-
 pkg/raft/tracker/fortificationtracker_test.go | 130 ++++++
 6 files changed, 650 insertions(+), 3 deletions(-)
 create mode 100644 pkg/raft/testdata/confchange_fortification_safety.txt

diff --git a/pkg/raft/confchange/validate.go b/pkg/raft/confchange/validate.go
index 8bb5a124f55d..ae34821b3385 100644
--- a/pkg/raft/confchange/validate.go
+++ b/pkg/raft/confchange/validate.go
@@ -18,6 +18,7 @@ type ValidationContext struct {
 	CurConfig                         *quorum.Config
 	Applied                           uint64
 	PendingConfIndex                  uint64
+	LeadSupportSafe                   bool
 	DisableValidationAgainstCurConfig bool
 }
 
@@ -35,6 +36,17 @@ func ValidateProp(ctx ValidationContext, cc pb.ConfChangeV2) error {
 			ctx.PendingConfIndex, ctx.Applied)
 	}
 
+	// If the lead support has not caught up from the previous configuration, we
+	// must not propose another configuration change. Doing so would compromise
+	// the lead support promise made by the previous configuration and used as an
+	// expiration of a leader lease. 
Instead, we wait for the lead support under + // the current configuration to catch up to the maximum lead support reached + // under the previous config. If the lead support is never able to catch up, + // the leader will eventually step down due to CheckQuorum. + if !ctx.LeadSupportSafe { + return errors.Errorf("lead support has not caught up to previous configuration") + } + // The DisableValidationAgainstCurConfig flag allows disabling config change // constraints that are guaranteed by the upper state machine layer (incorrect // ones will apply as no-ops). See raft.Config.DisableConfChangeValidation for diff --git a/pkg/raft/confchange/validate_test.go b/pkg/raft/confchange/validate_test.go index d4e95bba65f4..30197ce5c186 100644 --- a/pkg/raft/confchange/validate_test.go +++ b/pkg/raft/confchange/validate_test.go @@ -40,6 +40,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: changeNormal, }, @@ -49,6 +50,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 10, + LeadSupportSafe: true, }, cc: changeNormal, }, @@ -58,16 +60,29 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 15, + LeadSupportSafe: true, }, cc: changeNormal, expErr: "possible unapplied conf change at index 15 \\(applied to 10\\)", }, + { + name: "invalid, unsafe lead support", + ctx: ValidationContext{ + CurConfig: configNormal, + Applied: 10, + PendingConfIndex: 5, + LeadSupportSafe: false, + }, + cc: changeNormal, + expErr: "lead support has not caught up to previous configuration", + }, { name: "invalid, already in joint state", ctx: ValidationContext{ CurConfig: configJoint, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: changeNormal, expErr: "must transition out of joint config first", @@ -78,6 +93,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: changeLeaveJoint, expErr: "not in joint state; refusing empty conf change", @@ -88,6 +104,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configJoint, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: changeNormal, @@ -98,6 +115,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: changeLeaveJoint, @@ -108,6 +126,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeRemoveNode, NodeID: 3}, @@ -121,6 +140,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeRemoveNode, NodeID: 3}, @@ -133,6 +153,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeRemoveNode, NodeID: 3}, @@ -146,6 +167,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeAddLearnerNode, NodeID: 3}, @@ -159,6 +181,7 @@ func TestValidateProp(t *testing.T) { CurConfig: 
configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ @@ -172,6 +195,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go index b4296ccbda9e..3c9e2b1829c0 100644 --- a/pkg/raft/raft.go +++ b/pkg/raft/raft.go @@ -1634,6 +1634,7 @@ func stepLeader(r *raft, m pb.Message) error { CurConfig: &r.config, Applied: r.raftLog.applied, PendingConfIndex: r.pendingConfIndex, + LeadSupportSafe: r.fortificationTracker.ConfigChangeSafe(), DisableValidationAgainstCurConfig: r.disableConfChangeValidation, } if err := confchange.ValidateProp(ccCtx, cc.AsV2()); err != nil { diff --git a/pkg/raft/testdata/confchange_fortification_safety.txt b/pkg/raft/testdata/confchange_fortification_safety.txt new file mode 100644 index 000000000000..444abb78ff60 --- /dev/null +++ b/pkg/raft/testdata/confchange_fortification_safety.txt @@ -0,0 +1,410 @@ +# This test verifies that configuration changes are not permitted to compromise +# the safety of leader fortification. +# +# For details, see FortificationTracker.ConfigChangeSafe. +add-nodes 4 voters=(1, 2, 3) index=2 +---- +INFO 1 switched to configuration voters=(1 2 3) +INFO 1 became follower at term 0 +INFO newRaft 1 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] +INFO 2 switched to configuration voters=(1 2 3) +INFO 2 became follower at term 0 +INFO newRaft 2 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] +INFO 3 switched to configuration voters=(1 2 3) +INFO 3 became follower at term 0 +INFO newRaft 3 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] +INFO 4 switched to configuration voters=(1 2 3) +INFO 4 became follower at term 0 +INFO newRaft 4 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] + +campaign 1 +---- +INFO 1 is starting a new election at term 0 +INFO 1 became candidate at term 1 +INFO 1 [logterm: 1, index: 2] sent MsgVote request to 2 at term 1 +INFO 1 [logterm: 1, index: 2] sent MsgVote request to 3 at term 1 + +stabilize log-level=none +---- +ok + +# Withdraw store liveness support from 3 for 1 so that the lead support is +# fragile. +withdraw-support 3 1 +---- + 1 2 3 4 +1 1 1 1 1 +2 1 1 1 1 +3 x 1 1 1 +4 1 1 1 1 + +# Add v4. 
+propose-conf-change 1 +v4 +---- +ok + +stabilize 1 2 3 +---- +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/4 EntryConfChangeV2 v4 + Messages: + 1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] + 1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] +> 2 handling Ready + Ready MustSync=true: + Entries: + 1/4 EntryConfChangeV2 v4 + Messages: + 2->1 MsgAppResp Term:1 Log:0/4 Commit:3 +> 3 handling Ready + Ready MustSync=true: + Entries: + 1/4 EntryConfChangeV2 v4 + Messages: + 3->1 MsgAppResp Term:1 Log:0/4 Commit:3 +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/4 Commit:3 + 3->1 MsgAppResp Term:1 Log:0/4 Commit:3 +> 1 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/4 EntryConfChangeV2 v4 + Messages: + 1->2 MsgApp Term:1 Log:1/4 Commit:4 + 1->3 MsgApp Term:1 Log:1/4 Commit:4 + INFO 1 switched to configuration voters=(1 2 3 4) +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/4 Commit:4 +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/4 Commit:4 +> 1 handling Ready + Ready MustSync=false: + Messages: + 1->4 MsgFortifyLeader Term:1 Log:0/0 + 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v4] +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/4 EntryConfChangeV2 v4 + Messages: + 2->1 MsgAppResp Term:1 Log:0/4 Commit:4 + INFO 2 switched to configuration voters=(1 2 3 4) +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/4 EntryConfChangeV2 v4 + Messages: + 3->1 MsgAppResp Term:1 Log:0/4 Commit:4 + INFO 3 switched to configuration voters=(1 2 3 4) +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/4 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/4 Commit:4 + +store-liveness +---- + 1 2 3 4 +1 1 1 1 1 +2 1 1 1 1 +3 x 1 1 1 +4 1 1 1 1 + +print-fortification-state 1 +---- +1 : 1 +2 : 1 +3 : 1 + +# Try to demote v2. This should be rejected because v1's lead support under the +# current config has not caught up to the previous config. +propose-conf-change 1 +r2 l2 +---- +INFO 1 ignoring conf change {ConfChangeTransitionAuto [{ConfChangeRemoveNode 2} {ConfChangeAddLearnerNode 2}] []} at config voters=(1 2 3 4): lead support has not caught up to previous configuration + +# Fortify v4 so that lead support catches up. 
+stabilize 1 4 +---- +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/5 EntryNormal "" + Messages: + 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] + 1->3 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] +> 4 receiving messages + 1->4 MsgFortifyLeader Term:1 Log:0/0 + INFO 4 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1] + INFO 4 became follower at term 1 + 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v4] + DEBUG 4 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 +> 4 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:2 Lead:1 LeadEpoch:1 + Messages: + 4->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 + 4->1 MsgAppResp Term:1 Log:1/3 Rejected (Hint: 2) Commit:2 +> 1 receiving messages + 4->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 + 4->1 MsgAppResp Term:1 Log:1/3 Rejected (Hint: 2) Commit:2 + DEBUG 1 received MsgAppResp(rejected, hint: (index 2, term 1)) from 4 for index 3 + DEBUG 1 decreased progress of 4 to [StateProbe match=0 next=3 sentCommit=2 matchCommit=2] +> 1 handling Ready + Ready MustSync=false: + Messages: + 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[ + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + ] + 1->4 MsgApp Term:1 Log:1/2 Commit:4 Entries:[ + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + ] +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[ + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + ] + DEBUG 4 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 + 1->4 MsgApp Term:1 Log:1/2 Commit:4 Entries:[ + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + ] +> 4 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:4 Lead:1 LeadEpoch:1 + Entries: + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + CommittedEntries: + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + Messages: + 4->1 MsgAppResp Term:1 Log:1/3 Rejected (Hint: 2) Commit:2 + 4->1 MsgAppResp Term:1 Log:0/5 Commit:4 + INFO 4 switched to configuration voters=(1 2 3 4) +> 1 receiving messages + 4->1 MsgAppResp Term:1 Log:1/3 Rejected (Hint: 2) Commit:2 + DEBUG 1 received MsgAppResp(rejected, hint: (index 2, term 1)) from 4 for index 3 + 4->1 MsgAppResp Term:1 Log:0/5 Commit:4 + +store-liveness +---- + 1 2 3 4 +1 1 1 1 1 +2 1 1 1 1 +3 x 1 1 1 +4 1 1 1 1 + +print-fortification-state 1 +---- +1 : 1 +2 : 1 +3 : 1 +4 : 1 + +# Try to demote v2 again. This time, the proposal should be allowed. 
+propose-conf-change 1 +r2 l2 +---- +ok + +stabilize +---- +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 1->2 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] + 1->3 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] + 1->4 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] + 1->2 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] + 1->3 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 2 handling Ready + Ready MustSync=true: + Entries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 3 handling Ready + Ready MustSync=true: + Entries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 4 handling Ready + Ready MustSync=true: + Entries: + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:4 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 1 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 1->2 MsgApp Term:1 Log:1/6 Commit:5 + 1->3 MsgApp Term:1 Log:1/6 Commit:5 + 1->4 MsgApp Term:1 Log:1/6 Commit:5 + 1->2 MsgApp Term:1 Log:1/6 Commit:6 + 1->3 MsgApp Term:1 Log:1/6 Commit:6 + 1->4 MsgApp Term:1 Log:1/6 Commit:6 + INFO 1 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave + INFO initiating automatic transition out of joint configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/6 Commit:5 + 1->2 MsgApp Term:1 Log:1/6 Commit:6 +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/6 Commit:5 + 1->3 MsgApp Term:1 Log:1/6 Commit:6 +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/6 Commit:5 + 1->4 MsgApp Term:1 Log:1/6 Commit:6 +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 1->2 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] + 1->3 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] + 1->4 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:6 + INFO 2 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:6 + INFO 3 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 4 handling Ready + Ready 
MustSync=true: + HardState Term:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:6 + INFO 4 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:6 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:6 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:6 +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 2 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 3 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 4 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/7 Commit:6 + 3->1 MsgAppResp Term:1 Log:0/7 Commit:6 + 4->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 1 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 1->2 MsgApp Term:1 Log:1/7 Commit:7 + 1->3 MsgApp Term:1 Log:1/7 Commit:7 + 1->4 MsgApp Term:1 Log:1/7 Commit:7 + INFO 1 switched to configuration voters=(1 3 4) learners=(2) +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/7 Commit:7 +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/7 Commit:7 +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/7 Commit:7 +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/7 Commit:7 + INFO 2 switched to configuration voters=(1 3 4) learners=(2) +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/7 Commit:7 + INFO 3 switched to configuration voters=(1 3 4) learners=(2) +> 4 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/7 Commit:7 + INFO 4 switched to configuration voters=(1 3 4) learners=(2) +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/7 Commit:7 + 3->1 MsgAppResp Term:1 Log:0/7 Commit:7 + 4->1 MsgAppResp Term:1 Log:0/7 Commit:7 diff --git a/pkg/raft/tracker/fortificationtracker.go b/pkg/raft/tracker/fortificationtracker.go index 61872d3c4233..13755157d97a 100644 --- a/pkg/raft/tracker/fortificationtracker.go +++ b/pkg/raft/tracker/fortificationtracker.go @@ -148,6 +148,23 @@ func (ft *FortificationTracker) LeadSupportUntil(state pb.StateType) hlc.Timesta return ft.leaderMaxSupported.Load() } + // Compute the lead support using the current configuration and forward the + // leaderMaxSupported to avoid regressions when the configuration changes. 
+ leadSupportUntil := ft.computeLeadSupportUntil(state) + return ft.leaderMaxSupported.Forward(leadSupportUntil) +} + +// computeLeadSupportUntil computes the timestamp until which the leader is +// guaranteed fortification using the current quorum configuration. +// +// Unlike LeadSupportUntil, this computation does not provide a guarantee of +// monotonicity. Specifically, its result may regress after a configuration +// change. +func (ft *FortificationTracker) computeLeadSupportUntil(state pb.StateType) hlc.Timestamp { + if state != pb.StateLeader { + panic("computeLeadSupportUntil should only be called by the leader") + } + // TODO(arul): avoid this map allocation as we're calling LeadSupportUntil // from hot paths. supportExpMap := make(map[pb.PeerID]hlc.Timestamp) @@ -162,9 +179,7 @@ func (ft *FortificationTracker) LeadSupportUntil(state pb.StateType) hlc.Timesta supportExpMap[id] = curExp } } - - leadSupportUntil := ft.config.Voters.LeadSupportExpiration(supportExpMap) - return ft.leaderMaxSupported.Forward(leadSupportUntil) + return ft.config.Voters.LeadSupportExpiration(supportExpMap) } // CanDefortify returns whether the caller can safely[1] de-fortify the term @@ -184,6 +199,61 @@ func (ft *FortificationTracker) CanDefortify() bool { return ft.storeLiveness.SupportExpired(leaderMaxSupported) } +// ConfigChangeSafe returns whether it is safe to propose a configuration change +// or not, given the current state of lead support. +// +// If the lead support has not caught up from the previous configuration, we +// must not propose another configuration change. Doing so would compromise the +// lead support promise made by the previous configuration and used as an +// expiration of a leader lease. Instead, we wait for the lead support under the +// current configuration to catch up to the maximum lead support reached under +// the previous config. If the lead support is never able to catch up, the +// leader will eventually step down due to CheckQuorum. +// +// The following timeline illustrates the hazard that this check is guarding +// against: +// +// 1. configuration A=(r1, r2, r3), leader=r1 +// - lead_support=20 (r1=30, r2=20, r3=10) +// +// 2. config change #1 adds r4 to the group +// - configuration B=(r1, r2, r3, r4) +// +// 3. lead support appears to regress to 10 +// - lead_support=10 (r1=30, r2=20, r3=10, r4=0) +// +// 4. any majority quorum for leader election involves r1 or r2 +// - therefore, max lead support of 20 is “safe” +// - this is analogous to how the raft Leader Completeness invariant works +// even across config changes, using either (1) single addition/removal +// at-a-time changes, or (2) joint consensus. Either way, consecutive +// configs share overlapping majorities. +// +// 5. config change #2 adds r5 to the group +// - configuration C=(r1, r2, r3, r4, r5) +// +// 6. lead_support still at 10 +// - lead_support=10 (r1=30, r2=20, r3=10, r4=0, r5=0) +// - however, max lead support of 20 no longer “safe” +// +// 7. r3 can win election with support from r4 and r5 before time 20 +// - neither r1 nor r2 need to be involved +// - HAZARD! this could violate the original lead support promise +// +// To avoid this hazard, we must wait for the lead support under configuration B +// to catch up to the maximum lead support reached under configuration A before +// allowing the proposal of configuration C. This ensures that the overlapping +// majorities between subsequent configurations preserve the safety of lead +// support. 
+func (ft *FortificationTracker) ConfigChangeSafe() bool { + // A configuration change is only safe if the current configuration's lead + // support has caught up to the maximum lead support reached under the + // previous configuration, which is reflected in leaderMaxSupported. + // + // NB: Only run by the leader. + return ft.leaderMaxSupported.Load().LessEq(ft.computeLeadSupportUntil(pb.StateLeader)) +} + // QuorumActive returns whether the leader is currently supported by a quorum or // not. func (ft *FortificationTracker) QuorumActive() bool { diff --git a/pkg/raft/tracker/fortificationtracker_test.go b/pkg/raft/tracker/fortificationtracker_test.go index efd498f124da..896284e1d4c1 100644 --- a/pkg/raft/tracker/fortificationtracker_test.go +++ b/pkg/raft/tracker/fortificationtracker_test.go @@ -492,6 +492,136 @@ func TestCanDefortify(t *testing.T) { } } +// TestConfigChangeSafe tests whether a leader can safely propose a config +// change. +func TestConfigChangeSafe(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ts := func(ts int64) hlc.Timestamp { + return hlc.Timestamp{ + WallTime: ts, + } + } + + testCases := []struct { + afterConfigChange func(sl *mockStoreLiveness, tracker *FortificationTracker) + expConfigChangeSafe bool + expLeadSupportUntil hlc.Timestamp + }{ + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // Nothing. r4 not providing store liveness support or fortified. + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 providing store liveness support but not fortified. + sl.liveness[4] = makeMockLivenessEntry(40, ts(5)) + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified at earlier epoch. + sl.liveness[4] = makeMockLivenessEntry(40, ts(5)) + ft.RecordFortification(4, 39) + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, but support still lagging. + sl.liveness[4] = makeMockLivenessEntry(40, ts(5)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, support caught up. + sl.liveness[4] = makeMockLivenessEntry(40, ts(15)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(15), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, support caught up. + sl.liveness[4] = makeMockLivenessEntry(40, ts(25)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(15), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, support beyond previous config. + sl.liveness[2] = makeMockLivenessEntry(20, ts(25)) + sl.liveness[4] = makeMockLivenessEntry(40, ts(25)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(20), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 not providing store liveness support or fortified. However, + // support from other peers caught up. 
+ sl.liveness[1] = makeMockLivenessEntry(10, ts(15)) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(15), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 not providing store liveness support or fortified. However, + // support from other peers beyond previous config. + sl.liveness[1] = makeMockLivenessEntry(10, ts(20)) + sl.liveness[2] = makeMockLivenessEntry(20, ts(20)) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(20), + }, + } + + for _, tc := range testCases { + mockLiveness := makeMockStoreLiveness( + map[pb.PeerID]mockLivenessEntry{ + 1: makeMockLivenessEntry(10, ts(10)), + 2: makeMockLivenessEntry(20, ts(15)), + 3: makeMockLivenessEntry(30, ts(20)), + }, + ) + + cfg := quorum.MakeEmptyConfig() + for _, id := range []pb.PeerID{1, 2, 3} { + cfg.Voters[0][id] = struct{}{} + } + ft := NewFortificationTracker(&cfg, mockLiveness, raftlogger.DiscardLogger) + + // Fortify the leader before the configuration change. + ft.RecordFortification(1, 10) + ft.RecordFortification(2, 20) + ft.RecordFortification(3, 30) + require.Equal(t, ts(15), ft.LeadSupportUntil(pb.StateLeader)) + + // Perform a configuration change that adds r4 to the voter set. + cfg.Voters[0][4] = struct{}{} + + tc.afterConfigChange(&mockLiveness, ft) + + require.Equal(t, tc.expConfigChangeSafe, ft.ConfigChangeSafe()) + require.Equal(t, tc.expLeadSupportUntil, ft.LeadSupportUntil(pb.StateLeader)) + } +} + type mockLivenessEntry struct { epoch pb.Epoch ts hlc.Timestamp