diff --git a/pkg/schedule/schedulers/OWNERS b/pkg/schedule/schedulers/OWNERS deleted file mode 100644 index ae96e4f1f42..00000000000 --- a/pkg/schedule/schedulers/OWNERS +++ /dev/null @@ -1,7 +0,0 @@ -# See the OWNERS docs at https://go.k8s.io/owners -options: - no_parent_owners: true -filters: - "(OWNERS|hot_region_config\\.go)$": - approvers: - - sig-critical-approvers-config diff --git a/plugin/scheduler_example/evict_leader.go b/plugin/scheduler_example/evict_leader.go index 77a2e11ea4d..7da73242b23 100644 --- a/plugin/scheduler_example/evict_leader.go +++ b/plugin/scheduler_example/evict_leader.go @@ -274,9 +274,21 @@ func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R args = append(args, handler.config.getRanges(id)...) } - handler.config.BuildWithArgs(args) - err := handler.config.Persist() + err := handler.config.BuildWithArgs(args) if err != nil { + handler.config.mu.Lock() + handler.config.cluster.ResumeLeaderTransfer(id) + handler.config.mu.Unlock() + handler.rd.JSON(w, http.StatusBadRequest, err.Error()) + return + } + + err = handler.config.Persist() + if err != nil { + handler.config.mu.Lock() + delete(handler.config.StoreIDWitRanges, id) + handler.config.cluster.ResumeLeaderTransfer(id) + handler.config.mu.Unlock() handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) } handler.rd.JSON(w, http.StatusOK, nil) diff --git a/server/schedulers/evict_leader.go b/server/schedulers/evict_leader.go index cbefceeb020..1c4cb51bf9f 100644 --- a/server/schedulers/evict_leader.go +++ b/server/schedulers/evict_leader.go @@ -98,6 +98,9 @@ func (conf *evictLeaderSchedulerConfig) getStores() []uint64 { } func (conf *evictLeaderSchedulerConfig) BuildWithArgs(args []string) error { + failpoint.Inject("buildWithArgsErr", func() { + failpoint.Return(errors.New("fail to build with args")) + }) if len(args) != 1 { return errs.ErrSchedulerConfig.FastGenByArgs("id") } @@ -390,8 +393,15 @@ func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R args = append(args, handler.config.getRanges(id)...) } - handler.config.BuildWithArgs(args) - err := handler.config.Persist() + err := handler.config.BuildWithArgs(args) + if err != nil { + handler.config.mu.Lock() + handler.config.cluster.ResumeLeaderTransfer(id) + handler.config.mu.Unlock() + handler.rd.JSON(w, http.StatusBadRequest, err.Error()) + return + } + err = handler.config.Persist() if err != nil { handler.config.removeStore(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) diff --git a/server/schedulers/grant_leader.go b/server/schedulers/grant_leader.go index 18be38f16a2..3e2484ecc1c 100644 --- a/server/schedulers/grant_leader.go +++ b/server/schedulers/grant_leader.go @@ -294,10 +294,17 @@ func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R args = append(args, handler.config.getRanges(id)...) } - handler.config.BuildWithArgs(args) - err := handler.config.Persist() + err := handler.config.BuildWithArgs(args) if err != nil { - handler.config.removeStore(id) + handler.config.mu.Lock() + handler.config.cluster.ResumeLeaderTransfer(id) + handler.config.mu.Unlock() + handler.rd.JSON(w, http.StatusBadRequest, err.Error()) + return + } + err = handler.config.Persist() + if err != nil { + _, _ = handler.config.removeStore(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index 1fc69c89e79..e2a0d794adb 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ b/tests/pdctl/scheduler/scheduler_test.go @@ -20,6 +20,7 @@ import ( "testing" "time" + "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/require" "github.com/tikv/pd/pkg/testutil" @@ -480,3 +481,61 @@ func TestScheduler(t *testing.T) { echo = mustExec([]string{"-u", pdAddr, "scheduler", "remove", "split-bucket-scheduler"}, nil) re.Contains(echo, "Success!") } + +func TestEvictLeaderScheduler(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cluster, err := tests.NewTestCluster(ctx, 1) + re.NoError(err) + defer cluster.Destroy() + err = cluster.RunInitialServers() + re.NoError(err) + cluster.WaitLeader() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := pdctlCmd.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + leaderServer := cluster.GetServer(cluster.GetLeader()) + re.NoError(leaderServer.BootstrapCluster()) + for _, store := range stores { + pdctl.MustPutStore(re, leaderServer.GetServer(), store) + } + + pdctl.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b")) + output, err := pdctl.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "2"}...) + re.NoError(err) + re.Contains(string(output), "Success!") + failpoint.Enable("github.com/tikv/pd/server/schedulers/buildWithArgsErr", "return(true)") + output, err = pdctl.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}...) + re.NoError(err) + re.Contains(string(output), "fail to build with args") + failpoint.Disable("github.com/tikv/pd/server/schedulers/buildWithArgsErr") + output, err = pdctl.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler"}...) + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = pdctl.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}...) + re.NoError(err) + re.Contains(string(output), "Success!") +}