diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 20ea200491d..b1a7a6aa70b 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -889,7 +889,7 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { if c.regionStats != nil { c.regionStats.ClearDefunctRegion(item.GetID()) } - c.labelLevelStats.ClearDefunctRegion(item.GetID()) + c.labelLevelStats.MarkDefunctRegion(item.GetID()) } regionUpdateCacheEventCounter.Inc() } @@ -1969,6 +1969,7 @@ func (c *RaftCluster) updateRegionsLabelLevelStats(regions []*core.RegionInfo) { for _, region := range regions { c.labelLevelStats.Observe(region, c.getStoresWithoutLabelLocked(region, core.EngineKey, core.EngineTiFlash), c.opt.GetLocationLabels()) } + c.labelLevelStats.ClearDefunctRegions() } func (c *RaftCluster) getRegionStoresLocked(region *core.RegionInfo) []*core.StoreInfo { diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index f3365b1665e..255b1e2b547 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -1082,6 +1082,7 @@ func TestRegionLabelIsolationLevel(t *testing.T) { opt.SetReplicationConfig(cfg) re.NoError(err) cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(ctx, cluster, nil) for i := uint64(1); i <= 4; i++ { var labels []*metapb.StoreLabel @@ -1116,13 +1117,42 @@ func TestRegionLabelIsolationLevel(t *testing.T) { StartKey: []byte{byte(1)}, EndKey: []byte{byte(2)}, } - r := core.NewRegionInfo(region, peers[0]) - re.NoError(cluster.putRegion(r)) + r1 := core.NewRegionInfo(region, peers[0]) + re.NoError(cluster.putRegion(r1)) - cluster.updateRegionsLabelLevelStats([]*core.RegionInfo{r}) + cluster.updateRegionsLabelLevelStats([]*core.RegionInfo{r1}) counter := cluster.labelLevelStats.GetLabelCounter() re.Equal(0, counter["none"]) re.Equal(1, counter["zone"]) + + region = &metapb.Region{ + Id: 10, + Peers: peers, + StartKey: []byte{byte(2)}, + EndKey: []byte{byte(3)}, + } + r2 := core.NewRegionInfo(region, peers[0]) + re.NoError(cluster.putRegion(r2)) + + cluster.updateRegionsLabelLevelStats([]*core.RegionInfo{r2}) + counter = cluster.labelLevelStats.GetLabelCounter() + re.Equal(0, counter["none"]) + re.Equal(2, counter["zone"]) + + // issue: https://github.com/tikv/pd/issues/8700 + // step1: heartbeat a overlap region, which is used to simulate the case that the region is merged. + // step2: update region 9 and region 10, which is used to simulate the case that patrol is triggered. + // We should only count region 9. + overlapRegion := r1.Clone( + core.WithStartKey(r1.GetStartKey()), + core.WithEndKey(r2.GetEndKey()), + core.WithLeader(r2.GetPeer(8)), + ) + re.NoError(cluster.HandleRegionHeartbeat(overlapRegion)) + cluster.updateRegionsLabelLevelStats([]*core.RegionInfo{r1, r2}) + counter = cluster.labelLevelStats.GetLabelCounter() + re.Equal(0, counter["none"]) + re.Equal(1, counter["zone"]) } func heartbeatRegions(re *require.Assertions, cluster *RaftCluster, regions []*core.RegionInfo) { diff --git a/server/statistics/region_collection.go b/server/statistics/region_collection.go index 8d4d6d1849c..fa2889a7bd3 100644 --- a/server/statistics/region_collection.go +++ b/server/statistics/region_collection.go @@ -320,6 +320,7 @@ type LabelStatistics struct { sync.RWMutex regionLabelStats map[uint64]string labelCounter map[string]int + defunctRegions map[uint64]struct{} } // NewLabelStatistics creates a new LabelStatistics. @@ -327,6 +328,7 @@ func NewLabelStatistics() *LabelStatistics { return &LabelStatistics{ regionLabelStats: make(map[uint64]string), labelCounter: make(map[string]int), + defunctRegions: make(map[uint64]struct{}), } } @@ -360,14 +362,26 @@ func (l *LabelStatistics) Reset() { regionLabelLevelGauge.Reset() } -// ClearDefunctRegion is used to handle the overlap region. -func (l *LabelStatistics) ClearDefunctRegion(regionID uint64) { +// MarkDefunctRegion is used to handle the overlap region. +// It is used to mark the region as defunct and remove it from the label statistics later. +func (l *LabelStatistics) MarkDefunctRegion(regionID uint64) { l.Lock() defer l.Unlock() - if label, ok := l.regionLabelStats[regionID]; ok { - l.labelCounter[label]-- - delete(l.regionLabelStats, regionID) + l.defunctRegions[regionID] = struct{}{} +} + +// ClearDefunctRegions is used to handle the overlap region. +// It is used to remove the defunct regions from the label statistics. +func (l *LabelStatistics) ClearDefunctRegions() { + l.Lock() + defer l.Unlock() + for regionID := range l.defunctRegions { + if label, ok := l.regionLabelStats[regionID]; ok { + l.labelCounter[label]-- + delete(l.regionLabelStats, regionID) + } } + l.defunctRegions = make(map[uint64]struct{}) } // GetLabelCounter is only used for tests.