From 727e7c46067b86df6e73b2f4b9af01c0b43d2b6e Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Mon, 9 Oct 2023 16:26:18 +0800 Subject: [PATCH 01/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cluster/calcium/node.go | 14 +++++++++++++- metrics/metrics.go | 31 +++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/cluster/calcium/node.go b/cluster/calcium/node.go index 212ee9fbb..4600d0488 100644 --- a/cluster/calcium/node.go +++ b/cluster/calcium/node.go @@ -7,6 +7,7 @@ import ( enginefactory "github.com/projecteru2/core/engine/factory" enginetypes "github.com/projecteru2/core/engine/types" "github.com/projecteru2/core/log" + "github.com/projecteru2/core/metrics" "github.com/projecteru2/core/resource/plugins" resourcetypes "github.com/projecteru2/core/resource/types" "github.com/projecteru2/core/types" @@ -89,7 +90,18 @@ func (c *Calcium) RemoveNode(ctx context.Context, nodename string) error { }, // then: remove node resource metadata func(ctx context.Context) error { - return c.rmgr.RemoveNode(ctx, nodename) + err := c.rmgr.RemoveNode(ctx, nodename) + if err != nil { + return err + } + enginefactory.RemoveEngineFromCache(ctx, node.Endpoint, node.Ca, node.Cert, node.Key) + metricsDescriptions, err := c.rmgr.GetMetricsDescription(ctx) + if err == nil { + logger.Error(ctx, err, "failed to get metrics description") + return err + } + metrics.Client.ResetCollectors(metricsDescriptions) + return nil }, // rollback: do nothing func(ctx context.Context, failureByCond bool) error { diff --git a/metrics/metrics.go b/metrics/metrics.go index aa311a140..4ec1179fb 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -85,6 +85,37 @@ func (m *Metrics) SendMetrics(ctx context.Context, metrics ...*plugintypes.Metri } } +// ResetCollectors 清除旧的 Collectors 并创建新的 Collectors +func (m *Metrics) ResetCollectors(metricsDescriptions []*plugintypes.MetricsDescription) { + for _, oldCollector := range m.Collectors { + prometheus.Unregister(oldCollector) + } + // 创建新的 Collectors map + newCollectors := map[string]prometheus.Collector{} + // 重新创建新的 Collectors + for _, desc := range metricsDescriptions { + switch desc.Type { + case gaugeType: + collector := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: desc.Name, + Help: desc.Help, + }, desc.Labels) + newCollectors[desc.Name] = collector + case counterType: + collector := prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: desc.Name, + Help: desc.Help, + }, desc.Labels) + newCollectors[desc.Name] = collector + } + } + + // 将新的 Collectors 添加到 Metrics 结构中 + m.Collectors = newCollectors + // 重新注册新的 Collectors + prometheus.MustRegister(maps.Values(m.Collectors)...) +} + // Lazy connect func (m *Metrics) checkConn(ctx context.Context) error { if m.statsdClient != nil { From 42333c1b4824abb91abf53d399a947a196ab93de Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Mon, 9 Oct 2023 16:31:34 +0800 Subject: [PATCH 02/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cluster/calcium/node.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cluster/calcium/node.go b/cluster/calcium/node.go index 4600d0488..4ba717b41 100644 --- a/cluster/calcium/node.go +++ b/cluster/calcium/node.go @@ -90,13 +90,13 @@ func (c *Calcium) RemoveNode(ctx context.Context, nodename string) error { }, // then: remove node resource metadata func(ctx context.Context) error { - err := c.rmgr.RemoveNode(ctx, nodename) + err = c.rmgr.RemoveNode(ctx, nodename) if err != nil { return err } enginefactory.RemoveEngineFromCache(ctx, node.Endpoint, node.Ca, node.Cert, node.Key) metricsDescriptions, err := c.rmgr.GetMetricsDescription(ctx) - if err == nil { + if err != nil { logger.Error(ctx, err, "failed to get metrics description") return err } From 276ffc15e14f28af2c54fbb71360bbc34404cda4 Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Tue, 10 Oct 2023 09:50:56 +0800 Subject: [PATCH 03/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cluster/calcium/node_test.go | 1 + metrics/metrics.go | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/cluster/calcium/node_test.go b/cluster/calcium/node_test.go index 839db530c..5fef0cce2 100644 --- a/cluster/calcium/node_test.go +++ b/cluster/calcium/node_test.go @@ -97,6 +97,7 @@ func TestRemoveNode(t *testing.T) { store.On("RemoveNode", mock.Anything, mock.Anything).Return(nil) rmgr := c.rmgr.(*resourcemocks.Manager) rmgr.On("RemoveNode", mock.Anything, mock.Anything).Return(nil) + rmgr.On("GetMetricsDescription", mock.Anything).Return(nil, nil) assert.NoError(t, c.RemoveNode(ctx, name)) store.AssertExpectations(t) rmgr.AssertExpectations(t) diff --git a/metrics/metrics.go b/metrics/metrics.go index 4ec1179fb..19bec72dc 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -87,9 +87,15 @@ func (m *Metrics) SendMetrics(ctx context.Context, metrics ...*plugintypes.Metri // ResetCollectors 清除旧的 Collectors 并创建新的 Collectors func (m *Metrics) ResetCollectors(metricsDescriptions []*plugintypes.MetricsDescription) { - for _, oldCollector := range m.Collectors { - prometheus.Unregister(oldCollector) + if len(metricsDescriptions) == 0 { + return } + if len(m.Collectors) > 0 { + for _, oldCollector := range m.Collectors { + prometheus.Unregister(oldCollector) + } + } + // 创建新的 Collectors map newCollectors := map[string]prometheus.Collector{} // 重新创建新的 Collectors From 4bf54c3f98923adf9d903cffc07b4bb953046695 Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Tue, 10 Oct 2023 10:33:58 +0800 Subject: [PATCH 04/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cluster/calcium/node_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster/calcium/node_test.go b/cluster/calcium/node_test.go index 5fef0cce2..48891b18d 100644 --- a/cluster/calcium/node_test.go +++ b/cluster/calcium/node_test.go @@ -45,7 +45,7 @@ func TestAddNode(t *testing.T) { rmgr.On("AddNode", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return( resourcetypes.Resources{}, nil) rmgr.On("RemoveNode", mock.Anything, mock.Anything).Return(nil) - + rmgr.On("GetMetricsDescription", mock.Anything).Return(nil, nil) // failed by store.AddNode store := c.store.(*storemocks.Store) store.On("AddNode", mock.Anything, mock.Anything).Return(nil, types.ErrMockError).Once() From 45a9bdb7ae0ec18a68a3961ca4d21610fb88fa7e Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Tue, 10 Oct 2023 10:51:50 +0800 Subject: [PATCH 05/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cluster/calcium/node.go | 7 +----- cluster/calcium/node_test.go | 2 -- metrics/metrics.go | 45 ++++++++++-------------------------- 3 files changed, 13 insertions(+), 41 deletions(-) diff --git a/cluster/calcium/node.go b/cluster/calcium/node.go index 4ba717b41..d158163d1 100644 --- a/cluster/calcium/node.go +++ b/cluster/calcium/node.go @@ -95,12 +95,7 @@ func (c *Calcium) RemoveNode(ctx context.Context, nodename string) error { return err } enginefactory.RemoveEngineFromCache(ctx, node.Endpoint, node.Ca, node.Cert, node.Key) - metricsDescriptions, err := c.rmgr.GetMetricsDescription(ctx) - if err != nil { - logger.Error(ctx, err, "failed to get metrics description") - return err - } - metrics.Client.ResetCollectors(metricsDescriptions) + metrics.Client.DeleteUnusedLabelValues(nodename) return nil }, // rollback: do nothing diff --git a/cluster/calcium/node_test.go b/cluster/calcium/node_test.go index 48891b18d..977346f6c 100644 --- a/cluster/calcium/node_test.go +++ b/cluster/calcium/node_test.go @@ -45,7 +45,6 @@ func TestAddNode(t *testing.T) { rmgr.On("AddNode", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return( resourcetypes.Resources{}, nil) rmgr.On("RemoveNode", mock.Anything, mock.Anything).Return(nil) - rmgr.On("GetMetricsDescription", mock.Anything).Return(nil, nil) // failed by store.AddNode store := c.store.(*storemocks.Store) store.On("AddNode", mock.Anything, mock.Anything).Return(nil, types.ErrMockError).Once() @@ -97,7 +96,6 @@ func TestRemoveNode(t *testing.T) { store.On("RemoveNode", mock.Anything, mock.Anything).Return(nil) rmgr := c.rmgr.(*resourcemocks.Manager) rmgr.On("RemoveNode", mock.Anything, mock.Anything).Return(nil) - rmgr.On("GetMetricsDescription", mock.Anything).Return(nil, nil) assert.NoError(t, c.RemoveNode(ctx, name)) store.AssertExpectations(t) rmgr.AssertExpectations(t) diff --git a/metrics/metrics.go b/metrics/metrics.go index 19bec72dc..65d4340ef 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -85,41 +85,20 @@ func (m *Metrics) SendMetrics(ctx context.Context, metrics ...*plugintypes.Metri } } -// ResetCollectors 清除旧的 Collectors 并创建新的 Collectors -func (m *Metrics) ResetCollectors(metricsDescriptions []*plugintypes.MetricsDescription) { - if len(metricsDescriptions) == 0 { - return - } - if len(m.Collectors) > 0 { - for _, oldCollector := range m.Collectors { - prometheus.Unregister(oldCollector) - } - } - - // 创建新的 Collectors map - newCollectors := map[string]prometheus.Collector{} - // 重新创建新的 Collectors - for _, desc := range metricsDescriptions { - switch desc.Type { - case gaugeType: - collector := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: desc.Name, - Help: desc.Help, - }, desc.Labels) - newCollectors[desc.Name] = collector - case counterType: - collector := prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: desc.Name, - Help: desc.Help, - }, desc.Labels) - newCollectors[desc.Name] = collector +// DeleteUnusedLabelValues 清除多余的metric标签值 +func (m *Metrics) DeleteUnusedLabelValues(nodeName string) { + for _, collector := range m.Collectors { + if c, ok := collector.(prometheus.Collector); ok { + if gauge, ok := c.(*prometheus.GaugeVec); ok { + // 清除不需要的标签值 + gauge.DeleteLabelValues(nodeName) + } else if counter, ok := c.(*prometheus.CounterVec); ok { + // 清除不需要的标签值 + counter.DeleteLabelValues(nodeName) + } + // 添加更多的条件来处理其他类型的Collector } } - - // 将新的 Collectors 添加到 Metrics 结构中 - m.Collectors = newCollectors - // 重新注册新的 Collectors - prometheus.MustRegister(maps.Values(m.Collectors)...) } // Lazy connect From b4ff7a69a853e99c792d6b7b2992ef52e4b4c74c Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Tue, 10 Oct 2023 10:57:37 +0800 Subject: [PATCH 06/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cluster/calcium/node_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cluster/calcium/node_test.go b/cluster/calcium/node_test.go index 977346f6c..07056362d 100644 --- a/cluster/calcium/node_test.go +++ b/cluster/calcium/node_test.go @@ -97,6 +97,7 @@ func TestRemoveNode(t *testing.T) { rmgr := c.rmgr.(*resourcemocks.Manager) rmgr.On("RemoveNode", mock.Anything, mock.Anything).Return(nil) assert.NoError(t, c.RemoveNode(ctx, name)) + store.AssertExpectations(t) rmgr.AssertExpectations(t) } From 148d1711a66ab911cab777509923757b262704b8 Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Tue, 10 Oct 2023 10:58:59 +0800 Subject: [PATCH 07/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cluster/calcium/node_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster/calcium/node_test.go b/cluster/calcium/node_test.go index 07056362d..839db530c 100644 --- a/cluster/calcium/node_test.go +++ b/cluster/calcium/node_test.go @@ -45,6 +45,7 @@ func TestAddNode(t *testing.T) { rmgr.On("AddNode", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return( resourcetypes.Resources{}, nil) rmgr.On("RemoveNode", mock.Anything, mock.Anything).Return(nil) + // failed by store.AddNode store := c.store.(*storemocks.Store) store.On("AddNode", mock.Anything, mock.Anything).Return(nil, types.ErrMockError).Once() @@ -97,7 +98,6 @@ func TestRemoveNode(t *testing.T) { rmgr := c.rmgr.(*resourcemocks.Manager) rmgr.On("RemoveNode", mock.Anything, mock.Anything).Return(nil) assert.NoError(t, c.RemoveNode(ctx, name)) - store.AssertExpectations(t) rmgr.AssertExpectations(t) } From 8ad00ce6f46722bc1df355ea98531ac08aa86780 Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Tue, 10 Oct 2023 15:47:03 +0800 Subject: [PATCH 08/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metrics/metrics.go | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 65d4340ef..425c1fc22 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -85,18 +85,33 @@ func (m *Metrics) SendMetrics(ctx context.Context, metrics ...*plugintypes.Metri } } -// DeleteUnusedLabelValues 清除多余的metric标签值 -func (m *Metrics) DeleteUnusedLabelValues(nodeName string) { +// DeleteUnusedLabelValues 清除多余的 metric 标签值 +func (m *Metrics) DeleteUnusedLabelValues(nodeNameToRemove string) { for _, collector := range m.Collectors { - if c, ok := collector.(prometheus.Collector); ok { - if gauge, ok := c.(*prometheus.GaugeVec); ok { - // 清除不需要的标签值 - gauge.DeleteLabelValues(nodeName) - } else if counter, ok := c.(*prometheus.CounterVec); ok { - // 清除不需要的标签值 - counter.DeleteLabelValues(nodeName) + switch c := collector.(type) { + case *prometheus.GaugeVec, *prometheus.CounterVec: + // 获取所有的标签值 + metrics, _ := prometheus.DefaultGatherer.Gather() + for _, metric := range metrics { + for _, mf := range metric.GetMetric() { + if len(mf.Label) == 0 { + continue + } + for _, label := range mf.Label { + if label.GetName() != "nodename" || label.GetValue() != nodeNameToRemove { + continue + } + // 删除符合条件的度量标签 + switch c := c.(type) { + case *prometheus.GaugeVec: + c.DeleteLabelValues(label.GetValue()) + case *prometheus.CounterVec: + c.DeleteLabelValues(label.GetValue()) + } + } + } } - // 添加更多的条件来处理其他类型的Collector + default: } } } From f4008243f631dc2d4be9c41f7bc35c33164f8262 Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Tue, 10 Oct 2023 19:07:02 +0800 Subject: [PATCH 09/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cluster/calcium/node.go | 2 +- metrics/handler.go | 3 ++ metrics/metrics.go | 89 +++++++++++++++++++++++++++++++---------- 3 files changed, 71 insertions(+), 23 deletions(-) diff --git a/cluster/calcium/node.go b/cluster/calcium/node.go index d158163d1..c142c8f4f 100644 --- a/cluster/calcium/node.go +++ b/cluster/calcium/node.go @@ -95,7 +95,7 @@ func (c *Calcium) RemoveNode(ctx context.Context, nodename string) error { return err } enginefactory.RemoveEngineFromCache(ctx, node.Endpoint, node.Ca, node.Cert, node.Key) - metrics.Client.DeleteUnusedLabelValues(nodename) + metrics.Client.DeleteInvalidNodeLabelValues([]string{nodename}) return nil }, // rollback: do nothing diff --git a/metrics/handler.go b/metrics/handler.go index 0b8a18113..b83bb444d 100644 --- a/metrics/handler.go +++ b/metrics/handler.go @@ -20,14 +20,17 @@ func (m *Metrics) ResourceMiddleware(cluster cluster.Cluster) func(http.Handler) if err != nil { logger.Error(ctx, err, "Get all nodes err") } + validNodeNameMap := make(map[string]*types.Node, 0) for node := range nodes { metrics, err := m.rmgr.GetNodeMetrics(ctx, node) if err != nil { logger.Error(ctx, err, "Get metrics failed") continue } + validNodeNameMap[node.Name] = node m.SendMetrics(ctx, metrics...) } + m.DeleteInvalidNodeCacheAndMetrics(validNodeNameMap) h.ServeHTTP(w, r) }) } diff --git a/metrics/metrics.go b/metrics/metrics.go index 425c1fc22..741716c9c 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -6,6 +6,7 @@ import ( "strconv" "sync" + enginefactory "github.com/projecteru2/core/engine/factory" "github.com/projecteru2/core/log" "github.com/projecteru2/core/resource" "github.com/projecteru2/core/resource/cobalt" @@ -85,34 +86,78 @@ func (m *Metrics) SendMetrics(ctx context.Context, metrics ...*plugintypes.Metri } } -// DeleteUnusedLabelValues 清除多余的 metric 标签值 -func (m *Metrics) DeleteUnusedLabelValues(nodeNameToRemove string) { +func (m *Metrics) DeleteInvalidNodeCacheAndMetrics(validNodeMap map[string]*types.Node) { + metricNodeNameMap := m.GetNodeNameMapFromMetrics() + // 计算差集 + invalidNodeNameList := make([]string, 0) + for nodeName := range metricNodeNameMap { + if node, exists := validNodeMap[nodeName]; !exists { + invalidNodeNameList = append(invalidNodeNameList, nodeName) + enginefactory.RemoveEngineFromCache(context.Background(), node.Endpoint, node.Ca, node.Cert, node.Key) + } + } + if len(invalidNodeNameList) > 0 { + m.DeleteInvalidNodeLabelValues(invalidNodeNameList) + } +} + +func (m *Metrics) GetNodeNameMapFromMetrics() map[string]bool { + metrics, _ := prometheus.DefaultGatherer.Gather() + nodeNameMap := make(map[string]bool, 0) + for _, metric := range metrics { + for _, mf := range metric.GetMetric() { + if len(mf.Label) == 0 { + continue + } + for _, label := range mf.Label { + if label.GetName() == "nodename" { + nodeNameMap[label.GetValue()] = true + break + } + } + } + } + return nodeNameMap +} + +// DeleteInvalidNodeLabelValues 清除多余的metric标签值 +func (m *Metrics) DeleteInvalidNodeLabelValues(nodeNameToRemoveList []string) { for _, collector := range m.Collectors { - switch c := collector.(type) { - case *prometheus.GaugeVec, *prometheus.CounterVec: - // 获取所有的标签值 - metrics, _ := prometheus.DefaultGatherer.Gather() - for _, metric := range metrics { - for _, mf := range metric.GetMetric() { - if len(mf.Label) == 0 { - continue - } - for _, label := range mf.Label { - if label.GetName() != "nodename" || label.GetValue() != nodeNameToRemove { - continue - } - // 删除符合条件的度量标签 - switch c := c.(type) { - case *prometheus.GaugeVec: - c.DeleteLabelValues(label.GetValue()) - case *prometheus.CounterVec: - c.DeleteLabelValues(label.GetValue()) + if collector == nil { + return + } + metrics, _ := prometheus.DefaultGatherer.Gather() + for _, metric := range metrics { + for _, mf := range metric.GetMetric() { + if len(mf.Label) == 0 { + continue + } + bFind := false + for _, label := range mf.Label { + for _, nodeNameToRemove := range nodeNameToRemoveList { + if label.GetName() == "nodename" && label.GetValue() == nodeNameToRemove { + bFind = true + break } } } + if !bFind { + continue + } + labels := prometheus.Labels{} + for _, label := range mf.Label { + labels[label.GetName()] = label.GetValue() + } + // 删除符合条件的度量标签 + switch c := collector.(type) { + case *prometheus.GaugeVec: + c.Delete(labels) + case *prometheus.CounterVec: + c.Delete(labels) + } } - default: } + // 添加更多的条件来处理其他类型的Collector } } From 0766b4983ee90e35dee8487fd230d471ddff1f9f Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Tue, 10 Oct 2023 19:24:49 +0800 Subject: [PATCH 10/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metrics/handler.go | 6 +++--- metrics/metrics.go | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/metrics/handler.go b/metrics/handler.go index b83bb444d..972e73298 100644 --- a/metrics/handler.go +++ b/metrics/handler.go @@ -20,17 +20,17 @@ func (m *Metrics) ResourceMiddleware(cluster cluster.Cluster) func(http.Handler) if err != nil { logger.Error(ctx, err, "Get all nodes err") } - validNodeNameMap := make(map[string]*types.Node, 0) + activeNodeMap := make(map[string]*types.Node, 0) for node := range nodes { metrics, err := m.rmgr.GetNodeMetrics(ctx, node) if err != nil { logger.Error(ctx, err, "Get metrics failed") continue } - validNodeNameMap[node.Name] = node + activeNodeMap[node.Name] = node m.SendMetrics(ctx, metrics...) } - m.DeleteInvalidNodeCacheAndMetrics(validNodeNameMap) + m.DeleteInactiveNodeCacheAndMetrics(activeNodeMap) h.ServeHTTP(w, r) }) } diff --git a/metrics/metrics.go b/metrics/metrics.go index 741716c9c..5a8d50467 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -86,18 +86,18 @@ func (m *Metrics) SendMetrics(ctx context.Context, metrics ...*plugintypes.Metri } } -func (m *Metrics) DeleteInvalidNodeCacheAndMetrics(validNodeMap map[string]*types.Node) { +func (m *Metrics) DeleteInactiveNodeCacheAndMetrics(activeNodeMap map[string]*types.Node) { metricNodeNameMap := m.GetNodeNameMapFromMetrics() // 计算差集 - invalidNodeNameList := make([]string, 0) + inactiveNodeNameList := make([]string, 0) for nodeName := range metricNodeNameMap { - if node, exists := validNodeMap[nodeName]; !exists { - invalidNodeNameList = append(invalidNodeNameList, nodeName) + if node, exists := activeNodeMap[nodeName]; !exists { + inactiveNodeNameList = append(inactiveNodeNameList, nodeName) enginefactory.RemoveEngineFromCache(context.Background(), node.Endpoint, node.Ca, node.Cert, node.Key) } } - if len(invalidNodeNameList) > 0 { - m.DeleteInvalidNodeLabelValues(invalidNodeNameList) + if len(inactiveNodeNameList) > 0 { + m.DeleteInvalidNodeLabelValues(inactiveNodeNameList) } } From b8a8e613cf330e1adec9911131114e84f82445da Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Tue, 10 Oct 2023 22:54:21 +0800 Subject: [PATCH 11/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cluster/calcium/node.go | 5 ++--- metrics/handler.go | 6 +++--- metrics/metrics.go | 43 ++++++++++++++++++++--------------------- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/cluster/calcium/node.go b/cluster/calcium/node.go index c142c8f4f..49c73e6fe 100644 --- a/cluster/calcium/node.go +++ b/cluster/calcium/node.go @@ -90,12 +90,11 @@ func (c *Calcium) RemoveNode(ctx context.Context, nodename string) error { }, // then: remove node resource metadata func(ctx context.Context) error { - err = c.rmgr.RemoveNode(ctx, nodename) - if err != nil { + if err = c.rmgr.RemoveNode(ctx, nodename); err != nil { return err } enginefactory.RemoveEngineFromCache(ctx, node.Endpoint, node.Ca, node.Cert, node.Key) - metrics.Client.DeleteInvalidNodeLabelValues([]string{nodename}) + metrics.Client.RemoveInvalidNodes([]string{nodename}) return nil }, // rollback: do nothing diff --git a/metrics/handler.go b/metrics/handler.go index 972e73298..baaaf1fb9 100644 --- a/metrics/handler.go +++ b/metrics/handler.go @@ -20,17 +20,17 @@ func (m *Metrics) ResourceMiddleware(cluster cluster.Cluster) func(http.Handler) if err != nil { logger.Error(ctx, err, "Get all nodes err") } - activeNodeMap := make(map[string]*types.Node, 0) + activeNodes := make(map[string]*types.Node, 0) for node := range nodes { metrics, err := m.rmgr.GetNodeMetrics(ctx, node) if err != nil { logger.Error(ctx, err, "Get metrics failed") continue } - activeNodeMap[node.Name] = node + activeNodes[node.Name] = node m.SendMetrics(ctx, metrics...) } - m.DeleteInactiveNodeCacheAndMetrics(activeNodeMap) + m.DeleteInactiveNodeCacheAndMetrics(ctx, activeNodes) h.ServeHTTP(w, r) }) } diff --git a/metrics/metrics.go b/metrics/metrics.go index 5a8d50467..a2d55d7c2 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -13,6 +13,8 @@ import ( plugintypes "github.com/projecteru2/core/resource/plugins/types" "github.com/projecteru2/core/types" "github.com/projecteru2/core/utils" + io_prometheus_client "github.com/prometheus/client_model/go" + "golang.org/x/exp/slices" statsdlib "github.com/CMGS/statsd" "github.com/prometheus/client_golang/prometheus" @@ -86,22 +88,20 @@ func (m *Metrics) SendMetrics(ctx context.Context, metrics ...*plugintypes.Metri } } -func (m *Metrics) DeleteInactiveNodeCacheAndMetrics(activeNodeMap map[string]*types.Node) { - metricNodeNameMap := m.GetNodeNameMapFromMetrics() +func (m *Metrics) DeleteInactiveNodeCacheAndMetrics(ctx context.Context, activeNodesMap map[string]*types.Node) { + metricNodeNameMap := m.getNodeNameMapFromMetrics() // 计算差集 - inactiveNodeNameList := make([]string, 0) + invalidNodes := make([]string, 0) for nodeName := range metricNodeNameMap { - if node, exists := activeNodeMap[nodeName]; !exists { - inactiveNodeNameList = append(inactiveNodeNameList, nodeName) - enginefactory.RemoveEngineFromCache(context.Background(), node.Endpoint, node.Ca, node.Cert, node.Key) + if node, exists := activeNodesMap[nodeName]; !exists { + invalidNodes = append(invalidNodes, nodeName) + enginefactory.RemoveEngineFromCache(ctx, node.Endpoint, node.Ca, node.Cert, node.Key) } } - if len(inactiveNodeNameList) > 0 { - m.DeleteInvalidNodeLabelValues(inactiveNodeNameList) - } + m.RemoveInvalidNodes(invalidNodes) } -func (m *Metrics) GetNodeNameMapFromMetrics() map[string]bool { +func (m *Metrics) getNodeNameMapFromMetrics() map[string]bool { metrics, _ := prometheus.DefaultGatherer.Gather() nodeNameMap := make(map[string]bool, 0) for _, metric := range metrics { @@ -120,8 +120,11 @@ func (m *Metrics) GetNodeNameMapFromMetrics() map[string]bool { return nodeNameMap } -// DeleteInvalidNodeLabelValues 清除多余的metric标签值 -func (m *Metrics) DeleteInvalidNodeLabelValues(nodeNameToRemoveList []string) { +// RemoveInvalidNodes 清除多余的metric标签值 +func (m *Metrics) RemoveInvalidNodes(invalidNodes []string) { + if len(invalidNodes) == 0 { + return + } for _, collector := range m.Collectors { if collector == nil { return @@ -132,16 +135,12 @@ func (m *Metrics) DeleteInvalidNodeLabelValues(nodeNameToRemoveList []string) { if len(mf.Label) == 0 { continue } - bFind := false - for _, label := range mf.Label { - for _, nodeNameToRemove := range nodeNameToRemoveList { - if label.GetName() == "nodename" && label.GetValue() == nodeNameToRemove { - bFind = true - break - } - } - } - if !bFind { + + if !slices.ContainsFunc(mf.Label, func(label *io_prometheus_client.LabelPair) bool { + return label.GetName() == "nodename" && slices.ContainsFunc(invalidNodes, func(nodename string) bool { + return label.GetValue() == nodename + }) + }) { continue } labels := prometheus.Labels{} From 858cdda5115aaf8c8da04cb8ca5ce238b0a316d1 Mon Sep 17 00:00:00 2001 From: nieyinliang Date: Tue, 10 Oct 2023 23:00:16 +0800 Subject: [PATCH 12/12] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug=EF=BC=9A=201.=20?= =?UTF-8?q?=E5=BD=93=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C?= =?UTF-8?q?=E9=87=8D=E7=BD=AEmetric=E7=9A=84collectors=EF=BC=8C=E6=B8=85?= =?UTF-8?q?=E9=99=A4=E8=A2=AB=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E7=9A=84?= =?UTF-8?q?metric=E7=BC=93=E5=AD=98=E6=95=B0=E6=8D=AE=202.=20=E5=BD=93?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=8A=82=E7=82=B9=E6=97=B6=EF=BC=8C=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E5=AF=B9=E5=BA=94=E7=9A=84client=E8=BF=9E=E6=8E=A5?= =?UTF-8?q?=E7=BC=93=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metrics/handler.go | 2 +- metrics/metrics.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/handler.go b/metrics/handler.go index baaaf1fb9..7fc556d8b 100644 --- a/metrics/handler.go +++ b/metrics/handler.go @@ -30,7 +30,7 @@ func (m *Metrics) ResourceMiddleware(cluster cluster.Cluster) func(http.Handler) activeNodes[node.Name] = node m.SendMetrics(ctx, metrics...) } - m.DeleteInactiveNodeCacheAndMetrics(ctx, activeNodes) + m.DeleteInactiveNodesWithCache(ctx, activeNodes) h.ServeHTTP(w, r) }) } diff --git a/metrics/metrics.go b/metrics/metrics.go index a2d55d7c2..762cd5117 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -88,7 +88,7 @@ func (m *Metrics) SendMetrics(ctx context.Context, metrics ...*plugintypes.Metri } } -func (m *Metrics) DeleteInactiveNodeCacheAndMetrics(ctx context.Context, activeNodesMap map[string]*types.Node) { +func (m *Metrics) DeleteInactiveNodesWithCache(ctx context.Context, activeNodesMap map[string]*types.Node) { metricNodeNameMap := m.getNodeNameMapFromMetrics() // 计算差集 invalidNodes := make([]string, 0)