From 9b1602bf06e54d9820a0bbc1284662b876410e76 Mon Sep 17 00:00:00 2001 From: Paul Maidment Date: Thu, 6 Feb 2025 14:46:35 +0200 Subject: [PATCH] MGMT-19840: Gather operational metrics from installercache The intent of this PR is to trace the following statistics, implemented as counts and incremented from applicable parts of the solution. counterDescriptionInstallerCachePrunedHardlink "Counts the number of times the installercache pruned a hardlink for being too old" counterDescriptionInstallerCacheGetReleaseOK "Counts the number of times that a release was fetched successfully" counterDescriptionInstallerCacheGetReleaseTimeout "Counts the number of times that a release timed out or had the context cancelled" counterDescriptionInstallerCacheGetReleaseError "Counts the number of times that a release fetch resulted in error" counterDescriptionInstallerCacheReleaseCached "Counts the number of times that a release was found in the cache" counterDescriptionInstallerCacheReleaseExtracted "Counts the number of times that a release was extracted" counterDescriptionInstallerCacheTryEviction "Counts the number of times that the eviction function was called" counterDescriptionInstallerCacheReleaseEvicted "Counts the number of times that a release was evicted" This, combined with the event based metrics gathered in openshift#7156 should provide enough information to track the behaviour of the cache. 
--- cmd/main.go | 2 +- internal/ignition/installmanifests_test.go | 20 +++-- internal/installercache/installercache.go | 20 ++++- .../installercache/installercache_test.go | 35 ++++++--- internal/metrics/metricsManager.go | 74 ++++++++++++++++++- internal/metrics/mock_metrics_manager_api.go | 60 +++++++++++++++ 6 files changed, 190 insertions(+), 21 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index e2a1c6094ca..799e0f55078 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -492,7 +492,7 @@ func main() { Options.BMConfig.S3EndpointURL = newUrl Options.InstallerCacheConfig.CacheDir = filepath.Join(Options.GeneratorConfig.GetWorkingDirectory(), "installercache") - installerCache, err := installercache.New(Options.InstallerCacheConfig, eventsHandler, diskStatsHelper, log) + installerCache, err := installercache.New(Options.InstallerCacheConfig, eventsHandler, metricsManager, diskStatsHelper, log) failOnError(err, "failed to instantiate installercache") generator := generator.New(log, objectHandler, Options.GeneratorConfig, providerRegistry, manifestsApi, eventsHandler, installerCache) diff --git a/internal/ignition/installmanifests_test.go b/internal/ignition/installmanifests_test.go index b84bfd93413..37a86992668 100644 --- a/internal/ignition/installmanifests_test.go +++ b/internal/ignition/installmanifests_test.go @@ -94,6 +94,7 @@ var _ = Describe("Bootstrap Ignition Update", func() { manifestsAPI *manifestsapi.MockManifestsAPI eventsHandler *eventsapi.MockHandler installerCache *installercache.Installers + metricsAPI *metrics.MockAPI ) BeforeEach(func() { @@ -105,12 +106,13 @@ var _ = Describe("Bootstrap Ignition Update", func() { err1 = os.WriteFile(examplePath, []byte(bootstrap1), 0600) Expect(err1).NotTo(HaveOccurred()) ctrl = gomock.NewController(GinkgoT()) + metricsAPI = metrics.NewMockAPI(ctrl) installerCacheConfig := installercache.Config{ CacheDir: filepath.Join(workDir, "some-dir", "installercache"), MaxCapacity: installercache.Size(5), MaxReleaseSize: 
installercache.Size(5), } - installerCache, err = installercache.New(installerCacheConfig, eventsHandler, metrics.NewOSDiskStatsHelper(logrus.New()), logrus.New()) + installerCache, err = installercache.New(installerCacheConfig, eventsHandler, metricsAPI, metrics.NewOSDiskStatsHelper(logrus.New()), logrus.New()) Expect(err).NotTo(HaveOccurred()) mockS3Client = s3wrapper.NewMockAPI(ctrl) manifestsAPI = manifestsapi.NewMockManifestsAPI(ctrl) @@ -262,6 +264,7 @@ SV4bRR9i0uf+xQ/oYRvugQ25Q7EahO5hJIWRf4aULbk36Zpw3++v2KFnF26zqwB6 ctrl *gomock.Controller manifestsAPI *manifestsapi.MockManifestsAPI eventsHandler eventsapi.Handler + metricsAPI *metrics.MockAPI installerCache *installercache.Installers ) @@ -286,12 +289,13 @@ SV4bRR9i0uf+xQ/oYRvugQ25Q7EahO5hJIWRf4aULbk36Zpw3++v2KFnF26zqwB6 ctrl = gomock.NewController(GinkgoT()) manifestsAPI = manifestsapi.NewMockManifestsAPI(ctrl) eventsHandler = eventsapi.NewMockHandler(ctrl) + metricsAPI = metrics.NewMockAPI(ctrl) installerCacheConfig := installercache.Config{ CacheDir: filepath.Join(workDir, "some-dir", "installercache"), MaxCapacity: installercache.Size(5), MaxReleaseSize: installercache.Size(5), } - installerCache, err = installercache.New(installerCacheConfig, eventsHandler, metrics.NewOSDiskStatsHelper(logrus.New()), logrus.New()) + installerCache, err = installercache.New(installerCacheConfig, eventsHandler, metricsAPI, metrics.NewOSDiskStatsHelper(logrus.New()), logrus.New()) Expect(err).NotTo(HaveOccurred()) }) @@ -456,6 +460,7 @@ var _ = Describe("createHostIgnitions", func() { workDir string manifestsAPI *manifestsapi.MockManifestsAPI eventsHandler eventsapi.Handler + metricsAPI *metrics.MockAPI installerCache *installercache.Installers ) @@ -476,13 +481,14 @@ var _ = Describe("createHostIgnitions", func() { mockS3Client = s3wrapper.NewMockAPI(ctrl) manifestsAPI = manifestsapi.NewMockManifestsAPI(ctrl) eventsHandler = eventsapi.NewMockHandler(ctrl) + metricsAPI = metrics.NewMockAPI(ctrl) cluster = testCluster() 
installerCacheConfig := installercache.Config{ CacheDir: filepath.Join(workDir, "some-dir", "installercache"), MaxCapacity: installercache.Size(5), MaxReleaseSize: installercache.Size(5), } - installerCache, err = installercache.New(installerCacheConfig, eventsHandler, metrics.NewOSDiskStatsHelper(logrus.New()), logrus.New()) + installerCache, err = installercache.New(installerCacheConfig, eventsHandler, metricsAPI, metrics.NewOSDiskStatsHelper(logrus.New()), logrus.New()) Expect(err).NotTo(HaveOccurred()) }) @@ -1779,6 +1785,7 @@ var _ = Describe("Bare metal host generation", func() { ctrl *gomock.Controller manifestsAPI *manifestsapi.MockManifestsAPI eventsHandler eventsapi.Handler + metricsAPI *metrics.MockAPI installerCache *installercache.Installers ) @@ -1789,13 +1796,14 @@ var _ = Describe("Bare metal host generation", func() { ctrl = gomock.NewController(GinkgoT()) manifestsAPI = manifestsapi.NewMockManifestsAPI(ctrl) eventsHandler = eventsapi.NewMockHandler(ctrl) + metricsAPI = metrics.NewMockAPI(ctrl) installerCacheConfig := installercache.Config{ CacheDir: filepath.Join(workDir, "some-dir", "installercache"), MaxCapacity: installercache.Size(5), MaxReleaseSize: installercache.Size(5), ReleaseFetchRetryInterval: 1 * time.Microsecond, } - installerCache, err = installercache.New(installerCacheConfig, eventsHandler, metrics.NewOSDiskStatsHelper(logrus.New()), logrus.New()) + installerCache, err = installercache.New(installerCacheConfig, eventsHandler, metricsAPI, metrics.NewOSDiskStatsHelper(logrus.New()), logrus.New()) Expect(err).NotTo(HaveOccurred()) }) @@ -1889,6 +1897,7 @@ var _ = Describe("Import Cluster TLS Certs for ephemeral installer", func() { ctrl *gomock.Controller manifestsAPI *manifestsapi.MockManifestsAPI eventsHandler eventsapi.Handler + metricsAPI *metrics.MockAPI installerCache *installercache.Installers ) @@ -1920,13 +1929,14 @@ var _ = Describe("Import Cluster TLS Certs for ephemeral installer", func() { ctrl = 
gomock.NewController(GinkgoT()) manifestsAPI = manifestsapi.NewMockManifestsAPI(ctrl) eventsHandler = eventsapi.NewMockHandler(ctrl) + metricsAPI = metrics.NewMockAPI(ctrl) installerCacheConfig := installercache.Config{ CacheDir: filepath.Join(workDir, "some-dir", "installercache"), MaxCapacity: installercache.Size(5), MaxReleaseSize: installercache.Size(5), ReleaseFetchRetryInterval: 1 * time.Microsecond, } - installerCache, err = installercache.New(installerCacheConfig, eventsHandler, metrics.NewOSDiskStatsHelper(logrus.New()), logrus.New()) + installerCache, err = installercache.New(installerCacheConfig, eventsHandler, metricsAPI, metrics.NewOSDiskStatsHelper(logrus.New()), logrus.New()) Expect(err).NotTo(HaveOccurred()) }) diff --git a/internal/installercache/installercache.go b/internal/installercache/installercache.go index 24f9efc5ac8..216252214c8 100644 --- a/internal/installercache/installercache.go +++ b/internal/installercache/installercache.go @@ -36,6 +36,7 @@ type Installers struct { eventsHandler eventsapi.Handler diskStatsHelper metrics.DiskStatsHelper config Config + metricsAPI metrics.API } type Size int64 @@ -116,7 +117,7 @@ func (rl *Release) Cleanup(ctx context.Context) error { } // New constructs an installer cache with a given storage capacity -func New(config Config, eventsHandler eventsapi.Handler, diskStatsHelper metrics.DiskStatsHelper, log logrus.FieldLogger) (*Installers, error) { +func New(config Config, eventsHandler eventsapi.Handler, metricsAPI metrics.API, diskStatsHelper metrics.DiskStatsHelper, log logrus.FieldLogger) (*Installers, error) { if config.MaxCapacity > 0 && config.MaxReleaseSize == 0 { return nil, fmt.Errorf("config.MaxReleaseSize (%d bytes) must not be zero", config.MaxReleaseSize) } @@ -128,6 +129,7 @@ func New(config Config, eventsHandler eventsapi.Handler, diskStatsHelper metrics eventsHandler: eventsHandler, diskStatsHelper: diskStatsHelper, config: config, + metricsAPI: metricsAPI, }, nil } @@ -138,14 +140,20 @@ 
func (i *Installers) Get(ctx context.Context, releaseID, releaseIDMirror, pullSe for { select { case <-ctx.Done(): - return nil, ctx.Err() + err := ctx.Err() + if err == context.DeadlineExceeded { + i.metricsAPI.InstallerCacheGetReleaseTimeout(releaseID, true) + } + return nil, err default: release, err := i.get(releaseID, releaseIDMirror, pullSecret, ocRelease, ocpVersion, clusterID) if err == nil { + i.metricsAPI.InstallerCacheGetReleaseOK(release.releaseID, release.cached) return release, nil } _, isCapacityError := err.(*errorInsufficientCacheCapacity) if !isCapacityError { + i.metricsAPI.InstallerCacheGetReleaseError(releaseID, release.cached) return nil, errors.Wrapf(err, "failed to get installer path for release %s", releaseID) } time.Sleep(i.config.ReleaseFetchRetryInterval) @@ -217,6 +225,7 @@ func (i *Installers) get(releaseID, releaseIDMirror, pullSecret string, ocReleas if err != nil { return nil, err } + i.metricsAPI.InstallerCacheGetReleaseOK(release.releaseID, release.cached) return release, nil } @@ -247,6 +256,7 @@ func (i *Installers) shouldEvict(totalUsed int64) (shouldEvict bool) { // // Locking must be done outside evict() to avoid contentions. 
func (i *Installers) evict() bool { + i.metricsAPI.InstallerCacheTryEviction() // store the file paths files := NewPriorityQueue(&fileInfo{}) links := make([]*fileInfo, 0) @@ -310,8 +320,10 @@ func (i *Installers) evictFile(filePath string) error { i.log.Infof("evicting binary file %s due to storage pressure", filePath) err := os.Remove(filePath) if err != nil { + i.metricsAPI.InstallerCacheReleaseEvicted(false) return err } + i.metricsAPI.InstallerCacheReleaseEvicted(true) // if the parent directory was left empty, // remove it to avoid dangling directories parentDir := path.Dir(filePath) @@ -334,9 +346,9 @@ func (i *Installers) pruneExpiredHardLinks(links []*fileInfo, gracePeriod time.D grace := graceTime.Unix() if finfo.info.ModTime().Unix() < grace { i.log.Infof("attempting to prune hard link %s", finfo.path) - err := os.Remove(finfo.path) - if err != nil { + if err := os.Remove(finfo.path); err != nil { i.log.WithError(err).Errorf("failed to prune hard link %s", finfo.path) + continue } } } diff --git a/internal/installercache/installercache_test.go b/internal/installercache/installercache_test.go index 9e285e64d42..a97fb34f4a8 100644 --- a/internal/installercache/installercache_test.go +++ b/internal/installercache/installercache_test.go @@ -71,6 +71,7 @@ var _ = Describe("installer cache", func() { manager *Installers cacheDir string eventsHandler *eventsapi.MockHandler + metricsAPI *metrics.MockAPI ctx context.Context diskStatsHelper metrics.DiskStatsHelper ) @@ -85,17 +86,17 @@ var _ = Describe("installer cache", func() { } BeforeEach(func() { - ctrl = gomock.NewController(GinkgoT()) diskStatsHelper = metrics.NewOSDiskStatsHelper(logrus.New()) mockRelease = oc.NewMockRelease(ctrl) eventsHandler = eventsapi.NewMockHandler(ctrl) + metricsAPI = metrics.NewMockAPI(ctrl) var err error cacheDir, err = os.MkdirTemp("/tmp", "cacheDir") Expect(err).NotTo(HaveOccurred()) Expect(os.Mkdir(filepath.Join(cacheDir, "quay.io"), 0755)).To(Succeed()) 
Expect(os.Mkdir(filepath.Join(filepath.Join(cacheDir, "quay.io"), "release-dev"), 0755)).To(Succeed()) - manager, err = New(getInstallerCacheConfig(12, 5), eventsHandler, diskStatsHelper, logrus.New()) + manager, err = New(getInstallerCacheConfig(12, 5), eventsHandler, metricsAPI, diskStatsHelper, logrus.New()) Expect(err).NotTo(HaveOccurred()) ctx = context.TODO() }) @@ -140,6 +141,9 @@ var _ = Describe("installer cache", func() { mockReleaseCalls(releaseID, version) } expectEventsSent() + mockReleaseCalls(releaseID, version) + expectEventsSent() + metricsAPI.EXPECT().InstallerCacheGetReleaseOK(releaseID, expectCached).Times(1) l, err := manager.Get(ctx, releaseID, "mirror", "pull-secret", mockRelease, version, clusterID) Expect(err).ShouldNot(HaveOccurred()) Expect(l.releaseID).To(Equal(releaseID)) @@ -189,6 +193,9 @@ var _ = Describe("installer cache", func() { runTest := func(t test, manager *Installers) (*Release, error) { expectEventsSent() mockReleaseCalls(t.releaseID, t.version) + metricsAPI.EXPECT().InstallerCacheGetReleaseOK(t.releaseID, gomock.Any()).AnyTimes() + metricsAPI.EXPECT().InstallerCacheTryEviction().AnyTimes() + metricsAPI.EXPECT().InstallerCacheReleaseEvicted(gomock.Any()).AnyTimes() return manager.Get(ctx, t.releaseID, "mirror", "pull-secret", mockRelease, t.version, t.clusterID) } @@ -221,7 +228,7 @@ var _ = Describe("installer cache", func() { // returns the first error encountered or nil if no error encountered. 
runParallelTest := func(maxCapacity int64, maxReleaseSize int64, tests []test) error { var err error - manager, err = New(getInstallerCacheConfig(maxCapacity, maxReleaseSize), eventsHandler, diskStatsHelper, getLogger()) + manager, err = New(getInstallerCacheConfig(maxCapacity, maxReleaseSize), eventsHandler, metricsAPI, diskStatsHelper, getLogger()) Expect(err).ToNot(HaveOccurred()) var wg sync.WaitGroup var reportedError error @@ -290,31 +297,32 @@ var _ = Describe("installer cache", func() { }) It("Should raise error on construction if max release size is larger than cache and cache is enabled", func() { - _, err := New(getInstallerCacheConfig(5, 10), eventsHandler, diskStatsHelper, logrus.New()) + _, err := New(getInstallerCacheConfig(5, 10), eventsHandler, metricsAPI, diskStatsHelper, logrus.New()) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(Equal("config.MaxReleaseSize (10 bytes) must not be greater than config.MaxCapacity (5 bytes)")) }) It("Should raise error on construction if max release size is zero and cache is enabled", func() { - _, err := New(getInstallerCacheConfig(5, 0), eventsHandler, diskStatsHelper, logrus.New()) + _, err := New(getInstallerCacheConfig(5, 0), eventsHandler, metricsAPI, diskStatsHelper, logrus.New()) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(Equal("config.MaxReleaseSize (0 bytes) must not be zero")) }) It("Should not raise error on construction if max release size is larger than cache and cache eviction is disabled", func() { - _, err := New(getInstallerCacheConfig(0, 10), eventsHandler, diskStatsHelper, logrus.New()) + _, err := New(getInstallerCacheConfig(0, 10), eventsHandler, metricsAPI, diskStatsHelper, logrus.New()) Expect(err).ToNot(HaveOccurred()) }) It("Should not raise error on construction if max release size is zero and cache eviction is disabled", func() { - _, err := New(getInstallerCacheConfig(0, 0), eventsHandler, diskStatsHelper, logrus.New()) + _, err := New(getInstallerCacheConfig(0, 0), 
eventsHandler, metricsAPI, diskStatsHelper, logrus.New()) Expect(err).ToNot(HaveOccurred()) }) It("when cache limit is zero - eviction is skipped", func() { var err error - manager, err = New(getInstallerCacheConfig(0, 5), eventsHandler, diskStatsHelper, logrus.New()) + manager, err = New(getInstallerCacheConfig(0, 5), eventsHandler, metricsAPI, diskStatsHelper, logrus.New()) Expect(err).ToNot(HaveOccurred()) + metricsAPI.EXPECT().InstallerCacheGetReleaseOK(gomock.Any(), false).AnyTimes() clusterId := strfmt.UUID(uuid.New().String()) r1, _ := testGet("4.8", "4.8.0", clusterId, false) r2, _ := testGet("4.9", "4.9.0", clusterId, false) @@ -329,11 +337,14 @@ var _ = Describe("installer cache", func() { Expect(os.IsNotExist(err)).To(BeFalse()) }) - It("exising files access time is updated", func() { + It("existing files access time is updated", func() { + metricsAPI.EXPECT().InstallerCacheGetReleaseOK(gomock.Any(), gomock.Any()).AnyTimes() clusterId := strfmt.UUID(uuid.New().String()) _, _ = testGet("4.8", "4.8.0", clusterId, false) r2, _ := testGet("4.9", "4.9.0", clusterId, false) r1, _ := testGet("4.8", "4.8.0", clusterId, true) + metricsAPI.EXPECT().InstallerCacheTryEviction().Times(1) + metricsAPI.EXPECT().InstallerCacheReleaseEvicted(true).Times(1) r3, _ := testGet("4.10", "4.10.0", clusterId, false) By("verify that the oldest file was deleted") @@ -348,9 +359,12 @@ var _ = Describe("installer cache", func() { }) It("evicts the oldest file", func() { + metricsAPI.EXPECT().InstallerCacheGetReleaseOK(gomock.Any(), false).AnyTimes() clusterId := strfmt.UUID(uuid.New().String()) r1, _ := testGet("4.8", "4.8.0", clusterId, false) r2, _ := testGet("4.9", "4.9.0", clusterId, false) + metricsAPI.EXPECT().InstallerCacheTryEviction().Times(1) + metricsAPI.EXPECT().InstallerCacheReleaseEvicted(true).Times(1) r3, _ := testGet("4.10", "4.10.0", clusterId, false) By("verify that the oldest file was deleted") @@ -366,11 +380,13 @@ var _ = Describe("installer cache", func() { }) 
It("extracts a release", func() { + metricsAPI.EXPECT().InstallerCacheGetReleaseOK(gomock.Any(), gomock.Any()).AnyTimes() releaseID := "4.10-orig" releaseMirrorID := "" version := "4.10.0" clusterID := strfmt.UUID(uuid.NewString()) mockReleaseCalls(releaseID, version) + metricsAPI.EXPECT().InstallerCacheGetReleaseOK(releaseID, false).Times(1) l, err := manager.Get(ctx, releaseID, releaseMirrorID, "pull-secret", mockRelease, version, clusterID) Expect(err).ShouldNot(HaveOccurred()) Expect(l.releaseID).To(Equal(releaseID)) @@ -389,7 +405,6 @@ var _ = Describe("installer cache", func() { numberOfLinks := 10 numberOfExpiredLinks := 5 - directory, err := os.MkdirTemp("", "testPruneExpiredHardLinks") Expect(err).ToNot(HaveOccurred()) diff --git a/internal/metrics/metricsManager.go b/internal/metrics/metricsManager.go index 6e5111f2802..6a8442136a9 100644 --- a/internal/metrics/metricsManager.go +++ b/internal/metrics/metricsManager.go @@ -3,6 +3,7 @@ package metrics import ( "context" "encoding/json" + "fmt" "time" "github.com/alecthomas/units" @@ -39,6 +40,10 @@ const ( counterFilesystemUsagePercentage = "assisted_installer_filesystem_usage_percentage" counterMonitoredHosts = "assisted_installer_monitored_hosts" counterMonitoredClusters = "assisted_installer_monitored_clusters" + counterInstallerCacheGetRelease = "assisted_installer_cache_get_release" + counterInstallerCacheReleaseCached = "assisted_installer_cache_get_release_cached" + counterInstallerCacheTryEviction = "assisted_installer_cache_try_eviction" + counterInstallerCacheReleaseEvicted = "assisted_installer_cache_release_evicted" ) const ( @@ -61,6 +66,9 @@ const ( counterDescriptionFilesystemUsagePercentage = "The percentage of the filesystem usage by the service" counterDescriptionMonitoredHosts = "Number of hosts monitored by host monitor" counterDescriptionMonitoredClusters = "Number of clusters monitored by cluster monitor" + counterDescriptionInstallerCacheGetRelease = "Counts the number of times a 
release was attempted, with the outcome as a label, cache status as label" + counterDescriptionInstallerCacheTryEviction = "Counts the number of times that the eviction function was called" + counterDescriptionInstallerCacheReleaseEvicted = "Counts the number of times that a release was evicted, label with success or fail of eviction" ) const ( @@ -77,6 +85,13 @@ const ( imageLabel = "imageName" hosts = "hosts" clusters = "clusters" + labelStatus = "status" + labelValueTimeout = "timeout" + labelValueOK = "ok" + labelValueError = "error" + labelCacheHit = "hit" + labelReleaseID = "releaseId" + labelSuccess = "success" ) type API interface { @@ -94,6 +109,11 @@ type API interface { FileSystemUsage(usageInPercentage float64) MonitoredHostsCount(monitoredHosts int64) MonitoredClusterCount(monitoredClusters int64) + InstallerCacheGetReleaseOK(releaseID string, cacheHit bool) + InstallerCacheGetReleaseTimeout(releaseID string, cacheHit bool) + InstallerCacheGetReleaseError(releaseID string, cacheHit bool) + InstallerCacheTryEviction() + InstallerCacheReleaseEvicted(succeeded bool) } type MetricsManager struct { @@ -119,7 +139,11 @@ type MetricsManager struct { serviceLogicFilesystemUsagePercentage *prometheus.GaugeVec serviceLogicMonitoredHosts *prometheus.GaugeVec serviceLogicMonitoredClusters *prometheus.GaugeVec - collectors []prometheus.Collector + serviceLogicInstallerCacheGetRelease *prometheus.CounterVec + serviceLogicInstallerCacheTryEviction *prometheus.CounterVec + serviceLogicInstallerCacheReleaseEvicted *prometheus.CounterVec + + collectors []prometheus.Collector } var _ API = &MetricsManager{} @@ -288,6 +312,30 @@ func NewMetricsManager(registry prometheus.Registerer, eventsHandler eventsapi.H Name: counterMonitoredClusters, Help: counterDescriptionMonitoredClusters, }, []string{hosts}), + + serviceLogicInstallerCacheGetRelease: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: 
counterInstallerCacheGetRelease, + Help: counterDescriptionInstallerCacheGetRelease, + }, []string{labelStatus, labelReleaseID, labelCacheHit}), + + serviceLogicInstallerCacheTryEviction: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: counterInstallerCacheTryEviction, + Help: counterDescriptionInstallerCacheTryEviction, + }, []string{}), + + serviceLogicInstallerCacheReleaseEvicted: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: counterInstallerCacheReleaseEvicted, + Help: counterDescriptionInstallerCacheReleaseEvicted, + }, []string{labelStatus}), } m.collectors = append(m.collectors, newDirectoryUsageCollector(metricsManagerConfig.DirectoryUsageMonitorConfig.Directories, diskStatsHelper, log)) @@ -312,6 +360,9 @@ func NewMetricsManager(registry prometheus.Registerer, eventsHandler eventsapi.H m.serviceLogicFilesystemUsagePercentage, m.serviceLogicMonitoredHosts, m.serviceLogicMonitoredClusters, + m.serviceLogicInstallerCacheGetRelease, + m.serviceLogicInstallerCacheTryEviction, + m.serviceLogicInstallerCacheReleaseEvicted, ) for _, collector := range m.collectors { @@ -486,3 +537,24 @@ func (m *MetricsManager) MonitoredClusterCount(monitoredClusters int64) { func bytesToGib(bytes int64) int64 { return bytes / int64(units.GiB) } + +// []string{labelStatus, labelReleaseID, labelCacheHit} +func (m *MetricsManager) InstallerCacheGetReleaseOK(releaseId string, cacheHit bool) { + m.serviceLogicInstallerCacheGetRelease.WithLabelValues(labelValueOK, releaseId, fmt.Sprintf("%t", cacheHit)).Inc() +} + +func (m *MetricsManager) InstallerCacheGetReleaseTimeout(releaseId string, cacheHit bool) { + m.serviceLogicInstallerCacheGetRelease.WithLabelValues(labelValueTimeout, releaseId, fmt.Sprintf("%t", cacheHit)).Inc() +} + +func (m *MetricsManager) InstallerCacheGetReleaseError(releaseId string, cacheHit bool) { + 
m.serviceLogicInstallerCacheGetRelease.WithLabelValues(labelValueError, releaseId, fmt.Sprintf("%t", cacheHit)).Inc() +} + +func (m *MetricsManager) InstallerCacheReleaseEvicted(succeeded bool) { + m.serviceLogicInstallerCacheReleaseEvicted.WithLabelValues(fmt.Sprintf("%t", succeeded)).Inc() +} + +func (m *MetricsManager) InstallerCacheTryEviction() { + m.serviceLogicInstallerCacheTryEviction.WithLabelValues().Inc() +} diff --git a/internal/metrics/mock_metrics_manager_api.go b/internal/metrics/mock_metrics_manager_api.go index 7d6163539e9..80f84f14f29 100644 --- a/internal/metrics/mock_metrics_manager_api.go +++ b/internal/metrics/mock_metrics_manager_api.go @@ -169,6 +169,66 @@ func (mr *MockAPIMockRecorder) InstallationStarted() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallationStarted", reflect.TypeOf((*MockAPI)(nil).InstallationStarted)) } +// InstallerCacheGetReleaseError mocks base method. +func (m *MockAPI) InstallerCacheGetReleaseError(releaseID string, cacheHit bool) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "InstallerCacheGetReleaseError", releaseID, cacheHit) +} + +// InstallerCacheGetReleaseError indicates an expected call of InstallerCacheGetReleaseError. +func (mr *MockAPIMockRecorder) InstallerCacheGetReleaseError(releaseID, cacheHit interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallerCacheGetReleaseError", reflect.TypeOf((*MockAPI)(nil).InstallerCacheGetReleaseError), releaseID, cacheHit) +} + +// InstallerCacheGetReleaseOK mocks base method. +func (m *MockAPI) InstallerCacheGetReleaseOK(releaseID string, cacheHit bool) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "InstallerCacheGetReleaseOK", releaseID, cacheHit) +} + +// InstallerCacheGetReleaseOK indicates an expected call of InstallerCacheGetReleaseOK. 
+func (mr *MockAPIMockRecorder) InstallerCacheGetReleaseOK(releaseID, cacheHit interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallerCacheGetReleaseOK", reflect.TypeOf((*MockAPI)(nil).InstallerCacheGetReleaseOK), releaseID, cacheHit) +} + +// InstallerCacheGetReleaseTimeout mocks base method. +func (m *MockAPI) InstallerCacheGetReleaseTimeout(releaseID string, cacheHit bool) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "InstallerCacheGetReleaseTimeout", releaseID, cacheHit) +} + +// InstallerCacheGetReleaseTimeout indicates an expected call of InstallerCacheGetReleaseTimeout. +func (mr *MockAPIMockRecorder) InstallerCacheGetReleaseTimeout(releaseID, cacheHit interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallerCacheGetReleaseTimeout", reflect.TypeOf((*MockAPI)(nil).InstallerCacheGetReleaseTimeout), releaseID, cacheHit) +} + +// InstallerCacheReleaseEvicted mocks base method. +func (m *MockAPI) InstallerCacheReleaseEvicted(succeeded bool) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "InstallerCacheReleaseEvicted", succeeded) +} + +// InstallerCacheReleaseEvicted indicates an expected call of InstallerCacheReleaseEvicted. +func (mr *MockAPIMockRecorder) InstallerCacheReleaseEvicted(succeeded interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallerCacheReleaseEvicted", reflect.TypeOf((*MockAPI)(nil).InstallerCacheReleaseEvicted), succeeded) +} + +// InstallerCacheTryEviction mocks base method. +func (m *MockAPI) InstallerCacheTryEviction() { + m.ctrl.T.Helper() + m.ctrl.Call(m, "InstallerCacheTryEviction") +} + +// InstallerCacheTryEviction indicates an expected call of InstallerCacheTryEviction. 
+func (mr *MockAPIMockRecorder) InstallerCacheTryEviction() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallerCacheTryEviction", reflect.TypeOf((*MockAPI)(nil).InstallerCacheTryEviction)) +} + // MonitoredClusterCount mocks base method. func (m *MockAPI) MonitoredClusterCount(monitoredClusters int64) { m.ctrl.T.Helper()