-
Notifications
You must be signed in to change notification settings - Fork 227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
MGMT-19840: Gather operational metrics from installercache #7281
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,7 @@ type Installers struct { | |
eventsHandler eventsapi.Handler | ||
diskStatsHelper metrics.DiskStatsHelper | ||
config Config | ||
metricsAPI metrics.API | ||
} | ||
|
||
type Size int64 | ||
|
@@ -116,7 +117,7 @@ func (rl *Release) Cleanup(ctx context.Context) error { | |
} | ||
|
||
// New constructs an installer cache with a given storage capacity | ||
func New(config Config, eventsHandler eventsapi.Handler, diskStatsHelper metrics.DiskStatsHelper, log logrus.FieldLogger) (*Installers, error) { | ||
func New(config Config, eventsHandler eventsapi.Handler, metricsAPI metrics.API, diskStatsHelper metrics.DiskStatsHelper, log logrus.FieldLogger) (*Installers, error) { | ||
if config.MaxCapacity > 0 && config.MaxReleaseSize == 0 { | ||
return nil, fmt.Errorf("config.MaxReleaseSize (%d bytes) must not be zero", config.MaxReleaseSize) | ||
} | ||
|
@@ -128,6 +129,7 @@ func New(config Config, eventsHandler eventsapi.Handler, diskStatsHelper metrics | |
eventsHandler: eventsHandler, | ||
diskStatsHelper: diskStatsHelper, | ||
config: config, | ||
metricsAPI: metricsAPI, | ||
}, nil | ||
} | ||
|
||
|
@@ -138,14 +140,25 @@ func (i *Installers) Get(ctx context.Context, releaseID, releaseIDMirror, pullSe | |
for { | ||
select { | ||
case <-ctx.Done(): | ||
return nil, ctx.Err() | ||
err := ctx.Err() | ||
if err == context.DeadlineExceeded { | ||
i.metricsAPI.InstallerCacheGetReleaseTimeout(releaseID) | ||
} | ||
return nil, err | ||
default: | ||
release, err := i.get(releaseID, releaseIDMirror, pullSecret, ocRelease, ocpVersion, clusterID) | ||
majorMinorVersion, err := ocRelease.GetMajorMinorVersion(i.log, releaseID, releaseIDMirror, pullSecret) | ||
if err != nil { | ||
i.log.Warnf("unable to get majorMinorVersion to record metric for %s falling back to full URI", releaseID) | ||
majorMinorVersion = releaseID | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as discussed releaseID should not be a value. We can set |
||
} | ||
release, err := i.get(releaseID, releaseIDMirror, pullSecret, ocRelease, ocpVersion, clusterID, majorMinorVersion) | ||
if err == nil { | ||
i.metricsAPI.InstallerCacheGetReleaseOK(majorMinorVersion) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We agreed on having 2 metrics:
I still think the error metric won't be of much (or any) use, but definitely we do not want to count all requests there. What I thought of this is hits+misses+errors should return the total number of requests to this function. Did I misunderstand anything? |
||
return release, nil | ||
} | ||
_, isCapacityError := err.(*errorInsufficientCacheCapacity) | ||
if !isCapacityError { | ||
i.metricsAPI.InstallerCacheGetReleaseError(releaseID) | ||
return nil, errors.Wrapf(err, "failed to get installer path for release %s", releaseID) | ||
} | ||
time.Sleep(i.config.ReleaseFetchRetryInterval) | ||
|
@@ -161,14 +174,16 @@ func (i *Installers) getDiskUsageIncludingHardlinks() (uint64, error) { | |
return usedBytes, nil | ||
} | ||
|
||
func (i *Installers) extractReleaseIfNeeded(path, releaseID, releaseIDMirror, pullSecret, ocpVersion string, ocRelease oc.Release) (extractDuration float64, cached bool, err error) { | ||
func (i *Installers) extractReleaseIfNeeded(path, releaseID, releaseIDMirror, pullSecret, ocpVersion string, ocRelease oc.Release, majorMinorVersion string) (extractDuration float64, cached bool, err error) { | ||
_, err = os.Stat(path) | ||
if err == nil { | ||
i.metricsAPI.InstallerCacheGetReleaseCached(majorMinorVersion, true) | ||
return 0, true, nil // release was found in the cache | ||
} | ||
if !os.IsNotExist(err) { | ||
return 0, false, err | ||
} | ||
i.metricsAPI.InstallerCacheGetReleaseCached(majorMinorVersion, false) | ||
usedBytes, err := i.getDiskUsageIncludingHardlinks() | ||
if err != nil && !os.IsNotExist(err) { | ||
return 0, false, errors.Wrapf(err, "could not determine disk usage information for cache dir %s", i.config.CacheDir) | ||
|
@@ -184,7 +199,7 @@ func (i *Installers) extractReleaseIfNeeded(path, releaseID, releaseIDMirror, pu | |
return time.Since(extractStartTime).Seconds(), false, nil | ||
} | ||
|
||
func (i *Installers) get(releaseID, releaseIDMirror, pullSecret string, ocRelease oc.Release, ocpVersion string, clusterID strfmt.UUID) (*Release, error) { | ||
func (i *Installers) get(releaseID, releaseIDMirror, pullSecret string, ocRelease oc.Release, ocpVersion string, clusterID strfmt.UUID, majorMinorVersion string) (*Release, error) { | ||
i.Lock() | ||
defer i.Unlock() | ||
|
||
|
@@ -199,7 +214,7 @@ func (i *Installers) get(releaseID, releaseIDMirror, pullSecret string, ocReleas | |
if err != nil { | ||
return nil, err | ||
} | ||
release.extractDuration, release.cached, err = i.extractReleaseIfNeeded(path, releaseID, releaseIDMirror, pullSecret, ocpVersion, ocRelease) | ||
release.extractDuration, release.cached, err = i.extractReleaseIfNeeded(path, releaseID, releaseIDMirror, pullSecret, ocpVersion, ocRelease, majorMinorVersion) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
@@ -310,8 +325,10 @@ func (i *Installers) evictFile(filePath string) error { | |
i.log.Infof("evicting binary file %s due to storage pressure", filePath) | ||
err := os.Remove(filePath) | ||
if err != nil { | ||
i.metricsAPI.InstallerCacheReleaseEvicted(false) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't we measure this in the outer method If we really want to add number of files evicted we should change the metric type to bucketed |
||
return err | ||
} | ||
i.metricsAPI.InstallerCacheReleaseEvicted(true) | ||
// if the parent directory was left empty, | ||
// remove it to avoid dangling directories | ||
parentDir := path.Dir(filePath) | ||
|
@@ -334,9 +351,9 @@ func (i *Installers) pruneExpiredHardLinks(links []*fileInfo, gracePeriod time.D | |
grace := graceTime.Unix() | ||
if finfo.info.ModTime().Unix() < grace { | ||
i.log.Infof("attempting to prune hard link %s", finfo.path) | ||
err := os.Remove(finfo.path) | ||
if err != nil { | ||
if err := os.Remove(finfo.path); err != nil { | ||
i.log.WithError(err).Errorf("failed to prune hard link %s", finfo.path) | ||
continue | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the purpose of this |
||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should be done outside the for select