Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reporting repository provision request times to prometheus for SLO/SLI #84

Merged
merged 1 commit into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion controllers/component_image_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@

// SetupWithManager sets up the controller with the Manager.
func (r *ComponentReconciler) SetupWithManager(mgr ctrl.Manager) error {
if err := initMetrics(); err != nil {
return err
}

Check warning on line 84 in controllers/component_image_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/component_image_controller.go#L83-L84

Added lines #L83 - L84 were not covered by tests

return ctrl.NewControllerManagedBy(mgr).
For(&appstudioredhatcomv1alpha1.Component{}).
Complete(r)
Expand All @@ -93,6 +97,7 @@
func (r *ComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := ctrllog.FromContext(ctx).WithName("ComponentImageRepository")
ctx = ctrllog.IntoContext(ctx, log)
reconcileStartTime := time.Now()

// Fetch the Component instance
component := &appstudioredhatcomv1alpha1.Component{}
Expand All @@ -108,7 +113,12 @@
return ctrl.Result{}, fmt.Errorf("error reading component: %w", err)
}

componentIdForMetrics := getComponentIdForMetrics(component)

if !component.ObjectMeta.DeletionTimestamp.IsZero() {
// remove component from metrics map
delete(repositoryTimesForMetrics, componentIdForMetrics)

if controllerutil.ContainsFinalizer(component, ImageRepositoryComponentFinalizer) {
pushRobotAccountName, pullRobotAccountName := generateRobotAccountsNames(component)

Expand Down Expand Up @@ -194,6 +204,8 @@
return ctrl.Result{}, r.reportError(ctx, component, message)
}

setMetricsTime(componentIdForMetrics, reconcileStartTime)

imageRepositoryExists := false
repositoryInfo := ImageRepositoryStatus{}
repositoryInfoStr, imageAnnotationExist := component.Annotations[ImageAnnotationName]
Expand Down Expand Up @@ -286,7 +298,7 @@
// Update component with the generated data and add finalizer
err = r.Client.Get(ctx, req.NamespacedName, component)
if err != nil {
return ctrl.Result{}, fmt.Errorf("error updating the Component's annotations: %w", err)
return ctrl.Result{}, fmt.Errorf("error reading component: %w", err)

Check warning on line 301 in controllers/component_image_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/component_image_controller.go#L301

Added line #L301 was not covered by tests
}
if component.ObjectMeta.DeletionTimestamp.IsZero() {
component.Annotations[ImageAnnotationName] = string(repositoryInfoBytes)
Expand All @@ -308,6 +320,10 @@
})
}

imageRepositoryProvisionTimeMetric.Observe(time.Since(repositoryTimesForMetrics[componentIdForMetrics]).Seconds())
// remove component from metrics map
delete(repositoryTimesForMetrics, componentIdForMetrics)

return ctrl.Result{}, nil
}

Expand All @@ -319,6 +335,11 @@
messageBytes, _ := json.Marshal(&ImageRepositoryStatus{Message: messsage})
component.Annotations[ImageAnnotationName] = string(messageBytes)
delete(component.Annotations, GenerateImageAnnotationName)

componentIdForMetrics := getComponentIdForMetrics(component)
// remove component from metrics map, permanent error
delete(repositoryTimesForMetrics, componentIdForMetrics)

return r.Client.Update(ctx, component)
}

Expand Down Expand Up @@ -558,3 +579,7 @@
quayImageURL, base64.StdEncoding.EncodeToString([]byte(authString)))
return secretData
}

func getComponentIdForMetrics(component *appstudioredhatcomv1alpha1.Component) string {
return component.Name + "=" + component.Namespace
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is duplicate code at line https://github.com/redhat-appstudio/image-controller/pull/84/files#diff-922fcbfbe2565443329918a2b0d49b2b03588007ec6464a20d3feb824f3f80a2R143

I'd suggest to simplify the arguments and make them explicit

func getComponentIdForMetrics(componentName, componentNS string) string {
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well that uses different thing and not component

84 changes: 84 additions & 0 deletions controllers/imagerepository_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
ctrllog "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/metrics"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
appstudioredhatcomv1alpha1 "github.com/redhat-appstudio/application-api/api/v1alpha1"
imagerepositoryv1alpha1 "github.com/redhat-appstudio/image-controller/api/v1alpha1"
l "github.com/redhat-appstudio/image-controller/pkg/logs"
Expand All @@ -48,6 +50,15 @@
ImageRepositoryFinalizer = "appstudio.openshift.io/image-repository"

buildPipelineServiceAccountName = "appstudio-pipeline"

metricsNamespace = "redhat_appstudio"
metricsSubsystem = "imagecontroller"
)

var (
imageRepositoryProvisionTimeMetric prometheus.Histogram
imageRepositoryProvisionFailureTimeMetric prometheus.Histogram
repositoryTimesForMetrics = map[string]time.Time{}
)

// ImageRepositoryReconciler reconciles a ImageRepository object
Expand All @@ -60,13 +71,62 @@
QuayOrganization string
}

func initMetrics() error {
buckets := getProvisionTimeMetricsBuckets()

// don't register it if it was already registered by another controller
if imageRepositoryProvisionTimeMetric != nil {
return nil
}

imageRepositoryProvisionTimeMetric = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Buckets: buckets,
Name: "image_repository_provision_time",
Help: "The time in seconds spent from the moment of Image repository provision request to Image repository is ready to use.",
})

imageRepositoryProvisionFailureTimeMetric = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Buckets: buckets,
Name: "image_repository_provision_failure_time",
Help: "The time in seconds spent from the moment of Image repository provision request to Image repository failure.",
})

if err := metrics.Registry.Register(imageRepositoryProvisionTimeMetric); err != nil {
return fmt.Errorf("failed to register the image_repository_provision_time metric: %w", err)
}

Check warning on line 100 in controllers/imagerepository_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/imagerepository_controller.go#L99-L100

Added lines #L99 - L100 were not covered by tests
if err := metrics.Registry.Register(imageRepositoryProvisionFailureTimeMetric); err != nil {
return fmt.Errorf("failed to register the image_repository_provision_failure_time metric: %w", err)
}

Check warning on line 103 in controllers/imagerepository_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/imagerepository_controller.go#L102-L103

Added lines #L102 - L103 were not covered by tests

return nil
}

func getProvisionTimeMetricsBuckets() []float64 {
return []float64{5, 10, 15, 20, 30, 60, 120, 300}
}

// SetupWithManager sets up the controller with the Manager.
func (r *ImageRepositoryReconciler) SetupWithManager(mgr ctrl.Manager) error {
if err := initMetrics(); err != nil {
return err
}

Check warning on line 116 in controllers/imagerepository_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/imagerepository_controller.go#L115-L116

Added lines #L115 - L116 were not covered by tests

return ctrl.NewControllerManagedBy(mgr).
For(&imagerepositoryv1alpha1.ImageRepository{}).
Complete(r)
}

func setMetricsTime(idForMetrics string, reconcileStartTime time.Time) {
_, timeRecorded := repositoryTimesForMetrics[idForMetrics]
if !timeRecorded {
repositoryTimesForMetrics[idForMetrics] = reconcileStartTime
}
}

//+kubebuilder:rbac:groups=appstudio.redhat.com,resources=imagerepositories,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=appstudio.redhat.com,resources=imagerepositories/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=appstudio.redhat.com,resources=imagerepositories/finalizers,verbs=update
Expand All @@ -77,6 +137,7 @@
func (r *ImageRepositoryReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := ctrllog.FromContext(ctx).WithName("ImageRepository")
ctx = ctrllog.IntoContext(ctx, log)
reconcileStartTime := time.Now()

// Fetch the image repository instance
imageRepository := &imagerepositoryv1alpha1.ImageRepository{}
Expand All @@ -89,8 +150,13 @@
log.Error(err, "failed to get image repository", l.Action, l.ActionView)
return ctrl.Result{}, err
}

repositoryIdForMetrics := fmt.Sprintf("%s=%s", imageRepository.Name, imageRepository.Namespace)

if !imageRepository.DeletionTimestamp.IsZero() {
// remove component from metrics map
delete(repositoryTimesForMetrics, repositoryIdForMetrics)

// Reread quay token
r.QuayClient = r.BuildQuayClient(log)

Expand All @@ -109,6 +175,14 @@
}

if imageRepository.Status.State == imagerepositoryv1alpha1.ImageRepositoryStateFailed {
provisionTime, timeRecorded := repositoryTimesForMetrics[repositoryIdForMetrics]
if timeRecorded {
imageRepositoryProvisionFailureTimeMetric.Observe(time.Since(provisionTime).Seconds())

// remove component from metrics map
delete(repositoryTimesForMetrics, repositoryIdForMetrics)
}

return ctrl.Result{}, nil
}

Expand All @@ -117,6 +191,7 @@

// Provision image repository if it hasn't been done yet
if !controllerutil.ContainsFinalizer(imageRepository, ImageRepositoryFinalizer) {
setMetricsTime(repositoryIdForMetrics, reconcileStartTime)
if err := r.ProvisionImageRepository(ctx, imageRepository); err != nil {
log.Error(err, "provision of image repository failed")
return ctrl.Result{}, err
Expand Down Expand Up @@ -161,6 +236,15 @@
}
}

// we are adding to map only for new provision, not for some partial actions,
// so report time only if time was recorded
provisionTime, timeRecorded := repositoryTimesForMetrics[repositoryIdForMetrics]
if timeRecorded {
imageRepositoryProvisionTimeMetric.Observe(time.Since(provisionTime).Seconds())
}
// remove component from metrics map
delete(repositoryTimesForMetrics, repositoryIdForMetrics)

return ctrl.Result{}, nil
}

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/h2non/gock v1.2.0
github.com/onsi/ginkgo/v2 v2.13.2
github.com/onsi/gomega v1.30.0
github.com/prometheus/client_golang v1.18.0
github.com/redhat-appstudio/application-api v0.0.0-20231026192857-89515ad2504f
github.com/redhat-appstudio/remote-secret v0.0.0-20240103070316-c146261dd544
go.uber.org/zap v1.26.0
Expand Down Expand Up @@ -48,7 +49,6 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.18.0 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.45.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
Expand Down