From 769fb70489806a4254f068cbafb208313a3587bf Mon Sep 17 00:00:00 2001 From: Robert Cerven Date: Thu, 21 Dec 2023 22:44:37 +0100 Subject: [PATCH] reporting repository provision request times to prometheus for SLO/SLI STONEBLD-1775 Signed-off-by: Robert Cerven --- controllers/component_image_controller.go | 27 ++++++++- controllers/imagerepository_controller.go | 67 +++++++++++++++++++++++ go.mod | 2 +- 3 files changed, 94 insertions(+), 2 deletions(-) diff --git a/controllers/component_image_controller.go b/controllers/component_image_controller.go index d4383d5..477d961 100644 --- a/controllers/component_image_controller.go +++ b/controllers/component_image_controller.go @@ -79,6 +79,10 @@ type ComponentReconciler struct { // SetupWithManager sets up the controller with the Manager. func (r *ComponentReconciler) SetupWithManager(mgr ctrl.Manager) error { + if err := initMetrics(); err != nil { + return err + } + return ctrl.NewControllerManagedBy(mgr). For(&appstudioredhatcomv1alpha1.Component{}). Complete(r) @@ -93,6 +97,7 @@ func (r *ComponentReconciler) SetupWithManager(mgr ctrl.Manager) error { func (r *ComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := ctrllog.FromContext(ctx).WithName("ComponentImageRepository") ctx = ctrllog.IntoContext(ctx, log) + reconcileStartTime := time.Now() // Fetch the Component instance component := &appstudioredhatcomv1alpha1.Component{} @@ -108,7 +113,12 @@ func (r *ComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, fmt.Errorf("error reading component: %w", err) } + componentIdForMetrics := getComponentIdForMetrics(component) + if !component.ObjectMeta.DeletionTimestamp.IsZero() { + // remove component from metrics map + delete(repositoryTimesForMetrics, componentIdForMetrics) + if controllerutil.ContainsFinalizer(component, ImageRepositoryComponentFinalizer) { pushRobotAccountName, pullRobotAccountName := generateRobotAccountsNames(component) @@ -194,6 +204,8 @@ func (r *ComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, r.reportError(ctx, component, message) } + setMetricsTime(componentIdForMetrics, reconcileStartTime) + imageRepositoryExists := false repositoryInfo := ImageRepositoryStatus{} repositoryInfoStr, imageAnnotationExist := component.Annotations[ImageAnnotationName] @@ -286,7 +298,7 @@ func (r *ComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Update component with the generated data and add finalizer err = r.Client.Get(ctx, req.NamespacedName, component) if err != nil { - return ctrl.Result{}, fmt.Errorf("error updating the Component's annotations: %w", err) + return ctrl.Result{}, fmt.Errorf("error reading component: %w", err) } if component.ObjectMeta.DeletionTimestamp.IsZero() { component.Annotations[ImageAnnotationName] = string(repositoryInfoBytes) @@ -308,6 +320,10 @@ func (r *ComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( }) } + imageRepositoryProvisionTimeMetric.Observe(time.Since(repositoryTimesForMetrics[componentIdForMetrics]).Seconds()) + // remove component from metrics map + delete(repositoryTimesForMetrics, componentIdForMetrics) + return ctrl.Result{}, nil } @@ -319,6 +335,11 @@ func (r *ComponentReconciler) reportError(ctx context.Context, component *appstu messageBytes, _ := json.Marshal(&ImageRepositoryStatus{Message: messsage}) component.Annotations[ImageAnnotationName] = string(messageBytes) delete(component.Annotations, GenerateImageAnnotationName) + + componentIdForMetrics := getComponentIdForMetrics(component) + // remove component from metrics map, permanent error + delete(repositoryTimesForMetrics, componentIdForMetrics) + return r.Client.Update(ctx, component) } @@ -558,3 +579,7 @@ func generateDockerconfigSecretData(quayImageURL string, robotAccount *quay.Robo quayImageURL, base64.StdEncoding.EncodeToString([]byte(authString))) return secretData } + +func getComponentIdForMetrics(component *appstudioredhatcomv1alpha1.Component) string { + return component.Name + "=" + component.Namespace +} diff --git a/controllers/imagerepository_controller.go b/controllers/imagerepository_controller.go index 5ed7b37..c0237e7 100644 --- a/controllers/imagerepository_controller.go +++ b/controllers/imagerepository_controller.go @@ -33,8 +33,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ctrllog "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/metrics" "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" appstudioredhatcomv1alpha1 "github.com/redhat-appstudio/application-api/api/v1alpha1" imagerepositoryv1alpha1 "github.com/redhat-appstudio/image-controller/api/v1alpha1" l "github.com/redhat-appstudio/image-controller/pkg/logs" @@ -48,6 +50,14 @@ const ( ImageRepositoryFinalizer = "appstudio.openshift.io/image-repository" buildPipelineServiceAccountName = "appstudio-pipeline" + + metricsNamespace = "redhat_appstudio" + metricsSubsystem = "imagecontroller" +) + +var ( + imageRepositoryProvisionTimeMetric prometheus.Histogram + repositoryTimesForMetrics = map[string]time.Time{} ) // ImageRepositoryReconciler reconciles a ImageRepository object @@ -60,13 +70,51 @@ type ImageRepositoryReconciler struct { QuayOrganization string } +func initMetrics() error { + buckets := getProvisionTimeMetricsBuckets() + + // don't register it if it was already registered by another controller + if imageRepositoryProvisionTimeMetric != nil { + return nil + } + + imageRepositoryProvisionTimeMetric = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Subsystem: metricsSubsystem, + Buckets: buckets, + Name: "image_repository_provision_time", + Help: "The time in seconds spent from the moment of Image repository provision request to Image repository is ready to use.", + }) + + if err := metrics.Registry.Register(imageRepositoryProvisionTimeMetric); err != nil { + return fmt.Errorf("failed to register the image_repository_provision_time metric: %w", err) + } + + return nil +} + +func getProvisionTimeMetricsBuckets() []float64 { + return []float64{5, 10, 15, 20, 30, 60, 120, 300} +} + // SetupWithManager sets up the controller with the Manager. func (r *ImageRepositoryReconciler) SetupWithManager(mgr ctrl.Manager) error { + if err := initMetrics(); err != nil { + return err + } + return ctrl.NewControllerManagedBy(mgr). For(&imagerepositoryv1alpha1.ImageRepository{}). Complete(r) } +func setMetricsTime(componentIdForMetrics string, reconcileStartTime time.Time) { + _, timeRecorded := repositoryTimesForMetrics[componentIdForMetrics] + if !timeRecorded { + repositoryTimesForMetrics[componentIdForMetrics] = reconcileStartTime + } +} + //+kubebuilder:rbac:groups=appstudio.redhat.com,resources=imagerepositories,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=appstudio.redhat.com,resources=imagerepositories/status,verbs=get;update;patch //+kubebuilder:rbac:groups=appstudio.redhat.com,resources=imagerepositories/finalizers,verbs=update @@ -77,6 +125,7 @@ func (r *ImageRepositoryReconciler) SetupWithManager(mgr ctrl.Manager) error { func (r *ImageRepositoryReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := ctrllog.FromContext(ctx).WithName("ImageRepository") ctx = ctrllog.IntoContext(ctx, log) + reconcileStartTime := time.Now() // Fetch the image repository instance imageRepository := &imagerepositoryv1alpha1.ImageRepository{} @@ -90,7 +139,12 @@ func (r *ImageRepositoryReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, err } + componentIdForMetrics := fmt.Sprintf("%s=%s", imageRepository.Labels[ComponentNameLabelName], imageRepository.Namespace) + if !imageRepository.DeletionTimestamp.IsZero() { + // remove component from metrics map + delete(repositoryTimesForMetrics, componentIdForMetrics) + // Reread quay token r.QuayClient = r.BuildQuayClient(log) @@ -109,6 +163,9 @@ func (r *ImageRepositoryReconciler) Reconcile(ctx context.Context, req ctrl.Requ } if imageRepository.Status.State == imagerepositoryv1alpha1.ImageRepositoryStateFailed { + // remove component from metrics map + delete(repositoryTimesForMetrics, componentIdForMetrics) + return ctrl.Result{}, nil } @@ -117,6 +174,7 @@ func (r *ImageRepositoryReconciler) Reconcile(ctx context.Context, req ctrl.Requ // Provision image repository if it hasn't been done yet if !controllerutil.ContainsFinalizer(imageRepository, ImageRepositoryFinalizer) { + setMetricsTime(componentIdForMetrics, reconcileStartTime) if err := r.ProvisionImageRepository(ctx, imageRepository); err != nil { log.Error(err, "provision of image repository failed") return ctrl.Result{}, err @@ -161,6 +219,15 @@ func (r *ImageRepositoryReconciler) Reconcile(ctx context.Context, req ctrl.Requ } } + // we are adding to map only for new provision, not for some partial actions, + // so report time only if time was recorded + provisionTime, timeRecorded := repositoryTimesForMetrics[componentIdForMetrics] + if timeRecorded { + imageRepositoryProvisionTimeMetric.Observe(time.Since(provisionTime).Seconds()) + } + // remove component from metrics map + delete(repositoryTimesForMetrics, componentIdForMetrics) + return ctrl.Result{}, nil } diff --git a/go.mod b/go.mod index c5832ae..750f026 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/h2non/gock v1.2.0 github.com/onsi/ginkgo/v2 v2.13.2 github.com/onsi/gomega v1.30.0 + github.com/prometheus/client_golang v1.18.0 github.com/redhat-appstudio/application-api v0.0.0-20231026192857-89515ad2504f github.com/redhat-appstudio/remote-secret v0.0.0-20240103070316-c146261dd544 go.uber.org/zap v1.26.0 @@ -48,7 +49,6 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_golang v1.18.0 // indirect github.com/prometheus/client_model v0.5.0 // indirect github.com/prometheus/common v0.45.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect