Skip to content

Commit

Permalink
reporting repository provision request times to prometheus for SLO/SLI
Browse files Browse the repository at this point in the history
STONEBLD-1775

Signed-off-by: Robert Cerven <[email protected]>
  • Loading branch information
rcerven committed Jan 9, 2024
1 parent 5f29907 commit c3d7e41
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 2 deletions.
27 changes: 26 additions & 1 deletion controllers/component_image_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ type ComponentReconciler struct {

// SetupWithManager sets up the controller with the Manager.
func (r *ComponentReconciler) SetupWithManager(mgr ctrl.Manager) error {
if err := initMetrics(); err != nil {
return err
}

Check warning on line 84 in controllers/component_image_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/component_image_controller.go#L83-L84

Added lines #L83 - L84 were not covered by tests

return ctrl.NewControllerManagedBy(mgr).
For(&appstudioredhatcomv1alpha1.Component{}).
Complete(r)
Expand All @@ -93,6 +97,7 @@ func (r *ComponentReconciler) SetupWithManager(mgr ctrl.Manager) error {
func (r *ComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := ctrllog.FromContext(ctx).WithName("ComponentImageRepository")
ctx = ctrllog.IntoContext(ctx, log)
reconcileStartTime := time.Now()

// Fetch the Component instance
component := &appstudioredhatcomv1alpha1.Component{}
Expand All @@ -108,7 +113,12 @@ func (r *ComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, fmt.Errorf("error reading component: %w", err)
}

componentIdForMetrics := getComponentIdForMetrics(component)

if !component.ObjectMeta.DeletionTimestamp.IsZero() {
// remove component from metrics map
delete(repositoryTimesForMetrics, componentIdForMetrics)

if controllerutil.ContainsFinalizer(component, ImageRepositoryComponentFinalizer) {
pushRobotAccountName, pullRobotAccountName := generateRobotAccountsNames(component)

Expand Down Expand Up @@ -194,6 +204,8 @@ func (r *ComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, r.reportError(ctx, component, message)
}

setMetricsTime(componentIdForMetrics, reconcileStartTime)

imageRepositoryExists := false
repositoryInfo := ImageRepositoryStatus{}
repositoryInfoStr, imageAnnotationExist := component.Annotations[ImageAnnotationName]
Expand Down Expand Up @@ -286,7 +298,7 @@ func (r *ComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
// Update component with the generated data and add finalizer
err = r.Client.Get(ctx, req.NamespacedName, component)
if err != nil {
return ctrl.Result{}, fmt.Errorf("error updating the Component's annotations: %w", err)
return ctrl.Result{}, fmt.Errorf("error reading component: %w", err)

Check warning on line 301 in controllers/component_image_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/component_image_controller.go#L301

Added line #L301 was not covered by tests
}
if component.ObjectMeta.DeletionTimestamp.IsZero() {
component.Annotations[ImageAnnotationName] = string(repositoryInfoBytes)
Expand All @@ -308,6 +320,10 @@ func (r *ComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
})
}

imageRepositoryProvisionTimeMetric.Observe(time.Since(repositoryTimesForMetrics[componentIdForMetrics]).Seconds())
// remove component from metrics map
delete(repositoryTimesForMetrics, componentIdForMetrics)

return ctrl.Result{}, nil
}

Expand All @@ -319,6 +335,11 @@ func (r *ComponentReconciler) reportError(ctx context.Context, component *appstu
messageBytes, _ := json.Marshal(&ImageRepositoryStatus{Message: messsage})
component.Annotations[ImageAnnotationName] = string(messageBytes)
delete(component.Annotations, GenerateImageAnnotationName)

componentIdForMetrics := getComponentIdForMetrics(component)
// remove component from metrics map, permanent error
delete(repositoryTimesForMetrics, componentIdForMetrics)

return r.Client.Update(ctx, component)
}

Expand Down Expand Up @@ -558,3 +579,7 @@ func generateDockerconfigSecretData(quayImageURL string, robotAccount *quay.Robo
quayImageURL, base64.StdEncoding.EncodeToString([]byte(authString)))
return secretData
}

func getComponentIdForMetrics(component *appstudioredhatcomv1alpha1.Component) string {
return component.Name + "=" + component.Namespace
}
84 changes: 84 additions & 0 deletions controllers/imagerepository_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
ctrllog "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/metrics"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
appstudioredhatcomv1alpha1 "github.com/redhat-appstudio/application-api/api/v1alpha1"
imagerepositoryv1alpha1 "github.com/redhat-appstudio/image-controller/api/v1alpha1"
l "github.com/redhat-appstudio/image-controller/pkg/logs"
Expand All @@ -48,6 +50,15 @@ const (
ImageRepositoryFinalizer = "appstudio.openshift.io/image-repository"

buildPipelineServiceAccountName = "appstudio-pipeline"

metricsNamespace = "redhat_appstudio"
metricsSubsystem = "imagecontroller"
)

var (
imageRepositoryProvisionTimeMetric prometheus.Histogram
imageRepositoryProvisionFailureTimeMetric prometheus.Histogram
repositoryTimesForMetrics = map[string]time.Time{}
)

// ImageRepositoryReconciler reconciles a ImageRepository object
Expand All @@ -60,13 +71,62 @@ type ImageRepositoryReconciler struct {
QuayOrganization string
}

func initMetrics() error {
buckets := getProvisionTimeMetricsBuckets()

// don't register it if it was already registered by another controller
if imageRepositoryProvisionTimeMetric != nil {
return nil
}

imageRepositoryProvisionTimeMetric = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Buckets: buckets,
Name: "image_repository_provision_time",
Help: "The time in seconds spent from the moment of Image repository provision request to Image repository is ready to use.",
})

imageRepositoryProvisionFailureTimeMetric = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Buckets: buckets,
Name: "image_repository_provision_failure_time",
Help: "The time in seconds spent from the moment of Image repository provision request to Image repository failure.",
})

if err := metrics.Registry.Register(imageRepositoryProvisionTimeMetric); err != nil {
return fmt.Errorf("failed to register the image_repository_provision_time metric: %w", err)
}

Check warning on line 100 in controllers/imagerepository_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/imagerepository_controller.go#L99-L100

Added lines #L99 - L100 were not covered by tests
if err := metrics.Registry.Register(imageRepositoryProvisionFailureTimeMetric); err != nil {
return fmt.Errorf("failed to register the image_repository_provision_failure_time metric: %w", err)
}

Check warning on line 103 in controllers/imagerepository_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/imagerepository_controller.go#L102-L103

Added lines #L102 - L103 were not covered by tests

return nil
}

func getProvisionTimeMetricsBuckets() []float64 {
return []float64{5, 10, 15, 20, 30, 60, 120, 300}
}

// SetupWithManager sets up the controller with the Manager.
func (r *ImageRepositoryReconciler) SetupWithManager(mgr ctrl.Manager) error {
if err := initMetrics(); err != nil {
return err
}

Check warning on line 116 in controllers/imagerepository_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/imagerepository_controller.go#L115-L116

Added lines #L115 - L116 were not covered by tests

return ctrl.NewControllerManagedBy(mgr).
For(&imagerepositoryv1alpha1.ImageRepository{}).
Complete(r)
}

func setMetricsTime(componentIdForMetrics string, reconcileStartTime time.Time) {
_, timeRecorded := repositoryTimesForMetrics[componentIdForMetrics]
if !timeRecorded {
repositoryTimesForMetrics[componentIdForMetrics] = reconcileStartTime
}
}

//+kubebuilder:rbac:groups=appstudio.redhat.com,resources=imagerepositories,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=appstudio.redhat.com,resources=imagerepositories/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=appstudio.redhat.com,resources=imagerepositories/finalizers,verbs=update
Expand All @@ -77,6 +137,7 @@ func (r *ImageRepositoryReconciler) SetupWithManager(mgr ctrl.Manager) error {
func (r *ImageRepositoryReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := ctrllog.FromContext(ctx).WithName("ImageRepository")
ctx = ctrllog.IntoContext(ctx, log)
reconcileStartTime := time.Now()

// Fetch the image repository instance
imageRepository := &imagerepositoryv1alpha1.ImageRepository{}
Expand All @@ -90,7 +151,12 @@ func (r *ImageRepositoryReconciler) Reconcile(ctx context.Context, req ctrl.Requ
return ctrl.Result{}, err
}

componentIdForMetrics := fmt.Sprintf("%s=%s", imageRepository.Labels[ComponentNameLabelName], imageRepository.Namespace)

if !imageRepository.DeletionTimestamp.IsZero() {
// remove component from metrics map
delete(repositoryTimesForMetrics, componentIdForMetrics)

// Reread quay token
r.QuayClient = r.BuildQuayClient(log)

Expand All @@ -109,6 +175,14 @@ func (r *ImageRepositoryReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}

if imageRepository.Status.State == imagerepositoryv1alpha1.ImageRepositoryStateFailed {
provisionTime, timeRecorded := repositoryTimesForMetrics[componentIdForMetrics]
if timeRecorded {
imageRepositoryProvisionFailureTimeMetric.Observe(time.Since(provisionTime).Seconds())
}

// remove component from metrics map
delete(repositoryTimesForMetrics, componentIdForMetrics)

return ctrl.Result{}, nil
}

Expand All @@ -117,6 +191,7 @@ func (r *ImageRepositoryReconciler) Reconcile(ctx context.Context, req ctrl.Requ

// Provision image repository if it hasn't been done yet
if !controllerutil.ContainsFinalizer(imageRepository, ImageRepositoryFinalizer) {
setMetricsTime(componentIdForMetrics, reconcileStartTime)
if err := r.ProvisionImageRepository(ctx, imageRepository); err != nil {
log.Error(err, "provision of image repository failed")
return ctrl.Result{}, err
Expand Down Expand Up @@ -161,6 +236,15 @@ func (r *ImageRepositoryReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
}

// we are adding to map only for new provision, not for some partial actions,
// so report time only if time was recorded
provisionTime, timeRecorded := repositoryTimesForMetrics[componentIdForMetrics]
if timeRecorded {
imageRepositoryProvisionTimeMetric.Observe(time.Since(provisionTime).Seconds())
}
// remove component from metrics map
delete(repositoryTimesForMetrics, componentIdForMetrics)

return ctrl.Result{}, nil
}

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/h2non/gock v1.2.0
github.com/onsi/ginkgo/v2 v2.13.2
github.com/onsi/gomega v1.30.0
github.com/prometheus/client_golang v1.18.0
github.com/redhat-appstudio/application-api v0.0.0-20231026192857-89515ad2504f
github.com/redhat-appstudio/remote-secret v0.0.0-20240103070316-c146261dd544
go.uber.org/zap v1.26.0
Expand Down Expand Up @@ -48,7 +49,6 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.18.0 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.45.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
Expand Down

0 comments on commit c3d7e41

Please sign in to comment.