diff --git a/.golangci.yml b/.golangci.yml index 836c889..e71beb7 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -38,3 +38,7 @@ linters: - unconvert - unparam - unused +linters-settings: + errcheck: + exclude-functions: + - fmt.Fprintf \ No newline at end of file diff --git a/cmd/main.go b/cmd/main.go index 43feff5..b64e76f 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -17,6 +17,7 @@ import ( "github.com/go-logr/logr" persesv1alpha1 "github.com/perses/perses-operator/api/v1alpha1" + prometheusv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" semconv "go.opentelemetry.io/collector/semconv/v1.27.0" otelmetric "go.opentelemetry.io/otel/metric" appsv1 "k8s.io/api/apps/v1" @@ -105,9 +106,10 @@ func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) utilruntime.Must(dash0v1alpha1.AddToScheme(scheme)) - // for perses dashboard controller, prometheus scrape config controller etc. + // required for Perses dashboard controller and Prometheus rules controller. utilruntime.Must(apiextensionsv1.AddToScheme(scheme)) utilruntime.Must(persesv1alpha1.AddToScheme(scheme)) + utilruntime.Must(prometheusv1.AddToScheme(scheme)) } func main() { @@ -529,16 +531,30 @@ func startDash0Controllers( metricNamePrefix, &setupLog, ) + prometheusRuleCrdReconciler := &controller.PrometheusRuleCrdReconciler{ + AuthToken: envVars.selfMonitoringAndApiAuthToken, + } + if err := prometheusRuleCrdReconciler.SetupWithManager(ctx, mgr, startupTasksK8sClient, &setupLog); err != nil { + return fmt.Errorf("unable to set up the Prometheus rule reconciler: %w", err) + } + prometheusRuleCrdReconciler.InitializeSelfMonitoringMetrics( + meter, + metricNamePrefix, + &setupLog, + ) operatorConfigurationReconciler := &controller.OperatorConfigurationReconciler{ - Client: k8sClient, - Clientset: clientset, - PersesDashboardCrdReconciler: persesDashboardCrdReconciler, - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("dash0-operator-configuration-controller"), - DeploymentSelfReference: deploymentSelfReference, - Images: images, - DevelopmentMode: developmentMode, + Client: k8sClient, + Clientset: clientset, + ApiClients: []controller.ApiClient{ + persesDashboardCrdReconciler, + prometheusRuleCrdReconciler, + }, + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("dash0-operator-configuration-controller"), + DeploymentSelfReference: deploymentSelfReference, + Images: images, + DevelopmentMode: developmentMode, } if err := operatorConfigurationReconciler.SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to set up the operator configuration reconciler: %w", err) diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 180edd7..7df2fe9 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -65,6 +65,14 @@ rules: - delete - get - list +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch - apiGroups: - operator.dash0.com resources: diff --git a/go.mod b/go.mod index 7a03830..71e6d69 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/onsi/ginkgo/v2 v2.20.2 github.com/onsi/gomega v1.34.2 github.com/perses/perses-operator v0.0.0-20240402153734-4ccf03f6c8e6 + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.77.1 github.com/wI2L/jsondiff v0.6.0 go.opentelemetry.io/collector/pdata v1.17.0 go.opentelemetry.io/collector/semconv v0.111.0 diff --git a/go.sum b/go.sum index 7aeaf99..7680bdf 100644 --- a/go.sum +++ b/go.sum @@ -115,6 +115,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.77.1 h1:XGoEXT6WTTihO+MD8MAao+YaQIH905HbK0WK2lyo28k= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.77.1/go.mod h1:D0KY8md81DQKdaR/cXwnhoWB3MYYyc/UjvqE8GFkIvA= github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI= github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= diff --git a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml index 792eae3..bace1e3 100644 --- a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml +++ b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml @@ -10,7 +10,7 @@ metadata: rules: -# Permissions required to watch for the foreign CRD (Perses dashboards, Prometheus scrape configs). +# Permissions required to watch for the third-party CRD (Perses dashboards, Prometheus check rules): - apiGroups: - apiextensions.k8s.io resources: @@ -20,7 +20,7 @@ rules: - list - watch -# Permissions required to instrument workloads in the apps API group. +# Permissions required to instrument workloads in the apps API group: - apiGroups: - apps resources: @@ -35,7 +35,7 @@ rules: - update - watch -# Permissions required to instrument workloads in the batch API group. +# Permissions required to instrument workloads in the batch API group: - apiGroups: - batch resources: @@ -48,7 +48,7 @@ rules: - update - watch -# Pmrmissions required top create a Dash0 operator configuration resources +# Pmrmissions required to create a Dash0 operator configuration resource: - apiGroups: - "" resources: @@ -57,7 +57,7 @@ rules: - get # Permissions required to queue events to report about the operator's actions, and to attach dangling events to their -# respective involved objects. +# respective involved objects: - apiGroups: - "" resources: @@ -75,7 +75,7 @@ rules: - get # Permissions required to automatically restart (i.e. delete) pods when instrumenting replicasets that are not part of a -# higher order workload (e.g. a deployment, daemonset). +# higher order workload (e.g. a deployment, daemonset): - apiGroups: - "" resources: @@ -85,7 +85,7 @@ rules: - get - list -# Permissions required to watch for the Perses dashboard resources. +# Permissions required to watch Perses dashboard resources: - apiGroups: - perses.dev resources: @@ -95,7 +95,17 @@ rules: - list - watch -# Permissions required to manage the Dash0 monitoring resource, its finalizers and status. +# Permissions required to watch Prometheus rule resources: +- apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch + +# Permissions required to manage the Dash0 monitoring resource, its finalizers and status: - apiGroups: - operator.dash0.com resources: @@ -110,7 +120,7 @@ rules: - update - watch -# Permissions required to manage the Dash0 monitoring resource, its finalizers and status. +# Permissions required to manage the Dash0 monitoring resource, its finalizers and status: - apiGroups: - operator.dash0.com resources: @@ -118,7 +128,7 @@ rules: verbs: - update -# Permissions required to manage the Dash0 monitoring resource, its finalizers and status. +# Permissions required to manage the Dash0 monitoring resource, its finalizers and status: - apiGroups: - operator.dash0.com resources: @@ -128,7 +138,7 @@ rules: - patch - update -# Permissions required to manage the Dash0 operator configuration resource, its finalizers and status. +# Permissions required to manage the Dash0 operator configuration resource, its finalizers and status: - apiGroups: - operator.dash0.com resources: @@ -143,7 +153,7 @@ rules: - update - watch -# Permissions required to manage the Dash0 operator configuration resource, its finalizers and status. +# Permissions required to manage the Dash0 operator configuration resource, its finalizers and status: - apiGroups: - operator.dash0.com resources: @@ -151,7 +161,7 @@ rules: verbs: - update -# Permissions required to manage the Dash0 operator configuration resource, its finalizers and status. +# Permissions required to manage the Dash0 operator configuration resource, its finalizers and status: - apiGroups: - operator.dash0.com resources: @@ -161,7 +171,7 @@ rules: - patch - update -# Permissions required to manage OTel collector resources. +# Permissions required to manage OTel collector resources: - apiGroups: - "" resources: diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap index c8f1048..da3e5e1 100644 --- a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap @@ -82,6 +82,14 @@ cluster roles should match snapshot: - get - list - watch + - apiGroups: + - monitoring.coreos.com + resources: + - prometheusrules + verbs: + - get + - list + - watch - apiGroups: - operator.dash0.com resources: diff --git a/internal/backendconnection/otelcolresources/collector_config_maps.go b/internal/backendconnection/otelcolresources/collector_config_maps.go index 6bc78f5..596f42f 100644 --- a/internal/backendconnection/otelcolresources/collector_config_maps.go +++ b/internal/backendconnection/otelcolresources/collector_config_maps.go @@ -121,7 +121,7 @@ func ConvertExportSettingsToExporterList(export dash0v1alpha1.Export) ([]OtlpExp Name: util.AuthorizationHeaderName, Value: authHeaderValue, }} - if d0.Dataset != "" && d0.Dataset != "default" { + if d0.Dataset != "" && d0.Dataset != util.DatasetDefault { headers = append(headers, dash0v1alpha1.Header{ Name: util.Dash0DatasetHeaderName, Value: d0.Dataset, diff --git a/internal/dash0/controller/operator_configuration_controller.go b/internal/dash0/controller/operator_configuration_controller.go index 9938b66..1335c71 100644 --- a/internal/dash0/controller/operator_configuration_controller.go +++ b/internal/dash0/controller/operator_configuration_controller.go @@ -25,14 +25,14 @@ import ( type OperatorConfigurationReconciler struct { client.Client - Clientset *kubernetes.Clientset - PersesDashboardCrdReconciler *PersesDashboardCrdReconciler - Scheme *runtime.Scheme - Recorder record.EventRecorder - DeploymentSelfReference *appsv1.Deployment - DanglingEventsTimeouts *util.DanglingEventsTimeouts - Images util.Images - DevelopmentMode bool + Clientset *kubernetes.Clientset + ApiClients []ApiClient + Scheme *runtime.Scheme + Recorder record.EventRecorder + DeploymentSelfReference *appsv1.Deployment + DanglingEventsTimeouts *util.DanglingEventsTimeouts + Images util.Images + DevelopmentMode bool } const ( @@ -163,16 +163,20 @@ func (r *OperatorConfigurationReconciler) Reconcile(ctx context.Context, req ctr if resource.HasDash0ApiAccessConfigured() { dataset := resource.Spec.Export.Dash0.Dataset if dataset == "" { - dataset = "default" + dataset = util.DatasetDefault + } + for _, apiClient := range r.ApiClients { + apiClient.SetApiEndpointAndDataset(&ApiConfig{ + Endpoint: resource.Spec.Export.Dash0.ApiEndpoint, + Dataset: dataset, + }, &logger) } - r.PersesDashboardCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ - Endpoint: resource.Spec.Export.Dash0.ApiEndpoint, - Dataset: dataset, - }, &logger) } else { - logger.Info("Settings required for managing dashboards via the operator are missing, the operator will not " + - "update dashboards in Dash0.") - r.PersesDashboardCrdReconciler.RemoveApiEndpointAndDataset() + logger.Info("Settings required for managing dashboards or check rules via the operator are missing, the " + + "operator will not update dashboards nor check rules in Dash0.") + for _, apiClient := range r.ApiClients { + apiClient.RemoveApiEndpointAndDataset() + } } currentSelfMonitoringAndApiAccessConfiguration, err := diff --git a/internal/dash0/controller/operator_configuration_controller_test.go b/internal/dash0/controller/operator_configuration_controller_test.go index 9454b72..7a4de24 100644 --- a/internal/dash0/controller/operator_configuration_controller_test.go +++ b/internal/dash0/controller/operator_configuration_controller_test.go @@ -8,6 +8,7 @@ import ( "fmt" "reflect" + "github.com/go-logr/logr" json "github.com/json-iterator/go" "github.com/wI2L/jsondiff" appsv1 "k8s.io/api/apps/v1" @@ -40,6 +41,8 @@ type SelfMonitoringTestConfig struct { var ( reconciler *OperatorConfigurationReconciler + apiClient1 ApiClient + apiClient2 ApiClient ) var _ = Describe("The operation configuration resource controller", Ordered, func() { @@ -51,6 +54,11 @@ var _ = Describe("The operation configuration resource controller", Ordered, fun EnsureOperatorNamespaceExists(ctx, k8sClient) }) + BeforeEach(func() { + apiClient1 = &DummyApiClient{} + apiClient2 = &DummyApiClient{} + }) + Describe("updates the controller deployment", func() { AfterEach(func() { RemoveOperatorConfigurationResource(ctx, k8sClient) @@ -797,17 +805,18 @@ func cleanUpDeploymentSpecForDiff(spec *appsv1.DeploymentSpec) { } func createReconciler(controllerDeployment *appsv1.Deployment) *OperatorConfigurationReconciler { - persesDashboardCrdReconciler := &PersesDashboardCrdReconciler{ - persesDashboardReconciler: &PersesDashboardReconciler{}, - } + return &OperatorConfigurationReconciler{ - Client: k8sClient, - Clientset: clientset, - Recorder: recorder, - PersesDashboardCrdReconciler: persesDashboardCrdReconciler, - DeploymentSelfReference: controllerDeployment, - DanglingEventsTimeouts: &DanglingEventsTimeoutsTest, - Images: TestImages, + Client: k8sClient, + Clientset: clientset, + Recorder: recorder, + ApiClients: []ApiClient{ + apiClient1, + apiClient2, + }, + DeploymentSelfReference: controllerDeployment, + DanglingEventsTimeouts: &DanglingEventsTimeoutsTest, + Images: TestImages, } } @@ -932,3 +941,19 @@ func verifyNoSelfMonitoringButAuthTokenEnvVarFromSecretRef( g.Expect(container.Env).To( ContainElement(MatchEnvVarValueFrom("SELF_MONITORING_AND_API_AUTH_TOKEN", "secret-ref", "key"))) } + +type DummyApiClient struct { + setCalls int + removeCalls int + apiConfig *ApiConfig +} + +func (c *DummyApiClient) SetApiEndpointAndDataset(apiConfig *ApiConfig, _ *logr.Logger) { + c.setCalls++ + c.apiConfig = apiConfig +} + +func (c *DummyApiClient) RemoveApiEndpointAndDataset() { + c.removeCalls++ + c.apiConfig = nil +} diff --git a/internal/dash0/controller/perses_dashboards_controller.go b/internal/dash0/controller/perses_dashboards_controller.go index 5533fba..a37d5ae 100644 --- a/internal/dash0/controller/perses_dashboards_controller.go +++ b/internal/dash0/controller/perses_dashboards_controller.go @@ -8,30 +8,20 @@ import ( "context" "encoding/json" "fmt" - "io" "net/http" + "net/url" "strings" "sync/atomic" - "time" "github.com/go-logr/logr" otelmetric "go.opentelemetry.io/otel/metric" - corev1 "k8s.io/api/core/v1" - apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/workqueue" - "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/dash0hq/dash0-operator/internal/dash0/util" @@ -47,129 +37,99 @@ type PersesDashboardCrdReconciler struct { //+kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch -type ApiConfig struct { - Endpoint string - Dataset string +var ( + persesDashboardCrdReconcileRequestMetric otelmetric.Int64Counter + persesDashboardReconcileRequestMetric otelmetric.Int64Counter +) + +func (r *PersesDashboardCrdReconciler) Manager() ctrl.Manager { + return r.mgr } -type validationResult struct { - namespace string - name string - url string - origin string - authToken string +func (r *PersesDashboardCrdReconciler) GetAuthToken() string { + return r.AuthToken } -var ( - // persesDashboardCrdReconcileRequestMetric otelmetric.Int64Counter - persesDashboardReconcileRequestMetric otelmetric.Int64Counter +func (r *PersesDashboardCrdReconciler) KindDisplayName() string { + return "Perses dashboard" +} - retrySettings = wait.Backoff{ - Duration: 5 * time.Second, - Factor: 1.5, - Steps: 3, - } -) +func (r *PersesDashboardCrdReconciler) Group() string { + return "perses.dev" +} -func (r *PersesDashboardCrdReconciler) SetupWithManager( - ctx context.Context, - mgr ctrl.Manager, - startupK8sClient client.Client, - logger *logr.Logger, -) error { - if r.AuthToken == "" { - logger.Info("No Dash0 auth token has been provided via the operator configuration resource. The operator " + - "will not watch for Perses dashboard resources.") - return nil - } +func (r *PersesDashboardCrdReconciler) Kind() string { + return "PersesDashboard" +} - kubeSystemNamespace := &corev1.Namespace{} - if err := startupK8sClient.Get(ctx, client.ObjectKey{Name: "kube-system"}, kubeSystemNamespace); err != nil { - msg := "unable to get the kube-system namespace uid" - logger.Error(err, msg) - return fmt.Errorf("%s: %w", msg, err) - } +func (r *PersesDashboardCrdReconciler) Version() string { + return "v1alpha1" +} - r.mgr = mgr - r.persesDashboardReconciler = &PersesDashboardReconciler{ - pseudoClusterUid: kubeSystemNamespace.UID, - httpClient: &http.Client{}, - authToken: r.AuthToken, - } +func (r *PersesDashboardCrdReconciler) QualifiedKind() string { + return "persesdashboards.perses.dev" +} - if err := startupK8sClient.Get(ctx, client.ObjectKey{ - Name: "persesdashboards.perses.dev", - }, &apiextensionsv1.CustomResourceDefinition{}); err != nil { - if !apierrors.IsNotFound(err) { - logger.Error(err, "unable to call get the persesdashboards.perses.dev custom resource definition") - return err - } - } else { - r.persesDashboardCrdExists.Store(true) - r.maybeStartWatchingPersesDashboardResources(true, logger) - } +func (r *PersesDashboardCrdReconciler) ControllerName() string { + return "dash0_perses_dashboard_crd_controller" +} - controllerBuilder := ctrl.NewControllerManagedBy(mgr). - Named("dash0_perses_dashboard_crd_controller"). - Watches( - &apiextensionsv1.CustomResourceDefinition{}, - // Deliberately not using a convenience mechanism like &handler.EnqueueRequestForObject{} (which would - // feed all events into the Reconcile method) here, since using the lower-level TypedEventHandler interface - // directly allows us to distinguish between create and delete events more easily. - r, - builder.WithPredicates(makeFilterPredicate())) - if r.skipNameValidation { - controllerBuilder = controllerBuilder.WithOptions(controller.TypedOptions[reconcile.Request]{ - SkipNameValidation: ptr.To(true), - }) - } - if err := controllerBuilder.Complete(r); err != nil { - logger.Error(err, "unable to build the controller for the Perses Dashboard CRD reconciler") - return err - } +func (r *PersesDashboardCrdReconciler) DoesCrdExist() *atomic.Bool { + return &r.persesDashboardCrdExists +} - return nil +func (r *PersesDashboardCrdReconciler) SetCrdExists(exists bool) { + r.persesDashboardCrdExists.Store(exists) } -//+kubebuilder:rbac:groups=perses.dev,resources=persesdashboards,verbs=get;list;watch +func (r *PersesDashboardCrdReconciler) SkipNameValidation() bool { + return r.skipNameValidation +} -func makeFilterPredicate() predicate.Funcs { - return predicate.Funcs{ - CreateFunc: func(e event.CreateEvent) bool { - return isPersesDashboardCrd(e.Object) - }, - UpdateFunc: func(e event.UpdateEvent) bool { - // We are not interested in updates, but we still need to define a filter predicate for it, otherwise _all_ - // update events for CRDs would be passed to our event handler. We always return false to ignore update - // events entirely. Same for generic events. - return false - }, - DeleteFunc: func(e event.DeleteEvent) bool { - return isPersesDashboardCrd(e.Object) - }, - GenericFunc: func(e event.GenericEvent) bool { - return false - }, +func (r *PersesDashboardCrdReconciler) CreateResourceReconciler( + pseudoClusterUid types.UID, + authToken string, + httpClient *http.Client, +) { + r.persesDashboardReconciler = &PersesDashboardReconciler{ + pseudoClusterUid: pseudoClusterUid, + authToken: authToken, + httpClient: httpClient, } } -func isPersesDashboardCrd(crd client.Object) bool { - if crdCasted, ok := crd.(*apiextensionsv1.CustomResourceDefinition); ok { - return crdCasted.Spec.Group == "perses.dev" && - crdCasted.Spec.Names.Kind == "PersesDashboard" - } else { - return false - } +func (r *PersesDashboardCrdReconciler) ResourceReconciler() ThirdPartyResourceReconciler { + return r.persesDashboardReconciler } +func (r *PersesDashboardCrdReconciler) SetupWithManager( + ctx context.Context, + mgr ctrl.Manager, + startupK8sClient client.Client, + logger *logr.Logger, +) error { + r.mgr = mgr + return SetupThirdPartyCrdReconcilerWithManager( + ctx, + startupK8sClient, + r, + logger, + ) +} + +//+kubebuilder:rbac:groups=perses.dev,resources=persesdashboards,verbs=get;list;watch + func (r *PersesDashboardCrdReconciler) Create( ctx context.Context, _ event.TypedCreateEvent[client.Object], _ workqueue.TypedRateLimitingInterface[reconcile.Request], ) { + if persesDashboardCrdReconcileRequestMetric != nil { + persesDashboardCrdReconcileRequestMetric.Add(ctx, 1) + } logger := log.FromContext(ctx) r.persesDashboardCrdExists.Store(true) - r.maybeStartWatchingPersesDashboardResources(false, &logger) + maybeStartWatchingThirdPartyResources(r, false, &logger) } func (r *PersesDashboardCrdReconciler) Update( @@ -186,6 +146,9 @@ func (r *PersesDashboardCrdReconciler) Delete( _ event.TypedDeleteEvent[client.Object], _ workqueue.TypedRateLimitingInterface[reconcile.Request], ) { + if persesDashboardCrdReconcileRequestMetric != nil { + persesDashboardCrdReconcileRequestMetric.Add(ctx, 1) + } logger := log.FromContext(ctx) logger.Info("The PersesDashboard custom resource definition has been deleted.") r.persesDashboardCrdExists.Store(false) @@ -218,18 +181,15 @@ func (r *PersesDashboardCrdReconciler) InitializeSelfMonitoringMetrics( metricNamePrefix string, logger *logr.Logger, ) { - // Note: The persesDashboardCrdReconcileRequestMetric is unused until we actually implement watching the - // PersesDashboard _CRD_, see comment above in SetupWithManager. - - // reconcileRequestMetricName := fmt.Sprintf("%s%s", metricNamePrefix, "persesdashboardcrd.reconcile_requests") - // var err error - // if persesDashboardCrdReconcileRequestMetric, err = meter.Int64Counter( - // reconcileRequestMetricName, - // otelmetric.WithUnit("1"), - // otelmetric.WithDescription("Counter for persesdashboard CRD reconcile requests"), - // ); err != nil { - // logger.Error(err, "Cannot initialize the metric %s.") - // } + reconcileRequestMetricName := fmt.Sprintf("%s%s", metricNamePrefix, "persesdashboardcrd.reconcile_requests") + var err error + if persesDashboardCrdReconcileRequestMetric, err = meter.Int64Counter( + reconcileRequestMetricName, + otelmetric.WithUnit("1"), + otelmetric.WithDescription("Counter for persesdashboard CRD reconcile requests"), + ); err != nil { + logger.Error(err, "Cannot initialize the metric %s.") + } r.persesDashboardReconciler.InitializeSelfMonitoringMetrics( meter, @@ -247,7 +207,7 @@ func (r *PersesDashboardCrdReconciler) SetApiEndpointAndDataset( return } r.persesDashboardReconciler.apiConfig.Store(apiConfig) - r.maybeStartWatchingPersesDashboardResources(false, logger) + maybeStartWatchingThirdPartyResources(r, false, logger) } func (r *PersesDashboardCrdReconciler) RemoveApiEndpointAndDataset() { @@ -259,72 +219,8 @@ func (r *PersesDashboardCrdReconciler) RemoveApiEndpointAndDataset() { r.persesDashboardReconciler.apiConfig.Store(nil) } -func (r *PersesDashboardCrdReconciler) maybeStartWatchingPersesDashboardResources(isStartup bool, logger *logr.Logger) { - if r.persesDashboardReconciler.isWatching { - // we are already watching, do not start a second watch - return - } - - if !r.persesDashboardCrdExists.Load() { - logger.Info("The persesdashboards.perses.dev custom resource definition does not exist in this cluster, the " + - "operator will not watch for Perses dashboard resources.") - return - } - - apiConfig := r.persesDashboardReconciler.apiConfig.Load() - if !isValidApiConfig(apiConfig) { - if !isStartup { - // Silently ignore this missing precondition if it happens during the startup of the operator. It will - // be remedied automatically once the operator configuration resource is reconciled for the first time. - logger.Info("The persesdashboards.perses.dev custom resource definition is present in this " + - "cluster, but no Dash0 API endpoint been provided via the operator configuration resource, or the " + - "operator configuration resource has not been reconciled yet. The operator will not watch for Perses " + - "dashboard resources. (If there is an operator configuration resource with an API endpoint present in " + - "the cluster, it will be reconciled in a few seconds and this message can be safely ignored.)") - } - return - } - - logger.Info("The persesdashboards.perses.dev custom resource definition is present in this " + - "cluster, and a Dash0 API endpoint has been provided. The operator will watch for Perses dashboard resources.") - r.startWatchingPersesDashboardResources(logger) -} - -func (r *PersesDashboardCrdReconciler) startWatchingPersesDashboardResources( - logger *logr.Logger, -) { - logger.Info("Setting up a watch for Perses dashboard custom resources.") - - unstructuredGvkForPersesDashboards := &unstructured.Unstructured{} - unstructuredGvkForPersesDashboards.SetGroupVersionKind(schema.GroupVersionKind{ - Kind: "PersesDashboard", - Group: "perses.dev", - Version: "v1alpha1", - }) - - controllerBuilder := ctrl.NewControllerManagedBy(r.mgr). - Named("dash0_perses_dashboard_controller"). - Watches( - unstructuredGvkForPersesDashboards, - // Deliberately not using a convenience mechanism like &handler.EnqueueRequestForObject{} (which would - // feed all events into the Reconcile method) here, since using the lower-level TypedEventHandler interface - // directly allows us to distinguish between create and delete events more easily. - r.persesDashboardReconciler, - ) - if r.skipNameValidation { - controllerBuilder = controllerBuilder.WithOptions(controller.TypedOptions[reconcile.Request]{ - SkipNameValidation: ptr.To(true), - }) - } - if err := controllerBuilder.Complete(r.persesDashboardReconciler); err != nil { - logger.Error(err, "unable to create a new controller for watching Perses Dashboards") - return - } - r.persesDashboardReconciler.isWatching = true -} - type PersesDashboardReconciler struct { - isWatching bool + isWatching atomic.Bool pseudoClusterUid types.UID httpClient *http.Client apiConfig atomic.Pointer[ApiConfig] @@ -347,6 +243,38 @@ func (r *PersesDashboardReconciler) InitializeSelfMonitoringMetrics( } } +func (r *PersesDashboardReconciler) KindDisplayName() string { + return "Perses dashboard" +} + +func (r *PersesDashboardReconciler) ShortName() string { + return "dashboard" +} + +func (r *PersesDashboardReconciler) IsWatching() *atomic.Bool { + return &r.isWatching +} + +func (r *PersesDashboardReconciler) SetIsWatching(isWatching bool) { + r.isWatching.Store(isWatching) +} + +func (r *PersesDashboardReconciler) GetAuthToken() string { + return r.authToken +} + +func (r *PersesDashboardReconciler) GetApiConfig() *atomic.Pointer[ApiConfig] { + return &r.apiConfig +} + +func (r *PersesDashboardReconciler) ControllerName() string { + return "dash0_perses_dashboard_controller" +} + +func (r *PersesDashboardReconciler) HttpClient() *http.Client { + return r.httpClient +} + func (r *PersesDashboardReconciler) Create( ctx context.Context, e event.TypedCreateEvent[client.Object], @@ -364,8 +292,17 @@ func (r *PersesDashboardReconciler) Create( "name", e.Object.GetName(), ) - if err := r.UpsertDashboard(e.Object.(*unstructured.Unstructured), &logger); err != nil { - logger.Error(err, "unable to upsert the dashboard") + + if err := util.RetryWithCustomBackoff( + "create dashboard", + func() error { + return upsertViaApi(r, e.Object.(*unstructured.Unstructured), &logger) + }, + retrySettings, + true, + &logger, + ); err != nil { + logger.Error(err, "failed to create the dashboard") } } @@ -387,15 +324,17 @@ func (r *PersesDashboardReconciler) Update( e.ObjectNew.GetName(), ) - _ = util.RetryWithCustomBackoff( - "upsert dashboard", + if err := util.RetryWithCustomBackoff( + "update dashboard", func() error { - return r.UpsertDashboard(e.ObjectNew.(*unstructured.Unstructured), &logger) + return upsertViaApi(r, e.ObjectNew.(*unstructured.Unstructured), &logger) }, retrySettings, true, &logger, - ) + ); err != nil { + logger.Error(err, "failed to update the dashboard") + } } func (r *PersesDashboardReconciler) Delete( @@ -416,15 +355,17 @@ func (r *PersesDashboardReconciler) Delete( e.Object.GetName(), ) - _ = util.RetryWithCustomBackoff( + if err := util.RetryWithCustomBackoff( "delete dashboard", func() error { - return r.DeleteDashboard(e.Object.(*unstructured.Unstructured), &logger) + return deleteViaApi(r, e.Object.(*unstructured.Unstructured), &logger) }, retrySettings, true, &logger, - ) + ); err != nil { + logger.Error(err, "failed to delete the dashboard") + } } func (r *PersesDashboardReconciler) Generic( @@ -445,268 +386,94 @@ func (r *PersesDashboardReconciler) Reconcile( return reconcile.Result{}, nil } -func (r *PersesDashboardReconciler) UpsertDashboard( - persesDashboard *unstructured.Unstructured, +func (r *PersesDashboardReconciler) MapResourceToHttpRequests( + preconditionChecksResult *preconditionValidationResult, + action apiAction, logger *logr.Logger, -) error { - apiConfig := r.apiConfig.Load() - valResult, executeRequest := r.validateConfigAndRenderUrl( - persesDashboard, - apiConfig, - logger, - ) - if !executeRequest { - return nil - } +) ([]*http.Request, error) { + dashboardUrl := r.renderDashboardUrl(preconditionChecksResult) - specRaw := persesDashboard.Object["spec"] - if specRaw == nil { - logger.Info("Perses dashboard has no spec, the dashboard will not be updated in Dash0.") - return nil - } - spec, ok := specRaw.(map[string]interface{}) - if !ok { - logger.Info("Perses dashboard spec is not a map, the dashboard will not be updated in Dash0.") - return nil - } - displayRaw := spec["display"] - if displayRaw == nil { - spec["display"] = map[string]interface{}{} - displayRaw = spec["display"] - } - display, ok := displayRaw.(map[string]interface{}) - if !ok { - logger.Info("Perses dashboard spec.display is not a map, the dashboard will not be updated in Dash0.") - return nil - } - - displayName, ok := display["name"] - if !ok || displayName == "" { - // Let the dashboard name default to the perses dashboard resource's namespace + name, if unset. - display["name"] = fmt.Sprintf("%s/%s", valResult.namespace, valResult.name) - } - - // Remove all unnecessary metadata (labels & annotations), we basically only need the dashboard spec. - serializedDashboard, _ := json.Marshal( - map[string]interface{}{ - "kind": "PersesDashboard", - "spec": spec, - }) - requestPayload := bytes.NewBuffer(serializedDashboard) - - req, err := http.NewRequest( - http.MethodPut, - valResult.url, - requestPayload, - ) - if err != nil { - logger.Error(err, "unable to create a new HTTP request to upsert the dashboard") - return err - } - req.Header.Set("Content-Type", "application/json") - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", valResult.authToken)) - logger.Info(fmt.Sprintf("Updating/creating dashboard %s in Dash0", valResult.origin)) - res, err := r.httpClient.Do(req) - if err != nil { - logger.Error(err, fmt.Sprintf("unable to execute the HTTP request to update the dashboard %s", valResult.origin)) - return err - } - - if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices { - return r.handleNon2xxStatusCode(res, valResult.origin, logger) - } - - // http status code was 2xx, discard the response body and close it - defer func() { - _, _ = io.Copy(io.Discard, res.Body) - _ = res.Body.Close() - }() + var req *http.Request + var err error - return nil -} + //nolint:ineffassign + actionLabel := "?" + switch action { + case upsert: + actionLabel = "upsert" + spec := preconditionChecksResult.spec + displayRaw := spec["display"] + if displayRaw == nil { + spec["display"] = map[string]interface{}{} + displayRaw = spec["display"] + } + display, ok := displayRaw.(map[string]interface{}) + if !ok { + logger.Info("Perses dashboard spec.display is not a map, the dashboard will not be updated in Dash0.") + return nil, nil + } + displayName, ok := display["name"] + if !ok || displayName == "" { + // Let the dashboard name default to the perses dashboard resource's namespace + name, if unset. + display["name"] = fmt.Sprintf("%s/%s", preconditionChecksResult.k8sNamespace, preconditionChecksResult.k8sName) + } -func (r *PersesDashboardReconciler) DeleteDashboard( - persesDashboard *unstructured.Unstructured, - logger *logr.Logger, -) error { - apiConfig := r.apiConfig.Load() - valResult, executeRequest := r.validateConfigAndRenderUrl( - persesDashboard, - apiConfig, - logger, - ) - if !executeRequest { - return nil + // Remove all unnecessary metadata (labels & annotations), we basically only need the dashboard spec. + serializedDashboard, _ := json.Marshal( + map[string]interface{}{ + "kind": "PersesDashboard", + "spec": spec, + }) + requestPayload := bytes.NewBuffer(serializedDashboard) + + req, err = http.NewRequest( + http.MethodPut, + dashboardUrl, + requestPayload, + ) + case delete: + actionLabel = "delete" + req, err = http.NewRequest( + http.MethodDelete, + dashboardUrl, + nil, + ) + default: + logger.Error(fmt.Errorf("unknown API action: %d", action), "unknown API action") + return nil, nil } - req, err := http.NewRequest( - http.MethodDelete, - valResult.url, - nil, - ) if err != nil { - logger.Error(err, "unable to create a new HTTP request to delete the dashboard") - return err - } - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", valResult.authToken)) - logger.Info(fmt.Sprintf("Deleting dashboard %s in Dash0", valResult.origin)) - res, err := r.httpClient.Do(req) - if err != nil { - logger.Error(err, fmt.Sprintf("unable to execute the HTTP request to delete the dashboard %s", valResult.origin)) - return err + logger.Error(err, fmt.Sprintf("unable to create a new HTTP request to %s the dashboard", actionLabel)) + return nil, err } - if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices { - return r.handleNon2xxStatusCode(res, valResult.origin, logger) + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", preconditionChecksResult.authToken)) + if action == upsert { + req.Header.Set("Content-Type", "application/json") } - // http status code was 2xx, discard the response body and close it - defer func() { - _, _ = io.Copy(io.Discard, res.Body) - _ = res.Body.Close() - }() - - return nil + return []*http.Request{req}, nil } -func (r *PersesDashboardReconciler) validateConfigAndRenderUrl( - persesDashboard *unstructured.Unstructured, - apiConfig *ApiConfig, - logger *logr.Logger, -) (*validationResult, bool) { - if !isValidApiConfig(apiConfig) { - logger.Info("No Dash0 API endpoint has been provided via the operator configuration resource, the dashboard " + - "will not be updated in Dash0.") - return nil, false - } - if r.authToken == "" { - logger.Info("No auth token is set on the controller deployment, the dashboard will not be updated " + - "in Dash0.") - return nil, false - } - - dataset := apiConfig.Dataset - if dataset == "" { - dataset = "default" - } - - namespace, name, ok := readNamespaceAndName(persesDashboard, logger) - if !ok { - return nil, false - } - - dashboardUrl, dashboardOrigin := r.renderDashboardUrl( - apiConfig.Endpoint, - namespace, - name, - dataset, - ) - return &validationResult{ - namespace: namespace, - name: name, - url: dashboardUrl, - origin: dashboardOrigin, - authToken: r.authToken, - }, true -} - -func isValidApiConfig(apiConfig *ApiConfig) bool { - return apiConfig != nil && apiConfig.Endpoint != "" -} - -func (r *PersesDashboardReconciler) renderDashboardUrl( - dash0ApiEndpoint string, - namespace string, - name string, - dataset string, -) (string, string) { +func (r *PersesDashboardReconciler) renderDashboardUrl(preconditionCheckResult *preconditionValidationResult) string { dashboardOrigin := fmt.Sprintf( // we deliberately use _ as the separator, since that is an illegal character in Kubernetes names. This avoids // any potential naming collisions (e.g. namespace="abc" & name="def-ghi" vs. namespace="abc-def" & name="ghi"). "dash0-operator_%s_%s_%s_%s", r.pseudoClusterUid, - dataset, - namespace, - name, + urlEncodePathSegment(preconditionCheckResult.dataset), + preconditionCheckResult.k8sNamespace, + preconditionCheckResult.k8sName, ) - if !strings.HasSuffix(dash0ApiEndpoint, "/") { - dash0ApiEndpoint += "/" + if !strings.HasSuffix(preconditionCheckResult.apiEndpoint, "/") { + preconditionCheckResult.apiEndpoint += "/" } return fmt.Sprintf( "%sapi/dashboards/%s?dataset=%s", - dash0ApiEndpoint, + preconditionCheckResult.apiEndpoint, dashboardOrigin, - dataset, - ), dashboardOrigin -} - -func (r *PersesDashboardReconciler) handleNon2xxStatusCode( - res *http.Response, - dashboardOrigin string, - logger *logr.Logger, -) error { - defer func() { - _ = res.Body.Close() - }() - responseBody, readErr := io.ReadAll(res.Body) - if readErr != nil { - readBodyErr := fmt.Errorf("unable to read the API response payload after receiving status code %d when "+ - "trying to udpate/create/delete the dashboard %s", res.StatusCode, dashboardOrigin) - logger.Error(readBodyErr, "unable to read the API response payload") - return readBodyErr - } - - statusCodeErr := fmt.Errorf( - "unexpected status code %d when updating/creating/deleting the dashboard %s, response body is %s", - res.StatusCode, - dashboardOrigin, - string(responseBody), + url.QueryEscape(preconditionCheckResult.dataset), ) - logger.Error(statusCodeErr, "unexpected status code") - return statusCodeErr -} - -func readNamespaceAndName(persesDashboard *unstructured.Unstructured, logger *logr.Logger) (string, string, bool) { - metadataRaw := persesDashboard.Object["metadata"] - if metadataRaw == nil { - logger.Info("Perses dashboard payload has no metadata section, the dashboard will not be updated in Dash0.") - return "", "", false - } - metadata, ok := metadataRaw.(map[string]interface{}) - if !ok { - logger.Info("Perses dashboard payload metadata section is not a map, the dashboard will not be updated in " + - "Dash0.") - return "", "", false - } - namespace, ok := readStringAttribute(metadata, "namespace", logger) - if !ok { - return "", "", false - } - name, ok := readStringAttribute(metadata, "name", logger) - if !ok { - return "", "", false - } - return namespace, name, true -} - -func readStringAttribute(metadata map[string]interface{}, attributeName string, logger *logr.Logger) (string, bool) { - valueRaw := metadata[attributeName] - if valueRaw == nil { - logger.Info(fmt.Sprintf("Perses dashboard has no attribute metadata.%s, the dashboard will not be updated in "+ - "Dash0.", attributeName)) - return "", false - } - value, ok := valueRaw.(string) - if !ok { - logger.Info(fmt.Sprintf("Perses dashboard metadata.%s is not a string, the dashboard will not be updated "+ - "in Dash0.", attributeName)) - return "", false - } - if value == "" { - logger.Info(fmt.Sprintf("Perses dashboard has no attribute metadata.%s, the dashboard will not be updated in "+ - "Dash0.", attributeName)) - return "", false - } - return value, true } diff --git a/internal/dash0/controller/perses_dashboards_controller_test.go b/internal/dash0/controller/perses_dashboards_controller_test.go index a1e5ba6..71ead7b 100644 --- a/internal/dash0/controller/perses_dashboards_controller_test.go +++ b/internal/dash0/controller/perses_dashboards_controller_test.go @@ -6,21 +6,21 @@ package controller import ( "context" "encoding/json" + "fmt" "time" - "github.com/h2non/gock" persesv1alpha1 "github.com/perses/perses-operator/api/v1alpha1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllertest" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/h2non/gock" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -28,12 +28,12 @@ import ( ) var ( - crdReconciler *PersesDashboardCrdReconciler - crd *apiextensionsv1.CustomResourceDefinition + persesDashboardCrdReconciler *PersesDashboardCrdReconciler + persesDashboardCrd *apiextensionsv1.CustomResourceDefinition - crdQualifiedName = types.NamespacedName{ - Name: "persesdashboards.perses.dev", - } + dashboardApiBasePath = "/api/dashboards/" + + defaultExpectedPathDashboard = fmt.Sprintf("%s.*%s", dashboardApiBasePath, "dash0-operator_.*_test-dataset_test-namespace_test-dashboard") ) var _ = Describe("The Perses dashboard controller", Ordered, func() { @@ -52,78 +52,78 @@ var _ = Describe("The Perses dashboard controller", Ordered, func() { }) It("does not create a Perses dashboard resource reconciler if there is no auth token", func() { - createCrdReconcilerWithoutAuthToken() + createPersesDashboardCrdReconcilerWithoutAuthToken() ensurePersesDashboardCrdExists(ctx) - Expect(crdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) - Expect(crdReconciler.persesDashboardReconciler).To(BeNil()) - crdReconciler.SetApiEndpointAndDataset(&ApiConfig{ + Expect(persesDashboardCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + Expect(persesDashboardCrdReconciler.persesDashboardReconciler).To(BeNil()) + persesDashboardCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ Endpoint: ApiEndpointTest, Dataset: DatasetTest, }, &logger) - Expect(crdReconciler.persesDashboardReconciler).To(BeNil()) + Expect(persesDashboardCrdReconciler.persesDashboardReconciler).To(BeNil()) }) It("does not start watching Perses dashboards if the CRD does not exist and the API endpoint has not been provided", func() { - createCrdReconcilerWithAuthToken() - Expect(crdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) - Expect(crdReconciler.persesDashboardReconciler.isWatching).To(BeFalse()) + createPersesDashboardCrdReconcilerWithAuthToken() + Expect(persesDashboardCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + Expect(persesDashboardCrdReconciler.persesDashboardReconciler.isWatching.Load()).To(BeFalse()) }) It("does not start watching Perses dashboards if the API endpoint has been provided but the CRD does not exist", func() { - createCrdReconcilerWithAuthToken() - Expect(crdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) - crdReconciler.SetApiEndpointAndDataset(&ApiConfig{ + createPersesDashboardCrdReconcilerWithAuthToken() + Expect(persesDashboardCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + persesDashboardCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ Endpoint: ApiEndpointTest, Dataset: DatasetTest, }, &logger) - Expect(crdReconciler.persesDashboardReconciler.isWatching).To(BeFalse()) + Expect(persesDashboardCrdReconciler.persesDashboardReconciler.isWatching.Load()).To(BeFalse()) }) It("does not start watching Perses dashboards if the CRD exists but the API endpoint has not been provided", func() { - createCrdReconcilerWithAuthToken() + createPersesDashboardCrdReconcilerWithAuthToken() ensurePersesDashboardCrdExists(ctx) - Expect(crdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) - Expect(crdReconciler.persesDashboardReconciler.isWatching).To(BeFalse()) + Expect(persesDashboardCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + Expect(persesDashboardCrdReconciler.persesDashboardReconciler.isWatching.Load()).To(BeFalse()) }) It("starts watching Perses dashboards if the CRD exists and the API endpoint has been provided", func() { - createCrdReconcilerWithAuthToken() + createPersesDashboardCrdReconcilerWithAuthToken() ensurePersesDashboardCrdExists(ctx) - Expect(crdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) - Expect(crdReconciler.persesDashboardReconciler.isWatching).To(BeFalse()) - crdReconciler.SetApiEndpointAndDataset(&ApiConfig{ + Expect(persesDashboardCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + Expect(persesDashboardCrdReconciler.persesDashboardReconciler.isWatching.Load()).To(BeFalse()) + persesDashboardCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ Endpoint: ApiEndpointTest, Dataset: DatasetTest, }, &logger) - Expect(crdReconciler.persesDashboardReconciler.isWatching).To(BeTrue()) + Expect(persesDashboardCrdReconciler.persesDashboardReconciler.isWatching.Load()).To(BeTrue()) }) It("starts watching Perses dashboards if API endpoint is provided and the CRD is created later on", func() { - createCrdReconcilerWithAuthToken() - Expect(crdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + createPersesDashboardCrdReconcilerWithAuthToken() + Expect(persesDashboardCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) // provide the API endpoint first - crdReconciler.SetApiEndpointAndDataset(&ApiConfig{ + persesDashboardCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ Endpoint: ApiEndpointTest, Dataset: DatasetTest, }, &logger) // create the CRD a bit later time.Sleep(100 * time.Millisecond) - Expect(crdReconciler.persesDashboardReconciler.isWatching).To(BeFalse()) - dashboardCrd := ensurePersesDashboardCrdExists(ctx) + Expect(persesDashboardCrdReconciler.persesDashboardReconciler.isWatching.Load()).To(BeFalse()) + ensurePersesDashboardCrdExists(ctx) // watches are not triggered in unit tests - crdReconciler.Create( + persesDashboardCrdReconciler.Create( ctx, event.TypedCreateEvent[client.Object]{ - Object: dashboardCrd, + Object: persesDashboardCrd, }, &controllertest.TypedQueue[reconcile.Request]{}, ) // verify that the controller starts watching when it sees the CRD being created Eventually(func(g Gomega) { - g.Expect(crdReconciler.persesDashboardReconciler.isWatching).To(BeTrue()) + g.Expect(persesDashboardCrdReconciler.persesDashboardReconciler.isWatching.Load()).To(BeTrue()) }).Should(Succeed()) }) }) @@ -132,27 +132,27 @@ var _ = Describe("The Perses dashboard controller", Ordered, func() { var persesDashboardReconciler *PersesDashboardReconciler BeforeAll(func() { - createCrdReconcilerWithAuthToken() + createPersesDashboardCrdReconcilerWithAuthToken() ensurePersesDashboardCrdExists(ctx) - Expect(crdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + Expect(persesDashboardCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) }) BeforeEach(func() { - crdReconciler.SetApiEndpointAndDataset(&ApiConfig{ + persesDashboardCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ Endpoint: ApiEndpointTest, Dataset: DatasetTest, }, &logger) - Expect(crdReconciler.persesDashboardReconciler.isWatching).To(BeTrue()) - persesDashboardReconciler = crdReconciler.persesDashboardReconciler + Expect(persesDashboardCrdReconciler.persesDashboardReconciler.isWatching.Load()).To(BeTrue()) + persesDashboardReconciler = persesDashboardCrdReconciler.persesDashboardReconciler }) AfterAll(func() { ensurePersesDashboardCrdDoesNotExist(ctx) }) - It("creates a Perses dashboard resource", func() { - expectPutRequest() + It("creates a dashboard", func() { + expectDashboardPutRequest(defaultExpectedPathDashboard) defer gock.Off() dashboardResource := createDashboardResource() @@ -167,8 +167,8 @@ var _ = Describe("The Perses dashboard controller", Ordered, func() { Expect(gock.IsDone()).To(BeTrue()) }) - It("updates a Perses dashboard resource", func() { - expectPutRequest() + It("updates a dashboard", func() { + expectDashboardPutRequest(defaultExpectedPathDashboard) defer gock.Off() dashboardResource := createDashboardResource() @@ -183,8 +183,8 @@ var _ = Describe("The Perses dashboard controller", Ordered, func() { Expect(gock.IsDone()).To(BeTrue()) }) - It("deletes a Perses dashboard resource", func() { - expectDeleteRequest() + It("deletes a dashboard", func() { + expectDashboardDeleteRequest(defaultExpectedPathDashboard) defer gock.Off() dashboardResource := createDashboardResource() @@ -200,10 +200,10 @@ var _ = Describe("The Perses dashboard controller", Ordered, func() { }) It("it ignores Perses dashboard resource changes if API endpoint is not configured", func() { - expectPutRequest() + expectDashboardPutRequest(defaultExpectedPathDashboard) defer gock.Off() - crdReconciler.RemoveApiEndpointAndDataset() + persesDashboardCrdReconciler.RemoveApiEndpointAndDataset() dashboardResource := createDashboardResource() persesDashboardReconciler.Create( @@ -219,16 +219,16 @@ var _ = Describe("The Perses dashboard controller", Ordered, func() { }) }) -func createCrdReconcilerWithoutAuthToken() { - crdReconciler = &PersesDashboardCrdReconciler{ +func createPersesDashboardCrdReconcilerWithoutAuthToken() { + persesDashboardCrdReconciler = &PersesDashboardCrdReconciler{ // We create the controller multiple times in tests, this option is required, otherwise the controller // runtime will complain. skipNameValidation: true, } } -func createCrdReconcilerWithAuthToken() { - crdReconciler = &PersesDashboardCrdReconciler{ +func createPersesDashboardCrdReconcilerWithAuthToken() { + persesDashboardCrdReconciler = &PersesDashboardCrdReconciler{ AuthToken: AuthorizationTokenTest, // We create the controller multiple times in tests, this option is required, otherwise the controller @@ -237,18 +237,20 @@ func createCrdReconcilerWithAuthToken() { } } -func expectPutRequest() { +func expectDashboardPutRequest(expectedPath string) { gock.New(ApiEndpointTest). - Put("/api/dashboards/.*"). + Put(expectedPath). MatchParam("dataset", DatasetTest). + Times(1). Reply(200). JSON(map[string]string{}) } -func expectDeleteRequest() { +func expectDashboardDeleteRequest(expectedPath string) { gock.New(ApiEndpointTest). - Delete("/api/dashboards/.*"). + Delete(expectedPath). MatchParam("dataset", DatasetTest). + Times(1). Reply(200). JSON(map[string]string{}) } @@ -273,62 +275,16 @@ func createDashboardResource() unstructured.Unstructured { return unstructuredObject } -func ensurePersesDashboardCrdExists(ctx context.Context) *apiextensionsv1.CustomResourceDefinition { - crd_ := EnsureKubernetesObjectExists( +func ensurePersesDashboardCrdExists(ctx context.Context) { + persesDashboardCrd = EnsurePersesDashboardCrdExists( ctx, k8sClient, - crdQualifiedName, - &apiextensionsv1.CustomResourceDefinition{}, - &apiextensionsv1.CustomResourceDefinition{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "apiextensions.k8s.io/v1", - Kind: "CustomResourceDefinition", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "persesdashboards.perses.dev", - }, - Spec: apiextensionsv1.CustomResourceDefinitionSpec{ - Group: "perses.dev", - Names: apiextensionsv1.CustomResourceDefinitionNames{ - Kind: "PersesDashboard", - ListKind: "PersesDashboardList", - Plural: "persesdashboards", - Singular: "persesdashboard", - }, - Scope: "Namespaced", - Versions: []apiextensionsv1.CustomResourceDefinitionVersion{ - { - Name: "v1alpha1", - Schema: &apiextensionsv1.CustomResourceValidation{ - OpenAPIV3Schema: &apiextensionsv1.JSONSchemaProps{ - Type: "object", - Properties: map[string]apiextensionsv1.JSONSchemaProps{ - "apiVersion": {Type: "string"}, - "kind": {Type: "string"}, - "metadata": {Type: "object"}, - "spec": {Type: "object"}, - }, - Required: []string{ - "kind", - "spec", - }, - }, - }, - Served: true, - Storage: true, - }, - }, - }, - }, ) - - crd = crd_.(*apiextensionsv1.CustomResourceDefinition) - return crd } func ensurePersesDashboardCrdDoesNotExist(ctx context.Context) { - if crd != nil { - err := k8sClient.Delete(ctx, crd, &client.DeleteOptions{ + if persesDashboardCrd != nil { + err := k8sClient.Delete(ctx, persesDashboardCrd, &client.DeleteOptions{ GracePeriodSeconds: new(int64), }) if err != nil && apierrors.IsNotFound(err) { @@ -338,10 +294,11 @@ func ensurePersesDashboardCrdDoesNotExist(ctx context.Context) { } Eventually(func(g Gomega) { - err := k8sClient.Get(ctx, crdQualifiedName, &apiextensionsv1.CustomResourceDefinition{}) + err := k8sClient.Get(ctx, PersesDashboardCrdQualifiedName, &apiextensionsv1.CustomResourceDefinition{}) g.Expect(err).To(HaveOccurred()) g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) }).Should(Succeed()) - } + persesDashboardCrd = nil + } } diff --git a/internal/dash0/controller/prometheus_rules_controller.go b/internal/dash0/controller/prometheus_rules_controller.go new file mode 100644 index 0000000..eca2f65 --- /dev/null +++ b/internal/dash0/controller/prometheus_rules_controller.go @@ -0,0 +1,588 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "net/url" + "strings" + "sync/atomic" + + "github.com/go-logr/logr" + prometheusv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + otelmetric "go.opentelemetry.io/otel/metric" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/util/workqueue" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/yaml" + + "github.com/dash0hq/dash0-operator/internal/dash0/util" +) + +type PrometheusRuleCrdReconciler struct { + AuthToken string + mgr ctrl.Manager + skipNameValidation bool + prometheusRuleReconciler *PrometheusRuleReconciler + prometheusRuleCrdExists atomic.Bool +} + +//+kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch + +type CheckRule struct { + Name string `json:"name"` + Expression string `json:"expression"` + For string `json:"for,omitempty"` + Interval string `json:"interval,omitempty"` + KeepFiringFor string `json:"keepFiringFor,omitempty"` + Annotations map[string]string `json:"annotations"` // √ + Labels map[string]string `json:"labels"` +} + +var ( + prometheusRuleCrdReconcileRequestMetric otelmetric.Int64Counter + prometheusRuleReconcileRequestMetric otelmetric.Int64Counter +) + +func (r *PrometheusRuleCrdReconciler) Manager() ctrl.Manager { + return r.mgr +} + +func (r *PrometheusRuleCrdReconciler) GetAuthToken() string { + return r.AuthToken +} + +func (r *PrometheusRuleCrdReconciler) KindDisplayName() string { + return "Prometheus rule" +} + +func (r *PrometheusRuleCrdReconciler) Group() string { + return "monitoring.coreos.com" +} + +func (r *PrometheusRuleCrdReconciler) Kind() string { + return "PrometheusRule" +} + +func (r *PrometheusRuleCrdReconciler) Version() string { + return "v1" +} + +func (r *PrometheusRuleCrdReconciler) QualifiedKind() string { + return "prometheusrules.monitoring.coreos.com" +} + +func (r *PrometheusRuleCrdReconciler) ControllerName() string { + return "dash0_prometheus_rule_crd_controller" +} + +func (r *PrometheusRuleCrdReconciler) DoesCrdExist() *atomic.Bool { + return &r.prometheusRuleCrdExists +} + +func (r *PrometheusRuleCrdReconciler) SetCrdExists(exists bool) { + r.prometheusRuleCrdExists.Store(exists) +} + +func (r *PrometheusRuleCrdReconciler) SkipNameValidation() bool { + return r.skipNameValidation +} + +func (r *PrometheusRuleCrdReconciler) CreateResourceReconciler( + pseudoClusterUid types.UID, + authToken string, + httpClient *http.Client, +) { + r.prometheusRuleReconciler = &PrometheusRuleReconciler{ + pseudoClusterUid: pseudoClusterUid, + authToken: authToken, + httpClient: httpClient, + } +} + +func (r *PrometheusRuleCrdReconciler) ResourceReconciler() ThirdPartyResourceReconciler { + return r.prometheusRuleReconciler +} + +func (r *PrometheusRuleCrdReconciler) SetupWithManager( + ctx context.Context, + mgr ctrl.Manager, + startupK8sClient client.Client, + logger *logr.Logger, +) error { + r.mgr = mgr + return SetupThirdPartyCrdReconcilerWithManager( + ctx, + startupK8sClient, + r, + logger, + ) +} + +//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheusrules,verbs=get;list;watch + +func (r *PrometheusRuleCrdReconciler) Create( + ctx context.Context, + _ event.TypedCreateEvent[client.Object], + _ workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + if prometheusRuleCrdReconcileRequestMetric != nil { + prometheusRuleCrdReconcileRequestMetric.Add(ctx, 1) + } + logger := log.FromContext(ctx) + r.prometheusRuleCrdExists.Store(true) + maybeStartWatchingThirdPartyResources(r, false, &logger) +} + +func (r *PrometheusRuleCrdReconciler) Update( + context.Context, + event.TypedUpdateEvent[client.Object], + workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + // should not be called, we are not interested in updates + // note: update is called twice prior to delete, it is also called twice after an actual create +} + +func (r *PrometheusRuleCrdReconciler) Delete( + ctx context.Context, + _ event.TypedDeleteEvent[client.Object], + _ workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + if prometheusRuleCrdReconcileRequestMetric != nil { + prometheusRuleCrdReconcileRequestMetric.Add(ctx, 1) + } + logger := log.FromContext(ctx) + logger.Info("The PrometheusRule custom resource definition has been deleted.") + r.prometheusRuleCrdExists.Store(false) + + // Known issue: We would need to stop the watch for the Prometheus rule resources here, but the controller-runtime + // does not provide any API to stop a watch. + // An error will be logged every ten seconds until the controller process is restarted. +} + +func (r *PrometheusRuleCrdReconciler) Generic( + context.Context, + event.TypedGenericEvent[client.Object], + workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + // Should not be called, we are not interested in generic events. +} + +func (r *PrometheusRuleCrdReconciler) Reconcile( + _ context.Context, + _ reconcile.Request, +) (reconcile.Result, error) { + // Reconcile should not be called for the PrometheusRuleCrdReconciler CRD, as we are using the + // TypedEventHandler interface directly when setting up the watch. We still need to implement the method, as the + // controller builder's Complete method requires implementing the Reconciler interface. + return reconcile.Result{}, nil +} + +func (r *PrometheusRuleCrdReconciler) InitializeSelfMonitoringMetrics( + meter otelmetric.Meter, + metricNamePrefix string, + logger *logr.Logger, +) { + reconcileRequestMetricName := fmt.Sprintf("%s%s", metricNamePrefix, "prometheusrulecrd.reconcile_requests") + var err error + if prometheusRuleCrdReconcileRequestMetric, err = meter.Int64Counter( + reconcileRequestMetricName, + otelmetric.WithUnit("1"), + otelmetric.WithDescription("Counter for PrometheusRule CRD reconcile requests"), + ); err != nil { + logger.Error(err, "Cannot initialize the metric %s.") + } + + r.prometheusRuleReconciler.InitializeSelfMonitoringMetrics( + meter, + metricNamePrefix, + logger, + ) +} + +func (r *PrometheusRuleCrdReconciler) SetApiEndpointAndDataset( + apiConfig *ApiConfig, + logger *logr.Logger) { + if r.prometheusRuleReconciler == nil { + // If no auth token has been set via environment variable, we do not even create the prometheusRuleReconciler, + // hence this nil check is necessary. + return + } + r.prometheusRuleReconciler.apiConfig.Store(apiConfig) + maybeStartWatchingThirdPartyResources(r, false, logger) +} + +func (r *PrometheusRuleCrdReconciler) RemoveApiEndpointAndDataset() { + if r.prometheusRuleReconciler == nil { + // If no auth token has been set via environment variable, we do not even create the prometheusRuleReconciler, + // hence this nil check is necessary. + return + } + r.prometheusRuleReconciler.apiConfig.Store(nil) +} + +type PrometheusRuleReconciler struct { + isWatching atomic.Bool + pseudoClusterUid types.UID + httpClient *http.Client + apiConfig atomic.Pointer[ApiConfig] + authToken string +} + +func (r *PrometheusRuleReconciler) InitializeSelfMonitoringMetrics( + meter otelmetric.Meter, + metricNamePrefix string, + logger *logr.Logger, +) { + reconcileRequestMetricName := fmt.Sprintf("%s%s", metricNamePrefix, "prometheusrule.reconcile_requests") + var err error + if prometheusRuleReconcileRequestMetric, err = meter.Int64Counter( + reconcileRequestMetricName, + otelmetric.WithUnit("1"), + otelmetric.WithDescription("Counter for prometheus rule reconcile requests"), + ); err != nil { + logger.Error(err, "Cannot initialize the metric %s.") + } +} + +func (r *PrometheusRuleReconciler) KindDisplayName() string { + return "Prometheus rule" +} + +func (r *PrometheusRuleReconciler) ShortName() string { + return "rule" +} + +func (r *PrometheusRuleReconciler) IsWatching() *atomic.Bool { + return &r.isWatching +} + +func (r *PrometheusRuleReconciler) SetIsWatching(isWatching bool) { + r.isWatching.Store(isWatching) +} + +func (r *PrometheusRuleReconciler) GetAuthToken() string { + return r.authToken +} + +func (r *PrometheusRuleReconciler) GetApiConfig() *atomic.Pointer[ApiConfig] { + return &r.apiConfig +} + +func (r *PrometheusRuleReconciler) ControllerName() string { + return "dash0_prometheus_rule_controller" +} + +func (r *PrometheusRuleReconciler) HttpClient() *http.Client { + return r.httpClient +} + +func (r *PrometheusRuleReconciler) Create( + ctx context.Context, + e event.TypedCreateEvent[client.Object], + _ workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + if prometheusRuleReconcileRequestMetric != nil { + prometheusRuleReconcileRequestMetric.Add(ctx, 1) + } + + logger := log.FromContext(ctx) + logger.Info( + "Detected a new Prometheus rule resource", + "namespace", + e.Object.GetNamespace(), + "name", + e.Object.GetName(), + ) + + if err := util.RetryWithCustomBackoff( + "create rule(s)", + func() error { + return upsertViaApi(r, e.Object.(*unstructured.Unstructured), &logger) + }, + retrySettings, + true, + &logger, + ); err != nil { + logger.Error(err, "failed to create the rule(s)") + } +} + +func (r *PrometheusRuleReconciler) Update( + ctx context.Context, + e event.TypedUpdateEvent[client.Object], + _ workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + if prometheusRuleReconcileRequestMetric != nil { + prometheusRuleReconcileRequestMetric.Add(ctx, 1) + } + + logger := log.FromContext(ctx) + logger.Info( + "Detected a change for a Prometheus rule resource", + "namespace", + e.ObjectNew.GetNamespace(), + "name", + e.ObjectNew.GetName(), + ) + + if err := util.RetryWithCustomBackoff( + "update rule(s)", + func() error { + return upsertViaApi(r, e.ObjectNew.(*unstructured.Unstructured), &logger) + }, + retrySettings, + true, + &logger, + ); err != nil { + logger.Error(err, "failed to update the rule(s)") + } +} + +func (r *PrometheusRuleReconciler) Delete( + ctx context.Context, + e event.TypedDeleteEvent[client.Object], + _ workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + if prometheusRuleReconcileRequestMetric != nil { + prometheusRuleReconcileRequestMetric.Add(ctx, 1) + } + + logger := log.FromContext(ctx) + logger.Info( + "Detected the deletion of a Prometheus rule resource", + "namespace", + e.Object.GetNamespace(), + "name", + e.Object.GetName(), + ) + + if err := util.RetryWithCustomBackoff( + "delete rule(s)", + func() error { + return deleteViaApi(r, e.Object.(*unstructured.Unstructured), &logger) + }, + retrySettings, + true, + &logger, + ); err != nil { + logger.Error(err, "failed to delete the rule(s)") + } +} + +func (r *PrometheusRuleReconciler) Generic( + _ context.Context, + _ event.TypedGenericEvent[client.Object], + _ workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + // ignoring generic events +} + +func (r *PrometheusRuleReconciler) Reconcile( + context.Context, + reconcile.Request, +) (reconcile.Result, error) { + // Reconcile should not be called on the PrometheusRuleReconciler, as we are using the TypedEventHandler interface + // directly when setting up the watch. We still need to implement the method, as the controller builder's Complete + // method requires implementing the Reconciler interface. + return reconcile.Result{}, nil +} + +func (r *PrometheusRuleReconciler) MapResourceToHttpRequests( + preconditionChecksResult *preconditionValidationResult, + action apiAction, + logger *logr.Logger, +) ([]*http.Request, error) { + specRaw := preconditionChecksResult.spec + + specAsYaml, err := yaml.Marshal(specRaw) + if err != nil { + logger.Error(err, "unable to marshal the Prometheus rule spec") + return nil, err + } + ruleSpec := prometheusv1.PrometheusRuleSpec{} + if err = yaml.Unmarshal(specAsYaml, &ruleSpec); err != nil { + logger.Error(err, "unable to unmarshal the Prometheus rule spec") + return nil, err + } + + urlPrefix := r.renderUrlPrefix(preconditionChecksResult) + requests := make([]*http.Request, 0) + for _, group := range ruleSpec.Groups { + for ruleIdx, rule := range group.Rules { + checkRuleUrl := fmt.Sprintf( + "%s_%s_%d?dataset=%s", + urlPrefix, + urlEncodePathSegment(group.Name), + ruleIdx, + url.QueryEscape(preconditionChecksResult.dataset), + ) + if request, ok := convertRuleToRequest( + checkRuleUrl, + action, + rule, + preconditionChecksResult, + group.Name, + group.Interval, + logger, + ); ok { + requests = append(requests, request) + } + } + } + + return requests, nil +} + +func (r *PrometheusRuleReconciler) renderUrlPrefix(preconditionCheckResult *preconditionValidationResult) string { + + ruleOriginPrefix := fmt.Sprintf( + // we deliberately use _ as the separator, since that is an illegal character in Kubernetes names. This avoids + // any potential naming collisions (e.g. namespace="abc" & name="def-ghi" vs. namespace="abc-def" & name="ghi"). + "dash0-operator_%s_%s_%s_%s", + r.pseudoClusterUid, + urlEncodePathSegment(preconditionCheckResult.dataset), + preconditionCheckResult.k8sNamespace, + preconditionCheckResult.k8sName, + ) + if !strings.HasSuffix(preconditionCheckResult.apiEndpoint, "/") { + preconditionCheckResult.apiEndpoint += "/" + } + return fmt.Sprintf( + "%sapi/alerting/check-rules/%s", + preconditionCheckResult.apiEndpoint, + ruleOriginPrefix, + ) +} + +func convertRuleToRequest( + checkRuleUrl string, + action apiAction, + rule prometheusv1.Rule, + preconditionCheckResult *preconditionValidationResult, + groupName string, + interval *prometheusv1.Duration, + logger *logr.Logger, +) (*http.Request, bool) { + checkRule, ok := convertRuleToCheckRule(rule, action, groupName, interval, logger) + if !ok { + return nil, false + } + + var req *http.Request + var err error + + //nolint:ineffassign + actionLabel := "?" + switch action { + case upsert: + actionLabel = "upsert" + serializedCheckRule, _ := json.Marshal(checkRule) + requestPayload := bytes.NewBuffer(serializedCheckRule) + req, err = http.NewRequest( + http.MethodPut, + checkRuleUrl, + requestPayload, + ) + case delete: + actionLabel = "delete" + req, err = http.NewRequest( + http.MethodDelete, + checkRuleUrl, + nil, + ) + default: + logger.Error(fmt.Errorf("unknown API action: %d", action), "unknown API action") + return nil, false + } + + if err != nil { + logger.Error( + err, + fmt.Sprintf( + "unable to create a new HTTP request to %s the rule at %s", + actionLabel, + checkRuleUrl, + )) + return nil, false + } + + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", preconditionCheckResult.authToken)) + if action == upsert { + req.Header.Set("Content-Type", "application/json") + } + + return req, true +} + +func convertRuleToCheckRule( + rule prometheusv1.Rule, + action apiAction, + groupName string, + interval *prometheusv1.Duration, + logger *logr.Logger, +) (*CheckRule, bool) { + if rule.Record != "" { + logger.Info("Skipping rule with record attribute", "record", rule.Record) + return nil, false + } + if rule.Alert == "" { + logger.Info("Skipping rule without alert attribute", "record", rule.Record) + return nil, false + } + + if action == delete { + // When deleting a rule, we do not need an actual payload, but do need to skip rules with rule.Record or without + // rule.Alert (that is why we still call convertRuleToCheckRule for deletions). + return &CheckRule{}, true + } + + // If action is not delete, it is upsert, and for that we need to create an actual payload, hence we need to convert + // the rule to a CheckRule. + checkRule := &CheckRule{ + Name: fmt.Sprintf("%s - %s", groupName, rule.Alert), + Interval: convertDuration(interval), + Annotations: rule.Annotations, + Labels: rule.Labels, + Expression: convertIntOrString(rule.Expr), + For: convertDuration(rule.For), + KeepFiringFor: convertNonEmptyDuration(rule.KeepFiringFor), + } + + return checkRule, true +} + +func convertDuration(duration *prometheusv1.Duration) string { + if duration == nil { + return "" + } + return string(*duration) +} + +func convertNonEmptyDuration(duration *prometheusv1.NonEmptyDuration) string { + if duration == nil { + return "" + } + return string(*duration) +} + +func convertIntOrString(intOrString intstr.IntOrString) string { + switch intOrString.Type { + case intstr.String: + return intOrString.StrVal + case intstr.Int: + return string(intOrString.IntVal) + } + return "" +} diff --git a/internal/dash0/controller/prometheus_rules_controller_test.go b/internal/dash0/controller/prometheus_rules_controller_test.go new file mode 100644 index 0000000..8a58a95 --- /dev/null +++ b/internal/dash0/controller/prometheus_rules_controller_test.go @@ -0,0 +1,347 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "context" + "encoding/json" + "fmt" + "time" + + prometheusv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/util/intstr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllertest" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/h2non/gock" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + . "github.com/dash0hq/dash0-operator/test/util" +) + +var ( + prometheusRulePrometheusRuleCrdReconciler *PrometheusRuleCrdReconciler + prometheusRuleCrd *apiextensionsv1.CustomResourceDefinition + + checkRuleApiBasePath = "/api/alerting/check-rules/" + + defaultExpectedPathsCheckRules = []string{ + fmt.Sprintf("%s.*%s", checkRuleApiBasePath, "dash0-operator_.*_test-dataset_test-namespace_test-rule_group_1_0"), + fmt.Sprintf("%s.*%s", checkRuleApiBasePath, "dash0-operator_.*_test-dataset_test-namespace_test-rule_group_1_1"), + fmt.Sprintf("%s.*%s", checkRuleApiBasePath, "dash0-operator_.*_test-dataset_test-namespace_test-rule_group_2_0"), + fmt.Sprintf("%s.*%s", checkRuleApiBasePath, "dash0-operator_.*_test-dataset_test-namespace_test-rule_group_2_1"), + } +) + +var _ = Describe("The Prometheus rule controller", Ordered, func() { + ctx := context.Background() + logger := log.FromContext(ctx) + + BeforeAll(func() { + EnsureTestNamespaceExists(ctx, k8sClient) + EnsureOperatorNamespaceExists(ctx, k8sClient) + }) + + Describe("the Prometheus rule CRD reconciler", func() { + + AfterEach(func() { + ensurePrometheusRuleCrdDoesNotExist(ctx) + }) + + It("does not create a Prometheus rule resource reconciler if there is no auth token", func() { + createPrometheusRuleCrdReconcilerWithoutAuthToken() + ensurePrometheusRuleCrdExists(ctx) + Expect(prometheusRulePrometheusRuleCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + Expect(prometheusRulePrometheusRuleCrdReconciler.prometheusRuleReconciler).To(BeNil()) + prometheusRulePrometheusRuleCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ + Endpoint: ApiEndpointTest, + Dataset: DatasetTest, + }, &logger) + Expect(prometheusRulePrometheusRuleCrdReconciler.prometheusRuleReconciler).To(BeNil()) + }) + + It("does not start watching Prometheus rules if the CRD does not exist and the API endpoint has not been provided", func() { + createPrometheusRuleCrdReconcilerWithAuthToken() + Expect(prometheusRulePrometheusRuleCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + Expect(prometheusRulePrometheusRuleCrdReconciler.prometheusRuleReconciler.isWatching.Load()).To(BeFalse()) + }) + + It("does not start watching Prometheus rules if the API endpoint has been provided but the CRD does not exist", func() { + createPrometheusRuleCrdReconcilerWithAuthToken() + Expect(prometheusRulePrometheusRuleCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + prometheusRulePrometheusRuleCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ + Endpoint: ApiEndpointTest, + Dataset: DatasetTest, + }, &logger) + Expect(prometheusRulePrometheusRuleCrdReconciler.prometheusRuleReconciler.isWatching.Load()).To(BeFalse()) + }) + + It("does not start watching Prometheus rules if the CRD exists but the API endpoint has not been provided", func() { + createPrometheusRuleCrdReconcilerWithAuthToken() + ensurePrometheusRuleCrdExists(ctx) + Expect(prometheusRulePrometheusRuleCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + Expect(prometheusRulePrometheusRuleCrdReconciler.prometheusRuleReconciler.isWatching.Load()).To(BeFalse()) + }) + + It("starts watching Prometheus rules if the CRD exists and the API endpoint has been provided", func() { + createPrometheusRuleCrdReconcilerWithAuthToken() + ensurePrometheusRuleCrdExists(ctx) + Expect(prometheusRulePrometheusRuleCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + Expect(prometheusRulePrometheusRuleCrdReconciler.prometheusRuleReconciler.isWatching.Load()).To(BeFalse()) + prometheusRulePrometheusRuleCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ + Endpoint: ApiEndpointTest, + Dataset: DatasetTest, + }, &logger) + Expect(prometheusRulePrometheusRuleCrdReconciler.prometheusRuleReconciler.isWatching.Load()).To(BeTrue()) + }) + + It("starts watching Prometheus rules if API endpoint is provided and the CRD is created later on", func() { + createPrometheusRuleCrdReconcilerWithAuthToken() + Expect(prometheusRulePrometheusRuleCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + + // provide the API endpoint first + prometheusRulePrometheusRuleCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ + Endpoint: ApiEndpointTest, + Dataset: DatasetTest, + }, &logger) + + // create the CRD a bit later + time.Sleep(100 * time.Millisecond) + Expect(prometheusRulePrometheusRuleCrdReconciler.prometheusRuleReconciler.isWatching.Load()).To(BeFalse()) + ensurePrometheusRuleCrdExists(ctx) + // watches are not triggered in unit tests + prometheusRulePrometheusRuleCrdReconciler.Create( + ctx, + event.TypedCreateEvent[client.Object]{ + Object: prometheusRuleCrd, + }, + &controllertest.TypedQueue[reconcile.Request]{}, + ) + + // verify that the controller starts watching when it sees the CRD being created + Eventually(func(g Gomega) { + g.Expect(prometheusRulePrometheusRuleCrdReconciler.prometheusRuleReconciler.isWatching.Load()).To(BeTrue()) + }).Should(Succeed()) + }) + }) + + Describe("the Prometheus rule resource reconciler", func() { + var prometheusRuleReconciler *PrometheusRuleReconciler + + BeforeAll(func() { + createPrometheusRuleCrdReconcilerWithAuthToken() + ensurePrometheusRuleCrdExists(ctx) + + Expect(prometheusRulePrometheusRuleCrdReconciler.SetupWithManager(ctx, mgr, k8sClient, &logger)).To(Succeed()) + }) + + BeforeEach(func() { + prometheusRulePrometheusRuleCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ + Endpoint: ApiEndpointTest, + Dataset: DatasetTest, + }, &logger) + Expect(prometheusRulePrometheusRuleCrdReconciler.prometheusRuleReconciler.isWatching.Load()).To(BeTrue()) + prometheusRuleReconciler = prometheusRulePrometheusRuleCrdReconciler.prometheusRuleReconciler + }) + + AfterAll(func() { + ensurePrometheusRuleCrdDoesNotExist(ctx) + }) + + It("creates check rules", func() { + expectRulePutRequests(defaultExpectedPathsCheckRules) + defer gock.Off() + + ruleResource := createRuleResource() + prometheusRuleReconciler.Create( + ctx, + event.TypedCreateEvent[client.Object]{ + Object: &ruleResource, + }, + &controllertest.TypedQueue[reconcile.Request]{}, + ) + + Expect(gock.IsDone()).To(BeTrue()) + }) + + It("updates check rules", func() { + expectRulePutRequests(defaultExpectedPathsCheckRules) + defer gock.Off() + + ruleResource := createRuleResource() + prometheusRuleReconciler.Update( + ctx, + event.TypedUpdateEvent[client.Object]{ + ObjectNew: &ruleResource, + }, + &controllertest.TypedQueue[reconcile.Request]{}, + ) + + Expect(gock.IsDone()).To(BeTrue()) + }) + + It("deletes check rules", func() { + expectRuleDeleteRequests(defaultExpectedPathsCheckRules) + defer gock.Off() + + ruleResource := createRuleResource() + prometheusRuleReconciler.Delete( + ctx, + event.TypedDeleteEvent[client.Object]{ + Object: &ruleResource, + }, + &controllertest.TypedQueue[reconcile.Request]{}, + ) + + Expect(gock.IsDone()).To(BeTrue()) + }) + + It("it ignores Prometheus rule resource changes if API endpoint is not configured", func() { + expectRulePutRequests(defaultExpectedPathsCheckRules) + defer gock.Off() + + prometheusRulePrometheusRuleCrdReconciler.RemoveApiEndpointAndDataset() + + ruleResource := createRuleResource() + prometheusRuleReconciler.Create( + ctx, + event.TypedCreateEvent[client.Object]{ + Object: &ruleResource, + }, + &controllertest.TypedQueue[reconcile.Request]{}, + ) + + Expect(gock.IsPending()).To(BeTrue()) + }) + }) +}) + +func createPrometheusRuleCrdReconcilerWithoutAuthToken() { + prometheusRulePrometheusRuleCrdReconciler = &PrometheusRuleCrdReconciler{ + // We create the controller multiple times in tests, this option is required, otherwise the controller + // runtime will complain. + skipNameValidation: true, + } +} + +func createPrometheusRuleCrdReconcilerWithAuthToken() { + prometheusRulePrometheusRuleCrdReconciler = &PrometheusRuleCrdReconciler{ + AuthToken: AuthorizationTokenTest, + + // We create the controller multiple times in tests, this option is required, otherwise the controller + // runtime will complain. + skipNameValidation: true, + } +} + +func expectRulePutRequests(expectedPaths []string) { + for _, expectedPath := range expectedPaths { + gock.New(ApiEndpointTest). + Put(expectedPath). + MatchParam("dataset", DatasetTest). + Times(1). + Reply(200). + JSON(map[string]string{}) + } +} + +func expectRuleDeleteRequests(expectedPaths []string) { + for _, expectedPath := range expectedPaths { + gock.New(ApiEndpointTest). + Delete(expectedPath). + MatchParam("dataset", DatasetTest). + Times(1). + Reply(200). + JSON(map[string]string{}) + } +} + +func createRuleResource() unstructured.Unstructured { + rule := prometheusv1.PrometheusRule{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "monitoring.coreos.com/v1", + Kind: "PrometheusRule", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rule", + Namespace: TestNamespaceName, + }, + Spec: prometheusv1.PrometheusRuleSpec{ + Groups: []prometheusv1.RuleGroup{ + { + Name: "group_1", + Rules: []prometheusv1.Rule{ + { + Alert: "rule_1_1", + Expr: intstr.FromString("vector(1)"), + }, + { + Alert: "rule_1_2", + Expr: intstr.FromString("vector(1)"), + }, + { + Record: "rule_1_3", + Expr: intstr.FromString("vector(1)"), + }, + }, + }, + { + Name: "group_2", + Rules: []prometheusv1.Rule{ + { + Alert: "rule_2_1", + Expr: intstr.FromString("vector(1)"), + }, + { + Alert: "rule_2_2", + Expr: intstr.FromString("vector(1)"), + }, + }, + }, + }, + }, + } + marshalled, err := json.Marshal(rule) + Expect(err).NotTo(HaveOccurred()) + unstructuredObject := unstructured.Unstructured{} + err = json.Unmarshal(marshalled, &unstructuredObject) + Expect(err).NotTo(HaveOccurred()) + return unstructuredObject +} + +func ensurePrometheusRuleCrdExists(ctx context.Context) { + prometheusRuleCrd = EnsurePrometheusRuleCrdExists( + ctx, + k8sClient, + ) +} + +func ensurePrometheusRuleCrdDoesNotExist(ctx context.Context) { + if prometheusRuleCrd != nil { + err := k8sClient.Delete(ctx, prometheusRuleCrd, &client.DeleteOptions{ + GracePeriodSeconds: new(int64), + }) + if err != nil && apierrors.IsNotFound(err) { + return + } else if err != nil { + Expect(err).NotTo(HaveOccurred()) + } + + Eventually(func(g Gomega) { + err := k8sClient.Get(ctx, PrometheusRuleCrdQualifiedName, &apiextensionsv1.CustomResourceDefinition{}) + g.Expect(err).To(HaveOccurred()) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }).Should(Succeed()) + + prometheusRuleCrd = nil + } +} diff --git a/internal/dash0/controller/third_party_crd_common.go b/internal/dash0/controller/third_party_crd_common.go new file mode 100644 index 0000000..fe95577 --- /dev/null +++ b/internal/dash0/controller/third_party_crd_common.go @@ -0,0 +1,533 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "context" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "sync/atomic" + "time" + + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/dash0hq/dash0-operator/internal/dash0/util" +) + +type ApiClient interface { + SetApiEndpointAndDataset(*ApiConfig, *logr.Logger) + RemoveApiEndpointAndDataset() +} + +type ThirdPartyCrdReconciler interface { + handler.TypedEventHandler[client.Object, reconcile.Request] + reconcile.TypedReconciler[reconcile.Request] + + Manager() ctrl.Manager + GetAuthToken() string + KindDisplayName() string + Group() string + Kind() string + Version() string + QualifiedKind() string + ControllerName() string + DoesCrdExist() *atomic.Bool + SetCrdExists(bool) + SkipNameValidation() bool + CreateResourceReconciler(types.UID, string, *http.Client) + ResourceReconciler() ThirdPartyResourceReconciler +} + +type ThirdPartyResourceReconciler interface { + handler.TypedEventHandler[client.Object, reconcile.Request] + reconcile.TypedReconciler[reconcile.Request] + + KindDisplayName() string + ShortName() string + IsWatching() *atomic.Bool + SetIsWatching(bool) + GetAuthToken() string + GetApiConfig() *atomic.Pointer[ApiConfig] + ControllerName() string + HttpClient() *http.Client + MapResourceToHttpRequests(*preconditionValidationResult, apiAction, *logr.Logger) ([]*http.Request, error) +} + +type ApiConfig struct { + Endpoint string + Dataset string +} + +type apiAction int + +const ( + upsert apiAction = iota + delete +) + +type preconditionValidationResult struct { + executeRequest bool + authToken string + apiEndpoint string + dataset string + k8sNamespace string + k8sName string + spec map[string]interface{} +} + +var ( + retrySettings = wait.Backoff{ + Duration: 5 * time.Second, + Factor: 1.5, + Steps: 3, + } +) + +func SetupThirdPartyCrdReconcilerWithManager( + ctx context.Context, + k8sClient client.Client, + crdReconciler ThirdPartyCrdReconciler, + logger *logr.Logger, +) error { + authToken := crdReconciler.GetAuthToken() + if authToken == "" { + logger.Info(fmt.Sprintf("No Dash0 auth token has been provided via the operator configuration resource. "+ + "The operator will not watch for %s resources.", crdReconciler.KindDisplayName())) + return nil + } + + kubeSystemNamespace := &corev1.Namespace{} + if err := k8sClient.Get(ctx, client.ObjectKey{Name: "kube-system"}, kubeSystemNamespace); err != nil { + msg := "unable to get the kube-system namespace uid" + logger.Error(err, msg) + return fmt.Errorf("%s: %w", msg, err) + } + + crdReconciler.CreateResourceReconciler( + kubeSystemNamespace.UID, + authToken, + &http.Client{}, + ) + + if err := k8sClient.Get(ctx, client.ObjectKey{ + Name: crdReconciler.QualifiedKind(), + }, &apiextensionsv1.CustomResourceDefinition{}); err != nil { + if !apierrors.IsNotFound(err) { + logger.Error( + err, + fmt.Sprintf("unable to call client.Get(\"%s\") custom resource definition", + crdReconciler.QualifiedKind())) + return err + } + } else { + crdReconciler.SetCrdExists(true) + maybeStartWatchingThirdPartyResources(crdReconciler, true, logger) + } + + controllerBuilder := ctrl.NewControllerManagedBy(crdReconciler.Manager()). + Named(crdReconciler.ControllerName()). + Watches( + &apiextensionsv1.CustomResourceDefinition{}, + // Deliberately not using a convenience mechanism like &handler.EnqueueRequestForObject{} (which would + // feed all events into the Reconcile method) here, since using the lower-level TypedEventHandler interface + // directly allows us to distinguish between create and delete events more easily. + crdReconciler, + builder.WithPredicates( + makeFilterPredicate( + crdReconciler.Group(), + crdReconciler.Kind(), + ))) + if crdReconciler.SkipNameValidation() { + controllerBuilder = controllerBuilder.WithOptions(controller.TypedOptions[reconcile.Request]{ + SkipNameValidation: ptr.To(true), + }) + } + if err := controllerBuilder.Complete(crdReconciler); err != nil { + logger.Error(err, + fmt.Sprintf( + "unable to build the controller for the %s CRD reconciler", + crdReconciler.KindDisplayName(), + )) + return err + } + + return nil +} + +func makeFilterPredicate(group string, kind string) predicate.Funcs { + return predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { + return isMatchingCrd(group, kind, e.Object) + }, + UpdateFunc: func(e event.UpdateEvent) bool { + // We are not interested in updates, but we still need to define a filter predicate for it, otherwise _all_ + // update events for CRDs would be passed to our event handler. We always return false to ignore update + // events entirely. Same for generic events. + return false + }, + DeleteFunc: func(e event.DeleteEvent) bool { + return isMatchingCrd(group, kind, e.Object) + }, + GenericFunc: func(e event.GenericEvent) bool { + return false + }, + } +} + +func isMatchingCrd(group string, kind string, crd client.Object) bool { + if crdCasted, ok := crd.(*apiextensionsv1.CustomResourceDefinition); ok { + return crdCasted.Spec.Group == group && + crdCasted.Spec.Names.Kind == kind + } else { + return false + } +} + +func maybeStartWatchingThirdPartyResources( + crdReconciler ThirdPartyCrdReconciler, + isStartup bool, + logger *logr.Logger, +) { + if crdReconciler.ResourceReconciler().IsWatching().Load() { + // we are already watching, do not start a second watch + return + } + + if !crdReconciler.DoesCrdExist().Load() { + logger.Info( + fmt.Sprintf("The %s custom resource definition does not exist in this cluster, the operator will not "+ + "watch for %s resources.", + crdReconciler.QualifiedKind(), + crdReconciler.KindDisplayName(), + )) + return + } + + apiConfig := crdReconciler.ResourceReconciler().GetApiConfig().Load() + if !isValidApiConfig(apiConfig) { + if !isStartup { + // Silently ignore this missing precondition if it happens during the startup of the operator. It will + // be remedied automatically once the operator configuration resource is reconciled for the first time. + logger.Info( + fmt.Sprintf( + "The %s custom resource definition is present in this cluster, but no Dash0 API endpoint been "+ + "provided via the operator configuration resource, or the operator configuration resource "+ + "has not been reconciled yet. The operator will not watch for %s resources. "+ + "(If there is an operator configuration resource with an API endpoint present in the "+ + "cluster, it will be reconciled in a few seconds and this message can be safely ignored.)", + crdReconciler.QualifiedKind(), + crdReconciler.KindDisplayName(), + )) + } + return + } + + logger.Info( + fmt.Sprintf( + "The %s custom resource definition is present in this cluster, and a Dash0 API endpoint has been provided. "+ + "The operator will watch for %s resources.", + crdReconciler.QualifiedKind(), + crdReconciler.KindDisplayName(), + ), + ) + startWatchingThirdPartyResources(crdReconciler, logger) +} + +func startWatchingThirdPartyResources( + crdReconciler ThirdPartyCrdReconciler, + logger *logr.Logger, +) { + logger.Info(fmt.Sprintf("Setting up a watch for %s custom resources.", crdReconciler.KindDisplayName())) + + unstructuredGvkForPersesDashboards := &unstructured.Unstructured{} + unstructuredGvkForPersesDashboards.SetGroupVersionKind(schema.GroupVersionKind{ + Kind: crdReconciler.Kind(), + Group: crdReconciler.Group(), + Version: crdReconciler.Version(), + }) + + resourceReconciler := crdReconciler.ResourceReconciler() + controllerBuilder := ctrl.NewControllerManagedBy(crdReconciler.Manager()). + Named(resourceReconciler.ControllerName()). + Watches( + unstructuredGvkForPersesDashboards, + // Deliberately not using a convenience mechanism like &handler.EnqueueRequestForObject{} (which would + // feed all events into the Reconcile method) here, since using the lower-level TypedEventHandler interface + // directly allows us to distinguish between create and delete events more easily. + resourceReconciler, + ) + if crdReconciler.SkipNameValidation() { + controllerBuilder = controllerBuilder.WithOptions(controller.TypedOptions[reconcile.Request]{ + SkipNameValidation: ptr.To(true), + }) + } + if err := controllerBuilder.Complete(resourceReconciler); err != nil { + logger.Error(err, "unable to create a new controller for watching Perses dashboards") + return + } + resourceReconciler.SetIsWatching(true) +} + +func isValidApiConfig(apiConfig *ApiConfig) bool { + return apiConfig != nil && apiConfig.Endpoint != "" +} + +func urlEncodePathSegment(s string) string { + return url.PathEscape( + // For now the Dash0 backend treats %2F the same as "/", so we need to replace forward slashes with + // something other than %2F. + // See https://stackoverflow.com/questions/71581828/gin-problem-accessing-url-encoded-path-param-containing-forward-slash + strings.ReplaceAll(s, "/", "|"), + ) +} + +func upsertViaApi( + resourceReconciler ThirdPartyResourceReconciler, + resource *unstructured.Unstructured, + logger *logr.Logger, +) error { + preconditionChecksResult := validatePreconditions( + resourceReconciler, + resource, + logger, + ) + if !preconditionChecksResult.executeRequest { + return nil + } + + httpRequests, err := resourceReconciler.MapResourceToHttpRequests(preconditionChecksResult, upsert, logger) + if err != nil { + return err + } + + if len(httpRequests) == 0 { + logger.Info( + fmt.Sprintf( + "%s %s/%s did not contain any %s, skipping.", + resourceReconciler.KindDisplayName(), + resource.GetNamespace(), + resource.GetName(), + resourceReconciler.ShortName(), + )) + } + + return executeAllHttpRequests(resourceReconciler, httpRequests, "Creating/updating", logger) +} + +func deleteViaApi( + resourceReconciler ThirdPartyResourceReconciler, + resource *unstructured.Unstructured, + logger *logr.Logger, +) error { + preconditionChecksResult := validatePreconditions( + resourceReconciler, + resource, + logger, + ) + if !preconditionChecksResult.executeRequest { + return nil + } + + httpRequests, err := resourceReconciler.MapResourceToHttpRequests(preconditionChecksResult, delete, logger) + if err != nil { + return err + } + + if len(httpRequests) == 0 { + logger.Info( + fmt.Sprintf( + "%s %s/%s did not contain any %s, skipping.", + resourceReconciler.KindDisplayName(), + resource.GetNamespace(), + resource.GetName(), + resourceReconciler.ShortName(), + )) + } + + return executeAllHttpRequests(resourceReconciler, httpRequests, "Deleting", logger) +} + +func validatePreconditions( + resourceReconciler ThirdPartyResourceReconciler, + resource *unstructured.Unstructured, + logger *logr.Logger, +) *preconditionValidationResult { + namespace := resource.GetNamespace() + name := resource.GetName() + + apiConfig := resourceReconciler.GetApiConfig().Load() + if !isValidApiConfig(apiConfig) { + logger.Info( + fmt.Sprintf( + "No Dash0 API endpoint has been provided via the operator configuration resource, "+ + "the %s(s) from %s/%s will not be updated in Dash0.", + resourceReconciler.ShortName(), + namespace, + name, + )) + return &preconditionValidationResult{ + executeRequest: false, + } + } + + authToken := resourceReconciler.GetAuthToken() + if authToken == "" { + logger.Info( + fmt.Sprintf( + "No auth token is set on the controller deployment, the %s(s) from %s/%s not be updated in Dash0.", + resourceReconciler.ShortName(), + namespace, + name, + )) + return &preconditionValidationResult{ + executeRequest: false, + } + } + + dataset := apiConfig.Dataset + if dataset == "" { + dataset = util.DatasetDefault + } + + specRaw := resource.Object["spec"] + if specRaw == nil { + logger.Info( + fmt.Sprintf( + "%s %s/%s has no spec, the %s(s) from will not be updated in Dash0.", + resourceReconciler.KindDisplayName(), + namespace, + name, + resourceReconciler.ShortName(), + )) + return &preconditionValidationResult{ + executeRequest: false, + } + } + spec, ok := specRaw.(map[string]interface{}) + if !ok { + logger.Info( + fmt.Sprintf( + "The %s spec in %s/%s is not a map, the %s(s) will not be updated in Dash0.", + resourceReconciler.KindDisplayName(), + namespace, + name, + resourceReconciler.ShortName(), + )) + return &preconditionValidationResult{ + executeRequest: false, + } + } + + return &preconditionValidationResult{ + executeRequest: true, + authToken: authToken, + apiEndpoint: apiConfig.Endpoint, + dataset: dataset, + k8sNamespace: namespace, + k8sName: name, + spec: spec, + } +} + +func executeAllHttpRequests( + resourceReconciler ThirdPartyResourceReconciler, + allRequests []*http.Request, + actionLabel string, + logger *logr.Logger, +) error { + for _, req := range allRequests { + if err := executeSingleHttpRequest(resourceReconciler, req, actionLabel, logger); err != nil { + return err + } + } + return nil +} + +func executeSingleHttpRequest( + resourceReconciler ThirdPartyResourceReconciler, + req *http.Request, + actionLabel string, + logger *logr.Logger, +) error { + logger.Info( + fmt.Sprintf( + "%s %s at %s in Dash0", + actionLabel, + resourceReconciler.ShortName(), + req.URL.String(), + )) + res, err := resourceReconciler.HttpClient().Do(req) + if err != nil { + logger.Error(err, + fmt.Sprintf( + "unable to execute the HTTP request to create/update/delete the %s at %s", + resourceReconciler.ShortName(), + req.URL.String(), + )) + return err + } + + if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices { + return handleNon2xxStatusCode(resourceReconciler, req, res, logger) + } + + // http status code was 2xx, discard the response body and close it + defer func() { + _, _ = io.Copy(io.Discard, res.Body) + _ = res.Body.Close() + }() + + return nil +} + +func handleNon2xxStatusCode( + resourceReconciler ThirdPartyResourceReconciler, + req *http.Request, + res *http.Response, + logger *logr.Logger, +) error { + defer func() { + _ = res.Body.Close() + }() + responseBody, readErr := io.ReadAll(res.Body) + if readErr != nil { + readBodyErr := fmt.Errorf("unable to read the API response payload after receiving status code %d when "+ + "trying to udpate/create/delete the %s at %s", + res.StatusCode, + resourceReconciler.ShortName(), + req.URL.String(), + ) + logger.Error(readBodyErr, "unable to read the API response payload") + return readBodyErr + } + + statusCodeErr := fmt.Errorf( + "unexpected status code %d when updating/creating/deleting the %s at %s, response body is %s", + res.StatusCode, + resourceReconciler.ShortName(), + req.URL.String(), + string(responseBody), + ) + logger.Error(statusCodeErr, "unexpected status code") + return statusCodeErr +} diff --git a/internal/dash0/selfmonitoringapiaccess/self_monitoring_and_api_access.go b/internal/dash0/selfmonitoringapiaccess/self_monitoring_and_api_access.go index 4b726f4..fe0c445 100644 --- a/internal/dash0/selfmonitoringapiaccess/self_monitoring_and_api_access.go +++ b/internal/dash0/selfmonitoringapiaccess/self_monitoring_and_api_access.go @@ -633,7 +633,7 @@ func ConvertExportConfigurationToEnvVarSettings(selfMonitoringExport dash0v1alph Name: util.AuthorizationHeaderName, Value: authHeaderValue, }} - if dash0Export.Dataset != "" && dash0Export.Dataset != "default" { + if dash0Export.Dataset != "" && dash0Export.Dataset != util.DatasetDefault { headers = append(headers, dash0v1alpha1.Header{ Name: util.Dash0DatasetHeaderName, Value: dash0Export.Dataset, diff --git a/internal/dash0/util/constants.go b/internal/dash0/util/constants.go index d6ecbf8..de454c7 100644 --- a/internal/dash0/util/constants.go +++ b/internal/dash0/util/constants.go @@ -6,6 +6,7 @@ package util const ( AuthorizationHeaderName = "Authorization" Dash0DatasetHeaderName = "Dash0-Dataset" + DatasetDefault = "default" DatasetInsights = "dash0-internal" SelfMonitoringAndApiAuthTokenEnvVarName = "SELF_MONITORING_AND_API_AUTH_TOKEN" diff --git a/test-resources/bin/test-cleanup.sh b/test-resources/bin/test-cleanup.sh index c0036e2..22690a4 100755 --- a/test-resources/bin/test-cleanup.sh +++ b/test-resources/bin/test-cleanup.sh @@ -38,15 +38,16 @@ kubectl delete secret \ kubectl delete ns dash0-system --ignore-not-found -# deliberately deleting the dashboard after undeploying the operator to avoid deleting the dashboard in Dash0 every time. +# deliberately deleting dashboards & check rules after undeploying the operator to avoid deleting these items in +# Dash0 every time. kubectl delete -n ${target_namespace} -f test-resources/customresources/persesdashboard/persesdashboard.yaml || true +kubectl delete -n ${target_namespace} -f test-resources/customresources/prometheusrule/prometheusrule.yaml || true kubectl delete --ignore-not-found=true customresourcedefinition dash0monitorings.operator.dash0.com kubectl delete --ignore-not-found=true customresourcedefinition dash0operatorconfigurations.operator.dash0.com kubectl delete --ignore-not-found=true customresourcedefinition dash0operatorconfigurations.operator.dash0.com -kubectl delete --ignore-not-found=true customresourcedefinition perses.perses.dev kubectl delete --ignore-not-found=true customresourcedefinition persesdashboards.perses.dev -kubectl delete --ignore-not-found=true customresourcedefinition persesdatasources.perses.dev +kubectl delete --ignore-not-found=true customresourcedefinition prometheusrules.monitoring.coreos.com # The following resources are deleted automatically with helm uninstall, unless for example when the operator manager # crashes and the helm pre-delete helm hook cannot run, then they might be left behind. diff --git a/test-resources/bin/test-scenario-01-aum-operator-cr.sh b/test-resources/bin/test-scenario-01-aum-operator-cr.sh index 4297e05..25fefdb 100755 --- a/test-resources/bin/test-scenario-01-aum-operator-cr.sh +++ b/test-resources/bin/test-scenario-01-aum-operator-cr.sh @@ -34,16 +34,12 @@ kubectl create secret \ --from-literal=token="${DASH0_AUTHORIZATION_TOKEN}" finish_step -echo "STEP $step_counter: install foreign custom resource definitions" -install_foreign_crds +echo "STEP $step_counter: install third-party custom resource definitions" +install_third_party_crds finish_step -if [[ "${DEPLOY_PERSES_DASHBOARD:-}" == true ]]; then - echo "STEP $step_counter: deploy a Perses dashboard resource to namespace ${target_namespace}" - kubectl apply -n ${target_namespace} -f test-resources/customresources/persesdashboard/persesdashboard.yaml - finish_step -fi +install_third_party_resources echo "STEP $step_counter: rebuild images" build_all_images diff --git a/test-resources/bin/test-scenario-02-operator-cr-aum.sh b/test-resources/bin/test-scenario-02-operator-cr-aum.sh index 007cc3f..57afcc2 100755 --- a/test-resources/bin/test-scenario-02-operator-cr-aum.sh +++ b/test-resources/bin/test-scenario-02-operator-cr-aum.sh @@ -34,8 +34,8 @@ kubectl create secret \ --from-literal=token="${DASH0_AUTHORIZATION_TOKEN}" finish_step -echo "STEP $step_counter: install foreign custom resource definitions" -install_foreign_crds +echo "STEP $step_counter: install third-party custom resource definitions" +install_third_party_crds finish_step echo "STEP $step_counter: rebuild images" @@ -71,8 +71,4 @@ if [[ "${DEPLOY_APPLICATION_UNDER_MONITORING:-}" != false ]]; then finish_step fi -if [[ "${DEPLOY_PERSES_DASHBOARD:-}" == true ]]; then - echo "STEP $step_counter: deploy a Perses dashboard resource to namespace ${target_namespace}" - kubectl apply -n ${target_namespace} -f test-resources/customresources/persesdashboard/persesdashboard.yaml - finish_step -fi +install_third_party_resources diff --git a/test-resources/bin/util b/test-resources/bin/util index 18afd70..1869e1e 100644 --- a/test-resources/bin/util +++ b/test-resources/bin/util @@ -193,7 +193,20 @@ install_monitoring_resource() { kubectl wait --namespace ${target_namespace} dash0monitorings.operator.dash0.com/dash0-monitoring-resource --for condition=Available } -install_foreign_crds() { +install_third_party_crds() { kubectl apply --server-side -f https://raw.githubusercontent.com/perses/perses-operator/main/config/crd/bases/perses.dev_persesdashboards.yaml + kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.77.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml } +install_third_party_resources() { + if [[ "${DEPLOY_PERSES_DASHBOARD:-}" == true ]]; then + echo "STEP $step_counter: deploy a Perses dashboard resource to namespace ${target_namespace}" + kubectl apply -n ${target_namespace} -f test-resources/customresources/persesdashboard/persesdashboard.yaml + finish_step + fi + if [[ "${DEPLOY_PROMETHEUS_RULE:-}" == true ]]; then + echo "STEP $step_counter: deploy a Prometheus rule resource to namespace ${target_namespace}" + kubectl apply -n ${target_namespace} -f test-resources/customresources/prometheusrule/prometheusrule.yaml + finish_step + fi +} \ No newline at end of file diff --git a/test-resources/customresources/prometheusrule/prometheusrule.yaml b/test-resources/customresources/prometheusrule/prometheusrule.yaml new file mode 100644 index 0000000..e13a1aa --- /dev/null +++ b/test-resources/customresources/prometheusrule/prometheusrule.yaml @@ -0,0 +1,44 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: example + role: alert-rules + name: prometheus-example-rules +spec: + groups: + - name: dash0/k8s + interval: 5m + limit: 10 + partial_response_strategy: warn + rules: + - alert: K8s Deployment replicas mismatch + expr: "kube_deployment_spec_replicas != kube_deployment_status_replicas_available" + for: 10s + keep_firing_for: 10s + annotations: + description: "K8s Deployment replicas mismatch" + summary: "K8s Deployment replicas mismatch {{ $labels.k8s_node_name }}" + some-annotation: "another annotation" + labels: + label-1: label value 1 + label-2: label value 2 + label-3: label value 3 + - alert: K8s pod crash looping + expr: "increase(kube_pod_container_status_restarts_total[1m]) > $__threshold" + annotations: + description: "Pod labels.namespace/labels.pod is crash looping. VALUE = value, LABELS = labels" + summary: "K8s pod crash looping K8s pod crash looping {{ $label. k8s_node_name }}" + - record: will be skipped + expr: "vector(1)" + - name: dash0/collector + interval: 10m + limit: 15 + partial_response_strategy: abort + rules: + - alert: exporter send failed spans + expr: "sum by (cloud_region) (rate(otelcol_exporter_send_failed_spans_total{}[1m])) > $__threshold" + for: 5m + annotations: + description: "dash0-collector - exporter send failed spans" + summary: "dash0-collector - exporter send failed spans {{ $labels.cloud_region }}" diff --git a/test/e2e/application_under_test.go b/test/e2e/application_under_test.go index 9e85055..9532040 100644 --- a/test/e2e/application_under_test.go +++ b/test/e2e/application_under_test.go @@ -11,7 +11,7 @@ import ( "gopkg.in/yaml.v3" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/test/e2e/collector.go b/test/e2e/collector.go index 604025f..c0cd463 100644 --- a/test/e2e/collector.go +++ b/test/e2e/collector.go @@ -9,7 +9,7 @@ import ( "strings" "time" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/test/e2e/container_images.go b/test/e2e/container_images.go index bbb190b..21f3bf1 100644 --- a/test/e2e/container_images.go +++ b/test/e2e/container_images.go @@ -8,7 +8,7 @@ import ( "os/exec" "strings" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/test/e2e/dash0_monitoring_resource.go b/test/e2e/dash0_monitoring_resource.go index 08b315b..50e81a3 100644 --- a/test/e2e/dash0_monitoring_resource.go +++ b/test/e2e/dash0_monitoring_resource.go @@ -13,7 +13,7 @@ import ( dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/test/e2e/dash0_operator_configuration_resource.go b/test/e2e/dash0_operator_configuration_resource.go index 93a65ca..b559e19 100644 --- a/test/e2e/dash0_operator_configuration_resource.go +++ b/test/e2e/dash0_operator_configuration_resource.go @@ -12,7 +12,7 @@ import ( "text/template" "time" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index b96305d..e70211c 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -40,7 +40,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { pwdOutput, err := run(exec.Command("pwd"), false) Expect(err).NotTo(HaveOccurred()) workingDir = strings.TrimSpace(pwdOutput) - _, _ = fmt.Fprintf(GinkgoWriter, "workingDir: %s\n", workingDir) + e2ePrint("workingDir: %s\n", workingDir) env, err := readDotEnvFile(dotEnvFile) Expect(err).NotTo(HaveOccurred()) @@ -914,9 +914,9 @@ func runInParallelForAllWorkloadTypes[C workloadConfig]( go func(cfg C) { defer GinkgoRecover() defer wg.Done() - _, _ = fmt.Fprintf(GinkgoWriter, "(before test step: %s)\n", workloadTypeString) + e2ePrint("(before test step: %s)\n", workloadTypeString) testStep(cfg) - _, _ = fmt.Fprintf(GinkgoWriter, "(after test step: %s)\n", workloadTypeString) + e2ePrint("(after test step: %s)\n", workloadTypeString) passed[workloadTypeString] = true }(config) } diff --git a/test/e2e/events.go b/test/e2e/events.go index d5722c7..afa97ff 100644 --- a/test/e2e/events.go +++ b/test/e2e/events.go @@ -12,7 +12,6 @@ import ( "github.com/dash0hq/dash0-operator/internal/dash0/util" - //nolint:golint,revive . "github.com/onsi/gomega" testUtil "github.com/dash0hq/dash0-operator/test/util" diff --git a/test/e2e/labels.go b/test/e2e/labels.go index 0f5daea..a53a846 100644 --- a/test/e2e/labels.go +++ b/test/e2e/labels.go @@ -11,7 +11,7 @@ import ( "github.com/dash0hq/dash0-operator/internal/dash0/util" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/test/e2e/operator.go b/test/e2e/operator.go index 3ff5af4..8a6a10a 100644 --- a/test/e2e/operator.go +++ b/test/e2e/operator.go @@ -12,7 +12,7 @@ import ( "github.com/dash0hq/dash0-operator/internal/dash0/startup" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) @@ -106,7 +106,7 @@ func deployOperatorWithAutoOperationConfiguration( return err } - _, _ = fmt.Fprintf(GinkgoWriter, "output of helm install:\n%s", output) + e2ePrint("output of helm install:\n%s", output) waitForManagerPodAndWebhookToStart(operatorNamespace) if operatorConfigurationValues != nil { @@ -190,8 +190,7 @@ func ensureDash0OperatorHelmRepoIsInstalled( Expect(err).NotTo(HaveOccurred()) if !regexp.MustCompile( fmt.Sprintf("%s\\s+%s", repositoryName, operatorHelmChartUrl)).MatchString(repoList) { - _, _ = fmt.Fprintf( - GinkgoWriter, + e2ePrint( "The helm repo %s (%s) has not been found, adding it now.\n", repositoryName, operatorHelmChartUrl, @@ -207,8 +206,7 @@ func ensureDash0OperatorHelmRepoIsInstalled( ))).To(Succeed()) Expect(runAndIgnoreOutput(exec.Command("helm", "repo", "update"))).To(Succeed()) } else { - _, _ = fmt.Fprintf( - GinkgoWriter, + e2ePrint( "The helm repo %s (%s) is already installed, updating it now.\n", repositoryName, operatorHelmChartUrl, @@ -333,7 +331,7 @@ func upgradeOperator( output, err := run(exec.Command("helm", arguments...)) Expect(err).NotTo(HaveOccurred()) - _, _ = fmt.Fprintf(GinkgoWriter, "output of helm upgrade:\n%s", output) + e2ePrint("output of helm upgrade:\n%s", output) By("waiting shortly, to give the operator time to restart after helm upgrade") time.Sleep(5 * time.Second) diff --git a/test/e2e/run_command.go b/test/e2e/run_command.go index ddaa9cc..70f54fa 100644 --- a/test/e2e/run_command.go +++ b/test/e2e/run_command.go @@ -10,7 +10,6 @@ import ( "strings" "time" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive . "github.com/onsi/gomega" ) @@ -38,17 +37,17 @@ func run(cmd *exec.Cmd, logCommandArgs ...bool) (string, error) { cmd.Dir = dir if err := os.Chdir(cmd.Dir); err != nil { - _, _ = fmt.Fprintf(GinkgoWriter, "chdir dir: %s\n", err) + e2ePrint("chdir dir: %s\n", err) } cmd.Env = append(os.Environ(), "GO111MODULE=on") command := strings.Join(cmd.Args, " ") if logCommand { - _, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command) + e2ePrint("running: %s\n", command) } output, err := cmd.CombinedOutput() if alwaysLogOutput { - _, _ = fmt.Fprintf(GinkgoWriter, "output: %s\n", string(output)) + e2ePrint("output: %s\n", string(output)) } if err != nil { return string(output), fmt.Errorf("%s failed with error: (%v) %s", command, err, string(output)) diff --git a/test/e2e/setup_teardown.go b/test/e2e/setup_teardown.go index a99a4a5..48ea5a2 100644 --- a/test/e2e/setup_teardown.go +++ b/test/e2e/setup_teardown.go @@ -9,7 +9,7 @@ import ( "strconv" "strings" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/test/e2e/spans.go b/test/e2e/spans.go index 82d3c57..43cffa9 100644 --- a/test/e2e/spans.go +++ b/test/e2e/spans.go @@ -14,7 +14,6 @@ import ( "go.opentelemetry.io/collector/pdata/ptrace" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive . "github.com/onsi/gomega" ) @@ -80,7 +79,7 @@ func sendRequest(g Gomega, port int, httpPathWithQuery string) { }() responseBody, err := io.ReadAll(response.Body) if err != nil { - _, _ = fmt.Fprintf(GinkgoWriter, "could not read http response from %s: %s\n", url, err.Error()) + e2ePrint("could not read http response from %s: %s\n", url, err.Error()) } g.Expect(err).NotTo(HaveOccurred()) g.Expect(responseBody).To(ContainSubstring("We make Observability easy for every developer.")) diff --git a/test/e2e/util.go b/test/e2e/util.go new file mode 100644 index 0000000..defc5da --- /dev/null +++ b/test/e2e/util.go @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package e2e + +import ( + "fmt" + + . "github.com/onsi/ginkgo/v2" +) + +func e2ePrint(format string, a ...any) { + fmt.Fprintf(GinkgoWriter, format, a...) +} diff --git a/test/e2e/verify_instrumentation.go b/test/e2e/verify_instrumentation.go index 6663bbe..70f4d58 100644 --- a/test/e2e/verify_instrumentation.go +++ b/test/e2e/verify_instrumentation.go @@ -8,7 +8,7 @@ import ( "os/exec" "time" - . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/test/util/crds/monitoring.coreos.com_prometheusrules.yaml b/test/util/crds/monitoring.coreos.com_prometheusrules.yaml new file mode 100644 index 0000000..ab08bf1 --- /dev/null +++ b/test/util/crds/monitoring.coreos.com_prometheusrules.yaml @@ -0,0 +1,141 @@ +# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.77.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + operator.prometheus.io/version: 0.77.1 + name: prometheusrules.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + categories: + - prometheus-operator + kind: PrometheusRule + listKind: PrometheusRuleList + plural: prometheusrules + shortNames: + - promrule + singular: prometheusrule + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: |- + The `PrometheusRule` custom resource definition (CRD) defines [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) and [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) rules to be evaluated by `Prometheus` or `ThanosRuler` objects. + + `Prometheus` and `ThanosRuler` objects select `PrometheusRule` objects using label and namespace selectors. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Specification of desired alerting rule definitions for Prometheus. + properties: + groups: + description: Content of Prometheus rule file + items: + description: RuleGroup is a list of sequentially evaluated recording + and alerting rules. + properties: + interval: + description: Interval determines how often rules in the group + are evaluated. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + limit: + description: |- + Limit the number of alerts an alerting rule and series a recording + rule can produce. + Limit is supported starting with Prometheus >= 2.31 and Thanos Ruler >= 0.24. + type: integer + name: + description: Name of the rule group. + minLength: 1 + type: string + partial_response_strategy: + description: |- + PartialResponseStrategy is only used by ThanosRuler and will + be ignored by Prometheus instances. + More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response + pattern: ^(?i)(abort|warn)?$ + type: string + rules: + description: List of alerting and recording rules. + items: + description: |- + Rule describes an alerting or recording rule + See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules) rule + properties: + alert: + description: |- + Name of the alert. Must be a valid label value. + Only one of `record` and `alert` must be set. + type: string + annotations: + additionalProperties: + type: string + description: |- + Annotations to add to each alert. + Only valid for alerting rules. + type: object + expr: + anyOf: + - type: integer + - type: string + description: PromQL expression to evaluate. + x-kubernetes-int-or-string: true + for: + description: Alerts are considered firing once they have + been returned for this long. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + keep_firing_for: + description: KeepFiringFor defines how long an alert will + continue firing after the condition that triggered it + has cleared. + minLength: 1 + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + labels: + additionalProperties: + type: string + description: Labels to add or overwrite. + type: object + record: + description: |- + Name of the time series to output to. Must be a valid metric name. + Only one of `record` and `alert` must be set. + type: string + required: + - expr + type: object + type: array + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + required: + - spec + type: object + served: true + storage: true diff --git a/test/util/crds/perses.dev_persesdashboards.yaml b/test/util/crds/perses.dev_persesdashboards.yaml new file mode 100644 index 0000000..a9390c3 --- /dev/null +++ b/test/util/crds/perses.dev_persesdashboards.yaml @@ -0,0 +1,258 @@ +# https://raw.githubusercontent.com/perses/perses-operator/b4c59f10020fd77d2eab601e333970b8ea208661/config/crd/bases/perses.dev_persesdashboards.yaml +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: persesdashboards.perses.dev +spec: + group: perses.dev + names: + kind: PersesDashboard + listKind: PersesDashboardList + plural: persesdashboards + singular: persesdashboard + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: PersesDashboard is the Schema for the persesdashboards API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + datasources: + additionalProperties: + properties: + default: + type: boolean + display: + properties: + description: + type: string + name: + type: string + type: object + plugin: + description: |- + Plugin will contain the datasource configuration. + The data typed is available in Cue. + properties: + kind: + type: string + spec: + x-kubernetes-preserve-unknown-fields: true + required: + - kind + - spec + type: object + required: + - default + - plugin + type: object + description: Datasources is an optional list of datasource definition. + type: object + display: + properties: + description: + type: string + name: + type: string + type: object + duration: + description: Duration is the default time range to use when getting + data to fill the dashboard + format: duration + type: string + layouts: + items: + properties: + kind: + type: string + spec: + x-kubernetes-preserve-unknown-fields: true + required: + - kind + - spec + type: object + type: array + panels: + additionalProperties: + properties: + kind: + type: string + spec: + properties: + display: + properties: + description: + type: string + name: + type: string + required: + - name + type: object + plugin: + properties: + kind: + type: string + spec: + x-kubernetes-preserve-unknown-fields: true + required: + - kind + - spec + type: object + queries: + items: + properties: + kind: + type: string + spec: + properties: + plugin: + properties: + kind: + type: string + spec: + x-kubernetes-preserve-unknown-fields: true + required: + - kind + - spec + type: object + required: + - plugin + type: object + required: + - kind + - spec + type: object + type: array + required: + - display + - plugin + type: object + required: + - kind + - spec + type: object + type: object + refreshInterval: + description: RefreshInterval is the default refresh interval to use + when landing on the dashboard + format: duration + type: string + variables: + items: + properties: + kind: + description: Kind is the type of the variable. Depending on + the value of Kind, it will change the content of Spec. + type: string + spec: + x-kubernetes-preserve-unknown-fields: true + required: + - kind + - spec + type: object + type: array + required: + - duration + - layouts + - panels + type: object + status: + description: PersesDashboardStatus defines the observed state of PersesDashboard + properties: + conditions: + items: + description: "Condition contains details for one aspect of the current + state of this API Resource.\n---\nThis struct is intended for + direct use as an array at the field path .status.conditions. For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/test/util/third_party_resource.go b/test/util/third_party_resource.go new file mode 100644 index 0000000..f995707 --- /dev/null +++ b/test/util/third_party_resource.go @@ -0,0 +1,73 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package util + +import ( + "context" + _ "embed" + + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" + + . "github.com/onsi/gomega" +) + +var ( + //go:embed crds/perses.dev_persesdashboards.yaml + persesDashboardsCrdYaml []byte + PersesDashboardCrdQualifiedName = types.NamespacedName{ + Name: "persesdashboards.perses.dev", + } + + //go:embed crds/monitoring.coreos.com_prometheusrules.yaml + prometheusRulesCrdYaml []byte + PrometheusRuleCrdQualifiedName = types.NamespacedName{ + Name: "prometheusrules.monitoring.coreos.com", + } +) + +func EnsurePersesDashboardCrdExists( + ctx context.Context, + k8sClient client.Client, +) *apiextensionsv1.CustomResourceDefinition { + return ensureThirdPartyResourceExists( + ctx, + k8sClient, + persesDashboardsCrdYaml, + PersesDashboardCrdQualifiedName, + ) +} + +func EnsurePrometheusRuleCrdExists( + ctx context.Context, + k8sClient client.Client, +) *apiextensionsv1.CustomResourceDefinition { + return ensureThirdPartyResourceExists( + ctx, + k8sClient, + prometheusRulesCrdYaml, + PrometheusRuleCrdQualifiedName, + ) +} + +func ensureThirdPartyResourceExists( + ctx context.Context, + k8sClient client.Client, + crdYaml []byte, + crdQualifiedName types.NamespacedName, +) *apiextensionsv1.CustomResourceDefinition { + crdFromYaml := &apiextensionsv1.CustomResourceDefinition{} + Expect(yaml.Unmarshal(crdYaml, crdFromYaml)).To(Succeed()) + crdObject := EnsureKubernetesObjectExists( + ctx, + k8sClient, + crdQualifiedName, + &apiextensionsv1.CustomResourceDefinition{}, + crdFromYaml, + ) + + return crdObject.(*apiextensionsv1.CustomResourceDefinition) +}