From 3447e7b9892280a548e3ad267a46966b5459a5d6 Mon Sep 17 00:00:00 2001 From: Bastian Krol Date: Tue, 17 Sep 2024 18:14:43 +0200 Subject: [PATCH] feat(chart): create operator configuration resource via helm chart --- Makefile | 2 +- api/dash0monitoring/v1alpha1/types_common.go | 9 +- cmd/main.go | 85 ++++++-- .../operator.dash0.com_dash0monitorings.yaml | 9 +- ...dash0.com_dash0operatorconfigurations.yaml | 9 +- config/rbac/role.yaml | 6 + helm-chart/dash0-operator/README.md | 3 +- .../templates/operator/cluster-roles.yaml | 8 + .../operator/deployment-and-webhooks.yaml | 22 ++ helm-chart/dash0-operator/values.yaml | 53 ++++- internal/dash0/controller/dash0_controller.go | 1 + .../operator_pre_delete_handler.go | 2 +- .../operator_pre_delete_handler_test.go | 2 +- .../pre_delete_suite_test.go} | 4 +- .../startup/auto_operator_configuration.go | 193 ++++++++++++++++++ test/e2e/operator.go | 4 - test/util/operator_resource.go | 2 +- 17 files changed, 370 insertions(+), 44 deletions(-) rename internal/dash0/{removal => predelete}/operator_pre_delete_handler.go (99%) rename internal/dash0/{removal => predelete}/operator_pre_delete_handler_test.go (99%) rename internal/dash0/{removal/removal_suite_test.go => predelete/pre_delete_suite_test.go} (98%) create mode 100644 internal/dash0/startup/auto_operator_configuration.go diff --git a/Makefile b/Makefile index 21ce344c..427b8782 100644 --- a/Makefile +++ b/Makefile @@ -165,7 +165,7 @@ golangci-lint: lint: golangci-lint ## Run golangci-lint linter & yamllint @echo -------------------------------- $(GOLANGCI_LINT) run - helm lint helm-chart/dash0-operator --set operator.disableSecretCheck=true --set operator.disableOtlpEndpointCheck=true + helm lint helm-chart/dash0-operator .PHONY: lint-fix lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes diff --git a/api/dash0monitoring/v1alpha1/types_common.go b/api/dash0monitoring/v1alpha1/types_common.go index 1cb8d535..ee108cfb 100644 --- 
a/api/dash0monitoring/v1alpha1/types_common.go +++ b/api/dash0monitoring/v1alpha1/types_common.go @@ -36,8 +36,8 @@ type Export struct { type Dash0Configuration struct { // The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value // needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom - // https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in - // `dash0.com:4317`. + // https://app.dash0.com -> organization settings -> "Endpoints". The correct endpoint value will always start with + // `ingress.` and end in `dash0.com:4317`. // // +kubebuilder:validation:Required Endpoint string `json:"endpoint"` @@ -61,14 +61,15 @@ type Dash0Configuration struct { type Authorization struct { // The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has // to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization - // token for your Dash0 organization can be copied from https://app.dash0.com/settings. + // token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + // "Auth Tokens". // // +kubebuilder:validation:Optional Token *string `json:"token"` // either token or secret ref, with token taking precedence // A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is // ignored if the token property is set. The authorization token for your Dash0 organization can be copied from - // https://app.dash0.com/settings. + // https://app.dash0.com -> organization settings -> "Auth Tokens". 
// // +kubebuilder:validation:Optional SecretRef *SecretRef `json:"secretRef"` diff --git a/cmd/main.go b/cmd/main.go index f8e79e94..a539e488 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -39,8 +39,9 @@ import ( "github.com/dash0hq/dash0-operator/internal/backendconnection/otelcolresources" "github.com/dash0hq/dash0-operator/internal/dash0/controller" "github.com/dash0hq/dash0-operator/internal/dash0/instrumentation" - "github.com/dash0hq/dash0-operator/internal/dash0/removal" + "github.com/dash0hq/dash0-operator/internal/dash0/predelete" "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" + "github.com/dash0hq/dash0-operator/internal/dash0/startup" "github.com/dash0hq/dash0-operator/internal/dash0/util" "github.com/dash0hq/dash0-operator/internal/dash0/webhooks" //+kubebuilder:scaffold:imports @@ -105,25 +106,40 @@ func init() { func main() { ctx := context.Background() - var uninstrumentAll bool + var operatorConfigurationEndpoint string + var operatorConfigurationToken string + var operatorConfigurationSecretRefName string + var operatorConfigurationSecretRefKey string + var isUninstrumentAll bool var metricsAddr string var enableLeaderElection bool var probeAddr string var secureMetrics bool var enableHTTP2 bool - flag.BoolVar(&uninstrumentAll, "uninstrument-all", false, - "If set, the process will remove all Dash0 monitoring resources from all namespaces in the cluster. This "+ - "will trigger the Dash0 monitoring resources' finalizers in each namespace, which in turn will revert the "+ - "instrumentation of all workloads in all namespaces.") + + flag.BoolVar(&isUninstrumentAll, "uninstrument-all", false, + "If set, the process will remove all Dash0 monitoring resources from all namespaces in the cluste, then "+ + "exit. 
This will trigger the Dash0 monitoring resources' finalizers in each namespace, which in turn will "+ + "revert the instrumentation of all workloads in all namespaces.") + flag.StringVar(&operatorConfigurationEndpoint, "operator-configuration-endpoint", "", + "The Dash0 endpoint gRPC URL for creating an operator configuration resource.") + flag.StringVar(&operatorConfigurationToken, "operator-configuration-token", "", + "The Dash0 auth token for creating an operator configuration resource.") + flag.StringVar(&operatorConfigurationSecretRefName, "operator-configuration-secret-ref-name", "", + "The name of an existing Kubernetes secret containing the Dash0 auth token, used to creating an operator "+ + "configuration resource.") + flag.StringVar(&operatorConfigurationSecretRefKey, "operator-configuration-secret-ref-key", "", + "The key in an existing Kubernetes secret containing the Dash0 auth token, used to creating an operator "+ + "configuration resource.") flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager. 
"+ "Enabling this will ensure there is only one active controller manager.") flag.BoolVar(&secureMetrics, "metrics-secure", false, - "If set, the metrics endpoint is served securely") + "If set, the metrics endpoint is served securely.") flag.BoolVar(&enableHTTP2, "enable-http2", false, - "If set, HTTP/2 will be enabled for the metrics and webhook servers") + "If set, HTTP/2 will be enabled for the metrics and webhook servers.") var developmentMode bool developmentModeRaw, isSet := os.LookupEnv(developmentModeEnvVarName) @@ -143,8 +159,8 @@ func main() { ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) - if uninstrumentAll { - if err := deleteDash0MonitoringResourcesInAllNamespaces(&setupLog); err != nil { + if isUninstrumentAll { + if err := deleteMonitoringResourcesInAllNamespaces(&setupLog); err != nil { setupLog.Error(err, "deleting the Dash0 monitoring resources in all namespaces failed") os.Exit(1) } @@ -196,6 +212,18 @@ func main() { map[string]string{semconv.AttributeK8SDeploymentUID: string(deploymentSelfReference.UID)}, ) + var operatorConfiguration *startup.OperatorConfigurationValues + if len(operatorConfigurationEndpoint) > 0 { + operatorConfiguration = &startup.OperatorConfigurationValues{ + Endpoint: operatorConfigurationEndpoint, + Token: operatorConfigurationToken, + SecretRef: startup.SecretRef{ + Name: operatorConfigurationSecretRefName, + Key: operatorConfigurationSecretRefKey, + }, + } + } + if err = startOperatorManager( ctx, metricsAddr, @@ -204,6 +232,7 @@ func main() { webhookServer, probeAddr, enableLeaderElection, + operatorConfiguration, developmentMode, ); err != nil { setupLog.Error(err, "The Dash0 operator manager process failed to start.") @@ -219,6 +248,7 @@ func startOperatorManager( webhookServer k8swebhook.Server, probeAddr string, enableLeaderElection bool, + operatorConfiguration *startup.OperatorConfigurationValues, developmentMode bool, ) error { mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ @@ -285,7 
+315,7 @@ func startOperatorManager( developmentMode, ) - err = startDash0Controllers(ctx, mgr, clientset, developmentMode) + err = startDash0Controllers(ctx, mgr, clientset, operatorConfiguration, developmentMode) if err != nil { return err } @@ -396,6 +426,7 @@ func startDash0Controllers( ctx context.Context, mgr manager.Manager, clientset *kubernetes.Clientset, + operatorConfiguration *startup.OperatorConfigurationValues, developmentMode bool, ) error { oTelCollectorBaseUrl := @@ -419,8 +450,10 @@ func startDash0Controllers( ctx, clientset, mgr.GetEventRecorderFor("dash0-startup-tasks"), + operatorConfiguration, images, oTelCollectorBaseUrl, + &setupLog, ) logCurrentSelfMonitoringSettings(deploymentSelfReference) @@ -555,9 +588,17 @@ func executeStartupTasks( ctx context.Context, clientset *kubernetes.Clientset, eventRecorder record.EventRecorder, + operatorConfiguration *startup.OperatorConfigurationValues, images util.Images, oTelCollectorBaseUrl string, + logger *logr.Logger, ) { + createOperatorConfiguration( + ctx, + startupTasksK8sClient, + operatorConfiguration, + logger, + ) instrumentAtStartup( ctx, startupTasksK8sClient, @@ -618,8 +659,26 @@ func logCurrentSelfMonitoringSettings(deploymentSelfReference *appsv1.Deployment } } -func deleteDash0MonitoringResourcesInAllNamespaces(logger *logr.Logger) error { - handler, err := removal.NewOperatorPreDeleteHandler() +func createOperatorConfiguration( + ctx context.Context, + k8sClient client.Client, + operatorConfiguration *startup.OperatorConfigurationValues, + logger *logr.Logger, +) { + if operatorConfiguration != nil { + handler := startup.AutoOperatorConfigurationResourceHandler{ + Client: k8sClient, + OperatorNamespace: envVars.operatorNamespace, + NamePrefix: envVars.oTelCollectorNamePrefix, + } + if err := handler.CreateOperatorConfigurationResource(ctx, operatorConfiguration, logger); err != nil { + logger.Error(err, "Failed to create the requested Dash0 operator configuration resource.") + } + } +} 
+ +func deleteMonitoringResourcesInAllNamespaces(logger *logr.Logger) error { + handler, err := predelete.NewOperatorPreDeleteHandler() if err != nil { logger.Error(err, "Failed to create the OperatorPreDeleteHandler.") return err diff --git a/config/crd/bases/operator.dash0.com_dash0monitorings.yaml b/config/crd/bases/operator.dash0.com_dash0monitorings.yaml index 8b927444..ae4b5604 100644 --- a/config/crd/bases/operator.dash0.com_dash0monitorings.yaml +++ b/config/crd/bases/operator.dash0.com_dash0monitorings.yaml @@ -69,7 +69,7 @@ spec: description: |- A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is ignored if the token property is set. The authorization token for your Dash0 organization can be copied from - https://app.dash0.com/settings. + https://app.dash0.com -> organization settings -> "Auth Tokens". properties: key: default: token @@ -89,7 +89,8 @@ spec: description: |- The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization - token for your Dash0 organization can be copied from https://app.dash0.com/settings. + token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + "Auth Tokens". type: string type: object dataset: @@ -102,8 +103,8 @@ spec: description: |- The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom - https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in - `dash0.com:4317`. + https://app.dash0.com -> organization settings -> "Endpoints". The correct endpoint value will always start with + `ingress.` and end in `dash0.com:4317`. 
type: string required: - authorization diff --git a/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml b/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml index fbfddb4f..56fcfa3c 100644 --- a/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml +++ b/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml @@ -69,7 +69,7 @@ spec: description: |- A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is ignored if the token property is set. The authorization token for your Dash0 organization can be copied from - https://app.dash0.com/settings. + https://app.dash0.com -> organization settings -> "Auth Tokens". properties: key: default: token @@ -89,7 +89,8 @@ spec: description: |- The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization - token for your Dash0 organization can be copied from https://app.dash0.com/settings. + token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + "Auth Tokens". type: string type: object dataset: @@ -102,8 +103,8 @@ spec: description: |- The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom - https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in - `dash0.com:4317`. + https://app.dash0.com -> organization settings -> "Endpoints". The correct endpoint value will always start with + `ingress.` and end in `dash0.com:4317`. 
type: string required: - authorization diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index c822fe58..9d798ebc 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -28,6 +28,12 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - endpoints + verbs: + - get - apiGroups: - "" resources: diff --git a/helm-chart/dash0-operator/README.md b/helm-chart/dash0-operator/README.md index 33163c13..0b290b13 100644 --- a/helm-chart/dash0-operator/README.md +++ b/helm-chart/dash0-operator/README.md @@ -202,7 +202,8 @@ spec: If you want to provide the Dash0 authorization token via a Kubernetes secret instead of providing the token as a string, create the secret in the namespace where the Dash0 operator is installed. If you followed the guide above, the name of that namespace is `dash0-system`. -The authorization token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> "Auth Tokens". +The authorization token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings +-> "Auth Tokens". You can freely choose the name of the secret and the key of the token within the secret. Create the secret by using the following command: diff --git a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml index bc36910b..4d51afcf 100644 --- a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml +++ b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml @@ -38,6 +38,14 @@ rules: - update - watch +# Permissions required to create a Dash0 operator configuration resource +- apiGroups: + - "" + resources: + - endpoints + verbs: + - get + # Permissions required to queue events to report about the operator's actions, and to attach dangling events to their # respective involved objects. 
- apiGroups: diff --git a/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml b/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml index bf7d328c..82c5b376 100644 --- a/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml +++ b/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml @@ -78,6 +78,28 @@ spec: - --health-probe-bind-address=:8081 - --metrics-bind-address=127.0.0.1:8080 - --leader-elect +{{- if .Values.operator.dash0Backend.enabled }} +{{- if not .Values.operator.dash0Backend.endpoint }} +{{- fail "Error: operator.dash0Backend.enabled is set to true, but you did not provide a value for operator.dash0Backend.endpoint. Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." -}} +{{- end}} + - --operator-configuration-endpoint={{ .Values.operator.dash0Backend.endpoint }} +{{- if .Values.operator.dash0Backend.token }} + - --operator-configuration-token={{ .Values.operator.dash0Backend.token }} +{{- else if (and .Values.operator.dash0Backend.secretRef.name .Values.operator.dash0Backend.secretRef.key) }} +{{- $secret := lookup "v1" "Secret" .Release.Namespace .Values.operator.dash0Backend.secretRef.name -}} +{{- if $secret -}} +{{- if not (index $secret.data .Values.operator.dash0Backend.secretRef.key) -}} +{{- fail (printf "Error: There is a secret named \"%s\" in the target namespace \"%s\", but it does not have the required key \"%s\". Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." .Values.operator.dash0Backend.secretRef.name .Release.Namespace .Values.operator.dash0Backend.secretRef.key) -}} +{{- end -}} +{{- else -}} +{{- fail (printf "Error: There is no secret named \"%s\" in the target namespace \"%s\". 
Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." .Values.operator.dash0Backend.secretRef.name .Release.Namespace) -}} +{{- end }} + - --operator-configuration-secret-ref-name={{ .Values.operator.dash0Backend.secretRef.name }} + - --operator-configuration-secret-ref-key={{ .Values.operator.dash0Backend.secretRef.key }} +{{- else }} +{{- fail "Error: operator.dash0Backend.enabled is set to true, but neither operator.dash0Backend.token nor operator.dash0Backend.secretRef.name & operator.dash0Backend.secretRef.key have been provided. Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." -}} +{{- end }} +{{- end }} env: - name: DASH0_OPERATOR_NAMESPACE valueFrom: diff --git a/helm-chart/dash0-operator/values.yaml b/helm-chart/dash0-operator/values.yaml index 35cc3acd..dd78fdf3 100644 --- a/helm-chart/dash0-operator/values.yaml +++ b/helm-chart/dash0-operator/values.yaml @@ -3,6 +3,51 @@ # settings for the operator/controller operator: + + # Use the operator.dash0Backend settings to configure the connection to the Dash0 backend; telemetry will be sent to + # the configured Dash0 backend by default. Under the hood, this will create a Dash0OperatorConfiguration resource + # right away, when starting the operator. If left empty, you can always create a Dash0OperatorConfiguration resource + # manually later. + dash0Backend: + # Set this to true to enable the creation of a Dash0OperatorConfiguration resource at startup. If a + # Dash0OperatorConfiguration already exists in the cluster, no action will be taken. Note that if this is set to + # true, you will also need to provide a valid endpoint (operator.dash0Backend.endpoint), and either an auth + # token (operator.dash0Backend.token) or a reference to a Kubernetes secret containing that token + # (operator.dash0Backend.secretRef). 
+ enabled: false + + # The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory if + # operator.dash0Backend.enabled is true, otherwise it will be ignored. The value needs to be the OTLP/gRPC endpoint + # of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied from https://app.dash0.com -> organization + # settings -> "Endpoints". The correct endpoint value will always start with `ingress.` and end in `dash0.com:4317`. + endpoint: + + # The Dash0 authorization token. This property is optional, but either this property or the secretRef configuration + # has to be provided if operator.dash0Backend.enabled is true. If operator.dash0Backend.enabled is false, this + # property will be ignored. + # If both token and secretRef are provided, the token will be used and secretRef will be ignored. The authorization + # token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + # "Auth Tokens". + token: + + # A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is + # ignored if either operator.dash0Backend.enabled is false or operator.dash0Backend.token is set. The authorization + # token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + # "Auth Tokens". + secretRef: + # The name of the secret containing the Dash0 authorization token. Example: Assume you have created the Kubernetes + # secret with the following command: + # kubectl create secret generic \ + # dash0-authorization-secret \ + # --namespace dash0-system \ + # --from-literal=token=auth_...your-token-here... + # + # Then you would set the property to "dash0-authorization-secret". + name: + # The key of the value which contains the Dash0 authorization token. Assuming you have created the Kubernetes + # secret with the command above (see property "name"), then you would set the property to "token". 
+ key: + # number of replica for the controller manager deployment replicaCount: 1 @@ -135,11 +180,3 @@ operator: # If set to true, instructs the logger (Zap) to use a Zap development config (stacktraces on warnings, no sampling), # otherwise a Zap production config will be used (stacktraces on errors, sampling). developmentMode: false - - # If set to true, the operator Helm chart will skip the check for the Dash0 authorization secret. This should only - # be done for testing purposes. - disableSecretCheck: false - - # If set to true, the operator Helm chart will skip the check for the OTLP endpoing setting. This should only be done - # for testing purposes. - disableOtlpEndpointCheck: false diff --git a/internal/dash0/controller/dash0_controller.go b/internal/dash0/controller/dash0_controller.go index 821f4902..db4a15ad 100644 --- a/internal/dash0/controller/dash0_controller.go +++ b/internal/dash0/controller/dash0_controller.go @@ -86,6 +86,7 @@ func (r *Dash0Reconciler) InitializeSelfMonitoringMetrics( //+kubebuilder:rbac:groups=core,resources=events,verbs=create;list;patch;update //+kubebuilder:rbac:groups=core,resources=namespaces,verbs=get //+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;delete +//+kubebuilder:rbac:groups=core,resources=endpoints,verbs=get //+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0monitorings,verbs=get;list;watch;create;update;patch;delete;deletecollection //+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0monitorings/finalizers,verbs=update //+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0monitorings/status,verbs=get;update;patch diff --git a/internal/dash0/removal/operator_pre_delete_handler.go b/internal/dash0/predelete/operator_pre_delete_handler.go similarity index 99% rename from internal/dash0/removal/operator_pre_delete_handler.go rename to internal/dash0/predelete/operator_pre_delete_handler.go index 9648ab83..5b3f7b8e 100644 --- 
a/internal/dash0/removal/operator_pre_delete_handler.go +++ b/internal/dash0/predelete/operator_pre_delete_handler.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. // SPDX-License-Identifier: Apache-2.0 -package removal +package predelete import ( "context" diff --git a/internal/dash0/removal/operator_pre_delete_handler_test.go b/internal/dash0/predelete/operator_pre_delete_handler_test.go similarity index 99% rename from internal/dash0/removal/operator_pre_delete_handler_test.go rename to internal/dash0/predelete/operator_pre_delete_handler_test.go index 820de8f1..2e752036 100644 --- a/internal/dash0/removal/operator_pre_delete_handler_test.go +++ b/internal/dash0/predelete/operator_pre_delete_handler_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. // SPDX-License-Identifier: Apache-2.0 -package removal +package predelete import ( "context" diff --git a/internal/dash0/removal/removal_suite_test.go b/internal/dash0/predelete/pre_delete_suite_test.go similarity index 98% rename from internal/dash0/removal/removal_suite_test.go rename to internal/dash0/predelete/pre_delete_suite_test.go index af85be12..685c7d87 100644 --- a/internal/dash0/removal/removal_suite_test.go +++ b/internal/dash0/predelete/pre_delete_suite_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. // SPDX-License-Identifier: Apache-2.0 -package removal +package predelete import ( "fmt" @@ -48,7 +48,7 @@ var ( func TestRemoval(t *testing.T) { RegisterFailHandler(Fail) - RunSpecs(t, "Removal Suite") + RunSpecs(t, "Pre-Delete Suite") } var _ = BeforeSuite(func() { diff --git a/internal/dash0/startup/auto_operator_configuration.go b/internal/dash0/startup/auto_operator_configuration.go new file mode 100644 index 00000000..04b96193 --- /dev/null +++ b/internal/dash0/startup/auto_operator_configuration.go @@ -0,0 +1,193 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package startup + +import ( + "context" + "fmt" + "time" + + "github.com/dash0hq/dash0-operator/internal/dash0/util" + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/client" + + dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" +) + +type SecretRef struct { + Name string + Key string +} + +type OperatorConfigurationValues struct { + Endpoint string + Token string + SecretRef +} + +type AutoOperatorConfigurationResourceHandler struct { + client.Client + OperatorNamespace string + NamePrefix string +} + +func (r *AutoOperatorConfigurationResourceHandler) CreateOperatorConfigurationResource( + ctx context.Context, + operatorConfiguration *OperatorConfigurationValues, + logger *logr.Logger, +) error { + if err := r.validateOperatorConfiguration(operatorConfiguration, logger); err != nil { + return err + } + + go func() { + // There is a validation webhook for operator configuration resources. Thus, before we can create an operator + // configuration resource, we need to wait for the webhook endpoint to become available. 
+ if err := r.waitForWebserviceEndpoint(ctx, logger); err != nil { + logger.Error(err, "failed to create the Dash0 operator configuration resource") + } + if err := r.createOperatorConfigurationResourceWithRetry(ctx, operatorConfiguration, logger); err != nil { + logger.Error(err, "failed to create the Dash0 operator configuration resource") + } + }() + return nil +} + +func (r *AutoOperatorConfigurationResourceHandler) validateOperatorConfiguration(operatorConfiguration *OperatorConfigurationValues, logger *logr.Logger) error { + if operatorConfiguration.Token == "" { + if operatorConfiguration.SecretRef.Name == "" { + return fmt.Errorf("invalid operator configuration: --operator-configuration-endpoint has been provided, " + + "indicating that an operator configuration resource should be created, but neither " + + "--operator-configuration-token nor --operator-configuration-secret-ref-name have been provided") + } + if operatorConfiguration.SecretRef.Key == "" { + return fmt.Errorf("invalid operator configuration: --operator-configuration-endpoint has been provided, " + + "indicating that an operator configuration resource should be created, but neither " + + "--operator-configuration-token nor --operator-configuration-secret-ref-key have been provided") + } + } + return nil +} + +func (r *AutoOperatorConfigurationResourceHandler) waitForWebserviceEndpoint( + ctx context.Context, + logger *logr.Logger, +) error { + if err := util.RetryWithCustomBackoff( + "waiting for webservice endpoint to become available", + func() error { + return r.checkWebServiceEndpoint(ctx, logger) + }, + wait.Backoff{ + Duration: 1 * time.Second, + Factor: 1.0, + Steps: 30, + Cap: 30 * time.Second, + }, + true, + logger, + ); err != nil { + return fmt.Errorf("failed to wait for the webservice endpoint to become available: %w", err) + } + + return nil +} + +func (r *AutoOperatorConfigurationResourceHandler) checkWebServiceEndpoint( + ctx context.Context, + logger *logr.Logger, +) error { + 
endpoints := corev1.Endpoints{} + if err := r.Get(ctx, types.NamespacedName{ + Namespace: r.OperatorNamespace, + Name: fmt.Sprintf("%s-webhook-service", r.NamePrefix), + }, &endpoints); err != nil { + return err + } + + for _, subset := range endpoints.Subsets { + for _, port := range subset.Ports { + if port.Port == 9443 { + return nil + } + } + } + + return fmt.Errorf("the webservice endpoint is not available yet") +} + +func (r *AutoOperatorConfigurationResourceHandler) createOperatorConfigurationResourceWithRetry( + ctx context.Context, + operatorConfiguration *OperatorConfigurationValues, + logger *logr.Logger, +) error { + return util.RetryWithCustomBackoff( + "create operator configuration resource at startup", + func() error { + return r.createOperatorConfigurationResourceOnce(ctx, operatorConfiguration, logger) + }, + wait.Backoff{ + Duration: 3 * time.Second, + Factor: 1.5, + Steps: 6, + Cap: 60 * time.Second, + }, + true, + logger, + ) +} + +func (r *AutoOperatorConfigurationResourceHandler) createOperatorConfigurationResourceOnce( + ctx context.Context, + operatorConfiguration *OperatorConfigurationValues, + logger *logr.Logger, +) error { + allOperatorConfigurationResources := &dash0v1alpha1.Dash0OperatorConfigurationList{} + if err := r.List(ctx, allOperatorConfigurationResources); err != nil { + return fmt.Errorf("failed to list all Dash0 operator configuration resources: %w", err) + } + + if len(allOperatorConfigurationResources.Items) >= 1 { + logger.Info("There is already at least one Dash0 operator configuration resource. 
There should only be at " + + "most one operator configuration resource per cluster, hence the operator's post-install hook will not " + + "create an additional operator configuration resource.") + return nil + } + + authorization := dash0v1alpha1.Authorization{} + if operatorConfiguration.Token != "" { + authorization.Token = &operatorConfiguration.Token + } else { + authorization.SecretRef = &dash0v1alpha1.SecretRef{ + Name: operatorConfiguration.SecretRef.Name, + Key: operatorConfiguration.SecretRef.Key, + } + } + + if err := r.Create(ctx, &dash0v1alpha1.Dash0OperatorConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: "dash0-operator-configuration-auto-resource", + }, + Spec: dash0v1alpha1.Dash0OperatorConfigurationSpec{ + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: true, + }, + Export: &dash0v1alpha1.Export{ + Dash0: &dash0v1alpha1.Dash0Configuration{ + Endpoint: operatorConfiguration.Endpoint, + Authorization: authorization, + }, + }, + }, + }); err != nil { + return fmt.Errorf("failed to create the Dash0 operator configuration resource: %w", err) + } + + logger.Info("a Dash0 operator configuration resource has been created") + return nil +} diff --git a/test/e2e/operator.go b/test/e2e/operator.go index 7e0ba24e..26ba6a47 100644 --- a/test/e2e/operator.go +++ b/test/e2e/operator.go @@ -44,8 +44,6 @@ func deployOperator( operatorNamespace, "--create-namespace", "--set", "operator.developmentMode=true", - "--set", "operator.disableSecretCheck=true", - "--set", "operator.disableOtlpEndpointCheck=true", } arguments = addOptionalHelmParameters(arguments, operatorHelmChart, images) @@ -263,8 +261,6 @@ func upgradeOperator( "--namespace", operatorNamespace, "--set", "operator.developmentMode=true", - "--set", "operator.disableSecretCheck=true", - "--set", "operator.disableOtlpEndpointCheck=true", } arguments = addOptionalHelmParameters(arguments, operatorHelmChart, images) diff --git a/test/util/operator_resource.go b/test/util/operator_resource.go 
index 43beea54..1cedf6f3 100644 --- a/test/util/operator_resource.go +++ b/test/util/operator_resource.go @@ -21,7 +21,7 @@ import ( const ( Dash0OperatorDeploymentName = "controller-deployment" - OperatorConfigurationResourceName = "dash0-operator-test-resource" + OperatorConfigurationResourceName = "dash0-operator-configuration-test" ) var (