diff --git a/README.md b/README.md index e336242b5..757073711 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ loaded for you. * smartgatewayCollectdEventsManifest * smartgatewayCeilometerEventsManifest * servicemonitorManifest +* scrapeconfigManifest ## Development diff --git a/deploy/olm-catalog/service-telemetry-operator/manifests/service-telemetry-operator.clusterserviceversion.yaml b/deploy/olm-catalog/service-telemetry-operator/manifests/service-telemetry-operator.clusterserviceversion.yaml index 4ec0f7f5c..953bb7739 100644 --- a/deploy/olm-catalog/service-telemetry-operator/manifests/service-telemetry-operator.clusterserviceversion.yaml +++ b/deploy/olm-catalog/service-telemetry-operator/manifests/service-telemetry-operator.clusterserviceversion.yaml @@ -203,6 +203,15 @@ spec: - kind: ServiceMonitors name: servicemonitors.monitoring.coreos.com version: v1 + - kind: ScrapeConfigs + name: scrapeconfigs.monitoring.coreos.com + version: v1alpha1 + - kind: ServiceMonitors + name: servicemonitors.monitoring.rhobs + version: v1 + - kind: ScrapeConfigs + name: scrapeconfigs.monitoring.rhobs + version: v1alpha1 version: v1beta1 description: Service Telemetry Operator for monitoring clouds displayName: Service Telemetry Operator @@ -378,17 +387,21 @@ spec: - apiGroups: - monitoring.coreos.com resources: + - scrapeconfigs - servicemonitors verbs: - get - create + - delete - apiGroups: - monitoring.rhobs resources: + - scrapeconfigs - servicemonitors verbs: - get - create + - delete - apiGroups: - apps resourceNames: diff --git a/deploy/role.yaml b/deploy/role.yaml index cdade2ce7..ba2dfbbe8 100644 --- a/deploy/role.yaml +++ b/deploy/role.yaml @@ -129,17 +129,21 @@ rules: - apiGroups: - monitoring.coreos.com resources: + - scrapeconfigs - servicemonitors verbs: - get - create + - delete - apiGroups: - monitoring.rhobs resources: + - scrapeconfigs - servicemonitors verbs: - get - create + - delete - apiGroups: - apps resourceNames: diff --git a/roles/servicetelemetry/tasks/base_smartgateway.yml b/roles/servicetelemetry/tasks/base_smartgateway.yml index 4d0cfdafd..7077e928d 100644 --- a/roles/servicetelemetry/tasks/base_smartgateway.yml +++ b/roles/servicetelemetry/tasks/base_smartgateway.yml @@ -6,8 +6,8 @@ k8s: definition: "{{ lookup('template', manifest) | from_yaml }}" -- name: Deploy SG-specific ServiceMonitor for metrics SGs - include_tasks: component_servicemonitor.yml +- name: Deploy SG-specific ScrapeConfig for metrics SGs + include_tasks: component_scrapeconfig.yml when: - data_type == 'metrics' - has_monitoring_api | bool diff --git a/roles/servicetelemetry/tasks/component_scrapeconfig.yml b/roles/servicetelemetry/tasks/component_scrapeconfig.yml new file mode 100644 index 000000000..b05a5ac03 --- /dev/null +++ b/roles/servicetelemetry/tasks/component_scrapeconfig.yml @@ -0,0 +1,93 @@ +- name: Look up prometheus-stf SA to get auth secret name + k8s_info: + api_version: v1 + kind: ServiceAccount + namespace: '{{ ansible_operator_meta.namespace }}' + name: prometheus-stf + register: service_account + +- name: Look up auth secret to get token secret name + k8s_info: + api_version: v1 + kind: Secret + namespace: '{{ ansible_operator_meta.namespace }}' + name: '{{ service_account.resources[0].secrets[0].name }}' + register: auth_secret + +- name: Create SG-specific Scrape Config manifest + set_fact: + sg_specific_scrapeconfig_manifest: | + apiVersion: {{ prometheus_operator_api_string | replace("/v1","/v1alpha1") }} + kind: ScrapeConfig + metadata: + labels: + app: smart-gateway + name: '{{ this_smartgateway }}' + namespace: '{{ ansible_operator_meta.namespace }}' + spec: + authorization: + type: bearer + credentials: + name: '{{ auth_secret.resources[0].metadata.annotations['openshift.io/token-secret.name'] }}' + key: token + metricRelabelings: + - action: labeldrop + regex: pod + - action: labeldrop + regex: namespace + - action: labeldrop + regex: instance + - action: replace + regex: '.*/(.*)$' + replacement: $1 + sourceLabels: [job] + targetLabel: service + - action: labeldrop + regex: job + - action: labeldrop + regex: publisher + - action: replace + targetLabel: container + replacement: sg-core + - action: replace + targetLabel: endpoint + replacement: prom-https + scheme: HTTPS + scrapeInterval: {{ servicetelemetry_vars.backends.metrics.prometheus.scrape_interval }} + staticConfigs: + - targets: + - '{{ this_smartgateway }}.{{ ansible_operator_meta.namespace }}.svc:8083' + tlsConfig: + ca: + configMap: + name: serving-certs-ca-bundle + key: service-ca.crt + serverName: '{{ this_smartgateway }}.{{ ansible_operator_meta.namespace }}.svc' + +- name: Create ScrapeConfig to scrape Smart Gateway + k8s: + state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' + definition: + '{{ sg_specific_scrapeconfig_manifest }}' + +- name: Create additional ScrapeConfig if provided + k8s: + state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' + definition: + '{{ scrapeconfig_manifest }}' + when: scrapeconfig_manifest is defined + +- name: Create additional ServiceMonitor if provided (legacy) + k8s: + state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' + definition: + '{{ servicemonitor_manifest }}' + when: servicemonitor_manifest is defined + +- name: Remove (legacy) default ServiceMonitors + k8s: + state: absent + api_version: '{{ prometheus_operator_api_string }}' + kind: ServiceMonitor + namespace: '{{ ansible_operator_meta.namespace }}' + name: '{{ this_smartgateway }}' \ No newline at end of file diff --git a/roles/servicetelemetry/tasks/component_servicemonitor.yml b/roles/servicetelemetry/tasks/component_servicemonitor.yml deleted file mode 100644 index 753116c46..000000000 --- a/roles/servicetelemetry/tasks/component_servicemonitor.yml +++ /dev/null @@ -1,52 +0,0 @@ -- name: Create SG-specific Service Monitor manifest - set_fact: - sg_specific_servicemonitor_manifest: | - apiVersion: {{ prometheus_operator_api_string }} - kind: ServiceMonitor - metadata: - labels: - app: smart-gateway - name: '{{ this_smartgateway }}' - namespace: '{{ ansible_operator_meta.namespace }}' - spec: - endpoints: - - interval: {{ servicetelemetry_vars.backends.metrics.prometheus.scrape_interval }} - metricRelabelings: - - action: labeldrop - regex: pod - sourcelabels: [] - - action: labeldrop - regex: namespace - sourcelabels: [] - - action: labeldrop - regex: instance - sourcelabels: [] - - action: labeldrop - regex: job - sourcelabels: [] - - action: labeldrop - regex: publisher - sourcelabels: [] - port: prom-https - scheme: https - tlsConfig: - caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt - serverName: "{{ this_smartgateway }}.{{ ansible_operator_meta.namespace }}.svc" - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - selector: - matchLabels: - app: smart-gateway - smart-gateway: "{{ this_smartgateway }}" - -- name: Create ServiceMonitor to scrape Smart Gateway - k8s: - state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' - definition: - '{{ sg_specific_servicemonitor_manifest }}' - -- name: Create additional serviceMonitor if provided - k8s: - state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' - definition: - '{{ servicemonitor_manifest }}' - when: servicemonitor_manifest is defined diff --git a/roles/servicetelemetry/templates/manifest_alertmanager.j2 b/roles/servicetelemetry/templates/manifest_alertmanager.j2 index 4e2287fe9..70a6d68a3 100644 --- a/roles/servicetelemetry/templates/manifest_alertmanager.j2 +++ b/roles/servicetelemetry/templates/manifest_alertmanager.j2 @@ -12,7 +12,7 @@ spec: {% endif %} replicas: {{ servicetelemetry_vars.alerting.alertmanager.deployment_size }} serviceAccountName: alertmanager-stf - serviceMonitorSelector: + scrapeConfigSelector: matchLabels: app: smart-gateway listenLocal: true diff --git a/roles/servicetelemetry/templates/manifest_prometheus.j2 b/roles/servicetelemetry/templates/manifest_prometheus.j2 index d9610b056..cd3680883 100644 --- a/roles/servicetelemetry/templates/manifest_prometheus.j2 +++ b/roles/servicetelemetry/templates/manifest_prometheus.j2 @@ -17,7 +17,7 @@ spec: ruleSelector: {} securityContext: {} serviceAccountName: prometheus-stf - serviceMonitorSelector: + scrapeConfigSelector: matchLabels: app: smart-gateway listenLocal: true diff --git a/tests/smoketest/smoketest.sh b/tests/smoketest/smoketest.sh index b4a8db29f..c8bfd62c3 100755 --- a/tests/smoketest/smoketest.sh +++ b/tests/smoketest/smoketest.sh @@ -129,8 +129,8 @@ echo "*** [INFO] Showing oc get all..." oc get all echo -echo "*** [INFO] Showing servicemonitors..." -oc get servicemonitors.monitoring.rhobs -o yaml +echo "*** [INFO] Showing scrapeconfigs..." +oc get scrapeconfigs.monitoring.rhobs -o yaml echo if [ "$SMOKETEST_VERBOSE" = "true" ]; then