Skip to content

Commit

Permalink
feat(prometheus): support prometheus scraping via annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
basti1302 committed Oct 16, 2024
1 parent 72ee076 commit bb5d4c1
Show file tree
Hide file tree
Showing 20 changed files with 279 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,13 @@ rules:
- apiGroups:
- ""
resources:
- configmaps
- pods
- namespaces
- nodes
- nodes/stats
- endpoints
- services
verbs:
- get
- list
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,13 @@ cluster roles should match snapshot:
- apiGroups:
- ""
resources:
- configmaps
- pods
- namespaces
- nodes
- nodes/stats
- endpoints
- services
verbs:
- get
- list
Expand Down
1 change: 1 addition & 0 deletions images/collector/src/builder/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ receivers:
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/filelogreceiver v0.111.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver v0.111.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kubeletstatsreceiver v0.111.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.111.0"

processors:
- gomod: "go.opentelemetry.io/collector/processor/batchprocessor v0.111.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() {

}, testConfigs)

DescribeTable("should render the Dash0 exporter", func(testConfig testConfig) {
DescribeTable("should render the Dash0 exporter without other exporters, with default settings", func(testConfig testConfig) {
configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{
Namespace: namespace,
NamePrefix: namePrefix,
Expand Down Expand Up @@ -158,7 +158,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() {
verifyDownstreamExportersInPipelines(collectorConfig, testConfig, "otlp/dash0")
}, testConfigs)

DescribeTable("should render a verbose debug exporter in development mode", func(testConfig testConfig) {
DescribeTable("should render a debug exporter in development mode", func(testConfig testConfig) {
configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{
Namespace: namespace,
NamePrefix: namePrefix,
Expand All @@ -183,7 +183,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() {
debugExporterRaw := exporters["debug"]
Expect(debugExporterRaw).ToNot(BeNil())
debugExporter := debugExporterRaw.(map[string]interface{})
Expect(debugExporter["verbosity"]).To(Equal("detailed"))
Expect(debugExporter).To(HaveLen(0))

exporter := exporters["otlp/dash0"]
Expect(exporter).ToNot(BeNil())
Expand Down Expand Up @@ -557,7 +557,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() {
debugExporterRaw := exporters["debug"]
Expect(debugExporterRaw).ToNot(BeNil())
debugExporter := debugExporterRaw.(map[string]interface{})
Expect(debugExporter["verbosity"]).To(Equal("detailed"))
Expect(debugExporter).To(HaveLen(0))

exporter2 := exporters["otlp/dash0"]
Expect(exporter2).ToNot(BeNil())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ connectors:

exporters:
{{- if .DevelopmentMode }}
debug:
verbosity: detailed
debug: {}
{{- end }}
{{- range $i, $exporter := .Exporters }}
{{ $exporter.Name }}:
Expand Down Expand Up @@ -103,6 +102,17 @@ receivers:
auth_type: serviceAccount
collection_interval: 20s
endpoint: ${env:K8S_NODE_NAME}:10250
metrics:
# deprecated -> container.cpu.usage
container.cpu.utilization:
enabled: false
# deprecated -> k8s.node.cpu.usage
k8s.node.cpu.utilization:
enabled: false
# deprecated -> k8s.pod.cpu.usage
k8s.pod.cpu.utilization:
enabled: false

{{- if .DevelopmentMode }}
{{- /*
On Docker Desktop, Kind, etc. the API server uses a self-signed cert. Scraping will not work without
Expand All @@ -118,6 +128,148 @@ e2e tests. */}}
insecure_skip_verify: true
{{- end }}

prometheus:
config:
{{- /*
This particular set of scrape config jobs (kubernetes-service-endpoints, kubernetes-service-endpoints-slow,
kubernetes-pods, kubernetes-pods-slow) is mostly a copy of
https://github.com/prometheus-community/helm-charts/blob/5adf0ee898e8e5430471cb43a5f9532745c22f81/charts/prometheus/values.yaml
to be compatible with the well-known configuration via annotations.
*/}}
scrape_configs:

# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`,
# except if `prometheus.io/scrape-slow` is set to `true` as well.
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
- job_name: 'kubernetes-pods'
honor_labels: true

kubernetes_sd_configs:
- role: pod
# only scrape data from pods running on the same node as the collector
selectors:
- role: pod
field: "spec.nodeName=${K8S_NODE_NAME}"

relabel_configs:
- source_labels: [ __meta_kubernetes_namespace ]
regex: kube-system
action: drop
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
action: drop
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
action: replace
regex: (https?)
target_label: __scheme__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
replacement: '[$2]:$1'
target_label: __address__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);((([0-9]+?)(\.|$)){4})
replacement: $2:$1
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
replacement: __param_$1
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_pod_phase]
regex: Pending|Succeeded|Failed|Completed
action: drop
- source_labels: [__meta_kubernetes_pod_node_name]
action: replace
target_label: node

# Example scrape config for pods which should be scraped slower. A useful example
# would be stackdriver-exporter, which queries an API on every scrape of the pod.
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape-slow`: Only scrape pods that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
- job_name: 'kubernetes-pods-slow'
honor_labels: true

scrape_interval: 5m
scrape_timeout: 30s

kubernetes_sd_configs:
- role: pod
# only scrape data from pods running on the same node as the collector
selectors:
- role: pod
field: "spec.nodeName=${K8S_NODE_NAME}"

relabel_configs:
- source_labels: [ __meta_kubernetes_namespace ]
regex: kube-system
action: drop
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
action: replace
regex: (https?)
target_label: __scheme__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
replacement: '[$2]:$1'
target_label: __address__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
action: replace
regex: (\d+);((([0-9]+?)(\.|$)){4})
replacement: $2:$1
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
replacement: __param_$1
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod
- source_labels: [__meta_kubernetes_pod_phase]
regex: Pending|Succeeded|Failed|Completed
action: drop
- source_labels: [__meta_kubernetes_pod_node_name]
action: replace
target_label: node

# TODO Turn on conditionally for monitored namespaces
filelog/monitored_pods:
include:
Expand Down Expand Up @@ -225,6 +377,7 @@ service:
receivers:
- otlp
- kubeletstats
- prometheus
processors:
- k8sattributes
- resourcedetection
Expand Down Expand Up @@ -272,4 +425,9 @@ service:

telemetry:
metrics:
address: ${env:MY_POD_IP}:8888
readers:
- pull:
exporter:
prometheus:
host: "${env:MY_POD_IP}"
port: 8888
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
exporters:
{{- if .DevelopmentMode }}
debug:
verbosity: detailed
debug: {}
{{- end }}
{{- range $i, $exporter := .Exporters }}
{{ $exporter.Name }}:
Expand Down Expand Up @@ -73,4 +72,9 @@ service:

telemetry:
metrics:
address: ${env:MY_POD_IP}:8888
readers:
- pull:
exporter:
prometheus:
host: "${env:MY_POD_IP}"
port: 8888
5 changes: 5 additions & 0 deletions internal/backendconnection/otelcolresources/desired_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,13 @@ func assembleClusterRoleForDaemonSet(config *oTelColConfig) *rbacv1.ClusterRole
Resources: []string{
"pods",
"namespaces",
"nodes",
"configmaps",
// required for Kubelet Metrics/Kubeletstats receiver
"nodes/stats",
// required for Prometheus receiver
"endpoints",
"services",
},
Verbs: []string{"get", "watch", "list"},
},
Expand Down
1 change: 1 addition & 0 deletions internal/controller/perses_dashboards_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ func (r *PersesDashboardCrdReconciler) Delete(
// Known issue: We would need to stop the watch for the Perses dashboard resources here, but the controller-runtime
// does not provide any API to stop a watch.
// An error will be logged every ten seconds until the controller process is restarted.
// See https://github.com/kubernetes-sigs/controller-runtime/issues/2983.
}

func (r *PersesDashboardCrdReconciler) Generic(
Expand Down
1 change: 1 addition & 0 deletions internal/controller/prometheus_rules_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ func (r *PrometheusRuleCrdReconciler) Delete(
// Known issue: We would need to stop the watch for the Prometheus rule resources here, but the controller-runtime
// does not provide any API to stop a watch.
// An error will be logged every ten seconds until the controller process is restarted.
// See https://github.com/kubernetes-sigs/controller-runtime/issues/2983.
}

func (r *PrometheusRuleCrdReconciler) Generic(
Expand Down
26 changes: 24 additions & 2 deletions test-resources/node.js/express/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,38 @@
// SPDX-License-Identifier: Apache-2.0

const express = require('express');
const { Counter, collectDefaultMetrics, register } = require('prom-client');

const port = parseInt(process.env.PORT || '1207');
const app = express();

collectDefaultMetrics();
const requestCounter = new Counter({
name: 'appundertest_testendointrequestcounter',
help: 'Number of requests to the test endpoint',
});

app.get('/ready', (req, res) => {
res.sendStatus(204);
res.sendStatus(204);
});

// Serves the metrics held in the prom-client registry so that the pod can be scraped via the
// prometheus.io/* annotations. Responds with the registry's own content type on success and with a
// plain-text 500 if collecting the metrics fails.
app.get('/metrics', async (req, res) => {
  try {
    // Collect first, so that a failure does not leave the Prometheus content type on the error response.
    const body = await register.metrics();
    res.set('Content-Type', register.contentType);
    res.end(body);
  } catch (err) {
    // res.end() only accepts a string, Buffer or Uint8Array; handing it the Error object itself would
    // throw a TypeError inside the error handler and the client would never receive the 500.
    res.status(500).set('Content-Type', 'text/plain').end(String(err));
  }
});

app.get('/dash0-k8s-operator-test', (req, res) => {
console.log(`processing request ${req.query['id']}`);
requestCounter.inc();
const reqId = req.query['id']
if (reqId) {
console.log(`processing request ${reqId}`);
} else {
console.log(`processing request`);
}
res.json({ message: 'We make Observability easy for every developer.' });
});

Expand Down
4 changes: 4 additions & 0 deletions test-resources/node.js/express/cronjob.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ spec:
jobTemplate:
spec:
template:
metadata:
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "1205"
spec:
restartPolicy: Never
containers:
Expand Down
3 changes: 3 additions & 0 deletions test-resources/node.js/express/daemonset.opt-out.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ spec:
metadata:
labels:
app: dash0-operator-nodejs-20-express-test-daemonset-app
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "1206"
spec:
containers:
- name: dash0-operator-nodejs-20-express-test-daemonset-app
Expand Down
3 changes: 3 additions & 0 deletions test-resources/node.js/express/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ spec:
metadata:
labels:
app: dash0-operator-nodejs-20-express-test-daemonset-app
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "1206"
spec:
containers:
- name: dash0-operator-nodejs-20-express-test-daemonset-app
Expand Down
Loading

0 comments on commit bb5d4c1

Please sign in to comment.