Skip to content

Commit

Permalink
feat(prometheus): support prometheus scraping via annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
basti1302 committed Oct 16, 2024
1 parent 72ee076 commit bbe7137
Show file tree
Hide file tree
Showing 20 changed files with 395 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,13 @@ rules:
- apiGroups:
- ""
resources:
- configmaps
- pods
- namespaces
- nodes
- nodes/stats
- endpoints
- services
verbs:
- get
- list
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,13 @@ cluster roles should match snapshot:
- apiGroups:
- ""
resources:
- configmaps
- pods
- namespaces
- nodes
- nodes/stats
- endpoints
- services
verbs:
- get
- list
Expand Down
1 change: 1 addition & 0 deletions images/collector/src/builder/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ receivers:
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/filelogreceiver v0.111.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver v0.111.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kubeletstatsreceiver v0.111.0"
- gomod: "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.111.0"

processors:
- gomod: "go.opentelemetry.io/collector/processor/batchprocessor v0.111.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() {

}, testConfigs)

DescribeTable("should render the Dash0 exporter", func(testConfig testConfig) {
DescribeTable("should render the Dash0 exporter without other exporters, with default settings", func(testConfig testConfig) {
configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{
Namespace: namespace,
NamePrefix: namePrefix,
Expand Down Expand Up @@ -158,7 +158,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() {
verifyDownstreamExportersInPipelines(collectorConfig, testConfig, "otlp/dash0")
}, testConfigs)

DescribeTable("should render a verbose debug exporter in development mode", func(testConfig testConfig) {
DescribeTable("should render a debug exporter in development mode", func(testConfig testConfig) {
configMap, err := testConfig.assembleConfigMapFunction(&oTelColConfig{
Namespace: namespace,
NamePrefix: namePrefix,
Expand All @@ -183,7 +183,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() {
debugExporterRaw := exporters["debug"]
Expect(debugExporterRaw).ToNot(BeNil())
debugExporter := debugExporterRaw.(map[string]interface{})
Expect(debugExporter["verbosity"]).To(Equal("detailed"))
Expect(debugExporter).To(HaveLen(0))

exporter := exporters["otlp/dash0"]
Expect(exporter).ToNot(BeNil())
Expand Down Expand Up @@ -557,7 +557,7 @@ var _ = Describe("The OpenTelemetry Collector ConfigMap conent", func() {
debugExporterRaw := exporters["debug"]
Expect(debugExporterRaw).ToNot(BeNil())
debugExporter := debugExporterRaw.(map[string]interface{})
Expect(debugExporter["verbosity"]).To(Equal("detailed"))
Expect(debugExporter).To(HaveLen(0))

exporter2 := exporters["otlp/dash0"]
Expect(exporter2).ToNot(BeNil())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ connectors:

exporters:
{{- if .DevelopmentMode }}
debug:
verbosity: detailed
debug: {}
{{- end }}
{{- range $i, $exporter := .Exporters }}
{{ $exporter.Name }}:
Expand Down Expand Up @@ -103,6 +102,17 @@ receivers:
auth_type: serviceAccount
collection_interval: 20s
endpoint: ${env:K8S_NODE_NAME}:10250
metrics:
# deprecated -> container.cpu.usage
container.cpu.utilization:
enabled: false
# deprecated -> k8s.node.cpu.usage
k8s.node.cpu.utilization:
enabled: false
# deprecated -> k8s.pod.cpu.usage
k8s.pod.cpu.utilization:
enabled: false

{{- if .DevelopmentMode }}
{{- /*
On Docker Desktop, Kind, etc. the API server uses a self-signed cert. Scraping will not work without
Expand All @@ -118,6 +128,264 @@ e2e tests. */}}
insecure_skip_verify: true
{{- end }}

prometheus:
config:
{{- /*
This particular set of scrape config jobs (kubernetes-service-endpoints, kubernetes-service-endpoints-slow,
kubernetes-pods, kubernetes-pods-slow) is mostly a copy of
https://github.com/prometheus-community/helm-charts/blob/5adf0ee898e8e5430471cb43a5f9532745c22f81/charts/prometheus/values.yaml
to be compatible with the well-known configuration via annotations.
*/}}
scrape_configs:

# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of
# `true`, except if `prometheus.io/scrape-slow` is set to `true` as well.
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
# then you can set any parameter
- job_name: "kubernetes-service-endpoints"
  # On label conflicts, keep the labels that come with the scraped data.
  honor_labels: true

  # NOTE(review): unlike the pod jobs, this discovery has no node selector, so
  # every collector instance discovers the endpoints of the whole cluster --
  # confirm that this is intended.
  kubernetes_sd_configs:
    - role: endpoints

  relabel_configs:
    # Never scrape anything that lives in kube-system.
    - action: drop
      source_labels:
        - __meta_kubernetes_namespace
      regex: kube-system
    # Opt-in: only services annotated with prometheus.io/scrape=true ...
    - action: keep
      source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scrape
      regex: true
    # ... that are not also annotated with prometheus.io/scrape-slow=true.
    - action: drop
      source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scrape_slow
      regex: true
    # prometheus.io/scheme (http or https) overrides the scrape scheme.
    - action: replace
      source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scheme
      regex: (https?)
      target_label: __scheme__
    # prometheus.io/path overrides the metrics path.
    - action: replace
      source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_path
      regex: (.+)
      target_label: __metrics_path__
    # prometheus.io/port replaces the port of the discovered address.
    # `$$` is the collector's escape for a literal `$`, so the relabeling
    # engine receives `$1:$2`.
    - action: replace
      source_labels:
        - __address__
        - __meta_kubernetes_service_annotation_prometheus_io_port
      regex: (.+?)(?::\d+)?;(\d+)
      target_label: __address__
      replacement: $$1:$$2
    # prometheus.io/param_<name> annotations become URL parameters.
    - action: labelmap
      regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
      replacement: __param_$$1
    # Copy all service labels onto the target.
    - action: labelmap
      regex: __meta_kubernetes_service_label_(.+)
    # Attach namespace, service and node as plain labels.
    - action: replace
      source_labels:
        - __meta_kubernetes_namespace
      target_label: namespace
    - action: replace
      source_labels:
        - __meta_kubernetes_service_name
      target_label: service
    - action: replace
      source_labels:
        - __meta_kubernetes_pod_node_name
      target_label: node

# Scrape config for slow service endpoints; same as above, but with a larger
# timeout and a larger interval
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape-slow`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
# * `prometheus.io/param_<parameter>`: If the metrics endpoint uses parameters
# then you can set any parameter
- job_name: "kubernetes-service-endpoints-slow"
  # On label conflicts, keep the labels that come with the scraped data.
  honor_labels: true

  # Slow targets: scrape every five minutes and allow 30 seconds per scrape.
  scrape_interval: 5m
  scrape_timeout: 30s

  # NOTE(review): unlike the pod jobs, this discovery has no node selector, so
  # every collector instance discovers the endpoints of the whole cluster --
  # confirm that this is intended.
  kubernetes_sd_configs:
    - role: endpoints

  relabel_configs:
    # Never scrape anything that lives in kube-system.
    - action: drop
      source_labels:
        - __meta_kubernetes_namespace
      regex: kube-system
    # Opt-in: only services annotated with prometheus.io/scrape-slow=true.
    - action: keep
      source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scrape_slow
      regex: true
    # prometheus.io/scheme (http or https) overrides the scrape scheme.
    - action: replace
      source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scheme
      regex: (https?)
      target_label: __scheme__
    # prometheus.io/path overrides the metrics path.
    - action: replace
      source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_path
      regex: (.+)
      target_label: __metrics_path__
    # prometheus.io/port replaces the port of the discovered address.
    # `$$` is the collector's escape for a literal `$`, so the relabeling
    # engine receives `$1:$2`.
    - action: replace
      source_labels:
        - __address__
        - __meta_kubernetes_service_annotation_prometheus_io_port
      regex: (.+?)(?::\d+)?;(\d+)
      target_label: __address__
      replacement: $$1:$$2
    # prometheus.io/param_<name> annotations become URL parameters.
    - action: labelmap
      regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+)
      replacement: __param_$$1
    # Copy all service labels onto the target.
    - action: labelmap
      regex: __meta_kubernetes_service_label_(.+)
    # Attach namespace, service and node as plain labels.
    - action: replace
      source_labels:
        - __meta_kubernetes_namespace
      target_label: namespace
    - action: replace
      source_labels:
        - __meta_kubernetes_service_name
      target_label: service
    - action: replace
      source_labels:
        - __meta_kubernetes_pod_node_name
      target_label: node

# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`,
# except if `prometheus.io/scrape-slow` is set to `true` as well.
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
- job_name: 'kubernetes-pods'
  # On label conflicts, keep the labels that come with the scraped data.
  honor_labels: true

  kubernetes_sd_configs:
    - role: pod
      # only scrape data from pods running on the same node as collector
      selectors:
        - role: pod
          # Explicit `env:` provider, matching the other environment variable
          # references in this collector configuration.
          field: "spec.nodeName=${env:K8S_NODE_NAME}"

  relabel_configs:
    # Never scrape anything that lives in kube-system.
    - source_labels: [__meta_kubernetes_namespace]
      regex: kube-system
      action: drop
    # Opt-in: only pods annotated with prometheus.io/scrape=true ...
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
      action: keep
      regex: true
    # ... that are not also annotated with prometheus.io/scrape-slow=true.
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
      action: drop
      regex: true
    # prometheus.io/scheme (http or https) overrides the scrape scheme.
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
      action: replace
      regex: (https?)
      target_label: __scheme__
    # prometheus.io/path overrides the metrics path.
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
      action: replace
      target_label: __metrics_path__
      regex: (.+)
    # The collector treats `$` as the start of a variable reference, so capture
    # group references in `replacement` are written as `$$N`; the relabeling
    # engine then receives `$N`. This matches the service endpoint jobs.
    #
    # IPv6 pod IP + annotated port -> [ip]:port
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
      action: replace
      regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
      replacement: '[$$2]:$$1'
      target_label: __address__
    # IPv4 pod IP + annotated port -> ip:port
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
      action: replace
      regex: (\d+);((([0-9]+?)(\.|$)){4})
      replacement: $$2:$$1
      target_label: __address__
    # prometheus.io/param_<name> annotations become URL parameters.
    - action: labelmap
      regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
      replacement: __param_$$1
    # Copy all pod labels onto the target.
    - action: labelmap
      regex: __meta_kubernetes_pod_label_(.+)
    - source_labels: [__meta_kubernetes_namespace]
      action: replace
      target_label: namespace
    - source_labels: [__meta_kubernetes_pod_name]
      action: replace
      target_label: pod
    # Skip pods that are not running.
    - source_labels: [__meta_kubernetes_pod_phase]
      regex: Pending|Succeeded|Failed|Completed
      action: drop
    - source_labels: [__meta_kubernetes_pod_node_name]
      action: replace
      target_label: node

# Example Scrape config for pods which should be scraped slower. A useful example
# would be stackdriver-exporter which queries an API on every scrape of the pod
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape-slow`: Only scrape pods that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`.
- job_name: 'kubernetes-pods-slow'
  # On label conflicts, keep the labels that come with the scraped data.
  honor_labels: true

  # Slow targets: scrape every five minutes and allow 30 seconds per scrape.
  scrape_interval: 5m
  scrape_timeout: 30s

  kubernetes_sd_configs:
    - role: pod
      # only scrape data from pods running on the same node as collector
      selectors:
        - role: pod
          # Explicit `env:` provider, matching the other environment variable
          # references in this collector configuration.
          field: "spec.nodeName=${env:K8S_NODE_NAME}"

  relabel_configs:
    # Never scrape anything that lives in kube-system.
    - source_labels: [__meta_kubernetes_namespace]
      regex: kube-system
      action: drop
    # Opt-in: only pods annotated with prometheus.io/scrape-slow=true.
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow]
      action: keep
      regex: true
    # prometheus.io/scheme (http or https) overrides the scrape scheme.
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
      action: replace
      regex: (https?)
      target_label: __scheme__
    # prometheus.io/path overrides the metrics path.
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
      action: replace
      target_label: __metrics_path__
      regex: (.+)
    # The collector treats `$` as the start of a variable reference, so capture
    # group references in `replacement` are written as `$$N`; the relabeling
    # engine then receives `$N`. This matches the service endpoint jobs.
    #
    # IPv6 pod IP + annotated port -> [ip]:port
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
      action: replace
      regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})
      replacement: '[$$2]:$$1'
      target_label: __address__
    # IPv4 pod IP + annotated port -> ip:port
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip]
      action: replace
      regex: (\d+);((([0-9]+?)(\.|$)){4})
      replacement: $$2:$$1
      target_label: __address__
    # prometheus.io/param_<name> annotations become URL parameters.
    - action: labelmap
      regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
      replacement: __param_$$1
    # Copy all pod labels onto the target.
    - action: labelmap
      regex: __meta_kubernetes_pod_label_(.+)
    - source_labels: [__meta_kubernetes_namespace]
      action: replace
      target_label: namespace
    - source_labels: [__meta_kubernetes_pod_name]
      action: replace
      target_label: pod
    # Skip pods that are not running.
    - source_labels: [__meta_kubernetes_pod_phase]
      regex: Pending|Succeeded|Failed|Completed
      action: drop
    - source_labels: [__meta_kubernetes_pod_node_name]
      action: replace
      target_label: node

# TODO Turn on conditionally for monitored namespaces
filelog/monitored_pods:
include:
Expand Down Expand Up @@ -225,6 +493,7 @@ service:
receivers:
- otlp
- kubeletstats
- prometheus
processors:
- k8sattributes
- resourcedetection
Expand Down Expand Up @@ -272,4 +541,9 @@ service:

telemetry:
metrics:
address: ${env:MY_POD_IP}:8888
readers:
- pull:
exporter:
prometheus:
host: "${env:MY_POD_IP}"
port: 8888
Loading

0 comments on commit bbe7137

Please sign in to comment.