diff --git a/charts/openTelemetry/.helmignore b/charts/openTelemetry/.helmignore
new file mode 100644
index 00000000..d3e439ad
--- /dev/null
+++ b/charts/openTelemetry/.helmignore
@@ -0,0 +1,21 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmprojmak
diff --git a/charts/openTelemetry/Chart.yaml b/charts/openTelemetry/Chart.yaml
new file mode 100644
index 00000000..75ed73f5
--- /dev/null
+++ b/charts/openTelemetry/Chart.yaml
@@ -0,0 +1,34 @@
+apiVersion: v2
+name: splunk-otel-collector
+version: 0.109.0
+appVersion: 0.109.0
+description: Splunk OpenTelemetry Collector for Kubernetes
+icon: https://github.com/signalfx/splunk-otel-collector-chart/tree/main/splunk.png # NOTE(review): a GitHub "/tree/" URL presumably serves an HTML page rather than the PNG itself; confirm the icon renders
+type: application
+keywords:
+  - splunk
+  - opentelemetry
+  - observability
+  - logs
+  - traces
+  - metrics
+  - apm
+  - instrumentation
+maintainers:
+  - name: dmitryax
+  - name: jvoravong
+  - name: emaderer
+dependencies:
+  # Subchart notes (this is why both subcharts below get a lowercase, hyphen-free alias):
+  # - Avoid uppercase letters in names/aliases; they cause install failures due to subchart resource naming
+  # - Avoid hyphen characters in names/aliases; they introduce template rendering complications (https://github.com/helm/helm/issues/2192)
+  - name: cert-manager
+    version: v1.14.4
+    alias: certmanager
+    repository: https://charts.jetstack.io
+    condition: certmanager.enabled
+  - name: opentelemetry-operator
+    version: 0.49.1
+    alias: operator
+    repository: https://open-telemetry.github.io/opentelemetry-helm-charts
+    condition: operator.enabled
diff --git a/charts/openTelemetry/ci/basic-values.yaml b/charts/openTelemetry/ci/basic-values.yaml
new file mode 100644
index 00000000..ebe1cc22
--- /dev/null
+++ b/charts/openTelemetry/ci/basic-values.yaml
@@ -0,0 +1,11 @@
+clusterName: fake-cluster
+splunkObservability:
+  realm: fake-realm
+  accessToken: fake-token
+
+# Logs collection config for Kind cluster: parse container logs as CRI using the timestamp layout below
+fluentd:
+  config:
+    containers:
+      logFormatType: cri
+      criTimeFormat: "%Y-%m-%dT%H:%M:%S.%NZ"
diff --git a/charts/openTelemetry/ci/logs-only-otel-se-o11y-values.yaml b/charts/openTelemetry/ci/logs-only-otel-se-o11y-values.yaml
new file mode 100644
index 00000000..fd199440
--- /dev/null
+++ b/charts/openTelemetry/ci/logs-only-otel-se-o11y-values.yaml
@@ -0,0 +1,16 @@
+clusterName: fake-cluster
+splunkObservability:
+  realm: fake-realm
+  accessToken: fake-token
+  logsEnabled: true
+  tracesEnabled: false
+  metricsEnabled: false
+
+splunkPlatform:
+  endpoint: "fake-endpoint"
+  token: "fake-token"
+  index: "main"
+  logsEnabled: true
+  metricsEnabled: false
+
+logsEngine: otel
diff --git a/charts/openTelemetry/ci/logs-only-values.yaml b/charts/openTelemetry/ci/logs-only-values.yaml
new file mode 100644
index 00000000..13eb69bc
--- /dev/null
+++ b/charts/openTelemetry/ci/logs-only-values.yaml
@@ -0,0 +1,7 @@
+clusterName: fake-cluster
+splunkObservability:
+  realm: fake-realm
+  accessToken: fake-token
+  logsEnabled: true
+  tracesEnabled: false
+  metricsEnabled: false
diff --git a/charts/openTelemetry/ci/sampler-gateway-env-vars-java-logs-values.yaml b/charts/openTelemetry/ci/sampler-gateway-env-vars-java-logs-values.yaml
new file mode 100644
index 00000000..98ed5d6f
--- /dev/null
+++ b/charts/openTelemetry/ci/sampler-gateway-env-vars-java-logs-values.yaml
@@ -0,0 +1,52 @@
+clusterName: fake-cluster
+splunkObservability:
+  realm: fake-realm
+  accessToken: fake-token
+  logsEnabled: true
+
+agent:
+  config:
+    processors:
+      probabilistic_sampler:
+        hash_seed: 22
+        sampling_percentage: 15.3
+    service:
+      pipelines:
+        traces:
+          processors:
+            - memory_limiter
+            - probabilistic_sampler
+            - k8sattributes
+            - batch
+            - resource
+            - resourcedetection
+  extraEnvs:
+    - name: HTTPS_PROXY
+      value: "192.168.0.10"
+
+clusterReceiver:
+  extraEnvs:
+    - name: HTTPS_PROXY
+      value: "192.168.0.10"
+
+gateway:
+  enabled: true
+  replicaCount: 1
+  resources:
+    limits:
+      cpu: 200m
+      memory: 400Mi
+
+fluentd:
+  config:
+    logs:
+      java-app:
+        from:
+          pod: "java-app"
+        multiline:
+          firstline: /\d{4}-\d{1,2}-\d{1,2}/
+
+    # Logs collection config for Kind cluster: parse container logs as CRI using the timestamp layout below
+    containers:
+      logFormatType: cri
+      criTimeFormat: "%Y-%m-%dT%H:%M:%S.%NZ"
diff --git a/charts/openTelemetry/ci/use-custom-gateway-values.yaml b/charts/openTelemetry/ci/use-custom-gateway-values.yaml
new file mode 100644
index 00000000..f546a704
--- /dev/null
+++ b/charts/openTelemetry/ci/use-custom-gateway-values.yaml
@@ -0,0 +1,29 @@
+clusterName: my-cluster
+# Validate backward compatible parameters (the legacy top-level splunkRealm, splunkAccessToken and logsEnabled keys that NOTES.txt reports as deprecated)
+splunkRealm: us0
+splunkAccessToken: my-access-token
+
+logsEnabled: false
+
+agent:
+  config:
+    exporters:
+      otlp:
+        endpoint: 192.168.0.10:4317
+        insecure: true
+      signalfx:
+        ingest_url: http://192.168.0.10:9943
+        api_url: http://192.168.0.10:6060
+    service:
+      pipelines:
+        traces:
+          exporters: [otlp, signalfx]
+        metrics:
+          exporters: [otlp]
+
+clusterReceiver:
+  config:
+    exporters:
+      signalfx:
+        ingest_url: http://192.168.0.10:9943
+        api_url: http://192.168.0.10:6060
diff --git a/charts/openTelemetry/scripts/init-eks-fargate-cluster-receiver.sh b/charts/openTelemetry/scripts/init-eks-fargate-cluster-receiver.sh
new file mode 100644
index 00000000..d792caa1
--- /dev/null
+++ b/charts/openTelemetry/scripts/init-eks-fargate-cluster-receiver.sh
@@ -0,0 +1,45 @@
+#! 
/usr/bin/bash
+set -ex
+
+echo "Downloading yq"
+curl -L -o yq https://github.com/mikefarah/yq/releases/download/v4.16.2/yq_linux_amd64
+ACTUAL=$(sha256sum yq | awk '{print $1}')
+if [ "${ACTUAL}" != "5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d" ]; then
+  echo "will not attempt to use yq with unexpected sha256 (${ACTUAL} != 5c911c4da418ae64af5527b7ee36e77effb85de20c2ce732ed14c7f72743084d)"
+  exit 1
+fi
+chmod a+x yq
+
+# First replica (pod name ends in -0): acts as the cluster receiver. Restrict its kubeletstats rule to nodes carrying the label below and turn off k8s_observer pod observation.
+# That node label is applied by the other replica, further down in this script.
+if [[ "${K8S_POD_NAME}" == *-0 ]]; then
+  echo "will configure kubelet stats receiver to follow other StatefulSet replica's node, as well as use cluster receiver."
+  ./yq e '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && labels[\"splunk-otel-eks-fargate-kubeletstats-receiver-node\"] == \"true\""' /conf/relay.yaml >/splunk-messages/config.yaml
+  ./yq e -i '.extensions.k8s_observer.observe_pods = false' /splunk-messages/config.yaml
+  exit 0
+fi
+
+# Otherwise we are the other replica (cluster-wide kubelet stats): label our own node so the first replica monitors it, and disable the k8s_cluster receiver.
+# Update our config to not monitor ourselves
+echo "Labelling our fargate node to denote it hosts the cluster receiver"
+
+# Download kubectl and refuse to run it unless its sha256 matches the pinned value
+curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.20.4/2021-04-12/bin/linux/amd64/kubectl
+ACTUAL=$(sha256sum kubectl | awk '{print $1}')
+if [ "${ACTUAL}" != "e84ff8c607b2a10f635c312403f9ede40a045404957e55adcf3d663f9e32c630" ]; then
+  echo "will not attempt to use kubectl with unexpected sha256 (${ACTUAL} != e84ff8c607b2a10f635c312403f9ede40a045404957e55adcf3d663f9e32c630)"
+  exit 1
+fi
+chmod a+x kubectl
+# Label the node named by K8S_NODE_NAME with the label the first replica's kubeletstats rule filters on
+./kubectl label nodes "$K8S_NODE_NAME" splunk-otel-eks-fargate-kubeletstats-receiver-node=true
+
+echo "Disabling k8s_cluster receiver for this instance"
+# Remove the metrics pipeline and the k8s_cluster receiver from the relay config
+./yq e 'del(.service.pipelines.metrics)' /conf/relay.yaml >/splunk-messages/config.yaml
+./yq e -i 'del(.receivers.k8s_cluster)' /splunk-messages/config.yaml
+
+# Exclude our own node from the kubeletstats rule (all other kubelets are still matched); ${K8S_NODE_NAME} is single-quoted, so it is written to the config unexpanded by this shell
+echo "Ensuring k8s_observer-based kubeletstats receivers won't monitor own node to avoid Fargate network limitation."
+# shellcheck disable=SC2016
+./yq e -i '.receivers.receiver_creator.receivers.kubeletstats.rule = .receivers.receiver_creator.receivers.kubeletstats.rule + " && not ( name contains \"${K8S_NODE_NAME}\" )"' /splunk-messages/config.yaml
diff --git a/charts/openTelemetry/templates/NOTES.txt b/charts/openTelemetry/templates/NOTES.txt
new file mode 100644
index 00000000..a8a97376
--- /dev/null
+++ b/charts/openTelemetry/templates/NOTES.txt
@@ -0,0 +1,83 @@
+{{/* Current jsonschema doesn't enforce below requirement while `splunkRealm` not provided as (an undesired) default value. */}}
+{{- if and (eq (include "splunk-otel-collector.splunkPlatformEnabled" .) "false") (eq (include "splunk-otel-collector.splunkO11yEnabled" .) 
"false") -}}
+{{ fail "[ERROR] Please set at least one of required `splunkObservability.realm` or `splunkPlatform.endpoint` and corresponding token values to specify the platform(s) to send data." }}
+{{- end -}}
+
+{{- if eq (include "splunk-otel-collector.splunkPlatformEnabled" .) "true" }}
+Splunk OpenTelemetry Collector is installed and configured to send data to Splunk Platform endpoint "{{ .Values.splunkPlatform.endpoint }}".
+{{ end }}
+{{- if eq (include "splunk-otel-collector.splunkO11yEnabled" .) "true" }}
+Splunk OpenTelemetry Collector is installed and configured to send data to Splunk Observability realm {{ include "splunk-otel-collector.o11yRealm" . }}.
+{{ end }}
+
+{{- if .Values.splunkRealm }}
+[WARNING] "splunkRealm" parameter is deprecated, please use "splunkObservability.realm" instead.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0353-to-0360
+{{ end }}
+{{- if .Values.splunkAccessToken }}
+[WARNING] "splunkAccessToken" parameter is deprecated, please use "splunkObservability.accessToken" instead.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0353-to-0360
+{{ end }}
+{{- if .Values.ingestUrl }}
+[WARNING] "ingestUrl" parameter is deprecated, please use "splunkObservability.ingestUrl" instead.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0353-to-0360
+{{ end }}
+{{- if .Values.apiUrl }}
+[WARNING] "apiUrl" parameter is deprecated, please use "splunkObservability.apiUrl" instead.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0353-to-0360
+{{ end }}
+{{- /* From here on a value counts as "not provided" when toString yields "<nil>" (unset) or "" (empty). */ -}}{{- if not (has (toString .Values.metricsEnabled) (list "" "<nil>")) }}
+[WARNING] "metricsEnabled" parameter is deprecated, please use "splunkObservability.metricsEnabled" instead.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0353-to-0360
+{{ end }}
+{{- if not (has (toString .Values.tracesEnabled) (list "" "<nil>")) }}
+[WARNING] "tracesEnabled" parameter is deprecated, please use "splunkObservability.tracesEnabled" instead.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0353-to-0360
+{{ end }}
+{{- if not (has (toString .Values.logsEnabled) (list "" "<nil>")) }}
+[WARNING] "logsEnabled" parameter is deprecated, please use "splunkObservability.logsEnabled" instead.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0353-to-0360
+{{ end }}
+{{- if not (has (toString .Values.distro) (list "" "<nil>")) }}
+[WARNING] "distro" parameter is deprecated, please use "distribution" instead.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0371-to-0380
+{{ end }}
+{{- if not (has (toString .Values.provider) (list "" "<nil>")) }}
+[WARNING] "provider" parameter is deprecated, please use "cloudProvider" instead.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0371-to-0380
+{{ end }}
+{{- if not (has (toString .Values.extraAttributes.podLabels) (list "" "<nil>")) }}
+[WARNING] ".Values.extraAttributes.podLabels" parameter is deprecated, please use ".Values.extraAttributes.fromLabels" instead.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0371-to-0380
+{{ end }}
+{{- if not (has (toString .Values.otelAgent) (list "" "<nil>")) }}
+[WARNING] "otelAgent" parameter group is deprecated, please rename it to "agent" in your custom values.yaml.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0371-to-0380
+{{ end }}
+{{- if not (has (toString .Values.otelCollector) (list "" "<nil>")) }}
+[WARNING] "otelCollector" parameter group is deprecated, please rename it to "gateway" in your custom values.yaml.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0371-to-0380
+{{ end }}
+{{- if not (has (toString .Values.otelK8sClusterReceiver) (list "" "<nil>")) }}
+[WARNING] "otelK8sClusterReceiver" parameter group is deprecated, please rename it to "clusterReceiver" in your custom values.yaml.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0371-to-0380
+{{ end }}
+{{- if not (has (toString .Values.image.fluentd.initContainer) (list "" "<nil>")) }}
+[WARNING] "image.fluentd.initContainer" parameter is deprecated now. Now we use the same splunk/fluentd-hec image in init container.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0371-to-0380
+{{ end }}
+{{- $clusterReceiver := fromYaml (include "splunk-otel-collector.clusterReceiver" .) }}
+{{- if not (has (toString $clusterReceiver.k8sEventsEnabled) (list "" "<nil>")) }}
+[WARNING] "clusterReceiver.k8sEventsEnabled" parameter is deprecated. Please use clusterReceiver.eventsEnabled and splunkObservability.infrastructureMonitoringEventsEnabled.
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0532-to-0540
+{{ end }}
+{{- $crEventsEnabled := toString $clusterReceiver.eventsEnabled }}
+{{- if not (has $crEventsEnabled (list "" "<nil>" "false")) }}
+[WARNING] "clusterReceiver.eventsEnabled" parameter is deprecated. Soon it will be replaced with "clusterReceiver.k8sObjects".
+ Upgrade guidelines: https://github.com/signalfx/splunk-otel-collector-chart/blob/main/UPGRADING.md#0670-to-0680
+{{ end }}
+{{- if .Values.operator.enabled }}
+[INFO] You've enabled the operator's auto-instrumentation feature (operator.enabled=true)! The operator can automatically instrument Kubernetes hosted applications.
+ - Status: Instrumentation language maturity varies. See `operator.instrumentation.spec` and documentation for utilized instrumentation details.
+ - Splunk Support: We offer full support for Splunk distributions and best-effort support for native OpenTelemetry distributions of auto-instrumentation libraries.
+{{- end }}
diff --git a/charts/openTelemetry/templates/_helpers.tpl b/charts/openTelemetry/templates/_helpers.tpl
new file mode 100644
index 00000000..4ca724ec
--- /dev/null
+++ b/charts/openTelemetry/templates/_helpers.tpl
@@ -0,0 +1,510 @@
+{{/* vim: set filetype=mustache: */}}
+
+{{/*
+Expand the name of the chart: nameOverride when set, otherwise the chart name, truncated to 63 chars.
+*/}}
+{{- define "splunk-otel-collector.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "splunk-otel-collector.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label ("+" in the version is replaced with "_").
+*/}}
+{{- define "splunk-otel-collector.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Whether to send data to Splunk Platform endpoint, i.e. splunkPlatform.endpoint is not an empty string.
+*/}}
+{{- define "splunk-otel-collector.splunkPlatformEnabled" -}}
+{{- not (eq .Values.splunkPlatform.endpoint "") }}
+{{- end -}}
+
+{{/*
+Whether to send data to Splunk Observability endpoint, i.e. the resolved o11yRealm is not empty.
+*/}}
+{{- define "splunk-otel-collector.splunkO11yEnabled" -}}
+{{- not (eq (include "splunk-otel-collector.o11yRealm" .) "") }}
+{{- end -}}
+
+{{/*
+Whether metrics enabled for Splunk Observability, backward compatible: the deprecated top-level "metricsEnabled" wins when provided. toString renders an unset value as "<nil>", so both "<nil>" and "" count as not provided.
+*/}}
+{{- define "splunk-otel-collector.o11yMetricsEnabled" -}}
+{{- if has (toString .Values.metricsEnabled) (list "" "<nil>") }}
+{{- and (eq (include "splunk-otel-collector.splunkO11yEnabled" .) "true") .Values.splunkObservability.metricsEnabled }}
+{{- else }}
+{{- and (eq (include "splunk-otel-collector.splunkO11yEnabled" .) "true") .Values.metricsEnabled }}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Whether traces enabled for Splunk Observability, backward compatible: the deprecated top-level "tracesEnabled" wins when provided ("<nil>" and "" both count as not provided).
+*/}}
+{{- define "splunk-otel-collector.o11yTracesEnabled" -}}
+{{- if has (toString .Values.tracesEnabled) (list "" "<nil>") }}
+{{- and (eq (include "splunk-otel-collector.splunkO11yEnabled" .) "true") .Values.splunkObservability.tracesEnabled }}
+{{- else }}
+{{- and (eq (include "splunk-otel-collector.splunkO11yEnabled" .) "true") .Values.tracesEnabled }}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Whether logs enabled for Splunk Observability, backward compatible: the deprecated top-level "logsEnabled" wins when provided ("<nil>" and "" both count as not provided).
+*/}}
+{{- define "splunk-otel-collector.o11yLogsEnabled" -}}
+{{- if has (toString .Values.logsEnabled) (list "" "<nil>") }}
+{{- and (eq (include "splunk-otel-collector.splunkO11yEnabled" .) "true") .Values.splunkObservability.logsEnabled }}
+{{- else }}
+{{- and (eq (include "splunk-otel-collector.splunkO11yEnabled" .) "true") .Values.logsEnabled }}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Whether Splunk Observability Profiling is enabled (requires Splunk Observability itself to be enabled).
+*/}}
+{{- define "splunk-otel-collector.o11yProfilingEnabled" -}}
+{{- and (eq (include "splunk-otel-collector.splunkO11yEnabled" .) "true") .Values.splunkObservability.profilingEnabled }}
+{{- end -}}
+
+{{/*
+Whether logs or profiling is enabled for Splunk Observability.
+*/}}
+{{- define "splunk-otel-collector.o11yLogsOrProfilingEnabled" -}}
+{{- or (eq (include "splunk-otel-collector.o11yLogsEnabled" .) "true") (eq (include "splunk-otel-collector.o11yProfilingEnabled" .) "true") }}
+{{- end -}}
+
+{{/*
+Whether logs enabled for Splunk Platform.
+*/}}
+{{- define "splunk-otel-collector.platformLogsEnabled" -}}
+{{- and (eq (include "splunk-otel-collector.splunkPlatformEnabled" .) "true") .Values.splunkPlatform.logsEnabled }}
+{{- end -}}
+
+{{/*
+Splunk Platform metrics switch: the platform must be enabled and splunkPlatform.metricsEnabled set.
+*/}}
+{{- define "splunk-otel-collector.platformMetricsEnabled" -}}
+{{- and (eq (include "splunk-otel-collector.splunkPlatformEnabled" .) "true") .Values.splunkPlatform.metricsEnabled }}
+{{- end -}}
+
+{{/*
+Splunk Platform traces switch: the platform must be enabled and splunkPlatform.tracesEnabled set.
+*/}}
+{{- define "splunk-otel-collector.platformTracesEnabled" -}}
+{{- and (eq (include "splunk-otel-collector.splunkPlatformEnabled" .) "true") .Values.splunkPlatform.tracesEnabled }}
+{{- end -}}
+
+{{/*
+Metrics are on when at least one destination (Splunk Platform or Splunk Observability) wants them.
+*/}}
+{{- define "splunk-otel-collector.metricsEnabled" -}}
+{{- or (eq (include "splunk-otel-collector.platformMetricsEnabled" .) "true") (eq (include "splunk-otel-collector.o11yMetricsEnabled" .) "true") }}
+{{- end -}}
+
+{{/*
+Traces are on when at least one destination (Splunk Platform or Splunk Observability) wants them.
+*/}}
+{{- define "splunk-otel-collector.tracesEnabled" -}}
+{{- or (eq (include "splunk-otel-collector.platformTracesEnabled" .) "true") (eq (include "splunk-otel-collector.o11yTracesEnabled" .) "true") }}
+{{- end -}}
+
+{{/*
+Logs are on when at least one destination (Splunk Platform or Splunk Observability) wants them.
+*/}}
+{{- define "splunk-otel-collector.logsEnabled" -}}
+{{- or (eq (include "splunk-otel-collector.platformLogsEnabled" .) "true") (eq (include "splunk-otel-collector.o11yLogsEnabled" .) "true") }}
+{{- end -}}
+
+{{/*
+Profiling switch; simply forwards to the Splunk Observability profiling helper (the only destination that supports it).
+*/}}
+{{- define "splunk-otel-collector.profilingEnabled" -}}
+{{- include "splunk-otel-collector.o11yProfilingEnabled" . }}
+{{- end -}}
+
+{{/*
+Name of the Splunk Secret: secret.name when given, else the chart fullname.
+*/}}
+{{- define "splunk-otel-collector.secret" -}}
+{{- .Values.secret.name | default (include "splunk-otel-collector.fullname" .) }}
+{{- end -}}
+
+{{/*
+Name of the etcd Secret: "<fullname>-etcd" (max 63 chars) unless agent.controlPlaneMetrics.etcd.secret.name is given.
+*/}}
+{{- define "splunk-otel-collector.etcdSecret" -}}
+{{- if not .Values.agent.controlPlaneMetrics.etcd.secret.name -}}
+{{- $fullname := (include "splunk-otel-collector.fullname" .) -}}
+{{- printf "%s-etcd" $fullname | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s" .Values.agent.controlPlaneMetrics.etcd.secret.name -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Name of the service account to use: serviceAccount.name when given, else the chart fullname.
+*/}}
+{{- define "splunk-otel-collector.serviceAccountName" -}}
+    {{ .Values.serviceAccount.name | default (include "splunk-otel-collector.fullname" .) }}
+{{- end -}}
+
+{{/*
+Splunk Observability realm: splunkObservability.realm, then the legacy splunkRealm, then "".
+*/}}
+{{- define "splunk-otel-collector.o11yRealm" -}}
+{{- default "" (default .Values.splunkRealm .Values.splunkObservability.realm) }}
+{{- end -}}
+
+
+{{/*
+Splunk ingest URL: splunkObservability.ingestUrl, then the legacy ingestUrl, then the realm-derived URL.
+*/}}
+{{- define "splunk-otel-collector.o11yIngestUrl" -}}
+{{- $o11yRealm := (include "splunk-otel-collector.o11yRealm" .) }}
+{{- default (printf "https://ingest.%s.signalfx.com" $o11yRealm) (default .Values.ingestUrl .Values.splunkObservability.ingestUrl) }}
+{{- end -}}
+
+{{/*
+Splunk API URL: splunkObservability.apiUrl, then the legacy apiUrl, then the realm-derived URL.
+*/}}
+{{- define "splunk-otel-collector.o11yApiUrl" -}}
+{{- $o11yRealm := (include "splunk-otel-collector.o11yRealm" .) }}
+{{- default (printf "https://api.%s.signalfx.com" $o11yRealm) (default .Values.apiUrl .Values.splunkObservability.apiUrl) }}
+{{- end -}}
+
+{{/*
+Splunk Observability access token: splunkObservability.accessToken, then the legacy splunkAccessToken, then "".
+*/}}
+{{- define "splunk-otel-collector.o11yAccessToken" -}}
+{{- default "" (default .Values.splunkAccessToken .Values.splunkObservability.accessToken) -}}
+{{- end -}}
+
+{{/*
+fluentd image reference "repository:tag"; a trailing ":" is dropped when the tag is empty.
+*/}}
+{{- define "splunk-otel-collector.image.fluentd" -}}
+{{- trimSuffix ":" (printf "%s:%s" .Values.image.fluentd.repository .Values.image.fluentd.tag) -}}
+{{- end -}}
+
+{{/*
+Create the opentelemetry collector image name.
+*/}}
+{{- define "splunk-otel-collector.image.otelcol" -}}
+{{- printf "%s:%s" .Values.image.otelcol.repository (.Values.image.otelcol.tag | default .Chart.AppVersion) -}}
+{{- end -}}
+
+{{/*
+Create the patch-log-dirs image name ("repository:tag"; a trailing ":" is dropped when the tag is empty).
+*/}}
+{{- define "splunk-otel-collector.image.initPatchLogDirs" -}}
+{{- printf "%s:%s" .Values.image.initPatchLogDirs.repository .Values.image.initPatchLogDirs.tag | trimSuffix ":" -}}
+{{- end -}}
+
+{{/*
+ This helper converts the input value of memory (e.g. "500Mi", "2G", or a plain byte count) to whole MiB, rounded down.
+ Input needs to be a valid value as supported by k8s memory resource field. Suffixes are matched after lower-casing, so "m" is read as mega (1e6). Each branch must trim exactly the suffix it matched, otherwise the number fails to parse.
+ */}}
+{{- define "splunk-otel-collector.convertMemToMib" }}
+{{- $mem := lower . -}}
+{{- if hasSuffix "e" $mem -}}
+{{- $mem = mulf (trimSuffix "e" $mem | float64) 1e18 -}}
+{{- else if hasSuffix "ei" $mem -}}
+{{- $mem = mulf (trimSuffix "ei" $mem | float64) 0x1p60 -}}
+{{- else if hasSuffix "p" $mem -}}
+{{- $mem = mulf (trimSuffix "p" $mem | float64) 1e15 -}}
+{{- else if hasSuffix "pi" $mem -}}
+{{- $mem = mulf (trimSuffix "pi" $mem | float64) 0x1p50 -}}
+{{- else if hasSuffix "t" $mem -}}
+{{- $mem = mulf (trimSuffix "t" $mem | float64) 1e12 -}}
+{{- else if hasSuffix "ti" $mem -}}
+{{- $mem = mulf (trimSuffix "ti" $mem | float64) 0x1p40 -}}
+{{- else if hasSuffix "g" $mem -}}
+{{- $mem = mulf (trimSuffix "g" $mem | float64) 1e9 -}}
+{{- else if hasSuffix "gi" $mem -}}
+{{- $mem = mulf (trimSuffix "gi" $mem | float64) 0x1p30 -}}
+{{- else if hasSuffix "m" $mem -}}
+{{- $mem = mulf (trimSuffix "m" $mem | float64) 1e6 -}}
+{{- else if hasSuffix "mi" $mem -}}
+{{- $mem = mulf (trimSuffix "mi" $mem | float64) 0x1p20 -}}
+{{- else if hasSuffix "k" $mem -}}
+{{- $mem = mulf (trimSuffix "k" $mem | float64) 1e3 -}}
+{{- else if hasSuffix "ki" $mem -}}
+{{- $mem = mulf (trimSuffix "ki" $mem | float64) 0x1p10 -}}
+{{- end }}
+{{- divf $mem 0x1p20 | floor -}}
+{{- end }}
+
+{{/*
+Create a filter expression for multiline logs configuration.
+*/}}
+{{- define "splunk-otel-collector.newlineExpr" }}
+{{- $expr := "" }}
+{{- if .namespaceName }}
+{{- $useRegexp := eq (toString .namespaceName.useRegexp | default "false") "true" }}
+{{- $expr = cat "(resource[\"k8s.namespace.name\"])" (ternary "matches" "==" $useRegexp) (quote .namespaceName.value) "&&" }}
+{{- end }}
+{{- if .podName }}
+{{- $useRegexp := eq (toString .podName.useRegexp | default "false") "true" }}
+{{- $expr = cat $expr "(resource[\"k8s.pod.name\"])" (ternary "matches" "==" $useRegexp) (quote .podName.value) "&&" }}
+{{- end }}
+{{- if .containerName }}
+{{- $useRegexp := eq (toString .containerName.useRegexp | default "false") "true" }}
+{{- $expr = cat $expr "(resource[\"k8s.container.name\"])" (ternary "matches" "==" $useRegexp) (quote .containerName.value) "&&" }}
+{{- end }}
+{{- $expr | trimSuffix "&&" | trim }}
+{{- end -}}
+
+{{/*
+Create an identifier for a multiline logs configuration entry: the namespace, pod and container names that are set, joined with "_".
+*/}}
+{{- define "splunk-otel-collector.newlineKey" }}
+{{- $key := "" }}
+{{- if .namespaceName }}
+{{- $key = printf "%s_" .namespaceName.value }}
+{{- end }}
+{{- if .podName }}
+{{- $key = printf "%s%s_" $key .podName.value }}
+{{- end }}
+{{- if .containerName }}
+{{- $key = printf "%s%s" $key .containerName.value }}
+{{- end }}
+{{- $key | trimSuffix "_" }}
+{{- end -}}
+
+{{/*
+Common labels shared by all Kubernetes objects in this chart: app name, chart name-version ("+" replaced with "_"), managing service, release instance and app version.
+*/}}
+{{- define "splunk-otel-collector.commonLabels" -}}
+app.kubernetes.io/name: {{ include "splunk-otel-collector.name" . }}
+helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end -}}
+
+{{/*
+cloudProvider helper to support backward compatibility with the deprecated name.
+*/}}
+{{- define "splunk-otel-collector.cloudProvider" -}}
+{{- .Values.cloudProvider | default .Values.provider | default "" -}}
+{{- end -}}
+
+{{/*
+distribution helper to support backward compatibility with the deprecated name ("distro").
+*/}}
+{{- define "splunk-otel-collector.distribution" -}}
+{{- .Values.distribution | default .Values.distro | default "" -}}
+{{- end -}}
+
+{{/*
+Helper that returns "agent" parameter group yaml taking care of backward
+compatibility with the old config group name: "otelAgent". toString renders an unset value as "<nil>", so "<nil>" and "" both mean "otelAgent not provided"; otherwise otelAgent is merged over agent.
+*/}}
+{{- define "splunk-otel-collector.agent" -}}
+{{- if has (toString .Values.otelAgent) (list "" "<nil>") }}
+{{- .Values.agent | toYaml }}
+{{- else }}
+{{- deepCopy .Values.otelAgent | mustMergeOverwrite (deepCopy .Values.agent) | toYaml }}
+{{- end }}
+{{- end -}}
+
+{{/*
+The apiVersion for podDisruptionBudget policies: "policy/v1" when the cluster reports it, else "policy/v1beta1".
+*/}}
+{{- define "splunk-otel-collector.PDB-apiVersion" -}}
+{{- if .Capabilities.APIVersions.Has "policy/v1" -}}
+{{- print "policy/v1" -}}
+{{- else -}}
+{{- print "policy/v1beta1" -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+The name of the gateway service (the chart fullname, truncated to 63 chars).
+*/}}
+{{- define "splunk-otel-collector.gatewayServiceName" -}}
+{{ (include "splunk-otel-collector.fullname" . ) | trunc 63 | trimSuffix "-" }}
+{{- end -}}
+
+{{/*
+Whether the gateway is enabled: the "enabled" flag of the merged "gateway" parameter group (see the "gateway" helper).
+*/}}
+{{- define "splunk-otel-collector.gatewayEnabled" -}}
+{{- $gateway := fromYaml (include "splunk-otel-collector.gateway" .) }}
+{{- $gateway.enabled }}
+{{- end -}}
+
+{{/*
+Helper that returns "gateway" parameter group yaml taking care of backward
+compatibility with the old config group name: "otelCollector" ("<nil>" and "" both mean "otelCollector not provided").
+*/}}
+{{- define "splunk-otel-collector.gateway" -}}
+{{- if has (toString .Values.otelCollector) (list "" "<nil>") }}
+{{- .Values.gateway | toYaml }}
+{{- else }}
+{{- deepCopy .Values.otelCollector | mustMergeOverwrite (deepCopy .Values.gateway) | toYaml }}
+{{- end }}
+{{- end -}}
+
+{{/*
+Helper that returns "clusterReceiver" parameter group yaml taking care of backward
+compatibility with the old config group name: "otelK8sClusterReceiver" ("<nil>" and "" both mean "otelK8sClusterReceiver not provided").
+*/}}
+{{- define "splunk-otel-collector.clusterReceiver" -}}
+{{- if has (toString .Values.otelK8sClusterReceiver) (list "" "<nil>") }}
+{{- .Values.clusterReceiver | toYaml }}
+{{- else }}
+{{- deepCopy .Values.otelK8sClusterReceiver | mustMergeOverwrite (deepCopy .Values.clusterReceiver) | toYaml }}
+{{- end }}
+{{- end -}}
+
+{{/*
+"clusterReceiverTruncatedName" for the eks/fargate cluster receiver statefulSet name accounting for 11 appended random chars
+*/}}
+{{- define "splunk-otel-collector.clusterReceiverTruncatedName" -}}
+{{ printf "%s-k8s-cluster-receiver" ( include "splunk-otel-collector.fullname" . ) | trunc 52 | trimSuffix "-" }}
+{{- end -}}
+
+{{/*
+"clusterReceiverServiceName" for the eks/fargate cluster receiver statefulSet headless service
+*/}}
+{{- define "splunk-otel-collector.clusterReceiverServiceName" -}}
+{{ printf "%s-k8s-cluster-receiver" ( include "splunk-otel-collector.fullname" . ) | trunc 63 | trimSuffix "-" }}
+{{- end -}}
+
+{{/*
+"clusterReceiverNodeDiscovererScript" for the eks/fargate cluster receiver statefulSet initContainer
+*/}}
+{{- define "splunk-otel-collector.clusterReceiverNodeDiscovererScript" -}}
+{{ printf "%s-cr-node-discoverer-script" ( include "splunk-otel-collector.fullname" . ) | trunc 63 | trimSuffix "-" }}
+{{- end -}}
+
+{{/*
+"o11yInfraMonEventsEnabled" helper defines whether Observability Infrastructure monitoring events are enabled; the deprecated clusterReceiver.k8sEventsEnabled wins when provided ("<nil>" and "" both mean not provided)
+*/}}
+{{- define "splunk-otel-collector.o11yInfraMonEventsEnabled" -}}
+{{- $clusterReceiver := fromYaml (include "splunk-otel-collector.clusterReceiver" .) }}
+{{- if has (toString $clusterReceiver.k8sEventsEnabled) (list "" "<nil>") }}
+{{- .Values.splunkObservability.infrastructureMonitoringEventsEnabled }}
+{{- else }}
+{{- $clusterReceiver.k8sEventsEnabled }}
+{{- end }}
+{{- end -}}
+
+
+{{/*
+Whether object collection by k8s object receiver is enabled (clusterReceiver.k8sObjects is non-empty)
+*/}}
+{{- define "splunk-otel-collector.objectsEnabled" -}}
+{{- $clusterReceiver := fromYaml (include "splunk-otel-collector.clusterReceiver" .) }}
+{{- gt (len $clusterReceiver.k8sObjects) 0 }}
+{{- end -}}
+
+{{/*
+Whether object collection by k8s object receiver or/and event collection by k8s event receiver is enabled
+*/}}
+{{- define "splunk-otel-collector.objectsOrEventsEnabled" -}}
+{{- $clusterReceiver := fromYaml (include "splunk-otel-collector.clusterReceiver" .) }}
+{{- or $clusterReceiver.eventsEnabled (eq (include "splunk-otel-collector.objectsEnabled" .) "true") -}}
+{{- end -}}
+
+
+{{/*
+Whether clusterReceiver should be enabled: its own flag is on and metrics, objects or events collection is wanted
+*/}}
+{{- define "splunk-otel-collector.clusterReceiverEnabled" -}}
+{{- $clusterReceiver := fromYaml (include "splunk-otel-collector.clusterReceiver" .) }}
+{{- and $clusterReceiver.enabled (or (eq (include "splunk-otel-collector.metricsEnabled" .) "true") (eq (include "splunk-otel-collector.objectsOrEventsEnabled" .) 
"true")) -}}
+{{- end -}}
+
+
+{{/*
+Build the securityContext for Linux and Windows. Expects a dict with "securityContext", "isWindows" and "setRunAsUser"; note it modifies the passed securityContext in place. An unset runAsUser is detected as "<nil>" (what toString yields for nil) or "".
+*/}}
+{{- define "splunk-otel-collector.securityContext" -}}
+{{- if .isWindows }}
+{{- $_ := unset .securityContext "runAsUser" }}
+{{- if not (hasKey .securityContext "windowsOptions")}}
+{{- $_ := set .securityContext "windowsOptions" dict }}
+{{- end }}
+{{- if and (not (hasKey .securityContext.windowsOptions "runAsUserName")) (.setRunAsUser) }}
+{{- $_ := set .securityContext.windowsOptions "runAsUserName" "ContainerAdministrator"}}
+{{- end }}
+{{- else }}
+{{- if and (has (toString .securityContext.runAsUser) (list "" "<nil>")) (.setRunAsUser) }}
+{{- $_ := set .securityContext "runAsUser" 0 }}
+{{- end }}
+{{- end }}
+{{- toYaml .securityContext }}
+{{- end -}}
+
+{{/*
+Whether the clusterName configuration option is optional (distribution starts with "gke" or "eks"); renders "true" or "false"
+*/}}
+{{- define "splunk-otel-collector.clusterNameOptional" -}}
+{{- or (hasPrefix "gke" (include "splunk-otel-collector.distribution" .)) (hasPrefix "eks" (include "splunk-otel-collector.distribution" .)) }}
+{{- end -}}
+
+{{/*
+Whether the helm chart should detect the cluster name automatically. The included helper returns the string "true"/"false", and any non-empty string is truthy, so it is compared with "true" explicitly.
+*/}}
+{{- define "splunk-otel-collector.autoDetectClusterName" -}}
+{{- and (eq (include "splunk-otel-collector.clusterNameOptional" .) "true") (not .Values.clusterName) }}
+{{- end -}}
+
+{{/*
+Helper used to define a namespace.
+- Returns namespace from a release
+- If namespaceOverride value is filled in it will replace the namespace
+*/}}
+{{- define "splunk-otel-collector.namespace" -}}
+    {{- default .Release.Namespace .Values.namespaceOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create the name of the target allocator service account to use
+*/}}
+{{- define "splunk-otel-collector.targetAllocatorServiceAccountName" -}}
+{{- default (printf "%s-ta" ( include "splunk-otel-collector.fullname" .) 
| trunc 63 | trimSuffix "-") .Values.targetAllocator.serviceAccount.name -}} +{{- end -}} + +{{/* +Create the name of the target allocator cluster role to use +*/}} +{{- define "splunk-otel-collector.targetAllocatorClusterRoleName" -}} +{{- printf "%s-ta-clusterRole" ( include "splunk-otel-collector.fullname" . ) | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create the name of the target allocator cluster config map to use +*/}} +{{- define "splunk-otel-collector.targetAllocatorConfigMapName" -}} +{{- printf "%s-ta-configmap" ( include "splunk-otel-collector.fullname" . ) | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create the name of the target allocator cluster role binding to use +*/}} +{{- define "splunk-otel-collector.targetAllocatorClusterRoleBindingName" -}} +{{- printf "%s-ta-clusterRoleBinding" ( include "splunk-otel-collector.fullname" . ) | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/charts/openTelemetry/templates/clusterRole.yaml b/charts/openTelemetry/templates/clusterRole.yaml new file mode 100644 index 00000000..ab8effd2 --- /dev/null +++ b/charts/openTelemetry/templates/clusterRole.yaml @@ -0,0 +1,119 @@ +{{ if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "splunk-otel-collector.fullname" . }} + labels: + {{- include "splunk-otel-collector.commonLabels" . | nindent 4 }} + app: {{ template "splunk-otel-collector.name" . }} + chart: {{ template "splunk-otel-collector.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +rules: +{{- if eq (include "splunk-otel-collector.distribution" .) 
"openshift" }} +- apiGroups: + - quota.openshift.io + resources: + - clusterresourcequotas + verbs: + - get + - list + - watch +{{- end }} +- apiGroups: + - "" + resources: + - events + - namespaces + - namespaces/status + - nodes + - nodes/spec + - nodes/stats + - nodes/proxy + - pods + - pods/status + - persistentvolumeclaims + - persistentvolumes + - replicationcontrollers + - replicationcontrollers/status + - resourcequotas + - services + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - daemonsets + - deployments + - replicasets + - statefulsets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get + - list + - watch +{{- if eq (include "splunk-otel-collector.distribution" .) "eks/fargate" }} +- apiGroups: + - "" + resources: + - nodes + verbs: + - patch +{{- end }} +{{- if hasPrefix "eks" (include "splunk-otel-collector.distribution" .) }} +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + resourceNames: + - aws-auth +{{- end }} +- apiGroups: + - events.k8s.io + resources: + - events + - namespaces + verbs: + - get + - list + - watch +{{- with .Values.rbac.customRules }} +{{ toYaml . }} +{{- end }} +{{- end }} diff --git a/charts/openTelemetry/templates/clusterRoleBinding.yaml b/charts/openTelemetry/templates/clusterRoleBinding.yaml new file mode 100644 index 00000000..599b0ae5 --- /dev/null +++ b/charts/openTelemetry/templates/clusterRoleBinding.yaml @@ -0,0 +1,20 @@ +{{ if .Values.rbac.create -}} +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "splunk-otel-collector.fullname" . 
}} + labels: + {{- include "splunk-otel-collector.commonLabels" . | nindent 4 }} + app: {{ template "splunk-otel-collector.name" . }} + chart: {{ template "splunk-otel-collector.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "splunk-otel-collector.fullname" . }} +subjects: +- kind: ServiceAccount + name: {{ template "splunk-otel-collector.serviceAccountName" . }} + namespace: {{ template "splunk-otel-collector.namespace" . }} +{{- end }} diff --git a/charts/openTelemetry/templates/config/_common.tpl b/charts/openTelemetry/templates/config/_common.tpl new file mode 100644 index 00000000..93f3b39b --- /dev/null +++ b/charts/openTelemetry/templates/config/_common.tpl @@ -0,0 +1,525 @@ +{{/* +Common config for the otel-collector memory_limiter processor +*/}} +{{- define "splunk-otel-collector.otelMemoryLimiterConfig" -}} +memory_limiter: + # check_interval is the time between measurements of memory usage. + check_interval: 2s + # By default limit_mib is set to 90% of container memory limit + limit_mib: ${SPLUNK_MEMORY_LIMIT_MIB} +{{- end }} + +{{/* +Common config for the otel-collector sapm exporter +*/}} +{{- define "splunk-otel-collector.otelSapmExporter" -}} +{{- if (eq (include "splunk-otel-collector.tracesEnabled" .) "true") }} +sapm: + endpoint: {{ include "splunk-otel-collector.o11yIngestUrl" . }}/v2/trace + access_token: ${SPLUNK_OBSERVABILITY_ACCESS_TOKEN} +{{- end }} +{{- end }} + +{{/* +Common config for the otel-collector traces receivers +*/}} +{{- define "splunk-otel-collector.otelReceivers" -}} +otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + # https://github.com/open-telemetry/opentelemetry-collector/blob/9d3a8a4608a7dbd9f787867226a78356ace9b5e4/receiver/otlpreceiver/otlp.go#L140-L152 + endpoint: 0.0.0.0:4318 + +{{- if (eq (include "splunk-otel-collector.tracesEnabled" .) 
"true") }} +jaeger: + protocols: + thrift_http: + endpoint: 0.0.0.0:14268 + grpc: + endpoint: 0.0.0.0:14250 +zipkin: + endpoint: 0.0.0.0:9411 +{{- end }} +{{- end }} + +{{/* +Filter Attributes Function +*/}} +{{- define "splunk-otel-collector.filterAttr" -}} +{{- if .Values.logsCollection.containers.useSplunkIncludeAnnotation -}} +splunk.com/include +{{- else -}} +splunk.com/exclude +{{- end }} +{{- end }} + +{{/* +Common config for resourcedetection processor +*/}} +{{- define "splunk-otel-collector.resourceDetectionProcessor" -}} +resourcedetection: + detectors: + # Note: Kubernetes distro detectors need to come first so they set the proper cloud.platform + # before it gets set later by the cloud provider detector. + - env + {{- if or (hasPrefix "gke" (include "splunk-otel-collector.distribution" .)) (eq (include "splunk-otel-collector.cloudProvider" .) "gcp") }} + - gcp + {{- else if hasPrefix "eks" (include "splunk-otel-collector.distribution" .) }} + - eks + {{- else if eq (include "splunk-otel-collector.distribution" .) "aks" }} + - aks + {{- end }} + {{- if eq (include "splunk-otel-collector.cloudProvider" .) "aws" }} + - ec2 + {{- else if eq (include "splunk-otel-collector.cloudProvider" .) "azure" }} + - azure + {{- end }} + # The `system` detector goes last so it can't preclude cloud detectors from setting host/os info. 
+ # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor#ordering + - system + {{- if and (hasPrefix "gke" (include "splunk-otel-collector.distribution" .)) (not .Values.clusterName) }} + gcp: + resource_attributes: + k8s.cluster.name: + enabled: true + {{- else if and (hasPrefix "eks" (include "splunk-otel-collector.distribution" .)) (not .Values.clusterName) }} + eks: + resource_attributes: + k8s.cluster.name: + enabled: true + {{- end }} + override: true + timeout: 15s +{{- end }} + +{{/* +Common config for adding k8s.cluster.name using the resourcedetection processor +*/}} +{{- define "splunk-otel-collector.resourceDetectionProcessorKubernetesClusterName" -}} +resourcedetection/k8s_cluster_name: + detectors: + {{- if hasPrefix "gke" (include "splunk-otel-collector.distribution" .) }} + - gcp + {{- else if hasPrefix "eks" (include "splunk-otel-collector.distribution" .) }} + - eks + {{- end }} + {{- if hasPrefix "gke" (include "splunk-otel-collector.distribution" .) }} + gcp: + resource_attributes: + k8s.cluster.name: + enabled: true + host.name: + enabled: false + host.id: + enabled: false + host.type: + enabled: false + cloud.provider: + enabled: false + cloud.platform: + enabled: false + cloud.account.id: + enabled: false + cloud.region: + enabled: false + cloud.availability_zone: + enabled: false + faas.name: + enabled: false + faas.version: + enabled: false + faas.id: + enabled: false + faas.instance: + enabled: false + gcp.cloud_run.job.execution: + enabled: false + gcp.cloud_run.job.task_index: + enabled: false + gcp.gce.instance.name: + enabled: false + gcp.gce.instance.hostname: + enabled: false + {{- else if hasPrefix "eks" (include "splunk-otel-collector.distribution" .) 
}} + eks: + resource_attributes: + k8s.cluster.name: + enabled: true + cloud.provider: + enabled: false + cloud.platform: + enabled: false + {{- end }} + override: true + timeout: 15s +{{- end }} + +{{/* +Common config for K8s attributes processor adding k8s metadata to resource attributes. +*/}} +{{- define "splunk-otel-collector.k8sAttributesProcessor" -}} +k8sattributes: + pod_association: + - sources: + - from: resource_attribute + name: k8s.pod.uid + - sources: + - from: resource_attribute + name: k8s.pod.ip + - sources: + - from: resource_attribute + name: ip + - sources: + - from: connection + - sources: + - from: resource_attribute + name: host.name + extract: + metadata: + - k8s.namespace.name + - k8s.node.name + - k8s.pod.name + - k8s.pod.uid + - container.id + - container.image.name + - container.image.tag + annotations: + - key: splunk.com/sourcetype + from: pod + - key: {{ include "splunk-otel-collector.filterAttr" . }} + tag_name: {{ include "splunk-otel-collector.filterAttr" . }} + from: namespace + - key: {{ include "splunk-otel-collector.filterAttr" . }} + tag_name: {{ include "splunk-otel-collector.filterAttr" . }} + from: pod + - key: splunk.com/index + tag_name: com.splunk.index + from: namespace + - key: splunk.com/index + tag_name: com.splunk.index + from: pod + {{- include "splunk-otel-collector.addExtraAnnotations" . | nindent 6 }} + {{- if or .Values.extraAttributes.podLabels .Values.extraAttributes.fromLabels }} + labels: + {{- range .Values.extraAttributes.podLabels }} + - key: {{ . }} + {{- end }} + {{- include "splunk-otel-collector.addExtraLabels" . | nindent 6 }} + {{- end }} +{{- end }} + +{{/* +Common config for K8s attributes processor adding k8s metadata to metrics resource attributes. 
+*/}} +{{- define "splunk-otel-collector.k8sAttributesSplunkPlatformMetrics" -}} +k8sattributes/metrics: + pod_association: + - sources: + - from: resource_attribute + name: k8s.node.name + - sources: + - from: resource_attribute + name: k8s.pod.uid + - sources: + - from: resource_attribute + name: k8s.pod.ip + - sources: + - from: resource_attribute + name: ip + - sources: + - from: connection + extract: + metadata: [] + annotations: + - key: splunk.com/sourcetype + from: pod + - key: splunk.com/metricsIndex + tag_name: com.splunk.index + from: namespace + - key: splunk.com/metricsIndex + tag_name: com.splunk.index + from: pod +{{- end }} + +{{/* +Resource processor for logs manipulations +*/}} +{{- define "splunk-otel-collector.resourceLogsProcessor" -}} +resource/logs: + attributes: + {{- if .Values.splunkPlatform.sourcetype }} + - key: com.splunk.sourcetype + value: "{{.Values.splunkPlatform.sourcetype }}" + action: upsert + {{- end }} + - key: com.splunk.sourcetype + from_attribute: k8s.pod.annotations.splunk.com/sourcetype + action: upsert + - key: k8s.pod.annotations.splunk.com/sourcetype + action: delete + - key: {{ include "splunk-otel-collector.filterAttr" . 
}} + action: delete + {{- if .Values.splunkPlatform.fieldNameConvention.renameFieldsSck }} + - key: container_name + from_attribute: k8s.container.name + action: upsert + - key: container_id + from_attribute: container.id + action: upsert + - key: pod + from_attribute: k8s.pod.name + action: upsert + - key: pod_uid + from_attribute: k8s.pod.uid + action: upsert + - key: namespace + from_attribute: k8s.namespace.name + action: upsert + - key: label_app + from_attribute: k8s.pod.labels.app + action: upsert + {{- range $_, $label := .Values.extraAttributes.podLabels }} + - key: {{ printf "label_%s" $label }} + from_attribute: {{ printf "k8s.pod.labels.%s" $label }} + action: upsert + {{- end }} + {{- if not .Values.splunkPlatform.fieldNameConvention.keepOtelConvention }} + - key: k8s.container.name + action: delete + - key: container.id + action: delete + - key: k8s.pod.name + action: delete + - key: k8s.pod.uid + action: delete + - key: k8s.namespace.name + action: delete + - key: k8s.pod.labels.app + action: delete + {{- range $_, $label := .Values.extraAttributes.podLabels }} + - key: {{ printf "k8s.pod.labels.%s" $label }} + action: delete + {{- end }} + {{- end }} + {{- end }} +{{- end }} + +{{/* +The transform processor adds service.name attribute to logs the same way as it's done by istio for the generated traces +https://github.com/istio/istio/blob/6237cb4e63cf9a332327cc0a815d6b46257e6f8a/pkg/config/analysis/analyzers/testdata/common/sidecar-injector-configmap.yaml#L110-L115 +This enables the correlation between logs and traces in Splunk Observability Cloud. 
+*/}} +{{- define "splunk-otel-collector.transformLogsProcessor" -}} +transform/istio_service_name: + error_mode: ignore + log_statements: + - context: resource + statements: + - set(attributes["service.name"], Concat([attributes["k8s.pod.labels.app"], attributes["k8s.namespace.name"]], ".")) where attributes["service.name"] == nil and attributes["k8s.pod.labels.app"] != nil and attributes["k8s.namespace.name"] != nil + - set(cache["owner_name"], attributes["k8s.pod.name"]) where attributes["service.name"] == nil and attributes["k8s.pod.name"] != nil + # Name of the object owning the pod is taken from "k8s.pod.name" attribute by stripping the pod suffix according + # to the k8s name generation rules (we don't want to put pressure on the k8s API server to get the owner name): + # https://github.com/kubernetes/apimachinery/blob/ff522ab81c745a9ac5f7eeb7852fac134194a3b6/pkg/util/rand/rand.go#L92-L127 + - replace_pattern(cache["owner_name"], "^(.+?)-(?:(?:[0-9bcdf]+-)?[bcdfghjklmnpqrstvwxz2456789]{5}|[0-9]+)$$", "$$1") where attributes["service.name"] == nil and cache["owner_name"] != nil + - set(attributes["service.name"], Concat([cache["owner_name"], attributes["k8s.namespace.name"]], ".")) where attributes["service.name"] == nil and cache["owner_name"] != nil and attributes["k8s.namespace.name"] != nil +{{- end }} + +{{/* +Filter logs processor +*/}} +{{- define "splunk-otel-collector.filterLogsProcessors" -}} +# Drop logs coming from pods and namespaces with splunk.com/exclude annotation. +filter/logs: + logs: + {{ .Values.logsCollection.containers.useSplunkIncludeAnnotation | ternary "include" "exclude" }}: + match_type: strict + resource_attributes: + - key: {{ include "splunk-otel-collector.filterAttr" . 
}} + value: "true" +{{- end }} + +{{/* +Splunk Platform Logs exporter +*/}} +{{- define "splunk-otel-collector.splunkPlatformLogsExporter" -}} +splunk_hec/platform_logs: + endpoint: {{ .Values.splunkPlatform.endpoint | quote }} + token: "${SPLUNK_PLATFORM_HEC_TOKEN}" + index: {{ .Values.splunkPlatform.index | quote }} + source: {{ .Values.splunkPlatform.source | quote }} + max_idle_conns: {{ .Values.splunkPlatform.maxConnections }} + max_idle_conns_per_host: {{ .Values.splunkPlatform.maxConnections }} + disable_compression: {{ .Values.splunkPlatform.disableCompression }} + timeout: {{ .Values.splunkPlatform.timeout }} + idle_conn_timeout: {{ .Values.splunkPlatform.idleConnTimeout }} + splunk_app_name: {{ .Chart.Name }} + splunk_app_version: {{ .Chart.Version }} + profiling_data_enabled: false + tls: + insecure_skip_verify: {{ .Values.splunkPlatform.insecureSkipVerify }} + {{- if .Values.splunkPlatform.clientCert }} + cert_file: /otel/etc/splunk_platform_hec_client_cert + {{- end }} + {{- if .Values.splunkPlatform.clientKey }} + key_file: /otel/etc/splunk_platform_hec_client_key + {{- end }} + {{- if .Values.splunkPlatform.caFile }} + ca_file: /otel/etc/splunk_platform_hec_ca_file + {{- end }} + retry_on_failure: + enabled: {{ .Values.splunkPlatform.retryOnFailure.enabled }} + initial_interval: {{ .Values.splunkPlatform.retryOnFailure.initialInterval }} + max_interval: {{ .Values.splunkPlatform.retryOnFailure.maxInterval }} + {{- if .Values.featureGates.noDropLogsPipeline }} + max_elapsed_time: 0s + {{- else }} + max_elapsed_time: {{ .Values.splunkPlatform.retryOnFailure.maxElapsedTime }} + {{- end }} + sending_queue: + enabled: {{ .Values.splunkPlatform.sendingQueue.enabled }} + queue_size: {{ .Values.splunkPlatform.sendingQueue.queueSize }} + {{- if .addPersistentStorage }} + storage: file_storage/persistent_queue + {{- end }} + {{- if not .Values.featureGates.noDropLogsPipeline }} + num_consumers: {{ .Values.splunkPlatform.sendingQueue.numConsumers }} + {{- else 
}} + num_consumers: 25 + batcher: + enabled: true + flush_timeout: 200ms + min_size_items: 2048 + {{- end }} +{{- end }} + +{{/* +Splunk Platform Metrics exporter +*/}} +{{- define "splunk-otel-collector.splunkPlatformMetricsExporter" -}} +splunk_hec/platform_metrics: + endpoint: {{ .Values.splunkPlatform.endpoint | quote }} + token: "${SPLUNK_PLATFORM_HEC_TOKEN}" + index: {{ .Values.splunkPlatform.metricsIndex | quote }} + source: {{ .Values.splunkPlatform.source | quote }} + max_idle_conns: {{ .Values.splunkPlatform.maxConnections }} + max_idle_conns_per_host: {{ .Values.splunkPlatform.maxConnections }} + disable_compression: {{ .Values.splunkPlatform.disableCompression }} + timeout: {{ .Values.splunkPlatform.timeout }} + idle_conn_timeout: {{ .Values.splunkPlatform.idleConnTimeout }} + splunk_app_name: {{ .Chart.Name }} + splunk_app_version: {{ .Chart.Version }} + tls: + insecure_skip_verify: {{ .Values.splunkPlatform.insecureSkipVerify }} + {{- if .Values.splunkPlatform.clientCert }} + cert_file: /otel/etc/splunk_platform_hec_client_cert + {{- end }} + {{- if .Values.splunkPlatform.clientKey }} + key_file: /otel/etc/splunk_platform_hec_client_key + {{- end }} + {{- if .Values.splunkPlatform.caFile }} + ca_file: /otel/etc/splunk_platform_hec_ca_file + {{- end }} + retry_on_failure: + enabled: {{ .Values.splunkPlatform.retryOnFailure.enabled }} + initial_interval: {{ .Values.splunkPlatform.retryOnFailure.initialInterval }} + max_interval: {{ .Values.splunkPlatform.retryOnFailure.maxInterval }} + max_elapsed_time: {{ .Values.splunkPlatform.retryOnFailure.maxElapsedTime }} + sending_queue: + enabled: {{ .Values.splunkPlatform.sendingQueue.enabled }} + num_consumers: {{ .Values.splunkPlatform.sendingQueue.numConsumers }} + queue_size: {{ .Values.splunkPlatform.sendingQueue.queueSize }} + {{- if .addPersistentStorage }} + storage: file_storage/persistent_queue + {{- end }} +{{- end }} + +{{/* +Splunk Platform Traces exporter +*/}} +{{- define 
"splunk-otel-collector.splunkPlatformTracesExporter" -}} +splunk_hec/platform_traces: + endpoint: {{ .Values.splunkPlatform.endpoint | quote }} + token: "${SPLUNK_PLATFORM_HEC_TOKEN}" + index: {{ .Values.splunkPlatform.tracesIndex | quote }} + source: {{ .Values.splunkPlatform.source | quote }} + max_idle_conns: {{ .Values.splunkPlatform.maxConnections }} + max_idle_conns_per_host: {{ .Values.splunkPlatform.maxConnections }} + disable_compression: {{ .Values.splunkPlatform.disableCompression }} + timeout: {{ .Values.splunkPlatform.timeout }} + idle_conn_timeout: {{ .Values.splunkPlatform.idleConnTimeout }} + splunk_app_name: {{ .Chart.Name }} + splunk_app_version: {{ .Chart.Version }} + tls: + insecure_skip_verify: {{ .Values.splunkPlatform.insecureSkipVerify }} + {{- if .Values.splunkPlatform.clientCert }} + cert_file: /otel/etc/splunk_platform_hec_client_cert + {{- end }} + {{- if .Values.splunkPlatform.clientKey }} + key_file: /otel/etc/splunk_platform_hec_client_key + {{- end }} + {{- if .Values.splunkPlatform.caFile }} + ca_file: /otel/etc/splunk_platform_hec_ca_file + {{- end }} + retry_on_failure: + enabled: {{ .Values.splunkPlatform.retryOnFailure.enabled }} + initial_interval: {{ .Values.splunkPlatform.retryOnFailure.initialInterval }} + max_interval: {{ .Values.splunkPlatform.retryOnFailure.maxInterval }} + max_elapsed_time: {{ .Values.splunkPlatform.retryOnFailure.maxElapsedTime }} + sending_queue: + enabled: {{ .Values.splunkPlatform.sendingQueue.enabled }} + num_consumers: {{ .Values.splunkPlatform.sendingQueue.numConsumers }} + queue_size: {{ .Values.splunkPlatform.sendingQueue.queueSize }} + {{- if .addPersistentStorage }} + storage: file_storage/persistent_queue + {{- end }} +{{- end }} + +{{/* +Add Extra Labels +*/}} +{{- define "splunk-otel-collector.addExtraLabels" -}} +{{- with .Values.extraAttributes.fromLabels }} +{{ . 
| toYaml}} +{{- end }} +{{- end }} + +{{/* +Add Extra Annotations +*/}} +{{- define "splunk-otel-collector.addExtraAnnotations" -}} +{{- with .Values.extraAttributes.fromAnnotations }} +{{ . | toYaml}} +{{- end }} +{{- end }} + +{{/* +Generates prometheus receiver config for internal metrics. +Provide the component name as the input. +*/}} +{{- define "splunk-otel-collector.prometheusInternalMetrics" -}} +{{- $receiver := . | lower | replace "-" "_" }} +{{- $job := . | lower }} +prometheus/{{ $receiver }}: + config: + scrape_configs: + - job_name: "otel-{{ $job }}" + metric_relabel_configs: + - action: drop + regex: "otelcol_rpc_.*" + source_labels: + - __name__ + - action: drop + regex: "otelcol_http_.*" + source_labels: + - __name__ + - action: drop + regex: "otelcol_processor_batch_.*" + source_labels: + - __name__ + scrape_interval: 10s + static_configs: + - targets: + - "${K8S_POD_IP}:8889" +{{- end }} diff --git a/charts/openTelemetry/templates/config/_otel-agent.tpl b/charts/openTelemetry/templates/config/_otel-agent.tpl new file mode 100644 index 00000000..2a9c2613 --- /dev/null +++ b/charts/openTelemetry/templates/config/_otel-agent.tpl @@ -0,0 +1,1083 @@ +{{/* +Config for the otel-collector agent +The values can be overridden in .Values.agent.config +*/}} +{{- define "splunk-otel-collector.agentConfig" -}} +{{ $gateway := fromYaml (include "splunk-otel-collector.gateway" .) -}} +{{ $gatewayEnabled := eq (include "splunk-otel-collector.gatewayEnabled" .) "true" }} +extensions: + {{- if and (eq (include "splunk-otel-collector.logsEnabled" .) 
"true") (eq .Values.logsEngine "otel") }} + file_storage: + directory: {{ .Values.logsCollection.checkpointPath }} + {{- if not (eq (toString .Values.splunkPlatform.fsyncEnabled) "") }} + fsync: {{ .Values.splunkPlatform.fsyncEnabled }} + {{- end }} + {{- end }} + + {{- if .Values.splunkPlatform.sendingQueue.persistentQueue.enabled }} + file_storage/persistent_queue: + directory: {{ .Values.splunkPlatform.sendingQueue.persistentQueue.storagePath }}/agent + timeout: 0 + {{- if not (eq (toString .Values.splunkPlatform.fsyncEnabled) "") }} + fsync: {{ .Values.splunkPlatform.fsyncEnabled }} + {{- end }} + {{- end }} + + + health_check: + endpoint: 0.0.0.0:13133 + + k8s_observer: + auth_type: serviceAccount + node: ${K8S_NODE_NAME} + + zpages: + +receivers: + {{- include "splunk-otel-collector.otelReceivers" . | nindent 2 }} + {{- if (eq (include "splunk-otel-collector.logsEnabled" .) "true") }} + fluentforward: + endpoint: 0.0.0.0:8006 + {{- end }} + + # Placeholder receiver needed for discovery mode + nop: + + # Prometheus receiver scraping metrics from the pod itself + {{- include "splunk-otel-collector.prometheusInternalMetrics" "agent" | nindent 2}} + + {{- if (eq (include "splunk-otel-collector.metricsEnabled" .) 
"true") }} + hostmetrics: + collection_interval: 10s + scrapers: + cpu: + disk: + filesystem: + memory: + network: + # System load average metrics https://en.wikipedia.org/wiki/Load_(computing) + load: + # Paging/Swap space utilization and I/O metrics + paging: + # Aggregated system process count metrics + processes: + # System processes metrics, disabled by default + # process: + + receiver_creator: + watch_observers: [k8s_observer] + receivers: + {{- if or .Values.autodetect.prometheus .Values.autodetect.istio }} + {{- if .Values.featureGates.useLightPrometheusReceiver }} + lightprometheus: + {{- else }} + prometheus_simple: + {{- end }} + {{- if .Values.autodetect.prometheus }} + # Enable prometheus scraping for pods with standard prometheus annotations + rule: type == "pod" && annotations["prometheus.io/scrape"] == "true" + {{- else }} + # Enable prometheus scraping for istio pods only + rule: type == "pod" && annotations["prometheus.io/scrape"] == "true" && "istio.io/rev" in labels + {{- end }} + config: + {{- if .Values.featureGates.useLightPrometheusReceiver }} + endpoint: 'http://`endpoint`:`"prometheus.io/port" in annotations ? annotations["prometheus.io/port"] : 9090``"prometheus.io/path" in annotations ? annotations["prometheus.io/path"] : "/metrics"`' + resource_attributes: + service.name: + enabled: false + service.instance.id: + enabled: false + {{- else }} + metrics_path: '`"prometheus.io/path" in annotations ? annotations["prometheus.io/path"] : "/metrics"`' + endpoint: '`endpoint`:`"prometheus.io/port" in annotations ? annotations["prometheus.io/port"] : 9090`' + {{- end }} + {{- end }} + + # Receivers for collecting k8s control plane metrics. + # Distributions besides Kubernetes and Openshift are not supported. + # Verified with Kubernetes v1.22 and Openshift v4.10.59. 
+ {{- if and (or (eq .Values.distribution "openshift") (eq .Values.distribution "")) (not (.Values.featureGates.useControlPlaneMetricsHistogramData)) }} + # Below, the TLS certificate verification is often skipped because the k8s default certificate is self signed and + # will fail the verification. + {{- if .Values.agent.controlPlaneMetrics.coredns.enabled }} + smartagent/coredns: + {{- if eq .Values.distribution "openshift" }} + rule: type == "pod" && namespace == "openshift-dns" && name contains "dns" + {{- else }} + rule: type == "pod" && labels["k8s-app"] == "kube-dns" + {{- end }} + config: + extraDimensions: + metric_source: k8s-coredns + type: coredns + {{- if eq .Values.distribution "openshift" }} + port: 9154 + skipVerify: true + useHTTPS: true + useServiceAccount: true + {{- else }} + port: 9153 + {{- end }} + {{- end }} + {{- if .Values.agent.controlPlaneMetrics.etcd.enabled }} + smartagent/etcd: + {{- if eq .Values.distribution "openshift" }} + rule: type == "pod" && labels["k8s-app"] == "etcd" + {{- else }} + rule: type == "pod" && (labels["k8s-app"] == "etcd-manager-events" || labels["k8s-app"] == "etcd-manager-main") + {{- end }} + config: + clientCertPath: /otel/etc/etcd/tls.crt + clientKeyPath: /otel/etc/etcd/tls.key + useHTTPS: true + type: etcd + {{- if .Values.agent.controlPlaneMetrics.etcd.skipVerify }} + skipVerify: true + {{- else }} + caCertPath: /otel/etc/etcd/cacert.pem + skipVerify: false + {{- end }} + {{- if eq .Values.distribution "openshift" }} + port: 9979 + {{- else }} + port: 4001 + {{- end }} + {{- end }} + {{- if .Values.agent.controlPlaneMetrics.controllerManager.enabled }} + smartagent/kube-controller-manager: + {{- if eq .Values.distribution "openshift" }} + rule: type == "pod" && labels["app"] == "kube-controller-manager" && labels["kube-controller-manager"] == "true" + {{- else }} + rule: type == "pod" && labels["k8s-app"] == "kube-controller-manager" + {{- end }} + config: + extraDimensions: + metric_source: 
kubernetes-controller-manager + port: 10257 + skipVerify: true + type: kube-controller-manager + useHTTPS: true + useServiceAccount: true + {{- end }} + {{- if .Values.agent.controlPlaneMetrics.apiserver.enabled }} + smartagent/kubernetes-apiserver: + {{- if eq .Values.distribution "openshift" }} + rule: type == "port" && port == 6443 && pod.labels["app"] == "openshift-kube-apiserver" && pod.labels["apiserver"] == "true" + {{- else }} + rule: type == "port" && port == 443 && pod.labels["k8s-app"] == "kube-apiserver" + {{- end }} + config: + extraDimensions: + metric_source: kubernetes-apiserver + skipVerify: true + type: kubernetes-apiserver + useHTTPS: true + useServiceAccount: true + {{- end }} + {{- if .Values.agent.controlPlaneMetrics.proxy.enabled }} + smartagent/kubernetes-proxy: + {{- if eq .Values.distribution "openshift" }} + rule: type == "port" && pod.labels["app"] == "sdn" && (port == 9101 || port == 29101) + {{- else }} + rule: type == "pod" && labels["k8s-app"] == "kube-proxy" + {{- end }} + config: + extraDimensions: + metric_source: kubernetes-proxy + type: kubernetes-proxy + # Connecting to kube proxy in unknown Kubernetes distributions can be troublesome and generate log noise + # For now, set the scrape failure log level to debug when no specific distribution is selected + {{- if eq .Values.distribution "" }} + scrapeFailureLogLevel: debug + {{- end }} + {{- if eq .Values.distribution "openshift" }} + skipVerify: true + useHTTPS: true + useServiceAccount: true + {{- else }} + port: 10249 + {{- end }} + {{- end }} + {{- if .Values.agent.controlPlaneMetrics.scheduler.enabled }} + smartagent/kubernetes-scheduler: + {{- if eq .Values.distribution "openshift" }} + rule: type == "pod" && labels["app"] == "openshift-kube-scheduler" && labels["scheduler"] == "true" + {{- else }} + rule: type == "pod" && labels["k8s-app"] == "kube-scheduler" + {{- end }} + config: + extraDimensions: + metric_source: kubernetes-scheduler + skipVerify: true + port: 10259 + 
type: kubernetes-scheduler + useHTTPS: true + useServiceAccount: true + {{- end }} + {{- end }} + + {{- if and (eq (include "splunk-otel-collector.splunkO11yEnabled" .) "true") .Values.featureGates.useControlPlaneMetricsHistogramData }} + # Receivers for collecting k8s control plane metrics as native OpenTelemetry metrics, including histogram data. + {{- if or (eq .Values.distribution "openshift") (eq .Values.distribution "") }} + # Below, the TLS certificate verification is often skipped because the k8s default certificate is self signed and + # will fail the verification. + {{- if .Values.agent.controlPlaneMetrics.coredns.enabled }} + prometheus/coredns: + {{- if eq .Values.distribution "openshift" }} + rule: type == "pod" && namespace == "openshift-dns" && name contains "dns" + {{- else }} + rule: type == "pod" && labels["k8s-app"] == "kube-dns" + {{- end }} + config: + config: + scrape_configs: + - job_name: "coredns" + {{- if eq .Values.distribution "openshift" }} + static_configs: + - targets: ["`endpoint`:9154"] + tls_config: + insecure_skip_verify: true + {{- else }} + static_configs: + - targets: ["`endpoint`:9153"] + {{- end }} + {{- end }} + {{- if .Values.agent.controlPlaneMetrics.etcd.enabled }} + prometheus/etcd: + {{- if eq .Values.distribution "openshift" }} + rule: type == "pod" && labels["k8s-app"] == "etcd" + {{- else }} + rule: type == "pod" && (labels["k8s-app"] == "etcd-manager-events" || labels["k8s-app"] == "etcd-manager-main" || labels["component"] == "etcd") + {{- end }} + config: + config: + scrape_configs: + - job_name: "etcd" + static_configs: + - targets: ["`endpoint`:2381"] + {{- end }} + {{- if .Values.agent.controlPlaneMetrics.controllerManager.enabled }} + prometheus/kube-controller-manager: + {{- if eq .Values.distribution "openshift" }} + rule: type == "pod" && labels["app"] == "kube-controller-manager" && labels["kube-controller-manager"] == "true" + {{- else }} + rule: type == "pod" && (labels["k8s-app"] == 
"kube-controller-manager" || labels["component"] == "kube-controller-manager") + {{- end }} + config: + config: + scrape_configs: + - job_name: "kube-controller-manager" + static_configs: + - targets: ["`endpoint`:10257"] + scheme: https + authorization: + credentials_file: "/var/run/secrets/kubernetes.io/serviceaccount/token" + type: Bearer + tls_config: + ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + insecure_skip_verify: true + {{- end }} + {{- if .Values.agent.controlPlaneMetrics.apiserver.enabled }} + prometheus/kubernetes-apiserver: + {{- if eq .Values.distribution "openshift" }} + rule: type == "port" && port == 6443 && pod.labels["app"] == "openshift-kube-apiserver" && pod.labels["apiserver"] == "true" + {{- else }} + rule: type == "port" && port == 443 && (pod.labels["k8s-app"] == "kube-apiserver" || pod.labels["component"] == "kube-apiserver") + {{- end }} + config: + config: + scrape_configs: + - job_name: "kubernetes-apiserver" + scheme: https + authorization: + credentials_file: "/var/run/secrets/kubernetes.io/serviceaccount/token" + type: Bearer + tls_config: + ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + insecure_skip_verify: true + static_configs: + - targets: ["`endpoint`:`port`"] + {{- end }} + {{- if .Values.agent.controlPlaneMetrics.proxy.enabled }} + prometheus/kubernetes-proxy: + {{- if eq .Values.distribution "openshift" }} + rule: type == "port" && pod.labels["app"] == "sdn" && (port == 9101 || port == 29101) + {{- else }} + rule: type == "pod" && labels["k8s-app"] == "kube-proxy" + {{- end }} + config: + config: + scrape_configs: + - job_name: "kubernetes-proxy" + {{- if eq .Values.distribution "openshift" }} + scheme: https + tls_config: + ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + insecure_skip_verify: true + authorization: + credentials_file: "/var/run/secrets/kubernetes.io/serviceaccount/token" + type: Bearer + static_configs: + - targets: ["`endpoint`:`port`"] + {{- else 
}} + static_configs: + - targets: ["`endpoint`:10249"] + {{- end }} + {{- end }} + {{- if .Values.agent.controlPlaneMetrics.scheduler.enabled }} + prometheus/kubernetes-scheduler: + {{- if eq .Values.distribution "openshift" }} + rule: type == "pod" && labels["app"] == "openshift-kube-scheduler" && labels["scheduler"] == "true" + {{- else }} + rule: type == "pod" && (labels["k8s-app"] == "kube-scheduler" || labels["component"] == "kube-scheduler") + {{- end }} + config: + config: + scrape_configs: + - job_name: "kubernetes-scheduler" + static_configs: + - targets: ["`endpoint`:10259"] + scheme: https + tls_config: + ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + insecure_skip_verify: true + authorization: + credentials_file: "/var/run/secrets/kubernetes.io/serviceaccount/token" + type: Bearer + {{- end }} + {{- end }} + {{- end }} + + kubeletstats: + collection_interval: 10s + {{- if eq .Values.distribution "gke/autopilot" }} + # GKE Autopilot doesn't allow using the secure kubelet endpoint, + # use the read-only endpoint instead. + auth_type: none + endpoint: ${K8S_NODE_IP}:10255 + {{- else }} + auth_type: serviceAccount + endpoint: ${K8S_NODE_IP}:10250 + {{- end }} + metric_groups: + - container + - pod + - node + # Volume metrics are not collected by default + # - volume + # To collect metadata from underlying storage resources, set k8s_api_config and list k8s.volume.type + # under extra_metadata_labels + # k8s_api_config: + # auth_type: serviceAccount + extra_metadata_labels: + - container.id + # - k8s.volume.type + + signalfx: + endpoint: 0.0.0.0:9943 + {{- end }} + + {{- if (eq (include "splunk-otel-collector.o11yTracesEnabled" .) "true") }} + smartagent/signalfx-forwarder: + type: signalfx-forwarder + listenAddress: 0.0.0.0:9080 + {{- end }} + + {{- if .Values.targetAllocator.enabled }} + prometheus/ta: + config: + global: + scrape_interval: 30s + target_allocator: + endpoint: http://{{ template "splunk-otel-collector.fullname" . 
}}-ta.{{ template "splunk-otel-collector.namespace" . }}.svc.cluster.local:80 + interval: 30s + collector_id: ${env:K8S_POD_NAME} + {{- end }} + + {{- if and (eq (include "splunk-otel-collector.logsEnabled" .) "true") (eq .Values.logsEngine "otel") }} + {{- if .Values.logsCollection.containers.enabled }} + filelog: + {{- if .Values.isWindows }} + include: ["C:\\var\\log\\pods\\*\\*\\*.log"] + {{- else }} + include: ["/var/log/pods/*/*/*.log"] + {{- end }} + # Exclude logs. The file format is + # /var/log/pods/<namespace_name>_<pod_name>_<pod_uid>/<container_name>/<run_id>.log + exclude: + {{- if .Values.logsCollection.containers.excludeAgentLogs }} + {{- if .Values.isWindows }} + - "C:\\var\\log\\pods\\{{ template "splunk-otel-collector.namespace" . }}_{{ include "splunk-otel-collector.fullname" . }}*_*\\otel-collector\\*.log" + {{- else }} + - /var/log/pods/{{ template "splunk-otel-collector.namespace" . }}_{{ include "splunk-otel-collector.fullname" . }}*_*/otel-collector/*.log + {{- end }} + {{- end }} + {{- range $_, $excludePath := .Values.logsCollection.containers.excludePaths }} + - {{ $excludePath }} + {{- end }} + start_at: beginning + include_file_path: true + include_file_name: false + poll_interval: 200ms + max_concurrent_files: 1024 + encoding: utf-8 + fingerprint_size: 1kb + max_log_size: 1MiB + # Disable force flush until this issue is fixed: + # https://github.com/open-telemetry/opentelemetry-log-collection/issues/292 + force_flush_period: "0" + storage: file_storage + retry_on_failure: + enabled: true + {{- if .Values.featureGates.noDropLogsPipeline }} + max_elapsed_time: 0s + {{- end }} + operators: + {{- if not .Values.logsCollection.containers.containerRuntime }} + - type: router + id: get-format + routes: + - output: parser-docker + expr: 'body matches "^\\{"' + - output: parser-crio + expr: 'body matches "^[^ Z]+ "' + - output: parser-containerd + expr: 'body matches "^[^ ]+ "' + {{- end }} + {{- if or (not .Values.logsCollection.containers.containerRuntime) (eq 
.Values.logsCollection.containers.containerRuntime "cri-o") }} + # Parse CRI-O format + - type: regex_parser + id: parser-crio + regex: '^(?P