Skip to content

Commit

Permalink
feat(metrics): Improve metrics inventory performance and support larg…
Browse files Browse the repository at this point in the history
…e scale clusters (#261)
  • Loading branch information
galHalup authored Dec 17, 2023
2 parents d06eb9d + a3e2e9a commit 26a7525
Show file tree
Hide file tree
Showing 11 changed files with 81 additions and 115 deletions.
11 changes: 6 additions & 5 deletions .buildkite/tests/values_components_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ def test_override_deployment_tolerations():
deployment_tolerations = get_yaml_from_helm_template("test=test", "Deployment", deployment_name,
"spec.template.spec.tolerations", values_file=values_file)

assert deployment_tolerations[0]["key"] == "gpu", f"Expected gpu in deployment tolerations {deployment_tolerations}"
assert deployment_tolerations[0][
"key"] == "gpu", f"Expected gpu in deployment tolerations {deployment_tolerations}"


def test_override_deployment_node_selector():
Expand Down Expand Up @@ -87,9 +88,8 @@ def test_override_deployment_affinity():
@pytest.mark.parametrize(
"component, container_index",
[
("metrics", "0"),
("watcher", "2"),
("supervisor", "3"),
("watcher", "1"),
("supervisor", "2"),
]
)
def test_extra_env_vars(component, container_index):
Expand All @@ -107,4 +107,5 @@ def test_extra_env_vars(component, container_index):
f"spec.template.spec.containers.{container_index}.env",
values_file=values_file)

assert deployment_env_vars[-1]["name"] == "TEST_ENV_VAR", f"Expected TEST_ENV_VAR in deployment env vars {deployment_env_vars}"
assert deployment_env_vars[-1][
"name"] == "TEST_ENV_VAR", f"Expected TEST_ENV_VAR in deployment env vars {deployment_env_vars}"
2 changes: 1 addition & 1 deletion charts/komodor-agent/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ HELM_DOCS_ARGS := -s file \
-y="components.komodorAgent.watcher" \
-y="components.komodorAgent.supervisor" \
-y="components.komodorAgent.networkMapper" \
-y="components.komodorAgent.metrics" \
-y="components.komodorDaemon.metrics" \
-y="components.komodorDaemon" \
-y="allowedResources"

Expand Down
10 changes: 4 additions & 6 deletions charts/komodor-agent/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,21 +151,19 @@ The command removes all the Kubernetes components associated with the chart and
| components.komodorAgent.supervisor.extraEnvVars | list | `[]` | List of additional environment variables, Each entry is a key-value pair |
| components.komodorAgent.networkMapper.image | object | `{"name":"network-mapper","tag":"v1.0.3"}` | Override the komodor agent network mapper image name or tag. |
| components.komodorAgent.networkMapper.resources | object | `{}` | Set custom resources to the komodor agent network mapper container |
| components.komodorAgent.metrics.image | object | `{"name":"telegraf","tag":1.27}` | Override the komodor agent metrics image name or tag. |
| components.komodorAgent.metrics.resources | object | `{}` | Set custom resources to the komodor agent metrics container |
| components.komodorAgent.metrics.extraEnvVars | list | `[]` | List of additional environment variables, Each entry is a key-value pair |
| components.komodorDaemon.affinity | object | `{}` | Set node affinity for the komodor agent daemon |
| components.komodorDaemon.annotations | object | `{}` | Adds custom annotations - Example: `--set podAnnotations."app\.komodor\.com/app"="komodor-agent"` |
| components.komodorDaemon.nodeSelector | object | `{}` | Set node selectors for the komodor agent daemon |
| components.komodorDaemon.tolerations | list | `[]` | Add tolerations to the komodor agent daemon |
| components.komodorDaemon.podAnnotations | object | `{}` | Add annotations to the komodor agent daemon pod |
| components.komodorDaemon.metrics | object | `{"extraEnvVars":[],"resources":{"limits":{"cpu":1,"memory":"1Gi"},"requests":{"cpu":0.1,"memory":"384Mi"}}}` | Configure the komodor daemon metrics components |
| components.komodorDaemon.metrics.resources | object | `{"limits":{"cpu":1,"memory":"1Gi"},"requests":{"cpu":0.1,"memory":"384Mi"}}` | Add custom resources to the komodor agent watcher container |
| components.komodorDaemon.metrics.extraEnvVars | list | `[]` | List of additional environment variables, Each entry is a key-value pair |
| components.komodorDaemon.metricsInit | object | See sub-values | Configure the komodor daemon metrics init container |
| components.komodorDaemon.metricsInit.image | object | `{ "name": "init-daemon-agent", "tag": .Chart.AppVersion }` | Override the komodor agent metrics init image name or tag. |
| components.komodorDaemon.metricsInit.resources | object | `{}` | Set custom resources to the komodor agent metrics init container |
| components.komodorDaemon.metricsInit.extraEnvVars | list | `[]` | List of additional environment variables, Each entry is a key-value pair |
| components.komodorDaemon.metrics | object | `{"extraEnvVars":[],"image":{"name":"telegraf","tag":"1.29.1-alpine"},"resources":{"limits":{"cpu":1,"memory":"1Gi"},"requests":{"cpu":0.1,"memory":"384Mi"}}}` | Configure the komodor daemon metrics components |
| components.komodorDaemon.metrics.image | object | `{"name":"telegraf","tag":"1.29.1-alpine"}` | Override the komodor agent metrics image name or tag. |
| components.komodorDaemon.metrics.resources | object | `{"limits":{"cpu":1,"memory":"1Gi"},"requests":{"cpu":0.1,"memory":"384Mi"}}` | Set custom resources to the komodor agent metrics container |
| components.komodorDaemon.metrics.extraEnvVars | list | `[]` | List of additional environment variables, Each entry is a key-value pair |
| components.komodorDaemon.networkSniffer | object | See sub-values | Configure the komodor daemon network sniffer components |
| components.komodorDaemon.networkSniffer.image | object | `{"name":"network-mapper-sniffer","tag":"v1.0.3"}` | Override the komodor agent network sniffer image name or tag. |
| components.komodorDaemon.networkSniffer.resources | object | `{}` | Set custom resources to the komodor agent network sniffer container |
Expand Down
2 changes: 0 additions & 2 deletions charts/komodor-agent/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,11 @@ spec:
{{- include "ca-init.container" . | trim | nindent 8 }}

containers:
{{- include "metrics.container" . | trim | nindent 8 }}
{{- include "network_mapper.container" . | trim | nindent 8 }}
{{- include "watcher.container" . | trim | nindent 8 }}
{{- include "supervisor.container" . | trim | nindent 8 }}

volumes:
{{- include "metrics.deploy.volumes" . | trim | nindent 8 }}
{{- include "agent.deploy.volumes" . | trim | nindent 8 }}
{{- include "custom-ca.volume" . | trim | nindent 8 }}
{{- include "custom-ca.trusted-volume" . | trim | nindent 8 }}
29 changes: 2 additions & 27 deletions charts/komodor-agent/templates/metrics/_containers.tpl
Original file line number Diff line number Diff line change
@@ -1,35 +1,10 @@
{{- define "metrics.container" -}}
{{- if .Values.capabilities.metrics -}}
- name: metrics
image: {{ .Values.imageRepo }}/{{ .Values.components.komodorAgent.metrics.image.name}}:{{ .Values.components.komodorAgent.metrics.image.tag }}
imagePullPolicy: {{ .Values.pullPolicy }}
resources:
{{ toYaml .Values.components.komodorAgent.metrics.resources | trim | nindent 4 }}
volumeMounts:
- name: {{ include "metrics.config.name" . }}
mountPath: /etc/telegraf/telegraf.conf
subPath: telegraf.conf
{{- include "custom-ca.trusted-volumeMounts" . | indent 2 }}
envFrom:
- configMapRef:
name: "k8s-watcher-daemon-env-vars"
env:
{{- include "komodorAgent.proxy-conf" . | indent 2 }}
- name: CLUSTER_NAME
value: {{ .Values.clusterName }}
{{- if gt (len .Values.components.komodorAgent.metrics.extraEnvVars) 0 }}
{{ toYaml .Values.components.komodorAgent.metrics.extraEnvVars | nindent 2 }}
{{- end }}
{{- end }}
{{- end }}

{{- define "metrics.daemonset.container" }}
{{- if .Values.capabilities.metrics }}
- name: metrics
image: {{ .Values.imageRepo }}/{{ .Values.components.komodorAgent.metrics.image.name}}:{{ .Values.components.komodorAgent.metrics.image.tag }}
image: {{ .Values.imageRepo }}/{{ .Values.components.komodorDaemon.metrics.image.name}}:{{ .Values.components.komodorDaemon.metrics.image.tag }}
imagePullPolicy: {{ .Values.pullPolicy }}
resources:
{{ toYaml .Values.components.komodorAgent.metrics.resources | trim | nindent 4 }}
{{ toYaml .Values.components.komodorDaemon.metrics.resources | trim | nindent 4 }}
volumeMounts:
- name: {{ include "metrics.daemon.config.name" . }}
mountPath: /etc/telegraf/telegraf.conf
Expand Down
4 changes: 0 additions & 4 deletions charts/komodor-agent/templates/metrics/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
{{ .Values.communications.serverHost }}/metrics-collector/api/v1/collect
{{- end -}}

{{- define "metrics.config.name" -}}
{{ include "komodorAgent.name" . }}-metrics-config
{{- end -}}

{{- define "metrics.daemon.config.name" -}}
{{ include "komodorAgent.name" . }}-daemon-config
{{- end -}}
8 changes: 0 additions & 8 deletions charts/komodor-agent/templates/metrics/_volumes.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,4 @@
- key: komodor-k8s-watcher.yaml
path: komodor-k8s-watcher.yaml
{{- end }}
{{- end }}

{{- define "metrics.deploy.volumes" }}
{{- if .Values.capabilities.metrics }}
- name: {{ include "metrics.config.name" . }}
configMap:
name: {{ include "metrics.config.name" . }}
{{- end }}
{{- end }}
15 changes: 15 additions & 0 deletions charts/komodor-agent/templates/metrics/configmap-daemon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,20 @@ data:
namedrop = ["kubernetes_system_container"]
[inputs.kubernetes.tags]
measure_type = "usage"
[[inputs.kube_inventory]]
## URL for the Kubernetes API
url = {{ .Values.communications.apiServerUrl | quote }}
url_kubelet = "https://$NODE_IP:10250"
namespace = ""
bearer_token = "/run/secrets/kubernetes.io/serviceaccount/token"
insecure_skip_verify = true
resource_include = [ "pods", "nodes" ]
interval = "${INTERVAL_INVENTORY}"
flush_interval = "${FLUSH_INTERVAL_INVENTORY}"
node_name = "${NODE_NAME}"
fieldpass = ["resource_requests_millicpu_units","resource_limits_millicpu_units","resource_requests_memory_bytes","resource_limits_memory_bytes", "capacity_cpu_cores", "capacity_millicpu_cores", "capacity_memory_bytes"]
[inputs.kube_inventory.tags]
measure_type = "inventory"
{{- end }}
41 changes: 0 additions & 41 deletions charts/komodor-agent/templates/metrics/configmap.yaml

This file was deleted.

37 changes: 16 additions & 21 deletions charts/komodor-agent/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -162,16 +162,6 @@ components:
# components.komodorAgent.networkMapper.resources -- Set custom resources to the komodor agent network mapper container
resources: {}

metrics:
# components.komodorAgent.metrics.image -- Override the komodor agent metrics image name or tag.
image:
name: telegraf
tag: 1.27
# components.komodorAgent.metrics.resources -- Set custom resources to the komodor agent metrics container
resources: {}
# components.komodorAgent.metrics.extraEnvVars -- List of additional environment variables, Each entry is a key-value pair
extraEnvVars: []

komodorDaemon:
# components.komodorDaemon.affinity -- Set node affinity for the komodor agent daemon
affinity: { }
Expand All @@ -184,17 +174,6 @@ components:
# components.komodorDaemon.podAnnotations -- Add annotations to the komodor agent daemon pod
podAnnotations: {}
# components.komodorDaemon.metrics -- Configure the komodor daemon metrics components
metrics:
# components.komodorDaemon.metrics.resources -- Add custom resources to the komodor agent watcher container
resources:
limits:
cpu: 1
memory: 1Gi
requests:
cpu: 0.1
memory: 384Mi
# components.komodorDaemon.metrics.extraEnvVars -- List of additional environment variables, Each entry is a key-value pair
extraEnvVars: []

# components.komodorDaemon.metricsInit -- Configure the komodor daemon metrics init container
# @default -- See sub-values
Expand All @@ -209,6 +188,22 @@ components:
# components.komodorDaemon.metricsInit.extraEnvVars -- List of additional environment variables, Each entry is a key-value pair
extraEnvVars: []

metrics:
# components.komodorDaemon.metrics.image -- Override the komodor agent metrics image name or tag.
image:
name: telegraf
tag: 1.29.1-alpine
# components.komodorDaemon.metrics.resources -- Set custom resources to the komodor agent metrics container
resources:
limits:
cpu: 1
memory: 1Gi
requests:
cpu: 0.1
memory: 384Mi
# components.komodorDaemon.metrics.extraEnvVars -- List of additional environment variables, Each entry is a key-value pair
extraEnvVars: []

# components.komodorDaemon.networkSniffer -- Configure the komodor daemon network sniffer components
# @default -- See sub-values
networkSniffer:
Expand Down
37 changes: 37 additions & 0 deletions scripts/telegraf/update-ecr.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
#
# Re-publish a telegraf image tag to the Komodor public ECR repository as a
# multi-platform manifest.
#
# Usage: update-ecr.sh <telegraf-tag>
#
# For every entry in PLATFORMS the script pulls `telegraf:<tag>` for that
# platform, re-tags it as `<ECR_REPO>/telegraf:<tag>-<platform>` (slashes in
# the platform replaced by underscores) and pushes it. It then creates and
# pushes a manifest list `<ECR_REPO>/telegraf:<tag>` referencing all of the
# per-platform images.

# Abort on the first failing command, on unset variables and on failures
# inside pipelines, so the final success message is only printed when every
# login/pull/tag/push step actually succeeded.
set -euo pipefail

ECR_REPO="public.ecr.aws/komodor-public"
IMAGE_NAME="telegraf"
PLATFORMS=("linux/amd64" "linux/arm64/v8")

if [ "$#" -ne 1 ]; then
    # Usage errors go to stderr so they are not mistaken for normal output.
    echo "Usage: $0 <telegraf-tag>" >&2
    exit 1
fi

IMAGE_TAG="$1"

# Authenticate with AWS ECR & Docker Hub
komo ci docker-login

# Per-platform image references, collected while pushing so the manifest is
# always built from exactly the images pushed below (no hard-coded duplicates
# of the platform list).
MANIFEST_IMAGES=()

# Pull, tag, and push for each platform
for PLATFORM in "${PLATFORMS[@]}"; do
    docker pull --platform "${PLATFORM}" "${IMAGE_NAME}:${IMAGE_TAG}"

    # "linux/arm64/v8" -> "linux_arm64_v8": slashes are not valid in a tag.
    PLATFORM_TAG=${PLATFORM//\//_}
    PLATFORM_IMAGE="${ECR_REPO}/${IMAGE_NAME}:${IMAGE_TAG}-${PLATFORM_TAG}"

    docker tag "${IMAGE_NAME}:${IMAGE_TAG}" "${PLATFORM_IMAGE}"
    docker push "${PLATFORM_IMAGE}"

    MANIFEST_IMAGES+=("${PLATFORM_IMAGE}")
done

# Create the manifest list; --amend lets a re-run update an already existing
# local manifest instead of failing.
MANIFEST_TAG="${IMAGE_NAME}:${IMAGE_TAG}"
docker manifest create --amend "${ECR_REPO}/${MANIFEST_TAG}" "${MANIFEST_IMAGES[@]}"

# Push the manifest
docker manifest push "${ECR_REPO}/${MANIFEST_TAG}"

echo "Images and manifest pushed successfully to ECR."

0 comments on commit 26a7525

Please sign in to comment.