From a957a2e3f71263ca1ee11dd6287abd0f92f2d659 Mon Sep 17 00:00:00 2001
From: concentricspheres <107502292+concentricspheres@users.noreply.github.com>
Date: Mon, 24 Jun 2024 22:30:08 -0600
Subject: [PATCH] added swarm metrics for running and desired tasks

---
 README.md                  |  2 ++
 cmf_docker_metrics/main.py | 51 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/README.md b/README.md
index 1ebdd57..b5a9888 100644
--- a/README.md
+++ b/README.md
@@ -13,3 +13,5 @@ Port: `9090`
 - container_restart_count
 - container_status
 - container_oom_killed
+- service_running_replicas
+- service_desired_replicas
diff --git a/cmf_docker_metrics/main.py b/cmf_docker_metrics/main.py
index c740dea..9e7419a 100644
--- a/cmf_docker_metrics/main.py
+++ b/cmf_docker_metrics/main.py
@@ -1,5 +1,6 @@
 import threading
 import time
+import logging
 
 import docker
 
@@ -8,6 +9,11 @@
 from werkzeug.middleware.dispatcher import DispatcherMiddleware
 from waitress import serve
 
+logging.basicConfig()
+
+LOGGER = logging.getLogger()
+LOGGER.setLevel(logging.INFO)
+
 app = Flask(__name__)
 
 cli = docker.DockerClient(base_url="unix:///var/run/docker.sock")
@@ -21,8 +27,14 @@
 CONTAINER_OOM_KILLED = Gauge('container_oom_killed', 'Is the container OOMKilled', ['name', 'compose_project', 'compose_service'])
 CONTAINER_STATUS = Enum('container_status', 'Container Status', ['name', 'compose_project', 'compose_service'], states=['restarting', 'running', 'paused', 'exited'])
 
+# Swarm.
+SERVICE_RUNNING_REPLICAS = Gauge('service_running_replicas', 'Number of replicas running', ['service_name', 'stack', 'swarm_nodes'])
+SERVICE_DESIRED_REPLICAS = Gauge('service_desired_replicas', 'Number of replicas that should be running', ['service_name', 'stack', 'swarm_nodes'])
+
 def make_metrics():
     def update_metrics():
+        LOGGER.info("Updating docker metrics...")
+
         # Get containers metrics.
         containers = cli.containers.list()
         for container in containers:
@@ -46,6 +58,45 @@ def update_metrics():
                 compose_service=container.labels.get('com.docker.compose.service', ''),
             ).state(container.status)
 
+        # Only swarm managers can list services; on workers the API call fails.
+        docker_info = cli.info()
+
+        if docker_info.get('Swarm', {}).get('ControlAvailable'):
+            LOGGER.info("Updating Swarm metrics...")
+            swarm_nodes_count = docker_info['Swarm']['Nodes']
+            services = cli.services.list()
+
+            for service in services:
+                replicas = 0
+                running = 0
+
+                tasks = service.tasks()
+
+                for task in tasks:
+                    # Fallback desired count: every task not headed for shutdown.
+                    if task['DesiredState'] != 'shutdown':
+                        replicas += 1
+
+                    if task['Status']['State'] == 'running':
+                        running += 1
+
+                if service.attrs['Spec']['Mode'].get('Replicated'):
+                    replicas = service.attrs['Spec']['Mode']['Replicated']['Replicas']
+
+                SERVICE_RUNNING_REPLICAS.labels(
+                    service_name=service.name,
+                    stack=service.attrs['Spec']['Labels'].get('com.docker.stack.namespace', ''),
+                    swarm_nodes=swarm_nodes_count,
+                ).set(running)
+
+                SERVICE_DESIRED_REPLICAS.labels(
+                    service_name=service.name,
+                    stack=service.attrs['Spec']['Labels'].get('com.docker.stack.namespace', ''),
+                    swarm_nodes=swarm_nodes_count,
+                ).set(replicas)
+
+        LOGGER.info("Done.")
+
         time.sleep(5)
         update_metrics()
 