Skip to content

Commit

Permalink
added swarm metrics for running and desired tasks
Browse files Browse the repository at this point in the history
  • Loading branch information
concentricspheres committed Jun 25, 2024
1 parent 3d45363 commit a957a2e
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ Port: `9090`
- container_restart_count
- container_status
- container_oom_killed
- service_running_replicas
- service_desired_replicas
51 changes: 51 additions & 0 deletions cmf_docker_metrics/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import threading
import time
import logging

import docker

Expand All @@ -8,6 +9,11 @@
from werkzeug.middleware.dispatcher import DispatcherMiddleware
from waitress import serve

# Install the default logging handler so log records are actually emitted.
logging.basicConfig()

# Root logger (no name passed); INFO level makes the periodic
# "Updating ..." progress messages visible.
LOGGER = logging.getLogger()
LOGGER.setLevel(logging.INFO)

# Flask application object for this exporter.
app = Flask(__name__)
# Docker client bound to the local daemon's Unix socket; the socket must be
# reachable (e.g. mounted) wherever this process runs.
cli = docker.DockerClient(base_url="unix:///var/run/docker.sock")

Expand All @@ -21,8 +27,14 @@
CONTAINER_OOM_KILLED = Gauge('container_oom_killed', 'Is the container OOMKilled', ['name', 'compose_project', 'compose_service'])
CONTAINER_STATUS = Enum('container_status', 'Container Status', ['name', 'compose_project', 'compose_service'], states=['restarting', 'running', 'paused', 'exited'])

# Swarm service metrics: one time series per (service_name, stack, swarm_nodes).
# NOTE(review): `swarm_nodes` carries a node count as a label value, so a change
# in cluster size presumably starts a new series per service — confirm intended.
SERVICE_RUNNING_REPLICAS = Gauge('service_running_replicas', 'Number of replicas running', ['service_name', 'stack', 'swarm_nodes'])
SERVICE_DESIRED_REPLICAS = Gauge('service_desired_replicas', 'Number of replicas that should be running', ['service_name', 'stack', 'swarm_nodes'])

def make_metrics():
def update_metrics():
LOGGER.info("Updating docker metrics...")
# Get containers metrics.
containers = cli.containers.list()

for container in containers:
Expand All @@ -46,6 +58,45 @@ def update_metrics():
compose_service=container.labels.get('com.docker.compose.service', ''),
).state(container.status)

# If this engine is part of a swarm, export running/desired replica counts
# for every service.
docker_info = cli.info()

# An empty NodeID is treated as "not part of a swarm": skip the section.
if docker_info['Swarm']['NodeID'] != "":
    LOGGER.info("Updating Swarm metrics...")
    swarm_nodes_count = docker_info['Swarm']['Nodes']

    for service in cli.services.list():
        spec = service.attrs['Spec']
        tasks = service.tasks()

        # Tasks whose current state is 'running'.
        running = sum(
            1 for task in tasks if task['Status']['State'] == 'running'
        )

        replicated = spec['Mode'].get('Replicated')
        if replicated:
            # Replicated mode: the spec states the desired count directly.
            desired = replicated['Replicas']
        else:
            # No 'Replicated' entry in the spec: fall back to counting the
            # tasks that are not meant to be shut down.
            desired = sum(
                1 for task in tasks if task['DesiredState'] != 'shutdown'
            )

        # Both gauges share the same label set; compute it once.
        labels = {
            'service_name': service.name,
            'stack': spec['Labels'].get('com.docker.stack.namespace', ''),
            'swarm_nodes': swarm_nodes_count,
        }

        SERVICE_RUNNING_REPLICAS.labels(**labels).set(running)
        # Fix: this gauge previously received `running`, so it always
        # mirrored service_running_replicas instead of the desired count.
        SERVICE_DESIRED_REPLICAS.labels(**labels).set(desired)

# End of one collection cycle.
LOGGER.info("Done.")

# Wait 5 seconds before the next collection cycle.
time.sleep(5)
# NOTE(review): indentation is not visible in this view. If this call sits
# inside update_metrics itself, every cycle adds a stack frame and the loop
# would eventually raise RecursionError; a `while True:` loop around the body
# would avoid that — TODO confirm against the full file.
update_metrics()

Expand Down

0 comments on commit a957a2e

Please sign in to comment.