Skip to content

Commit

Permalink
Add Watchman service component
Browse files Browse the repository at this point in the history
This component is designed to create a list of EXPECTED endpoints given a config file
and check whether those endpoints exist and are healthy. Rather than pinging Ambassador directly,
we want to have a list of the endpoints the user was expecting, so that if something goes wrong
while deploying a model we can report that its endpoint is not up.

Can later be extended to report additional information as well per endpoint / model / project level
  • Loading branch information
milesgranger committed Jan 11, 2019
1 parent 7f0b7d5 commit e70db0a
Show file tree
Hide file tree
Showing 8 changed files with 237 additions and 4 deletions.
25 changes: 25 additions & 0 deletions Dockerfile-Watchman
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
FROM python:3.6.8 as builder

# Copy source code
COPY . /code
# Copy .git to deduce version number
COPY .git /code/

WORKDIR /code
RUN rm -rf /code/dist \
&& python setup.py sdist \
&& mv /code/dist/$(ls /code/dist | head -1) /code/dist/gordo-components-packed.tar.gz

FROM python:3.6.8-slim-stretch

# Install requirements separately for improved docker caching
COPY requirements.txt /code/
RUN pip install -r /code/requirements.txt

# Copy the gordo-components sdist, packaged by 'python setup.py sdist' in the builder stage
COPY --from=builder /code/dist/gordo-components-packed.tar.gz .

# Install gordo-components, packaged from earlier 'python setup.py sdist'
RUN pip install ./gordo-components-packed.tar.gz

CMD ["gordo-components", "run-watchman", "--host", "0.0.0.0", "--port", "5556"]
16 changes: 13 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ export DOCKER_REGISTRY := auroradevacr.azurecr.io

MODEL_BUILDER_IMG_NAME := gordo-components/gordo-model-builder
MODEL_SERVER_IMG_NAME := gordo-components/gordo-model-server
WATCHMAN_IMG_NAME := gordo-components/gordo-watchman

# Create the image capable to building/training a model
model-builder:
Expand All @@ -11,6 +12,10 @@ model-builder:
model-server:
docker build . -f Dockerfile-ModelServer -t $(MODEL_SERVER_IMG_NAME)

# Create the image which reports status of expected model endpoints for the project
watchman:
docker build . -f Dockerfile-Watchman -t $(WATCHMAN_IMG_NAME)

push-server: model-server
export DOCKER_NAME=$(MODEL_SERVER_IMG_NAME);\
export DOCKER_IMAGE=$(MODEL_SERVER_IMG_NAME);\
Expand All @@ -21,19 +26,24 @@ push-builder: model-builder
export DOCKER_IMAGE=$(MODEL_BUILDER_IMG_NAME);\
./docker_push.sh

push-watchman: watchman
export DOCKER_NAME=$(WATCHMAN_IMG_NAME);\
export DOCKER_IMAGE=$(WATCHMAN_IMG_NAME);\
./docker_push.sh

# Publish images to the currently logged in docker repo
push-dev-images: push-builder push-server
push-dev-images: push-builder push-server push-watchman

push-prod-images: export GORDO_PROD_MODE:="true"
push-prod-images: push-builder push-server
push-prod-images: push-builder push-server push-watchman

# Make the python source distribution
sdist:
# Ensure the dist directory is empty/non-existent before sdist
rm -rf ./dist/
python setup.py sdist

images: model-builder model-server
images: model-builder model-server watchman

test:
python setup.py test
Expand Down
20 changes: 20 additions & 0 deletions gordo_components/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import click
from gordo_components.builder import build_model
from gordo_components.server import server
from gordo_components import watchman

import dateutil.parser

Expand Down Expand Up @@ -80,5 +81,24 @@ def run_server_cli(host, port):
server.run_server(host, port)


@click.command('run-watchman')
@click.option('--host', type=str, help='The host to run the server on.')
@click.option('--port', type=int, help='The port to run the server on.')
def run_watchman_cli(host, port):
    """
    Start the Gordo Watchman server for this project, which is responsible
    for dynamically comparing expected URLs derived from a project config file
    against those actually deployed to determine and report their health.

    \b
    Must have the following environment variables set:
        PROJECT_NAME: project_name for the config file
        TARGET_NAMES: A list of non-sanitized machine / target names
        TARGET_NAMES_SANITIZED: Same list of names, only sanitized
    """
    # The docstring above is rendered by click as the command's --help text.
    # click only honours the '\b' no-rewrap marker when it is the first line
    # of a paragraph, hence the blank line before it; without it the list of
    # environment variables is rewrapped into a single run of text.
    #
    # NOTE(review): neither option declares a default, so omitting --host or
    # --port passes None through to run_server instead of letting its own
    # defaults apply -- confirm that is intended.
    watchman.server.run_server(host, port)


gordo.add_command(build)
gordo.add_command(run_server_cli)
gordo.add_command(run_watchman_cli)
1 change: 1 addition & 0 deletions gordo_components/watchman/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . import server
98 changes: 98 additions & 0 deletions gordo_components/watchman/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-

import os
import yaml
import ast
import requests
import logging
from flask import Flask, jsonify, make_response
from flask.views import MethodView
from concurrent.futures import ThreadPoolExecutor

from gordo_components import __version__


# Will contain a list of endpoints to expected models via Ambassador
# see _load_endpoints()
ENDPOINTS = None


logger = logging.getLogger(__name__)


class WatchmanApi(MethodView):
    """
    Flask view reporting, for every expected endpoint held in the module-level
    ENDPOINTS, whether a GET against it via Ambassador succeeded.
    """

    @staticmethod
    def _check_endpoint(endpoint: str):
        """
        Probe a single endpoint and report whether it answered successfully.

        Returns the ``ok`` flag of the response, or False if the request (or
        reading its status) raised any exception.
        """
        # Drop one leading slash so the URL below does not contain '//'.
        if endpoint.startswith('/'):
            endpoint = endpoint[1:]
        url = f'http://ambassador/{endpoint}'

        try:
            healthy = requests.get(url, timeout=2).ok
        except Exception as exc:
            # Best effort: any failure is logged and reported as 'not healthy'.
            logger.error(f'Failed to check health of gordo-server: {endpoint} --> Error: {exc}')
            healthy = False
        return healthy

    def get(self):
        """
        Check all expected endpoints concurrently and return them, with their
        health flag and the project name, as a non-cacheable JSON response.
        """
        # map() yields results in the same order as ENDPOINTS.
        with ThreadPoolExecutor(max_workers=25) as pool:
            statuses = list(pool.map(self._check_endpoint, ENDPOINTS))

        # List of dicts: [{'endpoint': /path/to/endpoint, 'healthy': bool}]
        results = [
            {'endpoint': expected, 'healthy': is_healthy}
            for expected, is_healthy in zip(ENDPOINTS, statuses)
        ]

        body = {'endpoints': results, 'project_name': os.environ['PROJECT_NAME']}
        resp = make_response(jsonify(body), 200)
        resp.headers['Cache-Control'] = 'max-age=0'
        return resp


def healthcheck():
    """
    Healthcheck route for the Watchman server.

    Returns
    -------
    tuple
        A JSON payload holding the gordo ``version`` and ``config`` (the
        TARGET_NAMES environment variable parsed as YAML), and the HTTP
        status code 200.

    Raises
    ------
    KeyError
        If the TARGET_NAMES environment variable is not set.
    """
    # Use safe_load: calling yaml.load without an explicit Loader can construct
    # arbitrary Python objects, is deprecated since PyYAML 5.1 and is a
    # TypeError on PyYAML 6. A plain list of names parses identically.
    target_names = yaml.safe_load(os.environ['TARGET_NAMES'])
    payload = jsonify({'version': __version__, 'config': target_names})
    return payload, 200


def build_app():
    """
    Create the Flask application for Watchman.

    Populates the module-level ENDPOINTS from the environment, then registers
    the GET routes '/healthcheck' and '/' before returning the app.
    """
    global ENDPOINTS
    ENDPOINTS = _load_endpoints()

    flask_app = Flask(__name__)
    routes = (
        ('/healthcheck', healthcheck),
        ('/', WatchmanApi.as_view('sentinel_api')),
    )
    for rule, view in routes:
        flask_app.add_url_rule(rule=rule, view_func=view, methods=['GET'])
    return flask_app


def run_server(host: str = '0.0.0.0', port: int = 5555, debug: bool = False):
    """
    Build the Watchman Flask app and serve it on the given host and port.
    """
    build_app().run(host, port, debug=debug)


def _load_endpoints():
    """
    Build the list of expected model healthcheck endpoints for this project.

    Reads the TARGET_NAMES, TARGET_NAMES_SANITIZED and PROJECT_NAME environment
    variables. Both TARGET_NAMES variables are parsed as Python literals; only
    the sanitized names are used to build the endpoints.

    Returns
    -------
    list of str
        One '/gordo/v0/<project name>/<sanitized name>/healthcheck' path per
        sanitized target name, in the order given.

    Raises
    ------
    EnvironmentError
        If any of the three environment variables is missing.
    ValueError
        If TARGET_NAMES_SANITIZED evaluates to a single string rather than a
        collection of names, or if either TARGET_NAMES variable is not a valid
        Python literal (the latter may also surface as SyntaxError).
    """
    if 'TARGET_NAMES_SANITIZED' not in os.environ or 'TARGET_NAMES' not in os.environ:
        raise EnvironmentError('Need to have TARGET_NAMES_SANITIZED and TARGET_NAMES environment variables set as a'
                               ' list of expected, sanitized and non-sanitized target / machine names.')
    if 'PROJECT_NAME' not in os.environ:
        raise EnvironmentError('Need to have PROJECT_NAME environment variable set.')

    sanitized_names = ast.literal_eval(os.environ['TARGET_NAMES_SANITIZED'])
    # The non-sanitized names are not needed for the endpoints, but are still
    # parsed so a malformed value fails at startup rather than at request time.
    ast.literal_eval(os.environ['TARGET_NAMES'])
    project_name = os.environ["PROJECT_NAME"]

    # A quoted scalar would otherwise be iterated character by character and
    # silently yield one bogus endpoint per character.
    if isinstance(sanitized_names, (str, bytes)):
        raise ValueError('TARGET_NAMES_SANITIZED must be a list of names, not a single string.')

    # Precompute list of expected endpoints from config file
    return [f'/gordo/v0/{project_name}/{sanitized_name}/healthcheck'
            for sanitized_name in sanitized_names]


if __name__ == '__main__':
run_server()
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ test = pytest --addopts "-vs --doctest-modules --mypy -p gordo_components --log-
testpipetranslator = pytest --addopts "-vs -k pipe --log-cli-level=INFO"
test_influx = pytest --addopts "-vs -k PredictionInfluxTestCase --log-cli-level=INFO"
testserver = pytest --addopts "-vs -k GordoServer --log-cli-level=DEBUG"
testwatchman = pytest --addopts "-vs -k Watchman --log-cli-level=DEBUG"
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
setup_requirements = ['pytest-runner', 'setuptools_scm']

# Test requirements
test_requirements = ['docker==3.6.0', 'pytest==4.0.0', 'ruamel.yaml==0.15.76', 'pytest-mypy==0.3.2']
test_requirements = ['docker==3.6.0', 'pytest==4.0.0', 'ruamel.yaml==0.15.76', 'pytest-mypy==0.3.2', 'responses==0.10.5']

setup(
author="Miles Granger",
Expand Down
78 changes: 78 additions & 0 deletions tests/test_watchman.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import unittest
import json
import re

import responses

from gordo_components import __version__
from gordo_components.watchman import server
from tests.utils import temp_env_vars


TARGET_NAMES = ['CT-machine-name-456', 'CT-machine-name-123']
TARGET_NAMES_STR = str(TARGET_NAMES)
TARGET_NAMES_SANITIZED = ['ct-machine-name-456-kn209d', 'ct-machine-name-123-ksno0s9f092']
TARGET_NAMES_SANITIZED_STR = str(TARGET_NAMES_SANITIZED)
PROJECT_NAME = 'some-project-name'
AMBASSADORHOST = 'ambassador'
URL_FORMAT = 'http://{host}/gordo/v0/{project_name}/{sanitized_name}/healthcheck'


def request_callback(_request):
    """
    Fake the reply of a model server's /healthcheck endpoint for the mocked
    requests Watchman makes when checking whether an endpoint is alive.

    Returns a (status, headers, body) tuple: HTTP 200, no headers and a JSON
    body holding the gordo version.
    """
    body = json.dumps({'version': __version__})
    return 200, {}, body


class WatchmanTestCase(unittest.TestCase):
    """
    Tests for the Watchman Flask app: its /healthcheck route and the '/' route
    listing expected endpoints together with their health.
    """

    # NOTE(review): temp_env_vars comes from tests.utils; it presumably sets the
    # given environment variables for the duration of the decorated call.
    @temp_env_vars(TARGET_NAMES=TARGET_NAMES_STR, TARGET_NAMES_SANITIZED=TARGET_NAMES_SANITIZED_STR, PROJECT_NAME=PROJECT_NAME)
    def setUp(self):
        # build_app() computes the expected endpoints from the environment.
        app = server.build_app()
        app.testing = True
        self.app = app.test_client()

    @temp_env_vars(TARGET_NAMES=TARGET_NAMES_STR, TARGET_NAMES_SANITIZED=TARGET_NAMES_SANITIZED_STR, PROJECT_NAME=PROJECT_NAME)
    def test_healthcheck(self):
        """
        Ensure the Watchman /healthcheck route answers 200 and reports a version.
        """
        resp = self.app.get('/healthcheck')
        self.assertEqual(resp.status_code, 200)
        resp = resp.get_json()
        self.assertIn('version', resp)

    @temp_env_vars(TARGET_NAMES=TARGET_NAMES_STR, TARGET_NAMES_SANITIZED=TARGET_NAMES_SANITIZED_STR, PROJECT_NAME=PROJECT_NAME)
    @responses.activate
    def test_api(self):
        """
        Ensure Watchman API gives a list of expected endpoints and if they are healthy or not.
        """
        # Fake this request; The Watchman server will start pinging the expected endpoints to see if they are healthy
        # all of which start with the AMBASSADORHOST server; we'll fake these requests.
        responses.add_callback(
            responses.GET, re.compile(rf'.*{AMBASSADORHOST}.*/healthcheck'),
            callback=request_callback,
            content_type='application/json',
        )

        resp = self.app.get('/')
        self.assertEqual(resp.status_code, 200)

        # List of expected endpoints given the configured target names and the project name
        expected_endpoints = [URL_FORMAT.format(host=AMBASSADORHOST,
                                                project_name=PROJECT_NAME,
                                                sanitized_name=sanitized_name)
                              for sanitized_name in TARGET_NAMES_SANITIZED]

        data = resp.get_json()

        # Gives back project name as well.
        self.assertEqual(data['project_name'], PROJECT_NAME)

        # zip() stops at the shorter sequence, so without this check the loop
        # below would pass vacuously if endpoints were missing from the reply.
        self.assertEqual(len(expected_endpoints), len(data['endpoints']))

        for expected, actual in zip(expected_endpoints, data['endpoints']):

            # actual is a dict of {'endpoint': str, 'healthy': bool}
            self.assertEqual(expected.replace(f'http://{AMBASSADORHOST}', ''), actual['endpoint'])
            self.assertTrue(actual['healthy'])

0 comments on commit e70db0a

Please sign in to comment.