diff --git a/Dockerfile-Watchman b/Dockerfile-Watchman
new file mode 100644
index 000000000..2cb578ffb
--- /dev/null
+++ b/Dockerfile-Watchman
@@ -0,0 +1,31 @@
+FROM python:3.6.8 AS builder
+
+# Copy the source tree into the build stage
+COPY . /code
+# Copy .git explicitly to deduce the version number (setuptools_scm).
+# The destination must name the directory: COPY copies a directory's
+# contents, not the directory itself.
+COPY .git /code/.git
+
+WORKDIR /code
+# Build a fresh sdist and give it a fixed name for the runtime stage
+RUN rm -rf /code/dist \
+    && python setup.py sdist \
+    && mv /code/dist/*.tar.gz /code/dist/gordo-components-packed.tar.gz
+
+FROM python:3.6.8-slim-stretch
+
+# Install requirements separately for improved docker caching
+COPY requirements.txt /code/
+RUN pip install --no-cache-dir -r /code/requirements.txt
+
+# Copy gordo-components, packaged by the earlier 'python setup.py sdist'
+COPY --from=builder /code/dist/gordo-components-packed.tar.gz /code/
+
+# Install gordo-components from the packaged sdist
+RUN pip install --no-cache-dir /code/gordo-components-packed.tar.gz
+
+# Port passed to run-watchman below (EXPOSE is documentation only)
+EXPOSE 5556
+
+CMD ["gordo-components", "run-watchman", "--host", "0.0.0.0", "--port", "5556"]
diff --git a/Makefile b/Makefile
index 1e25da1fa..f777c9814 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,7 @@ export DOCKER_REGISTRY := auroradevacr.azurecr.io
 
 MODEL_BUILDER_IMG_NAME := gordo-components/gordo-model-builder
 MODEL_SERVER_IMG_NAME := gordo-components/gordo-model-server
+WATCHMAN_IMG_NAME := gordo-components/gordo-watchman
 
 # Create the image capable to building/training a model
 model-builder:
@@ -11,6 +12,10 @@ model-builder:
 model-server:
 	docker build . -f Dockerfile-ModelServer -t $(MODEL_SERVER_IMG_NAME)
 
+# Create the image which reports status of expected model endpoints for the project
+watchman:
+	docker build . -f Dockerfile-Watchman -t $(WATCHMAN_IMG_NAME)
+
 push-server: model-server
 	export DOCKER_NAME=$(MODEL_SERVER_IMG_NAME);\
 	export DOCKER_IMAGE=$(MODEL_SERVER_IMG_NAME);\
@@ -21,11 +26,16 @@ push-builder: model-builder
 	export DOCKER_IMAGE=$(MODEL_BUILDER_IMG_NAME);\
 	./docker_push.sh
 
+push-watchman: watchman
+	export DOCKER_NAME=$(WATCHMAN_IMG_NAME);\
+	export DOCKER_IMAGE=$(WATCHMAN_IMG_NAME);\
+	./docker_push.sh
+
 # Publish images to the currently logged in docker repo
-push-dev-images: push-builder push-server
+push-dev-images: push-builder push-server push-watchman
 
 push-prod-images: export GORDO_PROD_MODE:="true"
-push-prod-images: push-builder push-server
+push-prod-images: push-builder push-server push-watchman
 
 # Make the python source distribution
 sdist:
@@ -33,7 +43,7 @@ sdist:
 	rm -rf ./dist/
 	python setup.py sdist
 
-images: model-builder model-server
+images: model-builder model-server watchman
 
 test:
 	python setup.py test
diff --git a/gordo_components/cli.py b/gordo_components/cli.py
index 0933650b4..ac7520317 100644
--- a/gordo_components/cli.py
+++ b/gordo_components/cli.py
@@ -12,6 +12,7 @@ import click
 
 from gordo_components.builder import build_model
 from gordo_components.server import server
+from gordo_components import watchman
 
 import dateutil.parser
 
@@ -80,5 +81,26 @@ def run_server_cli(host, port):
     server.run_server(host, port)
 
 
+@click.command('run-watchman')
+@click.option('--host', type=str, default='0.0.0.0', show_default=True,
+              help='The host to run the server on.')
+@click.option('--port', type=int, default=5555, show_default=True,
+              help='The port to run the server on.')
+def run_watchman_cli(host, port):
+    """
+    Start the Gordo Watchman server for this project. It is responsible
+    for dynamically comparing expected URLs derived from a project config file
+    against those actually deployed to determine and report their health.
+
+    \b
+    Must have the following environment variables set:
+        PROJECT_NAME: project_name for the config file
+        TARGET_NAMES: A list of non-sanitized machine / target names
+        TARGET_NAMES_SANITIZED: Same list of names, only sanitized
+    """
+    watchman.server.run_server(host, port)
+
+
 gordo.add_command(build)
 gordo.add_command(run_server_cli)
+gordo.add_command(run_watchman_cli)
diff --git a/gordo_components/watchman/__init__.py b/gordo_components/watchman/__init__.py
new file mode 100644
index 000000000..6712684c6
--- /dev/null
+++ b/gordo_components/watchman/__init__.py
@@ -0,0 +1 @@
+from . import server
diff --git a/gordo_components/watchman/server.py b/gordo_components/watchman/server.py
new file mode 100644
index 000000000..8537fd315
--- /dev/null
+++ b/gordo_components/watchman/server.py
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+
+import os
+import yaml
+import ast
+import requests
+import logging
+from flask import Flask, jsonify, make_response
+from flask.views import MethodView
+from concurrent.futures import ThreadPoolExecutor
+
+from gordo_components import __version__
+
+
+# Will contain a list of endpoints to expected models via Ambassador
+# see _load_endpoints()
+ENDPOINTS = None
+
+
+logger = logging.getLogger(__name__)
+
+
+class WatchmanApi(MethodView):
+    """
+    API view to list expected endpoints in this project space and report if they
+    are up or not.
+    """
+    @staticmethod
+    def _check_endpoint(endpoint: str) -> bool:
+        """
+        Return True if a GET for the endpoint on the 'ambassador' host gives a
+        non-error response within the 2 second timeout; any exception is logged
+        and reported as unhealthy (False).
+        """
+        endpoint = endpoint[1:] if endpoint.startswith('/') else endpoint
+        try:
+            return requests.get(f'http://ambassador/{endpoint}', timeout=2).ok
+        except Exception as exc:
+            logger.error(f'Failed to check health of gordo-server: {endpoint} --> Error: {exc}')
+            return False
+
+    def get(self):
+        """
+        Check all expected endpoints concurrently and report the health of each
+        together with the project name.
+        """
+        with ThreadPoolExecutor(max_workers=25) as executor:
+            futures = {executor.submit(self._check_endpoint, endpoint): endpoint for endpoint in ENDPOINTS}
+
+            # List of dicts: [{'endpoint': /path/to/endpoint, 'healthy': bool}]
+            results = [{'endpoint': futures[f], 'healthy': f.result()} for f in futures]
+
+        payload = jsonify({'endpoints': results, 'project_name': os.environ['PROJECT_NAME']})
+        resp = make_response(payload, 200)
+        resp.headers['Cache-Control'] = 'max-age=0'
+        return resp
+
+
+def healthcheck():
+    """
+    Return gordo version, route for Watchman server
+    """
+    # safe_load: TARGET_NAMES comes from the environment and only needs plain
+    # YAML; yaml.load without an explicit Loader can construct arbitrary objects.
+    payload = jsonify({'version': __version__, 'config': yaml.safe_load(os.environ['TARGET_NAMES'])})
+    return payload, 200
+
+
+def build_app():
+    """
+    Build app and any associated routes
+    """
+    global ENDPOINTS
+    ENDPOINTS = _load_endpoints()
+
+    app = Flask(__name__)
+    app.add_url_rule(rule='/healthcheck', view_func=healthcheck, methods=['GET'])
+    app.add_url_rule(rule='/', view_func=WatchmanApi.as_view('sentinel_api'), methods=['GET'])
+    return app
+
+
+def run_server(host: str = '0.0.0.0', port: int = 5555, debug: bool = False):
+    """
+    Build the Watchman app and serve it with Flask's built-in server.
+    """
+    app = build_app()
+    app.run(host, port, debug=debug)
+
+
+def _load_endpoints():
+    """
+    Given the current environment vars of TARGET_NAMES_SANITIZED, TARGET_NAMES and PROJECT_NAME:
+    build a list of pre-computed expected endpoints
+    """
+    if 'TARGET_NAMES_SANITIZED' not in os.environ or 'TARGET_NAMES' not in os.environ:
+        raise EnvironmentError('Need to have TARGET_NAMES_SANITIZED and TARGET_NAMES environment variables set as a'
+                               ' list of expected, sanitized and non-sanitized target / machine names.')
+    if 'PROJECT_NAME' not in os.environ:
+        raise EnvironmentError('Need to have PROJECT_NAME environment variable set.')
+
+    TARGET_NAMES_SANITIZED = ast.literal_eval(os.environ['TARGET_NAMES_SANITIZED'])
+    # Parsed but not used further; a malformed TARGET_NAMES value still raises here.
+    _TARGET_NAMES = ast.literal_eval(os.environ['TARGET_NAMES'])
+    project_name = os.environ["PROJECT_NAME"]
+
+    # Precompute list of expected endpoints from the sanitized target names
+    endpoints = [f'/gordo/v0/{project_name}/{sanitized_name}/healthcheck'
+                 for sanitized_name in TARGET_NAMES_SANITIZED]
+    return endpoints
+
+
+if __name__ == '__main__':
+    run_server()
diff --git a/setup.cfg b/setup.cfg
index dcd6e1852..51666f973 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -3,3 +3,4 @@ test = pytest --addopts "-vs --doctest-modules --mypy -p gordo_components --log-
 testpipetranslator = pytest --addopts "-vs -k pipe --log-cli-level=INFO"
 test_influx = pytest --addopts "-vs -k PredictionInfluxTestCase --log-cli-level=INFO"
 testserver = pytest --addopts "-vs -k GordoServer --log-cli-level=DEBUG"
+testwatchman = pytest --addopts "-vs -k Watchman --log-cli-level=DEBUG"
diff --git a/setup.py b/setup.py
index 7f1e8e5eb..b96c620ac 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@ setup_requirements = ['pytest-runner', 'setuptools_scm']
 
 
 # Test requirements
-test_requirements = ['docker==3.6.0', 'pytest==4.0.0', 'ruamel.yaml==0.15.76', 'pytest-mypy==0.3.2']
+test_requirements = ['docker==3.6.0', 'pytest==4.0.0', 'ruamel.yaml==0.15.76', 'pytest-mypy==0.3.2', 'responses==0.10.5']
 
 setup(
     author="Miles Granger",
diff --git a/tests/test_watchman.py b/tests/test_watchman.py
new file mode 100644
index 000000000..07753f3a6
--- /dev/null
+++ b/tests/test_watchman.py
@@ -0,0 +1,82 @@
+import unittest
+import json
+import re
+
+import responses
+
+from gordo_components import __version__
+from gordo_components.watchman import server
+from tests.utils import temp_env_vars
+
+
+TARGET_NAMES = ['CT-machine-name-456', 'CT-machine-name-123']
+TARGET_NAMES_STR = str(TARGET_NAMES)
+TARGET_NAMES_SANITIZED = ['ct-machine-name-456-kn209d', 'ct-machine-name-123-ksno0s9f092']
+TARGET_NAMES_SANITIZED_STR = str(TARGET_NAMES_SANITIZED)
+PROJECT_NAME = 'some-project-name'
+AMBASSADORHOST = 'ambassador'
+URL_FORMAT = 'http://{host}/gordo/v0/{project_name}/{sanitized_name}/healthcheck'
+
+
+def request_callback(_request):
+    """
+    Mock the Watchman request to check if a given endpoint is alive or not.
+    This imitates a simple /healthcheck endpoint.
+    """
+    headers = {}
+    payload = {'version': __version__}
+    return 200, headers, json.dumps(payload)
+
+
+class WatchmanTestCase(unittest.TestCase):
+
+    @temp_env_vars(TARGET_NAMES=TARGET_NAMES_STR, TARGET_NAMES_SANITIZED=TARGET_NAMES_SANITIZED_STR, PROJECT_NAME=PROJECT_NAME)
+    def setUp(self):
+        app = server.build_app()
+        app.testing = True
+        self.app = app.test_client()
+
+    @temp_env_vars(TARGET_NAMES=TARGET_NAMES_STR, TARGET_NAMES_SANITIZED=TARGET_NAMES_SANITIZED_STR, PROJECT_NAME=PROJECT_NAME)
+    def test_healthcheck(self):
+        resp = self.app.get('/healthcheck')
+        self.assertEqual(resp.status_code, 200)
+        resp = resp.get_json()
+        self.assertIn('version', resp)
+        self.assertEqual(resp['config'], TARGET_NAMES)
+
+    @temp_env_vars(TARGET_NAMES=TARGET_NAMES_STR, TARGET_NAMES_SANITIZED=TARGET_NAMES_SANITIZED_STR, PROJECT_NAME=PROJECT_NAME)
+    @responses.activate
+    def test_api(self):
+        """
+        Ensure Watchman API gives a list of expected endpoints and if they are healthy or not.
+        """
+        # Fake this request; The Watchman server will start pinging the expected endpoints to see if they are healthy
+        # all of which start with the AMBASSADORHOST server; we'll fake these requests.
+        responses.add_callback(
+            responses.GET, re.compile(rf'.*{AMBASSADORHOST}.*/healthcheck'),
+            callback=request_callback,
+            content_type='application/json',
+        )
+
+        resp = self.app.get('/')
+        self.assertEqual(resp.status_code, 200)
+
+        # List of expected endpoints given TARGET_NAMES_SANITIZED and the project name
+        expected_endpoints = [URL_FORMAT.format(host=AMBASSADORHOST,
+                                                project_name=PROJECT_NAME,
+                                                sanitized_name=sanitized_name)
+                              for sanitized_name in TARGET_NAMES_SANITIZED]
+
+        data = resp.get_json()
+
+        # Gives back project name as well.
+        self.assertEqual(data['project_name'], PROJECT_NAME)
+
+        # zip() below stops at the shorter list, so check the lengths match first
+        self.assertEqual(len(data['endpoints']), len(expected_endpoints))
+
+        for expected, actual in zip(expected_endpoints, data['endpoints']):
+
+            # actual is a dict of {'endpoint': str, 'healthy': bool}
+            self.assertEqual(expected.replace(f'http://{AMBASSADORHOST}', ''), actual['endpoint'])
+            self.assertTrue(actual['healthy'])