Implemented simple, structured data logging #1061

Draft · wants to merge 1 commit into base: main
5 changes: 5 additions & 0 deletions CHANGES.md
@@ -13,6 +13,11 @@
`GroupTitle` and `Tags`. This feature has been added in order to support
grouping and filtering of datasets in UIs;
see https://github.com/xcube-dev/xcube-viewer/issues/385.

* xcube server can now be configured to produce structured data logs.
If `data_logging` is `true`, server requests are logged as JSON data
records, which can be machine-detected and parsed because each such
log line starts with a `{` character (see the parsing sketch after
this excerpt). (#1060)

* Added server endpoint `GET /statistics/{varName}` with query parameters
`lon`, `lat`, `time`, which is used to extract single-point data.
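The changelog entry above implies a simple consumption pattern: structured records and ordinary human-readable logs share one stream, and consumers can tell them apart by the leading `{`. The following is a minimal sketch of such a consumer, not part of this PR; the file name `server.log` and the `status` field are illustrative assumptions based on the record structure added in `tornado.py` further down.

```python
import json
from typing import Iterable, Iterator

def iter_data_records(lines: Iterable[str]) -> Iterator[dict]:
    """Yield parsed JSON data records, skipping plain message logs."""
    for line in lines:
        if line.lstrip().startswith("{"):
            try:
                yield json.loads(line)
            except json.JSONDecodeError:
                pass  # a plain message that merely starts with "{"

# Example: count failed requests in a captured log (file name is hypothetical).
with open("server.log") as f:
    failed = sum(1 for r in iter_data_records(f) if r.get("status", 0) >= 400)
print(f"{failed} failed requests")
```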
6 changes: 6 additions & 0 deletions test/server/test_server.py
@@ -201,6 +201,12 @@ def test_config_schema_effectively_merged(self):
"additionalProperties": False,
},
},
"data_logging": {
"type": "boolean",
"title": (
"Log JSON data records instead of human-readable message logs."
),
},
"trace_perf": {
"type": "boolean",
"title": "Output performance measures",
44 changes: 39 additions & 5 deletions test/server/webservers/test_tornado.py
@@ -8,9 +8,6 @@
Callable,
Any,
Union,
Tuple,
Type,
Dict,
)
from collections.abc import Sequence, Awaitable

@@ -84,6 +81,38 @@ def test_start_and_update_and_stop(self):
self.assertEqual(1, self.io_loop.start_count)
self.assertEqual(1, self.io_loop.stop_count)

def test_data_logging_disabled_by_default(self):
server = mock_server(framework=self.framework, config={})
self.framework.start(server.ctx)
self.assertNotIn("log_function", self.application.settings)
self.framework.stop(server.ctx)

def test_enable_data_logging(self):
server = mock_server(framework=self.framework, config={"data_logging": True})
self.framework.start(server.ctx)
self.assertIn("log_function", self.application.settings)
log_function = self.application.settings["log_function"]
self.assertTrue(callable(log_function))

class DatasetsHandler(tornado.web.RequestHandler):
def get(self):
return {}

# noinspection PyTypeChecker
handler = DatasetsHandler(
self.application,
tornado.httputil.HTTPServerRequest(
method="GET", uri="/datasets", connection=MockConnection()
),
)

# For the time being this is a smoke test only, since logging is a side
# effect. A follow-up could have log_function() return the data record
# so that its fields can be verified.
log_function(handler)

self.framework.stop(server.ctx)

def test_async_exec(self):
def my_func(a, b):
return a + b
@@ -158,13 +187,15 @@ def test_path_to_pattern_fails(self):
with self.assertRaises(ValueError) as cm:
p2p("/datasets/{dataset_id")
self.assertEqual(
'missing closing "}" in "/datasets/{dataset_id"', f"{cm.exception}",
'missing closing "}" in "/datasets/{dataset_id"',
f"{cm.exception}",
)

with self.assertRaises(ValueError) as cm:
p2p("/datasets/dataset_id}/bbox")
self.assertEqual(
'missing opening "{" in "/datasets/dataset_id}/bbox"', f"{cm.exception}",
'missing opening "{" in "/datasets/dataset_id}/bbox"',
f"{cm.exception}",
)

with self.assertRaises(ValueError) as cm:
@@ -421,6 +452,9 @@ def run_in_executor(self, *args, **kwargs):
class MockApplication:
def __init__(self):
self.handlers = []
self.settings = {}
self.ui_modules = {}
self.ui_methods = {}
self.listen_count = 0

# noinspection PyUnusedLocal
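Background for the two new tests above: `log_function` is a standard Tornado `Application` setting. Tornado invokes it once at the end of every request, passing the finished `RequestHandler`; if unset, it defaults to `Application.log_request`, which produces the familiar human-readable access lines. A minimal standalone sketch (the port and handler are illustrative):

```python
import json
import tornado.ioloop
import tornado.web

class PingHandler(tornado.web.RequestHandler):
    def get(self):
        self.write({"pong": True})

def log_json(handler: tornado.web.RequestHandler) -> None:
    # Tornado calls this after each request has finished.
    print(json.dumps({
        "status": handler.get_status(),
        "method": handler.request.method,
        "uri": handler.request.uri,
        "duration_ms": round(1000.0 * handler.request.request_time()),
    }))

if __name__ == "__main__":
    app = tornado.web.Application([("/ping", PingHandler)], log_function=log_json)
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()
```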
3 changes: 3 additions & 0 deletions xcube/server/config.py
@@ -34,6 +34,9 @@
" returned by server responses."
" Can be an absolute URL or a relative URL path.",
),
data_logging=JsonBooleanSchema(
title="Log JSON data records instead of human-readable message logs."
),
trace_perf=JsonBooleanSchema(
title="Output performance measures",
),
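Effect of the schema addition above: the server's top-level configuration now accepts an optional boolean `data_logging` key. For illustration only, the accepted shape corresponds to the plain JSON Schema fragment below, validated here with the third-party `jsonschema` package rather than xcube's own schema classes:

```python
import jsonschema  # pip install jsonschema; used here purely for illustration

fragment = {
    "type": "object",
    "properties": {"data_logging": {"type": "boolean"}},
}

jsonschema.validate({"data_logging": True}, fragment)   # passes
jsonschema.validate({"data_logging": "yes"}, fragment)  # raises ValidationError
```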
90 changes: 81 additions & 9 deletions xcube/server/webservers/tornado.py
@@ -4,11 +4,13 @@

import asyncio
import concurrent.futures
import datetime
import functools
import json
import logging
import traceback
import urllib.parse
from typing import Any, Optional, Union, Callable, Type
from typing import Any, Optional, Union, Callable
from collections.abc import Sequence, Awaitable, Mapping

import tornado.escape
@@ -124,16 +126,18 @@ def add_routes(self, api_routes: Sequence[ApiRoute], url_prefix: str):
handlers = []

for api_route in api_routes:
handlers.append((
url_prefix + self.path_to_pattern(api_route.path)
+ ("/?" if api_route.slash else ""),
TornadoRequestHandler,
{"api_route": api_route},
))
handlers.append(
(
url_prefix
+ self.path_to_pattern(api_route.path)
+ ("/?" if api_route.slash else ""),
TornadoRequestHandler,
{"api_route": api_route},
)
)
LOG.log(
LOG_LEVEL_DETAIL,
f"Added route {api_route.path!r}"
f" from API {api_route.api_name!r}",
f"Added route {api_route.path!r}" f" from API {api_route.api_name!r}",
)

if handlers:
@@ -150,6 +154,14 @@ def start(self, ctx: Context):
url_prefix = get_url_prefix(config)
tornado_settings = config.get("tornado", {})

if config.get("data_logging"):
# Reset formatters to just output the message (= JSON data record)
for h in logging.getLogger().handlers:
if isinstance(h, logging.StreamHandler):
h.setFormatter(logging.Formatter("%(message)s"))

self.application.settings["log_function"] = _get_data_log_function(ctx)

self.application.listen(port, address=address, **tornado_settings)

address_ = "127.0.0.1" if address == "0.0.0.0" else address
@@ -474,3 +486,63 @@ def finish(
):
self.write(data, content_type)
return self._handler.finish()


# noinspection PyUnusedLocal
def _get_data_log_function(ctx: Context):
"""Get a logging function that outputs JSON records.

Args:
ctx: Application context.

Returns:
A Tornado log function.
"""

# Closure here, because we may want to include
# auth info from context later
def log_function(handler: tornado.web.RequestHandler):
request = handler.request
status = handler.get_status()

if status < 400:
level = "INFO"
elif status < 500:
level = "WARNING"
else:
level = "ERROR"

duration = round(1000.0 * request.request_time())

# Note: current_user will not be set yet (authentication is not wired up here)
user_id = (
f"{handler.current_user}" if handler.current_user is not None else None
)

data = {
"timestamp": str(datetime.datetime.utcnow()),
"level": level,
"status": status,
"user_id": user_id,
"duration": duration, # milliseconds
"request": {
"method": request.method,
"uri": request.uri,
"host": request.host,
"protocol": request.protocol,
"remote_ip": request.remote_ip,
"user_agent": request.headers.get("User-Agent"),
},
}

LOG.log(getattr(logging, level), DataLogMessage(data))

return log_function


class DataLogMessage:
def __init__(self, data: dict[str, Any]):
self.data = data

def __str__(self):
return json.dumps(self.data)
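A detail of `DataLogMessage` worth spelling out: Python's logging machinery calls `str()` on the message object only when a record is actually emitted, so JSON serialization is deferred and skipped entirely for filtered-out records. Together with the `%(message)s` formatter reset in `start()`, each emitted record becomes exactly one JSON line. A self-contained sketch of that behavior (the class is restated so the example runs on its own; field values are illustrative):

```python
import json
import logging
from typing import Any

class DataLogMessage:
    def __init__(self, data: dict[str, Any]):
        self.data = data

    def __str__(self) -> str:
        # Only called by the logging framework when the record is emitted.
        return json.dumps(self.data)

logging.basicConfig(format="%(message)s", level=logging.INFO)
logging.getLogger("xcube").info(DataLogMessage({"status": 200, "uri": "/datasets"}))
# prints: {"status": 200, "uri": "/datasets"}
```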