diff --git a/docker-compose.gcb.yml b/docker-compose.gcb.yml
index 8c73632b65..5653a1f807 100644
--- a/docker-compose.gcb.yml
+++ b/docker-compose.gcb.yml
@@ -17,7 +17,7 @@ x-test-common: &test-common
     - ".artifacts:/.artifacts"
   environment:
     SNUBA_SETTINGS: "$SNUBA_SETTINGS"
-    CLICKHOUSE_HOST: clickhouse
+    CLICKHOUSE_HOST: clickhouse.local
     USE_REDIS_CLUSTER: "1"
    REDIS_HOST: "redis-cluster"
     REDIS_PORT: 7000
@@ -71,6 +71,13 @@ services:
       KAFKA_TOOLS_LOG4J_LOGLEVEL: "WARN"
   clickhouse:
     image: "${CLICKHOUSE_IMAGE:-ghcr.io/getsentry/image-mirror-altinity-clickhouse-server:23.3.19.33.altinitystable}"
+    hostname: clickhouse.local
+    extra_hosts:
+      - "clickhouse.local:127.0.0.1" # Add entry to /etc/hosts file
+    ports:
+      - "8123:8123"
+      - "9000:9000"
+      - "9009:9009"
     volumes:
       - ./config/clickhouse/macros.xml:/etc/clickhouse-server/config.d/macros.xml
       - ./config/clickhouse/zookeeper.xml:/etc/clickhouse-server/config.d/zookeeper.xml
@@ -85,6 +92,9 @@ services:
     depends_on:
       - zookeeper
     image: "${CLICKHOUSE_IMAGE:-ghcr.io/getsentry/image-mirror-altinity-clickhouse-server:23.3.19.33.altinitystable}"
+    hostname: clickhouse-query.local
+    extra_hosts:
+      - "clickhouse-query.local:127.0.0.1" # Add entry to /etc/hosts file
     profiles: ["multi_node"]
     volumes:
       - ./test_distributed_migrations/config/clickhouse/zookeeper.xml:/etc/clickhouse-server/config.d/zookeeper.xml
@@ -97,6 +107,9 @@ services:
     depends_on:
       - zookeeper
     image: "${CLICKHOUSE_IMAGE:-altinity/clickhouse-server:23.3.19.33.altinitystable}"
+    hostname: clickhouse-01.local
+    extra_hosts:
+      - "clickhouse-01.local:127.0.0.1" # Add entry to /etc/hosts file
     profiles: ["multi_node"]
     volumes:
       - ./test_distributed_migrations/config/clickhouse/macros-01.xml:/etc/clickhouse-server/config.d/macros.xml
@@ -110,6 +123,9 @@ services:
     depends_on:
       - zookeeper
     image: "${CLICKHOUSE_IMAGE:-altinity/clickhouse-server:23.3.19.33.altinitystable}"
+    hostname: clickhouse-02.local
+    extra_hosts:
+      - "clickhouse-02.local:127.0.0.1" # Add entry to /etc/hosts file
     profiles: ["multi_node"]
     volumes:
       - ./test_distributed_migrations/config/clickhouse/macros-02.xml:/etc/clickhouse-server/config.d/macros.xml
@@ -123,6 +139,9 @@ services:
     depends_on:
       - zookeeper
     image: "${CLICKHOUSE_IMAGE:-altinity/clickhouse-server:23.3.19.33.altinitystable}"
+    hostname: clickhouse-03.local
+    extra_hosts:
+      - "clickhouse-03.local:127.0.0.1" # Add entry to /etc/hosts file
     profiles: ["multi_node"]
     volumes:
       - ./test_distributed_migrations/config/clickhouse/macros-03.xml:/etc/clickhouse-server/config.d/macros.xml
@@ -136,6 +155,9 @@ services:
     depends_on:
       - zookeeper
     image: "${CLICKHOUSE_IMAGE:-ghcr.io/getsentry/image-mirror-altinity-clickhouse-server:23.3.19.33.altinitystable}"
+    hostname: clickhouse-04.local
+    extra_hosts:
+      - "clickhouse-04.local:127.0.0.1" # Add entry to /etc/hosts file
     profiles: ["multi_node"]
     volumes:
       - ./test_distributed_migrations/config/clickhouse/macros-04.xml:/etc/clickhouse-server/config.d/macros.xml
diff --git a/snuba/admin/views.py b/snuba/admin/views.py
index 177cb3e725..1286783390 100644
--- a/snuba/admin/views.py
+++ b/snuba/admin/views.py
@@ -1,11 +1,13 @@
 from __future__ import annotations
 
 import io
+import socket
 import sys
+import time
 from contextlib import redirect_stdout
 from dataclasses import asdict
 from datetime import datetime
-from typing import Any, List, Mapping, Optional, Sequence, Tuple, Type, cast
+from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, Type, cast
 
 import sentry_sdk
 import simplejson as json
@@ -500,14 +502,46 @@ def clickhouse_trace_query() -> Response:
 
         profile_events_raw_sql = "SELECT ProfileEvents FROM system.query_log WHERE query_id = '{}' AND type = 'QueryFinish'"
 
-        for query_trace_data in parse_trace_for_query_ids(query_trace, storage):
+        for query_trace_data in parse_trace_for_query_ids(query_trace):
             sql = profile_events_raw_sql.format(query_trace_data.query_id)
             logger.info(
-                "Profile event gathering host: {}, port = {}, storage = {}, sql = {}, g.user = {}".format(
+                "Gathering profile event using host: {}, port = {}, storage = {}, sql = {}, g.user = {}".format(
                     query_trace_data.host, query_trace_data.port, storage, sql, g.user
                 )
             )
-            # TODO: Onkar to add the profile event logic later.
+            system_query_result, counter = None, 0
+            while counter < 30:
+                # There is a race between the trace query and the 'SELECT ProfileEvents...' query. ClickHouse does not immediately
+                # return the rows for 'SELECT ProfileEvents...' query. To make it return rows, sleep between the query executions.
+                system_query_result = run_system_query_on_host_with_sql(
+                    query_trace_data.host,
+                    int(query_trace_data.port),
+                    storage,
+                    sql,
+                    False,
+                    g.user,
+                )
+                if not system_query_result.results:
+                    time.sleep(1)
+                    counter += 1
+                else:
+                    break
+
+            if system_query_result is not None and len(system_query_result.results) > 0:
+                query_trace.profile_events_meta.append(system_query_result.meta)
+                query_trace.profile_events_profile = cast(
+                    Dict[str, int], system_query_result.profile
+                )
+                columns = system_query_result.meta
+                if columns:
+                    res = {}
+                    res["column_names"] = [name for name, _ in columns]
+                    res["rows"] = []
+                    for query_result in system_query_result.results:
+                        if query_result[0]:
+                            res["rows"].append(json.dumps(query_result[0]))
+                    query_trace.profile_events_results[query_trace_data.node_name] = res
+
         return make_response(jsonify(asdict(query_trace)), 200)
     except InvalidCustomQuery as err:
         return make_response(
@@ -522,6 +556,7 @@ def clickhouse_trace_query() -> Response:
             400,
         )
     except ClickhouseError as err:
+        logger.error(err, exc_info=True)
         details = {
             "type": "clickhouse",
             "message": str(err),
@@ -529,34 +564,38 @@ def clickhouse_trace_query() -> Response:
             "code": err.code,
         }
         return make_response(jsonify({"error": details}), 400)
     except Exception as err:
+        logger.error(err, exc_info=True)
         return make_response(
             jsonify({"error": {"type": "unknown", "message": str(err)}}),
             500,
         )
 
 
-def parse_trace_for_query_ids(
-    trace_output: TraceOutput, storage_key: str
-) -> List[QueryTraceData]:
-    result = []
+def hostname_resolves(hostname: str) -> bool:
+    try:
+        socket.gethostbyname(hostname)
+    except socket.error:
+        return False
+    else:
+        return True
+
+
+def parse_trace_for_query_ids(trace_output: TraceOutput) -> List[QueryTraceData]:
     summarized_trace_output = trace_output.summarized_trace_output
-    storage_info = get_storage_info()
-    matched = next(
-        (info for info in storage_info if info["storage_name"] == storage_key), None
-    )
-    if matched is not None:
-        local_nodes = matched.get("local_nodes", [])
-        query_node = matched.get("query_node", None)
-        result = [
-            QueryTraceData(
-                host=local_nodes[0].get("host") if local_nodes else query_node.get("host"),  # type: ignore
-                port=local_nodes[0].get("port") if local_nodes else query_node.get("port"),  # type: ignore
-                query_id=query_summary.query_id,
-                node_name=node_name,
-            )
-            for node_name, query_summary in summarized_trace_output.query_summaries.items()
-        ]
-    return result
+    node_name_to_query_id = {
+        node_name: query_summary.query_id
+        for node_name, query_summary in summarized_trace_output.query_summaries.items()
+    }
+    logger.info("node to query id mapping: {}".format(node_name_to_query_id))
+    return [
+        QueryTraceData(
+            host=node_name if hostname_resolves(node_name) else "127.0.0.1",
+            port=9000,
+            query_id=query_id,
+            node_name=node_name,
+        )
+        for node_name, query_id in node_name_to_query_id.items()
+    ]
 
 
 @application.route("/clickhouse_querylog_query", methods=["POST"])
diff --git a/tests/admin/test_api.py b/tests/admin/test_api.py
index 47cb68560b..0a386cf0fa 100644
--- a/tests/admin/test_api.py
+++ b/tests/admin/test_api.py
@@ -336,6 +336,7 @@ def test_query_trace(admin_api: FlaskClient) -> None:
     data = json.loads(response.data)
     assert " executeQuery" in data["trace_output"]
     assert "summarized_trace_output" in data
+    assert "profile_events_results" in data
 
 
 @pytest.mark.redis_db