Skip to content

Commit

Permalink
result_summary: cache fetched nodes to reduce the number of queries
Browse files Browse the repository at this point in the history
The bulk of the time is spent on API requests to search for nodes. Use a
node cache to reduce the number of requests.

Signed-off-by: Ricardo Cañuelo <[email protected]>
  • Loading branch information
r-c-n authored and nuclearcat committed Oct 8, 2024
1 parent 121c428 commit 2854d2c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/result_summary/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def run(service, context):
result_summary.logger.debug(f"Query matches found: {len(query_results)}")
nodes.extend(query_results)
result_summary.logger.info(f"Total nodes found: {len(nodes)}")
utils.node_cache_write(nodes)

# Post-process nodes
# Filter log files
Expand Down
33 changes: 28 additions & 5 deletions src/result_summary/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,32 @@
import requests
import yaml
from typing import Any, Dict
from threading import Lock

import result_summary


CONFIG_TRACES_FILE_PATH = './config/traces_config.yaml'
LAVA_JOB_URL = 'https://lava.collabora.dev/scheduler/job/'

# Module-level cache of nodes already fetched from the API, keyed by node
# id. Access is guarded by ``node_cache_lock`` so concurrent readers and
# writers do not race on the shared dict.
node_cache = {}
node_cache_lock = Lock()

def node_cache_write(nodes):
    """Add every node in *nodes* to the module-level node cache.

    Nodes whose id is already cached are left untouched; only ids not
    yet present are stored. Thread-safe: mutation happens while holding
    ``node_cache_lock``.
    """
    global node_cache
    with node_cache_lock:
        for item in nodes:
            # setdefault only inserts when the id is not cached yet,
            # preserving the first-seen entry.
            node_cache.setdefault(item['id'], item)

def node_cache_read(id):
    """Return the cached node with the given *id*, or None on a cache miss.

    Thread-safe: the lookup happens while holding ``node_cache_lock``.
    """
    # Bug fix: the previous body began with an unconditional ``return None``,
    # which made the locked lookup below unreachable — every read was a miss,
    # defeating the cache and keeping the API request count unchanged.
    global node_cache
    with node_cache_lock:
        # dict.get returns None when the id is absent, which is exactly
        # the miss signal callers expect.
        return node_cache.get(id)


def split_query_params(query_string):
"""Given a string input formatted like this:
Expand Down Expand Up @@ -213,16 +232,20 @@ def post_process_node(node, api):
key 'logs', which contains a dictionary of processed log
data (see get_logs()).
"""
node_cache = {}

def get_parent(node, api):
    """Fetch and return a node's parent, or None if it has none.

    Uses and updates the module node cache in the process: a cached
    parent is returned without touching the API; a freshly fetched
    parent is written back to the cache so later lookups of the same
    id avoid another request.

    NOTE(review): this span interleaved pre- and post-change diff lines
    (a stale direct ``node_cache`` lookup, a dangling ``else:`` with a
    duplicate API fetch, and a ``nonlocal`` for a removed local cache);
    this body is the reconstructed cache-aware version.
    """
    parent_id = node.get('parent')
    if not parent_id:
        return None
    # Cache hit: no API round-trip needed.
    parent = node_cache_read(parent_id)
    if parent:
        return parent
    # Cache miss: fetch from the API and remember the result for
    # subsequent calls.
    parent = api.node.get(parent_id)
    if parent:
        node_cache_write([parent])
    return parent

def get_job_id(node, api):
Expand Down

0 comments on commit 2854d2c

Please sign in to comment.