Skip to content

Commit

Permalink
Merge pull request #93 from bento-platform/features/beacon-network
Browse files Browse the repository at this point in the history
Features/beacon network
  • Loading branch information
gsfk authored Oct 3, 2024
2 parents b3b8b78 + 2478ba1 commit d278832
Show file tree
Hide file tree
Showing 8 changed files with 417 additions and 17 deletions.
17 changes: 14 additions & 3 deletions bento_beacon/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from .endpoints.biosamples import biosamples
from .endpoints.cohorts import cohorts
from .endpoints.datasets import datasets
from .network.network import network
from .network.utils import init_network_service_registry
from .utils.exceptions import APIException
from werkzeug.exceptions import HTTPException
from .authz.middleware import authz_middleware
Expand Down Expand Up @@ -44,7 +46,7 @@

app.register_blueprint(info)

blueprints = {
endpoint_blueprints = {
"biosamples": biosamples,
"cohorts": cohorts,
"datasets": datasets,
Expand All @@ -53,12 +55,21 @@
}

with app.app_context():
# load blueprints
# load blueprints for endpoints
endpoint_sets = current_app.config["BEACON_CONFIG"].get("endpointSets")
for endpoint_set in endpoint_sets:
if endpoint_set not in BEACON_MODELS:
raise APIException(message="beacon config contains unknown endpoint set")
app.register_blueprint(blueprints[endpoint_set])
app.register_blueprint(endpoint_blueprints[endpoint_set])

# load blueprint for network
if current_app.config["USE_BEACON_NETWORK"]:
app.register_blueprint(network)
try:
init_network_service_registry()
except APIException:
# trouble setting up network, swallow for now
current_app.logger.error("API Error when initializing beacon network")

# get censorship settings from katsu
max_filters = None
Expand Down
36 changes: 28 additions & 8 deletions bento_beacon/config_files/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import os
import urllib3
from ..constants import GRANULARITY_COUNT, GRANULARITY_RECORD


GA4GH_BEACON_REPO_URL = "https://raw.githubusercontent.com/ga4gh-beacon/beacon-v2"
Expand All @@ -27,12 +28,13 @@ class Config:

# default when no requested granularity, as well as max granularity for anonymous users
DEFAULT_GRANULARITY = {
"individuals": "count",
"variants": "count",
"biosamples": "count",
"cohorts": "record",
"datasets": "record",
"info": "record",
"individuals": GRANULARITY_COUNT,
"variants": GRANULARITY_COUNT,
"biosamples": GRANULARITY_COUNT,
"cohorts": GRANULARITY_RECORD,
"datasets": GRANULARITY_RECORD,
"info": GRANULARITY_RECORD,
"network": GRANULARITY_COUNT,
}

DEFAULT_PAGINATION_PAGE_SIZE = 10
Expand Down Expand Up @@ -166,8 +168,6 @@ class Config:

MAP_EXTRA_PROPERTIES_TO_INFO = str_to_bool(os.environ.get("MAP_EXTRA_PROPERTIES_TO_INFO", ""))

PHENOPACKETS_SCHEMA_REFERENCE = {"entityType": "individual", "schema": "phenopackets v1"}

MAX_RETRIES_FOR_CENSORSHIP_PARAMS = 2

# don't let anonymous users query arbitrary phenopacket or experiment fields
Expand Down Expand Up @@ -229,3 +229,23 @@ def retrieve_config_json(filename):
BEACON_COHORT = retrieve_config_json("beacon_cohort.json")

BEACON_CONFIG = retrieve_config_json("beacon_config.json")

# -------------------
# network

USE_BEACON_NETWORK = os.environ.get("BENTO_BEACON_NETWORK_ENABLED", "false").strip().lower() in ("true", "1", "t")

NETWORK_CONFIG = retrieve_config_json("beacon_network_config.json")

NETWORK_URLS = NETWORK_CONFIG.get("beacons", [])
NETWORK_DEFAULT_TIMEOUT_SECONDS = NETWORK_CONFIG.get("network_default_timeout_seconds", 30)
NETWORK_VARIANTS_QUERY_TIMEOUT_SECONDS = NETWORK_CONFIG.get("network_variants_query_timeout_seconds", GOHAN_TIMEOUT)
NETWORK_VALID_QUERY_ENDPOINTS = [
"analyses",
"biosamples",
"cohorts",
"datasets",
"g_variants",
"individuals",
"runs",
]
Empty file.
91 changes: 91 additions & 0 deletions bento_beacon/network/bento_public_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import copy

# TEMP FILE
#
# handling for bento public query terms, currently how beacon UI handles search options to present to the user
# to be replaced by beacon spec filtering_terms in a future version
# best approach here is not yet clear:
# - intersection of all query terms is too small
# - union of all query terms loses any organization into categories, which varies across instances
#
# we may prefer to make network query terms configurable rather than generating them automatically


def flatten(nested_list):
    """Return a single list holding the items of every inner iterable, in order (one level deep)."""
    flat = []
    for inner in nested_list:
        flat.extend(inner)
    return flat


def fields_dict(search_fields):
    """
    Given a list of bento_public search fields, one for each instance,
    return a dictionary of search fields keyed to phenopackets mapping, with an array of all fields for that mapping

    Each element of search_fields is iterated as a list of sections, and each section's "fields" list is read.
    The dictionary key is the field's "mapping" followed directly (no separator) by its "group_by",
    "group_by_value" and "value_mapping" values when present. Duplicate fields are kept, in encounter order.

    Raises KeyError if a section has no "fields" entry or a field has no "mapping" entry.
    """
    optional_key_parts = ("group_by", "group_by_value", "value_mapping")

    all_fields_by_mapping = {}
    for instance_sections in search_fields:
        for section in instance_sections:
            for field in section["fields"]:
                field_key = field["mapping"]
                for part in optional_key_parts:
                    value = field.get(part)
                    # a key explicitly set to None is treated like a missing key instead of failing on str + None
                    if value is not None:
                        field_key += value
                # append in place rather than rebuilding the list for every field
                all_fields_by_mapping.setdefault(field_key, []).append(field)

    return all_fields_by_mapping


def options_union(options_list):
    """
    Return every option that appears in any of the given option lists, without duplicates.

    options_list holds one list of options per instance. Order of first appearance is kept
    (dict keys preserve insertion order), so options must be hashable.
    """
    # flatten() builds a new list and never mutates its input, so no defensive copy is needed
    return list(dict.fromkeys(flatten(options_list)))


def options_intersection(options_list):
    """
    Return the options that are present in every one of the given option lists.

    options_list holds one list of options per instance. The result keeps the order in which
    options are first encountered. Options must be hashable. An empty options_list gives [].
    """
    num_instances = len(options_list)

    # count how many instances contain each option
    counter = {}
    for options in options_list:
        # de-duplicate within one instance (keeping order), so an option repeated inside a single
        # list is counted once; otherwise the count no longer means "number of instances"
        for option in dict.fromkeys(options):
            counter[option] = counter.get(option, 0) + 1

    # only keep options that are present in all instances, preserving order
    return [option for option, count in counter.items() if count == num_instances]


# union: any filters that exist in at least one beacon
# bins should be joined also, although some ordering may disappear
# still unclear if this is a useful feature or not
# shortcomings here can be addressed by keeping our configs consistent where possible
def fields_union(search_fields):
    """
    Build one search field per distinct mapping found across all instances.

    Name, description, etc. are copied from the first field seen for a mapping;
    its "options" are replaced by the union of the options of all fields sharing that mapping.
    """

    def merge(entries):
        # deep copy so the caller's field dicts are never modified
        merged = copy.deepcopy(entries[0])
        merged["options"] = options_union([entry["options"] for entry in entries])
        return merged

    return [merge(entries) for entries in fields_dict(search_fields).values()]


def fields_intersection(search_fields):
    """
    Build one search field per mapping that occurs once for every instance.

    A mapping is kept only when the number of fields collected for it equals the number of
    instances, and only when those fields share at least one option. Name, description, etc.
    are copied from the first field seen; "options" is replaced by the shared options.
    """
    instance_count = len(search_fields)

    common_fields = []
    for entries in fields_dict(search_fields).values():
        # skip any mapping not matched by exactly one field per instance
        if len(entries) != instance_count:
            continue

        shared_options = options_intersection([entry["options"] for entry in entries])
        # a field with no option in common is of no use in the intersection
        if not shared_options:
            continue

        merged = copy.deepcopy(entries[0])  # deep copy so the caller's field dicts are never modified
        merged["options"] = shared_options
        common_fields.append(merged)

    return common_fields
58 changes: 58 additions & 0 deletions bento_beacon/network/network.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from flask import current_app, request, Blueprint
from ..utils.exceptions import APIException, NotFoundException
from .utils import network_beacon_get, network_beacon_post, host_beacon_response, filters_intersection, filters_union

network = Blueprint("network", __name__, url_prefix="/network")


# TODOs:
# filtering terms XXXXXXXXXXXXXXXXXXXXXXXXXXX
# /service-info? there's already one at beacon root
# async calls

# standard beacon info endpoints at the network level: /map, /configuration, etc
# handle GET args


@network.route("")
@network.route("/beacons")
def network_beacons():
    """
    List the beacons of the network together with the network-level filters.

    Reads NETWORK_BEACONS, ALL_NETWORK_FILTERS and COMMON_NETWORK_FILTERS from the app config.
    Raises APIException when NETWORK_BEACONS is missing or empty.
    """
    config = current_app.config

    known_beacons = config.get("NETWORK_BEACONS")
    if not known_beacons:
        raise APIException("no beacons found in network config")

    # filters handling still experimental
    all_filters = config["ALL_NETWORK_FILTERS"]
    common_filters = config["COMMON_NETWORK_FILTERS"]

    response = {
        "filtersUnion": all_filters,
        "filtersIntersection": common_filters,
        "beacons": list(known_beacons.values()),
    }
    return response


# returns 404 if endpoint missing
@network.route("/beacons/<beacon_id>/<endpoint>", methods=["GET", "POST"])
def query(beacon_id, endpoint):
    """
    Forward a query for one endpoint to one beacon of the network and return its response.

    beacon_id is looked up in the NETWORK_BEACONS config entry, and endpoint must be listed in
    NETWORK_VALID_QUERY_ENDPOINTS. A query addressed to this beacon itself (BEACON_ID) is answered
    through host_beacon_response; any other beacon is called at its "apiUrl".

    Raises NotFoundException when the beacon id or the endpoint is unknown.
    """
    # NETWORK_BEACONS may be missing from the config (e.g. the network registry was never populated);
    # treat that as an empty registry so the caller gets the not-found error below rather than a KeyError
    beacons = current_app.config.get("NETWORK_BEACONS") or {}
    beacon = beacons.get(beacon_id)

    if not beacon:
        raise NotFoundException(message=f"no beacon found with id {beacon_id}")

    if endpoint not in current_app.config["NETWORK_VALID_QUERY_ENDPOINTS"]:
        raise NotFoundException()

    # special handling for host beacon, avoid circular http calls
    host_id = current_app.config["BEACON_ID"]
    if beacon_id == host_id:
        return host_beacon_response(endpoint)

    # all other beacons
    api_url = beacon.get("apiUrl")

    if request.method == "POST":
        payload = request.get_json()
        r = network_beacon_post(api_url, payload, endpoint)
    else:
        # TODO: pass get args
        r = network_beacon_get(api_url, endpoint)

    return r
Loading

0 comments on commit d278832

Please sign in to comment.