Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Features/beacon network #93

Merged
merged 43 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
4cb54fe
add first draft of network utils
gsfk May 27, 2024
4d10ceb
first draft of endpoints for beacon network
gsfk May 28, 2024
c21d71d
don't crash if a network beacon is down
gsfk May 31, 2024
f2b6a48
handle filters
gsfk Jun 11, 2024
912b2e2
temp network config
gsfk Jun 11, 2024
3e458aa
network fixes
gsfk Jun 11, 2024
b100bef
lint
gsfk Jun 11, 2024
69c7221
workaround for flask url_prefix issues is to not use it
gsfk Jun 11, 2024
ae63b28
tinkering with querySections
gsfk Jun 17, 2024
378a3e8
add katsu rule changes
gsfk Jul 22, 2024
8b320d7
tweak network timeouts
gsfk Aug 12, 2024
901e1f1
Merge branch 'master' into features/beacon-network
gsfk Aug 12, 2024
aace127
lint
gsfk Aug 12, 2024
b4a3877
temp network test
gsfk Aug 12, 2024
8b91bbe
temp bento public query handling
gsfk Aug 15, 2024
25e4e36
headers edge case
gsfk Aug 15, 2024
3426ab9
config additions for network
gsfk Aug 15, 2024
7098261
network config
gsfk Aug 15, 2024
76bab6b
delete temp hardcoded stuff
gsfk Aug 15, 2024
2d33fde
network updates
gsfk Aug 15, 2024
e5579f9
network utilts
gsfk Aug 15, 2024
ee184ef
add network timeout params
gsfk Aug 15, 2024
1cb958e
completed todo
gsfk Aug 23, 2024
4f47193
completed todo
gsfk Aug 23, 2024
0f4cb38
parameterize beacon network urls
gsfk Aug 28, 2024
0d01716
don't allow filters with empty options list
gsfk Aug 29, 2024
ee67e8f
lint
gsfk Aug 29, 2024
d472877
unused config
gsfk Sep 4, 2024
51dcd6f
snake case
gsfk Sep 4, 2024
a09a117
better handling for missing request headers
gsfk Sep 4, 2024
2417c65
merge description changes
gsfk Sep 10, 2024
20d1005
fix variants count for beacon network host
gsfk Sep 10, 2024
a2cdb71
Merge branch 'master' into features/beacon-network
gsfk Sep 18, 2024
631eae2
fix for network query sections auth
gsfk Sep 18, 2024
7ad1134
use "none" auth for beacon network
gsfk Sep 19, 2024
c473f2b
beacon network init fixes
gsfk Oct 2, 2024
cc3b528
comments
gsfk Oct 3, 2024
0ec58be
lint
gsfk Oct 3, 2024
6b5d553
fixes for katsu private overview changes
gsfk Oct 3, 2024
cd5833b
reorder imports
gsfk Oct 3, 2024
7f2c7f6
auth type hint
gsfk Oct 3, 2024
0346e92
lint
gsfk Oct 3, 2024
2478ba1
correct network config file name
gsfk Oct 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions bento_beacon/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from .endpoints.biosamples import biosamples
from .endpoints.cohorts import cohorts
from .endpoints.datasets import datasets
from .network.network import network
from .network.utils import init_network_service_registry
from .utils.exceptions import APIException
from werkzeug.exceptions import HTTPException
from .authz.middleware import authz_middleware
Expand Down Expand Up @@ -44,7 +46,7 @@

app.register_blueprint(info)

blueprints = {
endpoint_blueprints = {
"biosamples": biosamples,
"cohorts": cohorts,
"datasets": datasets,
Expand All @@ -53,12 +55,21 @@
}

with app.app_context():
# load blueprints
# load blueprints for endpoints
endpoint_sets = current_app.config["BEACON_CONFIG"].get("endpointSets")
for endpoint_set in endpoint_sets:
if endpoint_set not in BEACON_MODELS:
raise APIException(message="beacon config contains unknown endpoint set")
app.register_blueprint(blueprints[endpoint_set])
app.register_blueprint(endpoint_blueprints[endpoint_set])

# load blueprint for network
if current_app.config["USE_BEACON_NETWORK"]:
app.register_blueprint(network)
try:
init_network_service_registry()
except APIException:
# trouble setting up network, swallow for now
current_app.logger.error("API Error when initializing beacon network")

# get censorship settings from katsu
max_filters = None
Expand Down
36 changes: 28 additions & 8 deletions bento_beacon/config_files/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import os
import urllib3
from ..constants import GRANULARITY_COUNT, GRANULARITY_RECORD


GA4GH_BEACON_REPO_URL = "https://raw.githubusercontent.com/ga4gh-beacon/beacon-v2"
Expand All @@ -27,12 +28,13 @@ class Config:

# default when no requested granularity, as well as max granularity for anonymous users
DEFAULT_GRANULARITY = {
"individuals": "count",
"variants": "count",
"biosamples": "count",
"cohorts": "record",
"datasets": "record",
"info": "record",
"individuals": GRANULARITY_COUNT,
"variants": GRANULARITY_COUNT,
"biosamples": GRANULARITY_COUNT,
"cohorts": GRANULARITY_RECORD,
"datasets": GRANULARITY_RECORD,
"info": GRANULARITY_RECORD,
"network": GRANULARITY_COUNT,
}

DEFAULT_PAGINATION_PAGE_SIZE = 10
Expand Down Expand Up @@ -166,8 +168,6 @@ class Config:

MAP_EXTRA_PROPERTIES_TO_INFO = str_to_bool(os.environ.get("MAP_EXTRA_PROPERTIES_TO_INFO", ""))

PHENOPACKETS_SCHEMA_REFERENCE = {"entityType": "individual", "schema": "phenopackets v1"}

MAX_RETRIES_FOR_CENSORSHIP_PARAMS = 2

# don't let anonymous users query arbitrary phenopacket or experiment fields
Expand Down Expand Up @@ -229,3 +229,23 @@ def retrieve_config_json(filename):
BEACON_COHORT = retrieve_config_json("beacon_cohort.json")

BEACON_CONFIG = retrieve_config_json("beacon_config.json")

# -------------------
# network

USE_BEACON_NETWORK = os.environ.get("BENTO_BEACON_NETWORK_ENABLED", "false").strip().lower() in ("true", "1", "t")

NETWORK_CONFIG = retrieve_config_json("beacon_network_config.json")

NETWORK_URLS = NETWORK_CONFIG.get("beacons", [])
NETWORK_DEFAULT_TIMEOUT_SECONDS = NETWORK_CONFIG.get("network_default_timeout_seconds", 30)
NETWORK_VARIANTS_QUERY_TIMEOUT_SECONDS = NETWORK_CONFIG.get("network_variants_query_timeout_seconds", GOHAN_TIMEOUT)
NETWORK_VALID_QUERY_ENDPOINTS = [
"analyses",
"biosamples",
"cohorts",
"datasets",
"g_variants",
"individuals",
"runs",
]
Empty file.
91 changes: 91 additions & 0 deletions bento_beacon/network/bento_public_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import copy

# TEMP FILE
#
# handling for bento public query terms, currently how beacon UI handles search options to present to the user
# to be replaced by beacon spec filtering_terms in a future version
# best approach here is not yet clear:
# - intersection of all query terms is too small
# - union of all query terms loses any organization into categories, which varies across instances
#
# we may prefer to make network query terms configurable rather than generating them automatically


def flatten(nested_list):
    """Collapse a list of iterables into one flat list (a single level of nesting is removed)."""
    flat = []
    for inner in nested_list:
        flat.extend(inner)
    return flat


def fields_dict(search_fields):
    """
    Given a list of bento_public search fields, one for each instance,
    return a dictionary of search fields keyed to phenopackets mapping, with an array of all fields for that mapping

    Each element of search_fields is iterated as a list of sections, and each section's "fields" list
    is read. The dictionary key is the concatenation of a field's "mapping" with its optional
    "group_by", "group_by_value" and "value_mapping" entries (missing ones count as "").
    Duplicate fields are kept, in the order they are encountered.
    """
    all_fields_by_mapping = {}
    # walk instances -> sections -> fields without building intermediate copies
    for instance_sections in search_fields:
        for section in instance_sections:
            for f in section["fields"]:
                field_key = (
                    f["mapping"] + f.get("group_by", "") + f.get("group_by_value", "") + f.get("value_mapping", "")
                )
                # append in place rather than rebuilding the list on every insert
                all_fields_by_mapping.setdefault(field_key, []).append(f)

    return all_fields_by_mapping


def options_union(options_list):
    """Return every option found in any of the given option lists, de-duplicated, in first-seen order."""
    seen = {}
    for options in options_list:
        for option in options:
            # dict keys keep insertion order, so the first occurrence fixes the position
            seen.setdefault(option, None)
    return list(seen)


def options_intersection(options_list):
    """
    Return the options present in every list of options_list, in first-seen order.

    Each option is counted at most once per list, so a value repeated inside a single
    instance's options cannot stand in for an instance that lacks it (or push the count
    past the number of instances and drop a genuinely shared option).
    An empty options_list yields an empty list.
    """
    num_instances = len(options_list)

    counter = {}
    for options in options_list:
        # dict.fromkeys removes duplicates within this instance while preserving order
        for option in dict.fromkeys(options):
            counter[option] = counter.get(option, 0) + 1

    # only keep options that are present in all instances, preserving order
    return [option for option, count in counter.items() if count == num_instances]


# union: every filter that appears in at least one beacon, with its options merged across beacons
# bins should be joined also, although some ordering may disappear
# still unclear if this is a useful feature or not
# shortcomings here can be addressed by keeping our configs consistent where possible
def fields_union(search_fields):
    """
    Build one search field per distinct mapping found across all instances.

    For each group returned by fields_dict, the first entry is deep-copied (so name,
    description, etc. come from it, arbitrarily) and its "options" are replaced with the
    ordered union of the options of every entry in the group.
    """

    def merge(entries):
        merged = copy.deepcopy(entries[0])
        merged["options"] = options_union([e["options"] for e in entries])
        return merged

    grouped = fields_dict(search_fields)
    return [merge(entries) for entries in grouped.values()]


def fields_intersection(search_fields):
    """
    Build one search field per mapping that appears in every instance.

    A mapping is kept when fields_dict collected as many entries for it as there are
    instances. For each kept mapping the first entry is deep-copied (so name, description,
    etc. come from it, arbitrarily) and its "options" are replaced with the options shared
    by all entries. Fields whose shared options list is empty are left out.
    """
    num_instances = len(search_fields)
    fields = fields_dict(search_fields)

    # remove any fields not in all entries
    intersection_dict = {mapping: entries for mapping, entries in fields.items() if len(entries) == num_instances}

    # create one entry for each mapping
    intersection_fields = []
    for f in intersection_dict.values():
        options = options_intersection([e["options"] for e in f])
        if not options:
            # a filter with no options common to all instances is not offered
            continue
        entry = copy.deepcopy(f[0])  # arbitrarily get name, description, etc from first entry
        entry["options"] = options
        intersection_fields.append(entry)

    return intersection_fields
58 changes: 58 additions & 0 deletions bento_beacon/network/network.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from flask import current_app, request, Blueprint
from ..utils.exceptions import APIException, NotFoundException
from .utils import network_beacon_get, network_beacon_post, host_beacon_response, filters_intersection, filters_union

# Flask blueprint holding the beacon network routes; everything registered on it is served under "/network"
network = Blueprint("network", __name__, url_prefix="/network")


# TODOs:
# filtering terms XXXXXXXXXXXXXXXXXXXXXXXXXXX
# /service-info? there's already one at beacon root
# async calls

# standard beacon info endpoints at the network level: /map, /configuration, etc
# handle GET args


@network.route("")
@network.route("/beacons")
def network_beacons():
    """
    List the beacons in the network together with the network-wide filters.

    Reads NETWORK_BEACONS, ALL_NETWORK_FILTERS and COMMON_NETWORK_FILTERS from the app config.
    Raises APIException when NETWORK_BEACONS is missing or empty.
    """
    config = current_app.config
    beacons_dict = config.get("NETWORK_BEACONS")
    if not beacons_dict:
        raise APIException("no beacons found in network config")

    # filters handling still experimental
    response = {
        "filtersUnion": config["ALL_NETWORK_FILTERS"],
        "filtersIntersection": config["COMMON_NETWORK_FILTERS"],
    }
    response["beacons"] = list(beacons_dict.values())
    return response


# returns 404 if beacon or endpoint missing
@network.route("/beacons/<beacon_id>/<endpoint>", methods=["GET", "POST"])
def query(beacon_id, endpoint):
    """
    Forward a query for one endpoint to one beacon in the network and return its response.

    Raises NotFoundException when beacon_id is not in NETWORK_BEACONS or when endpoint is not
    listed in NETWORK_VALID_QUERY_ENDPOINTS. Requests addressed to this beacon's own id
    (BEACON_ID) are answered locally through host_beacon_response instead of over http.
    For other beacons, POST requests forward the JSON body; GET requests are sent without
    query arguments.
    """
    # read with .get(), as network_beacons() does, so an unset registry is reported as
    # "not found" instead of surfacing as a KeyError
    # NOTE(review): presumably NETWORK_BEACONS can be unset when network init fails - confirm
    beacons = current_app.config.get("NETWORK_BEACONS") or {}
    beacon = beacons.get(beacon_id)

    if not beacon:
        raise NotFoundException(message=f"no beacon found with id {beacon_id}")

    if endpoint not in current_app.config["NETWORK_VALID_QUERY_ENDPOINTS"]:
        raise NotFoundException()

    # special handling for host beacon, avoid circular http calls
    host_id = current_app.config["BEACON_ID"]
    if beacon_id == host_id:
        return host_beacon_response(endpoint)

    # all other beacons
    api_url = beacon.get("apiUrl")

    if request.method == "POST":
        payload = request.get_json()
        return network_beacon_post(api_url, payload, endpoint)

    # TODO: pass get args
    return network_beacon_get(api_url, endpoint)
Loading