Skip to content

Commit

Permalink
re-do metadata fetch until endpoint is fixed (#381)
Browse files Browse the repository at this point in the history
* re-do metadata fetch until endpoint is fixed

* better notebook

* rename variables

* code style

* fix replace in comments

* update comments
  • Loading branch information
ssssarah authored Feb 13, 2024
1 parent 7e77eaa commit 6ddd5f3
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 48 deletions.
28 changes: 14 additions & 14 deletions examples/notebooks/use-cases/BBP KG Forge retrieval.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,26 +78,26 @@
"output_type": "stream",
"text": [
"Execution 0 using: id inside: same bucket - Cross bucket: True - Retrieve source: True\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/20fbc97a-fb26-43ff-8093-9136aab25dff\n",
"______________________\n",
"Execution 1 using: self inside: same bucket - Cross bucket: True - Retrieve source: True\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/20fbc97a-fb26-43ff-8093-9136aab25dff\n",
"______________________\n",
"Execution 2 using: id inside: other bucket - Cross bucket: True - Retrieve source: True\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org http://purl.obolibrary.org/obo/GO_0038048\n",
"______________________\n",
"Execution 3 using: self inside: other bucket - Cross bucket: True - Retrieve source: True\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org http://purl.obolibrary.org/obo/GO_0038048\n",
"______________________\n",
"Execution 4 using: id inside: same bucket - Cross bucket: False - Retrieve source: True\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/20fbc97a-fb26-43ff-8093-9136aab25dff\n",
"______________________\n",
"Execution 5 using: self inside: same bucket - Cross bucket: False - Retrieve source: True\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/20fbc97a-fb26-43ff-8093-9136aab25dff\n",
"______________________\n",
"Execution 6 using: id inside: other bucket - Cross bucket: False - Retrieve source: True\n",
"<action> catch_http_error\n",
"<error> RetrievalError: resource 'http://purl.obolibrary.org/obo/GO_0038048' not found in project 'dke/kgforge'\n",
"<error> RetrievalError: 404 Client Error: Not Found for url: https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FGO_0038048\n",
"\n",
"Not found\n",
"______________________\n",
Expand All @@ -108,22 +108,22 @@
"Not found\n",
"______________________\n",
"Execution 8 using: id inside: same bucket - Cross bucket: True - Retrieve source: False\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/20fbc97a-fb26-43ff-8093-9136aab25dff\n",
"______________________\n",
"Execution 9 using: self inside: same bucket - Cross bucket: True - Retrieve source: False\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/20fbc97a-fb26-43ff-8093-9136aab25dff\n",
"______________________\n",
"Execution 10 using: id inside: other bucket - Cross bucket: True - Retrieve source: False\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org GO:0038048\n",
"______________________\n",
"Execution 11 using: self inside: other bucket - Cross bucket: True - Retrieve source: False\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org GO:0038048\n",
"______________________\n",
"Execution 12 using: id inside: same bucket - Cross bucket: False - Retrieve source: False\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/20fbc97a-fb26-43ff-8093-9136aab25dff\n",
"______________________\n",
"Execution 13 using: self inside: same bucket - Cross bucket: False - Retrieve source: False\n",
"rev 3\n",
"rev 3 https://bbp.neuroshapes.org https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/20fbc97a-fb26-43ff-8093-9136aab25dff\n",
"______________________\n",
"Execution 14 using: id inside: other bucket - Cross bucket: False - Retrieve source: False\n",
"<action> catch_http_error\n",
Expand Down Expand Up @@ -167,7 +167,7 @@
" if not e:\n",
" print(\"Not found\")\n",
" else:\n",
" print(\"rev\", e._store_metadata._rev)\n",
" print(\"rev\", e._store_metadata._rev, e.context, e.id)\n",
" print(\"______________________\")\n",
" i += 1\n"
]
Expand Down
86 changes: 52 additions & 34 deletions kgforge/specializations/stores/bluebrain_nexus.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,66 +298,84 @@ def _retrieve_id(
# Solution: first API call used to retrieve metadata
# afterwards, second API call to retrieve data

url = f"{url_resource}/source" if retrieve_source else url_resource
# TODO temporary
# url = f"{url_resource}/source" if retrieve_source else url_resource
#
# # if cross_bucket, no support for /source and metadata.
# # So this will fetch the right metadata. The source data will be fetched later
# if cross_bucket:
# url = url_resource

# if cross_bucket, no support for /source and metadata.
# So this will fetch the right metadata. The source data will be fetched later
if cross_bucket:
url = url_resource
url = url_resource

response = requests.get(
response_not_source_with_metadata = requests.get(
url, params=query_params, headers=self.service.headers, timeout=REQUEST_TIMEOUT
)
catch_http_error_nexus(response, RetrievalError)
catch_http_error_nexus(response_not_source_with_metadata, RetrievalError)

try:
data = response.json()
resource = self.service.to_resource(data)
not_source_with_metadata = response_not_source_with_metadata.json()

# TODO temporary
# if not (retrieve_source and cross_bucket):
# return self.service.to_resource(not_source_with_metadata)

if not retrieve_source:
return self.service.to_resource(not_source_with_metadata)

except Exception as e:
raise RetrievalError(e) from e

if not (retrieve_source and cross_bucket):
return resource

# specific case that requires additional fetching of data without source
_self = data.get("_self", None)
# specific case that requires additional fetching of data with source
_self = not_source_with_metadata.get("_self", None)

# Retrieves the appropriate data if retrieve_source = True and cross_bucket = True
# Retrieves the appropriate data if retrieve_source = True
if _self:
response_source = requests.get(
url=f"{_self}/source",
params=query_params, headers=self.service.headers, timeout=REQUEST_TIMEOUT
)
catch_http_error_nexus(response_source, RetrievalError)
# turns the retrieved data into a resource
resource = self.service.to_resource(response_source.json())
# uses the metadata of the first call
self.service.synchronize_resource(
resource, data, self.retrieve.__name__, True, True
)
return resource
return self._merge_metadata_with_source_data(_self, not_source_with_metadata, query_params)

raise RetrievalError("Cannot find metadata in payload")

def _merge_metadata_with_source_data(
        self, _self, data_not_source_with_metadata, query_params: Dict
) -> Resource:
    """
    Fetch the source payload of a resource and attach previously fetched metadata to it.

    Performs a GET on ``{_self}/source``, turns the returned JSON into a resource and
    then synchronizes that resource using the payload of an earlier, non-source call.

    :param _self: URL of the resource; ``/source`` is appended to it for the request
    :param data_not_source_with_metadata: JSON payload of the earlier non-source request,
        handed to ``synchronize_resource`` as the origin of the metadata
    :param query_params: query parameters forwarded unchanged to the request
    :return: the resource built from the source payload, after synchronization
    :raises RetrievalError: if the response body cannot be parsed or converted into
        a resource; HTTP errors are delegated to ``catch_http_error_nexus``, which
        is given ``RetrievalError`` as the error type to use
    """
    response_source = requests.get(
        url=f"{_self}/source",
        params=query_params, headers=self.service.headers,
        timeout=REQUEST_TIMEOUT
    )
    catch_http_error_nexus(response_source, RetrievalError)

    # Parsing/conversion failures are reported as RetrievalError, the same way the
    # first (non-source) call is handled by the retrieval methods of this store.
    try:
        # turns the retrieved data into a resource
        data_source = response_source.json()
        resource = self.service.to_resource(data_source)
    except Exception as e:
        raise RetrievalError(e) from e

    # uses the metadata of the first call
    # NOTE(review): the meaning of the two trailing True flags is defined by
    # synchronize_resource, which is not visible here - confirm before changing them.
    self.service.synchronize_resource(
        resource, data_not_source_with_metadata, self.retrieve.__name__, True, True
    )
    return resource

def _retrieve_self(
self, self_, retrieve_source: bool, query_params: Dict
) -> Resource:
"""
Retrieves assuming the provided identifier is actually the resource's _self field
"""
url = f"{self_}/source" if retrieve_source else self_
# TODO temporary
# url = f"{self_}/source" if retrieve_source else self_
url = self_

response = requests.get(
response_not_source_with_metadata = requests.get(
url, params=query_params, headers=self.service.headers, timeout=REQUEST_TIMEOUT
)
catch_http_error_nexus(response, RetrievalError)
catch_http_error_nexus(response_not_source_with_metadata, RetrievalError)

try:
data = response.json()
return self.service.to_resource(data)
not_source_with_metadata = response_not_source_with_metadata.json()
if not retrieve_source:
return self.service.to_resource(not_source_with_metadata)

except Exception as e:
raise RetrievalError(e) from e

return self._merge_metadata_with_source_data(self_, not_source_with_metadata, query_params)

def retrieve(
self, id_: str, version: Optional[Union[int, str]], cross_bucket: bool = False, **params
) -> Optional[Resource]:
Expand Down Expand Up @@ -389,8 +407,8 @@ def retrieve(

retrieve_source = params.get('retrieve_source', True)

if retrieve_source:
query_params.update({"annotate": True})
# if retrieve_source:
# query_params.update({"annotate": True})

try:
return self._retrieve_id(
Expand Down

0 comments on commit 6ddd5f3

Please sign in to comment.