-
Notifications
You must be signed in to change notification settings - Fork 209
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement Pagination Features for FHIRClient and Bundle Classes #174
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
+2 −0 | .gitignore | |
+1 −2 | fhirclass.py | |
+45 −0 | fhirrenderer.py | |
+1 −1 | generate.py | |
+27 −0 | tests/fhirclass_bundle_generator_test.py |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
import urllib | ||
from typing import Optional, Iterable | ||
|
||
import requests | ||
|
||
|
||
# Use forward references to avoid circular imports | ||
def _fetch_next_page(bundle: 'Bundle') -> Optional['Bundle']: | ||
""" | ||
Fetch the next page of results using the `next` link provided in the bundle. | ||
|
||
Args: | ||
bundle (Bundle): The FHIR Bundle containing the `next` link. | ||
|
||
Returns: | ||
Optional[Bundle]: The next page of results as a FHIR Bundle, or None if no "next" link is found. | ||
""" | ||
next_link = _get_next_link(bundle) | ||
if next_link: | ||
sanitized_next_link = _sanitize_next_link(next_link) | ||
return _execute_pagination_request(sanitized_next_link) | ||
return None | ||
|
||
|
||
def _get_next_link(bundle: 'Bundle') -> Optional[str]: | ||
""" | ||
Extract the `next` link from the Bundle's links. | ||
|
||
Args: | ||
bundle (Bundle): The FHIR Bundle containing pagination links. | ||
|
||
Returns: | ||
Optional[str]: The URL of the next page if available, None otherwise. | ||
""" | ||
if not bundle.link: | ||
return None | ||
|
||
for link in bundle.link: | ||
if link.relation == "next": | ||
return link.url | ||
return None | ||
|
||
|
||
def _sanitize_next_link(next_link: str) -> str: | ||
""" | ||
Sanitize the `next` link to ensure it is safe to use. | ||
|
||
Args: | ||
next_link (str): The raw `next` link URL. | ||
|
||
Returns: | ||
str: The sanitized URL. | ||
|
||
Raises: | ||
ValueError: If the URL scheme or domain is invalid. | ||
""" | ||
parsed_url = urllib.parse.urlparse(next_link) | ||
|
||
# Validate scheme and netloc (domain) | ||
if parsed_url.scheme not in ["http", "https"]: | ||
raise ValueError("Invalid URL scheme in `next` link.") | ||
if not parsed_url.netloc: | ||
raise ValueError("Invalid URL domain in `next` link.") | ||
|
||
# Additional sanitization if necessary, e.g., removing dangerous query parameters | ||
query_params = urllib.parse.parse_qs(parsed_url.query) | ||
sanitized_query = {k: v for k, v in query_params.items()} | ||
|
||
# Rebuild the sanitized URL | ||
sanitized_url = urllib.parse.urlunparse( | ||
( | ||
parsed_url.scheme, | ||
parsed_url.netloc, | ||
parsed_url.path, | ||
parsed_url.params, | ||
urllib.parse.urlencode(sanitized_query, doseq=True), | ||
parsed_url.fragment, | ||
) | ||
) | ||
|
||
return sanitized_url | ||
|
||
|
||
def _execute_pagination_request(sanitized_url: str) -> Optional['Bundle']: | ||
""" | ||
Execute the request to retrieve the next page using the sanitized URL. | ||
|
||
Args: | ||
sanitized_url (str): The sanitized URL to fetch the next page. | ||
|
||
Returns: | ||
Optional[Bundle]: The next page of results as a FHIR Bundle, or None. | ||
|
||
Raises: | ||
HTTPError: If the request fails due to network issues or server errors. | ||
""" | ||
from fhirclient.models.bundle import Bundle # Import here to avoid circular import | ||
|
||
try: | ||
# Use requests.get directly to make the HTTP request | ||
response = requests.get(sanitized_url) | ||
response.raise_for_status() | ||
next_bundle_data = response.json() | ||
next_bundle = Bundle(next_bundle_data) | ||
|
||
return next_bundle | ||
|
||
except requests.exceptions.HTTPError as e: | ||
# Handle specific HTTP errors as needed, possibly including retry logic | ||
raise e | ||
|
||
|
||
def iter_pages(first_bundle: 'Bundle') -> Iterable['Bundle']: | ||
""" | ||
Iterator that yields each page of results as a FHIR Bundle. | ||
|
||
Args: | ||
first_bundle (Bundle): The first Bundle to start pagination. | ||
|
||
Yields: | ||
Bundle: Each page of results as a FHIR Bundle. | ||
""" | ||
bundle = first_bundle | ||
while bundle: | ||
yield bundle | ||
bundle = _fetch_next_page(bundle) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,19 +50,35 @@ def __init__(self, jsondict=None, strict=True): | |
Type `str`. """ | ||
|
||
super(Bundle, self).__init__(jsondict=jsondict, strict=strict) | ||
|
||
|
||
def __iter__(self): | ||
""" Makes the Bundle itself an iterator by returning an iterator over its entries. """ | ||
if self.entry is None: | ||
self._entry_iter = iter([]) | ||
else: | ||
self._entry_iter = iter(self.entry) | ||
return self | ||
|
||
def __next__(self): | ||
""" Returns the next BundleEntry in the Bundle's entry list using the internal iterator. """ | ||
# return next(self._entry_iter) | ||
|
||
if not hasattr(self, '_entry_iter'): | ||
self.__iter__() | ||
return next(self._entry_iter) | ||
Comment on lines
+54
to
+68
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: could this whole thing just be:
Like, what's the advantage of making Bundle an iterator itself instead of just an iterable? (Honest question, I'm not deep in the iterable weeds) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also... having read both PRs now - maybe adding direct iteration support to Bundle is not worth the effort? Like the code in That said! I think it's cool to have the direct iteration. But is a new direction for us - adding custom cool Python features to individual classes. Which is nice! But if you want to avoid that complexity here, I'd be happy with all this iteration logic just living as fhirsearch-specific logic. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (I know I sent you down the path of fhir-parser - so this is probably annoying to hear - I am now just thinking that everyone is likely going to get their Bundles from fhirsearch anyway... I guess it shows up in other places... So direct iteration is still useful... But I think in my head, the iteration would be the pagination itself - which is annoying to do oneself and thus helpful for us to provide. But if the iteration is over the entries, and we do pagination all in fhirsearch, maybe this bit of synctatic sugar can be left off for now, since it reduces the PR complexity a fair bit.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And to be clear, iterating over the entries makes sense too - my initial thought was over the pages, but entries feels more natural ( |
||
|
||
def elementProperties(self): | ||
js = super(Bundle, self).elementProperties() | ||
js.extend([ | ||
("entry", "entry", BundleEntry, True, None, False), | ||
("identifier", "identifier", identifier.Identifier, False, None, False), | ||
("link", "link", BundleLink, True, None, False), | ||
("signature", "signature", signature.Signature, False, None, False), | ||
("timestamp", "timestamp", fhirinstant.FHIRInstant, False, None, False), | ||
("total", "total", int, False, None, False), | ||
("type", "type", str, False, None, True), | ||
]) | ||
return js | ||
js = super(Bundle, self).elementProperties() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This section feels like accidental over-indentation? |
||
js.extend([ | ||
("entry", "entry", BundleEntry, True, None, False), | ||
("identifier", "identifier", identifier.Identifier, False, None, False), | ||
("link", "link", BundleLink, True, None, False), | ||
("signature", "signature", signature.Signature, False, None, False), | ||
("timestamp", "timestamp", fhirinstant.FHIRInstant, False, None, False), | ||
("total", "total", int, False, None, False), | ||
("type", "type", str, False, None, True), | ||
]) | ||
return js | ||
|
||
|
||
from . import backboneelement | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,8 +5,12 @@ | |
# 2014, SMART Health IT. | ||
|
||
import logging | ||
import warnings | ||
from typing import Iterator | ||
|
||
|
||
from . import fhirreference | ||
from .._utils import iter_pages | ||
|
||
try: | ||
from urllib import quote_plus | ||
|
@@ -124,14 +128,35 @@ def perform(self, server): | |
bundle = bundle.Bundle(res) | ||
bundle.origin_server = server | ||
return bundle | ||
|
||
|
||
# Use forward references to avoid circular imports | ||
def perform_iter(self, server) -> Iterator['Resource']: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The way this is written, it will iterate on Should this be re-written as |
||
""" Perform the search by calling `perform` and return an iterator that yields | ||
Bundle instances. | ||
|
||
:param server: The server against which to perform the search | ||
:returns: An iterator of Bundle instances | ||
""" | ||
|
||
first_bundle = self.perform(server) | ||
if not first_bundle: | ||
return iter([]) | ||
yield first_bundle | ||
|
||
def perform_resources(self, server): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: while we're here, maybe add (And probably add |
||
""" Performs the search by calling `perform`, then extracts all Bundle | ||
entries and returns a list of Resource instances. | ||
|
||
:param server: The server against which to perform the search | ||
:returns: A list of Resource instances | ||
""" | ||
# Old method with deprecation warning | ||
warnings.warn( | ||
"perform_resources() is deprecated and will be removed in a future release. " | ||
"Please use perform_resources_iter() instead.", | ||
DeprecationWarning, | ||
) | ||
Comment on lines
+153
to
+158
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fun - I almost never use the warnings module. I would personally say that we should deprecate When we remove |
||
|
||
bundle = self.perform(server) | ||
resources = [] | ||
if bundle is not None and bundle.entry is not None: | ||
Comment on lines
160
to
162
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: this code could just be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, that change would be a behavior change - but I think an important one? Previously, this did no pagination. But that just means that we'd throw resources on the floor with no indication we had done so or way to work around that. So I think switching this method to use pagination under the hood is a valuable fix. |
||
|
@@ -140,6 +165,23 @@ def perform_resources(self, server): | |
|
||
return resources | ||
|
||
# Use forward references to avoid circular imports | ||
def perform_resources_iter(self, server) -> Iterator['Resource']: | ||
""" Performs the search by calling `perform`, then extracts all Bundle | ||
entries and returns an iterator of Resource instances. | ||
|
||
:param server: The server against which to perform the search | ||
:returns: An iterator of Resource instances | ||
""" | ||
first_bundle = self.perform(server) | ||
|
||
if not first_bundle or not first_bundle.entry: | ||
return iter([]) | ||
Comment on lines
+178
to
+179
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This check isn't needed, is it? This method could be written as the following, I think?
|
||
|
||
for bundle in iter_pages(first_bundle): | ||
if bundle.entry: | ||
yield from (entry.resource for entry in bundle.entry) | ||
|
||
|
||
class FHIRSearchParam(object): | ||
""" Holds one search parameter. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this type hinting work without fully specifying it? Like
'fhirclient.models.bundle.Bundle'
? If so, great. If not and that seems too wordy for you, I think you could also do something like: