fix: Preset API pagination
betodealmeida committed Sep 10, 2024
1 parent 7ecfc4c · commit cafac4e
Showing 6 changed files with 36 additions and 51 deletions.
CHANGELOG.rst (3 additions, 0 deletions)
@@ -5,6 +5,9 @@ Changelog
 Next
 ====
 
+- Remove pagination from the Preset API and add a note to the documentation (#469)
+- Add ``UPGRADE()`` function to upgrade the library (#468)
+
 Version 1.2.26 - 2024-08-08
 ===========================
 
docs/development.rst (2 additions, 0 deletions)
@@ -296,6 +296,8 @@ If an adapter declares support for ``LIMIT`` and ``OFFSET`` a corresponding para…
 Now the adapter can handle ``limit`` and ``offset``, reducing the amount of data that is returned. Note that even if the adapter declares support for ``LIMIT``, SQLite will still enforce the limit; i.e., if for any reason the adapter returns more rows than the limit, SQLite will fix the problem. The same is not true for the offset.
+
+Note that an adapter can only support ``limit`` and ``offset`` if it has filters on all the columns. Otherwise it could return only a subset of the data, since after the adapter stops at ``limit`` rows SQLite could filter the result further.
 
 Returning only the requested columns
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
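The contract described in the new note is easiest to see in code. Below is a minimal sketch, not part of this commit, of a ``get_data`` that can honor ``limit`` and ``offset`` safely because every column is filtered upstream; ``fetch_filtered_rows`` is an invented stand-in:

```python
import itertools
from typing import Any, Dict, Iterator, Optional

Row = Dict[str, Any]


def fetch_filtered_rows(bounds: Dict[str, Any]) -> Iterator[Row]:
    """Invented stand-in for a source that already applies every filter."""
    return iter({"value": i} for i in range(1000))


def get_data(
    bounds: Dict[str, Any],
    limit: Optional[int] = None,
    offset: Optional[int] = None,
) -> Iterator[Row]:
    # Slicing is safe only because all columns are filtered upstream:
    # SQLite has nothing left to discard, so the slice cannot hide rows.
    start = offset or 0
    stop = start + limit if limit is not None else None
    yield from itertools.islice(fetch_filtered_rows(bounds), start, stop)
```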
src/shillelagh/adapters/api/generic_json.py (0 additions, 2 deletions)
@@ -134,8 +134,6 @@ def get_data(  # pylint: disable=unused-argument, too-many-arguments
         self,
         bounds: Dict[str, Filter],
         order: List[Tuple[str, RequestedOrder]],
-        limit: Optional[int] = None,
-        offset: Optional[int] = None,
         requested_columns: Optional[Set[str]] = None,
         **kwargs: Any,
     ) -> Iterator[Row]:
src/shillelagh/adapters/api/generic_xml.py (0 additions, 2 deletions)
@@ -60,8 +60,6 @@ def get_data(  # pylint: disable=unused-argument, too-many-arguments
         self,
         bounds: Dict[str, Filter],
         order: List[Tuple[str, RequestedOrder]],
-        limit: Optional[int] = None,
-        offset: Optional[int] = None,
         requested_columns: Optional[Set[str]] = None,
         **kwargs: Any,
     ) -> Iterator[Row]:
src/shillelagh/adapters/api/preset.py (19 additions, 33 deletions)
@@ -4,6 +4,7 @@
 This is a derivation of the generic JSON adapter that handles Preset auth.
 """
 
+import itertools
 import logging
 import re
 from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, cast
@@ -81,55 +82,36 @@ def __init__(  # pylint: disable=too-many-arguments
         )
 
 
-def get_urls(
-    resource_url: str,
-    offset: Optional[int] = None,
-    limit: Optional[int] = None,
-    page_size: int = MAX_PAGE_SIZE,
-) -> Iterator[Tuple[str, slice]]:
+def get_urls(resource_url: str, page_size: int = MAX_PAGE_SIZE) -> Iterator[str]:
     """
-    Get all paginated URLs to download data together with a limit/offset slice.
+    Get all URLs to download data.
     """
-    start = offset or 0
-    stop = start + limit if limit is not None else None
-
     baseurl = URL(resource_url)
     query = baseurl.query.get("q", "()")
     try:
         params = prison.loads(query)
     except Exception:  # pylint: disable=broad-except
-        yield str(baseurl), slice(start, stop)
+        yield str(baseurl)
         return
 
     # assume the user knows better and keep the URL unmodified
     if "page" in params or "page_size" in params:
-        yield str(baseurl), slice(start, stop)
+        yield str(baseurl)
         return
 
-    page = start // page_size
-    start = start % page_size
-    remaining = limit if limit is not None else float("inf")
+    params["page_size"] = page_size
+    page = 0
     while True:
         params["page"] = page
-        params["page_size"] = min(start + remaining, page_size)
-        yield str(baseurl.with_query({"q": prison.dumps(params)})), slice(start, None)
-
-        remaining -= page_size - start
-        if remaining <= 0:
-            break
-
+        yield str(baseurl.with_query({"q": prison.dumps(params)}))
         page += 1
-        start = 0
 
 
 class PresetWorkspaceAPI(PresetAPI):
     """
     Adapter for Preset workspaces.
     """
 
-    supports_limit = True
-    supports_offset = True
-
     default_path = "$.result[*]"
     cache_name = "preset_cache"

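After the rewrite, ``get_urls`` is an infinite generator of page URLs: the slice bookkeeping is gone, and stopping becomes the caller's responsibility. A quick sketch of the new behavior (the URLs match the updated tests further down):

```python
import itertools

from shillelagh.adapters.api.preset import get_urls

urls = get_urls("https://abcdef01.us1a.app.preset.io/api/v1/chart/", page_size=100)
for url in itertools.islice(urls, 2):  # the generator itself never stops
    print(url)
# https://abcdef01.us1a.app.preset.io/api/v1/chart/?q=(page:0,page_size:100)
# https://abcdef01.us1a.app.preset.io/api/v1/chart/?q=(page:1,page_size:100)
```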
@@ -143,10 +125,16 @@ def supports(cls, uri: str, fast: bool = True, **kwargs: Any) -> Optional[bool]:
         )
 
     def _set_columns(self) -> None:
-        # request only a single page of results to infer schema
-        rows = list(self.get_data({}, [], limit=MAX_PAGE_SIZE))
-        column_names = list(rows[0].keys()) if rows else []
+        rows = self.get_data({}, [])
 
+        try:
+            row = next(rows)
+            rows = itertools.chain([row], rows)
+            column_names = list(row.keys())
+        except StopIteration:
+            column_names = []
+
+        rows = itertools.islice(rows, MAX_PAGE_SIZE)
         _, order, types = analyze(iter(rows))
 
         self.columns = {
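The new ``_set_columns`` peeks at the first row for the column names, reattaches it with ``itertools.chain`` so it is not lost, and caps schema inference at ``MAX_PAGE_SIZE`` rows with ``itertools.islice``. The same pattern in isolation, as a standalone sketch:

```python
import itertools
from typing import Any, Dict, Iterator, List, Tuple

Row = Dict[str, Any]


def peek_columns(rows: Iterator[Row], max_rows: int) -> Tuple[List[str], Iterator[Row]]:
    """Return the column names and an iterator capped at ``max_rows``."""
    try:
        first = next(rows)
    except StopIteration:
        return [], iter([])
    # chain() puts the consumed row back in front, so no data is lost
    capped = itertools.islice(itertools.chain([first], rows), max_rows)
    return list(first.keys()), capped


columns, sample = peek_columns(iter([{"id": 1, "name": "a"}, {"id": 2, "name": "b"}]), 100)
assert columns == ["id", "name"]
assert len(list(sample)) == 2
```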
@@ -163,12 +151,10 @@ def get_data(  # pylint: disable=unused-argument, too-many-arguments, too-many-locals
         self,
         bounds: Dict[str, Filter],
         order: List[Tuple[str, RequestedOrder]],
-        limit: Optional[int] = None,
-        offset: Optional[int] = None,
         requested_columns: Optional[Set[str]] = None,
         **kwargs: Any,
     ) -> Iterator[Row]:
-        for url, slice_ in get_urls(self.uri, offset, limit, MAX_PAGE_SIZE):
+        for url in get_urls(self.uri, MAX_PAGE_SIZE):
             response = self._session.get(str(url))
             payload = response.json()
             if not response.ok:
@@ -178,7 +164,7 @@ def get_data(  # pylint: disable=unused-argument, too-many-arguments, too-many-locals
                 )
             raise ProgrammingError(f"Error: {messages}")
 
-            rows = jsonpath.findall(self.path, payload)[slice_]
+            rows = jsonpath.findall(self.path, payload)
             if not rows:
                 break
 
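Because ``get_urls`` never exhausts on its own, the ``if not rows: break`` check above is now the sole termination condition: pagination ends at the first empty page. Reduced to a sketch, with ``fetch_page`` as an invented stand-in for the HTTP request plus JSONPath extraction:

```python
from typing import Any, Dict, Iterator, List


def fetch_page(url: str) -> List[Dict[str, Any]]:
    """Invented stand-in: GET ``url`` and extract the rows from the payload."""
    raise NotImplementedError


def iterate_rows(urls: Iterator[str]) -> Iterator[Dict[str, Any]]:
    for url in urls:
        rows = fetch_page(url)
        if not rows:  # an empty page means we have read past the last row
            break
        yield from rows
```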
tests/adapters/api/preset_test.py (12 additions, 14 deletions)
@@ -99,25 +99,19 @@ def test_get_urls() -> None:
     """
     gen = get_urls(
         "https://abcdef01.us1a.app-sdx.preset.io/api/v1/chart/",
-        offset=45,
-        limit=50,
         page_size=42,
     )
 
-    url, slice_ = next(gen)
+    url = next(gen)
     assert (
         url
-        == "https://abcdef01.us1a.app-sdx.preset.io/api/v1/chart/?q=(page:1,page_size:42)"
+        == "https://abcdef01.us1a.app-sdx.preset.io/api/v1/chart/?q=(page:0,page_size:42)"
     )
-    assert slice_.start == 3
-    url, slice_ = next(gen)
+    url = next(gen)
     assert (
         url
-        == "https://abcdef01.us1a.app-sdx.preset.io/api/v1/chart/?q=(page:2,page_size:11)"
+        == "https://abcdef01.us1a.app-sdx.preset.io/api/v1/chart/?q=(page:1,page_size:42)"
     )
-    assert slice_.start == 0
-    with pytest.raises(StopIteration):
-        next(gen)
 
 
 def test_get_urls_unable_to_parse() -> None:
@@ -126,7 +120,7 @@ def test_get_urls_unable_to_parse() -> None:
     """
 
     gen = get_urls("https://example.org/?q=(((")
-    assert next(gen)[0] == "https://example.org/?q=((("
+    assert next(gen) == "https://example.org/?q=((("
     with pytest.raises(StopIteration):
         next(gen)
 
@@ -137,7 +131,7 @@ def test_get_urls_with_page_parameters() -> None:
     """
 
     gen = get_urls("https://example.org/?q=(page:0,page_size:42)")
-    assert next(gen)[0] == "https://example.org/?q=(page:0,page_size:42)"
+    assert next(gen) == "https://example.org/?q=(page:0,page_size:42)"
     with pytest.raises(StopIteration):
         next(gen)
 
@@ -197,13 +191,17 @@ def test_preset_workspace_pagination(requests_mock: Mocker) -> None:
         },
     )
     requests_mock.get(
-        "https://abcdef01.us1a.app.preset.io/api/v1/chart/?q=(page:1,page_size:3)",
+        "https://abcdef01.us1a.app.preset.io/api/v1/chart/?q=(page:1,page_size:100)",
         json={
             "result": [
-                {"id": i + 101, "slice_name": f"Team {i+101}"} for i in range(3)
+                {"id": i + 101, "slice_name": f"Team {i+101}"} for i in range(50)
             ],
         },
    )
+    requests_mock.get(
+        "https://abcdef01.us1a.app.preset.io/api/v1/chart/?q=(page:2,page_size:100)",
+        json={"result": []},
+    )
 
     connection = connect(
         ":memory:",
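The added ``page:2`` mock returns an empty result set, which is what lets the adapter stop; without it, the new code would request pages forever. The test then queries the mocked workspace through a shillelagh connection; the continuation below is a hypothetical sketch rather than part of the diff, and the ``adapter_kwargs`` key and credentials are assumptions:

```python
from shillelagh.backends.apsw.db import connect

connection = connect(
    ":memory:",
    adapter_kwargs={
        # adapter name and tokens are assumed, for illustration only
        "presetworkspaceapi": {"access_token": "XXX", "access_secret": "YYY"},
    },
)
cursor = connection.cursor()
sql = 'SELECT COUNT(*) FROM "https://abcdef01.us1a.app.preset.io/api/v1/chart/"'
cursor.execute(sql)
print(cursor.fetchone())  # rows from pages 0 and 1; the empty page 2 ends the loop
```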