Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

818 stac requests resilience #1022

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions openeogeotrellis/integrations/stac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from typing import (
Dict,
Optional,
)
from urllib.error import HTTPError
from urllib.parse import urlparse

from pystac.stac_io import DefaultStacIO
from urllib3 import Retry, PoolManager


class StacApiIO(DefaultStacIO):
"""A STAC IO implementation that supports reading with timeout and retry."""

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you document what this class adds on top of the existing DefaultStacIO?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While checking some pystac details, I stumbled on pystac.stac_io.RetryStacIO (https://pystac.readthedocs.io/en/stable/api/stac_io.html#pystac.stac_io.RetryStacIO). Shouldn't we just use that instead?

Copy link
Member

@soxofaan soxofaan Jan 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed: RetryStacIO does not have timeout control.

I have already opened a ticket to request that feature.

I would add a technical-debt comment to migrate to that solution in the future, instead of having to maintain this home-grown wrapper/hack.

def __init__(
    self,
    headers: Optional[Dict[str, str]] = None,
    timeout: Optional[float] = None,
    retry: Optional[Retry] = None,
):
    """Set up the IO object with a request timeout and a retry policy.

    Args:
        headers: Optional HTTP headers, forwarded to the parent constructor.
        timeout: Request timeout in seconds; a falsy value (``None`` or
            ``0``) selects the default of 20 seconds.
        retry: urllib3 ``Retry`` policy; a default ``Retry()`` is used
            when none is given.
    """
    super().__init__(headers=headers)
    # Fall back to the defaults whenever the caller passes a falsy value.
    self.timeout = timeout if timeout else 20
    self.retry = retry if retry else Retry()

def read_text_from_href(self, href: str) -> str:
    """Read a file as a UTF-8 string, with retry and timeout support.

    ``http``/``https`` hrefs are fetched through urllib3 using the retry
    policy and timeout configured on this instance. Every other href
    (local paths, other schemes) is delegated to the parent implementation.

    Args:
        href: The URI of the file to open.

    Returns:
        The content of the file, decoded as UTF-8.

    Raises:
        Exception: If the request fails (connection error, timeout,
            retries exhausted) or the server answers with an HTTP error
            status (>= 400).
    """
    # TODO: technical debt - migrate to pystac's RetryStacIO once it
    #   offers timeout control, instead of maintaining this wrapper.

    # urllib3 raises its own exception hierarchy; urllib.error.HTTPError
    # is never raised by PoolManager, so it must not be the one caught here.
    from urllib3.exceptions import HTTPError as Urllib3HTTPError

    # Only real HTTP(S) URLs go through urllib3. A bare "scheme is not
    # empty" check would also match "file://" URLs and Windows drive
    # letters, which urllib3 cannot handle.
    if urlparse(href).scheme.lower() not in ("http", "https"):
        return super().read_text_from_href(href)

    try:
        # The context manager releases the pooled connections on exit;
        # the body is preloaded by default, so it stays readable afterwards.
        with PoolManager(retries=self.retry, timeout=self.timeout) as http:
            response = http.request("GET", href, headers=self.headers or None)
    except Urllib3HTTPError as e:
        raise Exception("Could not read uri {}".format(href)) from e

    # Statuses outside the retry policy's forcelist (e.g. 404) do not
    # raise in urllib3; without this check the error page would be
    # returned as if it were the requested document.
    if response.status >= 400:
        raise Exception(
            "Could not read uri {} (HTTP status {})".format(href, response.status)
        )
    return response.data.decode("utf-8")
7 changes: 6 additions & 1 deletion openeogeotrellis/load_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,18 @@
from pathlib import Path
from pystac import STACObject
from shapely.geometry import Polygon, shape
from urllib3 import Retry

from openeogeotrellis import datacube_parameters
from openeogeotrellis.config import get_backend_config
from openeogeotrellis.constants import EVAL_ENV_KEY
from openeogeotrellis.geopysparkcubemetadata import GeopysparkCubeMetadata
from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube
from openeogeotrellis.integrations.stac import StacApiIO
from openeogeotrellis.utils import normalize_temporal_extent, get_jvm, to_projected_polygons

logger = logging.getLogger(__name__)
REQUESTS_TIMEOUT_SECONDS = 60

def load_stac(url: str, load_params: LoadParameters, env: EvalEnv, layer_properties: Dict[str, object],
batch_jobs: Optional[backend.BatchJobs], override_band_names: List[str] = None) -> GeopysparkDataCube:
Expand Down Expand Up @@ -694,7 +697,9 @@ def get_dependency_job_info() -> Optional[BatchJobMetadata]:

def _await_stac_object(url, poll_interval_seconds, max_poll_delay_seconds, max_poll_time) -> STACObject:
while True:
stac_object = pystac.read_file(href=url) # TODO: add retries and set timeout
retry = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
stac_io = StacApiIO(timeout=REQUESTS_TIMEOUT_SECONDS, retry=retry)
stac_object = pystac.read_file(href=url, stac_io=stac_io)

partial_job_status = (stac_object
.to_dict(include_self_link=False, transform_hrefs=False)
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
"traceback-with-variables==2.0.4",
'scipy>=1.8', # used by sentinel-3 reader
"PyJWT[crypto]>=2.9.0", # For identity tokens
"urllib3>=1.26.20"
],
extras_require={
"dev": tests_require,
Expand Down
Loading