From a1c187bf0965a81d6dfb78ab2a7aff31e598dd56 Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Thu, 23 Jan 2025 18:25:14 +0100 Subject: [PATCH 01/13] add custom StacApiIO with timeout and retry support #818 Due to a bug in the original StacAPIIo class we were forced to create a custom implementation (https://github.com/stac-utils/pystac-client/issues/706). See #818 for more information. --- openeogeotrellis/integrations/stac.py | 208 ++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 openeogeotrellis/integrations/stac.py diff --git a/openeogeotrellis/integrations/stac.py b/openeogeotrellis/integrations/stac.py new file mode 100644 index 00000000..5a836da9 --- /dev/null +++ b/openeogeotrellis/integrations/stac.py @@ -0,0 +1,208 @@ +import json +import logging +import warnings +from copy import deepcopy +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Optional, + Tuple, + Union, List, Callable, +) + +import pystac +import pystac_client +from pystac.serialization import ( + identify_stac_object, + identify_stac_object_type, + merge_common_properties, + migrate_to_latest, +) +from pystac_client.exceptions import APIError +from pystac_client.stac_api_io import StacApiIO +from requests import Request, Session +from requests.adapters import HTTPAdapter +from urllib3 import Retry + +if TYPE_CHECKING: + from pystac.catalog import Catalog as Catalog_Type + from pystac.stac_object import STACObject as STACObject_Type + +logger = logging.getLogger(__name__) + + +Timeout = Union[float, Tuple[float, float], Tuple[float, None]] + + +class StacApiIO(StacApiIO): + + def __init__( + self, + headers: Optional[Dict[str, str]] = None, + conformance: Optional[List[str]] = None, + parameters: Optional[Dict[str, Any]] = None, + request_modifier: Optional[Callable[[Request], Union[Request, None]]] = None, + timeout: Optional[Timeout] = None, + max_retries: Optional[Union[int, Retry]] = 5, + ): + """Initialize class for API IO + + Args: + headers : Optional dictionary of headers to include in all requests + conformance (DEPRECATED) : Optional list of `Conformance Classes + `__. + + .. deprecated:: 0.7.0 + Conformance can be altered on the client class directly + + parameters: Optional dictionary of query string parameters to + include in all requests. + request_modifier: Optional callable that can be used to modify Request + objects before they are sent. If provided, the callable receives a + `request.Request` and must either modify the object directly or return + a new / modified request instance. + timeout: Optional float or (float, float) tuple following the semantics + defined by `Requests + `__. + max_retries: The number of times to retry requests. Set to ``None`` to + disable retries. + + Return: + StacApiIO : StacApiIO instance + """ + if conformance is not None: + warnings.warn( + ( + "The `conformance` option is deprecated and will be " + "removed in the next major release. Instead use " + "`Client.set_conforms_to` or `Client.add_conforms_to` to control " + "behavior." + ), + category=FutureWarning, + ) + + self.session = Session() + if max_retries: + self.session.mount("http://", HTTPAdapter(max_retries=max_retries)) + self.session.mount("https://", HTTPAdapter(max_retries=max_retries)) + self.update( + headers=headers, parameters=parameters, request_modifier=request_modifier, timeout=timeout + ) + + def request( + self, + href: str, + method: Optional[str] = None, + headers: Optional[Dict[str, str]] = None, + parameters: Optional[Dict[str, Any]] = None, + ) -> str: + """Makes a request to an http endpoint + + Args: + href (str): The request URL + method (Optional[str], optional): The http method to use, 'GET' or 'POST'. + Defaults to None, which will result in 'GET' being used. + headers (Optional[Dict[str, str]], optional): Additional headers to include + in request. Defaults to None. + parameters (Optional[Dict[str, Any]], optional): parameters to send with + request. Defaults to None. + + Raises: + APIError: raised if the server returns an error response + + Return: + str: The decoded response from the endpoint + """ + if method == "POST": + request = Request(method=method, url=href, headers=headers, json=parameters) + else: + params = deepcopy(parameters) or {} + request = Request(method="GET", url=href, headers=headers, params=params) + try: + modified = self._req_modifier(request) if self._req_modifier else None + prepped = self.session.prepare_request(modified or request) + msg = f"{prepped.method} {prepped.url} Headers: {prepped.headers}" + if method == "POST": + msg += f" Payload: {json.dumps(request.json)}" + if self.timeout is not None: + msg += f" Timeout: {self.timeout}" + logger.debug(msg) + # The only difference with the super implementation are these extra send kwargs. + send_kwargs = self.session.merge_environment_settings( + prepped.url, proxies={}, stream=None, verify=True, cert=None + ) + resp = self.session.send(prepped, timeout=self.timeout, **send_kwargs) + except Exception as err: + logger.debug(err) + raise APIError(str(err)) + if resp.status_code != 200: + raise APIError.from_response(resp) + try: + return resp.content.decode("utf-8") + except Exception as err: + raise APIError(str(err)) + + def stac_object_from_dict( + self, + d: Dict[str, Any], + href: Optional[pystac.link.HREF] = None, + root: Optional["Catalog_Type"] = None, + preserve_dict: bool = True, + ) -> "STACObject_Type": + """Deserializes a :class:`~pystac.STACObject` sub-class instance from a + dictionary. + + Args: + d : The dictionary to deserialize + href : Optional href to associate with the STAC object + root : Optional root :class:`~pystac.Catalog` to associate with the + STAC object. + preserve_dict: If ``False``, the dict parameter ``d`` may be modified + during this method call. Otherwise the dict is not mutated. + Defaults to ``True``, which results results in a deepcopy of the + parameter. Set to ``False`` when possible to avoid the performance + hit of a deepcopy. + """ + if identify_stac_object_type(d) == pystac.STACObjectType.ITEM: + collection_cache = None + if root is not None: + collection_cache = root._resolved_objects.as_collection_cache() + + # Merge common properties in case this is an older STAC object. + merge_common_properties( + d, json_href=str(href), collection_cache=collection_cache + ) + + info = identify_stac_object(d) + d = migrate_to_latest(d, info) + + if info.object_type == pystac.STACObjectType.CATALOG: + result = pystac_client.client.Client.from_dict( + d, href=str(href), root=root, migrate=False, preserve_dict=preserve_dict + ) + result._stac_io = self + return result + + if info.object_type == pystac.STACObjectType.COLLECTION: + collection_client = ( + pystac_client.collection_client.CollectionClient.from_dict( + d, + href=str(href), + root=root, + migrate=False, + preserve_dict=preserve_dict, + ) + ) + # The only difference with the super implementation is that we set _stac_io here. + # This ensures root_link.resolve_stac_object() uses this StacApiIO and not the default one. + # Which in turn makes sure that the Catalog is always of type pystac_client.client.Client. + collection_client._stac_io = self + return collection_client + + if info.object_type == pystac.STACObjectType.ITEM: + return pystac.Item.from_dict( + d, href=str(href), root=root, migrate=False, preserve_dict=preserve_dict + ) + + raise ValueError(f"Unknown STAC object type {info.object_type}") From 068a74130e7b64ceaf8cf069a855f05536dabb7b Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Thu, 23 Jan 2025 18:26:27 +0100 Subject: [PATCH 02/13] add test for the new custom StacApiIO #818 --- setup.py | 1 + tests/test_stac_client.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/test_stac_client.py diff --git a/setup.py b/setup.py index f1e790ad..a771d59c 100644 --- a/setup.py +++ b/setup.py @@ -101,6 +101,7 @@ "traceback-with-variables==2.0.4", 'scipy>=1.8', # used by sentinel-3 reader "PyJWT[crypto]>=2.9.0", # For identity tokens + "urllib3~=1.26.20" ], extras_require={ "dev": tests_require, diff --git a/tests/test_stac_client.py b/tests/test_stac_client.py new file mode 100644 index 00000000..797b3807 --- /dev/null +++ b/tests/test_stac_client.py @@ -0,0 +1,24 @@ +import pystac +import unittest +from unittest.mock import patch, Mock + +import pystac_client +from openeogeotrellis.integrations.stac import StacApiIO + +class TestStacClient(unittest.TestCase): + + @patch('requests.Session') + def test_stac_client_timeout(self, mock_session): + timeout_value = 42 + mock_session.send.return_value.status_code = 400 + mock_request = Mock() + mock_session.prepare_request.return_value = mock_request + + stac_io = StacApiIO(timeout=timeout_value) + stac_io.session = mock_session + + stac_object = pystac.read_file(href="collection.json", stac_io=stac_io) + with self.assertRaises(pystac_client.exceptions.APIError): + print(stac_object.get_root().id) + + mock_session.send.assert_called_with(mock_request, timeout=timeout_value) From 0ac36c03754a3f2f22408df4d5f5b2f1dba02481 Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Thu, 23 Jan 2025 18:27:27 +0100 Subject: [PATCH 03/13] add retry and timeout support to load_stac requests #818 --- openeogeotrellis/load_stac.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/openeogeotrellis/load_stac.py b/openeogeotrellis/load_stac.py index 41e314b6..8dade471 100644 --- a/openeogeotrellis/load_stac.py +++ b/openeogeotrellis/load_stac.py @@ -30,15 +30,18 @@ from pathlib import Path from pystac import STACObject from shapely.geometry import Polygon, shape +from urllib3 import Retry from openeogeotrellis import datacube_parameters from openeogeotrellis.config import get_backend_config from openeogeotrellis.constants import EVAL_ENV_KEY from openeogeotrellis.geopysparkcubemetadata import GeopysparkCubeMetadata from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube +from openeogeotrellis.integrations.stac import StacApiIO from openeogeotrellis.utils import normalize_temporal_extent, get_jvm, to_projected_polygons logger = logging.getLogger(__name__) +REQUESTS_TIMEOUT_SECONDS = 60 def load_stac(url: str, load_params: LoadParameters, env: EvalEnv, layer_properties: Dict[str, object], batch_jobs: Optional[backend.BatchJobs], override_band_names: List[str] = None) -> GeopysparkDataCube: @@ -694,7 +697,9 @@ def get_dependency_job_info() -> Optional[BatchJobMetadata]: def _await_stac_object(url, poll_interval_seconds, max_poll_delay_seconds, max_poll_time) -> STACObject: while True: - stac_object = pystac.read_file(href=url) # TODO: add retries and set timeout + retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]) + stac_io = StacApiIO(timeout=REQUESTS_TIMEOUT_SECONDS, max_retries=retries) + stac_object = pystac.read_file(href=url, stac_io=stac_io) partial_job_status = (stac_object .to_dict(include_self_link=False, transform_hrefs=False) From c4ccbf3315871b31c089dfcdf869837019cdf24e Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Mon, 27 Jan 2025 09:43:45 +0100 Subject: [PATCH 04/13] simplify custom StacApiIO implementation #818 --- openeogeotrellis/integrations/stac.py | 215 +++----------------------- openeogeotrellis/load_stac.py | 4 +- 2 files changed, 25 insertions(+), 194 deletions(-) diff --git a/openeogeotrellis/integrations/stac.py b/openeogeotrellis/integrations/stac.py index 5a836da9..da8d9bd8 100644 --- a/openeogeotrellis/integrations/stac.py +++ b/openeogeotrellis/integrations/stac.py @@ -1,208 +1,39 @@ -import json -import logging -import warnings -from copy import deepcopy from typing import ( - TYPE_CHECKING, - Any, Dict, Optional, - Tuple, - Union, List, Callable, ) +from urllib.error import HTTPError -import pystac -import pystac_client -from pystac.serialization import ( - identify_stac_object, - identify_stac_object_type, - merge_common_properties, - migrate_to_latest, -) -from pystac_client.exceptions import APIError -from pystac_client.stac_api_io import StacApiIO -from requests import Request, Session -from requests.adapters import HTTPAdapter -from urllib3 import Retry - -if TYPE_CHECKING: - from pystac.catalog import Catalog as Catalog_Type - from pystac.stac_object import STACObject as STACObject_Type - -logger = logging.getLogger(__name__) - +from pystac.stac_io import DefaultStacIO, _is_url +from urllib3 import Retry, PoolManager -Timeout = Union[float, Tuple[float, float], Tuple[float, None]] - -class StacApiIO(StacApiIO): +class StacApiIO(DefaultStacIO): def __init__( self, headers: Optional[Dict[str, str]] = None, - conformance: Optional[List[str]] = None, - parameters: Optional[Dict[str, Any]] = None, - request_modifier: Optional[Callable[[Request], Union[Request, None]]] = None, - timeout: Optional[Timeout] = None, - max_retries: Optional[Union[int, Retry]] = 5, - ): - """Initialize class for API IO - - Args: - headers : Optional dictionary of headers to include in all requests - conformance (DEPRECATED) : Optional list of `Conformance Classes - `__. - - .. deprecated:: 0.7.0 - Conformance can be altered on the client class directly + timeout: Optional[float] = None, + retry: Optional[Retry] = None, + ): + super().__init__(headers=headers) + self.timeout = timeout or 20 + self.retry = retry or Retry() - parameters: Optional dictionary of query string parameters to - include in all requests. - request_modifier: Optional callable that can be used to modify Request - objects before they are sent. If provided, the callable receives a - `request.Request` and must either modify the object directly or return - a new / modified request instance. - timeout: Optional float or (float, float) tuple following the semantics - defined by `Requests - `__. - max_retries: The number of times to retry requests. Set to ``None`` to - disable retries. - - Return: - StacApiIO : StacApiIO instance - """ - if conformance is not None: - warnings.warn( - ( - "The `conformance` option is deprecated and will be " - "removed in the next major release. Instead use " - "`Client.set_conforms_to` or `Client.add_conforms_to` to control " - "behavior." - ), - category=FutureWarning, - ) - - self.session = Session() - if max_retries: - self.session.mount("http://", HTTPAdapter(max_retries=max_retries)) - self.session.mount("https://", HTTPAdapter(max_retries=max_retries)) - self.update( - headers=headers, parameters=parameters, request_modifier=request_modifier, timeout=timeout - ) - - def request( - self, - href: str, - method: Optional[str] = None, - headers: Optional[Dict[str, str]] = None, - parameters: Optional[Dict[str, Any]] = None, - ) -> str: - """Makes a request to an http endpoint + def read_text_from_href(self, href: str) -> str: + """Reads file as a UTF-8 string, with retry and timeout support. Args: - href (str): The request URL - method (Optional[str], optional): The http method to use, 'GET' or 'POST'. - Defaults to None, which will result in 'GET' being used. - headers (Optional[Dict[str, str]], optional): Additional headers to include - in request. Defaults to None. - parameters (Optional[Dict[str, Any]], optional): parameters to send with - request. Defaults to None. - - Raises: - APIError: raised if the server returns an error response - - Return: - str: The decoded response from the endpoint - """ - if method == "POST": - request = Request(method=method, url=href, headers=headers, json=parameters) - else: - params = deepcopy(parameters) or {} - request = Request(method="GET", url=href, headers=headers, params=params) - try: - modified = self._req_modifier(request) if self._req_modifier else None - prepped = self.session.prepare_request(modified or request) - msg = f"{prepped.method} {prepped.url} Headers: {prepped.headers}" - if method == "POST": - msg += f" Payload: {json.dumps(request.json)}" - if self.timeout is not None: - msg += f" Timeout: {self.timeout}" - logger.debug(msg) - # The only difference with the super implementation are these extra send kwargs. - send_kwargs = self.session.merge_environment_settings( - prepped.url, proxies={}, stream=None, verify=True, cert=None - ) - resp = self.session.send(prepped, timeout=self.timeout, **send_kwargs) - except Exception as err: - logger.debug(err) - raise APIError(str(err)) - if resp.status_code != 200: - raise APIError.from_response(resp) - try: - return resp.content.decode("utf-8") - except Exception as err: - raise APIError(str(err)) - - def stac_object_from_dict( - self, - d: Dict[str, Any], - href: Optional[pystac.link.HREF] = None, - root: Optional["Catalog_Type"] = None, - preserve_dict: bool = True, - ) -> "STACObject_Type": - """Deserializes a :class:`~pystac.STACObject` sub-class instance from a - dictionary. - - Args: - d : The dictionary to deserialize - href : Optional href to associate with the STAC object - root : Optional root :class:`~pystac.Catalog` to associate with the - STAC object. - preserve_dict: If ``False``, the dict parameter ``d`` may be modified - during this method call. Otherwise the dict is not mutated. - Defaults to ``True``, which results results in a deepcopy of the - parameter. Set to ``False`` when possible to avoid the performance - hit of a deepcopy. + href : The URI of the file to open. """ - if identify_stac_object_type(d) == pystac.STACObjectType.ITEM: - collection_cache = None - if root is not None: - collection_cache = root._resolved_objects.as_collection_cache() - - # Merge common properties in case this is an older STAC object. - merge_common_properties( - d, json_href=str(href), collection_cache=collection_cache - ) - - info = identify_stac_object(d) - d = migrate_to_latest(d, info) - - if info.object_type == pystac.STACObjectType.CATALOG: - result = pystac_client.client.Client.from_dict( - d, href=str(href), root=root, migrate=False, preserve_dict=preserve_dict - ) - result._stac_io = self - return result - - if info.object_type == pystac.STACObjectType.COLLECTION: - collection_client = ( - pystac_client.collection_client.CollectionClient.from_dict( - d, - href=str(href), - root=root, - migrate=False, - preserve_dict=preserve_dict, + if _is_url(href): + http = PoolManager(retries=self.retry, timeout=20) + try: + response = http.request( + "GET", href ) - ) - # The only difference with the super implementation is that we set _stac_io here. - # This ensures root_link.resolve_stac_object() uses this StacApiIO and not the default one. - # Which in turn makes sure that the Catalog is always of type pystac_client.client.Client. - collection_client._stac_io = self - return collection_client - - if info.object_type == pystac.STACObjectType.ITEM: - return pystac.Item.from_dict( - d, href=str(href), root=root, migrate=False, preserve_dict=preserve_dict - ) - - raise ValueError(f"Unknown STAC object type {info.object_type}") + return response.data.decode("utf-8") + except HTTPError as e: + raise Exception("Could not read uri {}".format(href)) from e + else: + return super().read_text_from_href(href) diff --git a/openeogeotrellis/load_stac.py b/openeogeotrellis/load_stac.py index 8dade471..09af2907 100644 --- a/openeogeotrellis/load_stac.py +++ b/openeogeotrellis/load_stac.py @@ -697,8 +697,8 @@ def get_dependency_job_info() -> Optional[BatchJobMetadata]: def _await_stac_object(url, poll_interval_seconds, max_poll_delay_seconds, max_poll_time) -> STACObject: while True: - retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]) - stac_io = StacApiIO(timeout=REQUESTS_TIMEOUT_SECONDS, max_retries=retries) + retry = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]) + stac_io = StacApiIO(timeout=REQUESTS_TIMEOUT_SECONDS, retry=retry) stac_object = pystac.read_file(href=url, stac_io=stac_io) partial_job_status = (stac_object From 8709adfc7e38be38468f5dcc28961a2944392455 Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Mon, 27 Jan 2025 09:46:18 +0100 Subject: [PATCH 05/13] add UrlopenMocker to test_api_result.py #818 Given that we are using urllib3.Poolmanager.request instead of urllib3.request this required a different patch. --- tests/test_api_result.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/tests/test_api_result.py b/tests/test_api_result.py index 02fbf975..e1090689 100644 --- a/tests/test_api_result.py +++ b/tests/test_api_result.py @@ -1941,10 +1941,31 @@ def jvm_mock(): yield jvm_mock +class UrlopenMocker(UrllibMocker): + @contextlib.contextmanager + def patch(self): + with mock.patch('urllib3.poolmanager.PoolManager.request', new=self._request): + yield self + + def _request(self, method, url, fields=None, headers={}, **urlopen_kw): + for match_url in [url, self._drop_query_string(url)]: + key = (method, match_url) + if key in self.response_callbacks: + return self.response_callbacks[key](urllib.request.Request(url, method=method, headers=headers)) + return self.Response(code=404, msg="Not Found") + + +@pytest.fixture +def urlopen_mocker() -> UrlopenMocker: + with UrlopenMocker().patch() as mocker: + yield mocker + + class UrllibAndRequestMocker: - def __init__(self, urllib_mock, requests_mock): + def __init__(self, urllib_mock, requests_mock, urlopen_mocker): self.urllib_mock = urllib_mock self.requests_mock = requests_mock + self.urlopen_mocker = urlopen_mocker def get(self, href, data): code = 200 @@ -1952,11 +1973,11 @@ def get(self, href, data): if isinstance(data, str): data = data.encode("utf-8") self.requests_mock.get(href, content=data) - + self.urlopen_mocker.get(href, data, code) @pytest.fixture -def urllib_and_request_mock(urllib_mock, requests_mock) -> UrllibAndRequestMocker: - yield UrllibAndRequestMocker(urllib_mock, requests_mock) +def urllib_and_request_mock(urllib_mock, requests_mock, urlopen_mocker) -> UrllibAndRequestMocker: + yield UrllibAndRequestMocker(urllib_mock, requests_mock, urlopen_mocker) @pytest.fixture From 822c96d54716cbaf973d1ac749f64ce48cb1ff4e Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Mon, 27 Jan 2025 09:51:37 +0100 Subject: [PATCH 06/13] adjust load_stac tests to use new StacApiIO #818 --- tests/test_api_result.py | 103 ++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 55 deletions(-) diff --git a/tests/test_api_result.py b/tests/test_api_result.py index e1090689..3c0f64fc 100644 --- a/tests/test_api_result.py +++ b/tests/test_api_result.py @@ -3007,7 +3007,7 @@ def _setup_metadata_request_mocking( api: ApiTester, results_dir: Path, results_url: str, - urllib_mock: UrllibMocker, + urllib_mock, ): # Use ApiTester to easily build responses for the metadata request we have to mock api.set_auth_bearer_token() @@ -3451,7 +3451,7 @@ def test_stac_api_item_search_bbox_is_epsg_4326(self, api110): 9.844419570631366, 50.246156678379016)) def test_stac_collection_multiple_items_no_spatial_extent_specified(self, api110, zk_job_registry, - batch_job_output_root, urllib_mock): + batch_job_output_root, urllib_and_request_mock): job_id = "j-ec5d3e778ba5423d8d88a50b08cb9f63" results_url = f"https://foobar.test/job/{job_id}/results" @@ -3466,7 +3466,7 @@ def test_stac_collection_multiple_items_no_spatial_extent_specified(self, api110 api=api110, results_dir=results_dir, results_url=results_url, - urllib_mock=urllib_mock, + urllib_mock=urllib_and_request_mock, ) # sanity check: multiple items @@ -3513,7 +3513,7 @@ def test_stac_api_no_spatial_extent_specified(self, api110): mock_stac_client.search.assert_called_once() - def test_stac_api_caching(self, imagecollection_with_two_bands_and_one_date, api110, urllib_mock, tmp_path): + def test_stac_api_caching(self, imagecollection_with_two_bands_and_one_date, api110, tmp_path): with mock.patch("openeogeotrellis.load_stac.load_stac") as mock_load_stac: mock_load_stac.return_value = imagecollection_with_two_bands_and_one_date @@ -3578,7 +3578,7 @@ def _mock_stac_api_collection() -> Collection: "stac/item02.json", ], ) - def test_load_stac_with_stac_item_json(self, item_path, api110, urllib_mock, tmp_path): + def test_load_stac_with_stac_item_json(self, item_path, api110, urlopen_mocker, tmp_path): """load_stac with a simple STAC item (as JSON file)""" item_json = ( get_test_data_file(item_path).read_text() @@ -3588,7 +3588,7 @@ def test_load_stac_with_stac_item_json(self, item_path, api110, urllib_mock, tmp "asset01.tiff", f"file://{get_test_data_file('binary/load_stac/BVL_v1/BVL_v1_2021.tif').absolute()}" ) ) - urllib_mock.get("https://stac.test/item.json", data=item_json) + urlopen_mocker.get("https://stac.test/item.json", data=item_json) process_graph = { "loadstac1": { @@ -3613,7 +3613,7 @@ def test_load_stac_with_stac_item_json(self, item_path, api110, urllib_mock, tmp assert ds.coords["y"].values.min() == pytest.approx(3014000, abs=10) def test_load_stac_with_stac_item_issue619_non_standard_int_eobands_item_properties( - self, api110, urllib_mock, tmp_path + self, api110, urlopen_mocker, tmp_path ): """ https://github.com/Open-EO/openeo-geopyspark-driver/issues/619 @@ -3630,7 +3630,7 @@ def test_load_stac_with_stac_item_issue619_non_standard_int_eobands_item_propert .replace("asset_green.tiff", f"file://{tiff_path}") .replace("asset_blue.tiff", f"file://{tiff_path}") ) - urllib_mock.get("https://stac.test/item01.json", data=item_json) + urlopen_mocker.get("https://stac.test/item01.json", data=item_json) process_graph = { "loadstac1": { @@ -3655,7 +3655,7 @@ def test_load_stac_with_stac_item_issue619_non_standard_int_eobands_item_propert assert ds.coords["y"].values.min() == pytest.approx(3014000, abs=10) def test_load_stac_with_stac_item_issue619_non_standard_int_eobands_parent_collection_summaries( - self, api110, urllib_mock, tmp_path + self, api110, urllib_and_request_mock, tmp_path ): """ https://github.com/Open-EO/openeo-geopyspark-driver/issues/619 @@ -3672,8 +3672,8 @@ def test_load_stac_with_stac_item_issue619_non_standard_int_eobands_parent_colle .replace("asset_green.tiff", f"file://{tiff_path}") .replace("asset_blue.tiff", f"file://{tiff_path}") ) - urllib_mock.get("https://stac.test/item02.json", data=item_json) - urllib_mock.get( + urllib_and_request_mock.get("https://stac.test/item02.json", data=item_json) + urllib_and_request_mock.get( "https://stac.test/collection02.json", data=get_test_data_file("stac/issue619-eobands-int/collection02.json").read_text(), ) @@ -3700,7 +3700,7 @@ def test_load_stac_with_stac_item_issue619_non_standard_int_eobands_parent_colle assert ds.coords["x"].values.min() == pytest.approx(4309000, abs=10) assert ds.coords["y"].values.min() == pytest.approx(3014000, abs=10) - def test_load_stac_from_stac_item_respects_collection_bands_order(self, api110, urllib_mock, tmp_path): + def test_load_stac_from_stac_item_respects_collection_bands_order(self, api110, urllib_and_request_mock, tmp_path): """load_stac with a STAC item that lacks "properties"/"eo:bands" and therefore falls back to its collection's "summaries"/"eo:bands" """ @@ -3713,8 +3713,8 @@ def test_load_stac_from_stac_item_respects_collection_bands_order(self, api110, "asset02.tiff", f"file://{get_test_data_file('binary/load_stac/collection01/asset02.tif').absolute()}" ) ) - urllib_mock.get("https://stac.test/item.json", data=item_json) - urllib_mock.get("https://stac.test/collection01.json", + urllib_and_request_mock.get("https://stac.test/item.json", data=item_json) + urllib_and_request_mock.get("https://stac.test/collection01.json", data=get_test_data_file("stac/collection01.json").read_text()) process_graph = { @@ -3754,7 +3754,7 @@ def test_load_stac_from_stac_item_respects_collection_bands_order(self, api110, ("2021-02-03T00:00:00Z", "2021-02-04T00:00:01Z", ["2021-02-03", "2021-02-04"]), ], ) - def test_load_stac_from_stac_collection_upper_temporal_bound(self, api110, urllib_mock, tmp_path, + def test_load_stac_from_stac_collection_upper_temporal_bound(self, api110, urllib_and_request_mock, tmp_path, lower_temporal_bound, upper_temporal_bound, expected_timestamps): """load_stac from a STAC Collection with two items that have different timestamps""" @@ -3768,11 +3768,11 @@ def item_json(path): ) ) - urllib_mock.get("https://stac.test/collection.json", + urllib_and_request_mock.get("https://stac.test/collection.json", data=get_test_data_file("stac/issue609-collection-temporal-bound-exclusive/collection.json").read_text()) - urllib_mock.get("https://stac.test/item01.json", + urllib_and_request_mock.get("https://stac.test/item01.json", data=item_json("stac/issue609-collection-temporal-bound-exclusive/item01.json")) - urllib_mock.get("https://stac.test/item02.json", + urllib_and_request_mock.get("https://stac.test/item02.json", data=item_json("stac/issue609-collection-temporal-bound-exclusive/item02.json")) process_graph = { @@ -3931,7 +3931,7 @@ def item_json(path): ("2021-02-03T00:00:00Z", "2021-02-04T00:00:01Z", ["2021-02-03", "2021-02-04"]), ], ) - def test_load_stac_from_stac_api_upper_temporal_bound(self, api110, urllib_mock, requests_mock, tmp_path, + def test_load_stac_from_stac_api_upper_temporal_bound(self, api110, urllib_and_request_mock, requests_mock, tmp_path, lower_temporal_bound, upper_temporal_bound, expected_timestamps): """load_stac from a STAC API with two items that have different timestamps""" @@ -3964,12 +3964,10 @@ def item(path) -> dict: "features": intersecting_items, } - urllib_mock.get("https://stac.test/collections/collection", + urllib_and_request_mock.get("https://stac.test/collections/collection", data=get_test_data_file("stac/issue609-api-temporal-bound-exclusive/collection.json").read_text()) - urllib_mock.get("https://stac.test", # for pystac + urllib_and_request_mock.get("https://stac.test", data=get_test_data_file("stac/issue609-api-temporal-bound-exclusive/catalog.json").read_text()) - requests_mock.get("https://stac.test", # for pystac_client - text=get_test_data_file("stac/issue609-api-temporal-bound-exclusive/catalog.json").read_text()) requests_mock.get("https://stac.test/search", json=feature_collection) @@ -3999,7 +3997,7 @@ def item(path) -> dict: assert (ds["band2"] == 2).all() assert (ds["band3"] == 3).all() - def test_load_stac_from_stac_collection_item_start_datetime_zulu(self, api110, urllib_mock, tmp_path): + def test_load_stac_from_stac_collection_item_start_datetime_zulu(self, api110, urllib_and_request_mock, tmp_path): """load_stac from a STAC Collection with an item that has a start_datetime in Zulu time (time zone 'Z')""" def item_json(path): @@ -4011,9 +4009,9 @@ def item_json(path): ) ) - urllib_mock.get("https://stac.test/collection.json", + urllib_and_request_mock.get("https://stac.test/collection.json", data=get_test_data_file("stac/issue646_start_datetime_zulu/collection.json").read_text()) - urllib_mock.get("https://stac.test/item01.json", + urllib_and_request_mock.get("https://stac.test/item01.json", data=item_json("stac/issue646_start_datetime_zulu/item01.json")) process_graph = { @@ -4037,7 +4035,7 @@ def item_json(path): assert ds.dims == {"t": 1, "x": 10, "y": 10} assert numpy.datetime_as_string(ds.coords["t"].values, unit='h', timezone='UTC').tolist() == ["2022-03-04T00Z"] - def test_load_stac_from_spatial_netcdf_job_results(self, api110, urllib_mock, tmp_path): + def test_load_stac_from_spatial_netcdf_job_results(self, api110, urllib_and_request_mock, tmp_path): def item_json(path): return ( get_test_data_file(path).read_text() @@ -4047,11 +4045,11 @@ def item_json(path): f"{get_test_data_file('binary/load_stac/spatial_netcdf/openEO_1.nc').absolute()}") ) - urllib_mock.get("https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results", + urllib_and_request_mock.get("https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results", data=get_test_data_file("stac/issue646_spatial_netcdf/collection.json").read_text()) - urllib_mock.get("https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results/items/openEO_0.nc", + urllib_and_request_mock.get("https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results/items/openEO_0.nc", data=item_json("stac/issue646_spatial_netcdf/item01.json")) - urllib_mock.get("https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results/items/openEO_1.nc", + urllib_and_request_mock.get("https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results/items/openEO_1.nc", data=item_json("stac/issue646_spatial_netcdf/item02.json")) process_graph = { @@ -4082,7 +4080,7 @@ def item_json(path): assert ds.coords["x"].values.max() == pytest.approx(702325.000, abs=10) assert ds.coords["y"].values.max() == pytest.approx(5626335.000, abs=10) - def test_load_stac_from_spatiotemporal_netcdf_job_results(self, api110, urllib_mock, tmp_path): + def test_load_stac_from_spatiotemporal_netcdf_job_results(self, api110, tmp_path): process_graph = { "loadstac1": { @@ -4171,7 +4169,7 @@ def test_load_stac_from_spatiotemporal_netcdf_job_results(self, api110, urllib_m ], ) def test_stac_api_property_filter( - self, api110, urllib_mock, requests_mock, catalog_url, tmp_path, use_filter_extension, filter_lang, filter, body + self, api110, urllib_and_request_mock, requests_mock, catalog_url, tmp_path, use_filter_extension, filter_lang, filter, body ): def feature_collection(request, _) -> dict: assert "fields" not in request.qs @@ -4233,15 +4231,12 @@ def item(path) -> dict: } } - urllib_mock.get(f"{catalog_url}/collections/collection", + urllib_and_request_mock.get(f"{catalog_url}/collections/collection", data=get_test_data_file("stac/issue640-api-property-filter/collection.json").read_text() .replace("$CATALOG_URL", catalog_url)) - urllib_mock.get(catalog_url, + urllib_and_request_mock.get(catalog_url, data=get_test_data_file("stac/issue640-api-property-filter/catalog.json").read_text() .replace("$CATALOG_URL", catalog_url)) - requests_mock.get(catalog_url, - text=get_test_data_file("stac/issue640-api-property-filter/catalog.json").read_text() - .replace("$CATALOG_URL", catalog_url)) requests_mock.get(f"{catalog_url}/search", json=feature_collection) requests_mock.post(f"{catalog_url}/search", json=feature_collection) @@ -4256,7 +4251,7 @@ def item(path) -> dict: assert ds.shape == (10, 10) assert tuple(ds.bounds) == (5.0, 50.0, 6.0, 51.0) - def test_load_stac_from_unsigned_job_results_respects_proj_metadata(self, api110, urllib_mock, tmp_path, + def test_load_stac_from_unsigned_job_results_respects_proj_metadata(self, api110, tmp_path, batch_job_output_root, zk_job_registry): # get results from own batch job rather than crawl signed STAC URLs results_dir = _setup_existing_job( @@ -4300,7 +4295,7 @@ def test_load_stac_from_unsigned_job_results_respects_proj_metadata(self, api110 assert tuple(ds.bounds) == tuple(map(pytest.approx, expected_bbox)) @gps_config_overrides(job_dependencies_poll_interval_seconds=0, job_dependencies_max_poll_delay_seconds=60) - def test_load_stac_from_partial_job_results_basic(self, api110, urllib_mock, tmp_path, caplog): + def test_load_stac_from_partial_job_results_basic(self, api110, urlopen_mocker, urllib_mock, tmp_path, caplog): """load_stac from partial job results Collection (signed case)""" caplog.set_level("DEBUG") @@ -4328,9 +4323,11 @@ def item_json(path): ) ) - urllib_mock.register("GET", - "https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results?partial=true", - response=collection_json("stac/issue786_partial_job_results/collection.json")) + results_url = "https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results?partial=true" + response = collection_json("stac/issue786_partial_job_results/collection.json") + urllib_mock.register("GET", results_url, response=response) + urlopen_mocker.register("GET", results_url, response=response) + urllib_mock.get("https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results/items/item01.json", data=item_json("stac/issue786_partial_job_results/item01.json")) @@ -4338,7 +4335,7 @@ def item_json(path): "loadstac1": { "process_id": "load_stac", "arguments": { - "url": "https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results?partial=true" + "url": results_url } }, "saveresult1": { @@ -4350,12 +4347,8 @@ def item_json(path): api110.result(process_graph).assert_status_code(200) - assert ("OpenEO batch job results status of" - " https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results?partial=true: running" - in caplog.messages) - assert ("OpenEO batch job results status of" - " https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results?partial=true: finished" - in caplog.messages) + assert ("OpenEO batch job results status of " + results_url + ": running" in caplog.messages) + assert ("OpenEO batch job results status of " + results_url + ": finished" in caplog.messages) @gps_config_overrides(job_dependencies_poll_interval_seconds=0, job_dependencies_max_poll_delay_seconds=60) def test_load_stac_from_unsigned_partial_job_results_basic(self, api110, batch_job_output_root, zk_job_registry, @@ -4405,7 +4398,7 @@ def test_load_stac_from_unsigned_partial_job_results_basic(self, api110, batch_j assert ("OpenEO batch job results status of own job j-2405078f40904a0b85cf8dc5dd55b07e: finished" in caplog.messages) - def test_load_stac_loads_assets_without_eo_bands(self, api110, urllib_mock, requests_mock, tmp_path): + def test_load_stac_loads_assets_without_eo_bands(self, api110, urllib_and_request_mock, requests_mock, tmp_path): """load_stac from a STAC API with one item and two assets, one of which does not carry eo:bands""" def feature_collection(request, _) -> dict: @@ -4447,11 +4440,11 @@ def item(path) -> dict: "features": intersecting_items, } - urllib_mock.get( + urllib_and_request_mock.get( "https://stac.test/collections/collection", data=get_test_data_file("stac/issue762-api-no-eo-bands/collection.json").read_text(), ) - urllib_mock.get( + urllib_and_request_mock.get( "https://stac.test", # for pystac data=get_test_data_file("stac/issue762-api-no-eo-bands/catalog.json").read_text(), ) @@ -4488,7 +4481,7 @@ def item(path) -> dict: assert (ds["band3"] == 3).all() assert (ds["band4"] == 4).all() - def test_load_stac_omits_default_temporal_extent(self, api110, urllib_mock, requests_mock, tmp_path): + def test_load_stac_omits_default_temporal_extent(self, api110, urllib_and_request_mock, requests_mock, tmp_path): """load_stac from a STAC API without specifying a temporal_extent""" def feature_collection(request, _) -> dict: @@ -4500,11 +4493,11 @@ def feature_collection(request, _) -> dict: "features": [], } - urllib_mock.get( + urllib_and_request_mock.get( "https://stac.test/collections/collection", data=get_test_data_file("stac/issue950-api-omit-temporal-extent/collection.json").read_text(), ) - urllib_mock.get( + urllib_and_request_mock.get( "https://stac.test", # for pystac data=get_test_data_file("stac/issue950-api-omit-temporal-extent/catalog.json").read_text(), ) From afb1c186054b03b13216146467f618cad1995ea5 Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Tue, 28 Jan 2025 14:15:23 +0100 Subject: [PATCH 07/13] loosen restriction on urllib3 version #818 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a771d59c..c5b3fa81 100644 --- a/setup.py +++ b/setup.py @@ -101,7 +101,7 @@ "traceback-with-variables==2.0.4", 'scipy>=1.8', # used by sentinel-3 reader "PyJWT[crypto]>=2.9.0", # For identity tokens - "urllib3~=1.26.20" + "urllib3>=1.26.20" ], extras_require={ "dev": tests_require, From 92fdf5aa15b41dd5ce0323db3b8febe0be041b7d Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Tue, 28 Jan 2025 14:17:35 +0100 Subject: [PATCH 08/13] remove test_stac_client.py #818 --- tests/test_stac_client.py | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 tests/test_stac_client.py diff --git a/tests/test_stac_client.py b/tests/test_stac_client.py deleted file mode 100644 index 797b3807..00000000 --- a/tests/test_stac_client.py +++ /dev/null @@ -1,24 +0,0 @@ -import pystac -import unittest -from unittest.mock import patch, Mock - -import pystac_client -from openeogeotrellis.integrations.stac import StacApiIO - -class TestStacClient(unittest.TestCase): - - @patch('requests.Session') - def test_stac_client_timeout(self, mock_session): - timeout_value = 42 - mock_session.send.return_value.status_code = 400 - mock_request = Mock() - mock_session.prepare_request.return_value = mock_request - - stac_io = StacApiIO(timeout=timeout_value) - stac_io.session = mock_session - - stac_object = pystac.read_file(href="collection.json", stac_io=stac_io) - with self.assertRaises(pystac_client.exceptions.APIError): - print(stac_object.get_root().id) - - mock_session.send.assert_called_with(mock_request, timeout=timeout_value) From 9d3d3ceee935b4d856b53a1842c67633d3a74281 Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Thu, 30 Jan 2025 16:25:47 +0100 Subject: [PATCH 09/13] avoid importing pystac.stac_io._is_url #818 --- openeogeotrellis/integrations/stac.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/openeogeotrellis/integrations/stac.py b/openeogeotrellis/integrations/stac.py index da8d9bd8..3578a41f 100644 --- a/openeogeotrellis/integrations/stac.py +++ b/openeogeotrellis/integrations/stac.py @@ -3,8 +3,9 @@ Optional, ) from urllib.error import HTTPError +from urllib.parse import urlparse -from pystac.stac_io import DefaultStacIO, _is_url +from pystac.stac_io import DefaultStacIO from urllib3 import Retry, PoolManager @@ -26,7 +27,8 @@ def read_text_from_href(self, href: str) -> str: Args: href : The URI of the file to open. """ - if _is_url(href): + is_url = urlparse(href).scheme != "" + if is_url: http = PoolManager(retries=self.retry, timeout=20) try: response = http.request( From dd008061f31e488e717ec5423035717b0081678e Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Thu, 30 Jan 2025 16:28:50 +0100 Subject: [PATCH 10/13] fix: use timeout field properly in StacApiIO #818 --- openeogeotrellis/integrations/stac.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openeogeotrellis/integrations/stac.py b/openeogeotrellis/integrations/stac.py index 3578a41f..830da884 100644 --- a/openeogeotrellis/integrations/stac.py +++ b/openeogeotrellis/integrations/stac.py @@ -10,6 +10,7 @@ class StacApiIO(DefaultStacIO): + """A STAC IO implementation that supports reading with timeout and retry.""" def __init__( self, @@ -29,7 +30,7 @@ def read_text_from_href(self, href: str) -> str: """ is_url = urlparse(href).scheme != "" if is_url: - http = PoolManager(retries=self.retry, timeout=20) + http = PoolManager(retries=self.retry, timeout=self.timeout) try: response = http.request( "GET", href From ee361d28c901827b46eea7ac43d3e7bb34289d31 Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Thu, 30 Jan 2025 16:32:02 +0100 Subject: [PATCH 11/13] rename UrlopenMocker to UrllibPoolManagerMocker #818 --- tests/test_api_result.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/test_api_result.py b/tests/test_api_result.py index 3c0f64fc..8ea71f5c 100644 --- a/tests/test_api_result.py +++ b/tests/test_api_result.py @@ -1941,7 +1941,7 @@ def jvm_mock(): yield jvm_mock -class UrlopenMocker(UrllibMocker): +class UrllibPoolManagerMocker(UrllibMocker): @contextlib.contextmanager def patch(self): with mock.patch('urllib3.poolmanager.PoolManager.request', new=self._request): @@ -1956,16 +1956,16 @@ def _request(self, method, url, fields=None, headers={}, **urlopen_kw): @pytest.fixture -def urlopen_mocker() -> UrlopenMocker: - with UrlopenMocker().patch() as mocker: +def urllib_poolmanager_mocker() -> UrllibPoolManagerMocker: + with UrllibPoolManagerMocker().patch() as mocker: yield mocker class UrllibAndRequestMocker: - def __init__(self, urllib_mock, requests_mock, urlopen_mocker): + def __init__(self, urllib_mock, requests_mock, urllib_poolmanager_mocker): self.urllib_mock = urllib_mock self.requests_mock = requests_mock - self.urlopen_mocker = urlopen_mocker + self.urllib_poolmanager_mocker = urllib_poolmanager_mocker def get(self, href, data): code = 200 @@ -1973,11 +1973,11 @@ def get(self, href, data): if isinstance(data, str): data = data.encode("utf-8") self.requests_mock.get(href, content=data) - self.urlopen_mocker.get(href, data, code) + self.urllib_poolmanager_mocker.get(href, data, code) @pytest.fixture -def urllib_and_request_mock(urllib_mock, requests_mock, urlopen_mocker) -> UrllibAndRequestMocker: - yield UrllibAndRequestMocker(urllib_mock, requests_mock, urlopen_mocker) +def urllib_and_request_mock(urllib_mock, requests_mock, urllib_poolmanager_mocker) -> UrllibAndRequestMocker: + yield UrllibAndRequestMocker(urllib_mock, requests_mock, urllib_poolmanager_mocker) @pytest.fixture @@ -3578,7 +3578,7 @@ def _mock_stac_api_collection() -> Collection: "stac/item02.json", ], ) - def test_load_stac_with_stac_item_json(self, item_path, api110, urlopen_mocker, tmp_path): + def test_load_stac_with_stac_item_json(self, item_path, api110, urllib_poolmanager_mocker, tmp_path): """load_stac with a simple STAC item (as JSON file)""" item_json = ( get_test_data_file(item_path).read_text() @@ -3588,7 +3588,7 @@ def test_load_stac_with_stac_item_json(self, item_path, api110, urlopen_mocker, "asset01.tiff", f"file://{get_test_data_file('binary/load_stac/BVL_v1/BVL_v1_2021.tif').absolute()}" ) ) - urlopen_mocker.get("https://stac.test/item.json", data=item_json) + urllib_poolmanager_mocker.get("https://stac.test/item.json", data=item_json) process_graph = { "loadstac1": { @@ -3613,7 +3613,7 @@ def test_load_stac_with_stac_item_json(self, item_path, api110, urlopen_mocker, assert ds.coords["y"].values.min() == pytest.approx(3014000, abs=10) def test_load_stac_with_stac_item_issue619_non_standard_int_eobands_item_properties( - self, api110, urlopen_mocker, tmp_path + self, api110, urllib_poolmanager_mocker, tmp_path ): """ https://github.com/Open-EO/openeo-geopyspark-driver/issues/619 @@ -3630,7 +3630,7 @@ def test_load_stac_with_stac_item_issue619_non_standard_int_eobands_item_propert .replace("asset_green.tiff", f"file://{tiff_path}") .replace("asset_blue.tiff", f"file://{tiff_path}") ) - urlopen_mocker.get("https://stac.test/item01.json", data=item_json) + urllib_poolmanager_mocker.get("https://stac.test/item01.json", data=item_json) process_graph = { "loadstac1": { @@ -4295,7 +4295,7 @@ def test_load_stac_from_unsigned_job_results_respects_proj_metadata(self, api110 assert tuple(ds.bounds) == tuple(map(pytest.approx, expected_bbox)) @gps_config_overrides(job_dependencies_poll_interval_seconds=0, job_dependencies_max_poll_delay_seconds=60) - def test_load_stac_from_partial_job_results_basic(self, api110, urlopen_mocker, urllib_mock, tmp_path, caplog): + def test_load_stac_from_partial_job_results_basic(self, api110, urllib_poolmanager_mocker, urllib_mock, tmp_path, caplog): """load_stac from partial job results Collection (signed case)""" caplog.set_level("DEBUG") @@ -4326,7 +4326,7 @@ def item_json(path): results_url = "https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results?partial=true" response = collection_json("stac/issue786_partial_job_results/collection.json") urllib_mock.register("GET", results_url, response=response) - urlopen_mocker.register("GET", results_url, response=response) + urllib_poolmanager_mocker.register("GET", results_url, response=response) urllib_mock.get("https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results/items/item01.json", data=item_json("stac/issue786_partial_job_results/item01.json")) From cdd75f2badb566d65a39c2348d27b7fc3f023dcd Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Thu, 30 Jan 2025 16:33:43 +0100 Subject: [PATCH 12/13] rename urllib_poolmanager_mocker to urllib_poolmanager_mock #818 --- tests/test_api_result.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_api_result.py b/tests/test_api_result.py index 8ea71f5c..fbdb3b3e 100644 --- a/tests/test_api_result.py +++ b/tests/test_api_result.py @@ -1956,16 +1956,16 @@ def _request(self, method, url, fields=None, headers={}, **urlopen_kw): @pytest.fixture -def urllib_poolmanager_mocker() -> UrllibPoolManagerMocker: +def urllib_poolmanager_mock() -> UrllibPoolManagerMocker: with UrllibPoolManagerMocker().patch() as mocker: yield mocker class UrllibAndRequestMocker: - def __init__(self, urllib_mock, requests_mock, urllib_poolmanager_mocker): + def __init__(self, urllib_mock, requests_mock, urllib_poolmanager_mock): self.urllib_mock = urllib_mock self.requests_mock = requests_mock - self.urllib_poolmanager_mocker = urllib_poolmanager_mocker + self.urllib_poolmanager_mock = urllib_poolmanager_mock def get(self, href, data): code = 200 @@ -1973,11 +1973,11 @@ def get(self, href, data): if isinstance(data, str): data = data.encode("utf-8") self.requests_mock.get(href, content=data) - self.urllib_poolmanager_mocker.get(href, data, code) + self.urllib_poolmanager_mock.get(href, data, code) @pytest.fixture -def urllib_and_request_mock(urllib_mock, requests_mock, urllib_poolmanager_mocker) -> UrllibAndRequestMocker: - yield UrllibAndRequestMocker(urllib_mock, requests_mock, urllib_poolmanager_mocker) +def urllib_and_request_mock(urllib_mock, requests_mock, urllib_poolmanager_mock) -> UrllibAndRequestMocker: + yield UrllibAndRequestMocker(urllib_mock, requests_mock, urllib_poolmanager_mock) @pytest.fixture @@ -3578,7 +3578,7 @@ def _mock_stac_api_collection() -> Collection: "stac/item02.json", ], ) - def test_load_stac_with_stac_item_json(self, item_path, api110, urllib_poolmanager_mocker, tmp_path): + def test_load_stac_with_stac_item_json(self, item_path, api110, urllib_poolmanager_mock, tmp_path): """load_stac with a simple STAC item (as JSON file)""" item_json = ( get_test_data_file(item_path).read_text() @@ -3588,7 +3588,7 @@ def test_load_stac_with_stac_item_json(self, item_path, api110, urllib_poolmanag "asset01.tiff", f"file://{get_test_data_file('binary/load_stac/BVL_v1/BVL_v1_2021.tif').absolute()}" ) ) - urllib_poolmanager_mocker.get("https://stac.test/item.json", data=item_json) + urllib_poolmanager_mock.get("https://stac.test/item.json", data=item_json) process_graph = { "loadstac1": { @@ -3613,7 +3613,7 @@ def test_load_stac_with_stac_item_json(self, item_path, api110, urllib_poolmanag assert ds.coords["y"].values.min() == pytest.approx(3014000, abs=10) def test_load_stac_with_stac_item_issue619_non_standard_int_eobands_item_properties( - self, api110, urllib_poolmanager_mocker, tmp_path + self, api110, urllib_poolmanager_mock, tmp_path ): """ https://github.com/Open-EO/openeo-geopyspark-driver/issues/619 @@ -3630,7 +3630,7 @@ def test_load_stac_with_stac_item_issue619_non_standard_int_eobands_item_propert .replace("asset_green.tiff", f"file://{tiff_path}") .replace("asset_blue.tiff", f"file://{tiff_path}") ) - urllib_poolmanager_mocker.get("https://stac.test/item01.json", data=item_json) + urllib_poolmanager_mock.get("https://stac.test/item01.json", data=item_json) process_graph = { "loadstac1": { @@ -4295,7 +4295,7 @@ def test_load_stac_from_unsigned_job_results_respects_proj_metadata(self, api110 assert tuple(ds.bounds) == tuple(map(pytest.approx, expected_bbox)) @gps_config_overrides(job_dependencies_poll_interval_seconds=0, job_dependencies_max_poll_delay_seconds=60) - def test_load_stac_from_partial_job_results_basic(self, api110, urllib_poolmanager_mocker, urllib_mock, tmp_path, caplog): + def test_load_stac_from_partial_job_results_basic(self, api110, urllib_poolmanager_mock, urllib_mock, tmp_path, caplog): """load_stac from partial job results Collection (signed case)""" caplog.set_level("DEBUG") @@ -4326,7 +4326,7 @@ def item_json(path): results_url = "https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results?partial=true" response = collection_json("stac/issue786_partial_job_results/collection.json") urllib_mock.register("GET", results_url, response=response) - urllib_poolmanager_mocker.register("GET", results_url, response=response) + urllib_poolmanager_mock.register("GET", results_url, response=response) urllib_mock.get("https://openeo.test/openeo/jobs/j-2402094545c945c09e1307503aa58a3a/results/items/item01.json", data=item_json("stac/issue786_partial_job_results/item01.json")) From 036957651a3b7f67774eb55f0e32871a838db2e5 Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Thu, 30 Jan 2025 16:36:52 +0100 Subject: [PATCH 13/13] fix: don't encode urllib_poolmanager_mock data #818 --- tests/test_api_result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_api_result.py b/tests/test_api_result.py index fbdb3b3e..d68a9de4 100644 --- a/tests/test_api_result.py +++ b/tests/test_api_result.py @@ -1970,10 +1970,10 @@ def __init__(self, urllib_mock, requests_mock, urllib_poolmanager_mock): def get(self, href, data): code = 200 self.urllib_mock.get(href, data, code) + self.urllib_poolmanager_mock.get(href, data, code) if isinstance(data, str): data = data.encode("utf-8") self.requests_mock.get(href, content=data) - self.urllib_poolmanager_mock.get(href, data, code) @pytest.fixture def urllib_and_request_mock(urllib_mock, requests_mock, urllib_poolmanager_mock) -> UrllibAndRequestMocker: