diff --git a/CHANGES.md b/CHANGES.md index cf4a57899..9a0bc464a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,16 +9,21 @@ * The behaviour of the function `xcube.core.resample.resample_in_space()` has been changed if no `tile_size` is specified for the target grid mapping. It now defaults to the `tile_size` of the source grid mapping, improving the - user-friendliness of resampling and reprojection. + user-friendliness of resampling and reprojection. (#1082) * The `"https"` data store (`store = new_data_store("https", ...)`) now allows for lazily accessing NetCDF files. Implementation note: For this to work, the `DatasetNetcdfFsDataAccessor` - class has been adjusted. + class has been adjusted. (#1083) ### Fixes * The function `xcube.core.resample.resample_in_space()` now always operates - lazily and therefore supports chunk-wise, parallel processing. (#1 + lazily and therefore supports chunk-wise, parallel processing. (#1082) +* Bug fix in the `has_data` method of the `"https"` data store + (`store = new_data_store("https", ...)`). (#1084) +* Bug fix in the `has_data` method of all filesystem-based data stores + (`"file", "s3", "https"`). `data_type` can be any of the supported data types, + e.g. for a `.tif` file, `data_type` can be either `dataset` or `mldataset`. (#1084) * The explaination of the parameter `xy_scale` in the method `xcube.core.gridmapping.GridMapping.scale` has been corrected. (#1086) diff --git a/test/core/store/test_store.py b/test/core/store/test_store.py index b10c28665..a77fc5eca 100644 --- a/test/core/store/test_store.py +++ b/test/core/store/test_store.py @@ -2,12 +2,15 @@ # Permissions are hereby granted under the terms of the MIT License: # https://opensource.org/licenses/MIT. 
import unittest +from unittest.mock import patch +from unittest.mock import MagicMock from fsspec.registry import register_implementation from xcube.core.store import DataStoreError from xcube.core.store import list_data_store_ids from xcube.core.store import new_data_store + import pytest @@ -57,6 +60,35 @@ def test_get_data_opener_ids(self): store.get_data_opener_ids(data_id="test.geotiff", data_type="mldataset"), ) + @patch("fsspec.filesystem") + def test_has_data(self, mock_filesystem): + # Mock the HTTPFileSystem instance and its `exists` method + mock_http_fs = MagicMock() + mock_filesystem.return_value = mock_http_fs + mock_http_fs.exists.return_value = True + mock_http_fs.sep = "/" + + store = new_data_store("https", root="test.org") + + res = store.has_data(data_id="test.tif") + self.assertEqual(mock_filesystem.call_count, 1) + mock_http_fs.exists.assert_called_once_with("https://test.org/test.tif") + self.assertTrue(res) + + res = store.has_data(data_id="test.tif", data_type="dataset") + mock_http_fs.exists.assert_called_with("https://test.org/test.tif") + self.assertEqual(mock_http_fs.exists.call_count, 2) + self.assertTrue(res) + + res = store.has_data(data_id="test.tif", data_type="mldataset") + mock_http_fs.exists.assert_called_with("https://test.org/test.tif") + self.assertEqual(mock_http_fs.exists.call_count, 3) + self.assertTrue(res) + + res = store.has_data(data_id="test.tif", data_type="geodataframe") + self.assertEqual(mock_http_fs.exists.call_count, 3) + self.assertFalse(res) + def test_fsspec_instantiation_error(): error_string = "deliberate instantiation error for testing" diff --git a/xcube/core/store/fs/store.py b/xcube/core/store/fs/store.py index 2852130ab..69327d29b 100644 --- a/xcube/core/store/fs/store.py +++ b/xcube/core/store/fs/store.py @@ -275,8 +275,10 @@ def get_data_ids( def has_data(self, data_id: str, data_type: DataTypeLike = None) -> bool: assert_given(data_id, "data_id") - if self._is_data_specified(data_id, data_type): + 
if self._is_data_type_available(data_id, data_type): fs_path = self._convert_data_id_into_fs_path(data_id) + if self.protocol == "https": + fs_path = f"{self.protocol}://{fs_path}" return self.fs.exists(fs_path) return False @@ -509,6 +511,18 @@ def _is_data_specified( return False return True + def _is_data_type_available(self, data_id: str, data_type: DataTypeLike) -> bool: + ext = self._get_filename_ext(data_id) + format_id = _FILENAME_EXT_TO_FORMAT.get(ext.lower()) + if format_id is None: + return False + avail_data_types = _FORMAT_TO_DATA_TYPE_ALIASES.get(format_id) + data_type = DataType.normalize(data_type) + return any( + data_type.is_super_type_of(avail_data_type) + for avail_data_type in avail_data_types + ) + def _assert_data_specified(self, data_id, data_type: DataTypeLike): self._is_data_specified(data_id, data_type, require=True)