Skip to content

Commit

Permalink
Merge pull request #1084 from xcube-dev/konstntokas-xxx-small_adjustm…
Browse files Browse the repository at this point in the history
…ents_https_datastore_for_xcube_zenodo

Bug fix in `has_data` method for `"https"` data store
  • Loading branch information
konstntokas authored Nov 13, 2024
2 parents 765d0aa + 2fd4010 commit a16335d
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 4 deletions.
11 changes: 8 additions & 3 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,21 @@
* The behaviour of the function `xcube.core.resample.resample_in_space()` has
been changed if no `tile_size` is specified for the target grid mapping. It now
defaults to the `tile_size` of the source grid mapping, improving the
user-friendliness of resampling and reprojection.
user-friendliness of resampling and reprojection. (#1082)
* The `"https"` data store (`store = new_data_store("https", ...)`) now allows
for lazily accessing NetCDF files.
Implementation note: For this to work, the `DatasetNetcdfFsDataAccessor`
class has been adjusted.
class has been adjusted. (#1083)

### Fixes

* The function `xcube.core.resample.resample_in_space()` now always operates
lazily and therefore supports chunk-wise, parallel processing. (#1
lazily and therefore supports chunk-wise, parallel processing. (#1082)
* Bug fix in the `has_data` method of the `"https"` data store
(`store = new_data_store("https", ...)`). (#1084)
* Bug fix in the `has_data` method of all filesystem-based data stores
(`"file", "s3", "https"`). `data_type` can be any of the supported data types,
e.g. for a `.tif` file, `data_type` can be either `dataset` or `mldataset`. (#1084)
* The explanation of the parameter `xy_scale` in the method
`xcube.core.gridmapping.GridMapping.scale` has been corrected. (#1086)

Expand Down
32 changes: 32 additions & 0 deletions test/core/store/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
# Permissions are hereby granted under the terms of the MIT License:
# https://opensource.org/licenses/MIT.
import unittest
from unittest.mock import patch
from unittest.mock import MagicMock

from fsspec.registry import register_implementation

from xcube.core.store import DataStoreError
from xcube.core.store import list_data_store_ids
from xcube.core.store import new_data_store

import pytest


Expand Down Expand Up @@ -57,6 +60,35 @@ def test_get_data_opener_ids(self):
store.get_data_opener_ids(data_id="test.geotiff", data_type="mldataset"),
)

@patch("fsspec.filesystem")
def test_has_data(self, mock_filesystem):
    """Check ``has_data`` of the ``"https"`` store against a mocked filesystem.

    The mocked ``exists`` always reports ``True``, so the outcome depends
    only on whether the store decides to query the filesystem for the
    given ``data_type``.
    """
    # Stand-in for the filesystem object returned by ``fsspec.filesystem``.
    fake_fs = MagicMock()
    fake_fs.sep = "/"
    fake_fs.exists.return_value = True
    mock_filesystem.return_value = fake_fs

    expected_url = "https://test.org/test.tif"
    store = new_data_store("https", root="test.org")

    # No data type given: the filesystem is created once and queried once.
    found = store.has_data(data_id="test.tif")
    self.assertEqual(mock_filesystem.call_count, 1)
    fake_fs.exists.assert_called_once_with(expected_url)
    self.assertTrue(found)

    # Both data types are accepted for a ".tif" file; each call adds
    # exactly one more ``exists`` query.
    for expected_calls, data_type in enumerate(("dataset", "mldataset"), start=2):
        found = store.has_data(data_id="test.tif", data_type=data_type)
        fake_fs.exists.assert_called_with(expected_url)
        self.assertEqual(fake_fs.exists.call_count, expected_calls)
        self.assertTrue(found)

    # A data type that does not fit ".tif" is rejected without touching
    # the filesystem, so the call count stays at 3.
    found = store.has_data(data_id="test.tif", data_type="geodataframe")
    self.assertEqual(fake_fs.exists.call_count, 3)
    self.assertFalse(found)


def test_fsspec_instantiation_error():
error_string = "deliberate instantiation error for testing"
Expand Down
16 changes: 15 additions & 1 deletion xcube/core/store/fs/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,10 @@ def get_data_ids(

def has_data(self, data_id: str, data_type: DataTypeLike = None) -> bool:
# Report whether `data_id` exists in this store for the requested `data_type`.
# Falls through to `return False` when the type guard below does not hold.
assert_given(data_id, "data_id")
# NOTE(review): this view is a flattened diff hunk, so the next two `if` lines
# look like the before/after variants of a single guard rather than two nested
# checks — confirm against the repository before relying on either reading.
if self._is_data_specified(data_id, data_type):
if self._is_data_type_available(data_id, data_type):
fs_path = self._convert_data_id_into_fs_path(data_id)
# For the "https" protocol the scheme is prepended, so the existence check
# is made against the full URL instead of the bare path.
if self.protocol == "https":
fs_path = f"{self.protocol}://{fs_path}"
return self.fs.exists(fs_path)
return False

Expand Down Expand Up @@ -509,6 +511,18 @@ def _is_data_specified(
return False
return True

def _is_data_type_available(self, data_id: str, data_type: DataTypeLike) -> bool:
    """Check whether ``data_type`` can be served for the format of ``data_id``.

    The format is looked up in ``_FILENAME_EXT_TO_FORMAT`` using the
    lower-cased result of ``_get_filename_ext(data_id)``; the data types
    available for that format come from ``_FORMAT_TO_DATA_TYPE_ALIASES``.

    Args:
        data_id: Identifier of the data resource.
        data_type: Requested data type; it is passed through
            ``DataType.normalize`` before comparison.

    Returns:
        ``True`` if the normalized ``data_type`` is a super type of at
        least one data type available for the format, ``False`` if the
        format is unknown or no available data type matches.
    """
    ext = self._get_filename_ext(data_id)
    format_id = _FILENAME_EXT_TO_FORMAT.get(ext.lower())
    if format_id is None:
        return False
    # Default to an empty tuple: a format without an alias entry then
    # yields False instead of a TypeError from iterating over None.
    avail_data_types = _FORMAT_TO_DATA_TYPE_ALIASES.get(format_id, ())
    data_type = DataType.normalize(data_type)
    return any(
        data_type.is_super_type_of(avail_data_type)
        for avail_data_type in avail_data_types
    )

def _assert_data_specified(self, data_id, data_type: DataTypeLike):
self._is_data_specified(data_id, data_type, require=True)

Expand Down

0 comments on commit a16335d

Please sign in to comment.