From 3ab96223abf7e060274a49ede3b9f79271a225ef Mon Sep 17 00:00:00 2001 From: konstntokas Date: Wed, 6 Nov 2024 17:39:14 +0100 Subject: [PATCH 01/10] tests are missing --- xcube/core/store/fs/store.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/xcube/core/store/fs/store.py b/xcube/core/store/fs/store.py index 2852130ab..93101876d 100644 --- a/xcube/core/store/fs/store.py +++ b/xcube/core/store/fs/store.py @@ -275,8 +275,10 @@ def get_data_ids( def has_data(self, data_id: str, data_type: DataTypeLike = None) -> bool: assert_given(data_id, "data_id") - if self._is_data_specified(data_id, data_type): + if self._is_data_type_available(data_id, data_type): fs_path = self._convert_data_id_into_fs_path(data_id) + if self.protocol == "https": + fs_path = f"{self.protocol}://{fs_path}" return self.fs.exists(fs_path) return False @@ -509,6 +511,23 @@ def _is_data_specified( return False return True + def _is_data_type_available(self, data_id: str, data_type: DataTypeLike) -> bool: + ext = self._get_filename_ext(data_id) + format_id = _FILENAME_EXT_TO_FORMAT.get(ext.lower()) + if format_id is None: + return False + avail_data_types = _FORMAT_TO_DATA_TYPE_ALIASES.get(format_id) + data_type = DataType.normalize(data_type) + if any( + [ + data_type.is_super_type_of(avail_data_type) + for avail_data_type in avail_data_types + ] + ): + return True + else: + return False + def _assert_data_specified(self, data_id, data_type: DataTypeLike): self._is_data_specified(data_id, data_type, require=True) From 4daacba86b14453709f8f88f9a9401d71e81545f Mon Sep 17 00:00:00 2001 From: konstntokas Date: Thu, 7 Nov 2024 10:23:04 +0100 Subject: [PATCH 02/10] unit test added, CHANGES.log updated --- CHANGES.md | 8 +++++--- test/core/store/test_store.py | 24 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d9b6a76eb..4ce203798 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,16 +9,18 @@ * The behaviour of the function `xcube.core.resample.resample_in_space()` has been changed if no `tile_size` is specified for the target grid mapping. It now defaults to the `tile_size` of the source grid mapping, improving the - user-friendliness of resampling and reprojection. + user-friendliness of resampling and reprojection. (#1082) * The `"https"` data store (`store = new_data_store("https", ...)`) now allows for lazily accessing NetCDF files. Implementation note: For this to work, the `DatasetNetcdfFsDataAccessor` - class has been adjusted. + class has been adjusted. (#1083) ### Fixes * The function `xcube.core.resample.resample_in_space()` now always operates - lazily and therefore supports chunk-wise, parallel processing. (#1 + lazily and therefore supports chunk-wise, parallel processing. (#1082) +* Bux fix in the `has_data` method `"https"` data store + (`store = new_data_store("https", ...)`). (#1084) ## Changes in 1.7.1 diff --git a/test/core/store/test_store.py b/test/core/store/test_store.py index b10c28665..3fceee507 100644 --- a/test/core/store/test_store.py +++ b/test/core/store/test_store.py @@ -2,6 +2,7 @@ # Permissions are hereby granted under the terms of the MIT License: # https://opensource.org/licenses/MIT. import unittest +from unittest.mock import patch from fsspec.registry import register_implementation @@ -57,6 +58,29 @@ def test_get_data_opener_ids(self): store.get_data_opener_ids(data_id="test.geotiff", data_type="mldataset"), ) + @patch("fsspec.implementations.http.HTTPFileSystem.exists") + def test_has_data(self, mock_fs_exists): + mock_fs_exists.return_value = True + store = new_data_store("https", root="test.org") + + res = store.has_data(data_id="test.tif") + mock_fs_exists.assert_called_once_with("https://test.org/test.tif") + self.assertTrue(res) + + res = store.has_data(data_id="test.tif", data_type="dataset") + mock_fs_exists.assert_called_with("https://test.org/test.tif") + self.assertEqual(mock_fs_exists.call_count, 2) + self.assertTrue(res) + + res = store.has_data(data_id="test.tif", data_type="mldataset") + mock_fs_exists.assert_called_with("https://test.org/test.tif") + self.assertEqual(mock_fs_exists.call_count, 3) + self.assertTrue(res) + + res = store.has_data(data_id="test.tif", data_type="geodataframe") + self.assertEqual(mock_fs_exists.call_count, 3) + self.assertFalse(res) + def test_fsspec_instantiation_error(): error_string = "deliberate instantiation error for testing" From 49aa068be9898ebc33426dffd73a4b5c9f6b988b Mon Sep 17 00:00:00 2001 From: konstntokas Date: Thu, 7 Nov 2024 10:34:30 +0100 Subject: [PATCH 03/10] updated CHNAGES.log --- CHANGES.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 4ce203798..29f920852 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -19,8 +19,11 @@ * The function `xcube.core.resample.resample_in_space()` now always operates lazily and therefore supports chunk-wise, parallel processing. (#1082) -* Bux fix in the `has_data` method `"https"` data store +* Bux fix in the `has_data` method of the `"https"` data store (`store = new_data_store("https", ...)`). (#1084) +* Bux fix in the `has_data` method of all filesystem-based data store + (`"file", "s3", "https"`). `data_type` can be any of the supported data types, + e.g. for `.tif` file, `data_type` can be either `dataset` or `mldataset`. (#1084) ## Changes in 1.7.1 From 3eb369453b0a9cde84d8b207de12dd230ec3929d Mon Sep 17 00:00:00 2001 From: konstntokas Date: Thu, 7 Nov 2024 11:20:02 +0100 Subject: [PATCH 04/10] unittest patch changed --- test/core/store/test_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/core/store/test_store.py b/test/core/store/test_store.py index 3fceee507..01c7d11a9 100644 --- a/test/core/store/test_store.py +++ b/test/core/store/test_store.py @@ -58,7 +58,7 @@ def test_get_data_opener_ids(self): store.get_data_opener_ids(data_id="test.geotiff", data_type="mldataset"), ) - @patch("fsspec.implementations.http.HTTPFileSystem.exists") + @patch("fsspec.implementations.http.HTTPFileSystem._exists") def test_has_data(self, mock_fs_exists): mock_fs_exists.return_value = True store = new_data_store("https", root="test.org") From caf176a0c838dceb30d83a07d9ccab579d2de0d4 Mon Sep 17 00:00:00 2001 From: konstntokas Date: Thu, 7 Nov 2024 11:42:50 +0100 Subject: [PATCH 05/10] debug test --- test/core/store/test_store.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/test/core/store/test_store.py b/test/core/store/test_store.py index 01c7d11a9..2bde2acda 100644 --- a/test/core/store/test_store.py +++ b/test/core/store/test_store.py @@ -58,27 +58,36 @@ def test_get_data_opener_ids(self): store.get_data_opener_ids(data_id="test.geotiff", data_type="mldataset"), ) - @patch("fsspec.implementations.http.HTTPFileSystem._exists") + @patch("fsspec.implementations.http.HTTPFileSystem.exists") def test_has_data(self, mock_fs_exists): mock_fs_exists.return_value = True store = new_data_store("https", root="test.org") res = store.has_data(data_id="test.tif") - mock_fs_exists.assert_called_once_with("https://test.org/test.tif") + # mock_fs_exists.assert_called_once_with("https://test.org/test.tif") self.assertTrue(res) + print(mock_fs_exists.call_count) + print(mock_fs_exists.call_args) + res = store.has_data(data_id="test.tif", data_type="dataset") - mock_fs_exists.assert_called_with("https://test.org/test.tif") - self.assertEqual(mock_fs_exists.call_count, 2) + # mock_fs_exists.assert_called_with("https://test.org/test.tif") + # self.assertEqual(mock_fs_exists.call_count, 2) self.assertTrue(res) + print(mock_fs_exists.call_count) + print(mock_fs_exists.call_args) + res = store.has_data(data_id="test.tif", data_type="mldataset") - mock_fs_exists.assert_called_with("https://test.org/test.tif") - self.assertEqual(mock_fs_exists.call_count, 3) + # mock_fs_exists.assert_called_with("https://test.org/test.tif") + # self.assertEqual(mock_fs_exists.call_count, 3) self.assertTrue(res) + print(mock_fs_exists.call_count) + print(mock_fs_exists.call_args) + res = store.has_data(data_id="test.tif", data_type="geodataframe") - self.assertEqual(mock_fs_exists.call_count, 3) + # self.assertEqual(mock_fs_exists.call_count, 3) self.assertFalse(res) From e08d80bd539be16b281b02a48cf142418e07a1da Mon Sep 17 00:00:00 2001 From: konstntokas Date: Thu, 7 Nov 2024 12:06:57 +0100 Subject: [PATCH 06/10] debug unitttest --- .github/workflows/xcube_workflow.yaml | 2 +- xcube/core/store/fs/store.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/xcube_workflow.yaml b/.github/workflows/xcube_workflow.yaml index 2fb56bffe..673200476 100644 --- a/.github/workflows/xcube_workflow.yaml +++ b/.github/workflows/xcube_workflow.yaml @@ -41,7 +41,7 @@ jobs: - name: unittest-xcube shell: bash -l {0} run: | - pytest --cov=xcube --cov-report=xml + pytest -k test_has_data --cov=xcube --cov-report=xml - uses: codecov/codecov-action@v4 with: verbose: true # optional (default = false) diff --git a/xcube/core/store/fs/store.py b/xcube/core/store/fs/store.py index 93101876d..b3c969b8d 100644 --- a/xcube/core/store/fs/store.py +++ b/xcube/core/store/fs/store.py @@ -275,10 +275,13 @@ def get_data_ids( def has_data(self, data_id: str, data_type: DataTypeLike = None) -> bool: assert_given(data_id, "data_id") + print(data_id) if self._is_data_type_available(data_id, data_type): fs_path = self._convert_data_id_into_fs_path(data_id) if self.protocol == "https": fs_path = f"{self.protocol}://{fs_path}" + print(fs_path) + print(self.fs) return self.fs.exists(fs_path) return False From af96ea04c3389aadb50e44edb9a076445f87502f Mon Sep 17 00:00:00 2001 From: konstntokas Date: Thu, 7 Nov 2024 12:12:11 +0100 Subject: [PATCH 07/10] debug unitttest --- test/core/store/test_store.py | 21 ++++++--------------- xcube/core/store/fs/store.py | 3 --- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/test/core/store/test_store.py b/test/core/store/test_store.py index 2bde2acda..3fceee507 100644 --- a/test/core/store/test_store.py +++ b/test/core/store/test_store.py @@ -64,30 +64,21 @@ def test_has_data(self, mock_fs_exists): store = new_data_store("https", root="test.org") res = store.has_data(data_id="test.tif") - # mock_fs_exists.assert_called_once_with("https://test.org/test.tif") + mock_fs_exists.assert_called_once_with("https://test.org/test.tif") self.assertTrue(res) - print(mock_fs_exists.call_count) - print(mock_fs_exists.call_args) - res = store.has_data(data_id="test.tif", data_type="dataset") - # mock_fs_exists.assert_called_with("https://test.org/test.tif") - # self.assertEqual(mock_fs_exists.call_count, 2) + mock_fs_exists.assert_called_with("https://test.org/test.tif") + self.assertEqual(mock_fs_exists.call_count, 2) self.assertTrue(res) - print(mock_fs_exists.call_count) - print(mock_fs_exists.call_args) - res = store.has_data(data_id="test.tif", data_type="mldataset") - # mock_fs_exists.assert_called_with("https://test.org/test.tif") - # self.assertEqual(mock_fs_exists.call_count, 3) + mock_fs_exists.assert_called_with("https://test.org/test.tif") + self.assertEqual(mock_fs_exists.call_count, 3) self.assertTrue(res) - print(mock_fs_exists.call_count) - print(mock_fs_exists.call_args) - res = store.has_data(data_id="test.tif", data_type="geodataframe") - # self.assertEqual(mock_fs_exists.call_count, 3) + self.assertEqual(mock_fs_exists.call_count, 3) self.assertFalse(res) diff --git a/xcube/core/store/fs/store.py b/xcube/core/store/fs/store.py index b3c969b8d..93101876d 100644 --- a/xcube/core/store/fs/store.py +++ b/xcube/core/store/fs/store.py @@ -275,13 +275,10 @@ def get_data_ids( def has_data(self, data_id: str, data_type: DataTypeLike = None) -> bool: assert_given(data_id, "data_id") - print(data_id) if self._is_data_type_available(data_id, data_type): fs_path = self._convert_data_id_into_fs_path(data_id) if self.protocol == "https": fs_path = f"{self.protocol}://{fs_path}" - print(fs_path) - print(self.fs) return self.fs.exists(fs_path) return False From a8e0968ac67257879ad46191fd222128abd6a589 Mon Sep 17 00:00:00 2001 From: konstntokas Date: Thu, 7 Nov 2024 12:14:53 +0100 Subject: [PATCH 08/10] debug unitttest --- .github/workflows/xcube_workflow.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/xcube_workflow.yaml b/.github/workflows/xcube_workflow.yaml index 673200476..2fb56bffe 100644 --- a/.github/workflows/xcube_workflow.yaml +++ b/.github/workflows/xcube_workflow.yaml @@ -41,7 +41,7 @@ jobs: - name: unittest-xcube shell: bash -l {0} run: | - pytest -k test_has_data --cov=xcube --cov-report=xml + pytest --cov=xcube --cov-report=xml - uses: codecov/codecov-action@v4 with: verbose: true # optional (default = false) From 2ed21496160125219bbd85a2f88cbe546cbab035 Mon Sep 17 00:00:00 2001 From: konstntokas Date: Fri, 8 Nov 2024 08:08:07 +0100 Subject: [PATCH 09/10] problems with mocking resolved --- test/core/store/test_store.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/test/core/store/test_store.py b/test/core/store/test_store.py index 3fceee507..a77fc5eca 100644 --- a/test/core/store/test_store.py +++ b/test/core/store/test_store.py @@ -3,12 +3,14 @@ # https://opensource.org/licenses/MIT. import unittest from unittest.mock import patch +from unittest.mock import MagicMock from fsspec.registry import register_implementation from xcube.core.store import DataStoreError from xcube.core.store import list_data_store_ids from xcube.core.store import new_data_store + import pytest @@ -58,27 +60,33 @@ def test_get_data_opener_ids(self): store.get_data_opener_ids(data_id="test.geotiff", data_type="mldataset"), ) - @patch("fsspec.implementations.http.HTTPFileSystem.exists") - def test_has_data(self, mock_fs_exists): - mock_fs_exists.return_value = True + @patch("fsspec.filesystem") + def test_has_data(self, mock_filesystem): + # Mock the HTTPFileSystem instance and its `exists` method + mock_http_fs = MagicMock() + mock_filesystem.return_value = mock_http_fs + mock_http_fs.exists.return_value = True + mock_http_fs.sep = "/" + store = new_data_store("https", root="test.org") res = store.has_data(data_id="test.tif") - mock_fs_exists.assert_called_once_with("https://test.org/test.tif") + self.assertEqual(mock_filesystem.call_count, 1) + mock_http_fs.exists.assert_called_once_with("https://test.org/test.tif") self.assertTrue(res) res = store.has_data(data_id="test.tif", data_type="dataset") - mock_fs_exists.assert_called_with("https://test.org/test.tif") - self.assertEqual(mock_fs_exists.call_count, 2) + mock_http_fs.exists.assert_called_with("https://test.org/test.tif") + self.assertEqual(mock_http_fs.exists.call_count, 2) self.assertTrue(res) res = store.has_data(data_id="test.tif", data_type="mldataset") - mock_fs_exists.assert_called_with("https://test.org/test.tif") - self.assertEqual(mock_fs_exists.call_count, 3) + mock_http_fs.exists.assert_called_with("https://test.org/test.tif") + self.assertEqual(mock_http_fs.exists.call_count, 3) self.assertTrue(res) res = store.has_data(data_id="test.tif", data_type="geodataframe") - self.assertEqual(mock_fs_exists.call_count, 3) + self.assertEqual(mock_http_fs.exists.call_count, 3) self.assertFalse(res) From b4c0a865147a492cb27639b0456c2dd52b051ac2 Mon Sep 17 00:00:00 2001 From: Konstantin Ntokas <38956538+konstntokas@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:44:18 +0100 Subject: [PATCH 10/10] Update xcube/core/store/fs/store.py Co-authored-by: Norman Fomferra --- xcube/core/store/fs/store.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/xcube/core/store/fs/store.py b/xcube/core/store/fs/store.py index 93101876d..69327d29b 100644 --- a/xcube/core/store/fs/store.py +++ b/xcube/core/store/fs/store.py @@ -518,15 +518,10 @@ def _is_data_type_available(self, data_id: str, data_type: DataTypeLike) -> bool return False avail_data_types = _FORMAT_TO_DATA_TYPE_ALIASES.get(format_id) data_type = DataType.normalize(data_type) - if any( - [ - data_type.is_super_type_of(avail_data_type) - for avail_data_type in avail_data_types - ] - ): - return True - else: - return False + return any( + data_type.is_super_type_of(avail_data_type) + for avail_data_type in avail_data_types + ) def _assert_data_specified(self, data_id, data_type: DataTypeLike): self._is_data_specified(data_id, data_type, require=True)