From 02821c780a57767b520807d8227950474cb4bb81 Mon Sep 17 00:00:00 2001 From: Charles Turner Date: Thu, 14 Nov 2024 13:00:39 +0800 Subject: [PATCH] Updated DfFileCatalog.add and related tests to describe errors relating to columns with iterables in a more satisfactory manner --- src/intake_dataframe_catalog/core.py | 29 ++++++++++++++++++++++++---- tests/test_core.py | 12 +++++++++++- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/intake_dataframe_catalog/core.py b/src/intake_dataframe_catalog/core.py index 3e31dcf..caea211 100644 --- a/src/intake_dataframe_catalog/core.py +++ b/src/intake_dataframe_catalog/core.py @@ -266,6 +266,11 @@ def add( overwrite: bool, optional If True, overwrite all existing entries in the dataframe catalog with name_column entries that match the name of this source. Otherwise the entry is appended to the dataframe catalog. + + Raises + ------ + DfFileCatalogError + If the source cannot be added to the dataframe catalog. """ metadata = metadata or {} @@ -293,11 +298,27 @@ def add( # Check that new entries contain iterables when they should entry_iterable_columns = _columns_with_iterables(row) if entry_iterable_columns != self.columns_with_iterables: - raise DfFileCatalogError( - f"Cannot add entry with iterable metadata columns: {entry_iterable_columns} " - f"to dataframe catalog with iterable metadata columns: {self.columns_with_iterables}. " - " Please ensure that metadata entries are consistent." + missing_iterable_cols = set(self.columns_with_iterables) - set( + entry_iterable_columns ) + unexpected_iterable_cols = set(entry_iterable_columns) - set( + self.columns_with_iterables + ) + + if missing_iterable_cols: + err_msg = ( + f"Expected additional iterable metadata columns: {list(self.columns_with_iterables)}. " + f"Unable to add entry with iterable metadata columns '{list(entry_iterable_columns)}' to dataframe catalog: " + f"columns {list(missing_iterable_cols)} must be iterable to ensure metadata entries are consistent." + ) + elif unexpected_iterable_cols: + err_msg = ( + f"Expected iterable metadata columns: {list(self.columns_with_iterables)}. " + f"Unable to add entry with metadata columns '{list(entry_iterable_columns)}' to dataframe catalog: " + f"columns {list(unexpected_iterable_cols)} must not be iterable to ensure metadata entries are consistent." + ) + + raise DfFileCatalogError(err_msg) if set(self.columns) == set(row.columns): if ( diff --git a/tests/test_core.py b/tests/test_core.py index 1aad549..253762e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -331,7 +331,17 @@ def test_catalog_add(catalog_path, source_path): with pytest.raises(DfFileCatalogError) as excinfo: cat.add(gistemp, metadata={"realm": "atmos", "variable": "tas"}) - assert "Cannot add entry with iterable metadata columns" in str(excinfo.value) + assert ( + str(excinfo.value) + == "Expected additional iterable metadata columns: ['variable']. Unable to add entry with iterable metadata columns '[]' to dataframe catalog: columns ['variable'] must be iterable to ensure metadata entries are consistent." + ) + + with pytest.raises(DfFileCatalogError) as excinfo: + cat.add(gistemp, metadata={"realm": ["atmos"], "variable": ["tas"]}) + assert ( + str(excinfo.value) + == "Expected iterable metadata columns: ['variable']. Unable to add entry with metadata columns '['realm', 'variable']' to dataframe catalog: columns ['realm'] must not be iterable to ensure metadata entries are consistent." + ) with pytest.raises(DfFileCatalogError) as excinfo: cat.add(gistemp, metadata={"foo": "bar", "variable": ["tas"]})