Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

223-model-required #255

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions src/access_nri_intake/catalog/manager.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0

""" Manager for adding/updating intake sources in an intake-dataframe-catalog like the ACCESS-NRI catalog """
"""Manager for adding/updating intake sources in an intake-dataframe-catalog like the ACCESS-NRI catalog"""

import os
from typing import Optional, Union

import intake
from intake_dataframe_catalog.core import DfFileCatalog
from intake_dataframe_catalog.core import DfFileCatalog, DfFileCatalogError

from ..utils import validate_against_schema
from . import (
Expand All @@ -23,6 +23,7 @@

class CatalogManagerError(Exception):
    """Generic exception raised for errors in the CatalogManager class."""


Expand Down Expand Up @@ -198,7 +199,20 @@ def _add(self):

overwrite = True
for _, row in self.source_metadata.iterrows():
self.dfcat.add(self.source, row.to_dict(), overwrite=overwrite)
try:
self.dfcat.add(self.source, row.to_dict(), overwrite=overwrite)
except DfFileCatalogError as exc:
# An error message containing 'iterable metadata' most likely relates to the
# issues discussed at https://github.com/ACCESS-NRI/access-nri-intake-catalog/issues/223,
# so we wrap the error with some additional information about catalog issues
# and then raise
if "iterable metadata" in str(exc):
raise CatalogManagerError(
f"Error adding source '{name}' to the catalog due to iterable metadata issues. "
" See https://github.com/ACCESS-NRI/access-nri-intake-catalog/issues/223: likely"
" due to issues with 'model' column in the catalog"
) from exc
raise CatalogManagerError(exc)
overwrite = False

def save(self, **kwargs):
Expand Down
31 changes: 1 addition & 30 deletions src/access_nri_intake/catalog/translators.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from intake import DataSource

from . import COLUMNS_WITH_ITERABLES
from .utils import _to_tuple, tuplify_series

FREQUENCY_TRANSLATIONS = {
"monthly-averaged-by-hour": "1hr",
Expand All @@ -35,36 +36,6 @@
}


def _to_tuple(series: pd.Series) -> pd.Series:
"""
Make each entry in the provided series a tuple

Parameters
----------
series: :py:class:`~pandas.Series`
A pandas Series or another object with an `apply` method
"""
return series.apply(lambda x: (x,))


def tuplify_series(func: Callable) -> Callable:
    """
    Decorator that wraps a function that returns a pandas Series and converts
    each entry in the series to a tuple

    Parameters
    ----------
    func: Callable
        Function or method whose return value has an `apply` method (e.g. a
        pandas Series)

    Returns
    -------
    Callable
        Wrapper that forwards all arguments to `func` and returns its result
        with every entry converted to a one-element tuple
    """
    # Imported locally so the decorator does not rely on a module-level import
    from functools import wraps

    @wraps(func)  # keep the decorated function's __name__ and __doc__
    def wrapper(*args, **kwargs):
        # Positional arguments (including 'self' for methods) are forwarded
        # unchanged, so bound methods need no special handling. The previous
        # `hasattr(args[0], "__class__")` check was true for every object and
        # both of its branches made this same call.
        return _to_tuple(func(*args, **kwargs))

    return wrapper


class TranslatorError(Exception):
"Generic Exception for the Translator classes"

Expand Down
28 changes: 28 additions & 0 deletions src/access_nri_intake/catalog/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import Callable

import pandas as pd


def _to_tuple(series: pd.Series) -> pd.Series:
"""
Make each entry in the provided series a tuple

Parameters
----------
series: :py:class:`~pandas.Series`
A pandas Series or another object with an `apply` method
"""
return series.apply(lambda x: (x,))


def tuplify_series(func: Callable) -> Callable:
    """
    Decorator that wraps a function that returns a pandas Series and converts
    each entry in the series to a tuple

    Parameters
    ----------
    func: Callable
        Function or method whose return value has an `apply` method (e.g. a
        pandas Series)

    Returns
    -------
    Callable
        Wrapper that forwards all arguments to `func` and returns its result
        with every entry converted to a one-element tuple
    """
    # Imported locally so the decorator does not rely on a module-level import
    from functools import wraps

    @wraps(func)  # keep the decorated function's __name__ and __doc__
    def wrapper(*args, **kwargs):
        series = func(*args, **kwargs)
        return _to_tuple(series)

    return wrapper
37 changes: 37 additions & 0 deletions tests/test_manager.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# Copyright 2023 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0


from unittest import mock

import pytest
from intake_dataframe_catalog.core import DfFileCatalogError

from access_nri_intake.catalog import EXP_JSONSCHEMA
from access_nri_intake.catalog.manager import CatalogManager, CatalogManagerError
Expand Down Expand Up @@ -162,3 +166,36 @@ def test_CatalogManager_all(tmp_path, test_data):
assert len(cat.dfcat) == len(models) + 1
cat.save()
assert len(CatalogManager(path).dfcat) == len(models) + 1


def test_CatalogManager_load_invalid_model(tmp_path, test_data):
    """
    Test that a DfFileCatalogError mentioning iterable metadata, raised while
    adding a source to the catalog, surfaces as a CatalogManagerError chained to
    the original error
    """
    path = str(tmp_path / "cat.csv")
    cat = CatalogManager(path)

    # Arguments describing the source to load
    load_args = dict(
        name="cmip5-al33",
        description="cmip5-al33",
        path=str(test_data / "esm_datastore/cmip5-al33.json"),
        translator=Cmip5Translator,
    )

    # Make dfcat.add raise the 'iterable metadata' error so that the wrapping
    # logic is exercised
    with mock.patch.object(
        cat.dfcat,
        "add",
        side_effect=DfFileCatalogError(
            "Expected iterable metadata columns: ['model']. "
            "Unable to add entry with iterable metadata columns '[]' to dataframe "
            "catalog: columns ['model'] must be iterable to ensure metadata entries are consistent."
        ),
    ):
        with pytest.raises(CatalogManagerError) as excinfo:
            cat.load(**load_args)

    # The wrapping error names the source ...
    assert "Error adding source 'cmip5-al33' to the catalog" in str(excinfo.value)
    # ... and the original error is preserved as its cause
    assert "Expected iterable metadata columns: ['model']" in str(
        excinfo.value.__cause__
    )
Loading