Skip to content

Commit

Permalink
Update schema (#161)
Browse files Browse the repository at this point in the history
* update schema

* update metadata-template

* account for set in metadata
  • Loading branch information
dougiesquire authored Mar 28, 2024
1 parent c711078 commit e2f3cc7
Show file tree
Hide file tree
Showing 11 changed files with 117 additions and 39 deletions.
11 changes: 6 additions & 5 deletions docs/management/schema.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
Updating schema
===============

A specific commit of the schema `here <https://github.com/ACCESS-NRI/schema/blob/main/file_asset.json>`_ is
A specific version of the schema
`here <https://github.com/ACCESS-NRI/schema/tree/main/au.org.access-nri/model/output/file-metadata>`_ is
downloaded when :code:`access_nri_intake.source` is first imported. This schema is used to validate Intake-ESM
datastore entries. Similarly a specific commit of the schema
`here <https://github.com/ACCESS-NRI/schema/blob/main/experiment_asset.json>`_ is downloaded when
:code:`access_nri_intake.catalog` is first imported and this is used to validate intake-dataframe-catalog
entries.
datastore entries. Similarly, a specific version of the schema
`here <https://github.com/ACCESS-NRI/schema/tree/main/au.org.access-nri/model/output/experiment-metadata>`_ is
downloaded when :code:`access_nri_intake.catalog` is first imported and this is used to validate
intake-dataframe-catalog entries.

Schema can be updated by updating the file(s) at https://github.com/ACCESS-NRI/schema and editing the
appropriate :code:`SCHEMA_URL` path(s) in :code:`access_nri_intake.source.__init__` and
Expand Down
1 change: 1 addition & 0 deletions metadata.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
schema_version: <The version of the schema (string)>
name: <REQUIRED The name of the experiment (string)>
experiment_uuid: <REQUIRED Unique uuid for the experiment (string)>
description: <REQUIRED Short description of the experiment (string, < 150 char)>
Expand Down
10 changes: 4 additions & 6 deletions src/access_nri_intake/catalog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
""" Tools for managing intake-dataframe-catalogs like the ACCESS-NRI catalog """


from ..utils import get_jsonschema
from ..utils import _can_be_array, get_jsonschema

CORE_COLUMNS = [
"name",
Expand All @@ -18,15 +18,13 @@
NAME_COLUMN = "name"
TRANSLATOR_GROUPBY_COLUMNS = ["model", "realm", "frequency"]

SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/36abe2fe28eb2853a54f41c5eedfd964617d9d68/experiment_asset.json"
SCHEMA_HASH = "60d439a9ad5602464c7dad54072ac276d1fae3634f9524edcc82073a5a92616a"
SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/e9055da95093ec2faa555c090fc5af17923d1566/au.org.access-nri/model/output/experiment-metadata/1-0-2.json"
SCHEMA_HASH = "ecb72c1adde3679896ceeca96aa6500d07ea2e05810155ec7a5dc301593c1dc7"

EXP_JSONSCHEMA, CATALOG_JSONSCHEMA = get_jsonschema(
url=SCHEMA_URL, known_hash=SCHEMA_HASH, required=CORE_COLUMNS
)

COLUMNS_WITH_ITERABLES = [
col
for col in CORE_COLUMNS
if CATALOG_JSONSCHEMA["properties"][col]["type"] == "array"
col for col in CORE_COLUMNS if _can_be_array(CATALOG_JSONSCHEMA["properties"][col])
]
5 changes: 4 additions & 1 deletion src/access_nri_intake/catalog/translators.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,11 @@ def _default_translator(self, column):
val = getattr(self.source, column)
elif column in self.source.metadata:
val = self.source.metadata[column]
if isinstance(val, list):
# Some metadata fields can be a value _or_ array
if isinstance(val, (list, tuple, set)):
val = tuple(val)
elif column in COLUMNS_WITH_ITERABLES:
val = (val,)
else:
raise TranslatorError(
f"Could not translate '{column}' from {self.source.name} using {self.__class__.__name__}"
Expand Down
4 changes: 2 additions & 2 deletions src/access_nri_intake/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from .catalog import EXP_JSONSCHEMA, translators
from .catalog.manager import CatalogManager
from .source import builders
from .utils import load_metadata_yaml
from .utils import _can_be_array, load_metadata_yaml


class MetadataCheckError(Exception):
Expand Down Expand Up @@ -258,7 +258,7 @@ def metadata_template():
else:
description = f"<{descr['description']}>"

if descr["type"] == "array":
if _can_be_array(descr):
description = [description]

template[name] = description
Expand Down
4 changes: 2 additions & 2 deletions src/access_nri_intake/source/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
PATH_COLUMN = "path"
VARIABLE_COLUMN = "variable"

SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/25643eb437e95ee48b3fa6b620c7a0986c2c3bb0/file_asset.json"
SCHEMA_HASH = "d7b5fcab71861f6c4b319e64cfde75f36de2bdc797f13b5b4f7029b41ce51e5a"
SCHEMA_URL = "https://raw.githubusercontent.com/ACCESS-NRI/schema/e9055da95093ec2faa555c090fc5af17923d1566/au.org.access-nri/model/output/file-metadata/1-0-1.json"
SCHEMA_HASH = "8f2f069fa06d81ff086b91daa6503f75615aa90385ab61ee2d1a7956dc96f9a6"

_, ESM_JSONSCHEMA = get_jsonschema(
url=SCHEMA_URL, known_hash=SCHEMA_HASH, required=CORE_COLUMNS
Expand Down
18 changes: 18 additions & 0 deletions src/access_nri_intake/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,21 @@ def validate_against_schema(instance, schema):
)

TupleAllowingValidator(schema).validate(instance)


def _can_be_array(field):
"""
Does the schema allow the provided field to be an array?
"""

def _is_array(field):
try:
return field["type"] == "array"
except KeyError:
return False

is_array = _is_array(field)
if (not is_array) and ("oneOf" in field):
for nfield in field["oneOf"]:
is_array = is_array or _is_array(nfield)
return is_array
28 changes: 28 additions & 0 deletions tests/data/access-cm2/by578/metadata.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
name: by578
experiment_uuid: 1fd9e682-d393-4b17-a9cd-934c3a48a1f8
description: >-
Pacemaker variation of CMIP6 ssp245 simulation with Tropical Atlantic region replaced
with fixed SSTs from observations
long_description: >-
Pacemaker variation of CMIP6 ssp245 simulation with 5 ensemble members and Tropical
Atlantic region replaced with fixed SSTs from observations. Branched from parent in
2015
model:
- ACCESS-CM2
nominal_resolution:
- atmos = n96
- ocean = 1 degree
version: 1
contact: Dave Bi
email: [email protected]
created: null
reference: null
license: null
url: null
parent_experiment: 948d8676-2c56-49db-8ea1-b80572b074c8
related_experiments:
- 57243597-43c0-4a8f-a404-b10cecdeb3f7
notes: >-
null
keywords:
- null
28 changes: 28 additions & 0 deletions tests/data/access-cm2/by578a/metadata.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
name: by578
experiment_uuid: 1fd9e682-d393-4b17-a9cd-934c3a48a1f8
description: >-
Pacemaker variation of CMIP6 ssp245 simulation with Tropical Atlantic region replaced
with fixed SSTs from observations
long_description: >-
Pacemaker variation of CMIP6 ssp245 simulation with 5 ensemble members and Tropical
Atlantic region replaced with fixed SSTs from observations. Branched from parent in
2015
model:
- ACCESS-CM2
nominal_resolution:
- atmos = n96
- ocean = 1 degree
version: 1
contact: Dave Bi
email: [email protected]
created: null
reference: null
license: null
url: null
parent_experiment: 948d8676-2c56-49db-8ea1-b80572b074c8
related_experiments:
- 57243597-43c0-4a8f-a404-b10cecdeb3f7
notes: >-
null
keywords:
- null
3 changes: 1 addition & 2 deletions tests/data/access-om3/metadata.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
experiment_uuid: 4cf0c4ee-09c9-4675-ae1f-ce46f0d848ed
created: '2024-02-27'
name: MOM6-CICE6-WW3-1deg_jra55do_ryf-4cf0c4ee
model:
- ACCESS-OM3
model: ACCESS-OM3
description: An early ACCESS-OM3 test run
long_description: An early ACCESS-OM3 test run
url: [email protected]:COSIMA/MOM6-CICE6-WW3.git
Expand Down
44 changes: 23 additions & 21 deletions tests/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pytest

from access_nri_intake.catalog import EXP_JSONSCHEMA
from access_nri_intake.catalog.manager import CatalogManager, CatalogManagerError
from access_nri_intake.catalog.translators import (
Cmip5Translator,
Expand All @@ -15,6 +16,7 @@
AccessOm2Builder,
AccessOm3Builder,
)
from access_nri_intake.utils import load_metadata_yaml


def test_CatalogManager_init(tmp_path):
Expand Down Expand Up @@ -44,16 +46,15 @@ def test_CatalogManager_build_esm(tmp_path, test_data, builder, basedir, kwargs)
path = str(tmp_path / "cat.csv")
cat = CatalogManager(path)

metadata = load_metadata_yaml(
str(test_data / basedir / "metadata.yaml"), EXP_JSONSCHEMA
)
args = dict(
name="test",
description="test",
builder=builder,
path=str(test_data / basedir),
metadata=dict(
model=[
basedir,
]
),
metadata=metadata,
directory=str(tmp_path),
**kwargs,
)
Expand Down Expand Up @@ -138,27 +139,28 @@ def test_CatalogManager_all(tmp_path, test_data):
cat.save()
assert len(CatalogManager(path).dfcat) == 1

# Build source
cat.build_esm(
name="access-om2",
description="access-om2",
builder=AccessOm2Builder,
path=str(test_data / "access-om2"),
metadata=dict(
model=[
"ACCESS-OM2",
]
),
directory=str(tmp_path),
)
# Build sources
models = {"access-om2": AccessOm2Builder, "access-om3": AccessOm3Builder}
for model, builder in models.items():
metadata = load_metadata_yaml(
str(test_data / model / "metadata.yaml"), EXP_JSONSCHEMA
)
cat.build_esm(
name=model,
description=model,
builder=builder,
path=str(test_data / model),
metadata=metadata,
directory=str(tmp_path),
)
# Still only one entry on disk
assert len(cat.dfcat) == 2
assert len(cat.dfcat) == len(models) + 1
assert len(CatalogManager(path).dfcat) == 1

# Check that entry with same name overwrites correctly
cat.load(
**load_args,
)
assert len(cat.dfcat) == 2
assert len(cat.dfcat) == len(models) + 1
cat.save()
assert len(CatalogManager(path).dfcat) == 2
assert len(CatalogManager(path).dfcat) == len(models) + 1

0 comments on commit e2f3cc7

Please sign in to comment.