Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BF(workaround): loop through namespaces while validating nwb #1036

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 71 additions & 2 deletions dandi/pynwb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,76 @@ def rename_nwb_external_files(metadata: List[dict], dandiset_path: str) -> None:
container.external_file[no] = str(name_new)


# borrowed from
# https://github.com/NeurodataWithoutBorders/pynwb/blob/745aaf26fa56958254e1d22a73d4c962c8074332/src/pynwb/validate.py#L29
# which is part of the https://github.com/NeurodataWithoutBorders/pynwb/pull/1432
# and needed to overcome errors like in https://github.com/dandi/helpdesk/discussions/43
def get_cached_namespaces_to_validate(path):
    """
    Pick the most specific cached namespace(s) of an NWB file for validation.

    All namespaces cached in the file are loaded into a fresh catalog.  Any
    namespace that appears as a dependency of another cached namespace is
    dropped, so only the "leaf" namespaces (typically extensions) remain.

    Example
    -------

    Validate a file against every namespace selected by this function::

        namespaces, manager, cached = get_cached_namespaces_to_validate(path)
        with NWBHDF5IO(path, "r", manager=manager) as reader:
            errors = []
            for ns in namespaces:
                errors += pynwb.validate(io=reader, namespace=ns)

    :param path: Path for the NWB file
    :return: Tuple with:
      - Sorted list of strings with the most specific namespace(s) to use
        for validation.
      - BuildManager object for opening the file for validation, or None
        when no namespace was selected.
      - Dict with the full result from NWBHDF5IO.load_namespaces
    """
    from hdmf.build import BuildManager, TypeMap
    from hdmf.spec import NamespaceCatalog
    from pynwb.spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace

    catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace)
    ns_deps = NWBHDF5IO.load_namespaces(catalog, path)

    # Start from every cached namespace and strip out those which some other
    # namespace depends on; what is left are the most specific ones.
    most_specific = set(ns_deps.keys())
    for dependencies in ns_deps.values():
        most_specific.difference_update(dependencies.keys())

    # TODO remove this workaround for issue
    # https://github.com/NeurodataWithoutBorders/pynwb/issues/1357
    # (hdmf-experimental is excluded from validation for now)
    most_specific.discard("hdmf-experimental")

    namespaces = sorted(most_specific)

    # A manager is only built when there is something to validate against.
    manager = BuildManager(TypeMap(catalog)) if namespaces else None

    return namespaces, manager, ns_deps


def validate_namespaces(path: Union[str, Path]) -> List[str]:
    """Validate an NWB file against each validatable cached namespace separately.

    Proposed by @oruebel in
    https://github.com/dandi/dandi-cli/issues/917#issuecomment-1045154252

    :param path: Path to the NWB file to validate
    :return: Combined list of the errors reported by ``pynwb.validate`` for
      every namespace selected by ``get_cached_namespaces_to_validate``.  If
      the file has no cached namespaces to validate against, the result of
      ``pynwb.validate`` with its default namespace is returned instead.
    """
    namespaces_validate, manager, _ = get_cached_namespaces_to_validate(path)
    errors: List[str] = []
    with NWBHDF5IO(path, "r", manager=manager) as reader:
        if not namespaces_validate:
            # Nothing cached in the file: do not silently report "no errors",
            # fall back to pynwb's default validation instead.
            errors.extend(pynwb.validate(io=reader))
        for ns in namespaces_validate:
            # NB: must be pynwb.validate -- the bare name ``validate`` in this
            # module refers to the path-based, memoized ``validate`` defined
            # below, which does not accept ``io``/``namespace`` arguments.
            errors.extend(pynwb.validate(io=reader, namespace=ns))
    return errors


@validate_cache.memoize_path
def validate(path: Union[str, Path], devel_debug: bool = False) -> List[str]:
"""Run validation on a file and return errors
Expand All @@ -333,8 +403,7 @@ def validate(path: Union[str, Path], devel_debug: bool = False) -> List[str]:
path = str(path) # Might come in as pathlib's PATH
errors: List[str]
try:
with pynwb.NWBHDF5IO(path, "r", load_namespaces=True) as reader:
errors = pynwb.validate(reader)
errors = validate_namespaces(path)
lgr.warning(
"pynwb validation errors for %s: %s",
path,
Expand Down