Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NWBZarrIO.read_nwb convenience function #226

Merged
merged 11 commits into from
Nov 8, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* Added test for opening file with consolidated metadata from DANDI. @mavaylon1 [#206](https://github.com/hdmf-dev/hdmf-zarr/pull/206)
* Add dimension labels compatible with xarray. @mavaylon1 [#207](https://github.com/hdmf-dev/hdmf-zarr/pull/207)
* Added link_data --> clear_cache relationship to support repacking zarr nwbfiles: [#215](https://github.com/hdmf-dev/hdmf-zarr/pull/215)
* Added `NWBZarrIO.read_nwb` convenience method to simplify reading an NWB file. @oruebel [#226](https://github.com/hdmf-dev/hdmf-zarr/pull/226)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mavaylon1, was this released already? It is listed in the changelog under the 0.9.0 release, but the PR was merged only a week ago. Is that a changelog error?


## 0.8.0 (June 4, 2024)
### Bug Fixes
Expand Down
28 changes: 27 additions & 1 deletion src/hdmf_zarr/nwb.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Module with Zarr backend for NWB for integration with PyNWB"""
from warnings import warn
from .backend import ZarrIO
from pathlib import Path
from .backend import ZarrIO, SUPPORTED_ZARR_STORES

from hdmf.utils import (docval,
popargs,
Expand Down Expand Up @@ -63,5 +64,30 @@ def export(self, **kwargs):
kwargs['container'] = nwbfile
super().export(**kwargs)

@staticmethod
@docval({'name': 'path',
'type': (str, Path, *SUPPORTED_ZARR_STORES),
'doc': 'the path to the Zarr file or a supported Zarr store'},
is_method=False)
rly marked this conversation as resolved.
Show resolved Hide resolved
def read_nwb(**kwargs):
    """
    Open an NWB Zarr file for reading and return the NWBFile read from it.

    The ``path`` keyword argument is popped from ``kwargs``. A ``pathlib.Path`` is
    converted to ``str`` first. If the resulting path is a string starting with
    ``s3://``, the file is opened with ``storage_options`` set to ``anon=True``;
    for every other path an empty ``storage_options`` dict is passed.

    NOTE: The NWBZarrIO object created here is not closed by this function.

    :returns: the object returned by ``NWBZarrIO.read()``
    """
    target = popargs('path', kwargs)
    if isinstance(target, Path):
        target = str(target)

    # Only string paths using the s3:// scheme get anonymous-access storage options;
    # store objects and all other paths are opened with no extra options.
    is_s3_url = isinstance(target, str) and target.startswith("s3://")
    s3_options = {"anon": True} if is_s3_url else {}

    # Open the file in read mode with NWBZarrIO and read it
    reader = NWBZarrIO(path=target, mode="r", load_namespaces=True, storage_options=s3_options)
    return reader.read()

except ImportError:
warn("PyNWB is not installed. Support for NWBZarrIO is disabled.")
28 changes: 21 additions & 7 deletions tests/unit/test_fsspec_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,21 @@


class TestFSSpecStreaming(unittest.TestCase):
@unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed")
def test_fsspec_streaming(self):

def setUp(self):
# PLACEHOLDER test file from Allen Institute for Neural Dynamics
# TODO: store a small test file and use it to speed up testing
remote_path = (
self.s3_aind_path = (
rly marked this conversation as resolved.
Show resolved Hide resolved
"s3://aind-open-data/ecephys_625749_2022-08-03_15-15-06_nwb_2023-05-16_16-34-55/"
"ecephys_625749_2022-08-03_15-15-06_nwb/"
"ecephys_625749_2022-08-03_15-15-06_experiment1_recording1.nwb.zarr/"
)
# DANDISET: 000719/icephys_9_27_2024
self.https_s3_path = "https://dandiarchive.s3.amazonaws.com/zarr/7515c603-9940-4598-aa1b-8bf32dc9b10c/"

with NWBZarrIO(remote_path, mode="r", storage_options=dict(anon=True)) as io:
@unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed")
def test_fsspec_streaming(self):
with NWBZarrIO(self.s3_aind_path, mode="r", storage_options=dict(anon=True)) as io:
nwbfile = io.read()

self.assertEqual(nwbfile.identifier, "ecephys_625749_2022-08-03_15-15-06")
Expand All @@ -32,10 +36,20 @@ def test_s3_open_with_consolidated_(self):
"""
The file is a Zarr file with consolidated metadata.
"""
s3_path = "https://dandiarchive.s3.amazonaws.com/zarr/ccefbc9f-30e7-4a4c-b044-5b59d300040b/"
with NWBZarrIO(s3_path, mode='r') as read_io:
with NWBZarrIO(self.https_s3_path, mode='r') as read_io:
read_io.open()
self.assertIsInstance(read_io.file.store, zarr.storage.ConsolidatedMetadataStore)
with NWBZarrIO(s3_path, mode='-r') as read_io:
with NWBZarrIO(self.https_s3_path, mode='-r') as read_io:
read_io.open()
self.assertIsInstance(read_io.file.store, zarr.storage.FSStore)


@unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed")
def test_fsspec_streaming_via_read_nwb(self):
    """
    Check that NWBZarrIO.read_nwb can read the file at ``self.s3_aind_path``.

    Only the ``identifier`` and ``institution`` fields of the returned file are verified.
    """
    # Pass the s3:// URL positionally, without any explicit storage options
    streamed = NWBZarrIO.read_nwb(self.s3_aind_path)
    expected_fields = (
        ("identifier", "ecephys_625749_2022-08-03_15-15-06"),
        ("institution", "AIND"),
    )
    for field_name, expected_value in expected_fields:
        self.assertEqual(getattr(streamed, field_name), expected_value)
56 changes: 56 additions & 0 deletions tests/unit/test_nwbzarrio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import unittest
from hdmf_zarr import NWBZarrIO
import os
import shutil
from datetime import datetime
from dateutil.tz import tzlocal

try:
from pynwb import NWBFile
PYNWB_AVAILABLE = True
except ImportError:
PYNWB_AVAILABLE = False


@unittest.skipIf(not PYNWB_AVAILABLE, "PyNWB not installed")
class TestNWBZarrIO(unittest.TestCase):
    """Tests for NWBZarrIO that write a small NWB file to a local Zarr path and read it back."""

    def setUp(self):
        # Relative path of the Zarr file written by the tests
        self.filepath = "test_io.zarr"

    def tearDown(self):
        # Nothing to clean up if the test never wrote the file
        if not os.path.exists(self.filepath):
            return
        shutil.rmtree(self.filepath)

    def write_test_file(self):
        """Write an NWBFile with a single device to ``self.filepath``."""
        # Metadata for the NWBFile
        metadata = dict(
            session_description="my first synthetic recording",
            identifier="EXAMPLE_ID",
            session_start_time=datetime.now(tzlocal()),
            experimenter="Dr. Bilbo Baggins",
            lab="Bag End Laboratory",
            institution="University of Middle Earth at the Shire",
            experiment_description="I went on an adventure with thirteen dwarves "
            "to reclaim vast treasures.",
            session_id="LONELYMTN",
        )
        nwbfile = NWBFile(**metadata)

        # Add one device; the returned object is not needed here
        nwbfile.create_device(
            name="array",
            description="the best array",
            manufacturer="Probe Company 9000",
        )

        with NWBZarrIO(path=self.filepath, mode="w") as io:
            io.write(nwbfile)

    def test_read_nwb(self):
        """
        Test reading a local file with NWBZarrIO.read_nwb.

        NOTE: See TestFSSpecStreaming.test_fsspec_streaming_via_read_nwb for the corresponding
              test of reading a remote file with NWBZarrIO.read_nwb
        """
        self.write_test_file()
        loaded = NWBZarrIO.read_nwb(path=self.filepath)
        # The written file has exactly one device, and the experimenter reads back as a 1-tuple
        self.assertEqual(len(loaded.devices), 1)
        self.assertTupleEqual(loaded.experimenter, ('Dr. Bilbo Baggins',))
Loading