From cca7b209b7a42929819869130dc277b80485c238 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 6 Nov 2024 17:49:46 -0800 Subject: [PATCH] Fix #225 Add NWBZarrIO.read_nwb convenience function --- src/hdmf_zarr/nwb.py | 28 +++++++++++++- tests/unit/test_fsspec_streaming.py | 32 ++++++++++++---- tests/unit/test_nwbzarrio.py | 57 +++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 8 deletions(-) create mode 100644 tests/unit/test_nwbzarrio.py diff --git a/src/hdmf_zarr/nwb.py b/src/hdmf_zarr/nwb.py index 582f1a63..762b6b3c 100644 --- a/src/hdmf_zarr/nwb.py +++ b/src/hdmf_zarr/nwb.py @@ -1,6 +1,7 @@ """Module with Zarr backend for NWB for integration with PyNWB""" from warnings import warn -from .backend import ZarrIO +from pathlib import Path +from .backend import ZarrIO, SUPPORTED_ZARR_STORES from hdmf.utils import (docval, popargs, @@ -63,5 +64,30 @@ def export(self, **kwargs): kwargs['container'] = nwbfile super().export(**kwargs) + @staticmethod + @docval({'name': 'path', + 'type': (str, Path, *SUPPORTED_ZARR_STORES), + 'doc': 'the path to the Zarr file or a supported Zarr store'}, + is_method=False) + def read_nwb(**kwargs): + """ + Helper factory method for reading an NWB file and return the NWBFile object + """ + # Retrieve the filepath + path = popargs('path', kwargs) + if isinstance(path, Path): + path = str(path) + # determine default storage options to use when opening a file from S3 + storage_options = {} + if isinstance(path, str) and path.startswith(("s3://")): + storage_options = dict(anon=True) + + # open the file with NWBZarrIO and rad the file + io = NWBZarrIO(path=path, mode="r", load_namespaces=True, storage_options=storage_options) + nwbfile = io.read() + + # return the NWBFile object + return nwbfile + except ImportError: warn("PyNWB is not installed. Support for NWBZarrIO is disabled.") diff --git a/tests/unit/test_fsspec_streaming.py b/tests/unit/test_fsspec_streaming.py index cbb0bf06..1864f4a3 100644 --- a/tests/unit/test_fsspec_streaming.py +++ b/tests/unit/test_fsspec_streaming.py @@ -8,17 +8,20 @@ class TestFSSpecStreaming(unittest.TestCase): - @unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed") - def test_fsspec_streaming(self): + + def setUp(self): # PLACEHOLDER test file from Allen Institute for Neural Dynamics # TODO: store a small test file and use it to speed up testing - remote_path = ( + self.s3_aind_path = ( "s3://aind-open-data/ecephys_625749_2022-08-03_15-15-06_nwb_2023-05-16_16-34-55/" "ecephys_625749_2022-08-03_15-15-06_nwb/" "ecephys_625749_2022-08-03_15-15-06_experiment1_recording1.nwb.zarr/" ) + self.https_s3_path = "https://dandiarchive.s3.amazonaws.com/zarr/ccefbc9f-30e7-4a4c-b044-5b59d300040b/" - with NWBZarrIO(remote_path, mode="r", storage_options=dict(anon=True)) as io: + @unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed") + def test_fsspec_streaming(self): + with NWBZarrIO(self.s3_aind_path, mode="r", storage_options=dict(anon=True)) as io: nwbfile = io.read() self.assertEqual(nwbfile.identifier, "ecephys_625749_2022-08-03_15-15-06") @@ -32,10 +35,25 @@ def test_s3_open_with_consolidated_(self): """ The file is a Zarr file with consolidated metadata. """ - s3_path = "https://dandiarchive.s3.amazonaws.com/zarr/ccefbc9f-30e7-4a4c-b044-5b59d300040b/" - with NWBZarrIO(s3_path, mode='r') as read_io: + with NWBZarrIO(self.https_s3_path, mode='r') as read_io: read_io.open() self.assertIsInstance(read_io.file.store, zarr.storage.ConsolidatedMetadataStore) - with NWBZarrIO(s3_path, mode='-r') as read_io: + with NWBZarrIO(self.https_s3_path, mode='-r') as read_io: read_io.open() self.assertIsInstance(read_io.file.store, zarr.storage.FSStore) + + + @unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed") + def test_fsspec_streaming_via_read_nwb(self): + """ + Test reading from s3 using the convenience function NWBZarrIO.read_nwb + """ + # Test with a s3:// URL + nwbfile = NWBZarrIO.read_nwb(self.s3_aind_path) + self.assertEqual(nwbfile.identifier, "ecephys_625749_2022-08-03_15-15-06") + self.assertEqual(nwbfile.institution, "AIND") + + # Try a https:// URL as well + nwbfile = NWBZarrIO.read_nwb(self.https_s3_path) + self.assertIsNotNone(nwbfile) + diff --git a/tests/unit/test_nwbzarrio.py b/tests/unit/test_nwbzarrio.py new file mode 100644 index 00000000..10a98802 --- /dev/null +++ b/tests/unit/test_nwbzarrio.py @@ -0,0 +1,57 @@ +import unittest +from hdmf_zarr import NWBZarrIO +import os +import shutil +import datetime +from dateutil.tz import tzlocal + +try: + from pynwb import NWBFile + PYNWB_AVAILABLE = True +except ImportError: + PYNWB_AVAILABLE = False + + +@unittest.skipIf(not PYNWB_AVAILABLE, "PyNWB not installed") +class TestNWBZarrIO(unittest.TestCase): + + def setUp(self): + self.filepath = "test_io.zarr" + + def tearDown(self): + if os.path.exists(self.filepath): + shutil.rmtree(self.filepath) + + def write_test_file(self): + # Create the NWBFile + nwbfile = NWBFile( + session_description="my first synthetic recording", + identifier="EXAMPLE_ID", + session_start_time=datetime.now(tzlocal()), + experimenter="Dr. Bilbo Baggins", + lab="Bag End Laboratory", + institution="University of Middle Earth at the Shire", + experiment_description="I went on an adventure with thirteen dwarves " + "to reclaim vast treasures.", + session_id="LONELYMTN", + ) + + # Create a device + device = nwbfile.create_device( + name="array", description="the best array", manufacturer="Probe Company 9000" + ) + with NWBZarrIO(path=self.filepath, mode="w") as io: + io.write(nwbfile) + + @unittest.skip + def test_read_nwb(self): + """ + Test reading a local file with NWBZarrIO.read_nwb. + + NOTE: See TestFSSpecStreaming.test_fsspec_streaming_via_read_nwb for corresponding tests + for reading a remote file with NWBZarrIO.read_nwb + """ + self.write_test_file() + nwbfile = NWBZarrIO.read_nwb(path=self.filepath, mode="r") + self.assertEqual(len(nwbfile.devices), 1) + self.assertEqual(nwbfile.experimenter, "Dr. Bilbo Baggins")