From 6a6af5a5693216b88487a51afd5037c1af3296f0 Mon Sep 17 00:00:00 2001
From: Ben Dichter
Date: Mon, 12 Aug 2024 21:12:46 -0400
Subject: [PATCH 1/4] Add a docs page in the user guide about adding trials

---
 docs/user_guide/adding_trials.rst  | 36 ++++++++++++++++++++++++++++++
 docs/user_guide/datainterfaces.rst | 25 +++++++++++++++++++--
 docs/user_guide/index.rst          |  1 +
 docs/user_guide/nwbconverter.rst   | 17 ++++++++++----
 4 files changed, 73 insertions(+), 6 deletions(-)
 create mode 100644 docs/user_guide/adding_trials.rst

diff --git a/docs/user_guide/adding_trials.rst b/docs/user_guide/adding_trials.rst
new file mode 100644
index 000000000..7fbc63c61
--- /dev/null
+++ b/docs/user_guide/adding_trials.rst
@@ -0,0 +1,36 @@
+.. _adding_trials:
+
+Adding Trials to NWB Files
+==========================
+
+NWB allows you to store information about time intervals in a structured way. These structures are often used to store
+information about trials, epochs, or other time intervals in the data.
+You can add time intervals to an NWBFile object before writing it using PyNWB.
+Here is how you would add trials to an NWB file:
+
+.. code-block:: python
+
+    # you can add custom columns to the trials table
+    nwbfile.add_trial_column(name="trial_type", description="the type of trial")
+
+    nwbfile.add_trial(start_time=0.0, stop_time=1.0, trial_type="go")
+    nwbfile.add_trial(start_time=1.0, stop_time=2.0, trial_type="nogo")
+
+You can also add epochs or other types of time intervals to an NWB file. See
+`PyNWB Annotating Time Intervals `_
+for more information.
+
+Once this information is added, you can write the NWB file to disk.
+
+.. code-block:: python
+
+    from neuroconv.tools.nwb_helpers import configure_and_write_nwbfile
+
+    configure_and_write_nwbfile(nwbfile, save_path="path/to/destination.nwb", backend="hdf5")
+
+.. note::
+
+    NWB generally recommends storing the full continuous stream of data in the NWB file, and then adding trials or
+    epochs as time intervals. Trial-aligning the data is then done on-the-fly when reading the file. This allows for
+    more flexibility in the analysis of the data.
\ No newline at end of file
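
The note above says trial alignment happens on the fly at read time. A minimal sketch of what that can look like with PyNWB, assuming the file written above contains a regularly sampled ``TimeSeries`` stored in ``acquisition`` under the name ``"response"`` (the series name is a hypothetical illustration, not part of this patch):

.. code-block:: python

    import numpy as np
    from pynwb import NWBHDF5IO

    with NWBHDF5IO("path/to/destination.nwb", mode="r") as io:
        nwbfile = io.read()
        trials = nwbfile.trials.to_dataframe()  # one row per trial

        series = nwbfile.acquisition["response"]  # hypothetical TimeSeries name
        t0 = series.starting_time or 0.0  # assumes rate-based timing, not explicit timestamps

        # slice the continuous stream to each trial's [start_time, stop_time) window
        trial_aligned = [
            np.asarray(series.data[int((start - t0) * series.rate) : int((stop - t0) * series.rate)])
            for start, stop in zip(trials["start_time"], trials["stop_time"])
        ]

Because the continuous stream is stored once, the same file supports any other alignment (epochs, stimulus onsets) without rewriting the data.
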
diff --git a/docs/user_guide/datainterfaces.rst b/docs/user_guide/datainterfaces.rst
index 8752bd387..3fa1d07c3 100644
--- a/docs/user_guide/datainterfaces.rst
+++ b/docs/user_guide/datainterfaces.rst
@@ -143,8 +143,8 @@ Here we can see that ``metadata["Ecephys"]["ElectrodeGroup"][0]["location"]`` is
 Use ``.get_metadata_schema()`` to get the schema of the metadata dictionary. This schema is a JSON-schema-like
 dictionary that specifies required and optional fields in the metadata dictionary. See :ref:`metadata schema `
 for more information.
 
-4. Run conversion
-~~~~~~~~~~~~~~~~~
+4a. Run conversion
+~~~~~~~~~~~~~~~~~~
 
 The ``.run_conversion`` method takes the (edited) metadata dictionary and
 the path of an NWB file, and launches the actual data conversion into NWB.
@@ -159,3 +159,24 @@ This method reads and writes large datasets piece-by-piece, so you can convert
 large datasets without overloading the computer's available RAM. It also uses good defaults for data chunking and
 lossless compression, reducing the file size of the output NWB file and optimizing the file for cloud compute.
+
+4b. Create an in-memory NWB file
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If you want to create an in-memory NWB file, you can use the ``.create_nwbfile`` method.
+
+.. code-block:: python
+
+    nwbfile = spikeglx_interface.create_nwbfile(metadata=metadata)
+
+This is useful for adding data such as trials, epochs, or other time intervals to the NWB file. See
+:ref:`Adding Time Intervals to NWB Files <adding_trials>` for more information.
+
+This does not load large datasets into memory. Those remain in the source files and are read piece-by-piece during the
+write process. Once you make all the modifications you want to the NWBFile, you can save it to disk. The following code
+automatically optimizes datasets for cloud compute and writes the file to disk.
+
+.. code-block:: python
+
+    from neuroconv.tools.nwb_helpers import configure_and_write_nwbfile
+
+    configure_and_write_nwbfile(nwbfile, save_path="path/to/destination.nwb", backend="hdf5")
\ No newline at end of file
diff --git a/docs/user_guide/index.rst b/docs/user_guide/index.rst
index e8c0827c8..4077f49be 100644
--- a/docs/user_guide/index.rst
+++ b/docs/user_guide/index.rst
@@ -20,6 +20,7 @@ and synchronize data across multiple sources.
 
    datainterfaces
    nwbconverter
+   adding_trials
    temporal_alignment
    csvs
    expand_path
diff --git a/docs/user_guide/nwbconverter.rst b/docs/user_guide/nwbconverter.rst
index 2360905e6..e1db63945 100644
--- a/docs/user_guide/nwbconverter.rst
+++ b/docs/user_guide/nwbconverter.rst
@@ -44,21 +44,30 @@ keys of ``data_interface_classes``.
 
 This creates an :py:class:`.NWBConverter` object that can aggregate and
 distribute across the data interfaces. To fetch metadata across all of the interfaces and merge
-them together, call::
+them together, call:
+
+.. code-block:: python
 
     metadata = converter.get_metadata()
 
-The metadata can then be manually modified with any additional user-input, just like ``DataInterface`` objects::
+The metadata can then be manually modified with any additional user-input, just like ``DataInterface`` objects:
+
+.. code-block:: python
 
     metadata["NWBFile"]["session_description"] = "NeuroConv tutorial."
    metadata["NWBFile"]["experimenter"] = "My name"
     metadata["Subject"]["subject_id"] = "ID of experimental subject"
 
-The final metadata dictionary should follow the form defined by
-``converter.get_metadata_schema()``. Now run the entire conversion with::
+The final metadata dictionary should follow the form defined by :meth:`.NWBConverter.get_metadata_schema`.
+Now run the entire conversion with:
+
+.. code-block:: python
 
     converter.run_conversion(metadata=metadata, nwbfile_path="my_nwbfile.nwb")
 
+Like ``DataInterface`` objects, :py:class:`.NWBConverter` objects can output an in-memory NWBFile object by
+calling :meth:`.NWBConverter.create_nwbfile`. This can be useful for debugging or for further processing.
+
 Though this example was only for two data streams (recording and spike-sorted
 data), it can easily extend to any number of sources, including video of
 a subject, extracted position estimates, stimuli, or any other data source.
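
Pulling the pieces of this page together, here is a minimal end-to-end sketch of the converter workflow. The choice of interfaces (a SpikeGLX recording plus Phy sorting, matching the recording/spike-sorted example above) and all paths are hypothetical placeholders; the final write mirrors the ``configure_and_write_nwbfile`` call shown earlier:

.. code-block:: python

    from neuroconv import NWBConverter
    from neuroconv.datainterfaces import PhySortingInterface, SpikeGLXRecordingInterface
    from neuroconv.tools.nwb_helpers import configure_and_write_nwbfile

    class ExampleNWBConverter(NWBConverter):
        # one entry per data stream; the keys are arbitrary labels
        data_interface_classes = dict(
            Recording=SpikeGLXRecordingInterface,
            Sorting=PhySortingInterface,
        )

    source_data = dict(
        Recording=dict(file_path="path/to/session_g0_t0.imec0.ap.bin"),  # placeholder path
        Sorting=dict(folder_path="path/to/phy_output"),  # placeholder path
    )
    converter = ExampleNWBConverter(source_data=source_data)

    metadata = converter.get_metadata()
    metadata["NWBFile"]["session_description"] = "Example session."

    # build the file in memory, enrich it, then write with optimized defaults
    nwbfile = converter.create_nwbfile(metadata=metadata)
    nwbfile.add_trial(start_time=0.0, stop_time=1.0)
    configure_and_write_nwbfile(nwbfile, save_path="path/to/destination.nwb", backend="hdf5")
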
From c26e451c1cb378f478214bd0d12dfaabf992bea9 Mon Sep 17 00:00:00 2001
From: Ben Dichter
Date: Mon, 23 Sep 2024 16:38:47 -0500
Subject: [PATCH 2/4] Improve docstring for BaseDataInterface.get_metadata()

---
 src/neuroconv/basedatainterface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/neuroconv/basedatainterface.py b/src/neuroconv/basedatainterface.py
index adcec89b5..32f3a3f30 100644
--- a/src/neuroconv/basedatainterface.py
+++ b/src/neuroconv/basedatainterface.py
@@ -70,7 +70,7 @@ def get_metadata_schema(self) -> dict:
         return metadata_schema
 
     def get_metadata(self) -> DeepDict:
-        """Child DataInterface classes should override this to match their metadata."""
+        """Extract metadata from source files and return it as a DeepDict."""
         metadata = DeepDict()
         metadata["NWBFile"]["session_description"] = ""
         metadata["NWBFile"]["identifier"] = str(uuid.uuid4())

From e9596f3b3c6803d6250c563c27a9d5b4b9c81c7e Mon Sep 17 00:00:00 2001
From: Ben Dichter
Date: Fri, 27 Dec 2024 09:21:51 -0500
Subject: [PATCH 3/4] Refactor type hints in utility functions for improved clarity and type safety

- Updated type hints in `checks.py`, `dict.py`, and `json_schema.py` to use
  more specific types (e.g., `ArrayLike`, `NDArray`, `dict[str, Any]`).
- Enhanced function signatures to reflect expected input and output types,
  improving code readability and maintainability.
---
 src/neuroconv/utils/checks.py      |  5 ++--
 src/neuroconv/utils/dict.py        | 40 ++++++++++++------------------
 src/neuroconv/utils/json_schema.py | 25 ++++++++++---------
 3 files changed, 32 insertions(+), 38 deletions(-)

diff --git a/src/neuroconv/utils/checks.py b/src/neuroconv/utils/checks.py
index 3a17e4776..e38cef306 100644
--- a/src/neuroconv/utils/checks.py
+++ b/src/neuroconv/utils/checks.py
@@ -1,10 +1,11 @@
 from numbers import Real
-from typing import Optional
+from typing import Optional, Union
 
 import numpy as np
+from numpy.typing import ArrayLike, NDArray
 
 
-def calculate_regular_series_rate(series: np.ndarray, tolerance_decimals: int = 6) -> Optional[Real]:
+def calculate_regular_series_rate(series: Union[ArrayLike, NDArray], tolerance_decimals: int = 6) -> Optional[float]:
     """Calculates the rate of a series as the difference between all consecutive points.
 
     If the difference between all time points are all the same value, then the value of rate is a scalar otherwise
     it is None."""
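
A small sketch of how the retyped ``calculate_regular_series_rate`` can be exercised. The ``ArrayLike`` annotation admits plain Python sequences as well as NumPy arrays; the timestamp values below are illustrative, and the docstring above only promises a scalar for evenly spaced points and ``None`` otherwise:

.. code-block:: python

    from neuroconv.utils.checks import calculate_regular_series_rate

    regular_timestamps = [0.0, 0.1, 0.2, 0.3, 0.4]  # evenly spaced -> scalar rate
    irregular_timestamps = [0.0, 0.1, 0.25, 0.3]  # unevenly spaced -> None

    print(calculate_regular_series_rate(regular_timestamps))
    print(calculate_regular_series_rate(irregular_timestamps))
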
diff --git a/src/neuroconv/utils/dict.py b/src/neuroconv/utils/dict.py
index f0507b653..141869eb8 100644
--- a/src/neuroconv/utils/dict.py
+++ b/src/neuroconv/utils/dict.py
@@ -36,7 +36,7 @@ def remove_implicit_resolver(cls, tag_to_remove):
 _NoDatesSafeLoader.remove_implicit_resolver("tag:yaml.org,2002:timestamp")
 
 
-def load_dict_from_file(file_path: FilePath) -> dict:
+def load_dict_from_file(file_path: FilePath) -> dict[str, Any]:
     """Safely load metadata from .yml or .json files."""
     file_path = Path(file_path)
     assert file_path.is_file(), f"{file_path} is not a file."
@@ -51,12 +51,18 @@ def load_dict_from_file(file_path: FilePath) -> dict:
     return dictionary
 
 
-def exist_dict_in_list(d, ls):
+def exist_dict_in_list(d: dict[str, Any], ls: list[dict[str, Any]]) -> bool:
     """Check if an identical dictionary exists in the list."""
     return any([d == i for i in ls])
 
 
-def append_replace_dict_in_list(ls, d, compare_key, list_dict_deep_update: bool = True, remove_repeats: bool = True):
+def append_replace_dict_in_list(
+    ls: list[dict[str, Any]],
+    d: dict[str, Any],
+    compare_key: str,
+    list_dict_deep_update: bool = True,
+    remove_repeats: bool = True,
+) -> list[dict[str, Any]]:
     """
     Update the list ls with the dict d.
@@ -107,14 +113,14 @@ def append_replace_dict_in_list(ls, d, compare_key, list_dict_deep_update: bool
 
 
 def dict_deep_update(
-    d: collections.abc.Mapping,
-    u: collections.abc.Mapping,
+    d: dict[str, Any],
+    u: dict[str, Any],
     append_list: bool = True,
     remove_repeats: bool = True,
     copy: bool = True,
     compare_key: str = "name",
     list_dict_deep_update: bool = True,
-) -> collections.abc.Mapping:
+) -> dict[str, Any]:
     """
     Perform an update to all nested keys of dictionary d(input) from dictionary u(updating dict).
@@ -214,34 +220,20 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
             if isinstance(value, dict):
                 self[key] = DeepDict(value)
 
-    def deep_update(self, other: Optional[Union[dict, "DeepDict"]] = None, **kwargs) -> None:
+    def deep_update(self, other: Optional[Union[dict[str, Any], "DeepDict"]] = None, **kwargs: Any) -> None:
         for key, value in (other or kwargs).items():
             if key in self and isinstance(self[key], dict) and isinstance(value, dict):
                 self[key].deep_update(value)
             else:
                 self[key] = value
 
-    def to_dict(self) -> dict:
+    def to_dict(self) -> dict[str, Any]:
         """Turn a DeepDict into a normal dictionary"""
-
-        def _to_dict(d: Union[dict, "DeepDict"]) -> dict:
+        def _to_dict(d: Union[dict[str, Any], "DeepDict"]) -> dict[str, Any]:
             return {key: _to_dict(value) for key, value in d.items()} if isinstance(d, dict) else d
-
         return _to_dict(self)
 
-    def __deepcopy__(self, memodict={}):
-        """
-
-        Parameters
-        ----------
-        memodict: dict
-            unused
-
-        Returns
-        -------
-        DeepDict
-
-        """
+    def __deepcopy__(self, memodict: dict = {}) -> "DeepDict":
         return DeepDict(deepcopy(self.to_dict()))
 
     def __repr__(self) -> str:
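
To make the retyped ``dict.py`` helpers concrete, a short sketch of ``dict_deep_update`` and ``DeepDict`` based on the signatures and docstrings visible in the hunks above; the metadata values are made up, and the list-append behavior follows the ``append_list=True`` default:

.. code-block:: python

    from neuroconv.utils.dict import DeepDict, dict_deep_update

    base = {"NWBFile": {"session_description": "", "experimenter": ["Alice"]}}
    update = {"NWBFile": {"session_description": "tutorial", "experimenter": ["Bob"]}}

    # nested keys are updated recursively; with append_list=True (the default),
    # list values should be appended rather than replaced
    merged = dict_deep_update(base, update)

    # DeepDict offers the same recursive merge as a method, and to_dict()
    # converts the nested structure back into plain dictionaries
    deep = DeepDict(base)
    deep.deep_update(update)
    plain = deep.to_dict()
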
diff --git a/src/neuroconv/utils/json_schema.py b/src/neuroconv/utils/json_schema.py
index 182558b98..745437a9d 100644
--- a/src/neuroconv/utils/json_schema.py
+++ b/src/neuroconv/utils/json_schema.py
@@ -4,7 +4,7 @@
 import warnings
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Callable, Optional
+from typing import Any, Callable, Optional, Type, Union
 
 import docstring_parser
 import hdmf.data_utils
@@ -65,9 +65,9 @@ def get_base_schema(
     root: bool = False,
     id_: Optional[str] = None,
     required: Optional[list[str]] = None,
-    properties: Optional[dict] = None,
-    **kwargs,
-) -> dict:
+    properties: Optional[dict[str, Any]] = None,
+    **kwargs: Any
+) -> dict[str, Any]:
     """Return the base schema used for all other schemas."""
     base_schema = dict(
         required=required or [],
@@ -85,7 +85,7 @@ def get_base_schema(
     return base_schema
 
 
-def get_schema_from_method_signature(method: Callable, exclude: Optional[list[str]] = None) -> dict:
+def get_schema_from_method_signature(method: Callable, exclude: Optional[list[str]] = None) -> dict[str, Any]:
     """Deprecated version of `get_json_schema_from_method_signature`."""
     message = (
         "The method `get_schema_from_method_signature` is now named `get_json_schema_from_method_signature`."
@@ -96,7 +96,7 @@ def get_schema_from_method_signature(method: Callable, exclude: Optional[list[st
     return get_json_schema_from_method_signature(method=method, exclude=exclude)
 
 
-def get_json_schema_from_method_signature(method: Callable, exclude: Optional[list[str]] = None) -> dict:
+def get_json_schema_from_method_signature(method: Callable, exclude: Optional[list[str]] = None) -> dict[str, Any]:
     """
     Get the equivalent JSON schema for a signature of a method.
@@ -179,14 +179,14 @@ def get_json_schema_from_method_signature(method: Callable, exclude: Optional[li
     return json_schema
 
 
-def _copy_without_title_keys(d: Any, /) -> Optional[dict]:
+def _copy_without_title_keys(d: Any) -> Optional[dict[str, Any]]:
     if not isinstance(d, dict):
         return d
 
     return {key: _copy_without_title_keys(value) for key, value in d.items() if key != "title"}
 
 
-def fill_defaults(schema: dict, defaults: dict, overwrite: bool = True):
+def fill_defaults(schema: dict[str, Any], defaults: dict[str, Any], overwrite: bool = True) -> None:
     """
     Insert the values of the defaults dict as default values in the schema in place.
@@ -211,7 +211,7 @@ def fill_defaults(schema: dict, defaults: dict, overwrite: bool = True):
             val["default"] = defaults[key]
 
 
-def unroot_schema(schema: dict):
+def unroot_schema(schema: dict[str, Any]) -> dict[str, Any]:
     """
     Modify a json-schema dictionary to make it not root.
@@ -223,7 +223,7 @@ def unroot_schema(schema: dict):
     return {k: v for k, v in schema.items() if k in terms}
 
 
-def _is_member(types, target_types):
+def _is_member(types: Union[Type, tuple[Type, ...]], target_types: Union[Type, tuple[Type, ...]]) -> bool:
     if not isinstance(target_types, tuple):
         target_types = (target_types,)
     if not isinstance(types, tuple):
@@ -231,7 +231,7 @@ def _is_member(types, target_types):
     return any(t in target_types for t in types)
 
 
-def get_schema_from_hdmf_class(hdmf_class):
+def get_schema_from_hdmf_class(hdmf_class: Type) -> dict[str, Any]:
     """Get metadata schema from hdmf class."""
     schema = get_base_schema()
     schema["tag"] = hdmf_class.__module__ + "." + hdmf_class.__name__
@@ -244,6 +244,7 @@ def get_schema_from_hdmf_class(hdmf_class):
     # Temporary solution before this is solved: https://github.com/hdmf-dev/hdmf/issues/475
     if "device" in pynwb_children_fields:
         pynwb_children_fields.remove("device")
+
     docval = hdmf_class.__init__.__docval__
     for docval_arg in docval["args"]:
         arg_name = docval_arg["name"]
@@ -298,7 +299,7 @@ def get_schema_from_hdmf_class(hdmf_class):
     return schema
 
 
-def get_metadata_schema_for_icephys() -> dict:
+def get_metadata_schema_for_icephys() -> dict[str, Any]:
     """
     Returns the metadata schema for icephys data.

From 55f78c9026e22ee6588ddd8989589f5df0913df6 Mon Sep 17 00:00:00 2001
From: Ben Dichter
Date: Fri, 27 Dec 2024 09:24:24 -0500
Subject: [PATCH 4/4] Update src/neuroconv/utils/json_schema.py

---
 src/neuroconv/utils/json_schema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/neuroconv/utils/json_schema.py b/src/neuroconv/utils/json_schema.py
index 745437a9d..bb21dfb01 100644
--- a/src/neuroconv/utils/json_schema.py
+++ b/src/neuroconv/utils/json_schema.py
@@ -66,7 +66,7 @@ def get_base_schema(
     id_: Optional[str] = None,
     required: Optional[list[str]] = None,
     properties: Optional[dict[str, Any]] = None,
-    **kwargs: Any
+    **kwargs,
 ) -> dict[str, Any]:
     """Return the base schema used for all other schemas."""
     base_schema = dict(
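
As a closing illustration of the retyped schema utilities, a sketch of ``get_json_schema_from_method_signature`` applied to a made-up function; the function, its parameters, and the expected output shape are assumptions for illustration, not part of the patch:

.. code-block:: python

    from typing import Optional

    from neuroconv.utils.json_schema import get_json_schema_from_method_signature

    def make_session(file_path: str, buffer_gb: float = 1.0, session_id: Optional[str] = None):
        """Hypothetical method used only to exercise the schema generator."""

    schema = get_json_schema_from_method_signature(make_session, exclude=["session_id"])
    # expected: a JSON-schema-like dict with one "properties" entry per remaining
    # parameter and defaults captured from the signature (e.g., buffer_gb -> 1.0)
    print(schema)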