Skip to content

Commit

Permalink
cache __TYPE_MAP and init submodules (#1931)
Browse files Browse the repository at this point in the history
Co-authored-by: Ryan Ly <[email protected]>
Co-authored-by: Matthew Avaylon <[email protected]>
  • Loading branch information
3 people authored Sep 17, 2024
1 parent b9f9e5a commit 17adccf
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 32 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,6 @@ tests/coverage/htmlcov

# Version
_version.py

.core_typemap_version
core_typemap.pkl
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# PyNWB Changelog

## PyNWB 2.8.3 (Upcoming)

### Performance
- Cache global type map to speed import 3X. @sneakers-the-rat [#1931](https://github.com/NeurodataWithoutBorders/pynwb/pull/1931)

## PyNWB 2.8.2 (September 9, 2024)

### Enhancements and minor changes
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ exclude = [
"__pycache__",
"build/",
"dist/",
"src/nwb-schema",
"src/pynwb/nwb-schema",
"docs/source/conf.py",
"docs/notebooks/*",
"src/pynwb/_due.py",
Expand Down
137 changes: 111 additions & 26 deletions src/pynwb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import os.path
from pathlib import Path
from copy import deepcopy
import subprocess
import pickle
from warnings import warn
import h5py

Expand All @@ -23,6 +25,16 @@
from .spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace # noqa E402
from .validate import validate # noqa: F401, E402

try:
# see https://effigies.gitlab.io/posts/python-packaging-2023/
from ._version import __version__
except ImportError: # pragma: no cover
# this is a relatively slower method for getting the version string
from importlib.metadata import version # noqa: E402

__version__ = version("pynwb")
del version


@docval({'name': 'config_path', 'type': str, 'doc': 'Path to the configuration file.'},
{'name': 'type_map', 'type': TypeMap, 'doc': 'The TypeMap.', 'default': None},
Expand Down Expand Up @@ -51,7 +63,7 @@ def unload_type_config(**kwargs):
type_map = kwargs['type_map'] or get_type_map()
hdmf_unload_type_config(type_map=type_map)

def __get_resources():
def __get_resources() -> dict:
try:
from importlib.resources import files
except ImportError:
Expand All @@ -61,27 +73,35 @@ def __get_resources():
__location_of_this_file = files(__name__)
__core_ns_file_name = 'nwb.namespace.yaml'
__schema_dir = 'nwb-schema/core'
cached_core_typemap = __location_of_this_file / 'core_typemap.pkl'
cached_version_indicator = __location_of_this_file / '.core_typemap_version'

ret = dict()
ret['namespace_path'] = str(__location_of_this_file / __schema_dir / __core_ns_file_name)
ret['cached_typemap_path'] = str(cached_core_typemap)
ret['cached_version_indicator'] = str(cached_version_indicator)
return ret


def _get_resources():
# LEGACY: Needed to support legacy implementation.
# TODO: Remove this in PyNWB 3.0.
warn("The function '_get_resources' is deprecated and will be removed in a future release.", DeprecationWarning)
return __get_resources()


# a global namespace catalog
global __NS_CATALOG
# a global type map
global __TYPE_MAP

__NS_CATALOG = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace)
__ns_catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace)

hdmf_typemap = hdmf.common.get_type_map()
__TYPE_MAP = TypeMap(__NS_CATALOG)
__TYPE_MAP = TypeMap(__ns_catalog)
__TYPE_MAP.merge(hdmf_typemap, ns_catalog=True)

# load the core namespace, i.e. base NWB specification
__resources = __get_resources()


@docval({'name': 'extensions', 'type': (str, TypeMap, list),
'doc': 'a path to a namespace, a TypeMap, or a list consisting of paths to namespaces and TypeMaps',
Expand Down Expand Up @@ -139,22 +159,95 @@ def load_namespaces(**kwargs):
namespace_path = getargs('namespace_path', kwargs)
return __TYPE_MAP.load_namespaces(namespace_path)

def available_namespaces():
"""Returns all namespaces registered in the namespace catalog"""
return __TYPE_MAP.namespace_catalog.namespaces

# load the core namespace, i.e. base NWB specification
__resources = __get_resources()
if os.path.exists(__resources['namespace_path']):
load_namespaces(__resources['namespace_path'])
else:
raise RuntimeError(
"'core' is not a registered namespace. If you installed PyNWB locally using a git clone, you need to "
"use the --recurse_submodules flag when cloning. See developer installation instructions here: "
"https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository"
)

def __git_cmd(*args) -> subprocess.CompletedProcess:
"""
Call git with the package as the directory regardless of cwd.
Since any folder within a git repo works, don't try to ascend to the top, since
if we're *not* actually in a git repo we're only guaranteed to know about
the inner `pynwb` directory.
"""
parent_dir = str(Path(__file__).parent)
result = subprocess.run(["git", "-C", parent_dir, *args], capture_output=True)
return result


def __clone_submodules():
if __git_cmd('rev-parse').returncode == 0:
warn(
'NWB core schema not found in cloned installation, initializing submodules...',
stacklevel=1)
res = __git_cmd('submodule', 'update', '--init', '--recursive')
if not res.returncode == 0: # pragma: no cover
raise RuntimeError(
'Exception while initializing submodules, got:\n'
'stdout:\n' + ('-'*20) + res.stdout + "\nstderr:\n" + ('-'*20) + res.stderr)
else: # pragma: no cover
raise RuntimeError("Package is not installed from a git repository, can't clone submodules")


def __load_core_namespace(final:bool=False):
"""
Load the core namespace into __TYPE_MAP,
either by loading a pickled version or creating one anew and pickling it.
def available_namespaces():
"""Returns all namespaces registered in the namespace catalog"""
return __NS_CATALOG.namespaces
We keep a dotfile next to it that tracks what version of pynwb created it,
so that we invalidate it when the code changes.
Args:
final (bool): This function tries again if the submodules aren't cloned,
but it shouldn't go into an infinite loop.
If final is ``True``, don't recurse.
"""
global __TYPE_MAP
global __resources

# if we have a version indicator file and it doesn't match the current version,
# scrap the cached typemap
if os.path.exists(__resources['cached_version_indicator']):
with open(__resources['cached_version_indicator'], 'r') as f:
cached_version = f.read().strip()
if cached_version != __version__:
Path(__resources['cached_typemap_path']).unlink(missing_ok=True)
else:
# remove any cached typemap, forcing re-creation
Path(__resources['cached_typemap_path']).unlink(missing_ok=True)

# load pickled typemap if we have one
if os.path.exists(__resources['cached_typemap_path']):
with open(__resources['cached_typemap_path'], 'rb') as f:
__TYPE_MAP = pickle.load(f) # type: TypeMap

# otherwise make a new one and cache it
elif os.path.exists(__resources['namespace_path']):
load_namespaces(__resources['namespace_path'])
with open(__resources['cached_typemap_path'], 'wb') as f:
pickle.dump(__TYPE_MAP, f, protocol=pickle.HIGHEST_PROTOCOL)
with open(__resources['cached_version_indicator'], 'w') as f:
f.write(__version__)

# otherwise, we don't have the schema and try and initialize from submodules,
# afterwards trying to load the namespace again
else:
try:
__clone_submodules()
except (FileNotFoundError, OSError, RuntimeError) as e: # pragma: no cover
if 'core' not in available_namespaces():
warn(
"'core' is not a registered namespace. If you installed PyNWB locally using a git clone, "
"you need to use the --recurse_submodules flag when cloning. "
"See developer installation instructions here: "
"https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository\n"
f"Got exception: \n{e}"
)
if not final:
__load_core_namespace(final=True)
__load_core_namespace()


# a function to register a container classes with the global map
Expand Down Expand Up @@ -427,15 +520,7 @@ def export(self, **kwargs):
from hdmf.data_utils import DataChunkIterator # noqa: F401,E402
from hdmf.backends.hdf5 import H5DataIO # noqa: F401,E402

try:
# see https://effigies.gitlab.io/posts/python-packaging-2023/
from ._version import __version__
except ImportError: # pragma: no cover
# this is a relatively slower method for getting the version string
from importlib.metadata import version # noqa: E402

__version__ = version("pynwb")
del version

from ._due import due, BibTeX # noqa: E402
due.cite(
Expand Down
5 changes: 0 additions & 5 deletions tests/back_compat/test_import_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,14 @@ def test_outer_import_structure(self):
"TimeSeries",
"TypeMap",
"_HDF5IO",
"__NS_CATALOG",
"__TYPE_MAP",
"__builtins__",
"__cached__",
"__doc__",
"__file__",
"__get_resources",
"__io",
"__loader__",
"__name__",
"__package__",
"__path__",
"__resources",
"__spec__",
"__version__",
"_due",
Expand Down

0 comments on commit 17adccf

Please sign in to comment.