From 464e0ecfcae6b7e090f1b05d8b0ffe6b57e6aaf4 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 13 Aug 2020 19:15:09 -0400 Subject: [PATCH 1/3] OPT: defer import of pandas in core.py until it is explicitly needed --- src/pynwb/core.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/pynwb/core.py b/src/pynwb/core.py index b06edf769..8bd80309e 100644 --- a/src/pynwb/core.py +++ b/src/pynwb/core.py @@ -1,6 +1,5 @@ from h5py import RegionReference import numpy as np -import pandas as pd from hdmf import Container, Data, DataRegion, get_region_slicer from hdmf.container import AbstractContainer, MultiContainerInterface as hdmf_MultiContainerInterface @@ -222,12 +221,14 @@ def to_dataframe(self): '''Produce a pandas DataFrame containing this table's data. ''' + import pandas as pd data = {colname: self[colname] for ii, colname in enumerate(self.columns)} return pd.DataFrame(data) @classmethod @docval( - {'name': 'df', 'type': pd.DataFrame, 'doc': 'input data'}, + # TODO: "real" pd.DataFrame? + {'name': 'df', 'type': "pd.DataFrame", 'doc': 'input data'}, {'name': 'name', 'type': str, 'doc': 'the name of this container', 'default': None}, { 'name': 'extra_ok', @@ -241,6 +242,8 @@ def from_dataframe(cls, **kwargs): should match the columns defined on the NWBTable subclass. 
''' + import pandas as pd + df, name, extra_ok = getargs('df', 'name', 'extra_ok', kwargs) cls_cols = list([col['name'] for col in getattr(cls, '__columns__')]) From b88f857fcc19ba8619a354da91240374c843e489 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 13 Aug 2020 19:15:20 -0400 Subject: [PATCH 2/3] OPT: remove pandas from requirements --- requirements-min.txt | 1 - requirements.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/requirements-min.txt b/requirements-min.txt index b86263de8..d440a6ba1 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -2,5 +2,4 @@ h5py==2.9 # support for setting attrs to lists of utf-8 added in 2.9 hdmf==2.1.0,<3 numpy==1.16 -pandas==0.23 python-dateutil==2.7 diff --git a/requirements.txt b/requirements.txt index 3669cd3aa..2ad86603f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ h5py==2.10.0 hdmf==2.1.0 numpy==1.18.5 -pandas==0.25.3 python-dateutil==2.8.1 From 6d6ea6c9af2e95f5dbe70b17e068601279fa6982 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 13 Aug 2020 19:19:18 -0400 Subject: [PATCH 3/3] OPT(+TODO): import pandas lazily, only when a DataFrame could actually be passed in --- src/pynwb/file.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/pynwb/file.py b/src/pynwb/file.py index fd39b2cc3..c86eba2c3 100644 --- a/src/pynwb/file.py +++ b/src/pynwb/file.py @@ -5,7 +5,6 @@ import copy as _copy import numpy as np -import pandas as pd from hdmf.utils import docval, getargs, call_docval_func, get_docval @@ -669,7 +668,8 @@ def add_stimulus_template(self, timeseries): self._add_stimulus_template_internal(timeseries) self._update_sweep_table(timeseries) - @docval({'name': 'data', 'type': (np.ndarray, list, tuple, pd.DataFrame, DynamicTable, NWBContainer, ScratchData), + # TODO: bring back pd.DataFrame somehow without causing an import of pandas, e.g. only if pandas is already loaded? 
+ @docval({'name': 'data', 'type': (np.ndarray, list, tuple, DynamicTable, NWBContainer, ScratchData), 'help': 'the data to add to the scratch space'}, {'name': 'name', 'type': str, 'help': 'the name of the data. Only used when passing in numpy.ndarray, list, or tuple', @@ -683,7 +683,12 @@ def add_stimulus_template(self, timeseries): def add_scratch(self, **kwargs): '''Add data to the scratch space''' data, name, notes = getargs('data', 'name', 'notes', kwargs) - if isinstance(data, (np.ndarray, pd.DataFrame, list, tuple)): + data_types = [np.ndarray, list, tuple] + if 'pandas' in sys.modules: + # delayed import and cannot pass DataFrame if pandas is not already loaded + import pandas as pd + data_types += [ pd.DataFrame ] + if isinstance(data, data_types): if name is None: raise ValueError('please provide a name for scratch data') if isinstance(data, pd.DataFrame):