# pandas/io/pytables.py -- store method (invoked as ``store.walk()`` on an
# ``HDFStore`` by the test further down).
def walk(self):
    """ Walk the pytables group hierarchy yielding the group name and
    dataframe names for each group.

    A node carrying a ``pandas_type`` attribute holds a stored pandas
    object rather than being part of the group hierarchy, so such a node
    is never yielded as a `path` and never listed in `groups`.

    Returns
    -------
    A generator yielding tuples (`path`, `groups`, `frames`) where:

    - `path` is the full path to a group (the root group is ``'/'``),
    - `groups` is a list of group names contained in `path`
    - `frames` is a list of dataframe names contained in `path`; both
      fixed-format (``'frame'``) and table-format (``'frame_table'``)
      dataframes are reported

    """
    _tables()
    self._check_if_open()

    # pandas_type values that identify a stored DataFrame.  Table-format
    # frames are tagged 'frame_table'; matching only 'frame' would hide them
    # and expose their internal storage group as if it were a user group.
    frame_types = ('frame', 'frame_table')

    for g in self._handle.walk_groups():
        # Skip anything that is itself a stored pandas object (of any
        # kind): its children are storage internals, not user groups.
        if (getattr(g, '_v_name', None) is None
                or getattr(g._v_attrs, 'pandas_type', None) is not None):
            continue

        groups = []
        frames = []
        for child in g._v_children.values():
            pandas_type = getattr(child._v_attrs, 'pandas_type', None)
            if pandas_type is None:
                # A plain container group; untagged non-group leaves are
                # ignored.
                if getattr(child._v_attrs, 'CLASS', None) == 'GROUP':
                    groups.append(child._v_name)
            elif pandas_type in frame_types:
                frames.append(child._v_name)
        yield (g._v_pathname, groups, frames)


# pandas/io/tests/test_pytables.py -- test method.
# GH10143
def test_walk(self):
    """ walk() reports every plain group exactly with its sub-groups and
    dataframes, for both fixed-format and table-format frames. """
    dfs = {
        'df1': pd.DataFrame([1, 2, 3]),
        'df2': pd.DataFrame([4, 5, 6]),
        'df3': pd.DataFrame([6, 7, 8]),
        'df4': pd.DataFrame([9, 10, 11]),
        'df5': pd.DataFrame({'a': [12, 13, 14]}),
    }

    # path -> (expected sub-group names, expected dataframe names)
    expect = {
        '/': (set(['first_group', 'second_group']), set()),
        '/first_group': (set(), set(['df1', 'df2', 'df5'])),
        '/second_group': (set(['third_group']), set(['df3'])),
        '/second_group/third_group': (set(), set(['df4'])),
    }

    with tm.ensure_clean('walk_groups.hdf') as filename:
        store = HDFStore(filename, 'w')
        try:
            store.put('/first_group/df1', dfs['df1'])
            store.put('/first_group/df2', dfs['df2'])
            store.put('/second_group/df3', dfs['df3'])
            store.put('/second_group/third_group/df4', dfs['df4'])
            # table-format frames must be listed like fixed-format ones
            store.put('/first_group/df5', dfs['df5'], format='table')

            visited = set()
            for path, groups, frames in store.walk():
                self.assertIn(path, expect)
                visited.add(path)
                expect_groups, expect_frames = expect[path]

                self.assertEqual(expect_groups, set(groups))
                self.assertEqual(expect_frames, set(frames))
                for frame in frames:
                    frame_path = '/'.join([path, frame])
                    df = store.get(frame_path)
                    self.assertTrue(df.equals(dfs[frame]))

            # Guard against a walk() that silently yields too little.
            self.assertEqual(set(expect), visited)
        finally:
            # Release the file handle before ensure_clean removes the file.
            store.close()