Skip to content

Commit

Permalink
ENH: pandas-dev#10143 Function to walk the group hierarchy of a PyTab…
Browse files Browse the repository at this point in the history
…les HDF5 file.

This implementation is inspired by os.walk and follows the interface as much as possible.
  • Loading branch information
Stephen Pascoe authored and Stephen Pascoe committed Sep 2, 2015
1 parent 207efc2 commit ddc2132
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 0 deletions.
32 changes: 32 additions & 0 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,38 @@ def groups(self):
g._v_name != u('table')))
]

def walk(self):
    """ Walk the pytables group hierarchy yielding the group name and
    dataframe names for each group.

    Only plain groups are walked.  A node that carries a `pandas_type`
    attribute is never yielded as `path` and never listed in `groups`,
    so every yielded `path` other than the root is also listed in the
    `groups` of its parent.

    Returns
    -------
    A generator yielding tuples (`path`, `groups`, `frames`) where:

    - `path` is the full path to a group,
    - `groups` is a list of group names contained in `path`
    - `frames` is a list of dataframe names contained in `path`
    """
    _tables()
    self._check_if_open()
    for g in self._handle.walk_groups():
        # Skip every group tagged with a pandas_type, not only 'frame':
        # the child loop below never reports such nodes in `groups`, so
        # yielding them as a path would contradict the parent's listing.
        if (getattr(g, '_v_name', None) is None
                or getattr(g._v_attrs, 'pandas_type', None) is not None):
            continue

        groups = []
        frames = []
        for child in g._v_children.values():
            pandas_type = getattr(child._v_attrs, 'pandas_type', None)
            if pandas_type is None:
                # untagged node: report it only when it is itself a group
                if getattr(child._v_attrs, 'CLASS', None) == 'GROUP':
                    groups.append(child._v_name)
            elif pandas_type == 'frame':
                # NOTE(review): only the 'frame' marker is recognised here;
                # children tagged with any other pandas_type are not
                # listed at all -- confirm that this is intended.
                frames.append(child._v_name)
        yield (g._v_pathname, groups, frames)


def get_node(self, key):
""" return the node with the key or None if it does not exist """
self._check_if_open()
Expand Down
35 changes: 35 additions & 0 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4813,6 +4813,41 @@ def test_read_nokey(self):
df.to_hdf(path, 'df2', mode='a')
self.assertRaises(ValueError, read_hdf, path)

# GH10143
def test_walk(self):
    """ Check that HDFStore.walk reports every group of a small nested
    store exactly once, with the expected sub-groups and frames, and
    that each reported frame can be read back unchanged.
    """
    with tm.ensure_clean('walk_groups.hdf') as filename:
        store = HDFStore(filename, 'w')
        try:
            dfs = {
                'df1': pd.DataFrame([1, 2, 3]),
                'df2': pd.DataFrame([4, 5, 6]),
                'df3': pd.DataFrame([6, 7, 8]),
                'df4': pd.DataFrame([9, 10, 11]),
            }

            store.put('/first_group/df1', dfs['df1'])
            store.put('/first_group/df2', dfs['df2'])
            store.put('/second_group/df3', dfs['df3'])
            store.put('/second_group/third_group/df4', dfs['df4'])

            # path -> (expected sub-group names, expected frame names)
            expect = {
                '/': (set(['first_group', 'second_group']), set()),
                '/first_group': (set(), set(['df1', 'df2'])),
                '/second_group': (set(['third_group']), set(['df3'])),
                '/second_group/third_group': (set(), set(['df4'])),
            }

            visited = set()
            for path, groups, frames in store.walk():
                self.assertIn(path, expect)
                # a group must not be yielded more than once
                self.assertNotIn(path, visited)
                visited.add(path)

                expect_groups, expect_frames = expect[path]
                self.assertEqual(expect_groups, set(groups))
                self.assertEqual(expect_frames, set(frames))

                for frame in frames:
                    frame_path = '/'.join([path, frame])
                    df = store.get(frame_path)
                    self.assertTrue(df.equals(dfs[frame]))

            # Without this check the test would pass even if walk()
            # yielded nothing or silently missed a group.
            self.assertEqual(set(expect), visited)
        finally:
            # release the file handle even when an assertion fails
            store.close()


class TestHDFComplexValues(Base):
# GH10447
Expand Down

0 comments on commit ddc2132

Please sign in to comment.