Skip to content

Commit

Permalink
add MultiManager to h5pyd __init__.py
Browse files Browse the repository at this point in the history
  • Loading branch information
jreadey committed Apr 22, 2024
1 parent 90025e8 commit e135c40
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 21 deletions.
2 changes: 1 addition & 1 deletion h5pyd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from ._hl.files import File, is_hdf5
from ._hl.folders import Folder
from ._hl.group import Group, SoftLink, ExternalLink, UserDefinedLink, HardLink
from ._hl.dataset import Dataset
from ._hl.dataset import Dataset, MultiManager
from ._hl.table import Table
from ._hl.datatype import Datatype
from ._hl.attrs import AttributeManager
Expand Down
7 changes: 6 additions & 1 deletion h5pyd/_hl/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import base64
import numpy
import os
import logging
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed

Expand Down Expand Up @@ -1741,10 +1742,14 @@ class MultiManager():
# Avoid overtaxing HSDS
max_workers = 16

def __init__(self, datasets=None):
def __init__(self, datasets=None, logger=None):
    """
    Set up a manager for a group of datasets.

    datasets: non-empty sequence of datasets, stored as self.datasets
    logger: selects what self.log refers to:
        - None: the logging module itself (its module-level functions)
        - a logging.Logger instance: used as is
        - anything else: treated as a logger name and looked up with
          logging.getLogger

    Raises ValueError if datasets is None or empty.
    """
    if (datasets is None) or (len(datasets) == 0):
        raise ValueError("MultiManager requires non-empty list of datasets")
    self.datasets = datasets
    if logger is None:
        self.log = logging
    elif isinstance(logger, logging.Logger):
        # logging.getLogger only accepts a name (str); passing a Logger
        # object through it raises TypeError, so use the instance directly
        self.log = logger
    else:
        self.log = logging.getLogger(logger)

def read_dset_tl(self, args):
"""
Expand Down
52 changes: 43 additions & 9 deletions test/hl/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

from __future__ import absolute_import

import sys
import os
import os.path as op
import tempfile
Expand All @@ -38,6 +37,31 @@
del fname
del testfile

def getTestFileName(basename, subfolder=None):
    """
    Get filepath for a test case given a testname

    When the "use_h5py" config option is set, the file is placed under a
    local "out" directory, which is created on demand together with the
    optional subfolder. Otherwise the path is rooted at the value of the
    H5PYD_TEST_FOLDER environment variable, or "/" when it is unset, and
    nothing is created.

    basename: test name; ".h5" is appended to form the file name
    subfolder: optional folder inserted between the root and the file name

    Returns the resulting path as a string.
    """
    if config.get("use_h5py"):
        folder = "out"
        if subfolder:
            folder = op.join(folder, subfolder)
        # makedirs creates "out" and any (possibly nested) subfolder in one
        # call; exist_ok avoids failing when the directory already exists or
        # is created by another process between a check and the mkdir
        os.makedirs(folder, exist_ok=True)
    else:
        # default to the root folder
        folder = os.environ.get("H5PYD_TEST_FOLDER", "/")
        if subfolder:
            folder = op.join(folder, subfolder)
        # NOTE(review): op.join uses the host path separator; presumably
        # server-side paths need "/" - confirm behavior on Windows
    return op.join(folder, f"{basename}.h5")


class TestCase(ut.TestCase):

Expand Down Expand Up @@ -201,23 +225,33 @@ def assertNumpyBehavior(self, dset, arr, s):
with self.assertRaises(exc):
dset[s]

def getFileName(self, basename):
def getFileName(self, basename, subfolder=None):
    """
    Get filepath for a test case given a testname

    basename: test name; the returned path ends in "<basename>.h5"
    subfolder: optional folder placed between the root and the file name

    Returns the path computed by the module-level getTestFileName.
    """
    # Just call the external function; recomputing the path here would only
    # duplicate its logic and discard its result
    return getTestFileName(basename, subfolder=subfolder)

def getPathFromDomain(self, domain):
Expand Down
16 changes: 12 additions & 4 deletions test/hl/multi_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import subprocess
import re

from h5pyd._hl.dataset import MultiManager
from h5pyd import MultiManager
import h5pyd as h5py
from common import getTestFileName

# Flag to stop resource usage collection thread after a benchmark finishes
stop_stat_collection = False
Expand Down Expand Up @@ -244,12 +245,18 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters):
if __name__ == '__main__':
print("Executing multi read/write benchmark")
shape = (100, 100, 100)
count = 64
count = 4 # 64
num_iters = 50
dt = np.int32
stats = {}

fs = [h5py.File("/home/test_user1/h5pyd_multi_bm_" + str(i), mode='w') for i in range(count)]
fs = []

for i in range(count):
filename = getTestFileName(f"bm_{i:04d}", subfolder="multi_bm")
f = h5py.File(filename, mode='w')
fs.append(f)

data_in = np.zeros(shape, dtype=dt)
datasets = [f.create_dataset("data", shape, dtype=dt, data=data_in) for f in fs]

Expand All @@ -266,7 +273,8 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters):

print("Testing with shared HTTP connection...")

f = h5py.File("/home/test_user1/h5pyd_multi_bm_shared", mode='w')
filename = getTestFileName("bm_shared", subfolder="multi_bm")
f = h5py.File(filename, mode='w')
datasets = [f.create_dataset("data" + str(i), data=data_in, dtype=dt) for i in range(count)]

run_benchmark("Read Multi (Shared HttpConn)", read_datasets_multi, stats, datasets, num_iters)
Expand Down
4 changes: 2 additions & 2 deletions test/hl/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,10 @@
import sys
import numpy as np
import platform
import warnings

from common import ut, TestCase
from h5pyd._hl.dataset import MultiManager
import config
from h5pyd import MultiManager

if config.get("use_h5py"):
from h5py import File, Dataset
Expand All @@ -37,6 +36,7 @@
import h5pyd as h5py



def is_empty_dataspace(obj):
shape_json = obj.shape_json
if "class" not in shape_json:
Expand Down
2 changes: 0 additions & 2 deletions test/hl/test_dataset_extend.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
##############################################################################

import logging
import numpy as np
import math

import config

Expand Down
1 change: 0 additions & 1 deletion test/hl/test_dataset_fancyselect.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
##############################################################################

import numpy as np
import math

import config

Expand Down
1 change: 0 additions & 1 deletion test/hl/test_datatype.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
##############################################################################

import numpy as np
import math
import logging
import config

Expand Down

0 comments on commit e135c40

Please sign in to comment.