diff --git a/h5pyd/__init__.py b/h5pyd/__init__.py index 572c10c..e337fa0 100644 --- a/h5pyd/__init__.py +++ b/h5pyd/__init__.py @@ -21,7 +21,7 @@ from ._hl.files import File, is_hdf5 from ._hl.folders import Folder from ._hl.group import Group, SoftLink, ExternalLink, UserDefinedLink, HardLink -from ._hl.dataset import Dataset +from ._hl.dataset import Dataset, MultiManager from ._hl.table import Table from ._hl.datatype import Datatype from ._hl.attrs import AttributeManager diff --git a/h5pyd/_hl/dataset.py b/h5pyd/_hl/dataset.py index 62cfcf0..57c6a8b 100644 --- a/h5pyd/_hl/dataset.py +++ b/h5pyd/_hl/dataset.py @@ -19,6 +19,7 @@ import base64 import numpy import os +import logging from concurrent.futures import ThreadPoolExecutor from concurrent.futures import as_completed @@ -1741,10 +1742,14 @@ class MultiManager(): # Avoid overtaxing HSDS max_workers = 16 - def __init__(self, datasets=None): + def __init__(self, datasets=None, logger=None): if (datasets is None) or (len(datasets) == 0): raise ValueError("MultiManager requires non-empty list of datasets") self.datasets = datasets + if logger is None: + self.log = logging + else: + self.log = logging.getLogger(logger) def read_dset_tl(self, args): """ diff --git a/test/hl/common.py b/test/hl/common.py index 5d3f4e2..3bab462 100644 --- a/test/hl/common.py +++ b/test/hl/common.py @@ -12,7 +12,6 @@ from __future__ import absolute_import -import sys import os import os.path as op import tempfile @@ -38,6 +37,31 @@ del fname del testfile +def getTestFileName(basename, subfolder=None): + """ + Get filepath for a test case given a testname + """ + + if config.get("use_h5py"): + filename = "out" + if not op.isdir(filename): + os.mkdir(filename) + if subfolder: + filename = op.join(filename, subfolder) + if not op.isdir(filename): + os.mkdir(filename) + filename = op.join(filename, f"{basename}.h5") + else: + if "H5PYD_TEST_FOLDER" in os.environ: + filename = os.environ["H5PYD_TEST_FOLDER"] + else: + # default to the root folder + filename = "/" + if subfolder: + filename = op.join(filename, subfolder) + filename = op.join(filename, f"{basename}.h5") + return filename + class TestCase(ut.TestCase): @@ -201,23 +225,33 @@ def assertNumpyBehavior(self, dset, arr, s): with self.assertRaises(exc): dset[s] - def getFileName(self, basename): + def getFileName(self, basename, subfolder=None): """ Get filepath for a test case given a testname """ + # Just call the external function + filename = getTestFileName(basename, subfolder=subfolder) + + if config.get("use_h5py"): - if not op.isdir("out"): - os.mkdir("out") - filename = "out/" + basename + ".h5" + filename = "out" + if not op.isdir(filename): + os.mkdir(filename) + if subfolder: + filename = op.join(filename, subfolder) + if not op.isdir(filename): + os.mkdir(filename) + filename = op.join(filename, f"{basename}.h5") else: if "H5PYD_TEST_FOLDER" in os.environ: - domain = os.environ["H5PYD_TEST_FOLDER"] + filename = os.environ["H5PYD_TEST_FOLDER"] else: # default to the root folder - domain = "/" - filename = op.join(domain, basename) - filename += ".h5" + filename = "/" + if subfolder: + filename = op.join(filename, subfolder) + filename = op.join(filename, f"{basename}.h5") return filename def getPathFromDomain(self, domain): diff --git a/test/hl/multi_benchmark.py b/test/hl/multi_benchmark.py index 44c210e..1f4af58 100644 --- a/test/hl/multi_benchmark.py +++ b/test/hl/multi_benchmark.py @@ -6,8 +6,9 @@ import subprocess import re -from h5pyd._hl.dataset import MultiManager +from h5pyd import MultiManager import h5pyd as h5py +from common import getTestFileName # Flag to stop resource usage collection thread after a benchmark finishes stop_stat_collection = False @@ -244,12 +245,18 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters): if __name__ == '__main__': print("Executing multi read/write benchmark") shape = (100, 100, 100) - count = 64 + count = 4 # 64 num_iters = 50 dt = np.int32 stats = {} - fs = [h5py.File("/home/test_user1/h5pyd_multi_bm_" + str(i), mode='w') for i in range(count)] + fs = [] + + for i in range(count): + filename = getTestFileName(f"bm_{i:04d}", subfolder="multi_bm") + f = h5py.File(filename, mode='w') + fs.append(f) + data_in = np.zeros(shape, dtype=dt) datasets = [f.create_dataset("data", shape, dtype=dt, data=data_in) for f in fs] @@ -266,7 +273,8 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters): print("Testing with shared HTTP connection...") - f = h5py.File("/home/test_user1/h5pyd_multi_bm_shared", mode='w') + filename = getTestFileName("bm_shared", subfolder="multi_bm") + f = h5py.File(filename, mode='w') datasets = [f.create_dataset("data" + str(i), data=data_in, dtype=dt) for i in range(count)] run_benchmark("Read Multi (Shared HttpConn)", read_datasets_multi, stats, datasets, num_iters) diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py index 7c5c96b..cfad3fe 100644 --- a/test/hl/test_dataset.py +++ b/test/hl/test_dataset.py @@ -23,11 +23,10 @@ import sys import numpy as np import platform -import warnings from common import ut, TestCase -from h5pyd._hl.dataset import MultiManager import config +from h5pyd import MultiManager if config.get("use_h5py"): from h5py import File, Dataset @@ -37,6 +36,7 @@ import h5pyd as h5py + def is_empty_dataspace(obj): shape_json = obj.shape_json if "class" not in shape_json: diff --git a/test/hl/test_dataset_extend.py b/test/hl/test_dataset_extend.py index d318786..d2c6f48 100644 --- a/test/hl/test_dataset_extend.py +++ b/test/hl/test_dataset_extend.py @@ -11,8 +11,6 @@ ############################################################################## import logging -import numpy as np -import math import config diff --git a/test/hl/test_dataset_fancyselect.py b/test/hl/test_dataset_fancyselect.py index a43e867..e506610 100644 --- a/test/hl/test_dataset_fancyselect.py +++ b/test/hl/test_dataset_fancyselect.py @@ -11,7 +11,6 @@ ############################################################################## import numpy as np -import math import config diff --git a/test/hl/test_datatype.py b/test/hl/test_datatype.py index 00c0c50..b5dceac 100644 --- a/test/hl/test_datatype.py +++ b/test/hl/test_datatype.py @@ -11,7 +11,6 @@ ############################################################################## import numpy as np -import math import logging import config