From 059e7c3e40a8d4b59e4b2b2b9bccb7985adee74d Mon Sep 17 00:00:00 2001
From: John Readey
Date: Tue, 1 Oct 2024 05:36:05 -0500
Subject: [PATCH] Numpy 2.0 support (#220)

* added support for numpy 2.0
* moved multimanager to its own testfile
* remove python 3.8 support
* update workflow
* update workflow2
* add requirements.txt
* update requirements.txt
* update workflow3
* add debug line to multi test
* workflow update
* workflow - run hstouch after h5pyd setup
* workflow - fix workflow syntax error
* workflow - fix working directory
* fix test_dataset_fancyselect to work with h5py
* adjust numpy version requirements
* fix numpy 2.0 vs 2.1.1 incompat issue
* fix errors on windows
* fix for wincompat in vlen test
---
 .github/workflows/main.yml          |  16 +-
 h5pyd/_apps/utillib.py              |   2 +-
 h5pyd/_hl/dataset.py                |  27 +-
 pyproject.toml                      |   2 +-
 requirements.txt                    |  11 +
 test/hl/test_attribute.py           |   6 +-
 test/hl/test_dataset.py             | 562 ++--------------------------
 test/hl/test_dataset_create.py      |   3 +-
 test/hl/test_dataset_fancyselect.py |  40 ++
 test/hl/test_dataset_multi.py       | 544 +++++++++++++++++++++++++++
 testall.py                          |   7 +-
 11 files changed, 663 insertions(+), 557 deletions(-)
 create mode 100644 requirements.txt
 create mode 100644 test/hl/test_dataset_multi.py

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 43785a9..7841e1c 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -26,7 +26,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     runs-on: ${{ matrix.os }}
     steps:
@@ -42,7 +42,6 @@
       run: |
         python -m pip install --upgrade pip
         python -m pip install flake8 pytest
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
 
     - name: Lint with flake8
       shell: bash
@@ -50,11 +49,6 @@
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --max-complexity=10 --max-line-length=127 --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
 
-    - name: Install package
-      shell: bash
-      run: |
-        pip install -e .
-
     - name: Checkout HSDS
       uses: actions/checkout@v4
       with:
@@ -84,10 +78,18 @@
     - name: HSDS Setup
       shell: bash
+      env:
+        ADMIN_PASSWORD: admin
+        ADMIN_USERNAME: admin
       working-directory: ${{github.workspace}}/hsds
       run: |
         python tests/integ/setup_test.py
 
+    - name: Install h5pyd package
+      shell: bash
+      run: |
+        pip install -e .
+ - name: Create h5pyd test folder env: HS_USERNAME: test_user1 diff --git a/h5pyd/_apps/utillib.py b/h5pyd/_apps/utillib.py index dd0b966..8ccc098 100755 --- a/h5pyd/_apps/utillib.py +++ b/h5pyd/_apps/utillib.py @@ -338,7 +338,7 @@ def copy_array(src_arr, ctx): if has_reference(src_arr.dtype): # flatten array to simplify iteration - count = np.product(src_arr.shape) + count = int(np.prod(src_arr.shape)) tgt_arr_flat = tgt_arr.reshape((count,)) src_arr_flat = src_arr.reshape((count,)) for i in range(count): diff --git a/h5pyd/_hl/dataset.py b/h5pyd/_hl/dataset.py index 5077bfb..ac07e37 100644 --- a/h5pyd/_hl/dataset.py +++ b/h5pyd/_hl/dataset.py @@ -1350,17 +1350,16 @@ def __setitem__(self, args, val): # Attempt to directly convert the input array of vlen data to its base class val = numpy.asarray(val, dtype=vlen_base_class) - except ValueError as ve: + except (ValueError, TypeError): # Failed to convert input array to vlen base class directly, instead create a new array where # each element is an array of the Dataset's dtype - self.log.debug(f"asarray ValueError: {ve}") try: # Force output shape tmp = numpy.empty(shape=val.shape, dtype=self.dtype) tmp[:] = [numpy.array(x, dtype=self.dtype) for x in val] val = tmp - except ValueError as e: - msg = f"ValueError converting value element by element: {e}" + except (ValueError, TypeError): + msg = "ValueError converting value element by element" self.log.debug(msg) if vlen_base_class == val.dtype: @@ -1589,19 +1588,21 @@ def write_direct(self, source, source_sel=None, dest_sel=None): data = source.__getitem__(slices) self.__setitem__(dest_sel, data) - def __array__(self, dtype=None): - """Create a Numpy array containing the whole dataset. DON'T THINK - THIS MEANS DATASETS ARE INTERCHANGABLE WITH ARRAYS. For one thing, - you have to read the whole dataset everytime this method is called. 
- """ - arr = numpy.empty(self._shape, dtype=self.dtype if dtype is None else dtype) + def __array__(self, dtype=None, copy=True): + if copy is False: + raise ValueError( + f"AstypeWrapper.__array__ received {copy=} " + f"but memory allocation cannot be avoided on read" + ) # Special case for (0,)*-shape datasets if self._shape is None or numpy.prod(self._shape) == 0: - return arr + return numpy.empty(self._shape, dtype=self.dtype if dtype is None else dtype) - self.read_direct(arr) - return arr + data = self[:] + if dtype is not None: + return data.astype(dtype, copy=False) + return data def __repr__(self): if not self: diff --git a/pyproject.toml b/pyproject.toml index cfdbdd6..3be03bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ requires-python = ">=3.8" version = "0.18.0" dependencies = [ - "numpy >= 1.17.3, < 2.0.0", + "numpy >=2.0.0rc1; python_version>='3.9'", "requests_unixsocket", "pytz", "pyjwt", diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5043247 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +certifi==2024.8.30 +charset-normalizer==3.3.2 +h5py==3.12.1 +idna==3.10 +numpy==2.1.1 +packaging==24.1 +PyJWT==2.9.0 +pytz==2024.2 +requests==2.32.3 +requests-unixsocket==0.3.0 +urllib3==2.2.3 diff --git a/test/hl/test_attribute.py b/test/hl/test_attribute.py index 4c524c7..21d2290 100644 --- a/test/hl/test_attribute.py +++ b/test/hl/test_attribute.py @@ -26,7 +26,7 @@ class TestAttribute(TestCase): def test_create(self): - filename = self.getFileName("create_attribfute") + filename = self.getFileName("create_attribute") print("filename:", filename) f = h5py.File(filename, 'w') @@ -61,7 +61,7 @@ def test_create(self): self.assertEqual(value, "Hello HDF") # create attribute with as a fixed length string - g1.attrs.create('d1', np.string_("This is a numpy string")) + g1.attrs.create('d1', np.bytes_("This is a numpy string")) value = g1.attrs['d1'] self.assertEqual(value, b"This is a numpy string") @@ -89,7 +89,7 @@ def test_create(self): self.assertEqual(arr[i], 1) # array of strings - g1.attrs['strings'] = [np.string_("Hello"), np.string_("Good-bye")] + g1.attrs['strings'] = [np.bytes_("Hello"), np.bytes_("Good-bye")] arr = g1.attrs['strings'] self.assertEqual(arr.shape, (2,)) self.assertEqual(arr[0], b"Hello") diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py index d10c304..9c9c31b 100644 --- a/test/hl/test_dataset.py +++ b/test/hl/test_dataset.py @@ -795,10 +795,10 @@ def test_int(self): nbits = 12 shape = (100, 300) - testdata = np.random.randint(0, 2 ** nbits - 1, size=shape) + testdata = np.random.randint(0, 2 ** nbits - 1, size=shape, dtype=np.int32) # Create dataset; note omission of nbits (for library-determined precision) - dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=True) + dset = self.f.create_dataset('foo', shape, dtype=np.int32, scaleoffset=True) # Dataset reports scaleoffset enabled assert dset.scaleoffset is not None @@ -816,9 +816,9 @@ def test_int_with_minbits(self): nbits = 12 shape = (100, 300) - testdata = np.random.randint(0, 2 ** nbits, size=shape) + testdata = np.random.randint(0, 2 ** nbits, size=shape, dtype=np.int32) - dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits) + dset = self.f.create_dataset('foo', shape, scaleoffset=nbits, dtype=np.int32) # Dataset reports scaleoffset enabled with correct precision self.assertTrue(dset.scaleoffset == 12) @@ -838,7 +838,7 @@ def test_int_with_minbits_lossy(self): shape = (100, 300) testdata = 
np.random.randint(0, 2 ** (nbits + 1) - 1, size=shape) - dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits) + dset = self.f.create_dataset('foo', shape, dtype=np.int32, scaleoffset=nbits) # Dataset reports scaleoffset enabled with correct precision self.assertTrue(dset.scaleoffset == 12) @@ -985,7 +985,7 @@ def test_vlen_unicode(self): def test_string_fixed(self): """ Assignment of fixed-length byte string produces a fixed-length ascii dataset """ - self.f['x'] = np.string_("Hello there") + self.f['x'] = np.bytes_("Hello there") ds = self.f['x'] self.assert_string_type(ds, 'H5T_CSET_ASCII', variable=False) if self.is_hsds(): @@ -1278,7 +1278,7 @@ def test_roundtrip_fixed_bytes(self): data = b"Hello\xef" ds[0] = data out = ds[0] - self.assertEqual(type(out), np.string_) + self.assertEqual(type(out), np.bytes_) self.assertEqual(out, data) def test_retrieve_vlen_unicode(self): @@ -1671,28 +1671,23 @@ def test_scalar_compound(self): class TestVlen(BaseDataset): def test_int(self): - if platform.system() == "Windows": - # default np int type is 32 bit - dt = h5py.vlen_dtype(np.int32) - else: - # defualt np int type is 64 bit - dt = h5py.vlen_dtype(np.int64) - - ds = self.f.create_dataset('vlen', (4,), dtype=dt) + dt_int = np.int32 + dt_vlen = h5py.vlen_dtype(dt_int) + ds = self.f.create_dataset('vlen', (4,), dtype=dt_vlen) ds[0] = np.arange(3) ds[1] = np.arange(0) ds[2] = [1, 2, 3] ds[3] = np.arange(1) - self.assertArrayEqual(ds[0], np.arange(3)) - self.assertArrayEqual(ds[1], np.arange(0)) - self.assertArrayEqual(ds[2], np.array([1, 2, 3])) - self.assertArrayEqual(ds[1], np.arange(0)) + self.assertArrayEqual(ds[0], np.arange(3, dtype=dt_int)) + self.assertArrayEqual(ds[1], np.arange(0, dtype=dt_int)) + self.assertArrayEqual(ds[2], np.array([1, 2, 3], dtype=dt_int)) + self.assertArrayEqual(ds[1], np.arange(0, dtype=dt_int)) ds[0:2] = np.array([np.arange(5), np.arange(4)], dtype=object) - self.assertArrayEqual(ds[0], np.arange(5)) - self.assertArrayEqual(ds[1], np.arange(4)) + self.assertArrayEqual(ds[0], np.arange(5, dtype=dt_int)) + self.assertArrayEqual(ds[1], np.arange(4, dtype=dt_int)) ds[0:2] = np.array([np.arange(3), np.arange(3)]) - self.assertArrayEqual(ds[0], np.arange(3)) - self.assertArrayEqual(ds[1], np.arange(3)) + self.assertArrayEqual(ds[0], np.arange(3, dtype=dt_int)) + self.assertArrayEqual(ds[1], np.arange(3, dtype=dt_int)) def test_reuse_from_other(self): dt = h5py.vlen_dtype(int) @@ -1714,26 +1709,25 @@ def test_reuse_struct_from_other(self): self.f.create_dataset('vlen2', (1,), self.f['vlen']['b'][()].dtype) def test_convert(self): - if platform.system() == "Windows": - # default np int type is 32 bit - dt = h5py.vlen_dtype(np.int32) - else: - # defualt np int type is 64 bit - dt = h5py.vlen_dtype(np.int64) - ds = self.f.create_dataset('vlen', (3,), dtype=dt) + # default int type is 32bit on Windows and 64bit on Linux, so use explicit types + dt_int = np.int32 + dt_vlen = h5py.vlen_dtype(dt_int) + + ds = self.f.create_dataset('vlen', (3,), dtype=dt_vlen) ds[0] = np.array([1.4, 1.2]) ds[1] = np.array([1.2]) ds[2] = [1.2, 2, 3] - self.assertArrayEqual(ds[0], np.array([1, 1])) - self.assertArrayEqual(ds[1], np.array([1])) - self.assertArrayEqual(ds[2], np.array([1, 2, 3])) - ds[0:2] = np.array([[0.1, 1.1, 2.1, 3.1, 4], np.arange(4)], dtype=object) - self.assertArrayEqual(ds[0], np.arange(5)) - self.assertArrayEqual(ds[1], np.arange(4)) - ds[0:2] = np.array([np.array([0.1, 1.2, 2.2]), - np.array([0.2, 1.2, 2.2])]) - self.assertArrayEqual(ds[0], 
np.arange(3)) - self.assertArrayEqual(ds[1], np.arange(3)) + self.assertArrayEqual(ds[0], np.array([1, 1], dtype=dt_int)) + self.assertArrayEqual(ds[1], np.array([1], dtype=dt_int)) + self.assertArrayEqual(ds[2], np.array([1, 2, 3], dtype=dt_int)) + test_arr = np.array([[0.1, 1.1, 2.1, 3.1, 4], np.arange(4)], dtype=object) + ds[0:2] = test_arr + self.assertArrayEqual(ds[0], np.arange(5, dtype=dt_int)) + self.assertArrayEqual(ds[1], np.arange(4, dtype=dt_int)) + ds[0:2] = np.array([np.array([0.1, 1.2, 2.2], dtype=dt_int), + np.array([0.2, 1.2, 2.2], dtype=dt_int)]) + self.assertArrayEqual(ds[0], np.arange(3, dtype=dt_int)) + self.assertArrayEqual(ds[1], np.arange(3, dtype=dt_int)) def test_multidim(self): if platform.system() == "Windows": @@ -1987,494 +1981,6 @@ def test_basetype_commutative(self,): assert (val != dset) == (dset != val) -@ut.skipIf(config.get('use_h5py'), "h5py does not support MultiManager") -class TestMultiManager(BaseDataset): - def test_multi_read_scalar_dataspaces(self): - """ - Test reading from multiple datasets with scalar dataspaces - """ - shape = () - count = 3 - dt = np.int32 - - # Create datasets - data_in = np.array(1, dtype=dt) - datasets = [] - - for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape, - dtype=dt, data=(data_in + i)) - datasets.append(dset) - - mm = MultiManager(datasets) - - # Select via empty tuple - data_out = mm[()] - - self.assertEqual(len(data_out), count) - - for i in range(count): - np.testing.assert_array_equal(data_out[i], data_in + i) - - # Select via Ellipsis - data_out = mm[...] - - self.assertEqual(len(data_out), count) - - for i in range(count): - np.testing.assert_array_equal(data_out[i], data_in + i) - - def test_multi_read_non_scalar_dataspaces(self): - """ - Test reading from multiple datasets with non-scalar dataspaces - """ - shape = (10, 10, 10) - count = 3 - dt = np.int32 - - # Create datasets - data_in = np.reshape(np.arange(np.prod(shape)), shape) - datasets = [] - - for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape, - dtype=dt, data=(data_in + i)) - datasets.append(dset) - - mm = MultiManager(datasets) - data_out = mm[...] 
- - self.assertEqual(len(data_out), count) - - for i in range(count): - np.testing.assert_array_equal(data_out[i], data_in + i) - - # Partial Read - data_out = mm[:, :, 0] - - self.assertEqual(len(data_out), count) - - for i in range(count): - np.testing.assert_array_equal(data_out[i], (data_in + i)[:, :, 0]) - - def test_multi_read_mixed_dataspaces(self): - """ - Test reading from multiple datasets with scalar and - non-scalar dataspaces - """ - scalar_shape = () - shape = (10, 10, 10) - count = 3 - dt = np.int32 - - # Create datasets - data_scalar_in = np.array(1) - data_nonscalar_in = np.reshape(np.arange(np.prod(shape)), shape) - data_in = [data_scalar_in, data_nonscalar_in, - data_nonscalar_in, data_nonscalar_in] - datasets = [] - - for i in range(count): - if i == 0: - dset = self.f.create_dataset("data" + str(0), scalar_shape, - dtype=dt, data=data_scalar_in) - else: - dset = self.f.create_dataset("data" + str(i), shape, - dtype=dt, data=(data_nonscalar_in + i)) - datasets.append(dset) - - # Set up MultiManager for read - mm = MultiManager(datasets=datasets) - - # Select via empty tuple - data_out = mm[()] - - self.assertEqual(len(data_out), count) - - for i in range(count): - if i == 0: - np.testing.assert_array_equal(data_out[i], data_in[i]) - else: - np.testing.assert_array_equal(data_out[i], data_in[i] + i) - - # Select via Ellipsis - data_out = mm[...] - - self.assertEqual(len(data_out), count) - - for i in range(count): - if i == 0: - np.testing.assert_array_equal(data_out[i], data_in[i]) - else: - np.testing.assert_array_equal(data_out[i], data_in[i] + i) - - def test_multi_read_mixed_types(self): - """ - Test reading from multiple datasets with different types - """ - shape = (10, 10, 10) - count = 4 - dts = [np.int32, np.int64, np.float64, np.dtype("S10")] - - # Create datasets - data_in = np.reshape(np.arange(np.prod(shape)), shape) - data_in_fixed_str = np.full(shape, "abcdefghij", dtype=dts[3]) - datasets = [] - - for i in range(count): - if i < 3: - dset = self.f.create_dataset("data" + str(i), shape, - dtype=dts[i], data=(data_in + i)) - else: - dset = self.f.create_dataset("data" + str(i), shape, - dtype=dts[i], data=data_in_fixed_str) - - datasets.append(dset) - - # Set up MultiManager for read - mm = MultiManager(datasets=datasets) - - # Perform read - data_out = mm[...] - - self.assertEqual(len(data_out), count) - - for i in range(count): - if i < 3: - np.testing.assert_array_equal(data_out[i], np.array(data_in + i, dtype=dts[i])) - else: - np.testing.assert_array_equal(data_out[i], data_in_fixed_str) - - self.assertEqual(data_out[i].dtype, dts[i]) - - def test_multi_read_vlen_str(self): - """ - Test reading from multiple datasets with a vlen string type - """ - shape = (10, 10, 10) - count = 3 - dt = h5py.string_dtype(encoding='utf-8') - data_in = np.full(shape, "abcdefghij", dt) - datasets = [] - - for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape=shape, - data=data_in, dtype=dt) - datasets.append(dset) - - mm = MultiManager(datasets=datasets) - out = mm[...] 
- - self.assertEqual(len(out), count) - - for i in range(count): - self.assertEqual(out[i].dtype, dt) - out[i] = np.reshape(out[i], newshape=np.prod(shape)) - out[i] = np.reshape(np.array([s.decode() for s in out[i]], dtype=dt), - newshape=shape) - np.testing.assert_array_equal(out[i], data_in) - - def test_multi_read_mixed_shapes(self): - """ - Test reading a selection from multiple datasets with different shapes - """ - shapes = [(150), (10, 15), (5, 5, 6)] - count = 3 - dt = np.int32 - data = np.arange(150, dtype=dt) - data_in = [np.reshape(data, newshape=s) for s in shapes] - datasets = [] - sel_idx = 2 - - for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape=shapes[i], - dtype=dt, data=data_in[i]) - datasets.append(dset) - - mm = MultiManager(datasets=datasets) - # Perform multi read with selection - out = mm[sel_idx] - - # Verify - for i in range(count): - np.testing.assert_array_equal(out[i], data_in[i][sel_idx]) - - def test_multi_write_scalar_dataspaces(self): - """ - Test writing to multiple scalar datasets - """ - shape = () - count = 3 - dt = np.int32 - - # Create datasets - zeros = np.zeros(shape, dtype=dt) - data_in = [] - datasets = [] - - for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape, - dtype=dt, data=zeros) - datasets.append(dset) - - data_in.append(np.array([i])) - - mm = MultiManager(datasets) - # Perform write - mm[...] = data_in - - # Read back and check - for i in range(count): - data_out = self.f["data" + str(i)][...] - np.testing.assert_array_equal(data_out, data_in[i]) - - def test_multi_write_non_scalar_dataspaces(self): - """ - Test writing to multiple non-scalar datasets - """ - shape = (10, 10, 10) - count = 3 - dt = np.int32 - - # Create datasets - zeros = np.zeros(shape, dtype=dt) - data_in = [] - datasets = [] - - for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape, - dtype=dt, data=zeros) - datasets.append(dset) - - d_in = np.array(np.reshape(np.arange(np.prod(shape)), shape) + i, dtype=dt) - data_in.append(d_in) - - mm = MultiManager(datasets) - # Perform write - mm[...] = data_in - - # Read back and check - for i in range(count): - data_out = np.array(self.f["data" + str(i)][...], dtype=dt) - np.testing.assert_array_equal(data_out, data_in[i]) - - def test_multi_write_mixed_dataspaces(self): - """ - Test writing to multiple scalar and non-scalar datasets - """ - scalar_shape = () - shape = (10, 10, 10) - count = 3 - dt = np.int32 - - # Create datasets - data_in = [] - data_scalar_in = np.array(1, dtype=dt) - data_nonscalar_in = np.array(np.reshape(np.arange(np.prod(shape)), shape), dtype=dt) - datasets = [] - - for i in range(count): - if i == 0: - dset = self.f.create_dataset("data" + str(0), scalar_shape, - dtype=dt, data=np.array(0, dtype=dt)) - data_in.append(data_scalar_in) - else: - dset = self.f.create_dataset("data" + str(i), shape, - dtype=dt, data=np.zeros(shape)) - data_in.append(data_nonscalar_in) - datasets.append(dset) - - # Set up MultiManager for write - mm = MultiManager(datasets=datasets) - - # Select via empty tuple - mm[()] = data_in - - for i in range(count): - data_out = self.f["data" + str(i)][...] - np.testing.assert_array_equal(data_out, data_in[i]) - - # Reset datasets - for i in range(count): - if i == 0: - zeros = np.array([0]) - else: - zeros = np.zeros(shape) - self.f["data" + str(i)][...] = zeros - - # Select via Ellipsis - mm[...] = data_in - - for i in range(count): - data_out = self.f["data" + str(i)][...] 
- - if i == 0: - np.testing.assert_array_equal(data_out, data_in[i]) - else: - np.testing.assert_array_equal(data_out, data_in[i]) - - def test_multi_write_vlen_str(self): - """ - Test writing to multiple datasets with a vlen string type - """ - shape = (10, 10, 10) - count = 3 - dt = h5py.string_dtype(encoding='utf-8') - data_initial_vlen = np.full(shape, "aaaabbbbcc", dtype=dt) - data_in_vlen = np.full(shape, "abcdefghij", dtype=dt) - datasets = [] - - for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape=shape, - data=data_initial_vlen, dtype=dt) - datasets.append(dset) - - mm = MultiManager(datasets=datasets) - # Perform write - mm[...] = [data_in_vlen, data_in_vlen, data_in_vlen] - - # Verify - for i in range(count): - out = self.f["data" + str(i)][...] - self.assertEqual(out.dtype, dt) - - out = np.reshape(out, newshape=np.prod(shape)) - out = np.reshape(np.array([s.decode() for s in out], dtype=dt), - newshape=shape) - np.testing.assert_array_equal(out, data_in_vlen) - - def test_multi_write_mixed_shapes(self): - """ - Test writing to a selection in multiple datasets with different shapes - """ - shapes = [(50, 5), (15, 10), (20, 15)] - count = 3 - dt = np.int32 - data_in = 99 - datasets = [] - sel_idx = 2 - - for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape=shapes[i], - dtype=dt, data=np.zeros(shapes[i], dtype=dt)) - datasets.append(dset) - - mm = MultiManager(datasets=datasets) - # Perform multi write with selection - mm[sel_idx, sel_idx] = [data_in, data_in + 1, data_in + 2] - - # Verify - for i in range(count): - out = self.f["data" + str(i)][...] - np.testing.assert_array_equal(out[sel_idx, sel_idx], data_in + i) - - def test_multi_selection(self): - """ - Test using a different selection - for each dataset in a MultiManager - """ - shape = (10, 10, 10) - count = 3 - dt = np.int32 - - # Create datasets - data_in = np.reshape(np.arange(np.prod(shape), dtype=dt), shape) - data_in_original = data_in.copy() - datasets = [] - - for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape=shape, - dtype=dt, data=data_in) - datasets.append(dset) - - mm = h5py.MultiManager(datasets=datasets) - - # Selections to read from - sel = [np.s_[0:10, 0:10, 0:10], np.s_[0:5, 5:10, 1:4:2], np.s_[4, 5, 6]] - data_out = mm[sel] - - for i in range(count): - np.testing.assert_array_equal(data_out[i], data_in[sel[i]]) - - # If selection list has only a single element, apply it to all dsets - sel = [np.s_[0:10, 0:10, 0:10]] - data_out = mm[sel] - - for d in data_out: - np.testing.assert_array_equal(d, data_in[sel[0]]) - - # Selections to write to - sel = [np.s_[0:10, 0:10, 0:10], np.s_[0:5, 0:5, 0:5], np.s_[0, 0, 0]] - data_in = [np.zeros_like(data_in), np.ones_like(data_in), np.full_like(data_in, 2)] - mm[sel] = [data_in[i][sel[i]] for i in range(count)] - - for i in range(count): - np.testing.assert_array_equal(self.f["data" + str(i)][sel[i]], data_in[i][sel[i]]) - - # Check that unselected regions are unmodified - np.testing.assert_array_equal(self.f["data1"][5:, 5:, 5:], data_in_original[5:, 5:, 5:]) - np.testing.assert_array_equal(self.f["data2"][1:, 1:, 1:], data_in_original[1:, 1:, 1:]) - - # Save for later comparison - data_in_original = mm[...] 
- - # If selection list has only a single element, apply it to all dsets - sel = [np.s_[0:6, 0:6, 0:6]] - data_in = np.full(shape, 3, dtype=dt) - mm[sel] = [data_in[sel[0]]] * count - - for i in range(count): - np.testing.assert_array_equal(self.f["data" + str(i)][sel[0]], data_in[sel[0]]) - - # Check that unselected regions are unmodified - data_out = mm[...] - - for i in range(count): - np.testing.assert_array_equal(data_out[i][6:, 6:, 6:], data_in_original[i][6:, 6:, 6:]) - - def test_multi_field_selection(self): - """ - Test reading/writing to a field selection on multiple datasets - """ - dt = np.dtype([('a', np.float32), ('b', np.int32), ('c', np.float32)]) - shape = (100,) - data = np.ones(shape, dtype=dt) - count = 3 - datasets = [] - - for i in range(count): - dset = self.f.create_dataset("data" + str(i), shape=shape, - data=np.zeros(shape, dtype=dt), - dtype=dt) - datasets.append(dset) - - # Perform read from field 'b' - mm = MultiManager(datasets=datasets) - out = mm[..., 'b'] - - # Verify data returned - for i in range(count): - np.testing.assert_array_equal(out[i], np.zeros(shape, dtype=dt['b'])) - - # Perform write to field 'b' - mm = MultiManager(datasets=datasets) - mm[..., 'b'] = [data['b'], data['b'], data['b']] - - for i in range(count): - out = np.array(self.f["data" + str(i)], dtype=dt) - np.testing.assert_array_equal(out['a'], np.zeros(shape, dtype=dt['a'])) - np.testing.assert_array_equal(out['b'], data['b']) - np.testing.assert_array_equal(out['c'], np.zeros(shape, dtype=dt['c'])) - - # Test writing to entire compound type - data = np.zeros(shape, dtype=dt) - mm[...] = [data, data, data] - - for i in range(count): - out = np.array(self.f["data" + str(i)], dtype=dt) - np.testing.assert_array_equal(out, data) - - if __name__ == '__main__': loglevel = logging.ERROR logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) diff --git a/test/hl/test_dataset_create.py b/test/hl/test_dataset_create.py index 287015c..c68f577 100644 --- a/test/hl/test_dataset_create.py +++ b/test/hl/test_dataset_create.py @@ -99,7 +99,8 @@ def test_create_float16_dset(self): dset[...] = arr arr = dset[...] 
# read back - val = arr[2, 4] # test one value + # test one value + val = float(arr[2, 4]) # convert to float since np.float16 types not comparable self.assertTrue(val > 20.4 - 0.01) self.assertTrue(val < 20.4 + 0.01) diff --git a/test/hl/test_dataset_fancyselect.py b/test/hl/test_dataset_fancyselect.py index e1ed8d1..403110e 100644 --- a/test/hl/test_dataset_fancyselect.py +++ b/test/hl/test_dataset_fancyselect.py @@ -104,6 +104,46 @@ def test_dset_3d(self): f.close() + def test_bigdset(self): + filename = self.getFileName("fancy_select_dset_3d") + print("filename:", filename) + f = h5py.File(filename, "w") + # create a dataset + dset = f.create_dataset("dset", (5, 1000, 1000), dtype="i4", compression="gzip") + # write some values to the dataset + dset[:, 1, 10] = [95, 96, 97, 98, 99] + dset[:, 10, 100] = [195, 196, 197, 198, 199] + dset[:, 100, 500] = [295, 296, 297, 298, 299] + + # single coordinate, increasing + arr = dset[:, 10, [10, 100, 500]] + self.assertEqual(arr.shape, (5, 3)) + self.assertTrue((arr[:, 0] == [0, 0, 0, 0, 0]).all()) + self.assertTrue((arr[:, 1] == [195, 196, 197, 198, 199]).all()) + self.assertTrue((arr[:, 2] == [0, 0, 0, 0, 0]).all()) + + try: + # non-increasing indexes + arr = dset[:, 10, [100, 10, 500]] + self.assertEqual(arr.shape, (5, 3)) + self.assertTrue((arr[:, 0] == [195, 196, 197, 198, 199]).all()) + self.assertTrue((arr[:, 1] == [0, 0, 0, 0, 0]).all()) + self.assertTrue((arr[:, 2] == [0, 0, 0, 0, 0]).all()) + + # test multiple coordinates + arr = dset[:, [1, 10, 100], [10, 100, 500]] + self.assertEqual(arr.shape, (5, 3)) + self.assertTrue((arr[:, 0] == [95, 96, 97, 98, 99]).all()) + self.assertTrue((arr[:, 1] == [195, 196, 197, 198, 199]).all()) + self.assertTrue((arr[:, 2] == [295, 296, 297, 298, 299]).all()) + except TypeError: + if config.get("use_h5py"): + pass # multiple indexing vectors not allowed with h5py + else: + self.assertTrue(False) # but should be ok with h5pyd/hsds + + f.close() + if __name__ == '__main__': ut.main() diff --git a/test/hl/test_dataset_multi.py b/test/hl/test_dataset_multi.py new file mode 100644 index 0000000..a505f15 --- /dev/null +++ b/test/hl/test_dataset_multi.py @@ -0,0 +1,544 @@ +############################################################################## +# Copyright by The HDF Group. # +# All rights reserved. # +# # +# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and # +# Utilities. The full HDF5 REST Server copyright notice, including # +# terms governing use, modification, and redistribution, is contained in # +# the file COPYING, which can be found at the root of the source code # +# distribution tree. If you do not have access to this file, you may # +# request a copy from help@hdfgroup.org. # +############################################################################## +""" + Dataset multi testing operations. 
+ + Tests all MultiManager operations +""" + +import logging +import numpy as np + +from common import ut, TestCase +import config + +if config.get("use_h5py"): + import h5py +else: + from h5pyd import MultiManager + import h5pyd as h5py + + +@ut.skipIf(config.get('use_h5py'), "h5py does not support MultiManager") +class TestMultiManager(TestCase): + def test_multi_read_scalar_dataspaces(self): + """ + Test reading from multiple datasets with scalar dataspaces + """ + filename = self.getFileName("multi_read_scalar_dataspaces") + print("filename:", filename) + print(f"numpy version: {np.version.version}") + f = h5py.File(filename, 'w') + shape = () + count = 3 + dt = np.int32 + + # Create datasets + data_in = np.array(1, dtype=dt) + datasets = [] + + for i in range(count): + dset = f.create_dataset("data" + str(i), shape, dtype=dt, data=(data_in + i)) + datasets.append(dset) + + mm = MultiManager(datasets) + + # Select via empty tuple + data_out = mm[()] + + self.assertEqual(len(data_out), count) + + for i in range(count): + np.testing.assert_array_equal(data_out[i], data_in + i) + + # Select via Ellipsis + data_out = mm[...] + + self.assertEqual(len(data_out), count) + + for i in range(count): + np.testing.assert_array_equal(data_out[i], data_in + i) + + def test_multi_read_non_scalar_dataspaces(self): + """ + Test reading from multiple datasets with non-scalar dataspaces + """ + filename = self.getFileName("multi_read_non_scalar_dataspaces") + print("filename:", filename) + f = h5py.File(filename, 'w') + shape = (10, 10, 10) + count = 3 + dt = np.int32 + + # Create datasets + data_in = np.reshape(np.arange(np.prod(shape)), shape) + datasets = [] + + for i in range(count): + dset = f.create_dataset("data" + str(i), shape, dtype=dt, data=(data_in + i)) + datasets.append(dset) + + mm = MultiManager(datasets) + data_out = mm[...] + + self.assertEqual(len(data_out), count) + + for i in range(count): + np.testing.assert_array_equal(data_out[i], data_in + i) + + # Partial Read + data_out = mm[:, :, 0] + + self.assertEqual(len(data_out), count) + + for i in range(count): + np.testing.assert_array_equal(data_out[i], (data_in + i)[:, :, 0]) + + def test_multi_read_mixed_dataspaces(self): + """ + Test reading from multiple datasets with scalar and + non-scalar dataspaces + """ + filename = self.getFileName("multi_read_mixed_dataspaces") + print("filename:", filename) + f = h5py.File(filename, 'w') + scalar_shape = () + shape = (10, 10, 10) + count = 3 + dt = np.int32 + + # Create datasets + data_scalar_in = np.array(1) + data_nonscalar_in = np.reshape(np.arange(np.prod(shape)), shape) + data_in = [data_scalar_in, data_nonscalar_in, + data_nonscalar_in, data_nonscalar_in] + datasets = [] + + for i in range(count): + if i == 0: + dset = f.create_dataset("data" + str(0), scalar_shape, dtype=dt, data=data_scalar_in) + else: + dset = f.create_dataset("data" + str(i), shape, dtype=dt, data=(data_nonscalar_in + i)) + datasets.append(dset) + + # Set up MultiManager for read + mm = MultiManager(datasets=datasets) + + # Select via empty tuple + data_out = mm[()] + + self.assertEqual(len(data_out), count) + + for i in range(count): + if i == 0: + np.testing.assert_array_equal(data_out[i], data_in[i]) + else: + np.testing.assert_array_equal(data_out[i], data_in[i] + i) + + # Select via Ellipsis + data_out = mm[...] 
+ + self.assertEqual(len(data_out), count) + + for i in range(count): + if i == 0: + np.testing.assert_array_equal(data_out[i], data_in[i]) + else: + np.testing.assert_array_equal(data_out[i], data_in[i] + i) + + def test_multi_read_mixed_types(self): + """ + Test reading from multiple datasets with different types + """ + filename = self.getFileName("multi_read_mixed_types") + print("filename:", filename) + f = h5py.File(filename, 'w') + shape = (10, 10, 10) + count = 4 + dts = [np.int32, np.int64, np.float64, np.dtype("S10")] + + # Create datasets + data_in = np.reshape(np.arange(np.prod(shape)), shape) + data_in_fixed_str = np.full(shape, "abcdefghij", dtype=dts[3]) + datasets = [] + + for i in range(count): + if i < 3: + dset = f.create_dataset("data" + str(i), shape, dtype=dts[i], data=(data_in + i)) + else: + dset = f.create_dataset("data" + str(i), shape, dtype=dts[i], data=data_in_fixed_str) + + datasets.append(dset) + + # Set up MultiManager for read + mm = MultiManager(datasets=datasets) + + # Perform read + data_out = mm[...] + + self.assertEqual(len(data_out), count) + + for i in range(count): + if i < 3: + np.testing.assert_array_equal(data_out[i], np.array(data_in + i, dtype=dts[i])) + else: + np.testing.assert_array_equal(data_out[i], data_in_fixed_str) + + self.assertEqual(data_out[i].dtype, dts[i]) + + def test_multi_read_vlen_str(self): + """ + Test reading from multiple datasets with a vlen string type + """ + filename = self.getFileName("multi_read_vlen_str") + print("filename:", filename) + f = h5py.File(filename, 'w') + shape = (10, 10, 10) + count = 3 + dt = h5py.string_dtype(encoding='utf-8') + data_in = np.full(shape, "abcdefghij", dt) + datasets = [] + + for i in range(count): + dset = f.create_dataset("data" + str(i), shape=shape, data=data_in, dtype=dt) + datasets.append(dset) + + mm = MultiManager(datasets=datasets) + out = mm[...] + + self.assertEqual(len(out), count) + + for i in range(count): + self.assertEqual(out[i].dtype, dt) + out[i] = out[i].reshape(np.prod(shape)) + out[i] = np.array([s.decode() for s in out[i]], dtype=dt).reshape(shape) + np.testing.assert_array_equal(out[i], data_in) + + def test_multi_read_mixed_shapes(self): + """ + Test reading a selection from multiple datasets with different shapes + """ + filename = self.getFileName("multi_read_mixed_shapes") + print("filename:", filename) + f = h5py.File(filename, 'w') + shapes = [(150), (10, 15), (5, 5, 6)] + count = 3 + dt = np.int32 + data = np.arange(150, dtype=dt) + data_in = [data.reshape(s) for s in shapes] + datasets = [] + sel_idx = 2 + + for i in range(count): + dset = f.create_dataset("data" + str(i), shape=shapes[i], dtype=dt, data=data_in[i]) + datasets.append(dset) + + mm = MultiManager(datasets=datasets) + # Perform multi read with selection + out = mm[sel_idx] + + # Verify + for i in range(count): + np.testing.assert_array_equal(out[i], data_in[i][sel_idx]) + + def test_multi_write_scalar_dataspaces(self): + """ + Test writing to multiple scalar datasets + """ + filename = self.getFileName("multi_write_scalar_dataspaces") + print("filename:", filename) + f = h5py.File(filename, 'w') + shape = () + count = 3 + dt = np.int32 + + # Create datasets + zeros = np.zeros(shape, dtype=dt) + data_in = [] + datasets = [] + + for i in range(count): + dset = f.create_dataset("data" + str(i), shape, dtype=dt, data=zeros) + datasets.append(dset) + + data_in.append(np.array([i])) + + mm = MultiManager(datasets) + # Perform write + mm[...] 
= data_in + + # Read back and check + for i in range(count): + data_out = f["data" + str(i)][...] + np.testing.assert_array_equal(data_out, data_in[i]) + + def test_multi_write_non_scalar_dataspaces(self): + """ + Test writing to multiple non-scalar datasets + """ + filename = self.getFileName("multi_write_non_scalar_dataspaces") + print("filename:", filename) + f = h5py.File(filename, 'w') + shape = (10, 10, 10) + count = 3 + dt = np.int32 + + # Create datasets + zeros = np.zeros(shape, dtype=dt) + data_in = [] + datasets = [] + arr = np.arange(np.prod(shape), dtype=dt) + arr = arr.reshape(shape) + for i in range(count): + dset = f.create_dataset("data" + str(i), shape, dtype=dt, data=zeros) + datasets.append(dset) + d_in = arr + i + data_in.append(d_in) + + mm = MultiManager(datasets) + # Perform write + mm[...] = data_in + + # Read back and check + for i in range(count): + data_out = np.array(f["data" + str(i)][...], dtype=dt) + np.testing.assert_array_equal(data_out, data_in[i]) + + def test_multi_write_mixed_dataspaces(self): + """ + Test writing to multiple scalar and non-scalar datasets + """ + filename = self.getFileName("multi_write_mixed_dataspaces") + print("filename:", filename) + f = h5py.File(filename, 'w') + scalar_shape = () + shape = (10, 10, 10) + count = 3 + dt = np.int32 + + # Create datasets + data_in = [] + data_scalar_in = np.array(1, dtype=dt) + data_nonscalar_in = np.array(np.reshape(np.arange(np.prod(shape)), shape), dtype=dt) + datasets = [] + + for i in range(count): + if i == 0: + dset = f.create_dataset("data" + str(0), scalar_shape, dtype=dt, data=np.array(0, dtype=dt)) + data_in.append(data_scalar_in) + else: + dset = f.create_dataset("data" + str(i), shape, dtype=dt, data=np.zeros(shape)) + data_in.append(data_nonscalar_in) + datasets.append(dset) + + # Set up MultiManager for write + mm = MultiManager(datasets=datasets) + + # Select via empty tuple + mm[()] = data_in + + for i in range(count): + data_out = f["data" + str(i)][...] + np.testing.assert_array_equal(data_out, data_in[i]) + + # Reset datasets + for i in range(count): + if i == 0: + zeros = np.array([0]) + else: + zeros = np.zeros(shape) + f["data" + str(i)][...] = zeros + + # Select via Ellipsis + mm[...] = data_in + + for i in range(count): + data_out = f["data" + str(i)][...] + + if i == 0: + np.testing.assert_array_equal(data_out, data_in[i]) + else: + np.testing.assert_array_equal(data_out, data_in[i]) + + def test_multi_write_vlen_str(self): + """ + Test writing to multiple datasets with a vlen string type + """ + filename = self.getFileName("multi_write_vlen_str") + print("filename:", filename) + f = h5py.File(filename, 'w') + shape = (10, 10, 10) + count = 3 + dt = h5py.string_dtype(encoding='utf-8') + data_initial_vlen = np.full(shape, "aaaabbbbcc", dtype=dt) + data_in_vlen = np.full(shape, "abcdefghij", dtype=dt) + datasets = [] + + for i in range(count): + dset = f.create_dataset("data" + str(i), shape=shape, data=data_initial_vlen, dtype=dt) + datasets.append(dset) + + mm = MultiManager(datasets=datasets) + # Perform write + mm[...] = [data_in_vlen, data_in_vlen, data_in_vlen] + + # Verify + for i in range(count): + arr = f["data" + str(i)][...] 
+ self.assertEqual(arr.dtype, dt) + + arr = arr.reshape(np.prod(shape)) + out = np.array([s.decode() for s in arr], dtype=dt) + out = out.reshape(shape) + np.testing.assert_array_equal(out, data_in_vlen) + + def test_multi_write_mixed_shapes(self): + """ + Test writing to a selection in multiple datasets with different shapes + """ + filename = self.getFileName("multi_write_mixed_shapes") + print("filename:", filename) + f = h5py.File(filename, 'w') + shapes = [(50, 5), (15, 10), (20, 15)] + count = 3 + dt = np.int32 + data_in = 99 + datasets = [] + sel_idx = 2 + + for i in range(count): + dset = f.create_dataset("data" + str(i), shape=shapes[i], dtype=dt, data=np.zeros(shapes[i], dtype=dt)) + datasets.append(dset) + + mm = MultiManager(datasets=datasets) + # Perform multi write with selection + mm[sel_idx, sel_idx] = [data_in, data_in + 1, data_in + 2] + + # Verify + for i in range(count): + out = f["data" + str(i)][...] + np.testing.assert_array_equal(out[sel_idx, sel_idx], data_in + i) + + def test_multi_selection(self): + """ + Test using a different selection + for each dataset in a MultiManager + """ + filename = self.getFileName("multi_selection") + print("filename:", filename) + f = h5py.File(filename, 'w') + shape = (10, 10, 10) + count = 3 + dt = np.int32 + + # Create datasets + data_in = np.reshape(np.arange(np.prod(shape), dtype=dt), shape) + data_in_original = data_in.copy() + datasets = [] + + for i in range(count): + dset = f.create_dataset("data" + str(i), shape=shape, dtype=dt, data=data_in) + datasets.append(dset) + + mm = h5py.MultiManager(datasets=datasets) + + # Selections to read from + sel = [np.s_[0:10, 0:10, 0:10], np.s_[0:5, 5:10, 1:4:2], np.s_[4, 5, 6]] + data_out = mm[sel] + + for i in range(count): + np.testing.assert_array_equal(data_out[i], data_in[sel[i]]) + + # If selection list has only a single element, apply it to all dsets + sel = [np.s_[0:10, 0:10, 0:10]] + data_out = mm[sel] + + for d in data_out: + np.testing.assert_array_equal(d, data_in[sel[0]]) + + # Selections to write to + sel = [np.s_[0:10, 0:10, 0:10], np.s_[0:5, 0:5, 0:5], np.s_[0, 0, 0]] + data_in = [np.zeros_like(data_in), np.ones_like(data_in), np.full_like(data_in, 2)] + mm[sel] = [data_in[i][sel[i]] for i in range(count)] + + for i in range(count): + np.testing.assert_array_equal(f["data" + str(i)][sel[i]], data_in[i][sel[i]]) + + # Check that unselected regions are unmodified + np.testing.assert_array_equal(f["data1"][5:, 5:, 5:], data_in_original[5:, 5:, 5:]) + np.testing.assert_array_equal(f["data2"][1:, 1:, 1:], data_in_original[1:, 1:, 1:]) + + # Save for later comparison + data_in_original = mm[...] + + # If selection list has only a single element, apply it to all dsets + sel = [np.s_[0:6, 0:6, 0:6]] + data_in = np.full(shape, 3, dtype=dt) + mm[sel] = [data_in[sel[0]]] * count + + for i in range(count): + np.testing.assert_array_equal(f["data" + str(i)][sel[0]], data_in[sel[0]]) + + # Check that unselected regions are unmodified + data_out = mm[...] 
+ + for i in range(count): + np.testing.assert_array_equal(data_out[i][6:, 6:, 6:], data_in_original[i][6:, 6:, 6:]) + + def test_multi_field_selection(self): + """ + Test reading/writing to a field selection on multiple datasets + """ + filename = self.getFileName("multi_field_selection") + print("filename:", filename) + f = h5py.File(filename, 'w') + dt = np.dtype([('a', np.float32), ('b', np.int32), ('c', np.float32)]) + shape = (100,) + data = np.ones(shape, dtype=dt) + count = 3 + datasets = [] + + for i in range(count): + dset = f.create_dataset("data" + str(i), shape=shape, data=np.zeros(shape, dtype=dt), dtype=dt) + datasets.append(dset) + + # Perform read from field 'b' + mm = MultiManager(datasets=datasets) + out = mm[..., 'b'] + + # Verify data returned + for i in range(count): + np.testing.assert_array_equal(out[i], np.zeros(shape, dtype=dt['b'])) + + # Perform write to field 'b' + mm = MultiManager(datasets=datasets) + mm[..., 'b'] = [data['b'], data['b'], data['b']] + + for i in range(count): + out = np.array(f["data" + str(i)], dtype=dt) + np.testing.assert_array_equal(out['a'], np.zeros(shape, dtype=dt['a'])) + np.testing.assert_array_equal(out['b'], data['b']) + np.testing.assert_array_equal(out['c'], np.zeros(shape, dtype=dt['c'])) + + # Test writing to entire compound type + data = np.zeros(shape, dtype=dt) + mm[...] = [data, data, data] + + for i in range(count): + out = np.array(f["data" + str(i)], dtype=dt) + np.testing.assert_array_equal(out, data) + + +if __name__ == '__main__': + loglevel = logging.ERROR + logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) + ut.main() diff --git a/testall.py b/testall.py index fe7e4ef..10efbf1 100755 --- a/testall.py +++ b/testall.py @@ -23,8 +23,9 @@ 'test_dataset_create', 'test_dataset_extend', 'test_dataset_fancyselect', - 'test_dataset_objref', 'test_dataset_getitem', + 'test_dataset_multi', + 'test_dataset_objref', 'test_dataset_pointselect', 'test_dataset_scalar', 'test_dataset_setitem', @@ -32,11 +33,11 @@ 'test_datatype', 'test_dimscale', 'test_file', + 'test_folder', 'test_group', 'test_table', 'test_visit', - 'test_vlentype', - 'test_folder') + 'test_vlentype',) app_tests = ('test_hsinfo', 'test_tall_inspect', 'test_diamond_inspect',
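
The dataset.py hunk above implements NumPy 2.0's array-conversion protocol: np.asarray() can now forward a copy= keyword to an object's __array__ method, and long-deprecated aliases such as np.product and np.string_ are gone, hence the switches to np.prod and np.bytes_ elsewhere in the patch. A minimal sketch of the __array__ pattern the patch follows (the Wrapper class is illustrative, not h5pyd code):

    import numpy as np

    class Wrapper:
        """Stand-in for an object that must materialize data on every read."""

        def __init__(self, values):
            self._values = list(values)

        def __array__(self, dtype=None, copy=True):
            # A read always allocates a fresh array, so a NumPy 2.0
            # no-copy request (copy=False) cannot be honored
            if copy is False:
                raise ValueError("copy=False is not supported")
            data = np.array(self._values)
            return data.astype(dtype, copy=False) if dtype is not None else data

    arr = np.asarray(Wrapper([1, 2, 3]), dtype=np.float64)

    count = int(np.prod((100, 300)))  # np.product was removed in NumPy 2.0
    fixed = np.bytes_("Hello there")  # np.string_ was removed in NumPy 2.0

Raising on copy=False mirrors the new Dataset.__array__ above: data read from the server can never be handed back as a view of existing memory.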
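
The new test_dataset_multi.py exercises h5pyd's MultiManager, which applies one selection (or a list of per-dataset selections) to several datasets in a single call. A short usage sketch distilled from the tests above, assuming a running HSDS endpoint (the domain path is a placeholder):

    import numpy as np
    import h5pyd
    from h5pyd import MultiManager

    f = h5pyd.File("/home/test_user1/multi_demo.h5", "w")
    dsets = [f.create_dataset(f"data{i}", (10,), dtype=np.int32)
             for i in range(3)]

    mm = MultiManager(dsets)

    # An Ellipsis selection applies to every dataset; supply one array each
    mm[...] = [np.full(10, i, dtype=np.int32) for i in range(3)]

    # Reads return a list with one array per dataset
    out = mm[0:5]
    for i, arr in enumerate(out):
        assert (arr == i).all()

    # A list of selections is applied element-wise, one per dataset;
    # a single-element list is broadcast to all datasets
    sel = [np.s_[0:2], np.s_[3:5], np.s_[6:8]]
    parts = mm[sel]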