From e3609edd04b92dba7e84325cc633301988345a63 Mon Sep 17 00:00:00 2001
From: Amanda Potts
Date: Fri, 14 Feb 2025 13:10:21 -0500
Subject: [PATCH] Closes #4098: upgrade to numpy 2.0.0

---
 arkouda-env-dev.yml                  |    2 +-
 arkouda-env.yml                      |    2 +-
 arkouda/numpy/__init__.py            |   33 +-
 arkouda/numpy/_numeric.py            |   22 +
 arkouda/numpy/dtypes/__init__.py     |   84 +
 arkouda/numpy/dtypes/dtypes.py       |   31 +-
 arkouda/numpy/exceptions/__init__.py |    2 +-
 arkouda/numpy/lib/__init__.py        |   20 +-
 arkouda/numpy/lib/npyio/__init__.py  |    1 +
 arkouda/numpy/rec/__init__.py        |    2 +-
 arkouda/pdarrayclass.py              |   71 +-
 arkouda/pdarraycreation.py           |   13 +-
 arkouda/util.py                      |    2 +-
 pydoc/requirements.txt               |    2 +-
 setup.py                             |    2 +-
 tests/dataframe_test.py              |  217 ++-
 tests/indexing_test.py               |    2 +-
 tests/io_test.py.working             | 2441 ++++++++++++++++++++++++++
 tests/numpy/dtypes_test.py           |   37 +-
 tests/operator_test.py               |  451 +++--
 tests/pdarray_creation_test.py       |  128 +-
 tests/setops_test.py                 |   12 +-
 22 files changed, 3281 insertions(+), 296 deletions(-)
 create mode 100644 arkouda/numpy/lib/npyio/__init__.py
 create mode 100644 tests/io_test.py.working

diff --git a/arkouda-env-dev.yml b/arkouda-env-dev.yml
index 16d66da822c..2d1986df93e 100644
--- a/arkouda-env-dev.yml
+++ b/arkouda-env-dev.yml
@@ -4,7 +4,7 @@ channels:
   - defaults
 dependencies:
   - python>=3.9,<3.12.4 # minimum 3.9
-  - numpy>=1.24.1,<2.0
+  - numpy>=2.0
   - pandas>=1.4.0,!=2.2.0
   - pyzmq>=20.0.0
   - tabulate
diff --git a/arkouda-env.yml b/arkouda-env.yml
index efced73406b..cb68d439285 100644
--- a/arkouda-env.yml
+++ b/arkouda-env.yml
@@ -4,7 +4,7 @@ channels:
   - defaults
 dependencies:
   - python>=3.9,<3.12.4 # minimum 3.9
-  - numpy>=1.24.1,<2.0
+  - numpy>=2.0
   - pandas>=1.4.0,!=2.2.0
   - pyzmq>=20.0.0
   - tabulate
diff --git a/arkouda/numpy/__init__.py b/arkouda/numpy/__init__.py
index 34387451601..c37af9b1e59 100644
--- a/arkouda/numpy/__init__.py
+++ b/arkouda/numpy/__init__.py
@@ -1,15 +1,6 @@
 # flake8: noqa
 from numpy import (  # noqa
-    NAN,
-    NINF,
-    NZERO,
-    PINF,
-    PZERO,
-    DataSource,
     False_,
-    Inf,
-    Infinity,
-    NaN,
     ScalarType,
     True_,
     base_repr,
@@ -17,9 +8,7 @@
     byte,
     bytes_,
     cdouble,
-    cfloat,
     clongdouble,
-    clongfloat,
     compat,
     csingle,
     datetime64,
@@ -28,7 +17,6 @@
     euler_gamma,
     finfo,
     flexible,
-    float_,
     floating,
     format_float_positional,
     format_float_scientific,
@@ -36,26 +24,22 @@
     iinfo,
     inexact,
     inf,
-    infty,
     intc,
     intp,
     isscalar,
-    issctype,
     issubdtype,
     longdouble,
-    longfloat,
     longlong,
-    maximum_sctype,
     nan,
     number,
     pi,
     promote_types,
     sctypeDict,
-    sctypes,
     short,
     signedinteger,
     single,
     timedelta64,
+    typename,
     ubyte,
     uint,
     uintc,
@@ -66,20 +50,7 @@
     void,
 )
 
-from arkouda.numpy import (
-    _builtins,
-    _mat,
-    _typing,
-    char,
-    ctypeslib,
-    dtypes,
-    exceptions,
-    fft,
-    lib,
-    linalg,
-    ma,
-    rec,
-)
+from arkouda.numpy.lib import *
 from arkouda.numpy._builtins import *
 from arkouda.numpy._mat import *
 from arkouda.numpy._typing import *
diff --git a/arkouda/numpy/_numeric.py b/arkouda/numpy/_numeric.py
index 175efa9dea4..b0eaa35aa02 100644
--- a/arkouda/numpy/_numeric.py
+++ b/arkouda/numpy/_numeric.py
@@ -119,6 +119,28 @@ def _merge_where(new_pda, where, ret):
     return new_pda
 
 
+def can_cast(from_, to) -> ak_bool:
+    from arkouda.util import is_int
+    from arkouda.numpy.dtypes import uint64 as ak_uint64
+    from arkouda.numpy.dtypes import _is_dtype_in_union
+    from arkouda.numpy.dtypes import isSupportedInt
+    from arkouda.numpy.dtypes import dtype as ak_dtype
+
+    if isSupportedInt(from_) and (from_ < 2**64) and (from_ >= 0) and (to == ak_dtype(ak_uint64)):
+        return True
+
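+    # Note: with NEP 50 in numpy 2.0, np.can_cast no longer accepts Python int,
+    # float, or complex values (it raises a TypeError), so Python ints that fit
+    # in uint64 are special-cased above and np.can_cast is only consulted below
+    # for numpy scalars and dtypes.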
+    if (np.isscalar(from_) or _is_dtype_in_union(from_, numeric_scalars)) and not isinstance(
+        from_, (int, float, complex)
+    ):
+        return np.can_cast(from_, to)
+
+    return False
+
+
 @typechecked
 def cast(
     pda: Union[pdarray, Strings, Categorical],  # type: ignore
diff --git a/arkouda/numpy/dtypes/__init__.py b/arkouda/numpy/dtypes/__init__.py
index ee89c72a1d2..d2cd49a61c4 100644
--- a/arkouda/numpy/dtypes/__init__.py
+++ b/arkouda/numpy/dtypes/__init__.py
@@ -36,3 +36,87 @@
 )
 
 from .dtypes import *
+
+__all__ = [
+    "BoolDType",
+    "ByteDType",
+    "BytesDType",
+    "CLongDoubleDType",
+    "Complex64DType",
+    "Complex128DType",
+    "DateTime64DType",
+    "Float16DType",
+    "Float32DType",
+    "Float64DType",
+    "Int8DType",
+    "Int16DType",
+    "Int32DType",
+    "Int64DType",
+    "IntDType",
+    "LongDoubleDType",
+    "LongDType",
+    "LongLongDType",
+    "ObjectDType",
+    "ShortDType",
+    "StrDType",
+    "TimeDelta64DType",
+    "UByteDType",
+    "UInt8DType",
+    "UInt16DType",
+    "UInt32DType",
+    "UInt64DType",
+    "UIntDType",
+    "ULongDType",
+    "ULongLongDType",
+    "UShortDType",
+    "VoidDType",
+    "_datatype_check",
+    "ARKOUDA_SUPPORTED_DTYPES",
+    "ARKOUDA_SUPPORTED_INTS",
+    "DType",
+    "DTypeObjects",
+    "DTypes",
+    "NUMBER_FORMAT_STRINGS",
+    "NumericDTypes",
+    "ScalarDTypes",
+    "SeriesDTypes",
+    "_is_dtype_in_union",
+    "_val_isinstance_of_union",
+    "all_scalars",
+    "bigint",
+    "bitType",
+    "bool_",
+    "bool_scalars",
+    "complex128",
+    "complex64",
+    "dtype",
+    "float16",
+    "float32",
+    "float64",
+    "float_scalars",
+    "get_byteorder",
+    "get_server_byteorder",
+    "int16",
+    "int32",
+    "int64",
+    "int8",
+    "intTypes",
+    "int_scalars",
+    "isSupportedBool",
+    "isSupportedDType",
+    "isSupportedFloat",
+    "isSupportedInt",
+    "isSupportedNumber",
+    "numeric_and_bool_scalars",
+    "numeric_scalars",
+    "numpy_scalars",
+    "resolve_scalar_dtype",
+    "str_",
+    "str_scalars",
+    "uint16",
+    "uint32",
+    "uint64",
+    "uint8",
+]
diff --git a/arkouda/numpy/dtypes/dtypes.py b/arkouda/numpy/dtypes/dtypes.py
index e0b2b0117f7..d4fbe179eaa 100644
--- a/arkouda/numpy/dtypes/dtypes.py
+++ b/arkouda/numpy/dtypes/dtypes.py
@@ -104,8 +104,18 @@ def dtype(x):
         return bigint()
     if isinstance(x, str) and x in ["Strings"]:
         return np.dtype(np.str_)
-    else:
-        return np.dtype(x)
+    if isinstance(x, bool):
+        return np.dtype(np.bool_)
+    if isinstance(x, int):
+        if 0 < x and x < 2**64:
+            return np.dtype(np.uint64)
+        elif x >= 2**64:
+            return bigint()
+        else:
+            return np.dtype(np.int64)
+    if isinstance(x, float):
+        return np.dtype(np.float64)
+    return np.dtype(x)
 
 
 def _is_dtype_in_union(dtype, union_type) -> builtins.bool:
@@ -284,7 +294,18 @@ def __repr__(self) -> str:
 # missing full support for: float32, int32, int16, int8, uint32, uint16, complex64, complex128
 # ARKOUDA_SUPPORTED_DTYPES = frozenset([member.value for _, member in DType.__members__.items()])
 ARKOUDA_SUPPORTED_DTYPES = frozenset(
-    ["bool_", "float", "float64", "int", "int64", "uint", "uint64", "uint8", "bigint", "str"]
+    [
+        "bool_",
+        "float",
+        "float64",
+        "int",
+        "int64",
+        "uint",
+        "uint64",
+        "uint8",
+        "bigint",
+        "str",
+    ]
 )
 
 DTypes = frozenset([member.value for _, member in DType.__members__.items()])
@@ -347,9 +368,9 @@ def resolve_scalar_dtype(val: object) -> str:
         else:
             return "int64"
     # Python float or np.float*
-    elif isinstance(val, float) or (hasattr(val, "dtype") and cast(np.float_, val).dtype.kind == "f"):
+    elif isinstance(val, float) or (hasattr(val, "dtype") and cast(np.float64, val).dtype.kind == "f"):
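+        # np.float_ was removed in numpy 2.0; np.float64 is the direct replacement.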
return "float64" - elif isinstance(val, complex) or (hasattr(val, "dtype") and cast(np.float_, val).dtype.kind == "c"): + elif isinstance(val, complex) or (hasattr(val, "dtype") and cast(np.float64, val).dtype.kind == "c"): return "float64" # TODO: actually support complex values in the backend elif isinstance(val, builtins.str) or isinstance(val, np.str_): return "str" diff --git a/arkouda/numpy/exceptions/__init__.py b/arkouda/numpy/exceptions/__init__.py index 0f9abbca300..98c283e857e 100644 --- a/arkouda/numpy/exceptions/__init__.py +++ b/arkouda/numpy/exceptions/__init__.py @@ -1,3 +1,3 @@ -from numpy import RankWarning, TooHardError +from numpy.exceptions import RankWarning, TooHardError __all__ = ["RankWarning", "TooHardError"] diff --git a/arkouda/numpy/lib/__init__.py b/arkouda/numpy/lib/__init__.py index a516f365d6a..93026e89d1d 100644 --- a/arkouda/numpy/lib/__init__.py +++ b/arkouda/numpy/lib/__init__.py @@ -1,30 +1,14 @@ # flake8: noqa -from numpy import ( - RankWarning, +from numpy.lib import ( add_docstring, add_newdoc, - deprecate, - deprecate_with_doc, - disp, - issubclass_, - issubdtype, - polynomial, - typename, ) -from arkouda.numpy.lib import emath from arkouda.numpy.lib.emath import * + __all__ = [ - "RankWarning", "add_docstring", "add_newdoc", - "deprecate", - "deprecate_with_doc", - "disp", "emath", - "issubclass_", - "issubdtype", - "polynomial", - "typename", ] diff --git a/arkouda/numpy/lib/npyio/__init__.py b/arkouda/numpy/lib/npyio/__init__.py new file mode 100644 index 00000000000..ef5c423dd7b --- /dev/null +++ b/arkouda/numpy/lib/npyio/__init__.py @@ -0,0 +1 @@ +from numpy.lib.npyio import DataSource diff --git a/arkouda/numpy/rec/__init__.py b/arkouda/numpy/rec/__init__.py index f752a4adcab..9a34c9ad52f 100644 --- a/arkouda/numpy/rec/__init__.py +++ b/arkouda/numpy/rec/__init__.py @@ -1,3 +1,3 @@ -from numpy import format_parser +from numpy.rec import format_parser __all__ = ["format_parser"] diff --git a/arkouda/pdarrayclass.py b/arkouda/pdarrayclass.py index ede5da86369..8c7fdcbc20d 100644 --- a/arkouda/pdarrayclass.py +++ b/arkouda/pdarrayclass.py @@ -11,6 +11,7 @@ from typeguard import typechecked from arkouda.client import generic_msg +from arkouda.dtypes import uint64 as ak_uint64 from arkouda.infoclass import information, pretty_print_information from arkouda.logger import getArkoudaLogger from arkouda.numpy.dtypes import ( @@ -20,9 +21,15 @@ bigint, ) from arkouda.numpy.dtypes import bool_ as akbool -from arkouda.numpy.dtypes import bool_scalars, dtype +from arkouda.numpy.dtypes import ( + bool_scalars, + dtype, +) from arkouda.numpy.dtypes import float64 as akfloat64 -from arkouda.numpy.dtypes import get_byteorder, get_server_byteorder +from arkouda.numpy.dtypes import ( + get_byteorder, + get_server_byteorder, +) from arkouda.numpy.dtypes import int64 as akint64 from arkouda.numpy.dtypes import ( int_scalars, @@ -166,6 +173,14 @@ def unescape(s): if mydtype == akstr_: # String value will always be surrounded with double quotes, so remove them return mydtype.type(unescape(value[1:-1])) + + if mydtype == ak_uint64: + if get_server_byteorder() == "little": + if value.startswith("-"): + value = value.strip("-") + uint_value = np.iinfo(np.uint64).max - ak_uint64(value) + 1 + return mydtype.type(uint_value) + return mydtype.type(value) return mydtype.type(value) except Exception: raise ValueError(f"unsupported value from server {mydtype.name} {value}") @@ -571,11 +586,33 @@ def _binop(self, other: pdarray, op: str) -> pdarray: # pdarray binop scalar # 
If scalar cannot be safely cast, server will infer the return dtype
         dt = resolve_scalar_dtype(other)
-        if self.dtype != bigint and np.can_cast(other, self.dtype):
+
+        from arkouda.numpy._numeric import can_cast as ak_can_cast
+        from arkouda.dtypes import int64 as ak_int64
+        from arkouda.dtypes import float64 as ak_float64
+
+        if self.dtype == ak_uint64 and dtype(other) == ak_int64:
+            dt = "float64"
+            other = ak_float64(other)
+        elif self.dtype != bigint and ak_can_cast(other, self.dtype):
             # If scalar can be losslessly cast to array dtype,
             # do the cast so that return array will have same dtype
             dt = self.dtype.name
             other = self.dtype.type(other)
         if dt not in DTypes:
             raise TypeError(f"Unhandled scalar type: {other} ({type(other)})")
         repMsg = generic_msg(
@@ -616,7 +653,9 @@ def _r_binop(self, other: pdarray, op: str) -> pdarray:
         # pdarray binop scalar
         # If scalar cannot be safely cast, server will infer the return dtype
         dt = resolve_scalar_dtype(other)
-        if self.dtype != bigint and np.can_cast(other, self.dtype):
+        from arkouda.numpy._numeric import can_cast as ak_can_cast
+
+        if self.dtype != bigint and ak_can_cast(other, self.dtype):
             # If scalar can be losslessly cast to array dtype,
             # do the cast so that return array will have same dtype
             dt = self.dtype.name
@@ -4131,23 +4171,22 @@ def fmod(dividend: Union[pdarray, numeric_scalars], divisor: Union[pdarray, nume
     )
     # TODO: handle shape broadcasting for multidimensional arrays
+    # The code below creates a command string for fmod2vv, fmod2vs or fmod2sv.
 
-# The code below creates a command string for fmod2vv, fmod2vs or fmod2sv.
- - if isinstance(dividend, pdarray) and isinstance(divisor, pdarray) : + if isinstance(dividend, pdarray) and isinstance(divisor, pdarray): cmdstring = f"fmod2vv<{dividend.dtype},{dividend.ndim},{divisor.dtype}>" - elif isinstance(dividend, pdarray) and not (isinstance(divisor, pdarray)) : - if resolve_scalar_dtype(divisor) in ['float64', 'int64', 'uint64', 'bool'] : - acmd = 'fmod2vs_'+resolve_scalar_dtype(divisor) - else : # this condition *should* be impossible because of the isSupportedNumber check + elif isinstance(dividend, pdarray) and not (isinstance(divisor, pdarray)): + if resolve_scalar_dtype(divisor) in ["float64", "int64", "uint64", "bool"]: + acmd = "fmod2vs_" + resolve_scalar_dtype(divisor) + else: # this condition *should* be impossible because of the isSupportedNumber check raise TypeError(f"Scalar divisor type {resolve_scalar_dtype(divisor)} not allowed in fmod") cmdstring = f"{acmd}<{dividend.dtype},{dividend.ndim}>" - elif not (isinstance(dividend, pdarray) and isinstance(divisor, pdarray)) : - if resolve_scalar_dtype(dividend) in ['float64', 'int64', 'uint64', 'bool'] : - acmd = 'fmod2sv_'+resolve_scalar_dtype(dividend) - else : # this condition *should* be impossible because of the isSupportedNumber check + elif not (isinstance(dividend, pdarray) and isinstance(divisor, pdarray)): + if resolve_scalar_dtype(dividend) in ["float64", "int64", "uint64", "bool"]: + acmd = "fmod2sv_" + resolve_scalar_dtype(dividend) + else: # this condition *should* be impossible because of the isSupportedNumber check raise TypeError(f"Scalar dividend type {resolve_scalar_dtype(dividend)} not allowed in fmod") cmdstring = f"{acmd}<{divisor.dtype},{divisor.ndim}>" # type: ignore[union-attr] @@ -4155,7 +4194,7 @@ def fmod(dividend: Union[pdarray, numeric_scalars], divisor: Union[pdarray, nume m = mod(dividend, divisor) return _create_scalar_array(m) -# We reach here if this was any case other than scalar & scalar + # We reach here if this was any case other than scalar & scalar return create_pdarray( cast( diff --git a/arkouda/pdarraycreation.py b/arkouda/pdarraycreation.py index fde8b3fe997..697159a712d 100644 --- a/arkouda/pdarraycreation.py +++ b/arkouda/pdarraycreation.py @@ -275,7 +275,10 @@ def array( # early out if we would have more uint arrays than can fit in max_bits early_out = (max_bits // 64) + (max_bits % 64 != 0) if max_bits != -1 else float("inf") while any(a != 0) and len(uint_arrays) < early_out: - low, a = a % 2**64, a // 2**64 + if isinstance(a, np.ndarray): + low, a = a.astype("O") % 2**64, a.astype("O") // 2**64 + else: + low, a = a % 2**64, a // 2**64 uint_arrays.append(array(np.array(low, dtype=np.uint), dtype=akuint64)) return bigint_from_uint_arrays(uint_arrays[::-1], max_bits=max_bits) except TypeError: @@ -300,6 +303,14 @@ def array( # than our numpy array we need to swap to match since the server expects # native endian bytes aview = _array_memview(a_) + + if get_server_byteorder() == "big": + if a.dtype.byteorder == "<": + a = a.view(a.dtype.newbyteorder("S")).byteswap() + else: + if a.dtype.byteorder == ">": + a = a.view(a.dtype.newbyteorder("S")).byteswap() + rep_msg = generic_msg( cmd=f"array<{a_.dtype.name},{ndim}>", args={"dtype": a_.dtype.name, "shape": tuple(a_.shape), "seg_string": False}, diff --git a/arkouda/util.py b/arkouda/util.py index e84d7dfa5fe..a0ba1b5f89d 100644 --- a/arkouda/util.py +++ b/arkouda/util.py @@ -425,7 +425,7 @@ def convert_bytes(nbytes, unit="B"): def is_numeric( - arry: Union[pdarray, Strings, Categorical, "Series", "Index"] # 
noqa: F821 + arry: Union[pdarray, Strings, Categorical, "Series", "Index"], # noqa: F821 ) -> builtins.bool: """ Check if the dtype of the given array is numeric. diff --git a/pydoc/requirements.txt b/pydoc/requirements.txt index 618f5818123..069bf81b099 100644 --- a/pydoc/requirements.txt +++ b/pydoc/requirements.txt @@ -1,6 +1,6 @@ # dependencies python>=3.9,<3.12.4 -numpy>=1.24.1,<2.0 +numpy>=2.0 pandas>=1.4.0,!=2.2.0 pyzmq>=20.0.0 typeguard==2.10.0 diff --git a/setup.py b/setup.py index 162f69a27b9..ac0b3a2723a 100644 --- a/setup.py +++ b/setup.py @@ -116,7 +116,7 @@ # For an analysis of "install_requires" vs pip's requirements files see: # https://packaging.python.org/en/latest/requirements.html install_requires=[ - "numpy>=1.24.1,<2.0", + "numpy>=2.0", "pandas>=1.4.0,!=2.2.0", "pyzmq>=20.0.0", "typeguard==2.10.0", diff --git a/tests/dataframe_test.py b/tests/dataframe_test.py index fd6c1f6b9c5..85b31b20436 100644 --- a/tests/dataframe_test.py +++ b/tests/dataframe_test.py @@ -94,7 +94,7 @@ def build_ak_df_example_numeric_types(): "float64": ak.randint(0, 1, 20, dtype=ak.float64), "int64": ak.randint(0, 10, 20, dtype=ak.int64), "uint64": ak.randint(0, 10, 20, dtype=ak.uint64), - "bigint": ak.randint(0, 10, 20, dtype=ak.uint64) + 2**200, + "bigint": ak.randint(2**200, 2**200 + 10, 20, dtype=ak.uint64), } ) return ak_df @@ -105,7 +105,9 @@ def build_pd_df_duplicates(): userid = [111, 222, 111, 333, 222, 111] item = [0, 1, 0, 2, 1, 0] day = [5, 5, 5, 5, 5, 5] - return pd.DataFrame({"userName": username, "userID": userid, "item": item, "day": day}) + return pd.DataFrame( + {"userName": username, "userID": userid, "item": item, "day": day} + ) @staticmethod def build_ak_df_duplicates(): @@ -139,7 +141,7 @@ def build_pd_df_append(): item = [0, 0, 1, 1, 2, 0, 0, 2] day = [5, 5, 6, 5, 6, 6, 1, 2] amount = [0.5, 0.6, 1.1, 1.2, 4.3, 0.6, 0.5, 5.1] - bi = (np.arange(8) + 2**200).tolist() + bi = np.arange(2**200, 2**200 + 8).tolist() # (np.arange(8) + 2**200).tolist() ui = (np.arange(8).astype(ak.uint64)) + 2**63 return pd.DataFrame( { @@ -209,7 +211,9 @@ def test_dataframe_creation(self, size): "uint": ak.array(pddf["uint"]), "bigint": ak.arange(2**200, 2**200 + size), "bool": ak.array(pddf["bool"]), - "segarray": ak.SegArray.from_multi_array([ak.array(x) for x in pddf["segarray"]]), + "segarray": ak.SegArray.from_multi_array( + [ak.array(x) for x in pddf["segarray"]] + ), } ) assert isinstance(akdf, ak.DataFrame) @@ -362,7 +366,9 @@ def test_boolean_indexing(self): row = df[df["userName"] == "Carol"] assert len(row) == 1 - assert ref_df[ref_df["userName"] == "Carol"].equals(row.to_pandas(retain_index=True)) + assert ref_df[ref_df["userName"] == "Carol"].equals( + row.to_pandas(retain_index=True) + ) def test_column_indexing(self): df = self.build_ak_df() @@ -602,14 +608,20 @@ def test_groupby_standard(self): pds = pd.Series( data=np.ones(4, dtype=np.int64), index=pd.Index( - data=np.array(["0.0.0.1", "0.0.0.2", "0.0.0.3", "0.0.0.4"], dtype=" None: +# """ +# Creates an hdf5 file with dataset(s) from the specified columns and path prefix +# via the ak.save_all method. 
If columns is a List, then the names list is used +# to create the datasets +# +# :return: None +# :raise: ValueError if the names list is None when columns is a list +# """ +# if isinstance(columns, dict): +# ak.to_hdf(columns=columns, prefix_path=prefix_path) +# else: +# if not names: +# raise ValueError("the names list must be not None if columns is a list") +# ak.to_hdf(columns=columns, prefix_path=prefix_path, names=names) +# +# def test_save_all_load_all_with_dict(self, hdf_test_base_tmp): +# """ +# Creates 2..n files from an input columns dict depending upon the number of +# arkouda_server locales, retrieves all datasets and correspoding pdarrays, +# and confirms they match inputs +# +# :return: None +# :raise: AssertionError if the input and returned datasets and pdarrays don't match +# """ +# self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict".format(hdf_test_base_tmp) +# ) +# retrieved_columns = ak.load_all("{}/iotest_dict".format(hdf_test_base_tmp)) +# +# itp = self.dict_columns["int_tens_pdarray"].to_ndarray() +# ritp = retrieved_columns["int_tens_pdarray"].to_ndarray() +# itp.sort() +# ritp.sort() +# ihp = self.dict_columns["int_hundreds_pdarray"].to_ndarray() +# rihp = retrieved_columns["int_hundreds_pdarray"].to_ndarray() +# ihp.sort() +# rihp.sort() +# ifp = self.dict_columns["float_pdarray"].to_ndarray() +# rifp = retrieved_columns["float_pdarray"].to_ndarray() +# ifp.sort() +# rifp.sort() +# +# assert 4 == len(retrieved_columns) +# assert itp.tolist() == ritp.tolist() +# assert ihp.tolist() == rihp.tolist() +# assert ifp.tolist() == rifp.tolist() +# assert len(self.dict_columns["bool_pdarray"]) == len(retrieved_columns["bool_pdarray"]) +# assert 4 == len(ak.get_datasets("{}/iotest_dict_LOCALE0000".format(hdf_test_base_tmp))) +# +# def test_save_all_load_all_with_list(self, hdf_test_base_tmp): +# """ +# Creates 2..n files from an input columns and names list depending upon the number of +# arkouda_server locales, retrieves all datasets and correspoding pdarrays, and confirms +# they match inputs +# +# :return: None +# :raise: AssertionError if the input and returned datasets and pdarrays don't match +# """ +# self._create_file( +# columns=self.list_columns, +# prefix_path="{}/iotest_list".format(hdf_test_base_tmp), +# names=self.names, +# ) +# retrieved_columns = ak.load_all(path_prefix="{}/iotest_list".format(hdf_test_base_tmp)) +# +# itp = self.list_columns[0].to_ndarray() +# itp.sort() +# ritp = retrieved_columns["int_tens_pdarray"].to_ndarray() +# ritp.sort() +# ihp = self.list_columns[1].to_ndarray() +# ihp.sort() +# rihp = retrieved_columns["int_hundreds_pdarray"].to_ndarray() +# rihp.sort() +# fp = self.list_columns[2].to_ndarray() +# fp.sort() +# rfp = retrieved_columns["float_pdarray"].to_ndarray() +# rfp.sort() +# +# assert 4 == len(retrieved_columns) +# assert itp.tolist() == ritp.tolist() +# assert ihp.tolist() == rihp.tolist() +# assert fp.tolist() == rfp.tolist() +# assert len(self.list_columns[3]) == len(retrieved_columns["bool_pdarray"]) +# assert 4 == len(ak.get_datasets("{}/iotest_list_LOCALE0000".format(hdf_test_base_tmp))) +# +# def test_read_hdf(self, hdf_test_base_tmp): +# """ +# Creates 2..n files depending upon the number of arkouda_server locales, reads the files +# with an explicit list of file names to the read_all method, and confirms the datasets +# and embedded pdarrays match the input dataset and pdarrays +# +# :return: None +# :raise: AssertionError if the input and returned datasets don't match +# """ +# 
self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict_columns".format(hdf_test_base_tmp) +# ) +# +# # test with read_hdf +# dataset = ak.read_hdf(filenames=["{}/iotest_dict_columns_LOCALE0000".format(hdf_test_base_tmp)]) +# assert 4 == len(list(dataset.keys())) +# +# # test with generic read function +# dataset = ak.read(filenames=["{}/iotest_dict_columns_LOCALE0000".format(hdf_test_base_tmp)]) +# assert 4 == len(list(dataset.keys())) +# +# def test_read_hdf_with_glob(self, hdf_test_base_tmp): +# """ +# Creates 2..n files depending upon the number of arkouda_server locales with two +# files each containing different-named datasets with the same pdarrays, reads the files +# with the glob feature of the read_all method, and confirms the datasets and embedded +# pdarrays match the input dataset and pdarrays +# +# :return: None +# :raise: AssertionError if the input and returned datasets don't match +# """ +# self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict_columns".format(hdf_test_base_tmp) +# ) +# +# retrieved_columns = ak.read_hdf(filenames="{}/iotest_dict_columns*".format(hdf_test_base_tmp)) +# +# itp = self.list_columns[0].to_ndarray() +# itp.sort() +# ritp = retrieved_columns["int_tens_pdarray"].to_ndarray() +# ritp.sort() +# ihp = self.list_columns[1].to_ndarray() +# ihp.sort() +# rihp = retrieved_columns["int_hundreds_pdarray"].to_ndarray() +# rihp.sort() +# fp = self.list_columns[2].to_ndarray() +# fp.sort() +# rfp = retrieved_columns["float_pdarray"].to_ndarray() +# rfp.sort() +# +# assert 4 == len(list(retrieved_columns.keys())) +# assert itp.tolist() == ritp.tolist() +# assert ihp.tolist() == rihp.tolist() +# assert fp.tolist() == rfp.tolist() +# assert len(self.bool_pdarray) == len(retrieved_columns["bool_pdarray"]) +# +# def test_load(self, hdf_test_base_tmp): +# """ +# Creates 1..n files depending upon the number of arkouda_server locales with three columns +# AKA datasets, loads each corresponding dataset and confirms each corresponding pdarray +# equals the input pdarray. 
+# +# :return: None +# :raise: AssertionError if the input and returned datasets (pdarrays) don't match +# """ +# self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict_columns".format(hdf_test_base_tmp) +# ) +# result_array_tens = ak.load( +# path_prefix="{}/iotest_dict_columns".format(hdf_test_base_tmp), dataset="int_tens_pdarray" +# )["int_tens_pdarray"] +# result_array_hundreds = ak.load( +# path_prefix="{}/iotest_dict_columns".format(hdf_test_base_tmp), +# dataset="int_hundreds_pdarray", +# )["int_hundreds_pdarray"] +# result_array_floats = ak.load( +# path_prefix="{}/iotest_dict_columns".format(hdf_test_base_tmp), dataset="float_pdarray" +# )["float_pdarray"] +# result_array_bools = ak.load( +# path_prefix="{}/iotest_dict_columns".format(hdf_test_base_tmp), dataset="bool_pdarray" +# )["bool_pdarray"] +# +# ratens = result_array_tens.to_ndarray() +# ratens.sort() +# +# rahundreds = result_array_hundreds.to_ndarray() +# rahundreds.sort() +# +# rafloats = result_array_floats.to_ndarray() +# rafloats.sort() +# +# assert self.int_tens_ndarray.tolist() == ratens.tolist() +# assert self.int_hundreds_ndarray.tolist() == rahundreds.tolist() +# assert self.float_ndarray.tolist() == rafloats.tolist() +# assert len(self.bool_pdarray) == len(result_array_bools) +# +# # test load_all with file_format parameter usage +# ak.to_parquet( +# columns=self.dict_columns, +# prefix_path="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# ) +# result_array_tens = ak.load( +# path_prefix="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# dataset="int_tens_pdarray", +# file_format="Parquet", +# )["int_tens_pdarray"] +# result_array_hundreds = ak.load( +# path_prefix="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# dataset="int_hundreds_pdarray", +# file_format="Parquet", +# )["int_hundreds_pdarray"] +# result_array_floats = ak.load( +# path_prefix="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# dataset="float_pdarray", +# file_format="Parquet", +# )["float_pdarray"] +# result_array_bools = ak.load( +# path_prefix="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# dataset="bool_pdarray", +# file_format="Parquet", +# )["bool_pdarray"] +# ratens = result_array_tens.to_ndarray() +# ratens.sort() +# +# rahundreds = result_array_hundreds.to_ndarray() +# rahundreds.sort() +# +# rafloats = result_array_floats.to_ndarray() +# rafloats.sort() +# assert self.int_tens_ndarray.tolist() == ratens.tolist() +# assert self.int_hundreds_ndarray.tolist() == rahundreds.tolist() +# assert self.float_ndarray.tolist() == rafloats.tolist() +# assert len(self.bool_pdarray) == len(result_array_bools) +# +# # Test load with invalid prefix +# with pytest.raises(RuntimeError): +# ak.load( +# path_prefix="{}/iotest_dict_column".format(hdf_test_base_tmp), +# dataset="int_tens_pdarray", +# )["int_tens_pdarray"] +# +# # Test load with invalid file +# with pytest.raises(RuntimeError): +# ak.load(path_prefix="{}/not-a-file".format(hdf_test_base_tmp), dataset="int_tens_pdarray")[ +# "int_tens_pdarray" +# ] +# +# def test_load_all(self, hdf_test_base_tmp): +# self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict_columns".format(hdf_test_base_tmp) +# ) +# +# results = ak.load_all(path_prefix="{}/iotest_dict_columns".format(hdf_test_base_tmp)) +# assert "bool_pdarray" in results +# assert "float_pdarray" in results +# assert "int_tens_pdarray" in results +# assert "int_hundreds_pdarray" in results +# +# # test load_all with file_format 
parameter usage +# ak.to_parquet( +# columns=self.dict_columns, +# prefix_path="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# ) +# results = ak.load_all( +# file_format="Parquet", +# path_prefix="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# ) +# assert "bool_pdarray" in results +# assert "float_pdarray" in results +# assert "int_tens_pdarray" in results +# assert "int_hundreds_pdarray" in results +# +# # # Test load_all with invalid prefix +# with pytest.raises(ValueError): +# ak.load_all(path_prefix="{}/iotest_dict_column".format(hdf_test_base_tmp)) +# +# # Test load with invalid file +# with pytest.raises(RuntimeError): +# ak.load_all(path_prefix="{}/not-a-file".format(hdf_test_base_tmp)) +# +# def test_get_data_sets(self, hdf_test_base_tmp): +# """ +# Creates 1..n files depending upon the number of arkouda_server locales containing three +# datasets and confirms the expected number of datasets along with the dataset names +# +# :return: None +# :raise: AssertionError if the input and returned dataset names don't match +# """ +# self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict_columns".format(hdf_test_base_tmp) +# ) +# datasets = ak.get_datasets("{}/iotest_dict_columns_LOCALE0000".format(hdf_test_base_tmp)) +# +# assert 4 == len(datasets) +# for dataset in datasets: +# assert dataset in self.names +# +# # Test load_all with invalid filename +# with pytest.raises(RuntimeError): +# ak.get_datasets("{}/iotest_dict_columns_LOCALE000".format(hdf_test_base_tmp)) +# +# @pytest.mark.parametrize("prob_size", pytest.prob_size) +# @pytest.mark.parametrize("dtype", NUMERIC_AND_STR_TYPES) +# def test_read_and_write(self, prob_size, dtype, hdf_test_base_tmp): +# ak_arr = make_ak_arrays(prob_size * pytest.nl, dtype) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/hdf_test_correct" +# ak_arr.to_hdf(file_name) +# +# # test read_hdf with glob +# gen_arr = ak.read_hdf(f"{file_name}*").popitem()[1] +# assert (ak_arr == gen_arr).all() +# +# # test read_hdf with filenames +# gen_arr = ak.read_hdf( +# filenames=[f"{file_name}_LOCALE{i:04d}" for i in range(pytest.nl)] +# ).popitem()[1] +# assert (ak_arr == gen_arr).all() +# +# # verify generic read works +# gen_arr = ak.read(f"{file_name}*").popitem()[1] +# assert (ak_arr == gen_arr).all() +# +# # verify generic load works +# if dtype == "str": +# # we have to specify the dataset for strings since it differs from default of "array" +# gen_arr = ak.load(path_prefix=file_name, dataset="strings_array")["strings_array"] +# else: +# gen_arr = ak.load(path_prefix=file_name).popitem()[1] +# assert (ak_arr == gen_arr).all() +# +# # verify generic load works with file_format parameter +# if dtype == "str": +# # we have to specify the dataset for strings since it differs from default of "array" +# gen_arr = ak.load(path_prefix=file_name, dataset="strings_array", file_format="HDF5")[ +# "strings_array" +# ] +# else: +# gen_arr = ak.load(path_prefix=file_name, file_format="HDF5").popitem()[1] +# assert (ak_arr == gen_arr).all() +# +# # verify load_all works +# gen_arr = ak.load_all(path_prefix=file_name) +# if dtype == "str": +# # we have to specify the dataset for strings since it differs from default of "array" +# assert (ak_arr == gen_arr["strings_array"]).all() +# else: +# assert (ak_arr == gen_arr["array"]).all() +# +# # Test load with invalid file +# with pytest.raises(RuntimeError): +# ak.load(path_prefix=f"{hdf_test_base_tmp}/not-a-file") +# +# 
@pytest.mark.parametrize("prob_size", pytest.prob_size) +# @pytest.mark.parametrize("dtype", NUMERIC_AND_STR_TYPES) +# def test_read_and_write_dset_provided(self, prob_size, dtype, hdf_test_base_tmp): +# ak_arr = make_ak_arrays(prob_size * pytest.nl, dtype) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/hdf_test_correct" +# ak_arr.to_hdf(file_name, "my_dset") +# +# # test read_hdf with glob +# gen_arr = ak.read_hdf(f"{file_name}*", "my_dset")["my_dset"] +# assert (ak_arr == gen_arr).all() +# +# # test read_hdf with filenames +# gen_arr = ak.read_hdf( +# filenames=[f"{file_name}_LOCALE{i:04d}" for i in range(pytest.nl)], datasets="my_dset" +# )["my_dset"] +# assert (ak_arr == gen_arr).all() +# +# # verify generic read works +# gen_arr = ak.read(f"{file_name}*", "my_dset")["my_dset"] +# assert (ak_arr == gen_arr).all() +# +# # verify generic load works +# gen_arr = ak.load(path_prefix=file_name, dataset="my_dset")["my_dset"] +# assert (ak_arr == gen_arr).all() +# +# # verify generic load works with file_format parameter +# gen_arr = ak.load(path_prefix=file_name, dataset="my_dset", file_format="HDF5")["my_dset"] +# assert (ak_arr == gen_arr).all() +# +# # verify load_all works +# gen_arr = ak.load_all(path_prefix=file_name) +# assert (ak_arr == gen_arr["my_dset"]).all() +# +# # Test load with invalid file +# with pytest.raises(RuntimeError): +# ak.load(path_prefix=f"{hdf_test_base_tmp}/not-a-file", dataset="my_dset") +# +# @pytest.mark.parametrize("dtype", NUMERIC_AND_STR_TYPES) +# def test_edge_case_read_write(self, dtype, hdf_test_base_tmp): +# np_edge_case = make_edge_case_arrays(dtype) +# ak_edge_case = ak.array(np_edge_case) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# ak_edge_case.to_hdf(f"{tmp_dirname}/hdf_test_edge_case", "my-dset") +# hdf_arr = ak.read_hdf(f"{tmp_dirname}/hdf_test_edge_case*", "my-dset")["my-dset"] +# if dtype == "float64": +# assert np.allclose(np_edge_case, hdf_arr.to_ndarray(), equal_nan=True) +# else: +# assert (np_edge_case == hdf_arr.to_ndarray()).all() +# +# def test_read_and_write_with_dict(self, hdf_test_base_tmp): +# df_dict = make_multi_dtype_dict() +# # extend to include categoricals +# df_dict["cat"] = ak.Categorical(ak.array(["c", "b", "a", "b"])) +# df_dict["cat_from_codes"] = ak.Categorical.from_codes( +# codes=ak.array([2, 1, 0, 1]), categories=ak.array(["a", "b", "c"]) +# ) +# akdf = ak.DataFrame(df_dict) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/multi_col_hdf" +# # use multi-column write to generate hdf file +# akdf.to_hdf(file_name) +# +# # test read_hdf with glob, no datasets specified +# rd_data = ak.read_hdf(f"{file_name}*") +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# # test read_hdf with only one dataset specified (each tested) +# for col_name in akdf.columns.values: +# gen_arr = ak.read_hdf(f"{file_name}*", datasets=[col_name])[col_name] +# if akdf[col_name].dtype != ak.float64: +# assert akdf[col_name].to_list() == gen_arr.to_list() +# else: +# a = akdf[col_name].to_ndarray() +# b = gen_arr.to_ndarray() +# if isinstance(a[0], np.ndarray): +# assert all(np.allclose(a1, b1, equal_nan=True) for a1, b1 in zip(a, b)) +# else: +# assert np.allclose(a, b, equal_nan=True) +# +# # test read_hdf with half of columns names specified as datasets +# half_cols 
= akdf.columns.values[: len(akdf.columns.values) // 2] +# rd_data = ak.read_hdf(f"{file_name}*", datasets=half_cols) +# rd_df = ak.DataFrame(rd_data) +# pd.testing.assert_frame_equal(akdf[half_cols].to_pandas(), rd_df[half_cols].to_pandas()) +# +# # test read_hdf with all columns names specified as datasets +# rd_data = ak.read_hdf(f"{file_name}*", datasets=akdf.columns.values) +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# # test read_hdf with filenames +# rd_data = ak.read_hdf(filenames=[f"{file_name}_LOCALE{i:04d}" for i in range(pytest.nl)]) +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# # verify generic read works +# rd_data = ak.read(f"{file_name}*") +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# for col_name in akdf.columns.values: +# # verify generic load works +# gen_arr = ak.load(path_prefix=file_name, dataset=col_name)[col_name] +# if akdf[col_name].dtype != ak.float64: +# assert akdf[col_name].to_list() == gen_arr.to_list() +# else: +# a = akdf[col_name].to_ndarray() +# b = gen_arr.to_ndarray() +# if isinstance(a[0], np.ndarray): +# assert all(np.allclose(a1, b1, equal_nan=True) for a1, b1 in zip(a, b)) +# else: +# assert np.allclose(a, b, equal_nan=True) +# +# # verify generic load works with file_format parameter +# gen_arr = ak.load(path_prefix=file_name, dataset=col_name, file_format="HDF5")[col_name] +# if akdf[col_name].dtype != ak.float64: +# assert akdf[col_name].to_list() == gen_arr.to_list() +# else: +# a = akdf[col_name].to_ndarray() +# b = gen_arr.to_ndarray() +# if isinstance(a[0], np.ndarray): +# assert all(np.allclose(a1, b1, equal_nan=True) for a1, b1 in zip(a, b)) +# else: +# assert np.allclose(a, b, equal_nan=True) +# +# # Test load with invalid file +# with pytest.raises(RuntimeError): +# ak.load( +# path_prefix=f"{hdf_test_base_tmp}/not-a-file", +# dataset=akdf.columns.values[0], +# ) +# +# # verify load_all works +# rd_data = ak.load_all(path_prefix=file_name) +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# # Test load_all with invalid file +# with pytest.raises(ValueError): +# ak.load_all(path_prefix=f"{hdf_test_base_tmp}/does-not-exist") +# +# # test get_datasets +# datasets = ak.get_datasets(f"{file_name}*") +# assert sorted(datasets) == sorted(akdf.columns.values) +# +# # test save with index true +# akdf.to_hdf(file_name, index=True) +# rd_data = ak.read_hdf(f"{file_name}*") +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# # test get_datasets with index +# datasets = ak.get_datasets(f"{file_name}*") +# assert sorted(datasets) == ["Index"] + sorted(akdf.columns.values) +# +# def test_ls_hdf(self, hdf_test_base_tmp): +# df_dict = make_multi_dtype_dict() +# akdf = ak.DataFrame(df_dict) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/test_ls_hdf" +# # use multi-column write to generate hdf file +# 
akdf.to_hdf(file_name) +# +# message = ak.ls(f"{file_name}_LOCALE0000") +# for col_name in akdf.columns.values: +# assert col_name in message +# +# with pytest.raises(RuntimeError): +# ak.ls(f"{tmp_dirname}/not-a-file_LOCALE0000") +# +# def test_ls_hdf_empty(self): +# # Test filename empty/whitespace-only condition +# with pytest.raises(ValueError): +# ak.ls("") +# +# with pytest.raises(ValueError): +# ak.ls(" ") +# +# with pytest.raises(ValueError): +# ak.ls(" \n\r\t ") +# +# def test_read_hdf_with_error_and_warn(self, hdf_test_base_tmp): +# df_dict = make_multi_dtype_dict() +# akdf = ak.DataFrame(df_dict) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/test_error_hdf" +# # use multi-column write to generate hdf file +# akdf.to_hdf(file_name) +# akdf.to_hdf(f"{file_name}_dupe") +# +# # Make sure we can read ok +# dataset = ak.read_hdf( +# filenames=[ +# f"{file_name}_LOCALE0000", +# f"{file_name}_dupe_LOCALE0000", +# ] +# ) +# assert dataset is not None +# +# # Change the name of the first file we try to raise an error due to file missing. +# with pytest.raises(RuntimeError): +# ak.read_hdf( +# filenames=[ +# f"{file_name}_MISSING_LOCALE0000", +# f"{file_name}_dupe_LOCALE0000", +# ] +# ) +# +# # Run the same test with missing file, but this time with the warning flag for read_all +# with pytest.warns( +# RuntimeWarning, match=r"There were .* errors reading files on the server.*" +# ): +# dataset = ak.read_hdf( +# filenames=[ +# f"{file_name}_MISSING_LOCALE0000", +# f"{file_name}_dupe_LOCALE0000", +# ], +# strict_types=False, +# allow_errors=True, +# ) +# assert dataset is not None +# +# @pytest.mark.parametrize("prob_size", pytest.prob_size) +# def test_save_strings_dataset(self, prob_size, hdf_test_base_tmp): +# reg_strings = make_ak_arrays(prob_size, "str") +# # hard coded at 26 because we don't need to test long strings at large scale +# # passing data from python to chpl this way can really slow down as size increases +# long_strings = ak.array( +# [f"testing a longer string{num} to be written, loaded and appended" for num in range(26)] +# ) +# +# for strings_array in [reg_strings, long_strings]: +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/test_strings_hdf" +# strings_array.to_hdf(file_name) +# r_strings_array = ak.read_hdf(f"{file_name}*").popitem()[1] +# assert (strings_array == r_strings_array).all() +# +# # Read a part of a saved Strings dataset from one hdf5 file +# r_strings_subset = ak.read_hdf(filenames=f"{file_name}_LOCALE0000").popitem()[1] +# assert isinstance(r_strings_subset, ak.Strings) +# assert (strings_array[: r_strings_subset.size] == r_strings_subset).all() +# +# # Repeat the test using the calc_string_offsets=True option to +# # have server calculate offsets array +# r_strings_subset = ak.read_hdf( +# filenames=f"{file_name}_LOCALE0000", calc_string_offsets=True +# ).popitem()[1] +# assert isinstance(r_strings_subset, ak.Strings) +# assert (strings_array[: r_strings_subset.size] == r_strings_subset).all() +# +# # test append +# strings_array.to_hdf(file_name, dataset="strings-dupe", mode="append") +# r_strings = ak.read_hdf(f"{file_name}*", datasets="strings_array")["strings_array"] +# r_strings_dupe = ak.read_hdf(f"{file_name}*", datasets="strings-dupe")["strings-dupe"] +# assert (r_strings == r_strings_dupe).all() +# +# def testStringsWithoutOffsets(self, hdf_test_base_tmp): +# """ +# This tests both saving & reading a strings array without 
saving and reading the offsets to HDF5. +# Instead the offsets array will be derived from the values/bytes area by looking for null-byte +# terminator strings +# """ +# strings_array = ak.array(["testing string{}".format(num) for num in list(range(0, 25))]) +# strings_array.to_hdf( +# "{}/strings-test".format(hdf_test_base_tmp), dataset="strings", save_offsets=False +# ) +# r_strings_array = ak.load( +# "{}/strings-test".format(hdf_test_base_tmp), dataset="strings", calc_string_offsets=True +# )["strings"] +# strings = strings_array.to_ndarray() +# strings.sort() +# r_strings = r_strings_array.to_ndarray() +# r_strings.sort() +# assert strings.tolist() == r_strings.tolist() +# +# def testSaveLongStringsDataset(self, hdf_test_base_tmp): +# # Create, save, and load Strings dataset +# strings = ak.array( +# [ +# "testing a longer string{} to be written, loaded and appended".format(num) +# for num in list(range(0, 26)) +# ] +# ) +# strings.to_hdf("{}/strings-test".format(hdf_test_base_tmp), dataset="strings") +# +# n_strings = strings.to_ndarray() +# n_strings.sort() +# r_strings = ak.load("{}/strings-test".format(hdf_test_base_tmp), dataset="strings")[ +# "strings" +# ].to_ndarray() +# r_strings.sort() +# +# assert n_strings.tolist() == r_strings.tolist() +# +# def testSaveMixedStringsDataset(self, hdf_test_base_tmp): +# strings_array = ak.array(["string {}".format(num) for num in list(range(0, 25))]) +# m_floats = ak.array([x / 10.0 for x in range(0, 10)]) +# m_ints = ak.array(list(range(0, 10))) +# ak.to_hdf( +# {"m_strings": strings_array, "m_floats": m_floats, "m_ints": m_ints}, +# "{}/multi-type-test".format(hdf_test_base_tmp), +# ) +# r_mixed = ak.load_all("{}/multi-type-test".format(hdf_test_base_tmp)) +# +# assert ( +# np.sort(strings_array.to_ndarray()).tolist() +# == np.sort(r_mixed["m_strings"].to_ndarray()).tolist() +# ) +# +# assert r_mixed["m_floats"] is not None +# assert r_mixed["m_ints"] is not None +# +# r_floats = ak.sort( +# ak.load("{}/multi-type-test".format(hdf_test_base_tmp), dataset="m_floats")["m_floats"] +# ) +# assert m_floats.to_list() == r_floats.to_list() +# +# r_ints = ak.sort( +# ak.load("{}/multi-type-test".format(hdf_test_base_tmp), dataset="m_ints")["m_ints"] +# ) +# assert m_ints.to_list() == r_ints.to_list() +# +# strings = strings_array.to_ndarray() +# strings.sort() +# r_strings = ak.load("{}/multi-type-test".format(hdf_test_base_tmp), dataset="m_strings")[ +# "m_strings" +# ].to_ndarray() +# r_strings.sort() +# +# assert strings.tolist() == r_strings.tolist() +# +# def testAppendStringsDataset(self, hdf_test_base_tmp): +# strings_array = ak.array(["string {}".format(num) for num in list(range(0, 25))]) +# strings_array.to_hdf("{}/append-strings-test".format(hdf_test_base_tmp), dataset="strings") +# strings_array.to_hdf( +# "{}/append-strings-test".format(hdf_test_base_tmp), dataset="strings-dupe", mode="append" +# ) +# +# r_strings = ak.load("{}/append-strings-test".format(hdf_test_base_tmp), dataset="strings")[ +# "strings" +# ] +# r_strings_dupe = ak.load( +# "{}/append-strings-test".format(hdf_test_base_tmp), dataset="strings-dupe" +# )["strings-dupe"] +# assert r_strings.to_list() == r_strings_dupe.to_list() +# +# def testAppendMixedStringsDataset(self, hdf_test_base_tmp): +# strings_array = ak.array(["string {}".format(num) for num in list(range(0, 25))]) +# strings_array.to_hdf("{}/append-multi-type-test".format(hdf_test_base_tmp), dataset="m_strings") +# m_floats = ak.array([x / 10.0 for x in range(0, 10)]) +# m_ints = ak.array(list(range(0, 
10))) +# ak.to_hdf( +# {"m_floats": m_floats, "m_ints": m_ints}, +# "{}/append-multi-type-test".format(hdf_test_base_tmp), +# mode="append", +# ) +# r_mixed = ak.load_all("{}/append-multi-type-test".format(hdf_test_base_tmp)) +# +# assert r_mixed["m_floats"] is not None +# assert r_mixed["m_ints"] is not None +# +# r_floats = ak.sort( +# ak.load("{}/append-multi-type-test".format(hdf_test_base_tmp), dataset="m_floats")[ +# "m_floats" +# ] +# ) +# r_ints = ak.sort( +# ak.load("{}/append-multi-type-test".format(hdf_test_base_tmp), dataset="m_ints")["m_ints"] +# ) +# assert m_floats.to_list() == r_floats.to_list() +# assert m_ints.to_list() == r_ints.to_list() +# +# strings = strings_array.to_ndarray() +# strings.sort() +# r_strings = r_mixed["m_strings"].to_ndarray() +# r_strings.sort() +# +# assert strings.tolist() == r_strings.tolist() +# +# def test_save_multi_type_dict_dataset(self, hdf_test_base_tmp): +# df_dict = make_multi_dtype_dict() +# # extend to include categoricals +# df_dict["cat"] = ak.Categorical(ak.array(["c", "b", "a", "b"])) +# df_dict["cat_from_codes"] = ak.Categorical.from_codes( +# codes=ak.array([2, 1, 0, 1]), categories=ak.array(["a", "b", "c"]) +# ) +# keys = list(df_dict.keys()) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/multi_type_dict_test" +# # use multi-column write to generate hdf file +# ak.to_hdf(df_dict, file_name) +# r_mixed = ak.read_hdf(f"{file_name}*") +# +# for col_name in keys: +# # verify load by dataset and returned mixed dict at col_name +# loaded = ak.load(file_name, dataset=col_name)[col_name] +# for arr in [loaded, r_mixed[col_name]]: +# if df_dict[col_name].dtype != ak.float64: +# assert df_dict[col_name].to_list() == arr.to_list() +# else: +# a = df_dict[col_name].to_ndarray() +# b = arr.to_ndarray() +# if isinstance(a[0], np.ndarray): +# assert all(np.allclose(a1, b1, equal_nan=True) for a1, b1 in zip(a, b)) +# else: +# assert np.allclose(a, b, equal_nan=True) +# +# # test append for multi type dict +# single_arr = df_dict[keys[0]] +# rest_dict = {k: df_dict[k] for k in keys[1:]} +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/multi_type_dict_test" +# single_arr.to_hdf(file_name, dataset=keys[0]) +# +# ak.to_hdf(rest_dict, file_name, mode="append") +# r_mixed = ak.read_hdf(f"{file_name}*") +# +# for col_name in keys: +# # verify load by dataset and returned mixed dict at col_name +# loaded = ak.load(file_name, dataset=col_name)[col_name] +# for arr in [loaded, r_mixed[col_name]]: +# if df_dict[col_name].dtype != ak.float64: +# assert df_dict[col_name].to_list() == arr.to_list() +# else: +# a = df_dict[col_name].to_ndarray() +# b = arr.to_ndarray() +# if isinstance(a[0], np.ndarray): +# assert all(np.allclose(a1, b1, equal_nan=True) for a1, b1 in zip(a, b)) +# else: +# assert np.allclose(a, b, equal_nan=True) +# +# def test_strict_types(self, hdf_test_base_tmp): +# N = 100 +# int_types = [np.uint32, np.int64, np.uint16, np.int16] +# float_types = [np.float32, np.float64, np.float32, np.float64] +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# prefix = f"{tmp_dirname}/strict-type-test" +# for i, (it, ft) in enumerate(zip(int_types, float_types)): +# with h5py.File("{}-{}".format(prefix, i), "w") as f: +# idata = np.arange(i * N, (i + 1) * N, dtype=it) +# id = f.create_dataset("integers", data=idata) +# id.attrs["ObjType"] = 1 +# fdata = np.arange(i * N, (i + 1) * N, dtype=ft) +# fd = 
f.create_dataset("floats", data=fdata) +# fd.attrs["ObjType"] = 1 +# with pytest.raises(RuntimeError): +# ak.read_hdf(f"{prefix}*") +# +# a = ak.read_hdf(f"{prefix}*", strict_types=False) +# assert a["integers"].to_list() == np.arange(len(int_types) * N).tolist() +# assert np.allclose( +# a["floats"].to_ndarray(), np.arange(len(float_types) * N, dtype=np.float64) +# ) +# +# def test_small_arrays(self, hdf_test_base_tmp): +# for arr in [ak.array([1]), ak.array(["ab", "cd"]), ak.array(["123456789"])]: +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# arr.to_hdf(f"{tmp_dirname}/small_numeric") +# ret_arr = ak.read_hdf(f"{tmp_dirname}/small_numeric*").popitem()[1] +# assert (arr == ret_arr).all() +# +# def test_uint64_to_from_HDF5(self, hdf_test_base_tmp): +# """ +# Test our ability to read/write uint64 to HDF5 +# """ +# npa1 = np.array( +# [18446744073709551500, 18446744073709551501, 18446744073709551502], dtype=np.uint64 +# ) +# pda1 = ak.array(npa1) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# pda1.to_hdf(f"{tmp_dirname}/small_numeric", dataset="pda1") +# # Now load it back in +# pda2 = ak.load(f"{tmp_dirname}/small_numeric", dataset="pda1")["pda1"] +# assert str(pda1) == str(pda2) +# assert 18446744073709551500 == pda2[0] +# assert pda2.to_list() == npa1.tolist() +# +# def test_uint64_to_from_array(self, hdf_test_base_tmp): +# """ +# Test conversion to and from numpy array / pdarray using unsigned 64bit integer (uint64) +# """ +# npa1 = np.array( +# [18446744073709551500, 18446744073709551501, 18446744073709551502], dtype=np.uint64 +# ) +# pda1 = ak.array(npa1) +# assert 18446744073709551500 == pda1[0] +# assert pda1.to_list() == npa1.tolist() +# +# def test_bigint(self, hdf_test_base_tmp): +# df_dict = { +# "pdarray": ak.arange(2**200, 2**200 + 3, max_bits=201), +# "groupby": ak.GroupBy(ak.arange(2**200, 2**200 + 5)), +# "segarray": ak.SegArray(ak.arange(0, 10, 2), ak.arange(2**200, 2**200 + 10, max_bits=212)), +# } +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/bigint_test" +# ak.to_hdf(df_dict, file_name) +# ret_dict = ak.read_hdf(f"{tmp_dirname}/bigint_test*") +# +# pda_loaded = ak.read_hdf(f"{tmp_dirname}/bigint_test*", datasets="pdarray")["pdarray"] +# a = df_dict["pdarray"] +# for rd_a in [ret_dict["pdarray"], pda_loaded]: +# assert isinstance(rd_a, ak.pdarray) +# assert a.to_list() == rd_a.to_list() +# assert a.max_bits == rd_a.max_bits +# +# g_loaded = ak.read_hdf(f"{tmp_dirname}/bigint_test*", datasets="groupby")["groupby"] +# g = df_dict["groupby"] +# for rd_g in [ret_dict["groupby"], g_loaded]: +# assert isinstance(rd_g, ak.GroupBy) +# assert g.keys.to_list() == rd_g.keys.to_list() +# assert g.unique_keys.to_list() == rd_g.unique_keys.to_list() +# assert g.permutation.to_list() == rd_g.permutation.to_list() +# assert g.segments.to_list() == rd_g.segments.to_list() +# +# sa_loaded = ak.read_hdf(f"{tmp_dirname}/bigint_test*", datasets="segarray")["segarray"] +# sa = df_dict["segarray"] +# for rd_sa in [ret_dict["segarray"], sa_loaded]: +# assert isinstance(rd_sa, ak.SegArray) +# assert sa.values.to_list() == rd_sa.values.to_list() +# assert sa.segments.to_list() == rd_sa.segments.to_list() +# +# def test_unsanitized_dataset_names(self, hdf_test_base_tmp): +# # Test when quotes are part of the dataset name +# my_arrays = {'foo"0"': ak.arange(100), 'bar"': ak.arange(100)} +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# 
ak.to_hdf(my_arrays, f"{tmp_dirname}/bad_dataset_names") +# ak.read_hdf(f"{tmp_dirname}/bad_dataset_names*") +# +# +# def test_hdf_groupby(self, hdf_test_base_tmp): +# # test for categorical and multiple keys +# string = ak.array(["a", "b", "a", "b", "c"]) +# cat = ak.Categorical(string) +# cat_from_codes = ak.Categorical.from_codes( +# codes=ak.array([0, 1, 0, 1, 2]), categories=ak.array(["a", "b", "c"]) +# ) +# pda = ak.array([0, 1, 2, 0, 2]) +# +# pda_grouping = ak.GroupBy(pda) +# str_grouping = ak.GroupBy(string) +# cat_grouping = ak.GroupBy([cat, cat_from_codes]) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# for g in [pda_grouping, str_grouping, cat_grouping]: +# g.to_hdf(f"{tmp_dirname}/groupby_test") +# g_load = ak.read(f"{tmp_dirname}/groupby_test*").popitem()[1] +# assert len(g_load.keys) == len(g.keys) +# assert g_load.permutation.to_list() == g.permutation.to_list() +# assert g_load.segments.to_list() == g.segments.to_list() +# assert g_load._uki.to_list() == g._uki.to_list() +# if isinstance(g.keys[0], ak.Categorical): +# for k, kload in zip(g.keys, g_load.keys): +# assert k.to_list() == kload.to_list() +# else: +# assert g_load.keys.to_list() == g.keys.to_list() +# +# def test_hdf_categorical(self, hdf_test_base_tmp): +# cat = ak.Categorical(ak.array(["a", "b", "a", "b", "c"])) +# cat_from_codes = ak.Categorical.from_codes( +# codes=ak.array([0, 1, 0, 1, 2]), categories=ak.array(["a", "b", "c"]) +# ) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# for c in cat, cat_from_codes: +# c.to_hdf(f"{tmp_dirname}/categorical_test") +# c_load = ak.read(f"{tmp_dirname}/categorical_test*").popitem()[1] +# +# assert c_load.categories.to_list() == (["a", "b", "c", "N/A"]) +# if c.segments is not None: +# assert c.segments.to_list() == c_load.segments.to_list() +# assert c.permutation.to_list() == c_load.permutation.to_list() +# +# def test_segarray_hdf(self, hdf_test_base_tmp): +# a = [0, 1, 2, 3] +# b = [4, 0, 5, 6, 0, 7, 8, 0] +# c = [9, 0, 0] +# +# # int64 test +# flat = a + b + c +# segments = ak.array([0, len(a), len(a) + len(b)]) +# dtype = ak.dtypes.int64 +# akflat = ak.array(flat, dtype) +# segarr = ak.SegArray(segments, akflat) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# segarr.to_hdf(f"{tmp_dirname}/segarray_int") +# # Now load it back in +# seg2 = ak.load(f"{tmp_dirname}/segarray_int", dataset="segarray")["segarray"] +# assert segarr.segments.to_list() == seg2.segments.to_list() +# assert segarr.values.to_list() == seg2.values.to_list() +# +# # uint64 test +# dtype = ak.dtypes.uint64 +# akflat = ak.array(flat, dtype) +# segarr = ak.SegArray(segments, akflat) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# segarr.to_hdf(f"{tmp_dirname}/segarray_uint") +# # Now load it back in +# seg2 = ak.load(f"{tmp_dirname}/segarray_uint", dataset="segarray")["segarray"] +# assert segarr.segments.to_list() == seg2.segments.to_list() +# assert segarr.values.to_list() == seg2.values.to_list() +# +# # float64 test +# dtype = ak.dtypes.float64 +# akflat = ak.array(flat, dtype) +# segarr = ak.SegArray(segments, akflat) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# segarr.to_hdf(f"{tmp_dirname}/segarray_float") +# # Now load it back in +# seg2 = ak.load(f"{tmp_dirname}/segarray_float", dataset="segarray")["segarray"] +# assert segarr.segments.to_list() == seg2.segments.to_list() +# assert segarr.values.to_list() == 
seg2.values.to_list() +# +# # bool test +# dtype = ak.dtypes.bool_ +# akflat = ak.array(flat, dtype) +# segarr = ak.SegArray(segments, akflat) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# segarr.to_hdf(f"{tmp_dirname}/segarray_bool") +# # Now load it back in +# seg2 = ak.load(f"{tmp_dirname}/segarray_bool", dataset="segarray")["segarray"] +# assert segarr.segments.to_list() == seg2.segments.to_list() +# assert segarr.values.to_list() == seg2.values.to_list() +# +# def test_dataframe_segarr(self, hdf_test_base_tmp): +# a = [0, 1, 2, 3] +# b = [4, 0, 5, 6, 0, 7, 8, 0] +# c = [9, 0, 0] +# +# # int64 test +# flat = a + b + c +# segments = ak.array([0, len(a), len(a) + len(b)]) +# dtype = ak.dtypes.int64 +# akflat = ak.array(flat, dtype) +# segarr = ak.SegArray(segments, akflat) +# +# s = ak.array(["abc", "def", "ghi"]) +# df = ak.DataFrame([segarr, s]) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# df.to_hdf(f"{tmp_dirname}/dataframe_segarr") +# df_load = ak.DataFrame.load(f"{tmp_dirname}/dataframe_segarr") +# assert df.to_pandas().equals(df_load.to_pandas()) +# +# def test_segarray_str_hdf5(self, hdf_test_base_tmp): +# words = ak.array(["one,two,three", "uno,dos,tres"]) +# strs, segs = words.regex_split(",", return_segments=True) +# +# x = ak.SegArray(segs, strs) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# x.to_hdf(f"{tmp_dirname}/test_file") +# rd = ak.read_hdf(f"{tmp_dirname}/test_file*").popitem()[1] +# assert isinstance(rd, ak.SegArray) +# assert x.segments.to_list() == rd.segments.to_list() +# assert x.values.to_list() == rd.values.to_list() +# +# def test_hdf_overwrite_pdarray(self, hdf_test_base_tmp): +# # test repack with a single object +# a = ak.arange(1000) +# b = ak.randint(0, 100, 1000) +# c = ak.arange(15) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/pda_test" +# for repack in [True, False]: +# a.to_hdf(file_name) +# b.to_hdf(file_name, dataset="array_2", mode="append") +# f_list = glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwriting last dset so overwrite first +# c.update_hdf(file_name, dataset="array", repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size < orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# assert data["array"].to_list() == c.to_list() +# +# # test overwrites with different types +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/pda_test" +# a.to_hdf(file_name) +# for size, dtype in [(15, ak.uint64), (150, ak.float64), (1000, ak.bool_)]: +# b = ak.arange(size, dtype=dtype) +# b.update_hdf(file_name) +# data = ak.read_hdf(f"{file_name}*").popitem()[1] +# assert data.to_list() == b.to_list() +# +# def test_hdf_overwrite_strings(self, hdf_test_base_tmp): +# # test repack with a single object +# a = ak.random_strings_uniform(0, 16, 1000) +# b = ak.random_strings_uniform(0, 16, 1000) +# c = ak.random_strings_uniform(0, 16, 10) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/str_test" +# for repack in [True, False]: +# a.to_hdf(file_name, dataset="test_str") +# b.to_hdf(file_name, mode="append") +# f_list = 
glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwriting last dset so overwrite first +# c.update_hdf(file_name, dataset="test_str", repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size < orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# assert data["test_str"].to_list() == c.to_list() +# +# def test_overwrite_categorical(self, hdf_test_base_tmp): +# a = ak.Categorical(ak.array([f"cat_{i%3}" for i in range(100)])) +# b = ak.Categorical(ak.array([f"cat_{i%4}" for i in range(100)])) +# c = ak.Categorical(ak.array([f"cat_{i%5}" for i in range(10)])) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/cat_test" +# for repack in [True, False]: +# a.to_hdf(file_name, dataset="test_cat") +# b.to_hdf(file_name, mode="append") +# f_list = glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwriting last dset so overwrite first +# c.update_hdf(file_name, dataset="test_cat", repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size < orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# assert (data["test_cat"] == c).all() +# +# dset_name = "categorical_array" # name of categorical array +# dset_name2 = "to_replace" +# dset_name3 = "cat_array2" +# a.to_hdf(file_name, dataset=dset_name) +# b.to_hdf(file_name, dataset=dset_name2, mode="append") +# c.to_hdf(file_name, dataset=dset_name3, mode="append") +# +# a.update_hdf(file_name, dataset=dset_name2) +# data = ak.read_hdf(f"{file_name}*") +# assert all(name in data for name in (dset_name, dset_name2, dset_name3)) +# d = data[dset_name2] +# for attr in "categories", "codes", "permutation", "segments", "_akNAcode": +# assert getattr(d, attr).to_list() == getattr(a, attr).to_list() +# +# def test_hdf_overwrite_dataframe(self, hdf_test_base_tmp): +# df = ak.DataFrame( +# { +# "a": ak.arange(1000), +# "b": ak.random_strings_uniform(0, 16, 1000), +# "c": ak.arange(1000, dtype=bool), +# "d": ak.randint(0, 50, 1000), +# } +# ) +# odf = ak.DataFrame( +# { +# "b": ak.randint(0, 25, 50), +# "c": ak.arange(50, dtype=bool), +# } +# ) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/df_test" +# for repack in [True, False]: +# df.to_hdf(file_name) +# f_list = glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwriting last dset so overwrite first +# odf.update_hdf(file_name, repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size <= orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# odf_keys = list(odf.keys()) +# for key in df.keys(): +# assert (data[key] == (odf[key] if key in odf_keys else df[key])).all() +# +# def test_overwrite_segarray(self, hdf_test_base_tmp): +# sa1 = ak.SegArray(ak.arange(0, 1000, 5), ak.arange(1000)) +# sa2 = ak.SegArray(ak.arange(0, 100, 5), 
ak.arange(100)) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/segarray_test" +# for repack in [True, False]: +# sa1.to_hdf(file_name) +# sa1.to_hdf(file_name, dataset="seg2", mode="append") +# f_list = glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# +# sa2.update_hdf(file_name, repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size <= orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# assert (data["segarray"].values == sa2.values).all() +# assert (data["segarray"].segments == sa2.segments).all() +# +# def test_overwrite_single_dset(self, hdf_test_base_tmp): +# # we need to test that both repack=False and repack=True generate the same file size here +# a = ak.arange(1000) +# b = ak.arange(15) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# a.to_hdf(f"{tmp_dirname}/test_file") +# b.update_hdf(f"{tmp_dirname}/test_file") +# f_list = glob.glob(f"{tmp_dirname}/test_file*") +# f1_size = sum(os.path.getsize(f) for f in f_list) +# +# a.to_hdf(f"{tmp_dirname}/test_file_2") +# b.update_hdf(f"{tmp_dirname}/test_file_2", repack=False) +# f_list = glob.glob(f"{tmp_dirname}/test_file_2_*") +# f2_size = sum(os.path.getsize(f) for f in f_list) +# +# assert f1_size == f2_size +# +# def test_overwrite_dataframe(self, hdf_test_base_tmp): +# df = ak.DataFrame( +# { +# "a": ak.arange(1000), +# "b": ak.random_strings_uniform(0, 16, 1000), +# "c": ak.arange(1000, dtype=bool), +# "d": ak.randint(0, 50, 1000), +# } +# ) +# replace = { +# "b": ak.randint(0, 25, 50), +# "c": ak.arange(50, dtype=bool), +# } +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# df.to_hdf(f"{tmp_dirname}/overwrite_test") +# f_list = glob.glob(f"{tmp_dirname}/overwrite_test_*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwritting last dset so overwrite first +# ak.update_hdf(replace, f"{tmp_dirname}/overwrite_test") +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# # ensure that the column was actually overwritten +# assert new_size < orig_size +# data = ak.read_hdf(f"{tmp_dirname}/overwrite_test_*") +# assert data["b"].to_list() == replace["b"].to_list() +# assert data["c"].to_list() == replace["c"].to_list() +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# df.to_hdf(f"{tmp_dirname}/overwrite_test") +# f_list = glob.glob(f"{tmp_dirname}/overwrite_test_*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwritting last dset so overwrite first +# ak.update_hdf(replace, f"{tmp_dirname}/overwrite_test", repack=False) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# # ensure that the column was actually overwritten +# assert new_size >= orig_size +# data = ak.read_hdf(f"{tmp_dirname}/overwrite_test_*") +# assert data["b"].to_list() == replace["b"].to_list() +# assert data["c"].to_list() == replace["c"].to_list() +# +# def test_snapshot(self, hdf_test_base_tmp): +# df = ak.DataFrame(make_multi_dtype_dict()) +# df_str_idx = df.copy() +# df_str_idx._set_index([f"A{i}" for i in range(len(df))]) +# col_order = df.columns.values +# df_ref = df.to_pandas() +# df_str_idx_ref = df_str_idx.to_pandas(retain_index=True) +# a = ak.randint(0, 10, 100) +# s = 
ak.random_strings_uniform(0, 5, 50) +# c = ak.Categorical(s) +# g = ak.GroupBy(a) +# ref_data = {"a": a, "s": s, "c": c, "g": g} +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# ak.snapshot(f"{tmp_dirname}/snapshot_test") +# for v in [df, df_str_idx, a, s, c, g]: +# # delete variables and verify no longer in the namespace +# del v +# with pytest.raises(NameError): +# assert not v # noqa: F821 +# +# # restore the variables +# data = ak.restore(f"{tmp_dirname}/snapshot_test") +# for vn in ["df", "df_str_idx", "a", "s", "c", "g"]: +# # ensure all variable names returned +# assert vn in data.keys() +# +# # validate that restored variables are correct +# pd.testing.assert_frame_equal( +# df_ref[col_order], data["df"].to_pandas(retain_index=True)[col_order] +# ) +# pd.testing.assert_frame_equal( +# df_str_idx_ref[col_order], data["df_str_idx"].to_pandas(retain_index=True)[col_order] +# ) +# for key in ref_data.keys(): +# if isinstance(data[key], ak.GroupBy): +# assert (ref_data[key].permutation == data[key].permutation).all() +# assert (ref_data[key].keys == data[key].keys).all() +# assert (ref_data[key].segments == data[key].segments).all() +# else: +# assert (ref_data[key] == data[key]).all() +# +# @pytest.mark.parametrize("dtype", NUMERIC_AND_STR_TYPES) +# @pytest.mark.parametrize("size", pytest.prob_size) +# def test_index_save_and_load(self, dtype, size, hdf_test_base_tmp): +# idx = ak.Index(make_ak_arrays(size, dtype)) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# idx.to_hdf(f"{tmp_dirname}/idx_test") +# rd_idx = ak.read_hdf(f"{tmp_dirname}/idx_test*").popitem()[1] +# +# assert isinstance(rd_idx, ak.Index) +# assert type(rd_idx.values) == type(idx.values) +# assert idx.to_list() == rd_idx.to_list() +# +# if dtype == ak.str_: +# # if strings we need to also test Categorical +# idx = ak.Index(ak.Categorical(make_ak_arrays(size, dtype))) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# idx.to_hdf(f"{tmp_dirname}/idx_test") +# rd_idx = ak.read_hdf(f"{tmp_dirname}/idx_test*").popitem()[1] +# +# assert isinstance(rd_idx, ak.Index) +# assert type(rd_idx.values) == type(idx.values) +# assert idx.to_list() == rd_idx.to_list() +# +# @pytest.mark.parametrize("dtype1", NUMERIC_AND_STR_TYPES) +# @pytest.mark.parametrize("dtype2", NUMERIC_AND_STR_TYPES) +# @pytest.mark.parametrize("size", pytest.prob_size) +# def test_multi_index(self, dtype1, dtype2, size, hdf_test_base_tmp): +# t1 = make_ak_arrays(size, dtype1) +# t2 = make_ak_arrays(size, dtype2) +# idx = ak.Index.factory([t1, t2]) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# idx.to_hdf(f"{tmp_dirname}/idx_test") +# rd_idx = ak.read_hdf(f"{tmp_dirname}/idx_test*").popitem()[1] +# +# assert isinstance(rd_idx, ak.MultiIndex) +# assert idx.to_list() == rd_idx.to_list() +# +# # handle categorical cases as well +# if ak.str_ in [dtype1, dtype2]: +# if dtype1 == ak.str_: +# t1 = ak.Categorical(t1) +# if dtype2 == ak.str_: +# t2 = ak.Categorical(t2) +# idx = ak.Index.factory([t1, t2]) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# idx.to_hdf(f"{tmp_dirname}/idx_test") +# rd_idx = ak.read_hdf(f"{tmp_dirname}/idx_test*").popitem()[1] +# +# assert isinstance(rd_idx, ak.MultiIndex) +# assert idx.to_list() == rd_idx.to_list() +# +# def test_hdf_overwrite_index(self, hdf_test_base_tmp): +# # test repack with a single object +# a = ak.Index(ak.arange(1000)) +# b = ak.Index(ak.randint(0, 100, 1000)) +# c = 
ak.Index(ak.arange(15)) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/idx_test" +# for repack in [True, False]: +# a.to_hdf(file_name, dataset="index") +# b.to_hdf(file_name, dataset="index_2", mode="append") +# f_list = glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwriting last dset so overwrite first +# c.update_hdf(file_name, dataset="index", repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size < orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# assert isinstance(data["index"], ak.Index) +# assert data["index"].to_list() == c.to_list() +# +# def test_special_objtype(self, hdf_test_base_tmp): +# """ +# This test is simply to ensure that the dtype is persisted through the io +# operation. It ultimately uses the process of pdarray, but need to ensure +# correct Arkouda Object Type is returned +# """ +# ip = ak.IPv4(ak.arange(10)) +# dt = ak.Datetime(ak.arange(10)) +# td = ak.Timedelta(ak.arange(10)) +# df = ak.DataFrame({"ip": ip, "datetime": dt, "timedelta": td}) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# ip.to_hdf(f"{tmp_dirname}/ip_test") +# rd_ip = ak.read_hdf(f"{tmp_dirname}/ip_test*").popitem()[1] +# assert isinstance(rd_ip, ak.IPv4) +# assert ip.to_list() == rd_ip.to_list() +# +# dt.to_hdf(f"{tmp_dirname}/dt_test") +# rd_dt = ak.read_hdf(f"{tmp_dirname}/dt_test*").popitem()[1] +# assert isinstance(rd_dt, ak.Datetime) +# assert dt.to_list() == rd_dt.to_list() +# +# td.to_hdf(f"{tmp_dirname}/td_test") +# rd_td = ak.read_hdf(f"{tmp_dirname}/td_test*").popitem()[1] +# assert isinstance(rd_td, ak.Timedelta) +# assert td.to_list() == rd_td.to_list() +# +# df.to_hdf(f"{tmp_dirname}/df_test") +# rd_df = ak.read_hdf(f"{tmp_dirname}/df_test*") +# +# assert isinstance(rd_df["ip"], ak.IPv4) +# assert isinstance(rd_df["datetime"], ak.Datetime) +# assert isinstance(rd_df["timedelta"], ak.Timedelta) +# assert df["ip"].to_list() == rd_df["ip"].to_list() +# assert df["datetime"].to_list() == rd_df["datetime"].to_list() +# assert df["timedelta"].to_list() == rd_df["timedelta"].to_list() +# +# +# class TestCSV: +# +# def test_csv_read_write(self, csv_test_base_tmp): +# # first test that can read csv with no header not written by Arkouda +# cols = ["ColA", "ColB", "ColC"] +# a = ["ABC", "DEF"] +# b = ["123", "345"] +# c = ["3.14", "5.56"] +# with tempfile.TemporaryDirectory(dir=csv_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/non_ak.csv" +# with open(file_name, "w") as f: +# f.write(",".join(cols) + "\n") +# f.write(f"{a[0]},{b[0]},{c[0]}\n") +# f.write(f"{a[1]},{b[1]},{c[1]}\n") +# +# data = ak.read_csv(file_name) +# assert list(data.keys()) == cols +# assert data["ColA"].to_list() == a +# assert data["ColB"].to_list() == b +# assert data["ColC"].to_list() == c +# +# data = ak.read_csv(file_name, datasets="ColB")["ColB"] +# assert isinstance(data, ak.Strings) +# assert data.to_list() == b +# +# d = { +# cols[0]: ak.array(a), +# cols[1]: ak.array([int(x) for x in b]), +# cols[2]: ak.array([round(float(x), 2) for x in c]), +# } +# with tempfile.TemporaryDirectory(dir=csv_test_base_tmp) as tmp_dirname: +# # test can read csv with header not written by Arkouda +# non_ak_file_name = f"{tmp_dirname}/non_ak.csv" +# with 
open(non_ak_file_name, "w") as f: +# f.write("**HEADER**\n") +# f.write("str,int64,float64\n") +# f.write("*/HEADER/*\n") +# f.write(",".join(cols) + "\n") +# f.write(f"{a[0]},{b[0]},{c[0]}\n") +# f.write(f"{a[1]},{b[1]},{c[1]}\n") +# +# # test writing file with Arkouda with non-standard delim +# non_standard_delim_file_name = f"{tmp_dirname}/non_standard_delim" +# ak.to_csv(d, f"{non_standard_delim_file_name}.csv", col_delim="|*|") +# +# for file_name, delim in [ +# (non_ak_file_name, ","), +# (f"{non_standard_delim_file_name}*", "|*|"), +# ]: +# data = ak.read_csv(file_name, column_delim=delim) +# assert list(data.keys()) == cols +# assert data["ColA"].to_list() == a +# assert data["ColB"].to_list() == [int(x) for x in b] +# assert data["ColC"].to_list() == [round(float(x), 2) for x in c] +# +# # test reading subset of columns +# data = ak.read_csv(file_name, datasets="ColB", column_delim=delim)["ColB"] +# assert isinstance(data, ak.pdarray) +# assert data.to_list() == [int(x) for x in b] +# +# # larger data set testing +# d = { +# "ColA": ak.randint(0, 50, 101), +# "ColB": ak.randint(0, 50, 101), +# "ColC": ak.randint(0, 50, 101), +# } +# with tempfile.TemporaryDirectory(dir=csv_test_base_tmp) as tmp_dirname: +# ak.to_csv(d, f"{tmp_dirname}/non_equal_set.csv") +# data = ak.read_csv(f"{tmp_dirname}/non_equal_set*") +# assert data["ColA"].to_list() == d["ColA"].to_list() +# assert data["ColB"].to_list() == d["ColB"].to_list() +# assert data["ColC"].to_list() == d["ColC"].to_list() +# +# +# class TestImportExport: +# +# @classmethod +# def setup_class(cls): +# cls.pddf = pd.DataFrame( +# data={ +# "c_1": np.array([np.iinfo(np.int64).min, -1, 0, np.iinfo(np.int64).max]), +# "c_3": np.array([False, True, False, False]), +# "c_4": np.array([-0.0, np.finfo(np.float64).min, np.nan, np.inf]), +# "c_5": np.array(["abc", " ", "xyz", ""]), +# }, +# index=np.arange(4), +# ) +# cls.akdf = ak.DataFrame(cls.pddf) +# +# def test_import_hdf(self, import_export_base_tmp): +# locales = pytest.nl +# with tempfile.TemporaryDirectory(dir=import_export_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/import_hdf_test" +# +# self.pddf.to_hdf(f"{file_name}_table.h5", key="dataframe", format="table", mode="w") +# akdf = ak.import_data(f"{file_name}_table.h5", write_file=f"{file_name}_ak_table.h5") +# assert len(glob.glob(f"{file_name}_ak_table*.h5")) == locales +# assert self.pddf.equals(akdf.to_pandas()) +# +# self.pddf.to_hdf( +# f"{file_name}_table_cols.h5", +# key="dataframe", +# format="table", +# data_columns=True, +# mode="w", +# ) +# akdf = ak.import_data( +# f"{file_name}_table_cols.h5", write_file=f"{file_name}_ak_table_cols.h5" +# ) +# assert len(glob.glob(f"{file_name}_ak_table_cols*.h5")) == locales +# assert self.pddf.equals(akdf.to_pandas()) +# +# self.pddf.to_hdf( +# f"{file_name}_fixed.h5", key="dataframe", format="fixed", data_columns=True, mode="w" +# ) +# akdf = ak.import_data(f"{file_name}_fixed.h5", write_file=f"{file_name}_ak_fixed.h5") +# assert len(glob.glob(f"{file_name}_ak_fixed*.h5")) == locales +# assert self.pddf.equals(akdf.to_pandas()) +# +# with pytest.raises(FileNotFoundError): +# ak.import_data(f"{file_name}_foo.h5", write_file=f"{file_name}_ak_fixed.h5") +# with pytest.raises(RuntimeError): +# ak.import_data(f"{file_name}_*.h5", write_file=f"{file_name}_ak_fixed.h5") +# +# def test_export_hdf(self, import_export_base_tmp): +# with tempfile.TemporaryDirectory(dir=import_export_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/export_hdf_test" +# +# 
self.akdf.to_hdf(f"{file_name}_ak_write") +# +# pddf = ak.export( +# f"{file_name}_ak_write", write_file=f"{file_name}_pd_from_ak.h5", index=True +# ) +# assert len(glob.glob(f"{file_name}_pd_from_ak.h5")) == 1 +# assert pddf.equals(self.akdf.to_pandas()) +# +# with pytest.raises(RuntimeError): +# ak.export(f"{tmp_dirname}_foo.h5", write_file=f"{tmp_dirname}/pd_from_ak.h5", index=True) +# +# def test_import_parquet(self, import_export_base_tmp): +# locales = pytest.nl +# with tempfile.TemporaryDirectory(dir=import_export_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/import_pq_test" +# +# self.pddf.to_parquet(f"{file_name}_table.parquet") +# akdf = ak.import_data( +# f"{file_name}_table.parquet", write_file=f"{file_name}_ak_table.parquet" +# ) +# assert len(glob.glob(f"{file_name}_ak_table*.parquet")) == locales +# assert self.pddf.equals(akdf.to_pandas()) +# +# def test_export_parquet(self, import_export_base_tmp): +# with tempfile.TemporaryDirectory(dir=import_export_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/export_pq_test" +# +# self.akdf.to_parquet(f"{file_name}_ak_write") +# +# pddf = ak.export( +# f"{file_name}_ak_write", write_file=f"{file_name}_pd_from_ak.parquet", index=True +# ) +# assert len(glob.glob(f"{file_name}_pd_from_ak.parquet")) == 1 +# assert pddf[self.akdf.columns.values].equals(self.akdf.to_pandas()) +# +# with pytest.raises(RuntimeError): +# ak.export( +# f"{tmp_dirname}_foo.parquet", +# write_file=f"{tmp_dirname}/pd_from_ak.parquet", +# index=True, +# ) +# +# +# class TestZarr: +# +# @pytest.mark.skip +# def test_zarr_read_write(self, zarr_test_base_tmp): +# import arkouda.array_api as Array +# +# shapes = [(10,), (20,)] +# chunk_shapes = [(2,), (3,)] +# dtypes = [ak.int64, ak.float64] +# for shape, chunk_shape in zip(shapes, chunk_shapes): +# for dtype in dtypes: +# a = Array.full(shape, 7, dtype=dtype) +# with tempfile.TemporaryDirectory(dir=zarr_test_base_tmp) as tmp_dirname: +# to_zarr(f"{tmp_dirname}", a._array, chunk_shape) +# b = read_zarr(f"{tmp_dirname}", len(shape), dtype) +# assert np.allclose(a.to_ndarray(), b.to_ndarray()) diff --git a/tests/numpy/dtypes_test.py b/tests/numpy/dtypes_test.py index 4c1c0522ae7..336595cc41c 100644 --- a/tests/numpy/dtypes_test.py +++ b/tests/numpy/dtypes_test.py @@ -99,7 +99,15 @@ def test_pdarrays_datatypes(self): assert dtypes.dtype("bigint") == ak.arange(2**200, 2**200 + 10).dtype def test_isSupportedInt(self): - for supported in -10, 1, np.int64(1), np.int64(1.0), np.uint32(1), 2**63 + 1, 2**200: + for supported in ( + -10, + 1, + np.int64(1), + np.int64(1.0), + np.uint32(1), + 2**63 + 1, + 2**200, + ): assert dtypes.isSupportedInt(supported) for unsupported in 1.0, "1": assert not dtypes.isSupportedInt(unsupported) @@ -158,13 +166,26 @@ def test_DtypeEnum(self): assert ( frozenset( - {"bool_", "float", "float64", "int", "int64", "uint", "uint64", "uint8", "bigint", "str"} + { + "bool_", + "float", + "float64", + "int", + "int64", + "uint", + "uint64", + "uint8", + "bigint", + "str", + } ) == ak.ARKOUDA_SUPPORTED_DTYPES ) def test_NumericDTypes(self): - num_types = frozenset(["bool", "bool_", "float", "float64", "int", "int64", "uint64", "bigint"]) + num_types = frozenset( + ["bool", "bool_", "float", "float64", "int", "int64", "uint64", "bigint"] + ) assert num_types == dtypes.NumericDTypes def test_SeriesDTypes(self): @@ -181,8 +202,10 @@ def test_SeriesDTypes(self): assert dtypes.SeriesDTypes[dt] == np.bool_ def test_scalars(self): - assert "typing.Union[bool, numpy.bool_]" == 
str(ak.bool_scalars) - assert "typing.Union[float, numpy.float64, numpy.float32]" == str(ak.float_scalars) + assert "typing.Union[bool, numpy.bool]" == str(ak.bool_scalars) + assert "typing.Union[float, numpy.float64, numpy.float32]" == str( + ak.float_scalars + ) assert ( "typing.Union[int, numpy.int8, numpy.int16, numpy.int32, numpy.int64, " + "numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64]" @@ -196,12 +219,12 @@ def test_scalars(self): assert "typing.Union[str, numpy.str_]" == str(ak.str_scalars) assert ( "typing.Union[numpy.float64, numpy.float32, numpy.int8, numpy.int16, numpy.int32, " - + "numpy.int64, numpy.bool_, numpy.str_, numpy.uint8, numpy.uint16, numpy.uint32, " + + "numpy.int64, numpy.bool, numpy.str_, numpy.uint8, numpy.uint16, numpy.uint32, " + "numpy.uint64]" ) == str(ak.numpy_scalars) assert ( - "typing.Union[bool, numpy.bool_, float, numpy.float64, numpy.float32, int, numpy.int8, " + "typing.Union[bool, numpy.bool, float, numpy.float64, numpy.float32, int, numpy.int8, " + "numpy.int16, numpy.int32, numpy.int64, numpy.uint8, numpy.uint16, numpy.uint32," + " numpy.uint64, numpy.str_, str]" ) == str(ak.all_scalars) diff --git a/tests/operator_test.py b/tests/operator_test.py index 4ee1a123535..9b8ae26894d 100644 --- a/tests/operator_test.py +++ b/tests/operator_test.py @@ -40,7 +40,12 @@ def test_numpy_equivalency(self, size=100, verbose=pytest.verbose): "bool": (np.arange(0, size, 1) % 2) == 0, } global scalars - scalars = {"int64": 5, "uint64": np.uint64(2**63 + 1), "float64": -3.14159, "bool": True} + scalars = { + "int64": 5, + "uint64": np.uint64(2**63 + 1), + "float64": -3.14159, + "bool": True, + } dtypes = pdarrays.keys() if verbose: print("Operators: ", ak.pdarray.BinOps) @@ -66,6 +71,7 @@ def do_op(lt, rt, ls, rs, isarkouda, oper): evalstr += f'scalars["{rt}"]' else: evalstr += f'{("ndarrays", "pdarrays")[isarkouda]}["{rt}"]' + print("EVAL STRING: ", evalstr) res = eval(evalstr) return res @@ -96,10 +102,14 @@ def do_op(lt, rt, ls, rs, isarkouda, oper): ): # neither numpy nor arkouda implement results["neither_implement"].append((expression, str(e))) else: # arkouda implements with error, np does not implement - results["arkouda_minus_numpy"].append((expression, str(e), True)) + results["arkouda_minus_numpy"].append( + (expression, str(e), True) + ) continue # arkouda implements but not numpy - results["arkouda_minus_numpy"].append((expression, str(akres), False)) + results["arkouda_minus_numpy"].append( + (expression, str(akres), False) + ) continue try: akres = do_op(ltype, rtype, lscalar, rscalar, True, op) @@ -107,9 +117,13 @@ def do_op(lt, rt, ls, rs, isarkouda, oper): if "not implemented" or "unrecognized type" in str( e ): # numpy implements but not arkouda - results["numpy_minus_arkouda"].append((expression, str(e), True)) + results["numpy_minus_arkouda"].append( + (expression, str(e), True) + ) else: # both implement, but arkouda errors - results["both_implement"].append((expression, str(e), True, False, False)) + results["both_implement"].append( + (expression, str(e), True, False, False) + ) continue # both numpy and arkouda execute without error try: @@ -118,35 +132,49 @@ def do_op(lt, rt, ls, rs, isarkouda, oper): warnings.warn( f"Cannot detect return dtype of ak result: {akres} (np result: {npres})" ) - results["both_implement"].append((expression, str(akres), False, True, False)) + results["both_implement"].append( + (expression, str(akres), False, True, False) + ) continue if akrestype != npres.dtype: restypes = f"{npres.dtype}(np) vs. 
{akrestype}(ak)" - results["both_implement"].append((expression, restypes, False, True, False)) + results["both_implement"].append( + (expression, restypes, False, True, False) + ) continue try: akasnp = akres.to_ndarray() except Exception: warnings.warn(f"Could not convert to ndarray: {akres}") - results["both_implement"].append((expression, str(akres), True, False, False)) + results["both_implement"].append( + (expression, str(akres), True, False, False) + ) continue if not np.allclose(akasnp, npres, equal_nan=True): res = f"np: {npres}\nak: {akasnp}" - results["both_implement"].append((expression, res, False, False, True)) + results["both_implement"].append( + (expression, res, False, False, True) + ) continue # Finally, both numpy and arkouda agree on result results["both_implement"].append((expression, "", False, False, False)) - print(f'# ops not implemented by numpy or arkouda: {len(results["neither_implement"])}') + print( + f'# ops not implemented by numpy or arkouda: {len(results["neither_implement"])}' + ) if verbose: for expression, err in results["neither_implement"]: print(expression) - print(f'# ops implemented by numpy but not arkouda: {len(results["numpy_minus_arkouda"])}') + print( + f'# ops implemented by numpy but not arkouda: {len(results["numpy_minus_arkouda"])}' + ) if verbose: for expression, err, flag in results["numpy_minus_arkouda"]: print(expression) - print(f'# ops implemented by arkouda but not numpy: {len(results["arkouda_minus_numpy"])}') + print( + f'# ops implemented by arkouda but not numpy: {len(results["arkouda_minus_numpy"])}' + ) if verbose: for expression, res, flag in results["arkouda_minus_numpy"]: print(expression, " -> ", res) @@ -180,17 +208,23 @@ def test_pdarray_and_scalar_ops(self, dtype): pda = ak.ones(100, dtype=dtype) npa = np.ones(100, dtype=dtype) for scal in 1, np.int64(1): - for ak_add, np_add in zip((pda + scal, scal + pda), (npa + scal, scal + npa)): + for ak_add, np_add in zip( + (pda + scal, scal + pda), (npa + scal, scal + npa) + ): assert isinstance(ak_add, ak.pdarrayclass.pdarray) assert np.allclose(ak_add.to_ndarray(), np_add) for scal in 2, np.int64(2): - for ak_sub, np_sub in zip((pda - scal, scal - pda), (npa - scal, scal - npa)): + for ak_sub, np_sub in zip( + (pda - scal, scal - pda), (npa - scal, scal - npa) + ): assert isinstance(ak_sub, ak.pdarrayclass.pdarray) assert np.allclose(ak_sub.to_ndarray(), np_sub) for scal in 5, np.int64(5): - for ak_mul, np_mul in zip((pda * scal, scal * pda), (npa * scal, scal * npa)): + for ak_mul, np_mul in zip( + (pda * scal, scal * pda), (npa * scal, scal * npa) + ): assert isinstance(ak_mul, ak.pdarrayclass.pdarray) assert np.allclose(ak_mul.to_ndarray(), np_mul) @@ -198,7 +232,9 @@ def test_pdarray_and_scalar_ops(self, dtype): pda *= 15 npa *= 15 for scal in 3, np.int64(3): - for ak_div, np_div in zip((pda / scal, scal / pda), (npa / scal, scal / npa)): + for ak_div, np_div in zip( + (pda / scal, scal / pda), (npa / scal, scal / npa) + ): assert isinstance(ak_div, ak.pdarrayclass.pdarray) assert np.allclose(ak_div.to_ndarray(), np_div) @@ -216,13 +252,16 @@ def test_concatenation(self, dtype): def test_max_bits_concatenation(self): # reproducer for issue #2802 - concatenated = ak.concatenate([ak.arange(5, max_bits=3), ak.arange(2**200 - 1, 2**200 + 4)]) + concatenated = ak.concatenate( + [ak.arange(5, max_bits=3), ak.arange(2**200 - 1, 2**200 + 4)] + ) assert concatenated.max_bits == 3 assert [0, 1, 2, 3, 4, 7, 0, 1, 2, 3] == concatenated.to_list() def test_fixed_concatenate(self): 
for pda1, pda2 in zip( - (ak.arange(4), ak.linspace(0, 3, 4)), (ak.arange(4, 7), ak.linspace(4, 6, 3)) + (ak.arange(4), ak.linspace(0, 3, 4)), + (ak.arange(4, 7), ak.linspace(4, 6, 3)), ): ans = list(range(7)) assert ak.concatenate([pda1, pda2]).to_list() == ans @@ -285,28 +324,64 @@ def test_int_uint_binops(self): ak_uint = ak.array(np_uint) # Vector-Vector Case (Division and Floor Division) - assert np.allclose((ak_uint / ak_uint).to_ndarray(), np_uint / np_uint, equal_nan=True) - assert np.allclose((ak_int / ak_uint).to_ndarray(), np_int / np_uint, equal_nan=True) - assert np.allclose((ak_uint / ak_int).to_ndarray(), np_uint / np_int, equal_nan=True) - assert np.allclose((ak_uint // ak_uint).to_ndarray(), np_uint // np_uint, equal_nan=True) - assert np.allclose((ak_int // ak_uint).to_ndarray(), np_int // np_uint, equal_nan=True) - assert np.allclose((ak_uint // ak_int).to_ndarray(), np_uint // np_int, equal_nan=True) + assert np.allclose( + (ak_uint / ak_uint).to_ndarray(), np_uint / np_uint, equal_nan=True + ) + assert np.allclose( + (ak_int / ak_uint).to_ndarray(), np_int / np_uint, equal_nan=True + ) + assert np.allclose( + (ak_uint / ak_int).to_ndarray(), np_uint / np_int, equal_nan=True + ) + assert np.allclose( + (ak_uint // ak_uint).to_ndarray(), np_uint // np_uint, equal_nan=True + ) + assert np.allclose( + (ak_int // ak_uint).to_ndarray(), np_int // np_uint, equal_nan=True + ) + assert np.allclose( + (ak_uint // ak_int).to_ndarray(), np_uint // np_int, equal_nan=True + ) # Scalar-Vector Case (Division and Floor Division) - assert np.allclose((ak_uint[0] / ak_uint).to_ndarray(), np_uint[0] / np_uint, equal_nan=True) - assert np.allclose((ak_int[0] / ak_uint).to_ndarray(), np_int[0] / np_uint, equal_nan=True) - assert np.allclose((ak_uint[0] / ak_int).to_ndarray(), np_uint[0] / np_int, equal_nan=True) - assert np.allclose((ak_uint[0] // ak_uint).to_ndarray(), np_uint[0] // np_uint, equal_nan=True) - assert np.allclose((ak_int[0] // ak_uint).to_ndarray(), np_int[0] // np_uint, equal_nan=True) - assert np.allclose((ak_uint[0] // ak_int).to_ndarray(), np_uint[0] // np_int, equal_nan=True) + assert np.allclose( + (ak_uint[0] / ak_uint).to_ndarray(), np_uint[0] / np_uint, equal_nan=True + ) + assert np.allclose( + (ak_int[0] / ak_uint).to_ndarray(), np_int[0] / np_uint, equal_nan=True + ) + assert np.allclose( + (ak_uint[0] / ak_int).to_ndarray(), np_uint[0] / np_int, equal_nan=True + ) + assert np.allclose( + (ak_uint[0] // ak_uint).to_ndarray(), np_uint[0] // np_uint, equal_nan=True + ) + assert np.allclose( + (ak_int[0] // ak_uint).to_ndarray(), np_int[0] // np_uint, equal_nan=True + ) + assert np.allclose( + (ak_uint[0] // ak_int).to_ndarray(), np_uint[0] // np_int, equal_nan=True + ) # Vector-Scalar Case (Division and Floor Division) - assert np.allclose((ak_uint / ak_uint[0]).to_ndarray(), np_uint / np_uint[0], equal_nan=True) - assert np.allclose((ak_int / ak_uint[0]).to_ndarray(), np_int / np_uint[0], equal_nan=True) - assert np.allclose((ak_uint / ak_int[0]).to_ndarray(), np_uint / np_int[0], equal_nan=True) - assert np.allclose((ak_uint // ak_uint[0]).to_ndarray(), np_uint // np_uint[0], equal_nan=True) - assert np.allclose((ak_int // ak_uint[0]).to_ndarray(), np_int // np_uint[0], equal_nan=True) - assert np.allclose((ak_uint // ak_int[0]).to_ndarray(), np_uint // np_int[0], equal_nan=True) + assert np.allclose( + (ak_uint / ak_uint[0]).to_ndarray(), np_uint / np_uint[0], equal_nan=True + ) + assert np.allclose( + (ak_int / ak_uint[0]).to_ndarray(), np_int / np_uint[0], 
equal_nan=True + ) + assert np.allclose( + (ak_uint / ak_int[0]).to_ndarray(), np_uint / np_int[0], equal_nan=True + ) + assert np.allclose( + (ak_uint // ak_uint[0]).to_ndarray(), np_uint // np_uint[0], equal_nan=True + ) + assert np.allclose( + (ak_int // ak_uint[0]).to_ndarray(), np_int // np_uint[0], equal_nan=True + ) + assert np.allclose( + (ak_uint // ak_int[0]).to_ndarray(), np_uint // np_int[0], equal_nan=True + ) def test_float_uint_binops(self): # Test fix for issue #1620 @@ -323,38 +398,90 @@ def test_float_uint_binops(self): ak_floats = [ak_float, scalar_float] np_floats = [np_float, scalar_float] for aku, akf, npu, npf in zip(ak_uints, ak_floats, np_uints, np_floats): - assert np.allclose((ak_uint + akf).to_ndarray(), np_uint + npf, equal_nan=True) - assert np.allclose((akf + ak_uint).to_ndarray(), npf + np_uint, equal_nan=True) - assert np.allclose((ak_float + aku).to_ndarray(), np_float + npu, equal_nan=True) - assert np.allclose((aku + ak_float).to_ndarray(), npu + np_float, equal_nan=True) - - assert np.allclose((ak_uint - akf).to_ndarray(), np_uint - npf, equal_nan=True) - assert np.allclose((akf - ak_uint).to_ndarray(), npf - np_uint, equal_nan=True) - assert np.allclose((ak_float - aku).to_ndarray(), np_float - npu, equal_nan=True) - assert np.allclose((aku - ak_float).to_ndarray(), npu - np_float, equal_nan=True) - - assert np.allclose((ak_uint * akf).to_ndarray(), np_uint * npf, equal_nan=True) - assert np.allclose((akf * ak_uint).to_ndarray(), npf * np_uint, equal_nan=True) - assert np.allclose((ak_float * aku).to_ndarray(), np_float * npu, equal_nan=True) - assert np.allclose((aku * ak_float).to_ndarray(), npu * np_float, equal_nan=True) - - assert np.allclose((ak_uint / akf).to_ndarray(), np_uint / npf, equal_nan=True) - assert np.allclose((akf / ak_uint).to_ndarray(), npf / np_uint, equal_nan=True) - assert np.allclose((ak_float / aku).to_ndarray(), np_float / npu, equal_nan=True) - assert np.allclose((aku / ak_float).to_ndarray(), npu / np_float, equal_nan=True) - - assert np.allclose((ak_uint // akf).to_ndarray(), np_uint // npf, equal_nan=True) - assert np.allclose((akf // ak_uint).to_ndarray(), npf // np_uint, equal_nan=True) - assert np.allclose((ak_float // aku).to_ndarray(), np_float // npu, equal_nan=True) - assert np.allclose((aku // ak_float).to_ndarray(), npu // np_float, equal_nan=True) - - assert np.allclose((ak_uint**akf).to_ndarray(), np_uint**npf, equal_nan=True) - assert np.allclose((akf**ak_uint).to_ndarray(), npf**np_uint, equal_nan=True) - assert np.allclose((ak_float**aku).to_ndarray(), np_float**npu, equal_nan=True) - assert np.allclose((aku**ak_float).to_ndarray(), npu**np_float, equal_nan=True) - - assert np.allclose((ak_float % aku).to_ndarray(), np_float % npu, equal_nan=True) - assert np.allclose((aku % ak_float).to_ndarray(), npu % np_float, equal_nan=True) + assert np.allclose( + (ak_uint + akf).to_ndarray(), np_uint + npf, equal_nan=True + ) + assert np.allclose( + (akf + ak_uint).to_ndarray(), npf + np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float + aku).to_ndarray(), np_float + npu, equal_nan=True + ) + assert np.allclose( + (aku + ak_float).to_ndarray(), npu + np_float, equal_nan=True + ) + + assert np.allclose( + (ak_uint - akf).to_ndarray(), np_uint - npf, equal_nan=True + ) + assert np.allclose( + (akf - ak_uint).to_ndarray(), npf - np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float - aku).to_ndarray(), np_float - npu, equal_nan=True + ) + assert np.allclose( + (aku - ak_float).to_ndarray(), npu - 
np_float, equal_nan=True + ) + + assert np.allclose( + (ak_uint * akf).to_ndarray(), np_uint * npf, equal_nan=True + ) + assert np.allclose( + (akf * ak_uint).to_ndarray(), npf * np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float * aku).to_ndarray(), np_float * npu, equal_nan=True + ) + assert np.allclose( + (aku * ak_float).to_ndarray(), npu * np_float, equal_nan=True + ) + + assert np.allclose( + (ak_uint / akf).to_ndarray(), np_uint / npf, equal_nan=True + ) + assert np.allclose( + (akf / ak_uint).to_ndarray(), npf / np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float / aku).to_ndarray(), np_float / npu, equal_nan=True + ) + assert np.allclose( + (aku / ak_float).to_ndarray(), npu / np_float, equal_nan=True + ) + + assert np.allclose( + (ak_uint // akf).to_ndarray(), np_uint // npf, equal_nan=True + ) + assert np.allclose( + (akf // ak_uint).to_ndarray(), npf // np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float // aku).to_ndarray(), np_float // npu, equal_nan=True + ) + assert np.allclose( + (aku // ak_float).to_ndarray(), npu // np_float, equal_nan=True + ) + + assert np.allclose( + (ak_uint**akf).to_ndarray(), np_uint**npf, equal_nan=True + ) + assert np.allclose( + (akf**ak_uint).to_ndarray(), npf**np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float**aku).to_ndarray(), np_float**npu, equal_nan=True + ) + assert np.allclose( + (aku**ak_float).to_ndarray(), npu**np_float, equal_nan=True + ) + + assert np.allclose( + (ak_float % aku).to_ndarray(), np_float % npu, equal_nan=True + ) + assert np.allclose( + (aku % ak_float).to_ndarray(), npu % np_float, equal_nan=True + ) def test_shift_maxbits_binop(self): # This tests for a bug when left shifting by a value >=64 bits for int/uint, Issue #2099 @@ -382,9 +509,15 @@ def test_shift_maxbits_binop(self): assert (ak_arr >> ak_shift).to_list() == (np_arr >> np_shift).tolist() # Binopvv case, Mixed type - ak_shift_other_dtype = ak.cast(ak_shift, "int64" if dtype != "int64" else "uint64") - assert (ak_arr << ak_shift_other_dtype).to_list() == (np_arr << np_shift).tolist() - assert (ak_arr >> ak_shift_other_dtype).to_list() == (np_arr >> np_shift).tolist() + ak_shift_other_dtype = ak.cast( + ak_shift, "int64" if dtype != "int64" else "uint64" + ) + assert (ak_arr << ak_shift_other_dtype).to_list() == ( + np_arr << np_shift + ).tolist() + assert (ak_arr >> ak_shift_other_dtype).to_list() == ( + np_arr >> np_shift + ).tolist() def test_shift_bool_int64_binop(self): # This tests for a missing implementation of bit shifting booleans and ints, Issue #2945 @@ -411,24 +544,30 @@ def test_shift_bool_int64_binop(self): assert np.allclose((ak_bool[0] >> ak_int).to_ndarray(), np_bool[0] >> np_int) assert np.allclose((ak_bool[0] << ak_int).to_ndarray(), np_bool[0] << np_int) - def test_shift_equals_scalar_binops(self): - vector_pairs = [ - (ak.arange(0, 5, dtype=ak.int64), np.arange(5, dtype=np.int64)), - (ak.arange(0, 5, dtype=ak.uint64), np.arange(5, dtype=np.uint64)), + @pytest.mark.parametrize("dtype", [ak.int64, ak.uint64]) + def test_shift_equals_scalar_binops(self, dtype): + + ak_vector = ak.arange(0, 5, dtype=dtype) + np_vector = np.arange(5, dtype=dtype) + shift_scalars = [ + dtype(1), + dtype(5), + 1, + 5, + True, + False, ] - shift_scalars = [np.int64(1), np.int64(5), np.uint64(1), np.uint64(5), True, False] - for ak_vector, np_vector in vector_pairs: - for x in shift_scalars: - assert ak_vector.to_list() == np_vector.tolist() + for x in shift_scalars: + assert ak_vector.to_list() == np_vector.tolist() - 
ak_vector <<= x - np_vector <<= x - assert ak_vector.to_list() == np_vector.tolist() + ak_vector <<= x + np_vector <<= x + assert ak_vector.to_list() == np_vector.tolist() - ak_vector >>= x - np_vector >>= x - assert ak_vector.to_list() == np_vector.tolist() + ak_vector >>= x + np_vector >>= x + assert ak_vector.to_list() == np_vector.tolist() def test_shift_equals_vector_binops(self): vector_pairs = [ @@ -446,7 +585,9 @@ def test_shift_equals_vector_binops(self): for ak_vector, np_vector in vector_pairs: for v in shift_vectors: - if (v[0].dtype.kind != "b") and (ak_vector[0].dtype.kind != v[0].dtype.kind): + if (v[0].dtype.kind != "b") and ( + ak_vector[0].dtype.kind != v[0].dtype.kind + ): continue assert ak_vector.to_list() == np_vector.tolist() @@ -475,7 +616,10 @@ def test_concatenate_type_preservation(self): # test single and empty assert isinstance(ak.concatenate([special_one]), special_type) assert special_one.to_list() == ak.concatenate([special_one]).to_list() - assert isinstance(ak.concatenate([special_type(ak.array([], dtype=ak.int64))]), special_type) + assert isinstance( + ak.concatenate([special_type(ak.array([], dtype=ak.int64))]), + special_type, + ) # verify ak.util.concatenate still works special_aku_concat = akuconcat([special_one, special_two]) @@ -492,17 +636,25 @@ def test_floor_div_edge_cases(self): ak_edge_cases = ak.array(np_edge_cases) for s in scalar_edge_cases: - assert np.allclose((ak_edge_cases // s).to_ndarray(), np_edge_cases // s, equal_nan=True) - assert np.allclose((s // ak_edge_cases).to_ndarray(), s // np_edge_cases, equal_nan=True) + assert np.allclose( + (ak_edge_cases // s).to_ndarray(), np_edge_cases // s, equal_nan=True + ) + assert np.allclose( + (s // ak_edge_cases).to_ndarray(), s // np_edge_cases, equal_nan=True + ) # test both vector // vector n_vect = np.full(len(scalar_edge_cases), s) a_vect = ak.array(n_vect) assert np.allclose( - (ak_edge_cases // a_vect).to_ndarray(), np_edge_cases // n_vect, equal_nan=True + (ak_edge_cases // a_vect).to_ndarray(), + np_edge_cases // n_vect, + equal_nan=True, ) assert np.allclose( - (a_vect // ak_edge_cases).to_ndarray(), n_vect // np_edge_cases, equal_nan=True + (a_vect // ak_edge_cases).to_ndarray(), + n_vect // np_edge_cases, + equal_nan=True, ) def test_pda_power(self): @@ -510,7 +662,10 @@ def test_pda_power(self): a = ak.array(n) assert ak.power(a, 2).to_list() == np.power(n, 2).tolist() - assert ak.power(a, ak.array([2, 3, 4])).to_list() == np.power(n, [2, 3, 4]).tolist() + assert ( + ak.power(a, ak.array([2, 3, 4])).to_list() + == np.power(n, [2, 3, 4]).tolist() + ) # Test a singleton with and without a Boolean argument a = ak.array([7]) @@ -524,16 +679,22 @@ def test_pda_power(self): # Test a singleton with a mixed Boolean argument a = ak.arange(10) - assert [i if i % 2 else i**2 for i in range(10)] == ak.power(a, 2, a % 2 == 0).to_list() + assert [i if i % 2 else i**2 for i in range(10)] == ak.power( + a, 2, a % 2 == 0 + ).to_list() # Test invalid input, negative n = np.array([-1.0, -3.0]) a = ak.array(n) - assert np.allclose(ak.power(a, 0.5).to_ndarray(), np.power(n, 0.5), equal_nan=True) + assert np.allclose( + ak.power(a, 0.5).to_ndarray(), np.power(n, 0.5), equal_nan=True + ) # Test edge case input, inf infs = [np.inf, -np.inf] - assert (np.power(np.array(infs), 2) == ak.power(ak.array(infs), 2).to_ndarray()).all() + assert ( + np.power(np.array(infs), 2) == ak.power(ak.array(infs), 2).to_ndarray() + ).all() def test_pda_sqrt(self): n = np.array([4, 16.0, -1, 0, np.inf]) @@ -542,7 +703,9 @@ 
def test_pda_sqrt(self): # Test with a mixed Boolean array a = ak.arange(5) - assert [i if i % 2 else i**0.5 for i in range(5)] == ak.sqrt(a, a % 2 == 0).to_list() + assert [i if i % 2 else i**0.5 for i in range(5)] == ak.sqrt( + a, a % 2 == 0 + ).to_list() def test_uint_and_bigint_operation_equals(self): def declare_arrays(): @@ -685,7 +848,10 @@ def test_str_repr(self): ] assert ak.linspace(0, 10, 20).__str__() in answers assert "[False False False]" == ak.isnan(ak.array([1.1, 2.3, 5])).__str__() - assert "[False False False ... False False False]" == ak.isnan(ak.linspace(0, 10, 20)).__str__() + assert ( + "[False False False ... False False False]" + == ak.isnan(ak.linspace(0, 10, 20)).__str__() + ) # Test __repr__() assert "array([1 2 3])" == ak.array([1, 2, 3]).__repr__() @@ -703,7 +869,9 @@ def test_str_repr(self): "array([0.00000000000000000 0.52631578947368418 1.0526315789473684 ... 8.9473684210526319 9.473684210526315 10.00000000000000000])", ] assert ak.linspace(0, 10, 20).__repr__() in answers - assert "array([False False False])" == ak.isnan(ak.array([1.1, 2.3, 5])).__repr__() + assert ( + "array([False False False])" == ak.isnan(ak.array([1.1, 2.3, 5])).__repr__() + ) assert ( "array([False False False ... False False False])" == ak.isnan(ak.linspace(0, 10, 20)).__repr__() @@ -715,7 +883,9 @@ def test_str_repr(self): def test_bigint_binops(self): # test bigint array with max_bits=64 against an equivalent uint64 u = ak.array([0, 1, 2, 2**64 - 3, 2**64 - 2, 2**64 - 1], dtype=ak.uint64) - bi = ak.array([0, 1, 2, 2**64 - 3, 2**64 - 2, 2**64 - 1], dtype=ak.bigint, max_bits=64) + bi = ak.array( + [0, 1, 2, 2**64 - 3, 2**64 - 2, 2**64 - 1], dtype=ak.bigint, max_bits=64 + ) mod_by = 2**64 bi_range = ak.arange(6, dtype=ak.bigint) @@ -729,16 +899,28 @@ def test_bigint_binops(self): # logical bit ops: only work if both arguments are bigint assert (u & u_range).to_list() == (bi & bi_range).to_list() - assert [(bi[i] & bi_scalar) % mod_by for i in range(bi.size)] == (bi & bi_scalar).to_list() - assert [(bi_scalar & bi[i]) % mod_by for i in range(bi.size)] == (bi_scalar & bi).to_list() + assert [(bi[i] & bi_scalar) % mod_by for i in range(bi.size)] == ( + bi & bi_scalar + ).to_list() + assert [(bi_scalar & bi[i]) % mod_by for i in range(bi.size)] == ( + bi_scalar & bi + ).to_list() assert (u | u_range).to_list() == (bi | bi_range).to_list() - assert [(bi[i] | bi_scalar) % mod_by for i in range(bi.size)] == (bi | bi_scalar).to_list() - assert [(bi_scalar | bi[i]) % mod_by for i in range(bi.size)] == (bi_scalar | bi).to_list() + assert [(bi[i] | bi_scalar) % mod_by for i in range(bi.size)] == ( + bi | bi_scalar + ).to_list() + assert [(bi_scalar | bi[i]) % mod_by for i in range(bi.size)] == ( + bi_scalar | bi + ).to_list() assert (u ^ u_range).to_list() == (bi ^ bi_range).to_list() - assert [(bi[i] ^ bi_scalar) % mod_by for i in range(bi.size)] == (bi ^ bi_scalar).to_list() - assert [(bi_scalar ^ bi[i]) % mod_by for i in range(bi.size)] == (bi_scalar ^ bi).to_list() + assert [(bi[i] ^ bi_scalar) % mod_by for i in range(bi.size)] == ( + bi ^ bi_scalar + ).to_list() + assert [(bi_scalar ^ bi[i]) % mod_by for i in range(bi.size)] == ( + bi_scalar ^ bi + ).to_list() # bit shifts: left side must be bigint, right side must be int/uint ans = u << u_range @@ -813,22 +995,41 @@ def test_bigint_rotate(self): # rotate by scalar for i in range(10): - assert ak.array([10], dtype=ak.bigint, max_bits=4).rotl(i) == 10 if i % 2 == 0 else 5 - assert ak.array([10], dtype=ak.bigint, max_bits=4).rotr(i) == 10 if 
i % 2 == 0 else 5 + assert ( + ak.array([10], dtype=ak.bigint, max_bits=4).rotl(i) == 10 + if i % 2 == 0 + else 5 + ) + assert ( + ak.array([10], dtype=ak.bigint, max_bits=4).rotr(i) == 10 + if i % 2 == 0 + else 5 + ) # rotate by array - left_rot = ak.bigint_from_uint_arrays([ak.full(10, 10, ak.uint64)], max_bits=4).rotl( - ak.arange(10) - ) - right_rot = ak.bigint_from_uint_arrays([ak.full(10, 10, ak.uint64)], max_bits=4).rotr( - ak.arange(10) - ) + left_rot = ak.bigint_from_uint_arrays( + [ak.full(10, 10, ak.uint64)], max_bits=4 + ).rotl(ak.arange(10)) + right_rot = ak.bigint_from_uint_arrays( + [ak.full(10, 10, ak.uint64)], max_bits=4 + ).rotr(ak.arange(10)) ans = [10 if i % 2 == 0 else 5 for i in range(10)] assert left_rot.to_list() == ans assert right_rot.to_list() == ans def test_float_mods(self): - edge_cases = [np.nan, -np.inf, -7.0, -3.14, -0.0, 0.0, 3.14, 7.0, np.inf, np.nan] + edge_cases = [ + np.nan, + -np.inf, + -7.0, + -3.14, + -0.0, + 0.0, + 3.14, + 7.0, + np.inf, + np.nan, + ] # get 2 random permutations of edgecases rand_edge_cases1 = np.random.permutation(edge_cases) @@ -842,12 +1043,22 @@ def test_float_mods(self): uint_arr = np.arange(2**64 - 10, 2**64, dtype=np.uint64) u_scal = np.uint(2**63 + 1) - args = [rand_edge_cases1, rand_edge_cases2, float_arr, int_arr, uint_arr, i_scal, u_scal] + args = [ + rand_edge_cases1, + rand_edge_cases2, + float_arr, + int_arr, + uint_arr, + i_scal, + u_scal, + ] # add all the float edge cases as scalars args.extend(edge_cases) def type_helper(x): - return ak.resolve_scalar_dtype(x) if ak.isSupportedNumber(x) else x.dtype.name + return ( + ak.resolve_scalar_dtype(x) if ak.isSupportedNumber(x) else x.dtype.name + ) # take the product of args (i.e. every possible combination) for a, b in product(args, args): @@ -863,8 +1074,12 @@ def type_helper(x): ak_b = b if ak.isSupportedNumber(b) else ak.array(b) # verify mod and fmod match numpy - assert np.allclose(ak.mod(ak_a, ak_b).to_ndarray(), np.mod(a, b), equal_nan=True) - assert np.allclose(ak.fmod(ak_a, ak_b).to_ndarray(), np.fmod(a, b), equal_nan=True) + assert np.allclose( + ak.mod(ak_a, ak_b).to_ndarray(), np.mod(a, b), equal_nan=True + ) + assert np.allclose( + ak.fmod(ak_a, ak_b).to_ndarray(), np.fmod(a, b), equal_nan=True + ) npf = np.array([2.23, 3.14, 3.08, 5.7]) npf2 = np.array([3.14, 2.23, 1.1, 4.1]) diff --git a/tests/pdarray_creation_test.py b/tests/pdarray_creation_test.py index 8d417633b37..ecdf643f455 100644 --- a/tests/pdarray_creation_test.py +++ b/tests/pdarray_creation_test.py @@ -39,13 +39,17 @@ def test_array_creation(self, dtype): ak.array(deque(range(fixed_size)), dtype), ak.array([f"{i}" for i in range(fixed_size)], dtype=dtype), ]: - assert isinstance(pda, ak.pdarray if ak.dtype(dtype) != "str_" else ak.Strings) + assert isinstance( + pda, ak.pdarray if ak.dtype(dtype) != "str_" else ak.Strings + ) assert len(pda) == fixed_size assert dtype == pda.dtype @pytest.mark.skip_if_rank_not_compiled([3]) @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_array_creation_multi_dim(self, size, dtype): shape = (2, 2, size) for pda in [ @@ -57,7 +61,9 @@ def test_array_creation_multi_dim(self, size, dtype): assert dtype == pda.dtype @pytest.mark.skip_if_max_rank_greater_than(3) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, 
ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_array_creation_error(self, dtype): shape = (2, 2, 2, 2) with pytest.raises(ValueError): @@ -114,7 +120,9 @@ def test_array_creation_transpose_bug_reproducer(self): cols = 5 nda = np.random.randint(1, 10, (rows, cols)) - assert_arkouda_array_equal(ak.transpose(ak.array(nda)), ak.array(np.transpose(nda))) + assert_arkouda_array_equal( + ak.transpose(ak.array(nda)), ak.array(np.transpose(nda)) + ) def test_infer_shape_from_size(self): from arkouda.util import _infer_shape_from_size @@ -135,7 +143,9 @@ def test_bigint_creation(self): pda_from_str = ak.array([f"{i}" for i in range(bi, bi + 10)], dtype=ak.bigint) pda_from_int = ak.array([i for i in range(bi, bi + 10)]) - cast_from_segstr = ak.cast(ak.array([f"{i}" for i in range(bi, bi + 10)]), ak.bigint) + cast_from_segstr = ak.cast( + ak.array([f"{i}" for i in range(bi, bi + 10)]), ak.bigint + ) for pda in [pda_from_str, pda_from_int, cast_from_segstr]: assert isinstance(pda, ak.pdarray) assert 10 == len(pda) @@ -144,7 +154,8 @@ def test_bigint_creation(self): # test array and arange infer dtype assert ( - ak.array([bi, bi + 1, bi + 2, bi + 3, bi + 4]).to_list() == ak.arange(bi, bi + 5).to_list() + ak.array([bi, bi + 1, bi + 2, bi + 3, bi + 4]).to_list() + == ak.arange(bi, bi + 5).to_list() ) # test that max_bits being set results in a mod @@ -195,7 +206,9 @@ def test_arange_dtype(self, dtype): assert dtype == start_stop.dtype start_stop_stride = ak.arange(100, 105, 2, dtype=dtype) - assert np.arange(100, 105, 2, dtype=dtype).tolist() == start_stop_stride.to_list() + assert ( + np.arange(100, 105, 2, dtype=dtype).tolist() == start_stop_stride.to_list() + ) assert dtype == start_stop_stride.dtype def test_arange_misc(self): @@ -255,7 +268,9 @@ def test_randint_array_dtype(self, size, array_type): # tests with various dtypes for the other parameters passed to randint) @pytest.mark.parametrize("dtype", NUMERIC_SCALARS) def test_randint_num_dtype(self, dtype): - for test_array in ak.randint(dtype(0), 100, 1000), ak.randint(0, dtype(100), 1000): + for test_array in ak.randint(dtype(0), 100, 1000), ak.randint( + 0, dtype(100), 1000 + ): assert isinstance(test_array, ak.pdarray) assert 1000 == len(test_array) assert ak.int64 == test_array.dtype @@ -325,7 +340,9 @@ def test_randint_with_seed(self): assert values.to_list() == bools # Test that int_scalars covers uint8, uint16, uint32 - uint_arr = ak.randint(np.uint8(1), np.uint32(5), np.uint16(10), seed=np.uint8(2)) + uint_arr = ak.randint( + np.uint8(1), np.uint32(5), np.uint16(10), seed=np.uint8(2) + ) int_arr = ak.randint(1, 5, 10, seed=2) assert (uint_arr == int_arr).all() @@ -343,7 +360,9 @@ def test_uniform(self, size): 1.0441791878997098, ] == u_array.to_list() - u_array = ak.uniform(size=np.int64(3), low=np.int64(0), high=np.int64(5), seed=np.int64(0)) + u_array = ak.uniform( + size=np.int64(3), low=np.int64(0), high=np.int64(5), seed=np.int64(0) + ) assert [ 0.30013431967121934, 0.47383036230759112, @@ -360,12 +379,16 @@ def test_uniform(self, size): ak.uniform(low=0, high=5, size="100") # Test that int_scalars covers uint8, uint16, uint32 - uint_arr = ak.uniform(low=np.uint8(0), high=np.uint16(5), size=np.uint32(100), seed=np.uint8(1)) + uint_arr = ak.uniform( + low=np.uint8(0), high=np.uint16(5), size=np.uint32(100), seed=np.uint8(1) + ) int_arr = ak.uniform(low=0, high=5, size=100, seed=1) assert (uint_arr == int_arr).all() @pytest.mark.parametrize("size", 
pytest.prob_size) - @pytest.mark.parametrize("dtype", [ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint]) + @pytest.mark.parametrize( + "dtype", [ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint] + ) def test_zeros_dtype(self, size, dtype): zeros = ak.zeros(size, dtype) assert isinstance(zeros, ak.pdarray) @@ -373,14 +396,18 @@ def test_zeros_dtype(self, size, dtype): assert (0 == zeros).all() @pytest.mark.skip_if_rank_not_compiled([2]) - @pytest.mark.parametrize("dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_] + ) @pytest.mark.parametrize("shape", [0, 2, (2, 3)]) def test_ones_match_numpy(self, shape, dtype): assert_equivalent(ak.zeros(shape, dtype=dtype), np.zeros(shape, dtype=dtype)) @pytest.mark.skip_if_rank_not_compiled([3]) @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint]) + @pytest.mark.parametrize( + "dtype", [ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint] + ) def test_zeros_dtype_mult_dim(self, size, dtype): shape = (2, 2, size) zeros = ak.zeros(shape, dtype) @@ -390,7 +417,9 @@ def test_zeros_dtype_mult_dim(self, size, dtype): assert (0 == zeros).all() @pytest.mark.skip_if_max_rank_greater_than(3) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_zeros_error(self, dtype): shape = (2, 2, 2, 2) with pytest.raises(ValueError): @@ -411,7 +440,9 @@ def test_zeros_misc(self): for arg in np.uint8(5), np.uint16(5), np.uint32(5), str(5): assert (int_arr == ak.zeros(arg, dtype=ak.int64)).all() - @pytest.mark.parametrize("dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint] + ) @pytest.mark.parametrize("size", pytest.prob_size) def test_ones_dtype(self, size, dtype): ones = ak.ones(size, dtype) @@ -419,7 +450,9 @@ def test_ones_dtype(self, size, dtype): assert dtype == ones.dtype assert (1 == ones).all() - @pytest.mark.parametrize("dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint] + ) @pytest.mark.parametrize("size", pytest.prob_size) @pytest.mark.skip_if_rank_not_compiled([3]) def test_ones_dtype_multi_dim(self, size, dtype): @@ -431,7 +464,9 @@ def test_ones_dtype_multi_dim(self, size, dtype): assert (1 == ones).all() @pytest.mark.skip_if_max_rank_greater_than(3) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_ones_error(self, dtype): shape = (2, 2, 2, 2) with pytest.raises(ValueError): @@ -463,7 +498,9 @@ def test_ones_like(self, size, dtype): assert ones_like_arr.size == ran_arr.size @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_full_dtype(self, size, dtype): type_full = ak.full(size, 1, dtype) assert isinstance(type_full, ak.pdarray) @@ -471,7 +508,9 @@ def test_full_dtype(self, size, dtype): assert 
(1 == type_full).all() @pytest.mark.skip_if_rank_not_compiled([2]) - @pytest.mark.parametrize("dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_] + ) @pytest.mark.parametrize("shape", [0, 2, (2, 3)]) def test_full_match_numpy(self, shape, dtype): assert_equivalent( @@ -481,7 +520,9 @@ def test_full_match_numpy(self, shape, dtype): @pytest.mark.skip_if_rank_not_compiled([3]) @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_full_dtype_multi_dim(self, size, dtype): shape = (2, 2, size) type_full = ak.full(shape, 1, dtype) @@ -491,7 +532,9 @@ def test_full_dtype_multi_dim(self, size, dtype): assert (1 == type_full).all() @pytest.mark.skip_if_max_rank_greater_than(3) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_full_error(self, dtype): shape = (2, 2, 2, 2) with pytest.raises(ValueError): @@ -527,7 +570,9 @@ def test_full_misc(self): assert (int_arr == ak.full(*args, dtype=int)).all() @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_full_like(self, size, dtype): ran_arr = ak.full(size, 5, dtype) full_like_arr = ak.full_like(ran_arr, 1) @@ -537,7 +582,9 @@ def test_full_like(self, size, dtype): assert full_like_arr.size == ran_arr.size @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_zeros_like(self, size, dtype): ran_arr = ak.array(ak.arange(size, dtype=dtype)) zeros_like_arr = ak.zeros_like(ran_arr) @@ -566,7 +613,9 @@ def test_linspace(self): pda = ak.linspace(start=float(5.0), stop=float(0.0), length=np.int64(6)) assert 5.0000 == pda[0] assert 0.0000 == pda[5] - assert (pda.to_ndarray() == np.linspace(float(5.0), float(0.0), np.int64(6))).all() + assert ( + pda.to_ndarray() == np.linspace(float(5.0), float(0.0), np.int64(6)) + ).all() with pytest.raises(TypeError): ak.linspace(0, "100", 1000) @@ -639,7 +688,9 @@ def test_standard_normal_errors(self): @pytest.mark.parametrize("dtype", INT_SCALARS) def test_random_strings_uniform(self, dtype): - pda = ak.random_strings_uniform(minlen=dtype(1), maxlen=dtype(5), size=dtype(100)) + pda = ak.random_strings_uniform( + minlen=dtype(1), maxlen=dtype(5), size=dtype(100) + ) assert isinstance(pda, ak.Strings) assert 100 == len(pda) assert str == pda.dtype @@ -700,7 +751,9 @@ def test_random_strings_uniform_with_seed(self): "DSN", ] == pda.to_list() - pda = ak.random_strings_uniform(minlen=1, maxlen=5, seed=1, size=10, characters="printable") + pda = ak.random_strings_uniform( + minlen=1, maxlen=5, seed=1, size=10, characters="printable" + ) assert [ "eL", "6= l_median > l_int[i - 1]) or (l_int[i] < l_median <= l_int[i - 1]): + if (l_int[i] >= l_median > l_int[i - 1]) or ( + l_int[i] < l_median <= l_int[i - 1] + ): runs += 1 # no. 
of positive values @@ -927,7 +988,8 @@ def randint_randomness(self, size): runs_exp = ((2 * n1 * n2) / (n1 + n2)) + 1 stan_dev = math.sqrt( - (2 * n1 * n2 * (2 * n1 * n2 - n1 - n2)) / (((n1 + n2) ** 2) * (n1 + n2 - 1)) + (2 * n1 * n2 * (2 * n1 * n2 - n1 - n2)) + / (((n1 + n2) ** 2) * (n1 + n2 - 1)) ) if abs((runs - runs_exp) / stan_dev) < 1.9: diff --git a/tests/setops_test.py b/tests/setops_test.py index 213f1bcbef5..965ec0b1f3d 100644 --- a/tests/setops_test.py +++ b/tests/setops_test.py @@ -36,8 +36,8 @@ def make_np_arrays_small(dtype): a = np.array([-1, 0, 1, 3]).astype(dtype) b = np.array([-1, 2, 2, 3]).astype(dtype) elif dtype == ak.bigint: - a = np.array([-1, 0, 1, 3]).astype(ak.uint64) + 2**200 - b = np.array([-1, 2, 2, 3]).astype(ak.uint64) + 2**200 + a = np.array([i + 2**200 for i in [-1, 0, 1, 3]]) + b = np.array([i + 2**200 for i in [-1, 2, 2, 3]]) elif dtype == ak.bool_: a = np.array([True, False, False, True]).astype(dtype) b = np.array([True, True, False, False]).astype(dtype) @@ -51,8 +51,8 @@ def make_np_arrays_cross_type(dtype1, dtype2): a = np.array([-1, -3, 0, 1, 2, 3]).astype(dtype1) c = np.array([-1, 0, 0, 7, 8, 3]).astype(dtype1) elif dtype1 == ak.bigint: - a = np.array([-1, -3, 0, 1, 2, 3]).astype(ak.uint64) + 2**200 - c = np.array([-1, 0, 0, 7, 8, 3]).astype(ak.uint64) + 2**200 + a = np.array([i + 2**200 for i in [-1, -3, 0, 1, 2, 3]]) + c = np.array([i + 2**200 for i in [-1, 0, 0, 7, 8, 3]]) elif dtype1 == ak.bool_: a = np.array([True, False, False, True, True]) c = np.array([True, True, False, False, True]) @@ -63,8 +63,8 @@ def make_np_arrays_cross_type(dtype1, dtype2): b = np.array([-1, -11, 0, 4, 5, 3]).astype(dtype2) d = np.array([-1, -4, 0, 7, 8, 3]).astype(dtype2) elif dtype2 == ak.bigint: - b = np.array([-1, -11, 0, 4, 5, 3]).astype(ak.uint64) + 2**200 - d = np.array([-1, -4, 0, 7, 8, 3]).astype(ak.uint64) + 2**200 + b = np.array([i + 2**200 for i in [-1, -11, 0, 4, 5, 3]]) + d = np.array([i + 2**200 for i in [-1, -4, 0, 7, 8, 3]]) elif dtype2 == ak.bool_: b = np.array([True, True, False, False, True]) d = np.array([True, True, False, False, True])
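# Illustrative sketch of the bigint fixture change in tests/setops_test.py above,
# assuming NumPy 2.0 / NEP 50 promotion rules (this is an editor-supplied example,
# not part of the patch). Presumably the old pattern
# `np.array([...]).astype(ak.uint64) + 2**200` fails under NumPy 2.0 because a
# Python int that cannot be represented as uint64 raises OverflowError when mixed
# with a uint64 array, so the fixtures now build the values as Python ints directly,
# which NumPy stores as an object-dtype array of arbitrary-precision integers that
# arkouda can ingest as ak.bigint.
import numpy as np

values = [-1, 0, 1, 3]  # same small values used in make_np_arrays_small
a = np.array([i + 2**200 for i in values])  # each element stays a Python int > 2**64
print(a.dtype)  # expected: object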