From e3609edd04b92dba7e84325cc633301988345a63 Mon Sep 17 00:00:00 2001
From: Amanda Potts
Date: Fri, 14 Feb 2025 13:10:21 -0500
Subject: [PATCH] Closes #4098: upgrade to numpy 2.0.0

---
 arkouda-env-dev.yml                  |    2 +-
 arkouda-env.yml                      |    2 +-
 arkouda/numpy/__init__.py            |   33 +-
 arkouda/numpy/_numeric.py            |   22 +
 arkouda/numpy/dtypes/__init__.py     |   84 +
 arkouda/numpy/dtypes/dtypes.py       |   31 +-
 arkouda/numpy/exceptions/__init__.py |    2 +-
 arkouda/numpy/lib/__init__.py        |   20 +-
 arkouda/numpy/lib/npyio/__init__.py  |    1 +
 arkouda/numpy/rec/__init__.py        |    2 +-
 arkouda/pdarrayclass.py              |   71 +-
 arkouda/pdarraycreation.py           |   13 +-
 arkouda/util.py                      |    2 +-
 pydoc/requirements.txt               |    2 +-
 setup.py                             |    2 +-
 tests/dataframe_test.py              |  217 ++-
 tests/indexing_test.py               |    2 +-
 tests/io_test.py.working             | 2441 ++++++++++++++++++++++++++
 tests/numpy/dtypes_test.py           |   37 +-
 tests/operator_test.py               |  451 +++--
 tests/pdarray_creation_test.py       |  128 +-
 tests/setops_test.py                 |   12 +-
 22 files changed, 3281 insertions(+), 296 deletions(-)
 create mode 100644 arkouda/numpy/lib/npyio/__init__.py
 create mode 100644 tests/io_test.py.working

diff --git a/arkouda-env-dev.yml b/arkouda-env-dev.yml
index 16d66da822c..2d1986df93e 100644
--- a/arkouda-env-dev.yml
+++ b/arkouda-env-dev.yml
@@ -4,7 +4,7 @@ channels:
   - defaults
 dependencies:
   - python>=3.9,<3.12.4 # minimum 3.9
-  - numpy>=1.24.1,<2.0
+  - numpy>=2.0
   - pandas>=1.4.0,!=2.2.0
   - pyzmq>=20.0.0
   - tabulate
diff --git a/arkouda-env.yml b/arkouda-env.yml
index efced73406b..cb68d439285 100644
--- a/arkouda-env.yml
+++ b/arkouda-env.yml
@@ -4,7 +4,7 @@ channels:
   - defaults
 dependencies:
   - python>=3.9,<3.12.4 # minimum 3.9
-  - numpy>=1.24.1,<2.0
+  - numpy>=2.0
   - pandas>=1.4.0,!=2.2.0
   - pyzmq>=20.0.0
   - tabulate
diff --git a/arkouda/numpy/__init__.py b/arkouda/numpy/__init__.py
index 34387451601..c37af9b1e59 100644
--- a/arkouda/numpy/__init__.py
+++ b/arkouda/numpy/__init__.py
@@ -1,15 +1,6 @@
 # flake8: noqa
 from numpy import (  # noqa
-    NAN,
-    NINF,
-    NZERO,
-    PINF,
-    PZERO,
-    DataSource,
     False_,
-    Inf,
-    Infinity,
-    NaN,
     ScalarType,
     True_,
     base_repr,
@@ -17,9 +8,7 @@
     byte,
     bytes_,
     cdouble,
-    cfloat,
     clongdouble,
-    clongfloat,
     compat,
     csingle,
     datetime64,
@@ -28,7 +17,6 @@
     euler_gamma,
     finfo,
     flexible,
-    float_,
     floating,
     format_float_positional,
     format_float_scientific,
@@ -36,26 +24,22 @@
     iinfo,
     inexact,
     inf,
-    infty,
     intc,
     intp,
     isscalar,
-    issctype,
     issubdtype,
     longdouble,
-    longfloat,
     longlong,
-    maximum_sctype,
     nan,
     number,
     pi,
     promote_types,
     sctypeDict,
-    sctypes,
     short,
     signedinteger,
     single,
     timedelta64,
+    typename,
     ubyte,
     uint,
     uintc,
@@ -66,20 +50,7 @@
     void,
 )
 
-from arkouda.numpy import (
-    _builtins,
-    _mat,
-    _typing,
-    char,
-    ctypeslib,
-    dtypes,
-    exceptions,
-    fft,
-    lib,
-    linalg,
-    ma,
-    rec,
-)
+from arkouda.numpy.lib import *
 from arkouda.numpy._builtins import *
 from arkouda.numpy._mat import *
 from arkouda.numpy._typing import *
diff --git a/arkouda/numpy/_numeric.py b/arkouda/numpy/_numeric.py
index 175efa9dea4..b0eaa35aa02 100644
--- a/arkouda/numpy/_numeric.py
+++ b/arkouda/numpy/_numeric.py
@@ -119,6 +119,28 @@ def _merge_where(new_pda, where, ret):
     return new_pda
 
 
+def can_cast(from_, to) -> ak_bool:
+    from arkouda.util import is_int
+    from arkouda.numpy.dtypes import uint64 as ak_uint64
+    from arkouda.numpy.dtypes import _is_dtype_in_union
+    from arkouda.numpy.dtypes import isSupportedInt
+    from arkouda.numpy.dtypes import dtype as ak_dtype
+
+    if isSupportedInt(from_) and (from_ < 2**64) and (from_ >= 0) and (to == ak_dtype(ak_uint64)):
+        return True
+
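+    # Note: with NEP 50 in numpy 2.0, np.can_cast no longer accepts Python int,
+    # float, or complex values (it raises a TypeError), so Python ints that fit
+    # in uint64 are special-cased above and np.can_cast is only consulted below
+    # for numpy scalars and dtypes.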
+    if (np.isscalar(from_) or _is_dtype_in_union(from_, numeric_scalars)) and not isinstance(
+        from_, (int, float, complex)
+    ):
+        return np.can_cast(from_, to)
+
+    return False
+
+
 @typechecked
 def cast(
     pda: Union[pdarray, Strings, Categorical],  # type: ignore
diff --git a/arkouda/numpy/dtypes/__init__.py b/arkouda/numpy/dtypes/__init__.py
index ee89c72a1d2..d2cd49a61c4 100644
--- a/arkouda/numpy/dtypes/__init__.py
+++ b/arkouda/numpy/dtypes/__init__.py
@@ -36,3 +36,87 @@
 )
 
 from .dtypes import *
+
+__all__ = [
+    "BoolDType",
+    "ByteDType",
+    "BytesDType",
+    "CLongDoubleDType",
+    "Complex64DType",
+    "Complex128DType",
+    "DateTime64DType",
+    "Float16DType",
+    "Float32DType",
+    "Float64DType",
+    "Int8DType",
+    "Int16DType",
+    "Int32DType",
+    "Int64DType",
+    "IntDType",
+    "LongDoubleDType",
+    "LongDType",
+    "LongLongDType",
+    "ObjectDType",
+    "ShortDType",
+    "StrDType",
+    "TimeDelta64DType",
+    "UByteDType",
+    "UInt8DType",
+    "UInt16DType",
+    "UInt32DType",
+    "UInt64DType",
+    "UIntDType",
+    "ULongDType",
+    "ULongLongDType",
+    "UShortDType",
+    "VoidDType",
+    "_datatype_check",
+    "ARKOUDA_SUPPORTED_DTYPES",
+    "ARKOUDA_SUPPORTED_INTS",
+    "DType",
+    "DTypeObjects",
+    "DTypes",
+    "NUMBER_FORMAT_STRINGS",
+    "NumericDTypes",
+    "ScalarDTypes",
+    "SeriesDTypes",
+    "_is_dtype_in_union",
+    "_val_isinstance_of_union",
+    "all_scalars",
+    "bigint",
+    "bitType",
+    "bool_",
+    "bool_scalars",
+    "complex128",
+    "complex64",
+    "dtype",
+    "float16",
+    "float32",
+    "float64",
+    "float_scalars",
+    "get_byteorder",
+    "get_server_byteorder",
+    "int16",
+    "int32",
+    "int64",
+    "int8",
+    "intTypes",
+    "int_scalars",
+    "isSupportedBool",
+    "isSupportedDType",
+    "isSupportedFloat",
+    "isSupportedInt",
+    "isSupportedNumber",
+    "numeric_and_bool_scalars",
+    "numeric_scalars",
+    "numpy_scalars",
+    "resolve_scalar_dtype",
+    "str_",
+    "str_scalars",
+    "uint16",
+    "uint32",
+    "uint64",
+    "uint8",
+]
diff --git a/arkouda/numpy/dtypes/dtypes.py b/arkouda/numpy/dtypes/dtypes.py
index e0b2b0117f7..d4fbe179eaa 100644
--- a/arkouda/numpy/dtypes/dtypes.py
+++ b/arkouda/numpy/dtypes/dtypes.py
@@ -104,8 +104,18 @@ def dtype(x):
         return bigint()
     if isinstance(x, str) and x in ["Strings"]:
         return np.dtype(np.str_)
-    else:
-        return np.dtype(x)
+    if isinstance(x, bool):
+        return np.dtype(np.bool_)
+    if isinstance(x, int):
+        if 0 < x and x < 2**64:
+            return np.dtype(np.uint64)
+        elif x >= 2**64:
+            return bigint()
+        else:
+            return np.dtype(np.int64)
+    if isinstance(x, float):
+        return np.dtype(np.float64)
+    return np.dtype(x)
 
 
 def _is_dtype_in_union(dtype, union_type) -> builtins.bool:
@@ -284,7 +294,18 @@ def __repr__(self) -> str:
 # missing full support for: float32, int32, int16, int8, uint32, uint16, complex64, complex128
 # ARKOUDA_SUPPORTED_DTYPES = frozenset([member.value for _, member in DType.__members__.items()])
 ARKOUDA_SUPPORTED_DTYPES = frozenset(
-    ["bool_", "float", "float64", "int", "int64", "uint", "uint64", "uint8", "bigint", "str"]
+    [
+        "bool_",
+        "float",
+        "float64",
+        "int",
+        "int64",
+        "uint",
+        "uint64",
+        "uint8",
+        "bigint",
+        "str",
+    ]
 )
 
 DTypes = frozenset([member.value for _, member in DType.__members__.items()])
@@ -347,9 +368,9 @@ def resolve_scalar_dtype(val: object) -> str:
         else:
             return "int64"
     # Python float or np.float*
-    elif isinstance(val, float) or (hasattr(val, "dtype") and cast(np.float_, val).dtype.kind == "f"):
+    elif isinstance(val, float) or (hasattr(val, "dtype") and cast(np.float64, val).dtype.kind == "f"):
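+        # np.float_ was removed in numpy 2.0; np.float64 is the direct replacement.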
return "float64" - elif isinstance(val, complex) or (hasattr(val, "dtype") and cast(np.float_, val).dtype.kind == "c"): + elif isinstance(val, complex) or (hasattr(val, "dtype") and cast(np.float64, val).dtype.kind == "c"): return "float64" # TODO: actually support complex values in the backend elif isinstance(val, builtins.str) or isinstance(val, np.str_): return "str" diff --git a/arkouda/numpy/exceptions/__init__.py b/arkouda/numpy/exceptions/__init__.py index 0f9abbca300..98c283e857e 100644 --- a/arkouda/numpy/exceptions/__init__.py +++ b/arkouda/numpy/exceptions/__init__.py @@ -1,3 +1,3 @@ -from numpy import RankWarning, TooHardError +from numpy.exceptions import RankWarning, TooHardError __all__ = ["RankWarning", "TooHardError"] diff --git a/arkouda/numpy/lib/__init__.py b/arkouda/numpy/lib/__init__.py index a516f365d6a..93026e89d1d 100644 --- a/arkouda/numpy/lib/__init__.py +++ b/arkouda/numpy/lib/__init__.py @@ -1,30 +1,14 @@ # flake8: noqa -from numpy import ( - RankWarning, +from numpy.lib import ( add_docstring, add_newdoc, - deprecate, - deprecate_with_doc, - disp, - issubclass_, - issubdtype, - polynomial, - typename, ) -from arkouda.numpy.lib import emath from arkouda.numpy.lib.emath import * + __all__ = [ - "RankWarning", "add_docstring", "add_newdoc", - "deprecate", - "deprecate_with_doc", - "disp", "emath", - "issubclass_", - "issubdtype", - "polynomial", - "typename", ] diff --git a/arkouda/numpy/lib/npyio/__init__.py b/arkouda/numpy/lib/npyio/__init__.py new file mode 100644 index 00000000000..ef5c423dd7b --- /dev/null +++ b/arkouda/numpy/lib/npyio/__init__.py @@ -0,0 +1 @@ +from numpy.lib.npyio import DataSource diff --git a/arkouda/numpy/rec/__init__.py b/arkouda/numpy/rec/__init__.py index f752a4adcab..9a34c9ad52f 100644 --- a/arkouda/numpy/rec/__init__.py +++ b/arkouda/numpy/rec/__init__.py @@ -1,3 +1,3 @@ -from numpy import format_parser +from numpy.rec import format_parser __all__ = ["format_parser"] diff --git a/arkouda/pdarrayclass.py b/arkouda/pdarrayclass.py index ede5da86369..8c7fdcbc20d 100644 --- a/arkouda/pdarrayclass.py +++ b/arkouda/pdarrayclass.py @@ -11,6 +11,7 @@ from typeguard import typechecked from arkouda.client import generic_msg +from arkouda.dtypes import uint64 as ak_uint64 from arkouda.infoclass import information, pretty_print_information from arkouda.logger import getArkoudaLogger from arkouda.numpy.dtypes import ( @@ -20,9 +21,15 @@ bigint, ) from arkouda.numpy.dtypes import bool_ as akbool -from arkouda.numpy.dtypes import bool_scalars, dtype +from arkouda.numpy.dtypes import ( + bool_scalars, + dtype, +) from arkouda.numpy.dtypes import float64 as akfloat64 -from arkouda.numpy.dtypes import get_byteorder, get_server_byteorder +from arkouda.numpy.dtypes import ( + get_byteorder, + get_server_byteorder, +) from arkouda.numpy.dtypes import int64 as akint64 from arkouda.numpy.dtypes import ( int_scalars, @@ -166,6 +173,14 @@ def unescape(s): if mydtype == akstr_: # String value will always be surrounded with double quotes, so remove them return mydtype.type(unescape(value[1:-1])) + + if mydtype == ak_uint64: + if get_server_byteorder() == "little": + if value.startswith("-"): + value = value.strip("-") + uint_value = np.iinfo(np.uint64).max - ak_uint64(value) + 1 + return mydtype.type(uint_value) + return mydtype.type(value) return mydtype.type(value) except Exception: raise ValueError(f"unsupported value from server {mydtype.name} {value}") @@ -571,11 +586,33 @@ def _binop(self, other: pdarray, op: str) -> pdarray: # pdarray binop scalar # 
If scalar cannot be safely cast, server will infer the return dtype
         dt = resolve_scalar_dtype(other)
-        if self.dtype != bigint and np.can_cast(other, self.dtype):
+
+        from arkouda.numpy._numeric import can_cast as ak_can_cast
+        from arkouda.dtypes import int64 as ak_int64
+        from arkouda.dtypes import float64 as ak_float64
+
+        if self.dtype == ak_uint64 and dtype(other) == ak_int64:
+            dt = "float64"
+            other = ak_float64(other)
+        elif self.dtype != bigint and ak_can_cast(other, self.dtype):
             # If scalar can be losslessly cast to array dtype,
             # do the cast so that return array will have same dtype
             dt = self.dtype.name
             other = self.dtype.type(other)
         if dt not in DTypes:
             raise TypeError(f"Unhandled scalar type: {other} ({type(other)})")
         repMsg = generic_msg(
@@ -616,7 +653,9 @@ def _r_binop(self, other: pdarray, op: str) -> pdarray:
         # pdarray binop scalar
         # If scalar cannot be safely cast, server will infer the return dtype
         dt = resolve_scalar_dtype(other)
-        if self.dtype != bigint and np.can_cast(other, self.dtype):
+        from arkouda.numpy._numeric import can_cast as ak_can_cast
+
+        if self.dtype != bigint and ak_can_cast(other, self.dtype):
             # If scalar can be losslessly cast to array dtype,
             # do the cast so that return array will have same dtype
             dt = self.dtype.name
@@ -4131,23 +4171,22 @@ def fmod(dividend: Union[pdarray, numeric_scalars], divisor: Union[pdarray, nume
     )
     # TODO: handle shape broadcasting for multidimensional arrays
+    # The code below creates a command string for fmod2vv, fmod2vs or fmod2sv.
 
-# The code below creates a command string for fmod2vv, fmod2vs or fmod2sv.
- - if isinstance(dividend, pdarray) and isinstance(divisor, pdarray) : + if isinstance(dividend, pdarray) and isinstance(divisor, pdarray): cmdstring = f"fmod2vv<{dividend.dtype},{dividend.ndim},{divisor.dtype}>" - elif isinstance(dividend, pdarray) and not (isinstance(divisor, pdarray)) : - if resolve_scalar_dtype(divisor) in ['float64', 'int64', 'uint64', 'bool'] : - acmd = 'fmod2vs_'+resolve_scalar_dtype(divisor) - else : # this condition *should* be impossible because of the isSupportedNumber check + elif isinstance(dividend, pdarray) and not (isinstance(divisor, pdarray)): + if resolve_scalar_dtype(divisor) in ["float64", "int64", "uint64", "bool"]: + acmd = "fmod2vs_" + resolve_scalar_dtype(divisor) + else: # this condition *should* be impossible because of the isSupportedNumber check raise TypeError(f"Scalar divisor type {resolve_scalar_dtype(divisor)} not allowed in fmod") cmdstring = f"{acmd}<{dividend.dtype},{dividend.ndim}>" - elif not (isinstance(dividend, pdarray) and isinstance(divisor, pdarray)) : - if resolve_scalar_dtype(dividend) in ['float64', 'int64', 'uint64', 'bool'] : - acmd = 'fmod2sv_'+resolve_scalar_dtype(dividend) - else : # this condition *should* be impossible because of the isSupportedNumber check + elif not (isinstance(dividend, pdarray) and isinstance(divisor, pdarray)): + if resolve_scalar_dtype(dividend) in ["float64", "int64", "uint64", "bool"]: + acmd = "fmod2sv_" + resolve_scalar_dtype(dividend) + else: # this condition *should* be impossible because of the isSupportedNumber check raise TypeError(f"Scalar dividend type {resolve_scalar_dtype(dividend)} not allowed in fmod") cmdstring = f"{acmd}<{divisor.dtype},{divisor.ndim}>" # type: ignore[union-attr] @@ -4155,7 +4194,7 @@ def fmod(dividend: Union[pdarray, numeric_scalars], divisor: Union[pdarray, nume m = mod(dividend, divisor) return _create_scalar_array(m) -# We reach here if this was any case other than scalar & scalar + # We reach here if this was any case other than scalar & scalar return create_pdarray( cast( diff --git a/arkouda/pdarraycreation.py b/arkouda/pdarraycreation.py index fde8b3fe997..697159a712d 100644 --- a/arkouda/pdarraycreation.py +++ b/arkouda/pdarraycreation.py @@ -275,7 +275,10 @@ def array( # early out if we would have more uint arrays than can fit in max_bits early_out = (max_bits // 64) + (max_bits % 64 != 0) if max_bits != -1 else float("inf") while any(a != 0) and len(uint_arrays) < early_out: - low, a = a % 2**64, a // 2**64 + if isinstance(a, np.ndarray): + low, a = a.astype("O") % 2**64, a.astype("O") // 2**64 + else: + low, a = a % 2**64, a // 2**64 uint_arrays.append(array(np.array(low, dtype=np.uint), dtype=akuint64)) return bigint_from_uint_arrays(uint_arrays[::-1], max_bits=max_bits) except TypeError: @@ -300,6 +303,14 @@ def array( # than our numpy array we need to swap to match since the server expects # native endian bytes aview = _array_memview(a_) + + if get_server_byteorder() == "big": + if a.dtype.byteorder == "<": + a = a.view(a.dtype.newbyteorder("S")).byteswap() + else: + if a.dtype.byteorder == ">": + a = a.view(a.dtype.newbyteorder("S")).byteswap() + rep_msg = generic_msg( cmd=f"array<{a_.dtype.name},{ndim}>", args={"dtype": a_.dtype.name, "shape": tuple(a_.shape), "seg_string": False}, diff --git a/arkouda/util.py b/arkouda/util.py index e84d7dfa5fe..a0ba1b5f89d 100644 --- a/arkouda/util.py +++ b/arkouda/util.py @@ -425,7 +425,7 @@ def convert_bytes(nbytes, unit="B"): def is_numeric( - arry: Union[pdarray, Strings, Categorical, "Series", "Index"] # 
noqa: F821 + arry: Union[pdarray, Strings, Categorical, "Series", "Index"], # noqa: F821 ) -> builtins.bool: """ Check if the dtype of the given array is numeric. diff --git a/pydoc/requirements.txt b/pydoc/requirements.txt index 618f5818123..069bf81b099 100644 --- a/pydoc/requirements.txt +++ b/pydoc/requirements.txt @@ -1,6 +1,6 @@ # dependencies python>=3.9,<3.12.4 -numpy>=1.24.1,<2.0 +numpy>=2.0 pandas>=1.4.0,!=2.2.0 pyzmq>=20.0.0 typeguard==2.10.0 diff --git a/setup.py b/setup.py index 162f69a27b9..ac0b3a2723a 100644 --- a/setup.py +++ b/setup.py @@ -116,7 +116,7 @@ # For an analysis of "install_requires" vs pip's requirements files see: # https://packaging.python.org/en/latest/requirements.html install_requires=[ - "numpy>=1.24.1,<2.0", + "numpy>=2.0", "pandas>=1.4.0,!=2.2.0", "pyzmq>=20.0.0", "typeguard==2.10.0", diff --git a/tests/dataframe_test.py b/tests/dataframe_test.py index fd6c1f6b9c5..85b31b20436 100644 --- a/tests/dataframe_test.py +++ b/tests/dataframe_test.py @@ -94,7 +94,7 @@ def build_ak_df_example_numeric_types(): "float64": ak.randint(0, 1, 20, dtype=ak.float64), "int64": ak.randint(0, 10, 20, dtype=ak.int64), "uint64": ak.randint(0, 10, 20, dtype=ak.uint64), - "bigint": ak.randint(0, 10, 20, dtype=ak.uint64) + 2**200, + "bigint": ak.randint(2**200, 2**200 + 10, 20, dtype=ak.uint64), } ) return ak_df @@ -105,7 +105,9 @@ def build_pd_df_duplicates(): userid = [111, 222, 111, 333, 222, 111] item = [0, 1, 0, 2, 1, 0] day = [5, 5, 5, 5, 5, 5] - return pd.DataFrame({"userName": username, "userID": userid, "item": item, "day": day}) + return pd.DataFrame( + {"userName": username, "userID": userid, "item": item, "day": day} + ) @staticmethod def build_ak_df_duplicates(): @@ -139,7 +141,7 @@ def build_pd_df_append(): item = [0, 0, 1, 1, 2, 0, 0, 2] day = [5, 5, 6, 5, 6, 6, 1, 2] amount = [0.5, 0.6, 1.1, 1.2, 4.3, 0.6, 0.5, 5.1] - bi = (np.arange(8) + 2**200).tolist() + bi = np.arange(2**200, 2**200 + 8).tolist() # (np.arange(8) + 2**200).tolist() ui = (np.arange(8).astype(ak.uint64)) + 2**63 return pd.DataFrame( { @@ -209,7 +211,9 @@ def test_dataframe_creation(self, size): "uint": ak.array(pddf["uint"]), "bigint": ak.arange(2**200, 2**200 + size), "bool": ak.array(pddf["bool"]), - "segarray": ak.SegArray.from_multi_array([ak.array(x) for x in pddf["segarray"]]), + "segarray": ak.SegArray.from_multi_array( + [ak.array(x) for x in pddf["segarray"]] + ), } ) assert isinstance(akdf, ak.DataFrame) @@ -362,7 +366,9 @@ def test_boolean_indexing(self): row = df[df["userName"] == "Carol"] assert len(row) == 1 - assert ref_df[ref_df["userName"] == "Carol"].equals(row.to_pandas(retain_index=True)) + assert ref_df[ref_df["userName"] == "Carol"].equals( + row.to_pandas(retain_index=True) + ) def test_column_indexing(self): df = self.build_ak_df() @@ -602,14 +608,20 @@ def test_groupby_standard(self): pds = pd.Series( data=np.ones(4, dtype=np.int64), index=pd.Index( - data=np.array(["0.0.0.1", "0.0.0.2", "0.0.0.3", "0.0.0.4"], dtype=" None: +# """ +# Creates an hdf5 file with dataset(s) from the specified columns and path prefix +# via the ak.save_all method. 
If columns is a List, then the names list is used +# to create the datasets +# +# :return: None +# :raise: ValueError if the names list is None when columns is a list +# """ +# if isinstance(columns, dict): +# ak.to_hdf(columns=columns, prefix_path=prefix_path) +# else: +# if not names: +# raise ValueError("the names list must be not None if columns is a list") +# ak.to_hdf(columns=columns, prefix_path=prefix_path, names=names) +# +# def test_save_all_load_all_with_dict(self, hdf_test_base_tmp): +# """ +# Creates 2..n files from an input columns dict depending upon the number of +# arkouda_server locales, retrieves all datasets and correspoding pdarrays, +# and confirms they match inputs +# +# :return: None +# :raise: AssertionError if the input and returned datasets and pdarrays don't match +# """ +# self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict".format(hdf_test_base_tmp) +# ) +# retrieved_columns = ak.load_all("{}/iotest_dict".format(hdf_test_base_tmp)) +# +# itp = self.dict_columns["int_tens_pdarray"].to_ndarray() +# ritp = retrieved_columns["int_tens_pdarray"].to_ndarray() +# itp.sort() +# ritp.sort() +# ihp = self.dict_columns["int_hundreds_pdarray"].to_ndarray() +# rihp = retrieved_columns["int_hundreds_pdarray"].to_ndarray() +# ihp.sort() +# rihp.sort() +# ifp = self.dict_columns["float_pdarray"].to_ndarray() +# rifp = retrieved_columns["float_pdarray"].to_ndarray() +# ifp.sort() +# rifp.sort() +# +# assert 4 == len(retrieved_columns) +# assert itp.tolist() == ritp.tolist() +# assert ihp.tolist() == rihp.tolist() +# assert ifp.tolist() == rifp.tolist() +# assert len(self.dict_columns["bool_pdarray"]) == len(retrieved_columns["bool_pdarray"]) +# assert 4 == len(ak.get_datasets("{}/iotest_dict_LOCALE0000".format(hdf_test_base_tmp))) +# +# def test_save_all_load_all_with_list(self, hdf_test_base_tmp): +# """ +# Creates 2..n files from an input columns and names list depending upon the number of +# arkouda_server locales, retrieves all datasets and correspoding pdarrays, and confirms +# they match inputs +# +# :return: None +# :raise: AssertionError if the input and returned datasets and pdarrays don't match +# """ +# self._create_file( +# columns=self.list_columns, +# prefix_path="{}/iotest_list".format(hdf_test_base_tmp), +# names=self.names, +# ) +# retrieved_columns = ak.load_all(path_prefix="{}/iotest_list".format(hdf_test_base_tmp)) +# +# itp = self.list_columns[0].to_ndarray() +# itp.sort() +# ritp = retrieved_columns["int_tens_pdarray"].to_ndarray() +# ritp.sort() +# ihp = self.list_columns[1].to_ndarray() +# ihp.sort() +# rihp = retrieved_columns["int_hundreds_pdarray"].to_ndarray() +# rihp.sort() +# fp = self.list_columns[2].to_ndarray() +# fp.sort() +# rfp = retrieved_columns["float_pdarray"].to_ndarray() +# rfp.sort() +# +# assert 4 == len(retrieved_columns) +# assert itp.tolist() == ritp.tolist() +# assert ihp.tolist() == rihp.tolist() +# assert fp.tolist() == rfp.tolist() +# assert len(self.list_columns[3]) == len(retrieved_columns["bool_pdarray"]) +# assert 4 == len(ak.get_datasets("{}/iotest_list_LOCALE0000".format(hdf_test_base_tmp))) +# +# def test_read_hdf(self, hdf_test_base_tmp): +# """ +# Creates 2..n files depending upon the number of arkouda_server locales, reads the files +# with an explicit list of file names to the read_all method, and confirms the datasets +# and embedded pdarrays match the input dataset and pdarrays +# +# :return: None +# :raise: AssertionError if the input and returned datasets don't match +# """ +# 
self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict_columns".format(hdf_test_base_tmp) +# ) +# +# # test with read_hdf +# dataset = ak.read_hdf(filenames=["{}/iotest_dict_columns_LOCALE0000".format(hdf_test_base_tmp)]) +# assert 4 == len(list(dataset.keys())) +# +# # test with generic read function +# dataset = ak.read(filenames=["{}/iotest_dict_columns_LOCALE0000".format(hdf_test_base_tmp)]) +# assert 4 == len(list(dataset.keys())) +# +# def test_read_hdf_with_glob(self, hdf_test_base_tmp): +# """ +# Creates 2..n files depending upon the number of arkouda_server locales with two +# files each containing different-named datasets with the same pdarrays, reads the files +# with the glob feature of the read_all method, and confirms the datasets and embedded +# pdarrays match the input dataset and pdarrays +# +# :return: None +# :raise: AssertionError if the input and returned datasets don't match +# """ +# self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict_columns".format(hdf_test_base_tmp) +# ) +# +# retrieved_columns = ak.read_hdf(filenames="{}/iotest_dict_columns*".format(hdf_test_base_tmp)) +# +# itp = self.list_columns[0].to_ndarray() +# itp.sort() +# ritp = retrieved_columns["int_tens_pdarray"].to_ndarray() +# ritp.sort() +# ihp = self.list_columns[1].to_ndarray() +# ihp.sort() +# rihp = retrieved_columns["int_hundreds_pdarray"].to_ndarray() +# rihp.sort() +# fp = self.list_columns[2].to_ndarray() +# fp.sort() +# rfp = retrieved_columns["float_pdarray"].to_ndarray() +# rfp.sort() +# +# assert 4 == len(list(retrieved_columns.keys())) +# assert itp.tolist() == ritp.tolist() +# assert ihp.tolist() == rihp.tolist() +# assert fp.tolist() == rfp.tolist() +# assert len(self.bool_pdarray) == len(retrieved_columns["bool_pdarray"]) +# +# def test_load(self, hdf_test_base_tmp): +# """ +# Creates 1..n files depending upon the number of arkouda_server locales with three columns +# AKA datasets, loads each corresponding dataset and confirms each corresponding pdarray +# equals the input pdarray. 
+# +# :return: None +# :raise: AssertionError if the input and returned datasets (pdarrays) don't match +# """ +# self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict_columns".format(hdf_test_base_tmp) +# ) +# result_array_tens = ak.load( +# path_prefix="{}/iotest_dict_columns".format(hdf_test_base_tmp), dataset="int_tens_pdarray" +# )["int_tens_pdarray"] +# result_array_hundreds = ak.load( +# path_prefix="{}/iotest_dict_columns".format(hdf_test_base_tmp), +# dataset="int_hundreds_pdarray", +# )["int_hundreds_pdarray"] +# result_array_floats = ak.load( +# path_prefix="{}/iotest_dict_columns".format(hdf_test_base_tmp), dataset="float_pdarray" +# )["float_pdarray"] +# result_array_bools = ak.load( +# path_prefix="{}/iotest_dict_columns".format(hdf_test_base_tmp), dataset="bool_pdarray" +# )["bool_pdarray"] +# +# ratens = result_array_tens.to_ndarray() +# ratens.sort() +# +# rahundreds = result_array_hundreds.to_ndarray() +# rahundreds.sort() +# +# rafloats = result_array_floats.to_ndarray() +# rafloats.sort() +# +# assert self.int_tens_ndarray.tolist() == ratens.tolist() +# assert self.int_hundreds_ndarray.tolist() == rahundreds.tolist() +# assert self.float_ndarray.tolist() == rafloats.tolist() +# assert len(self.bool_pdarray) == len(result_array_bools) +# +# # test load_all with file_format parameter usage +# ak.to_parquet( +# columns=self.dict_columns, +# prefix_path="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# ) +# result_array_tens = ak.load( +# path_prefix="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# dataset="int_tens_pdarray", +# file_format="Parquet", +# )["int_tens_pdarray"] +# result_array_hundreds = ak.load( +# path_prefix="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# dataset="int_hundreds_pdarray", +# file_format="Parquet", +# )["int_hundreds_pdarray"] +# result_array_floats = ak.load( +# path_prefix="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# dataset="float_pdarray", +# file_format="Parquet", +# )["float_pdarray"] +# result_array_bools = ak.load( +# path_prefix="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# dataset="bool_pdarray", +# file_format="Parquet", +# )["bool_pdarray"] +# ratens = result_array_tens.to_ndarray() +# ratens.sort() +# +# rahundreds = result_array_hundreds.to_ndarray() +# rahundreds.sort() +# +# rafloats = result_array_floats.to_ndarray() +# rafloats.sort() +# assert self.int_tens_ndarray.tolist() == ratens.tolist() +# assert self.int_hundreds_ndarray.tolist() == rahundreds.tolist() +# assert self.float_ndarray.tolist() == rafloats.tolist() +# assert len(self.bool_pdarray) == len(result_array_bools) +# +# # Test load with invalid prefix +# with pytest.raises(RuntimeError): +# ak.load( +# path_prefix="{}/iotest_dict_column".format(hdf_test_base_tmp), +# dataset="int_tens_pdarray", +# )["int_tens_pdarray"] +# +# # Test load with invalid file +# with pytest.raises(RuntimeError): +# ak.load(path_prefix="{}/not-a-file".format(hdf_test_base_tmp), dataset="int_tens_pdarray")[ +# "int_tens_pdarray" +# ] +# +# def test_load_all(self, hdf_test_base_tmp): +# self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict_columns".format(hdf_test_base_tmp) +# ) +# +# results = ak.load_all(path_prefix="{}/iotest_dict_columns".format(hdf_test_base_tmp)) +# assert "bool_pdarray" in results +# assert "float_pdarray" in results +# assert "int_tens_pdarray" in results +# assert "int_hundreds_pdarray" in results +# +# # test load_all with file_format 
parameter usage +# ak.to_parquet( +# columns=self.dict_columns, +# prefix_path="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# ) +# results = ak.load_all( +# file_format="Parquet", +# path_prefix="{}/iotest_dict_columns_parquet".format(hdf_test_base_tmp), +# ) +# assert "bool_pdarray" in results +# assert "float_pdarray" in results +# assert "int_tens_pdarray" in results +# assert "int_hundreds_pdarray" in results +# +# # # Test load_all with invalid prefix +# with pytest.raises(ValueError): +# ak.load_all(path_prefix="{}/iotest_dict_column".format(hdf_test_base_tmp)) +# +# # Test load with invalid file +# with pytest.raises(RuntimeError): +# ak.load_all(path_prefix="{}/not-a-file".format(hdf_test_base_tmp)) +# +# def test_get_data_sets(self, hdf_test_base_tmp): +# """ +# Creates 1..n files depending upon the number of arkouda_server locales containing three +# datasets and confirms the expected number of datasets along with the dataset names +# +# :return: None +# :raise: AssertionError if the input and returned dataset names don't match +# """ +# self._create_file( +# columns=self.dict_columns, prefix_path="{}/iotest_dict_columns".format(hdf_test_base_tmp) +# ) +# datasets = ak.get_datasets("{}/iotest_dict_columns_LOCALE0000".format(hdf_test_base_tmp)) +# +# assert 4 == len(datasets) +# for dataset in datasets: +# assert dataset in self.names +# +# # Test load_all with invalid filename +# with pytest.raises(RuntimeError): +# ak.get_datasets("{}/iotest_dict_columns_LOCALE000".format(hdf_test_base_tmp)) +# +# @pytest.mark.parametrize("prob_size", pytest.prob_size) +# @pytest.mark.parametrize("dtype", NUMERIC_AND_STR_TYPES) +# def test_read_and_write(self, prob_size, dtype, hdf_test_base_tmp): +# ak_arr = make_ak_arrays(prob_size * pytest.nl, dtype) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/hdf_test_correct" +# ak_arr.to_hdf(file_name) +# +# # test read_hdf with glob +# gen_arr = ak.read_hdf(f"{file_name}*").popitem()[1] +# assert (ak_arr == gen_arr).all() +# +# # test read_hdf with filenames +# gen_arr = ak.read_hdf( +# filenames=[f"{file_name}_LOCALE{i:04d}" for i in range(pytest.nl)] +# ).popitem()[1] +# assert (ak_arr == gen_arr).all() +# +# # verify generic read works +# gen_arr = ak.read(f"{file_name}*").popitem()[1] +# assert (ak_arr == gen_arr).all() +# +# # verify generic load works +# if dtype == "str": +# # we have to specify the dataset for strings since it differs from default of "array" +# gen_arr = ak.load(path_prefix=file_name, dataset="strings_array")["strings_array"] +# else: +# gen_arr = ak.load(path_prefix=file_name).popitem()[1] +# assert (ak_arr == gen_arr).all() +# +# # verify generic load works with file_format parameter +# if dtype == "str": +# # we have to specify the dataset for strings since it differs from default of "array" +# gen_arr = ak.load(path_prefix=file_name, dataset="strings_array", file_format="HDF5")[ +# "strings_array" +# ] +# else: +# gen_arr = ak.load(path_prefix=file_name, file_format="HDF5").popitem()[1] +# assert (ak_arr == gen_arr).all() +# +# # verify load_all works +# gen_arr = ak.load_all(path_prefix=file_name) +# if dtype == "str": +# # we have to specify the dataset for strings since it differs from default of "array" +# assert (ak_arr == gen_arr["strings_array"]).all() +# else: +# assert (ak_arr == gen_arr["array"]).all() +# +# # Test load with invalid file +# with pytest.raises(RuntimeError): +# ak.load(path_prefix=f"{hdf_test_base_tmp}/not-a-file") +# +# 
@pytest.mark.parametrize("prob_size", pytest.prob_size) +# @pytest.mark.parametrize("dtype", NUMERIC_AND_STR_TYPES) +# def test_read_and_write_dset_provided(self, prob_size, dtype, hdf_test_base_tmp): +# ak_arr = make_ak_arrays(prob_size * pytest.nl, dtype) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/hdf_test_correct" +# ak_arr.to_hdf(file_name, "my_dset") +# +# # test read_hdf with glob +# gen_arr = ak.read_hdf(f"{file_name}*", "my_dset")["my_dset"] +# assert (ak_arr == gen_arr).all() +# +# # test read_hdf with filenames +# gen_arr = ak.read_hdf( +# filenames=[f"{file_name}_LOCALE{i:04d}" for i in range(pytest.nl)], datasets="my_dset" +# )["my_dset"] +# assert (ak_arr == gen_arr).all() +# +# # verify generic read works +# gen_arr = ak.read(f"{file_name}*", "my_dset")["my_dset"] +# assert (ak_arr == gen_arr).all() +# +# # verify generic load works +# gen_arr = ak.load(path_prefix=file_name, dataset="my_dset")["my_dset"] +# assert (ak_arr == gen_arr).all() +# +# # verify generic load works with file_format parameter +# gen_arr = ak.load(path_prefix=file_name, dataset="my_dset", file_format="HDF5")["my_dset"] +# assert (ak_arr == gen_arr).all() +# +# # verify load_all works +# gen_arr = ak.load_all(path_prefix=file_name) +# assert (ak_arr == gen_arr["my_dset"]).all() +# +# # Test load with invalid file +# with pytest.raises(RuntimeError): +# ak.load(path_prefix=f"{hdf_test_base_tmp}/not-a-file", dataset="my_dset") +# +# @pytest.mark.parametrize("dtype", NUMERIC_AND_STR_TYPES) +# def test_edge_case_read_write(self, dtype, hdf_test_base_tmp): +# np_edge_case = make_edge_case_arrays(dtype) +# ak_edge_case = ak.array(np_edge_case) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# ak_edge_case.to_hdf(f"{tmp_dirname}/hdf_test_edge_case", "my-dset") +# hdf_arr = ak.read_hdf(f"{tmp_dirname}/hdf_test_edge_case*", "my-dset")["my-dset"] +# if dtype == "float64": +# assert np.allclose(np_edge_case, hdf_arr.to_ndarray(), equal_nan=True) +# else: +# assert (np_edge_case == hdf_arr.to_ndarray()).all() +# +# def test_read_and_write_with_dict(self, hdf_test_base_tmp): +# df_dict = make_multi_dtype_dict() +# # extend to include categoricals +# df_dict["cat"] = ak.Categorical(ak.array(["c", "b", "a", "b"])) +# df_dict["cat_from_codes"] = ak.Categorical.from_codes( +# codes=ak.array([2, 1, 0, 1]), categories=ak.array(["a", "b", "c"]) +# ) +# akdf = ak.DataFrame(df_dict) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/multi_col_hdf" +# # use multi-column write to generate hdf file +# akdf.to_hdf(file_name) +# +# # test read_hdf with glob, no datasets specified +# rd_data = ak.read_hdf(f"{file_name}*") +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# # test read_hdf with only one dataset specified (each tested) +# for col_name in akdf.columns.values: +# gen_arr = ak.read_hdf(f"{file_name}*", datasets=[col_name])[col_name] +# if akdf[col_name].dtype != ak.float64: +# assert akdf[col_name].to_list() == gen_arr.to_list() +# else: +# a = akdf[col_name].to_ndarray() +# b = gen_arr.to_ndarray() +# if isinstance(a[0], np.ndarray): +# assert all(np.allclose(a1, b1, equal_nan=True) for a1, b1 in zip(a, b)) +# else: +# assert np.allclose(a, b, equal_nan=True) +# +# # test read_hdf with half of columns names specified as datasets +# half_cols 
= akdf.columns.values[: len(akdf.columns.values) // 2] +# rd_data = ak.read_hdf(f"{file_name}*", datasets=half_cols) +# rd_df = ak.DataFrame(rd_data) +# pd.testing.assert_frame_equal(akdf[half_cols].to_pandas(), rd_df[half_cols].to_pandas()) +# +# # test read_hdf with all columns names specified as datasets +# rd_data = ak.read_hdf(f"{file_name}*", datasets=akdf.columns.values) +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# # test read_hdf with filenames +# rd_data = ak.read_hdf(filenames=[f"{file_name}_LOCALE{i:04d}" for i in range(pytest.nl)]) +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# # verify generic read works +# rd_data = ak.read(f"{file_name}*") +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# for col_name in akdf.columns.values: +# # verify generic load works +# gen_arr = ak.load(path_prefix=file_name, dataset=col_name)[col_name] +# if akdf[col_name].dtype != ak.float64: +# assert akdf[col_name].to_list() == gen_arr.to_list() +# else: +# a = akdf[col_name].to_ndarray() +# b = gen_arr.to_ndarray() +# if isinstance(a[0], np.ndarray): +# assert all(np.allclose(a1, b1, equal_nan=True) for a1, b1 in zip(a, b)) +# else: +# assert np.allclose(a, b, equal_nan=True) +# +# # verify generic load works with file_format parameter +# gen_arr = ak.load(path_prefix=file_name, dataset=col_name, file_format="HDF5")[col_name] +# if akdf[col_name].dtype != ak.float64: +# assert akdf[col_name].to_list() == gen_arr.to_list() +# else: +# a = akdf[col_name].to_ndarray() +# b = gen_arr.to_ndarray() +# if isinstance(a[0], np.ndarray): +# assert all(np.allclose(a1, b1, equal_nan=True) for a1, b1 in zip(a, b)) +# else: +# assert np.allclose(a, b, equal_nan=True) +# +# # Test load with invalid file +# with pytest.raises(RuntimeError): +# ak.load( +# path_prefix=f"{hdf_test_base_tmp}/not-a-file", +# dataset=akdf.columns.values[0], +# ) +# +# # verify load_all works +# rd_data = ak.load_all(path_prefix=file_name) +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# # Test load_all with invalid file +# with pytest.raises(ValueError): +# ak.load_all(path_prefix=f"{hdf_test_base_tmp}/does-not-exist") +# +# # test get_datasets +# datasets = ak.get_datasets(f"{file_name}*") +# assert sorted(datasets) == sorted(akdf.columns.values) +# +# # test save with index true +# akdf.to_hdf(file_name, index=True) +# rd_data = ak.read_hdf(f"{file_name}*") +# rd_df = ak.DataFrame(rd_data) +# # fix column ordering see issue #2611 +# rd_df = rd_df[akdf.columns.values] +# pd.testing.assert_frame_equal(akdf.to_pandas(), rd_df.to_pandas()) +# +# # test get_datasets with index +# datasets = ak.get_datasets(f"{file_name}*") +# assert sorted(datasets) == ["Index"] + sorted(akdf.columns.values) +# +# def test_ls_hdf(self, hdf_test_base_tmp): +# df_dict = make_multi_dtype_dict() +# akdf = ak.DataFrame(df_dict) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/test_ls_hdf" +# # use multi-column write to generate hdf file +# 
akdf.to_hdf(file_name) +# +# message = ak.ls(f"{file_name}_LOCALE0000") +# for col_name in akdf.columns.values: +# assert col_name in message +# +# with pytest.raises(RuntimeError): +# ak.ls(f"{tmp_dirname}/not-a-file_LOCALE0000") +# +# def test_ls_hdf_empty(self): +# # Test filename empty/whitespace-only condition +# with pytest.raises(ValueError): +# ak.ls("") +# +# with pytest.raises(ValueError): +# ak.ls(" ") +# +# with pytest.raises(ValueError): +# ak.ls(" \n\r\t ") +# +# def test_read_hdf_with_error_and_warn(self, hdf_test_base_tmp): +# df_dict = make_multi_dtype_dict() +# akdf = ak.DataFrame(df_dict) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/test_error_hdf" +# # use multi-column write to generate hdf file +# akdf.to_hdf(file_name) +# akdf.to_hdf(f"{file_name}_dupe") +# +# # Make sure we can read ok +# dataset = ak.read_hdf( +# filenames=[ +# f"{file_name}_LOCALE0000", +# f"{file_name}_dupe_LOCALE0000", +# ] +# ) +# assert dataset is not None +# +# # Change the name of the first file we try to raise an error due to file missing. +# with pytest.raises(RuntimeError): +# ak.read_hdf( +# filenames=[ +# f"{file_name}_MISSING_LOCALE0000", +# f"{file_name}_dupe_LOCALE0000", +# ] +# ) +# +# # Run the same test with missing file, but this time with the warning flag for read_all +# with pytest.warns( +# RuntimeWarning, match=r"There were .* errors reading files on the server.*" +# ): +# dataset = ak.read_hdf( +# filenames=[ +# f"{file_name}_MISSING_LOCALE0000", +# f"{file_name}_dupe_LOCALE0000", +# ], +# strict_types=False, +# allow_errors=True, +# ) +# assert dataset is not None +# +# @pytest.mark.parametrize("prob_size", pytest.prob_size) +# def test_save_strings_dataset(self, prob_size, hdf_test_base_tmp): +# reg_strings = make_ak_arrays(prob_size, "str") +# # hard coded at 26 because we don't need to test long strings at large scale +# # passing data from python to chpl this way can really slow down as size increases +# long_strings = ak.array( +# [f"testing a longer string{num} to be written, loaded and appended" for num in range(26)] +# ) +# +# for strings_array in [reg_strings, long_strings]: +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/test_strings_hdf" +# strings_array.to_hdf(file_name) +# r_strings_array = ak.read_hdf(f"{file_name}*").popitem()[1] +# assert (strings_array == r_strings_array).all() +# +# # Read a part of a saved Strings dataset from one hdf5 file +# r_strings_subset = ak.read_hdf(filenames=f"{file_name}_LOCALE0000").popitem()[1] +# assert isinstance(r_strings_subset, ak.Strings) +# assert (strings_array[: r_strings_subset.size] == r_strings_subset).all() +# +# # Repeat the test using the calc_string_offsets=True option to +# # have server calculate offsets array +# r_strings_subset = ak.read_hdf( +# filenames=f"{file_name}_LOCALE0000", calc_string_offsets=True +# ).popitem()[1] +# assert isinstance(r_strings_subset, ak.Strings) +# assert (strings_array[: r_strings_subset.size] == r_strings_subset).all() +# +# # test append +# strings_array.to_hdf(file_name, dataset="strings-dupe", mode="append") +# r_strings = ak.read_hdf(f"{file_name}*", datasets="strings_array")["strings_array"] +# r_strings_dupe = ak.read_hdf(f"{file_name}*", datasets="strings-dupe")["strings-dupe"] +# assert (r_strings == r_strings_dupe).all() +# +# def testStringsWithoutOffsets(self, hdf_test_base_tmp): +# """ +# This tests both saving & reading a strings array without 
saving and reading the offsets to HDF5. +# Instead the offsets array will be derived from the values/bytes area by looking for null-byte +# terminator strings +# """ +# strings_array = ak.array(["testing string{}".format(num) for num in list(range(0, 25))]) +# strings_array.to_hdf( +# "{}/strings-test".format(hdf_test_base_tmp), dataset="strings", save_offsets=False +# ) +# r_strings_array = ak.load( +# "{}/strings-test".format(hdf_test_base_tmp), dataset="strings", calc_string_offsets=True +# )["strings"] +# strings = strings_array.to_ndarray() +# strings.sort() +# r_strings = r_strings_array.to_ndarray() +# r_strings.sort() +# assert strings.tolist() == r_strings.tolist() +# +# def testSaveLongStringsDataset(self, hdf_test_base_tmp): +# # Create, save, and load Strings dataset +# strings = ak.array( +# [ +# "testing a longer string{} to be written, loaded and appended".format(num) +# for num in list(range(0, 26)) +# ] +# ) +# strings.to_hdf("{}/strings-test".format(hdf_test_base_tmp), dataset="strings") +# +# n_strings = strings.to_ndarray() +# n_strings.sort() +# r_strings = ak.load("{}/strings-test".format(hdf_test_base_tmp), dataset="strings")[ +# "strings" +# ].to_ndarray() +# r_strings.sort() +# +# assert n_strings.tolist() == r_strings.tolist() +# +# def testSaveMixedStringsDataset(self, hdf_test_base_tmp): +# strings_array = ak.array(["string {}".format(num) for num in list(range(0, 25))]) +# m_floats = ak.array([x / 10.0 for x in range(0, 10)]) +# m_ints = ak.array(list(range(0, 10))) +# ak.to_hdf( +# {"m_strings": strings_array, "m_floats": m_floats, "m_ints": m_ints}, +# "{}/multi-type-test".format(hdf_test_base_tmp), +# ) +# r_mixed = ak.load_all("{}/multi-type-test".format(hdf_test_base_tmp)) +# +# assert ( +# np.sort(strings_array.to_ndarray()).tolist() +# == np.sort(r_mixed["m_strings"].to_ndarray()).tolist() +# ) +# +# assert r_mixed["m_floats"] is not None +# assert r_mixed["m_ints"] is not None +# +# r_floats = ak.sort( +# ak.load("{}/multi-type-test".format(hdf_test_base_tmp), dataset="m_floats")["m_floats"] +# ) +# assert m_floats.to_list() == r_floats.to_list() +# +# r_ints = ak.sort( +# ak.load("{}/multi-type-test".format(hdf_test_base_tmp), dataset="m_ints")["m_ints"] +# ) +# assert m_ints.to_list() == r_ints.to_list() +# +# strings = strings_array.to_ndarray() +# strings.sort() +# r_strings = ak.load("{}/multi-type-test".format(hdf_test_base_tmp), dataset="m_strings")[ +# "m_strings" +# ].to_ndarray() +# r_strings.sort() +# +# assert strings.tolist() == r_strings.tolist() +# +# def testAppendStringsDataset(self, hdf_test_base_tmp): +# strings_array = ak.array(["string {}".format(num) for num in list(range(0, 25))]) +# strings_array.to_hdf("{}/append-strings-test".format(hdf_test_base_tmp), dataset="strings") +# strings_array.to_hdf( +# "{}/append-strings-test".format(hdf_test_base_tmp), dataset="strings-dupe", mode="append" +# ) +# +# r_strings = ak.load("{}/append-strings-test".format(hdf_test_base_tmp), dataset="strings")[ +# "strings" +# ] +# r_strings_dupe = ak.load( +# "{}/append-strings-test".format(hdf_test_base_tmp), dataset="strings-dupe" +# )["strings-dupe"] +# assert r_strings.to_list() == r_strings_dupe.to_list() +# +# def testAppendMixedStringsDataset(self, hdf_test_base_tmp): +# strings_array = ak.array(["string {}".format(num) for num in list(range(0, 25))]) +# strings_array.to_hdf("{}/append-multi-type-test".format(hdf_test_base_tmp), dataset="m_strings") +# m_floats = ak.array([x / 10.0 for x in range(0, 10)]) +# m_ints = ak.array(list(range(0, 
10))) +# ak.to_hdf( +# {"m_floats": m_floats, "m_ints": m_ints}, +# "{}/append-multi-type-test".format(hdf_test_base_tmp), +# mode="append", +# ) +# r_mixed = ak.load_all("{}/append-multi-type-test".format(hdf_test_base_tmp)) +# +# assert r_mixed["m_floats"] is not None +# assert r_mixed["m_ints"] is not None +# +# r_floats = ak.sort( +# ak.load("{}/append-multi-type-test".format(hdf_test_base_tmp), dataset="m_floats")[ +# "m_floats" +# ] +# ) +# r_ints = ak.sort( +# ak.load("{}/append-multi-type-test".format(hdf_test_base_tmp), dataset="m_ints")["m_ints"] +# ) +# assert m_floats.to_list() == r_floats.to_list() +# assert m_ints.to_list() == r_ints.to_list() +# +# strings = strings_array.to_ndarray() +# strings.sort() +# r_strings = r_mixed["m_strings"].to_ndarray() +# r_strings.sort() +# +# assert strings.tolist() == r_strings.tolist() +# +# def test_save_multi_type_dict_dataset(self, hdf_test_base_tmp): +# df_dict = make_multi_dtype_dict() +# # extend to include categoricals +# df_dict["cat"] = ak.Categorical(ak.array(["c", "b", "a", "b"])) +# df_dict["cat_from_codes"] = ak.Categorical.from_codes( +# codes=ak.array([2, 1, 0, 1]), categories=ak.array(["a", "b", "c"]) +# ) +# keys = list(df_dict.keys()) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/multi_type_dict_test" +# # use multi-column write to generate hdf file +# ak.to_hdf(df_dict, file_name) +# r_mixed = ak.read_hdf(f"{file_name}*") +# +# for col_name in keys: +# # verify load by dataset and returned mixed dict at col_name +# loaded = ak.load(file_name, dataset=col_name)[col_name] +# for arr in [loaded, r_mixed[col_name]]: +# if df_dict[col_name].dtype != ak.float64: +# assert df_dict[col_name].to_list() == arr.to_list() +# else: +# a = df_dict[col_name].to_ndarray() +# b = arr.to_ndarray() +# if isinstance(a[0], np.ndarray): +# assert all(np.allclose(a1, b1, equal_nan=True) for a1, b1 in zip(a, b)) +# else: +# assert np.allclose(a, b, equal_nan=True) +# +# # test append for multi type dict +# single_arr = df_dict[keys[0]] +# rest_dict = {k: df_dict[k] for k in keys[1:]} +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/multi_type_dict_test" +# single_arr.to_hdf(file_name, dataset=keys[0]) +# +# ak.to_hdf(rest_dict, file_name, mode="append") +# r_mixed = ak.read_hdf(f"{file_name}*") +# +# for col_name in keys: +# # verify load by dataset and returned mixed dict at col_name +# loaded = ak.load(file_name, dataset=col_name)[col_name] +# for arr in [loaded, r_mixed[col_name]]: +# if df_dict[col_name].dtype != ak.float64: +# assert df_dict[col_name].to_list() == arr.to_list() +# else: +# a = df_dict[col_name].to_ndarray() +# b = arr.to_ndarray() +# if isinstance(a[0], np.ndarray): +# assert all(np.allclose(a1, b1, equal_nan=True) for a1, b1 in zip(a, b)) +# else: +# assert np.allclose(a, b, equal_nan=True) +# +# def test_strict_types(self, hdf_test_base_tmp): +# N = 100 +# int_types = [np.uint32, np.int64, np.uint16, np.int16] +# float_types = [np.float32, np.float64, np.float32, np.float64] +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# prefix = f"{tmp_dirname}/strict-type-test" +# for i, (it, ft) in enumerate(zip(int_types, float_types)): +# with h5py.File("{}-{}".format(prefix, i), "w") as f: +# idata = np.arange(i * N, (i + 1) * N, dtype=it) +# id = f.create_dataset("integers", data=idata) +# id.attrs["ObjType"] = 1 +# fdata = np.arange(i * N, (i + 1) * N, dtype=ft) +# fd = 
f.create_dataset("floats", data=fdata) +# fd.attrs["ObjType"] = 1 +# with pytest.raises(RuntimeError): +# ak.read_hdf(f"{prefix}*") +# +# a = ak.read_hdf(f"{prefix}*", strict_types=False) +# assert a["integers"].to_list() == np.arange(len(int_types) * N).tolist() +# assert np.allclose( +# a["floats"].to_ndarray(), np.arange(len(float_types) * N, dtype=np.float64) +# ) +# +# def test_small_arrays(self, hdf_test_base_tmp): +# for arr in [ak.array([1]), ak.array(["ab", "cd"]), ak.array(["123456789"])]: +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# arr.to_hdf(f"{tmp_dirname}/small_numeric") +# ret_arr = ak.read_hdf(f"{tmp_dirname}/small_numeric*").popitem()[1] +# assert (arr == ret_arr).all() +# +# def test_uint64_to_from_HDF5(self, hdf_test_base_tmp): +# """ +# Test our ability to read/write uint64 to HDF5 +# """ +# npa1 = np.array( +# [18446744073709551500, 18446744073709551501, 18446744073709551502], dtype=np.uint64 +# ) +# pda1 = ak.array(npa1) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# pda1.to_hdf(f"{tmp_dirname}/small_numeric", dataset="pda1") +# # Now load it back in +# pda2 = ak.load(f"{tmp_dirname}/small_numeric", dataset="pda1")["pda1"] +# assert str(pda1) == str(pda2) +# assert 18446744073709551500 == pda2[0] +# assert pda2.to_list() == npa1.tolist() +# +# def test_uint64_to_from_array(self, hdf_test_base_tmp): +# """ +# Test conversion to and from numpy array / pdarray using unsigned 64bit integer (uint64) +# """ +# npa1 = np.array( +# [18446744073709551500, 18446744073709551501, 18446744073709551502], dtype=np.uint64 +# ) +# pda1 = ak.array(npa1) +# assert 18446744073709551500 == pda1[0] +# assert pda1.to_list() == npa1.tolist() +# +# def test_bigint(self, hdf_test_base_tmp): +# df_dict = { +# "pdarray": ak.arange(2**200, 2**200 + 3, max_bits=201), +# "groupby": ak.GroupBy(ak.arange(2**200, 2**200 + 5)), +# "segarray": ak.SegArray(ak.arange(0, 10, 2), ak.arange(2**200, 2**200 + 10, max_bits=212)), +# } +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/bigint_test" +# ak.to_hdf(df_dict, file_name) +# ret_dict = ak.read_hdf(f"{tmp_dirname}/bigint_test*") +# +# pda_loaded = ak.read_hdf(f"{tmp_dirname}/bigint_test*", datasets="pdarray")["pdarray"] +# a = df_dict["pdarray"] +# for rd_a in [ret_dict["pdarray"], pda_loaded]: +# assert isinstance(rd_a, ak.pdarray) +# assert a.to_list() == rd_a.to_list() +# assert a.max_bits == rd_a.max_bits +# +# g_loaded = ak.read_hdf(f"{tmp_dirname}/bigint_test*", datasets="groupby")["groupby"] +# g = df_dict["groupby"] +# for rd_g in [ret_dict["groupby"], g_loaded]: +# assert isinstance(rd_g, ak.GroupBy) +# assert g.keys.to_list() == rd_g.keys.to_list() +# assert g.unique_keys.to_list() == rd_g.unique_keys.to_list() +# assert g.permutation.to_list() == rd_g.permutation.to_list() +# assert g.segments.to_list() == rd_g.segments.to_list() +# +# sa_loaded = ak.read_hdf(f"{tmp_dirname}/bigint_test*", datasets="segarray")["segarray"] +# sa = df_dict["segarray"] +# for rd_sa in [ret_dict["segarray"], sa_loaded]: +# assert isinstance(rd_sa, ak.SegArray) +# assert sa.values.to_list() == rd_sa.values.to_list() +# assert sa.segments.to_list() == rd_sa.segments.to_list() +# +# def test_unsanitized_dataset_names(self, hdf_test_base_tmp): +# # Test when quotes are part of the dataset name +# my_arrays = {'foo"0"': ak.arange(100), 'bar"': ak.arange(100)} +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# 
ak.to_hdf(my_arrays, f"{tmp_dirname}/bad_dataset_names") +# ak.read_hdf(f"{tmp_dirname}/bad_dataset_names*") +# +# +# def test_hdf_groupby(self, hdf_test_base_tmp): +# # test for categorical and multiple keys +# string = ak.array(["a", "b", "a", "b", "c"]) +# cat = ak.Categorical(string) +# cat_from_codes = ak.Categorical.from_codes( +# codes=ak.array([0, 1, 0, 1, 2]), categories=ak.array(["a", "b", "c"]) +# ) +# pda = ak.array([0, 1, 2, 0, 2]) +# +# pda_grouping = ak.GroupBy(pda) +# str_grouping = ak.GroupBy(string) +# cat_grouping = ak.GroupBy([cat, cat_from_codes]) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# for g in [pda_grouping, str_grouping, cat_grouping]: +# g.to_hdf(f"{tmp_dirname}/groupby_test") +# g_load = ak.read(f"{tmp_dirname}/groupby_test*").popitem()[1] +# assert len(g_load.keys) == len(g.keys) +# assert g_load.permutation.to_list() == g.permutation.to_list() +# assert g_load.segments.to_list() == g.segments.to_list() +# assert g_load._uki.to_list() == g._uki.to_list() +# if isinstance(g.keys[0], ak.Categorical): +# for k, kload in zip(g.keys, g_load.keys): +# assert k.to_list() == kload.to_list() +# else: +# assert g_load.keys.to_list() == g.keys.to_list() +# +# def test_hdf_categorical(self, hdf_test_base_tmp): +# cat = ak.Categorical(ak.array(["a", "b", "a", "b", "c"])) +# cat_from_codes = ak.Categorical.from_codes( +# codes=ak.array([0, 1, 0, 1, 2]), categories=ak.array(["a", "b", "c"]) +# ) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# for c in cat, cat_from_codes: +# c.to_hdf(f"{tmp_dirname}/categorical_test") +# c_load = ak.read(f"{tmp_dirname}/categorical_test*").popitem()[1] +# +# assert c_load.categories.to_list() == (["a", "b", "c", "N/A"]) +# if c.segments is not None: +# assert c.segments.to_list() == c_load.segments.to_list() +# assert c.permutation.to_list() == c_load.permutation.to_list() +# +# def test_segarray_hdf(self, hdf_test_base_tmp): +# a = [0, 1, 2, 3] +# b = [4, 0, 5, 6, 0, 7, 8, 0] +# c = [9, 0, 0] +# +# # int64 test +# flat = a + b + c +# segments = ak.array([0, len(a), len(a) + len(b)]) +# dtype = ak.dtypes.int64 +# akflat = ak.array(flat, dtype) +# segarr = ak.SegArray(segments, akflat) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# segarr.to_hdf(f"{tmp_dirname}/segarray_int") +# # Now load it back in +# seg2 = ak.load(f"{tmp_dirname}/segarray_int", dataset="segarray")["segarray"] +# assert segarr.segments.to_list() == seg2.segments.to_list() +# assert segarr.values.to_list() == seg2.values.to_list() +# +# # uint64 test +# dtype = ak.dtypes.uint64 +# akflat = ak.array(flat, dtype) +# segarr = ak.SegArray(segments, akflat) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# segarr.to_hdf(f"{tmp_dirname}/segarray_uint") +# # Now load it back in +# seg2 = ak.load(f"{tmp_dirname}/segarray_uint", dataset="segarray")["segarray"] +# assert segarr.segments.to_list() == seg2.segments.to_list() +# assert segarr.values.to_list() == seg2.values.to_list() +# +# # float64 test +# dtype = ak.dtypes.float64 +# akflat = ak.array(flat, dtype) +# segarr = ak.SegArray(segments, akflat) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# segarr.to_hdf(f"{tmp_dirname}/segarray_float") +# # Now load it back in +# seg2 = ak.load(f"{tmp_dirname}/segarray_float", dataset="segarray")["segarray"] +# assert segarr.segments.to_list() == seg2.segments.to_list() +# assert segarr.values.to_list() == 
seg2.values.to_list() +# +# # bool test +# dtype = ak.dtypes.bool_ +# akflat = ak.array(flat, dtype) +# segarr = ak.SegArray(segments, akflat) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# segarr.to_hdf(f"{tmp_dirname}/segarray_bool") +# # Now load it back in +# seg2 = ak.load(f"{tmp_dirname}/segarray_bool", dataset="segarray")["segarray"] +# assert segarr.segments.to_list() == seg2.segments.to_list() +# assert segarr.values.to_list() == seg2.values.to_list() +# +# def test_dataframe_segarr(self, hdf_test_base_tmp): +# a = [0, 1, 2, 3] +# b = [4, 0, 5, 6, 0, 7, 8, 0] +# c = [9, 0, 0] +# +# # int64 test +# flat = a + b + c +# segments = ak.array([0, len(a), len(a) + len(b)]) +# dtype = ak.dtypes.int64 +# akflat = ak.array(flat, dtype) +# segarr = ak.SegArray(segments, akflat) +# +# s = ak.array(["abc", "def", "ghi"]) +# df = ak.DataFrame([segarr, s]) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# df.to_hdf(f"{tmp_dirname}/dataframe_segarr") +# df_load = ak.DataFrame.load(f"{tmp_dirname}/dataframe_segarr") +# assert df.to_pandas().equals(df_load.to_pandas()) +# +# def test_segarray_str_hdf5(self, hdf_test_base_tmp): +# words = ak.array(["one,two,three", "uno,dos,tres"]) +# strs, segs = words.regex_split(",", return_segments=True) +# +# x = ak.SegArray(segs, strs) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# x.to_hdf(f"{tmp_dirname}/test_file") +# rd = ak.read_hdf(f"{tmp_dirname}/test_file*").popitem()[1] +# assert isinstance(rd, ak.SegArray) +# assert x.segments.to_list() == rd.segments.to_list() +# assert x.values.to_list() == rd.values.to_list() +# +# def test_hdf_overwrite_pdarray(self, hdf_test_base_tmp): +# # test repack with a single object +# a = ak.arange(1000) +# b = ak.randint(0, 100, 1000) +# c = ak.arange(15) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/pda_test" +# for repack in [True, False]: +# a.to_hdf(file_name) +# b.to_hdf(file_name, dataset="array_2", mode="append") +# f_list = glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwriting last dset so overwrite first +# c.update_hdf(file_name, dataset="array", repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size < orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# assert data["array"].to_list() == c.to_list() +# +# # test overwrites with different types +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/pda_test" +# a.to_hdf(file_name) +# for size, dtype in [(15, ak.uint64), (150, ak.float64), (1000, ak.bool_)]: +# b = ak.arange(size, dtype=dtype) +# b.update_hdf(file_name) +# data = ak.read_hdf(f"{file_name}*").popitem()[1] +# assert data.to_list() == b.to_list() +# +# def test_hdf_overwrite_strings(self, hdf_test_base_tmp): +# # test repack with a single object +# a = ak.random_strings_uniform(0, 16, 1000) +# b = ak.random_strings_uniform(0, 16, 1000) +# c = ak.random_strings_uniform(0, 16, 10) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/str_test" +# for repack in [True, False]: +# a.to_hdf(file_name, dataset="test_str") +# b.to_hdf(file_name, mode="append") +# f_list = 
glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwriting last dset so overwrite first +# c.update_hdf(file_name, dataset="test_str", repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size < orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# assert data["test_str"].to_list() == c.to_list() +# +# def test_overwrite_categorical(self, hdf_test_base_tmp): +# a = ak.Categorical(ak.array([f"cat_{i%3}" for i in range(100)])) +# b = ak.Categorical(ak.array([f"cat_{i%4}" for i in range(100)])) +# c = ak.Categorical(ak.array([f"cat_{i%5}" for i in range(10)])) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/cat_test" +# for repack in [True, False]: +# a.to_hdf(file_name, dataset="test_cat") +# b.to_hdf(file_name, mode="append") +# f_list = glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwriting last dset so overwrite first +# c.update_hdf(file_name, dataset="test_cat", repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size < orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# assert (data["test_cat"] == c).all() +# +# dset_name = "categorical_array" # name of categorical array +# dset_name2 = "to_replace" +# dset_name3 = "cat_array2" +# a.to_hdf(file_name, dataset=dset_name) +# b.to_hdf(file_name, dataset=dset_name2, mode="append") +# c.to_hdf(file_name, dataset=dset_name3, mode="append") +# +# a.update_hdf(file_name, dataset=dset_name2) +# data = ak.read_hdf(f"{file_name}*") +# assert all(name in data for name in (dset_name, dset_name2, dset_name3)) +# d = data[dset_name2] +# for attr in "categories", "codes", "permutation", "segments", "_akNAcode": +# assert getattr(d, attr).to_list() == getattr(a, attr).to_list() +# +# def test_hdf_overwrite_dataframe(self, hdf_test_base_tmp): +# df = ak.DataFrame( +# { +# "a": ak.arange(1000), +# "b": ak.random_strings_uniform(0, 16, 1000), +# "c": ak.arange(1000, dtype=bool), +# "d": ak.randint(0, 50, 1000), +# } +# ) +# odf = ak.DataFrame( +# { +# "b": ak.randint(0, 25, 50), +# "c": ak.arange(50, dtype=bool), +# } +# ) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/df_test" +# for repack in [True, False]: +# df.to_hdf(file_name) +# f_list = glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwriting last dset so overwrite first +# odf.update_hdf(file_name, repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size <= orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# odf_keys = list(odf.keys()) +# for key in df.keys(): +# assert (data[key] == (odf[key] if key in odf_keys else df[key])).all() +# +# def test_overwrite_segarray(self, hdf_test_base_tmp): +# sa1 = ak.SegArray(ak.arange(0, 1000, 5), ak.arange(1000)) +# sa2 = ak.SegArray(ak.arange(0, 100, 5), 
ak.arange(100)) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/segarray_test" +# for repack in [True, False]: +# sa1.to_hdf(file_name) +# sa1.to_hdf(file_name, dataset="seg2", mode="append") +# f_list = glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# +# sa2.update_hdf(file_name, repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size <= orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# assert (data["segarray"].values == sa2.values).all() +# assert (data["segarray"].segments == sa2.segments).all() +# +# def test_overwrite_single_dset(self, hdf_test_base_tmp): +# # we need to test that both repack=False and repack=True generate the same file size here +# a = ak.arange(1000) +# b = ak.arange(15) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# a.to_hdf(f"{tmp_dirname}/test_file") +# b.update_hdf(f"{tmp_dirname}/test_file") +# f_list = glob.glob(f"{tmp_dirname}/test_file*") +# f1_size = sum(os.path.getsize(f) for f in f_list) +# +# a.to_hdf(f"{tmp_dirname}/test_file_2") +# b.update_hdf(f"{tmp_dirname}/test_file_2", repack=False) +# f_list = glob.glob(f"{tmp_dirname}/test_file_2_*") +# f2_size = sum(os.path.getsize(f) for f in f_list) +# +# assert f1_size == f2_size +# +# def test_overwrite_dataframe(self, hdf_test_base_tmp): +# df = ak.DataFrame( +# { +# "a": ak.arange(1000), +# "b": ak.random_strings_uniform(0, 16, 1000), +# "c": ak.arange(1000, dtype=bool), +# "d": ak.randint(0, 50, 1000), +# } +# ) +# replace = { +# "b": ak.randint(0, 25, 50), +# "c": ak.arange(50, dtype=bool), +# } +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# df.to_hdf(f"{tmp_dirname}/overwrite_test") +# f_list = glob.glob(f"{tmp_dirname}/overwrite_test_*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwritting last dset so overwrite first +# ak.update_hdf(replace, f"{tmp_dirname}/overwrite_test") +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# # ensure that the column was actually overwritten +# assert new_size < orig_size +# data = ak.read_hdf(f"{tmp_dirname}/overwrite_test_*") +# assert data["b"].to_list() == replace["b"].to_list() +# assert data["c"].to_list() == replace["c"].to_list() +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# df.to_hdf(f"{tmp_dirname}/overwrite_test") +# f_list = glob.glob(f"{tmp_dirname}/overwrite_test_*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwritting last dset so overwrite first +# ak.update_hdf(replace, f"{tmp_dirname}/overwrite_test", repack=False) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# # ensure that the column was actually overwritten +# assert new_size >= orig_size +# data = ak.read_hdf(f"{tmp_dirname}/overwrite_test_*") +# assert data["b"].to_list() == replace["b"].to_list() +# assert data["c"].to_list() == replace["c"].to_list() +# +# def test_snapshot(self, hdf_test_base_tmp): +# df = ak.DataFrame(make_multi_dtype_dict()) +# df_str_idx = df.copy() +# df_str_idx._set_index([f"A{i}" for i in range(len(df))]) +# col_order = df.columns.values +# df_ref = df.to_pandas() +# df_str_idx_ref = df_str_idx.to_pandas(retain_index=True) +# a = ak.randint(0, 10, 100) +# s = 
ak.random_strings_uniform(0, 5, 50) +# c = ak.Categorical(s) +# g = ak.GroupBy(a) +# ref_data = {"a": a, "s": s, "c": c, "g": g} +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# ak.snapshot(f"{tmp_dirname}/snapshot_test") +# for v in [df, df_str_idx, a, s, c, g]: +# # delete variables and verify no longer in the namespace +# del v +# with pytest.raises(NameError): +# assert not v # noqa: F821 +# +# # restore the variables +# data = ak.restore(f"{tmp_dirname}/snapshot_test") +# for vn in ["df", "df_str_idx", "a", "s", "c", "g"]: +# # ensure all variable names returned +# assert vn in data.keys() +# +# # validate that restored variables are correct +# pd.testing.assert_frame_equal( +# df_ref[col_order], data["df"].to_pandas(retain_index=True)[col_order] +# ) +# pd.testing.assert_frame_equal( +# df_str_idx_ref[col_order], data["df_str_idx"].to_pandas(retain_index=True)[col_order] +# ) +# for key in ref_data.keys(): +# if isinstance(data[key], ak.GroupBy): +# assert (ref_data[key].permutation == data[key].permutation).all() +# assert (ref_data[key].keys == data[key].keys).all() +# assert (ref_data[key].segments == data[key].segments).all() +# else: +# assert (ref_data[key] == data[key]).all() +# +# @pytest.mark.parametrize("dtype", NUMERIC_AND_STR_TYPES) +# @pytest.mark.parametrize("size", pytest.prob_size) +# def test_index_save_and_load(self, dtype, size, hdf_test_base_tmp): +# idx = ak.Index(make_ak_arrays(size, dtype)) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# idx.to_hdf(f"{tmp_dirname}/idx_test") +# rd_idx = ak.read_hdf(f"{tmp_dirname}/idx_test*").popitem()[1] +# +# assert isinstance(rd_idx, ak.Index) +# assert type(rd_idx.values) == type(idx.values) +# assert idx.to_list() == rd_idx.to_list() +# +# if dtype == ak.str_: +# # if strings we need to also test Categorical +# idx = ak.Index(ak.Categorical(make_ak_arrays(size, dtype))) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# idx.to_hdf(f"{tmp_dirname}/idx_test") +# rd_idx = ak.read_hdf(f"{tmp_dirname}/idx_test*").popitem()[1] +# +# assert isinstance(rd_idx, ak.Index) +# assert type(rd_idx.values) == type(idx.values) +# assert idx.to_list() == rd_idx.to_list() +# +# @pytest.mark.parametrize("dtype1", NUMERIC_AND_STR_TYPES) +# @pytest.mark.parametrize("dtype2", NUMERIC_AND_STR_TYPES) +# @pytest.mark.parametrize("size", pytest.prob_size) +# def test_multi_index(self, dtype1, dtype2, size, hdf_test_base_tmp): +# t1 = make_ak_arrays(size, dtype1) +# t2 = make_ak_arrays(size, dtype2) +# idx = ak.Index.factory([t1, t2]) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# idx.to_hdf(f"{tmp_dirname}/idx_test") +# rd_idx = ak.read_hdf(f"{tmp_dirname}/idx_test*").popitem()[1] +# +# assert isinstance(rd_idx, ak.MultiIndex) +# assert idx.to_list() == rd_idx.to_list() +# +# # handle categorical cases as well +# if ak.str_ in [dtype1, dtype2]: +# if dtype1 == ak.str_: +# t1 = ak.Categorical(t1) +# if dtype2 == ak.str_: +# t2 = ak.Categorical(t2) +# idx = ak.Index.factory([t1, t2]) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# idx.to_hdf(f"{tmp_dirname}/idx_test") +# rd_idx = ak.read_hdf(f"{tmp_dirname}/idx_test*").popitem()[1] +# +# assert isinstance(rd_idx, ak.MultiIndex) +# assert idx.to_list() == rd_idx.to_list() +# +# def test_hdf_overwrite_index(self, hdf_test_base_tmp): +# # test repack with a single object +# a = ak.Index(ak.arange(1000)) +# b = ak.Index(ak.randint(0, 100, 1000)) +# c = 
ak.Index(ak.arange(15)) +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/idx_test" +# for repack in [True, False]: +# a.to_hdf(file_name, dataset="index") +# b.to_hdf(file_name, dataset="index_2", mode="append") +# f_list = glob.glob(f"{file_name}*") +# orig_size = sum(os.path.getsize(f) for f in f_list) +# # hdf5 only releases memory if overwriting last dset so overwrite first +# c.update_hdf(file_name, dataset="index", repack=repack) +# +# new_size = sum(os.path.getsize(f) for f in f_list) +# +# # ensure that the column was actually overwritten +# # test that repack on/off the file gets smaller/larger respectively +# assert new_size < orig_size if repack else new_size >= orig_size +# data = ak.read_hdf(f"{file_name}*") +# assert isinstance(data["index"], ak.Index) +# assert data["index"].to_list() == c.to_list() +# +# def test_special_objtype(self, hdf_test_base_tmp): +# """ +# This test is simply to ensure that the dtype is persisted through the io +# operation. It ultimately uses the process of pdarray, but need to ensure +# correct Arkouda Object Type is returned +# """ +# ip = ak.IPv4(ak.arange(10)) +# dt = ak.Datetime(ak.arange(10)) +# td = ak.Timedelta(ak.arange(10)) +# df = ak.DataFrame({"ip": ip, "datetime": dt, "timedelta": td}) +# +# with tempfile.TemporaryDirectory(dir=hdf_test_base_tmp) as tmp_dirname: +# ip.to_hdf(f"{tmp_dirname}/ip_test") +# rd_ip = ak.read_hdf(f"{tmp_dirname}/ip_test*").popitem()[1] +# assert isinstance(rd_ip, ak.IPv4) +# assert ip.to_list() == rd_ip.to_list() +# +# dt.to_hdf(f"{tmp_dirname}/dt_test") +# rd_dt = ak.read_hdf(f"{tmp_dirname}/dt_test*").popitem()[1] +# assert isinstance(rd_dt, ak.Datetime) +# assert dt.to_list() == rd_dt.to_list() +# +# td.to_hdf(f"{tmp_dirname}/td_test") +# rd_td = ak.read_hdf(f"{tmp_dirname}/td_test*").popitem()[1] +# assert isinstance(rd_td, ak.Timedelta) +# assert td.to_list() == rd_td.to_list() +# +# df.to_hdf(f"{tmp_dirname}/df_test") +# rd_df = ak.read_hdf(f"{tmp_dirname}/df_test*") +# +# assert isinstance(rd_df["ip"], ak.IPv4) +# assert isinstance(rd_df["datetime"], ak.Datetime) +# assert isinstance(rd_df["timedelta"], ak.Timedelta) +# assert df["ip"].to_list() == rd_df["ip"].to_list() +# assert df["datetime"].to_list() == rd_df["datetime"].to_list() +# assert df["timedelta"].to_list() == rd_df["timedelta"].to_list() +# +# +# class TestCSV: +# +# def test_csv_read_write(self, csv_test_base_tmp): +# # first test that can read csv with no header not written by Arkouda +# cols = ["ColA", "ColB", "ColC"] +# a = ["ABC", "DEF"] +# b = ["123", "345"] +# c = ["3.14", "5.56"] +# with tempfile.TemporaryDirectory(dir=csv_test_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/non_ak.csv" +# with open(file_name, "w") as f: +# f.write(",".join(cols) + "\n") +# f.write(f"{a[0]},{b[0]},{c[0]}\n") +# f.write(f"{a[1]},{b[1]},{c[1]}\n") +# +# data = ak.read_csv(file_name) +# assert list(data.keys()) == cols +# assert data["ColA"].to_list() == a +# assert data["ColB"].to_list() == b +# assert data["ColC"].to_list() == c +# +# data = ak.read_csv(file_name, datasets="ColB")["ColB"] +# assert isinstance(data, ak.Strings) +# assert data.to_list() == b +# +# d = { +# cols[0]: ak.array(a), +# cols[1]: ak.array([int(x) for x in b]), +# cols[2]: ak.array([round(float(x), 2) for x in c]), +# } +# with tempfile.TemporaryDirectory(dir=csv_test_base_tmp) as tmp_dirname: +# # test can read csv with header not written by Arkouda +# non_ak_file_name = f"{tmp_dirname}/non_ak.csv" +# with 
open(non_ak_file_name, "w") as f: +# f.write("**HEADER**\n") +# f.write("str,int64,float64\n") +# f.write("*/HEADER/*\n") +# f.write(",".join(cols) + "\n") +# f.write(f"{a[0]},{b[0]},{c[0]}\n") +# f.write(f"{a[1]},{b[1]},{c[1]}\n") +# +# # test writing file with Arkouda with non-standard delim +# non_standard_delim_file_name = f"{tmp_dirname}/non_standard_delim" +# ak.to_csv(d, f"{non_standard_delim_file_name}.csv", col_delim="|*|") +# +# for file_name, delim in [ +# (non_ak_file_name, ","), +# (f"{non_standard_delim_file_name}*", "|*|"), +# ]: +# data = ak.read_csv(file_name, column_delim=delim) +# assert list(data.keys()) == cols +# assert data["ColA"].to_list() == a +# assert data["ColB"].to_list() == [int(x) for x in b] +# assert data["ColC"].to_list() == [round(float(x), 2) for x in c] +# +# # test reading subset of columns +# data = ak.read_csv(file_name, datasets="ColB", column_delim=delim)["ColB"] +# assert isinstance(data, ak.pdarray) +# assert data.to_list() == [int(x) for x in b] +# +# # larger data set testing +# d = { +# "ColA": ak.randint(0, 50, 101), +# "ColB": ak.randint(0, 50, 101), +# "ColC": ak.randint(0, 50, 101), +# } +# with tempfile.TemporaryDirectory(dir=csv_test_base_tmp) as tmp_dirname: +# ak.to_csv(d, f"{tmp_dirname}/non_equal_set.csv") +# data = ak.read_csv(f"{tmp_dirname}/non_equal_set*") +# assert data["ColA"].to_list() == d["ColA"].to_list() +# assert data["ColB"].to_list() == d["ColB"].to_list() +# assert data["ColC"].to_list() == d["ColC"].to_list() +# +# +# class TestImportExport: +# +# @classmethod +# def setup_class(cls): +# cls.pddf = pd.DataFrame( +# data={ +# "c_1": np.array([np.iinfo(np.int64).min, -1, 0, np.iinfo(np.int64).max]), +# "c_3": np.array([False, True, False, False]), +# "c_4": np.array([-0.0, np.finfo(np.float64).min, np.nan, np.inf]), +# "c_5": np.array(["abc", " ", "xyz", ""]), +# }, +# index=np.arange(4), +# ) +# cls.akdf = ak.DataFrame(cls.pddf) +# +# def test_import_hdf(self, import_export_base_tmp): +# locales = pytest.nl +# with tempfile.TemporaryDirectory(dir=import_export_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/import_hdf_test" +# +# self.pddf.to_hdf(f"{file_name}_table.h5", key="dataframe", format="table", mode="w") +# akdf = ak.import_data(f"{file_name}_table.h5", write_file=f"{file_name}_ak_table.h5") +# assert len(glob.glob(f"{file_name}_ak_table*.h5")) == locales +# assert self.pddf.equals(akdf.to_pandas()) +# +# self.pddf.to_hdf( +# f"{file_name}_table_cols.h5", +# key="dataframe", +# format="table", +# data_columns=True, +# mode="w", +# ) +# akdf = ak.import_data( +# f"{file_name}_table_cols.h5", write_file=f"{file_name}_ak_table_cols.h5" +# ) +# assert len(glob.glob(f"{file_name}_ak_table_cols*.h5")) == locales +# assert self.pddf.equals(akdf.to_pandas()) +# +# self.pddf.to_hdf( +# f"{file_name}_fixed.h5", key="dataframe", format="fixed", data_columns=True, mode="w" +# ) +# akdf = ak.import_data(f"{file_name}_fixed.h5", write_file=f"{file_name}_ak_fixed.h5") +# assert len(glob.glob(f"{file_name}_ak_fixed*.h5")) == locales +# assert self.pddf.equals(akdf.to_pandas()) +# +# with pytest.raises(FileNotFoundError): +# ak.import_data(f"{file_name}_foo.h5", write_file=f"{file_name}_ak_fixed.h5") +# with pytest.raises(RuntimeError): +# ak.import_data(f"{file_name}_*.h5", write_file=f"{file_name}_ak_fixed.h5") +# +# def test_export_hdf(self, import_export_base_tmp): +# with tempfile.TemporaryDirectory(dir=import_export_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/export_hdf_test" +# +# 
self.akdf.to_hdf(f"{file_name}_ak_write") +# +# pddf = ak.export( +# f"{file_name}_ak_write", write_file=f"{file_name}_pd_from_ak.h5", index=True +# ) +# assert len(glob.glob(f"{file_name}_pd_from_ak.h5")) == 1 +# assert pddf.equals(self.akdf.to_pandas()) +# +# with pytest.raises(RuntimeError): +# ak.export(f"{tmp_dirname}_foo.h5", write_file=f"{tmp_dirname}/pd_from_ak.h5", index=True) +# +# def test_import_parquet(self, import_export_base_tmp): +# locales = pytest.nl +# with tempfile.TemporaryDirectory(dir=import_export_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/import_pq_test" +# +# self.pddf.to_parquet(f"{file_name}_table.parquet") +# akdf = ak.import_data( +# f"{file_name}_table.parquet", write_file=f"{file_name}_ak_table.parquet" +# ) +# assert len(glob.glob(f"{file_name}_ak_table*.parquet")) == locales +# assert self.pddf.equals(akdf.to_pandas()) +# +# def test_export_parquet(self, import_export_base_tmp): +# with tempfile.TemporaryDirectory(dir=import_export_base_tmp) as tmp_dirname: +# file_name = f"{tmp_dirname}/export_pq_test" +# +# self.akdf.to_parquet(f"{file_name}_ak_write") +# +# pddf = ak.export( +# f"{file_name}_ak_write", write_file=f"{file_name}_pd_from_ak.parquet", index=True +# ) +# assert len(glob.glob(f"{file_name}_pd_from_ak.parquet")) == 1 +# assert pddf[self.akdf.columns.values].equals(self.akdf.to_pandas()) +# +# with pytest.raises(RuntimeError): +# ak.export( +# f"{tmp_dirname}_foo.parquet", +# write_file=f"{tmp_dirname}/pd_from_ak.parquet", +# index=True, +# ) +# +# +# class TestZarr: +# +# @pytest.mark.skip +# def test_zarr_read_write(self, zarr_test_base_tmp): +# import arkouda.array_api as Array +# +# shapes = [(10,), (20,)] +# chunk_shapes = [(2,), (3,)] +# dtypes = [ak.int64, ak.float64] +# for shape, chunk_shape in zip(shapes, chunk_shapes): +# for dtype in dtypes: +# a = Array.full(shape, 7, dtype=dtype) +# with tempfile.TemporaryDirectory(dir=zarr_test_base_tmp) as tmp_dirname: +# to_zarr(f"{tmp_dirname}", a._array, chunk_shape) +# b = read_zarr(f"{tmp_dirname}", len(shape), dtype) +# assert np.allclose(a.to_ndarray(), b.to_ndarray()) diff --git a/tests/numpy/dtypes_test.py b/tests/numpy/dtypes_test.py index 4c1c0522ae7..336595cc41c 100644 --- a/tests/numpy/dtypes_test.py +++ b/tests/numpy/dtypes_test.py @@ -99,7 +99,15 @@ def test_pdarrays_datatypes(self): assert dtypes.dtype("bigint") == ak.arange(2**200, 2**200 + 10).dtype def test_isSupportedInt(self): - for supported in -10, 1, np.int64(1), np.int64(1.0), np.uint32(1), 2**63 + 1, 2**200: + for supported in ( + -10, + 1, + np.int64(1), + np.int64(1.0), + np.uint32(1), + 2**63 + 1, + 2**200, + ): assert dtypes.isSupportedInt(supported) for unsupported in 1.0, "1": assert not dtypes.isSupportedInt(unsupported) @@ -158,13 +166,26 @@ def test_DtypeEnum(self): assert ( frozenset( - {"bool_", "float", "float64", "int", "int64", "uint", "uint64", "uint8", "bigint", "str"} + { + "bool_", + "float", + "float64", + "int", + "int64", + "uint", + "uint64", + "uint8", + "bigint", + "str", + } ) == ak.ARKOUDA_SUPPORTED_DTYPES ) def test_NumericDTypes(self): - num_types = frozenset(["bool", "bool_", "float", "float64", "int", "int64", "uint64", "bigint"]) + num_types = frozenset( + ["bool", "bool_", "float", "float64", "int", "int64", "uint64", "bigint"] + ) assert num_types == dtypes.NumericDTypes def test_SeriesDTypes(self): @@ -181,8 +202,10 @@ def test_SeriesDTypes(self): assert dtypes.SeriesDTypes[dt] == np.bool_ def test_scalars(self): - assert "typing.Union[bool, numpy.bool_]" == 
str(ak.bool_scalars) - assert "typing.Union[float, numpy.float64, numpy.float32]" == str(ak.float_scalars) + assert "typing.Union[bool, numpy.bool]" == str(ak.bool_scalars) + assert "typing.Union[float, numpy.float64, numpy.float32]" == str( + ak.float_scalars + ) assert ( "typing.Union[int, numpy.int8, numpy.int16, numpy.int32, numpy.int64, " + "numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64]" @@ -196,12 +219,12 @@ def test_scalars(self): assert "typing.Union[str, numpy.str_]" == str(ak.str_scalars) assert ( "typing.Union[numpy.float64, numpy.float32, numpy.int8, numpy.int16, numpy.int32, " - + "numpy.int64, numpy.bool_, numpy.str_, numpy.uint8, numpy.uint16, numpy.uint32, " + + "numpy.int64, numpy.bool, numpy.str_, numpy.uint8, numpy.uint16, numpy.uint32, " + "numpy.uint64]" ) == str(ak.numpy_scalars) assert ( - "typing.Union[bool, numpy.bool_, float, numpy.float64, numpy.float32, int, numpy.int8, " + "typing.Union[bool, numpy.bool, float, numpy.float64, numpy.float32, int, numpy.int8, " + "numpy.int16, numpy.int32, numpy.int64, numpy.uint8, numpy.uint16, numpy.uint32," + " numpy.uint64, numpy.str_, str]" ) == str(ak.all_scalars) diff --git a/tests/operator_test.py b/tests/operator_test.py index 4ee1a123535..9b8ae26894d 100644 --- a/tests/operator_test.py +++ b/tests/operator_test.py @@ -40,7 +40,12 @@ def test_numpy_equivalency(self, size=100, verbose=pytest.verbose): "bool": (np.arange(0, size, 1) % 2) == 0, } global scalars - scalars = {"int64": 5, "uint64": np.uint64(2**63 + 1), "float64": -3.14159, "bool": True} + scalars = { + "int64": 5, + "uint64": np.uint64(2**63 + 1), + "float64": -3.14159, + "bool": True, + } dtypes = pdarrays.keys() if verbose: print("Operators: ", ak.pdarray.BinOps) @@ -66,6 +71,7 @@ def do_op(lt, rt, ls, rs, isarkouda, oper): evalstr += f'scalars["{rt}"]' else: evalstr += f'{("ndarrays", "pdarrays")[isarkouda]}["{rt}"]' + print("EVAL STRING: ", evalstr) res = eval(evalstr) return res @@ -96,10 +102,14 @@ def do_op(lt, rt, ls, rs, isarkouda, oper): ): # neither numpy nor arkouda implement results["neither_implement"].append((expression, str(e))) else: # arkouda implements with error, np does not implement - results["arkouda_minus_numpy"].append((expression, str(e), True)) + results["arkouda_minus_numpy"].append( + (expression, str(e), True) + ) continue # arkouda implements but not numpy - results["arkouda_minus_numpy"].append((expression, str(akres), False)) + results["arkouda_minus_numpy"].append( + (expression, str(akres), False) + ) continue try: akres = do_op(ltype, rtype, lscalar, rscalar, True, op) @@ -107,9 +117,13 @@ def do_op(lt, rt, ls, rs, isarkouda, oper): if "not implemented" or "unrecognized type" in str( e ): # numpy implements but not arkouda - results["numpy_minus_arkouda"].append((expression, str(e), True)) + results["numpy_minus_arkouda"].append( + (expression, str(e), True) + ) else: # both implement, but arkouda errors - results["both_implement"].append((expression, str(e), True, False, False)) + results["both_implement"].append( + (expression, str(e), True, False, False) + ) continue # both numpy and arkouda execute without error try: @@ -118,35 +132,49 @@ def do_op(lt, rt, ls, rs, isarkouda, oper): warnings.warn( f"Cannot detect return dtype of ak result: {akres} (np result: {npres})" ) - results["both_implement"].append((expression, str(akres), False, True, False)) + results["both_implement"].append( + (expression, str(akres), False, True, False) + ) continue if akrestype != npres.dtype: restypes = f"{npres.dtype}(np) vs. 
{akrestype}(ak)" - results["both_implement"].append((expression, restypes, False, True, False)) + results["both_implement"].append( + (expression, restypes, False, True, False) + ) continue try: akasnp = akres.to_ndarray() except Exception: warnings.warn(f"Could not convert to ndarray: {akres}") - results["both_implement"].append((expression, str(akres), True, False, False)) + results["both_implement"].append( + (expression, str(akres), True, False, False) + ) continue if not np.allclose(akasnp, npres, equal_nan=True): res = f"np: {npres}\nak: {akasnp}" - results["both_implement"].append((expression, res, False, False, True)) + results["both_implement"].append( + (expression, res, False, False, True) + ) continue # Finally, both numpy and arkouda agree on result results["both_implement"].append((expression, "", False, False, False)) - print(f'# ops not implemented by numpy or arkouda: {len(results["neither_implement"])}') + print( + f'# ops not implemented by numpy or arkouda: {len(results["neither_implement"])}' + ) if verbose: for expression, err in results["neither_implement"]: print(expression) - print(f'# ops implemented by numpy but not arkouda: {len(results["numpy_minus_arkouda"])}') + print( + f'# ops implemented by numpy but not arkouda: {len(results["numpy_minus_arkouda"])}' + ) if verbose: for expression, err, flag in results["numpy_minus_arkouda"]: print(expression) - print(f'# ops implemented by arkouda but not numpy: {len(results["arkouda_minus_numpy"])}') + print( + f'# ops implemented by arkouda but not numpy: {len(results["arkouda_minus_numpy"])}' + ) if verbose: for expression, res, flag in results["arkouda_minus_numpy"]: print(expression, " -> ", res) @@ -180,17 +208,23 @@ def test_pdarray_and_scalar_ops(self, dtype): pda = ak.ones(100, dtype=dtype) npa = np.ones(100, dtype=dtype) for scal in 1, np.int64(1): - for ak_add, np_add in zip((pda + scal, scal + pda), (npa + scal, scal + npa)): + for ak_add, np_add in zip( + (pda + scal, scal + pda), (npa + scal, scal + npa) + ): assert isinstance(ak_add, ak.pdarrayclass.pdarray) assert np.allclose(ak_add.to_ndarray(), np_add) for scal in 2, np.int64(2): - for ak_sub, np_sub in zip((pda - scal, scal - pda), (npa - scal, scal - npa)): + for ak_sub, np_sub in zip( + (pda - scal, scal - pda), (npa - scal, scal - npa) + ): assert isinstance(ak_sub, ak.pdarrayclass.pdarray) assert np.allclose(ak_sub.to_ndarray(), np_sub) for scal in 5, np.int64(5): - for ak_mul, np_mul in zip((pda * scal, scal * pda), (npa * scal, scal * npa)): + for ak_mul, np_mul in zip( + (pda * scal, scal * pda), (npa * scal, scal * npa) + ): assert isinstance(ak_mul, ak.pdarrayclass.pdarray) assert np.allclose(ak_mul.to_ndarray(), np_mul) @@ -198,7 +232,9 @@ def test_pdarray_and_scalar_ops(self, dtype): pda *= 15 npa *= 15 for scal in 3, np.int64(3): - for ak_div, np_div in zip((pda / scal, scal / pda), (npa / scal, scal / npa)): + for ak_div, np_div in zip( + (pda / scal, scal / pda), (npa / scal, scal / npa) + ): assert isinstance(ak_div, ak.pdarrayclass.pdarray) assert np.allclose(ak_div.to_ndarray(), np_div) @@ -216,13 +252,16 @@ def test_concatenation(self, dtype): def test_max_bits_concatenation(self): # reproducer for issue #2802 - concatenated = ak.concatenate([ak.arange(5, max_bits=3), ak.arange(2**200 - 1, 2**200 + 4)]) + concatenated = ak.concatenate( + [ak.arange(5, max_bits=3), ak.arange(2**200 - 1, 2**200 + 4)] + ) assert concatenated.max_bits == 3 assert [0, 1, 2, 3, 4, 7, 0, 1, 2, 3] == concatenated.to_list() def test_fixed_concatenate(self): 
for pda1, pda2 in zip( - (ak.arange(4), ak.linspace(0, 3, 4)), (ak.arange(4, 7), ak.linspace(4, 6, 3)) + (ak.arange(4), ak.linspace(0, 3, 4)), + (ak.arange(4, 7), ak.linspace(4, 6, 3)), ): ans = list(range(7)) assert ak.concatenate([pda1, pda2]).to_list() == ans @@ -285,28 +324,64 @@ def test_int_uint_binops(self): ak_uint = ak.array(np_uint) # Vector-Vector Case (Division and Floor Division) - assert np.allclose((ak_uint / ak_uint).to_ndarray(), np_uint / np_uint, equal_nan=True) - assert np.allclose((ak_int / ak_uint).to_ndarray(), np_int / np_uint, equal_nan=True) - assert np.allclose((ak_uint / ak_int).to_ndarray(), np_uint / np_int, equal_nan=True) - assert np.allclose((ak_uint // ak_uint).to_ndarray(), np_uint // np_uint, equal_nan=True) - assert np.allclose((ak_int // ak_uint).to_ndarray(), np_int // np_uint, equal_nan=True) - assert np.allclose((ak_uint // ak_int).to_ndarray(), np_uint // np_int, equal_nan=True) + assert np.allclose( + (ak_uint / ak_uint).to_ndarray(), np_uint / np_uint, equal_nan=True + ) + assert np.allclose( + (ak_int / ak_uint).to_ndarray(), np_int / np_uint, equal_nan=True + ) + assert np.allclose( + (ak_uint / ak_int).to_ndarray(), np_uint / np_int, equal_nan=True + ) + assert np.allclose( + (ak_uint // ak_uint).to_ndarray(), np_uint // np_uint, equal_nan=True + ) + assert np.allclose( + (ak_int // ak_uint).to_ndarray(), np_int // np_uint, equal_nan=True + ) + assert np.allclose( + (ak_uint // ak_int).to_ndarray(), np_uint // np_int, equal_nan=True + ) # Scalar-Vector Case (Division and Floor Division) - assert np.allclose((ak_uint[0] / ak_uint).to_ndarray(), np_uint[0] / np_uint, equal_nan=True) - assert np.allclose((ak_int[0] / ak_uint).to_ndarray(), np_int[0] / np_uint, equal_nan=True) - assert np.allclose((ak_uint[0] / ak_int).to_ndarray(), np_uint[0] / np_int, equal_nan=True) - assert np.allclose((ak_uint[0] // ak_uint).to_ndarray(), np_uint[0] // np_uint, equal_nan=True) - assert np.allclose((ak_int[0] // ak_uint).to_ndarray(), np_int[0] // np_uint, equal_nan=True) - assert np.allclose((ak_uint[0] // ak_int).to_ndarray(), np_uint[0] // np_int, equal_nan=True) + assert np.allclose( + (ak_uint[0] / ak_uint).to_ndarray(), np_uint[0] / np_uint, equal_nan=True + ) + assert np.allclose( + (ak_int[0] / ak_uint).to_ndarray(), np_int[0] / np_uint, equal_nan=True + ) + assert np.allclose( + (ak_uint[0] / ak_int).to_ndarray(), np_uint[0] / np_int, equal_nan=True + ) + assert np.allclose( + (ak_uint[0] // ak_uint).to_ndarray(), np_uint[0] // np_uint, equal_nan=True + ) + assert np.allclose( + (ak_int[0] // ak_uint).to_ndarray(), np_int[0] // np_uint, equal_nan=True + ) + assert np.allclose( + (ak_uint[0] // ak_int).to_ndarray(), np_uint[0] // np_int, equal_nan=True + ) # Vector-Scalar Case (Division and Floor Division) - assert np.allclose((ak_uint / ak_uint[0]).to_ndarray(), np_uint / np_uint[0], equal_nan=True) - assert np.allclose((ak_int / ak_uint[0]).to_ndarray(), np_int / np_uint[0], equal_nan=True) - assert np.allclose((ak_uint / ak_int[0]).to_ndarray(), np_uint / np_int[0], equal_nan=True) - assert np.allclose((ak_uint // ak_uint[0]).to_ndarray(), np_uint // np_uint[0], equal_nan=True) - assert np.allclose((ak_int // ak_uint[0]).to_ndarray(), np_int // np_uint[0], equal_nan=True) - assert np.allclose((ak_uint // ak_int[0]).to_ndarray(), np_uint // np_int[0], equal_nan=True) + assert np.allclose( + (ak_uint / ak_uint[0]).to_ndarray(), np_uint / np_uint[0], equal_nan=True + ) + assert np.allclose( + (ak_int / ak_uint[0]).to_ndarray(), np_int / np_uint[0], 
equal_nan=True + ) + assert np.allclose( + (ak_uint / ak_int[0]).to_ndarray(), np_uint / np_int[0], equal_nan=True + ) + assert np.allclose( + (ak_uint // ak_uint[0]).to_ndarray(), np_uint // np_uint[0], equal_nan=True + ) + assert np.allclose( + (ak_int // ak_uint[0]).to_ndarray(), np_int // np_uint[0], equal_nan=True + ) + assert np.allclose( + (ak_uint // ak_int[0]).to_ndarray(), np_uint // np_int[0], equal_nan=True + ) def test_float_uint_binops(self): # Test fix for issue #1620 @@ -323,38 +398,90 @@ def test_float_uint_binops(self): ak_floats = [ak_float, scalar_float] np_floats = [np_float, scalar_float] for aku, akf, npu, npf in zip(ak_uints, ak_floats, np_uints, np_floats): - assert np.allclose((ak_uint + akf).to_ndarray(), np_uint + npf, equal_nan=True) - assert np.allclose((akf + ak_uint).to_ndarray(), npf + np_uint, equal_nan=True) - assert np.allclose((ak_float + aku).to_ndarray(), np_float + npu, equal_nan=True) - assert np.allclose((aku + ak_float).to_ndarray(), npu + np_float, equal_nan=True) - - assert np.allclose((ak_uint - akf).to_ndarray(), np_uint - npf, equal_nan=True) - assert np.allclose((akf - ak_uint).to_ndarray(), npf - np_uint, equal_nan=True) - assert np.allclose((ak_float - aku).to_ndarray(), np_float - npu, equal_nan=True) - assert np.allclose((aku - ak_float).to_ndarray(), npu - np_float, equal_nan=True) - - assert np.allclose((ak_uint * akf).to_ndarray(), np_uint * npf, equal_nan=True) - assert np.allclose((akf * ak_uint).to_ndarray(), npf * np_uint, equal_nan=True) - assert np.allclose((ak_float * aku).to_ndarray(), np_float * npu, equal_nan=True) - assert np.allclose((aku * ak_float).to_ndarray(), npu * np_float, equal_nan=True) - - assert np.allclose((ak_uint / akf).to_ndarray(), np_uint / npf, equal_nan=True) - assert np.allclose((akf / ak_uint).to_ndarray(), npf / np_uint, equal_nan=True) - assert np.allclose((ak_float / aku).to_ndarray(), np_float / npu, equal_nan=True) - assert np.allclose((aku / ak_float).to_ndarray(), npu / np_float, equal_nan=True) - - assert np.allclose((ak_uint // akf).to_ndarray(), np_uint // npf, equal_nan=True) - assert np.allclose((akf // ak_uint).to_ndarray(), npf // np_uint, equal_nan=True) - assert np.allclose((ak_float // aku).to_ndarray(), np_float // npu, equal_nan=True) - assert np.allclose((aku // ak_float).to_ndarray(), npu // np_float, equal_nan=True) - - assert np.allclose((ak_uint**akf).to_ndarray(), np_uint**npf, equal_nan=True) - assert np.allclose((akf**ak_uint).to_ndarray(), npf**np_uint, equal_nan=True) - assert np.allclose((ak_float**aku).to_ndarray(), np_float**npu, equal_nan=True) - assert np.allclose((aku**ak_float).to_ndarray(), npu**np_float, equal_nan=True) - - assert np.allclose((ak_float % aku).to_ndarray(), np_float % npu, equal_nan=True) - assert np.allclose((aku % ak_float).to_ndarray(), npu % np_float, equal_nan=True) + assert np.allclose( + (ak_uint + akf).to_ndarray(), np_uint + npf, equal_nan=True + ) + assert np.allclose( + (akf + ak_uint).to_ndarray(), npf + np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float + aku).to_ndarray(), np_float + npu, equal_nan=True + ) + assert np.allclose( + (aku + ak_float).to_ndarray(), npu + np_float, equal_nan=True + ) + + assert np.allclose( + (ak_uint - akf).to_ndarray(), np_uint - npf, equal_nan=True + ) + assert np.allclose( + (akf - ak_uint).to_ndarray(), npf - np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float - aku).to_ndarray(), np_float - npu, equal_nan=True + ) + assert np.allclose( + (aku - ak_float).to_ndarray(), npu - 
np_float, equal_nan=True + ) + + assert np.allclose( + (ak_uint * akf).to_ndarray(), np_uint * npf, equal_nan=True + ) + assert np.allclose( + (akf * ak_uint).to_ndarray(), npf * np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float * aku).to_ndarray(), np_float * npu, equal_nan=True + ) + assert np.allclose( + (aku * ak_float).to_ndarray(), npu * np_float, equal_nan=True + ) + + assert np.allclose( + (ak_uint / akf).to_ndarray(), np_uint / npf, equal_nan=True + ) + assert np.allclose( + (akf / ak_uint).to_ndarray(), npf / np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float / aku).to_ndarray(), np_float / npu, equal_nan=True + ) + assert np.allclose( + (aku / ak_float).to_ndarray(), npu / np_float, equal_nan=True + ) + + assert np.allclose( + (ak_uint // akf).to_ndarray(), np_uint // npf, equal_nan=True + ) + assert np.allclose( + (akf // ak_uint).to_ndarray(), npf // np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float // aku).to_ndarray(), np_float // npu, equal_nan=True + ) + assert np.allclose( + (aku // ak_float).to_ndarray(), npu // np_float, equal_nan=True + ) + + assert np.allclose( + (ak_uint**akf).to_ndarray(), np_uint**npf, equal_nan=True + ) + assert np.allclose( + (akf**ak_uint).to_ndarray(), npf**np_uint, equal_nan=True + ) + assert np.allclose( + (ak_float**aku).to_ndarray(), np_float**npu, equal_nan=True + ) + assert np.allclose( + (aku**ak_float).to_ndarray(), npu**np_float, equal_nan=True + ) + + assert np.allclose( + (ak_float % aku).to_ndarray(), np_float % npu, equal_nan=True + ) + assert np.allclose( + (aku % ak_float).to_ndarray(), npu % np_float, equal_nan=True + ) def test_shift_maxbits_binop(self): # This tests for a bug when left shifting by a value >=64 bits for int/uint, Issue #2099 @@ -382,9 +509,15 @@ def test_shift_maxbits_binop(self): assert (ak_arr >> ak_shift).to_list() == (np_arr >> np_shift).tolist() # Binopvv case, Mixed type - ak_shift_other_dtype = ak.cast(ak_shift, "int64" if dtype != "int64" else "uint64") - assert (ak_arr << ak_shift_other_dtype).to_list() == (np_arr << np_shift).tolist() - assert (ak_arr >> ak_shift_other_dtype).to_list() == (np_arr >> np_shift).tolist() + ak_shift_other_dtype = ak.cast( + ak_shift, "int64" if dtype != "int64" else "uint64" + ) + assert (ak_arr << ak_shift_other_dtype).to_list() == ( + np_arr << np_shift + ).tolist() + assert (ak_arr >> ak_shift_other_dtype).to_list() == ( + np_arr >> np_shift + ).tolist() def test_shift_bool_int64_binop(self): # This tests for a missing implementation of bit shifting booleans and ints, Issue #2945 @@ -411,24 +544,30 @@ def test_shift_bool_int64_binop(self): assert np.allclose((ak_bool[0] >> ak_int).to_ndarray(), np_bool[0] >> np_int) assert np.allclose((ak_bool[0] << ak_int).to_ndarray(), np_bool[0] << np_int) - def test_shift_equals_scalar_binops(self): - vector_pairs = [ - (ak.arange(0, 5, dtype=ak.int64), np.arange(5, dtype=np.int64)), - (ak.arange(0, 5, dtype=ak.uint64), np.arange(5, dtype=np.uint64)), + @pytest.mark.parametrize("dtype", [ak.int64, ak.uint64]) + def test_shift_equals_scalar_binops(self, dtype): + + ak_vector = ak.arange(0, 5, dtype=dtype) + np_vector = np.arange(5, dtype=dtype) + shift_scalars = [ + dtype(1), + dtype(5), + 1, + 5, + True, + False, ] - shift_scalars = [np.int64(1), np.int64(5), np.uint64(1), np.uint64(5), True, False] - for ak_vector, np_vector in vector_pairs: - for x in shift_scalars: - assert ak_vector.to_list() == np_vector.tolist() + for x in shift_scalars: + assert ak_vector.to_list() == np_vector.tolist() - 
ak_vector <<= x - np_vector <<= x - assert ak_vector.to_list() == np_vector.tolist() + ak_vector <<= x + np_vector <<= x + assert ak_vector.to_list() == np_vector.tolist() - ak_vector >>= x - np_vector >>= x - assert ak_vector.to_list() == np_vector.tolist() + ak_vector >>= x + np_vector >>= x + assert ak_vector.to_list() == np_vector.tolist() def test_shift_equals_vector_binops(self): vector_pairs = [ @@ -446,7 +585,9 @@ def test_shift_equals_vector_binops(self): for ak_vector, np_vector in vector_pairs: for v in shift_vectors: - if (v[0].dtype.kind != "b") and (ak_vector[0].dtype.kind != v[0].dtype.kind): + if (v[0].dtype.kind != "b") and ( + ak_vector[0].dtype.kind != v[0].dtype.kind + ): continue assert ak_vector.to_list() == np_vector.tolist() @@ -475,7 +616,10 @@ def test_concatenate_type_preservation(self): # test single and empty assert isinstance(ak.concatenate([special_one]), special_type) assert special_one.to_list() == ak.concatenate([special_one]).to_list() - assert isinstance(ak.concatenate([special_type(ak.array([], dtype=ak.int64))]), special_type) + assert isinstance( + ak.concatenate([special_type(ak.array([], dtype=ak.int64))]), + special_type, + ) # verify ak.util.concatenate still works special_aku_concat = akuconcat([special_one, special_two]) @@ -492,17 +636,25 @@ def test_floor_div_edge_cases(self): ak_edge_cases = ak.array(np_edge_cases) for s in scalar_edge_cases: - assert np.allclose((ak_edge_cases // s).to_ndarray(), np_edge_cases // s, equal_nan=True) - assert np.allclose((s // ak_edge_cases).to_ndarray(), s // np_edge_cases, equal_nan=True) + assert np.allclose( + (ak_edge_cases // s).to_ndarray(), np_edge_cases // s, equal_nan=True + ) + assert np.allclose( + (s // ak_edge_cases).to_ndarray(), s // np_edge_cases, equal_nan=True + ) # test both vector // vector n_vect = np.full(len(scalar_edge_cases), s) a_vect = ak.array(n_vect) assert np.allclose( - (ak_edge_cases // a_vect).to_ndarray(), np_edge_cases // n_vect, equal_nan=True + (ak_edge_cases // a_vect).to_ndarray(), + np_edge_cases // n_vect, + equal_nan=True, ) assert np.allclose( - (a_vect // ak_edge_cases).to_ndarray(), n_vect // np_edge_cases, equal_nan=True + (a_vect // ak_edge_cases).to_ndarray(), + n_vect // np_edge_cases, + equal_nan=True, ) def test_pda_power(self): @@ -510,7 +662,10 @@ def test_pda_power(self): a = ak.array(n) assert ak.power(a, 2).to_list() == np.power(n, 2).tolist() - assert ak.power(a, ak.array([2, 3, 4])).to_list() == np.power(n, [2, 3, 4]).tolist() + assert ( + ak.power(a, ak.array([2, 3, 4])).to_list() + == np.power(n, [2, 3, 4]).tolist() + ) # Test a singleton with and without a Boolean argument a = ak.array([7]) @@ -524,16 +679,22 @@ def test_pda_power(self): # Test a singleton with a mixed Boolean argument a = ak.arange(10) - assert [i if i % 2 else i**2 for i in range(10)] == ak.power(a, 2, a % 2 == 0).to_list() + assert [i if i % 2 else i**2 for i in range(10)] == ak.power( + a, 2, a % 2 == 0 + ).to_list() # Test invalid input, negative n = np.array([-1.0, -3.0]) a = ak.array(n) - assert np.allclose(ak.power(a, 0.5).to_ndarray(), np.power(n, 0.5), equal_nan=True) + assert np.allclose( + ak.power(a, 0.5).to_ndarray(), np.power(n, 0.5), equal_nan=True + ) # Test edge case input, inf infs = [np.inf, -np.inf] - assert (np.power(np.array(infs), 2) == ak.power(ak.array(infs), 2).to_ndarray()).all() + assert ( + np.power(np.array(infs), 2) == ak.power(ak.array(infs), 2).to_ndarray() + ).all() def test_pda_sqrt(self): n = np.array([4, 16.0, -1, 0, np.inf]) @@ -542,7 +703,9 @@ 
def test_pda_sqrt(self): # Test with a mixed Boolean array a = ak.arange(5) - assert [i if i % 2 else i**0.5 for i in range(5)] == ak.sqrt(a, a % 2 == 0).to_list() + assert [i if i % 2 else i**0.5 for i in range(5)] == ak.sqrt( + a, a % 2 == 0 + ).to_list() def test_uint_and_bigint_operation_equals(self): def declare_arrays(): @@ -685,7 +848,10 @@ def test_str_repr(self): ] assert ak.linspace(0, 10, 20).__str__() in answers assert "[False False False]" == ak.isnan(ak.array([1.1, 2.3, 5])).__str__() - assert "[False False False ... False False False]" == ak.isnan(ak.linspace(0, 10, 20)).__str__() + assert ( + "[False False False ... False False False]" + == ak.isnan(ak.linspace(0, 10, 20)).__str__() + ) # Test __repr__() assert "array([1 2 3])" == ak.array([1, 2, 3]).__repr__() @@ -703,7 +869,9 @@ def test_str_repr(self): "array([0.00000000000000000 0.52631578947368418 1.0526315789473684 ... 8.9473684210526319 9.473684210526315 10.00000000000000000])", ] assert ak.linspace(0, 10, 20).__repr__() in answers - assert "array([False False False])" == ak.isnan(ak.array([1.1, 2.3, 5])).__repr__() + assert ( + "array([False False False])" == ak.isnan(ak.array([1.1, 2.3, 5])).__repr__() + ) assert ( "array([False False False ... False False False])" == ak.isnan(ak.linspace(0, 10, 20)).__repr__() @@ -715,7 +883,9 @@ def test_str_repr(self): def test_bigint_binops(self): # test bigint array with max_bits=64 against an equivalent uint64 u = ak.array([0, 1, 2, 2**64 - 3, 2**64 - 2, 2**64 - 1], dtype=ak.uint64) - bi = ak.array([0, 1, 2, 2**64 - 3, 2**64 - 2, 2**64 - 1], dtype=ak.bigint, max_bits=64) + bi = ak.array( + [0, 1, 2, 2**64 - 3, 2**64 - 2, 2**64 - 1], dtype=ak.bigint, max_bits=64 + ) mod_by = 2**64 bi_range = ak.arange(6, dtype=ak.bigint) @@ -729,16 +899,28 @@ def test_bigint_binops(self): # logical bit ops: only work if both arguments are bigint assert (u & u_range).to_list() == (bi & bi_range).to_list() - assert [(bi[i] & bi_scalar) % mod_by for i in range(bi.size)] == (bi & bi_scalar).to_list() - assert [(bi_scalar & bi[i]) % mod_by for i in range(bi.size)] == (bi_scalar & bi).to_list() + assert [(bi[i] & bi_scalar) % mod_by for i in range(bi.size)] == ( + bi & bi_scalar + ).to_list() + assert [(bi_scalar & bi[i]) % mod_by for i in range(bi.size)] == ( + bi_scalar & bi + ).to_list() assert (u | u_range).to_list() == (bi | bi_range).to_list() - assert [(bi[i] | bi_scalar) % mod_by for i in range(bi.size)] == (bi | bi_scalar).to_list() - assert [(bi_scalar | bi[i]) % mod_by for i in range(bi.size)] == (bi_scalar | bi).to_list() + assert [(bi[i] | bi_scalar) % mod_by for i in range(bi.size)] == ( + bi | bi_scalar + ).to_list() + assert [(bi_scalar | bi[i]) % mod_by for i in range(bi.size)] == ( + bi_scalar | bi + ).to_list() assert (u ^ u_range).to_list() == (bi ^ bi_range).to_list() - assert [(bi[i] ^ bi_scalar) % mod_by for i in range(bi.size)] == (bi ^ bi_scalar).to_list() - assert [(bi_scalar ^ bi[i]) % mod_by for i in range(bi.size)] == (bi_scalar ^ bi).to_list() + assert [(bi[i] ^ bi_scalar) % mod_by for i in range(bi.size)] == ( + bi ^ bi_scalar + ).to_list() + assert [(bi_scalar ^ bi[i]) % mod_by for i in range(bi.size)] == ( + bi_scalar ^ bi + ).to_list() # bit shifts: left side must be bigint, right side must be int/uint ans = u << u_range @@ -813,22 +995,41 @@ def test_bigint_rotate(self): # rotate by scalar for i in range(10): - assert ak.array([10], dtype=ak.bigint, max_bits=4).rotl(i) == 10 if i % 2 == 0 else 5 - assert ak.array([10], dtype=ak.bigint, max_bits=4).rotr(i) == 10 if 
i % 2 == 0 else 5 + assert ( + ak.array([10], dtype=ak.bigint, max_bits=4).rotl(i) == 10 + if i % 2 == 0 + else 5 + ) + assert ( + ak.array([10], dtype=ak.bigint, max_bits=4).rotr(i) == 10 + if i % 2 == 0 + else 5 + ) # rotate by array - left_rot = ak.bigint_from_uint_arrays([ak.full(10, 10, ak.uint64)], max_bits=4).rotl( - ak.arange(10) - ) - right_rot = ak.bigint_from_uint_arrays([ak.full(10, 10, ak.uint64)], max_bits=4).rotr( - ak.arange(10) - ) + left_rot = ak.bigint_from_uint_arrays( + [ak.full(10, 10, ak.uint64)], max_bits=4 + ).rotl(ak.arange(10)) + right_rot = ak.bigint_from_uint_arrays( + [ak.full(10, 10, ak.uint64)], max_bits=4 + ).rotr(ak.arange(10)) ans = [10 if i % 2 == 0 else 5 for i in range(10)] assert left_rot.to_list() == ans assert right_rot.to_list() == ans def test_float_mods(self): - edge_cases = [np.nan, -np.inf, -7.0, -3.14, -0.0, 0.0, 3.14, 7.0, np.inf, np.nan] + edge_cases = [ + np.nan, + -np.inf, + -7.0, + -3.14, + -0.0, + 0.0, + 3.14, + 7.0, + np.inf, + np.nan, + ] # get 2 random permutations of edgecases rand_edge_cases1 = np.random.permutation(edge_cases) @@ -842,12 +1043,22 @@ def test_float_mods(self): uint_arr = np.arange(2**64 - 10, 2**64, dtype=np.uint64) u_scal = np.uint(2**63 + 1) - args = [rand_edge_cases1, rand_edge_cases2, float_arr, int_arr, uint_arr, i_scal, u_scal] + args = [ + rand_edge_cases1, + rand_edge_cases2, + float_arr, + int_arr, + uint_arr, + i_scal, + u_scal, + ] # add all the float edge cases as scalars args.extend(edge_cases) def type_helper(x): - return ak.resolve_scalar_dtype(x) if ak.isSupportedNumber(x) else x.dtype.name + return ( + ak.resolve_scalar_dtype(x) if ak.isSupportedNumber(x) else x.dtype.name + ) # take the product of args (i.e. every possible combination) for a, b in product(args, args): @@ -863,8 +1074,12 @@ def type_helper(x): ak_b = b if ak.isSupportedNumber(b) else ak.array(b) # verify mod and fmod match numpy - assert np.allclose(ak.mod(ak_a, ak_b).to_ndarray(), np.mod(a, b), equal_nan=True) - assert np.allclose(ak.fmod(ak_a, ak_b).to_ndarray(), np.fmod(a, b), equal_nan=True) + assert np.allclose( + ak.mod(ak_a, ak_b).to_ndarray(), np.mod(a, b), equal_nan=True + ) + assert np.allclose( + ak.fmod(ak_a, ak_b).to_ndarray(), np.fmod(a, b), equal_nan=True + ) npf = np.array([2.23, 3.14, 3.08, 5.7]) npf2 = np.array([3.14, 2.23, 1.1, 4.1]) diff --git a/tests/pdarray_creation_test.py b/tests/pdarray_creation_test.py index 8d417633b37..ecdf643f455 100644 --- a/tests/pdarray_creation_test.py +++ b/tests/pdarray_creation_test.py @@ -39,13 +39,17 @@ def test_array_creation(self, dtype): ak.array(deque(range(fixed_size)), dtype), ak.array([f"{i}" for i in range(fixed_size)], dtype=dtype), ]: - assert isinstance(pda, ak.pdarray if ak.dtype(dtype) != "str_" else ak.Strings) + assert isinstance( + pda, ak.pdarray if ak.dtype(dtype) != "str_" else ak.Strings + ) assert len(pda) == fixed_size assert dtype == pda.dtype @pytest.mark.skip_if_rank_not_compiled([3]) @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_array_creation_multi_dim(self, size, dtype): shape = (2, 2, size) for pda in [ @@ -57,7 +61,9 @@ def test_array_creation_multi_dim(self, size, dtype): assert dtype == pda.dtype @pytest.mark.skip_if_max_rank_greater_than(3) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, 
ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_array_creation_error(self, dtype): shape = (2, 2, 2, 2) with pytest.raises(ValueError): @@ -114,7 +120,9 @@ def test_array_creation_transpose_bug_reproducer(self): cols = 5 nda = np.random.randint(1, 10, (rows, cols)) - assert_arkouda_array_equal(ak.transpose(ak.array(nda)), ak.array(np.transpose(nda))) + assert_arkouda_array_equal( + ak.transpose(ak.array(nda)), ak.array(np.transpose(nda)) + ) def test_infer_shape_from_size(self): from arkouda.util import _infer_shape_from_size @@ -135,7 +143,9 @@ def test_bigint_creation(self): pda_from_str = ak.array([f"{i}" for i in range(bi, bi + 10)], dtype=ak.bigint) pda_from_int = ak.array([i for i in range(bi, bi + 10)]) - cast_from_segstr = ak.cast(ak.array([f"{i}" for i in range(bi, bi + 10)]), ak.bigint) + cast_from_segstr = ak.cast( + ak.array([f"{i}" for i in range(bi, bi + 10)]), ak.bigint + ) for pda in [pda_from_str, pda_from_int, cast_from_segstr]: assert isinstance(pda, ak.pdarray) assert 10 == len(pda) @@ -144,7 +154,8 @@ def test_bigint_creation(self): # test array and arange infer dtype assert ( - ak.array([bi, bi + 1, bi + 2, bi + 3, bi + 4]).to_list() == ak.arange(bi, bi + 5).to_list() + ak.array([bi, bi + 1, bi + 2, bi + 3, bi + 4]).to_list() + == ak.arange(bi, bi + 5).to_list() ) # test that max_bits being set results in a mod @@ -195,7 +206,9 @@ def test_arange_dtype(self, dtype): assert dtype == start_stop.dtype start_stop_stride = ak.arange(100, 105, 2, dtype=dtype) - assert np.arange(100, 105, 2, dtype=dtype).tolist() == start_stop_stride.to_list() + assert ( + np.arange(100, 105, 2, dtype=dtype).tolist() == start_stop_stride.to_list() + ) assert dtype == start_stop_stride.dtype def test_arange_misc(self): @@ -255,7 +268,9 @@ def test_randint_array_dtype(self, size, array_type): # tests with various dtypes for the other parameters passed to randint) @pytest.mark.parametrize("dtype", NUMERIC_SCALARS) def test_randint_num_dtype(self, dtype): - for test_array in ak.randint(dtype(0), 100, 1000), ak.randint(0, dtype(100), 1000): + for test_array in ak.randint(dtype(0), 100, 1000), ak.randint( + 0, dtype(100), 1000 + ): assert isinstance(test_array, ak.pdarray) assert 1000 == len(test_array) assert ak.int64 == test_array.dtype @@ -325,7 +340,9 @@ def test_randint_with_seed(self): assert values.to_list() == bools # Test that int_scalars covers uint8, uint16, uint32 - uint_arr = ak.randint(np.uint8(1), np.uint32(5), np.uint16(10), seed=np.uint8(2)) + uint_arr = ak.randint( + np.uint8(1), np.uint32(5), np.uint16(10), seed=np.uint8(2) + ) int_arr = ak.randint(1, 5, 10, seed=2) assert (uint_arr == int_arr).all() @@ -343,7 +360,9 @@ def test_uniform(self, size): 1.0441791878997098, ] == u_array.to_list() - u_array = ak.uniform(size=np.int64(3), low=np.int64(0), high=np.int64(5), seed=np.int64(0)) + u_array = ak.uniform( + size=np.int64(3), low=np.int64(0), high=np.int64(5), seed=np.int64(0) + ) assert [ 0.30013431967121934, 0.47383036230759112, @@ -360,12 +379,16 @@ def test_uniform(self, size): ak.uniform(low=0, high=5, size="100") # Test that int_scalars covers uint8, uint16, uint32 - uint_arr = ak.uniform(low=np.uint8(0), high=np.uint16(5), size=np.uint32(100), seed=np.uint8(1)) + uint_arr = ak.uniform( + low=np.uint8(0), high=np.uint16(5), size=np.uint32(100), seed=np.uint8(1) + ) int_arr = ak.uniform(low=0, high=5, size=100, seed=1) assert (uint_arr == int_arr).all() @pytest.mark.parametrize("size", 
pytest.prob_size) - @pytest.mark.parametrize("dtype", [ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint]) + @pytest.mark.parametrize( + "dtype", [ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint] + ) def test_zeros_dtype(self, size, dtype): zeros = ak.zeros(size, dtype) assert isinstance(zeros, ak.pdarray) @@ -373,14 +396,18 @@ def test_zeros_dtype(self, size, dtype): assert (0 == zeros).all() @pytest.mark.skip_if_rank_not_compiled([2]) - @pytest.mark.parametrize("dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_] + ) @pytest.mark.parametrize("shape", [0, 2, (2, 3)]) def test_ones_match_numpy(self, shape, dtype): assert_equivalent(ak.zeros(shape, dtype=dtype), np.zeros(shape, dtype=dtype)) @pytest.mark.skip_if_rank_not_compiled([3]) @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint]) + @pytest.mark.parametrize( + "dtype", [ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint] + ) def test_zeros_dtype_mult_dim(self, size, dtype): shape = (2, 2, size) zeros = ak.zeros(shape, dtype) @@ -390,7 +417,9 @@ def test_zeros_dtype_mult_dim(self, size, dtype): assert (0 == zeros).all() @pytest.mark.skip_if_max_rank_greater_than(3) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_zeros_error(self, dtype): shape = (2, 2, 2, 2) with pytest.raises(ValueError): @@ -411,7 +440,9 @@ def test_zeros_misc(self): for arg in np.uint8(5), np.uint16(5), np.uint32(5), str(5): assert (int_arr == ak.zeros(arg, dtype=ak.int64)).all() - @pytest.mark.parametrize("dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint] + ) @pytest.mark.parametrize("size", pytest.prob_size) def test_ones_dtype(self, size, dtype): ones = ak.ones(size, dtype) @@ -419,7 +450,9 @@ def test_ones_dtype(self, size, dtype): assert dtype == ones.dtype assert (1 == ones).all() - @pytest.mark.parametrize("dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_, ak.bigint] + ) @pytest.mark.parametrize("size", pytest.prob_size) @pytest.mark.skip_if_rank_not_compiled([3]) def test_ones_dtype_multi_dim(self, size, dtype): @@ -431,7 +464,9 @@ def test_ones_dtype_multi_dim(self, size, dtype): assert (1 == ones).all() @pytest.mark.skip_if_max_rank_greater_than(3) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_ones_error(self, dtype): shape = (2, 2, 2, 2) with pytest.raises(ValueError): @@ -463,7 +498,9 @@ def test_ones_like(self, size, dtype): assert ones_like_arr.size == ran_arr.size @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_full_dtype(self, size, dtype): type_full = ak.full(size, 1, dtype) assert isinstance(type_full, ak.pdarray) @@ -471,7 +508,9 @@ def test_full_dtype(self, size, dtype): assert 
(1 == type_full).all() @pytest.mark.skip_if_rank_not_compiled([2]) - @pytest.mark.parametrize("dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, float, ak.float64, bool, ak.bool_] + ) @pytest.mark.parametrize("shape", [0, 2, (2, 3)]) def test_full_match_numpy(self, shape, dtype): assert_equivalent( @@ -481,7 +520,9 @@ def test_full_match_numpy(self, shape, dtype): @pytest.mark.skip_if_rank_not_compiled([3]) @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_full_dtype_multi_dim(self, size, dtype): shape = (2, 2, size) type_full = ak.full(shape, 1, dtype) @@ -491,7 +532,9 @@ def test_full_dtype_multi_dim(self, size, dtype): assert (1 == type_full).all() @pytest.mark.skip_if_max_rank_greater_than(3) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_full_error(self, dtype): shape = (2, 2, 2, 2) with pytest.raises(ValueError): @@ -527,7 +570,9 @@ def test_full_misc(self): assert (int_arr == ak.full(*args, dtype=int)).all() @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_full_like(self, size, dtype): ran_arr = ak.full(size, 5, dtype) full_like_arr = ak.full_like(ran_arr, 1) @@ -537,7 +582,9 @@ def test_full_like(self, size, dtype): assert full_like_arr.size == ran_arr.size @pytest.mark.parametrize("size", pytest.prob_size) - @pytest.mark.parametrize("dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_]) + @pytest.mark.parametrize( + "dtype", [int, ak.int64, ak.uint64, float, ak.float64, bool, ak.bool_] + ) def test_zeros_like(self, size, dtype): ran_arr = ak.array(ak.arange(size, dtype=dtype)) zeros_like_arr = ak.zeros_like(ran_arr) @@ -566,7 +613,9 @@ def test_linspace(self): pda = ak.linspace(start=float(5.0), stop=float(0.0), length=np.int64(6)) assert 5.0000 == pda[0] assert 0.0000 == pda[5] - assert (pda.to_ndarray() == np.linspace(float(5.0), float(0.0), np.int64(6))).all() + assert ( + pda.to_ndarray() == np.linspace(float(5.0), float(0.0), np.int64(6)) + ).all() with pytest.raises(TypeError): ak.linspace(0, "100", 1000) @@ -639,7 +688,9 @@ def test_standard_normal_errors(self): @pytest.mark.parametrize("dtype", INT_SCALARS) def test_random_strings_uniform(self, dtype): - pda = ak.random_strings_uniform(minlen=dtype(1), maxlen=dtype(5), size=dtype(100)) + pda = ak.random_strings_uniform( + minlen=dtype(1), maxlen=dtype(5), size=dtype(100) + ) assert isinstance(pda, ak.Strings) assert 100 == len(pda) assert str == pda.dtype @@ -700,7 +751,9 @@ def test_random_strings_uniform_with_seed(self): "DSN", ] == pda.to_list() - pda = ak.random_strings_uniform(minlen=1, maxlen=5, seed=1, size=10, characters="printable") + pda = ak.random_strings_uniform( + minlen=1, maxlen=5, seed=1, size=10, characters="printable" + ) assert [ "eL", "6= l_median > l_int[i - 1]) or (l_int[i] < l_median <= l_int[i - 1]): + if (l_int[i] >= l_median > l_int[i - 1]) or ( + l_int[i] < l_median <= l_int[i - 1] + ): runs += 1 # no. 
of positive values @@ -927,7 +988,8 @@ def randint_randomness(self, size): runs_exp = ((2 * n1 * n2) / (n1 + n2)) + 1 stan_dev = math.sqrt( - (2 * n1 * n2 * (2 * n1 * n2 - n1 - n2)) / (((n1 + n2) ** 2) * (n1 + n2 - 1)) + (2 * n1 * n2 * (2 * n1 * n2 - n1 - n2)) + / (((n1 + n2) ** 2) * (n1 + n2 - 1)) ) if abs((runs - runs_exp) / stan_dev) < 1.9: diff --git a/tests/setops_test.py b/tests/setops_test.py index 213f1bcbef5..965ec0b1f3d 100644 --- a/tests/setops_test.py +++ b/tests/setops_test.py @@ -36,8 +36,8 @@ def make_np_arrays_small(dtype): a = np.array([-1, 0, 1, 3]).astype(dtype) b = np.array([-1, 2, 2, 3]).astype(dtype) elif dtype == ak.bigint: - a = np.array([-1, 0, 1, 3]).astype(ak.uint64) + 2**200 - b = np.array([-1, 2, 2, 3]).astype(ak.uint64) + 2**200 + a = np.array([i + 2**200 for i in [-1, 0, 1, 3]]) + b = np.array([i + 2**200 for i in [-1, 2, 2, 3]]) elif dtype == ak.bool_: a = np.array([True, False, False, True]).astype(dtype) b = np.array([True, True, False, False]).astype(dtype) @@ -51,8 +51,8 @@ def make_np_arrays_cross_type(dtype1, dtype2): a = np.array([-1, -3, 0, 1, 2, 3]).astype(dtype1) c = np.array([-1, 0, 0, 7, 8, 3]).astype(dtype1) elif dtype1 == ak.bigint: - a = np.array([-1, -3, 0, 1, 2, 3]).astype(ak.uint64) + 2**200 - c = np.array([-1, 0, 0, 7, 8, 3]).astype(ak.uint64) + 2**200 + a = np.array([i + 2**200 for i in [-1, -3, 0, 1, 2, 3]]) + c = np.array([i + 2**200 for i in [-1, 0, 0, 7, 8, 3]]) elif dtype1 == ak.bool_: a = np.array([True, False, False, True, True]) c = np.array([True, True, False, False, True]) @@ -63,8 +63,8 @@ def make_np_arrays_cross_type(dtype1, dtype2): b = np.array([-1, -11, 0, 4, 5, 3]).astype(dtype2) d = np.array([-1, -4, 0, 7, 8, 3]).astype(dtype2) elif dtype2 == ak.bigint: - b = np.array([-1, -11, 0, 4, 5, 3]).astype(ak.uint64) + 2**200 - d = np.array([-1, -4, 0, 7, 8, 3]).astype(ak.uint64) + 2**200 + b = np.array([i + 2**200 for i in [-1, -11, 0, 4, 5, 3]]) + d = np.array([i + 2**200 for i in [-1, -4, 0, 7, 8, 3]]) elif dtype2 == ak.bool_: b = np.array([True, True, False, False, True]) d = np.array([True, True, False, False, True])
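# Illustrative sketch of the bigint fixture change in tests/setops_test.py above,
# assuming NumPy 2.0 / NEP 50 promotion rules (this is an editor-supplied example,
# not part of the patch). Presumably the old pattern
# `np.array([...]).astype(ak.uint64) + 2**200` fails under NumPy 2.0 because a
# Python int that cannot be represented as uint64 raises OverflowError when mixed
# with a uint64 array, so the fixtures now build the values as Python ints directly,
# which NumPy stores as an object-dtype array of arbitrary-precision integers that
# arkouda can ingest as ak.bigint.
import numpy as np

values = [-1, 0, 1, 3]  # same small values used in make_np_arrays_small
a = np.array([i + 2**200 for i in values])  # each element stays a Python int > 2**64
print(a.dtype)  # expected: object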