diff --git a/arkouda-env-dev.yml b/arkouda-env-dev.yml
index 16d66da822c..9f150fddeea 100644
--- a/arkouda-env-dev.yml
+++ b/arkouda-env-dev.yml
@@ -4,7 +4,8 @@ channels:
   - defaults
 dependencies:
   - python>=3.9,<3.12.4 # minimum 3.9
-  - numpy>=1.24.1,<2.0
+# - numpy>=1.24.1,<2.0
+  - numpy>=2.0
   - pandas>=1.4.0,!=2.2.0
   - pyzmq>=20.0.0
   - tabulate
diff --git a/arkouda/numpy/__init__.py b/arkouda/numpy/__init__.py
index 34387451601..b358c5978d7 100644
--- a/arkouda/numpy/__init__.py
+++ b/arkouda/numpy/__init__.py
@@ -1,15 +1,6 @@
 # flake8: noqa
 from numpy import (  # noqa
-    NAN,
-    NINF,
-    NZERO,
-    PINF,
-    PZERO,
-    DataSource,
     False_,
-    Inf,
-    Infinity,
-    NaN,
     ScalarType,
     True_,
     base_repr,
@@ -17,9 +8,7 @@
     byte,
     bytes_,
     cdouble,
-    cfloat,
     clongdouble,
-    clongfloat,
     compat,
     csingle,
     datetime64,
@@ -28,7 +17,6 @@
     euler_gamma,
     finfo,
     flexible,
-    float_,
     floating,
     format_float_positional,
     format_float_scientific,
@@ -36,22 +24,17 @@
     iinfo,
     inexact,
     inf,
-    infty,
     intc,
     intp,
     isscalar,
-    issctype,
     issubdtype,
     longdouble,
-    longfloat,
     longlong,
-    maximum_sctype,
     nan,
     number,
     pi,
     promote_types,
     sctypeDict,
-    sctypes,
     short,
     signedinteger,
     single,
diff --git a/arkouda/numpy/_numeric.py b/arkouda/numpy/_numeric.py
index 175efa9dea4..b6b5e8c5281 100644
--- a/arkouda/numpy/_numeric.py
+++ b/arkouda/numpy/_numeric.py
@@ -119,6 +119,39 @@ def _merge_where(new_pda, where, ret):
     return new_pda
 
 
+def can_cast(from_, to) -> ak_bool:
+    """
+    Return whether ``from_`` can be cast to the dtype ``to``.
+
+    Scalars that are not instances of Python ``int``, ``float`` or ``complex`` are delegated
+    to ``np.can_cast``. Of the remaining values, only a non-negative supported integer
+    targeting ``uint64`` is reported as castable.
+
+    Parameters
+    ----------
+    from_
+        Scalar value to test.
+    to
+        Target dtype.
+
+    Returns
+    -------
+    bool
+        True if the cast is permitted, otherwise False.
+    """
+    from arkouda.numpy.dtypes import _is_dtype_in_union, isSupportedInt
+    from arkouda.numpy.dtypes import uint64 as ak_uint64
+
+    if (np.isscalar(from_) or _is_dtype_in_union(from_, numeric_scalars)) and not isinstance(
+        from_, (int, float, complex)
+    ):
+        return np.can_cast(from_, to)
+    elif isSupportedInt(from_) and from_ >= 0 and to == ak_uint64:
+        return True
+
+    return False
+
+
 @typechecked
 def cast(
     pda: Union[pdarray, Strings, Categorical],  # type: ignore
diff --git
a/arkouda/numpy/dtypes/dtypes.py b/arkouda/numpy/dtypes/dtypes.py index e0b2b0117f7..c05e404c8c7 100644 --- a/arkouda/numpy/dtypes/dtypes.py +++ b/arkouda/numpy/dtypes/dtypes.py @@ -284,12 +284,25 @@ def __repr__(self) -> str: # missing full support for: float32, int32, int16, int8, uint32, uint16, complex64, complex128 # ARKOUDA_SUPPORTED_DTYPES = frozenset([member.value for _, member in DType.__members__.items()]) ARKOUDA_SUPPORTED_DTYPES = frozenset( - ["bool_", "float", "float64", "int", "int64", "uint", "uint64", "uint8", "bigint", "str"] + [ + "bool_", + "float", + "float64", + "int", + "int64", + "uint", + "uint64", + "uint8", + "bigint", + "str", + ] ) DTypes = frozenset([member.value for _, member in DType.__members__.items()]) DTypeObjects = frozenset([bool_, float, float64, int, int64, str, str_, uint8, uint64]) -NumericDTypes = frozenset(["bool_", "bool", "float", "float64", "int", "int64", "uint64", "bigint"]) +NumericDTypes = frozenset( + ["bool_", "bool", "float", "float64", "int", "int64", "uint64", "bigint"] +) SeriesDTypes = { "string": np.str_, "": np.str_, @@ -337,7 +350,9 @@ def resolve_scalar_dtype(val: object) -> str: ): return "bool" # Python int or np.int* or np.uint* - elif isinstance(val, int) or (hasattr(val, "dtype") and cast(np.uint, val).dtype.kind in "ui"): + elif isinstance(val, int) or ( + hasattr(val, "dtype") and cast(np.uint, val).dtype.kind in "ui" + ): # we've established these are int, uint, or bigint, # so we can do comparisons if isSupportedInt(val) and val >= 2**64: # type: ignore @@ -347,9 +362,13 @@ def resolve_scalar_dtype(val: object) -> str: else: return "int64" # Python float or np.float* - elif isinstance(val, float) or (hasattr(val, "dtype") and cast(np.float_, val).dtype.kind == "f"): + elif isinstance(val, float) or ( + hasattr(val, "dtype") and cast(np.float64, val).dtype.kind == "f" + ): return "float64" - elif isinstance(val, complex) or (hasattr(val, "dtype") and cast(np.float_, val).dtype.kind == "c"): + 
elif isinstance(val, complex) or ( + hasattr(val, "dtype") and cast(np.float64, val).dtype.kind == "c" + ): return "float64" # TODO: actually support complex values in the backend elif isinstance(val, builtins.str) or isinstance(val, np.str_): return "str" diff --git a/arkouda/numpy/exceptions/__init__.py b/arkouda/numpy/exceptions/__init__.py index 0f9abbca300..2de247b7e0c 100644 --- a/arkouda/numpy/exceptions/__init__.py +++ b/arkouda/numpy/exceptions/__init__.py @@ -1,3 +1,3 @@ -from numpy import RankWarning, TooHardError +# from numpy import RankWarning, TooHardError -__all__ = ["RankWarning", "TooHardError"] +# __all__ = ["RankWarning", "TooHardError"] diff --git a/arkouda/numpy/lib/__init__.py b/arkouda/numpy/lib/__init__.py index a516f365d6a..76986a9e7e8 100644 --- a/arkouda/numpy/lib/__init__.py +++ b/arkouda/numpy/lib/__init__.py @@ -1,12 +1,12 @@ # flake8: noqa from numpy import ( - RankWarning, - add_docstring, - add_newdoc, - deprecate, - deprecate_with_doc, - disp, - issubclass_, + # RankWarning, + # add_docstring, + # add_newdoc, + # deprecate, + # deprecate_with_doc, + # disp, + # issubclass_, issubdtype, polynomial, typename, @@ -16,14 +16,14 @@ from arkouda.numpy.lib.emath import * __all__ = [ - "RankWarning", - "add_docstring", - "add_newdoc", - "deprecate", - "deprecate_with_doc", - "disp", + # "RankWarning", + # "add_docstring", + # "add_newdoc", + # "deprecate", + # "deprecate_with_doc", + # "disp", "emath", - "issubclass_", + # "issubclass_", "issubdtype", "polynomial", "typename", diff --git a/arkouda/numpy/rec/__init__.py b/arkouda/numpy/rec/__init__.py index f752a4adcab..948ed964053 100644 --- a/arkouda/numpy/rec/__init__.py +++ b/arkouda/numpy/rec/__init__.py @@ -1,3 +1,3 @@ -from numpy import format_parser - -__all__ = ["format_parser"] +# from numpy import format_parser +# +# __all__ = ["format_parser"] diff --git a/arkouda/pdarrayclass.py b/arkouda/pdarrayclass.py index ede5da86369..6d1383611a8 100644 --- a/arkouda/pdarrayclass.py +++ 
b/arkouda/pdarrayclass.py
@@ -571,11 +571,14 @@ def _binop(self, other: pdarray, op: str) -> pdarray:
         # pdarray binop scalar
         # If scalar cannot be safely cast, server will infer the return dtype
         dt = resolve_scalar_dtype(other)
-        if self.dtype != bigint and np.can_cast(other, self.dtype):
+        # np.can_cast raises TypeError for Python scalars under NumPy 2, so use arkouda's wrapper.
+        from arkouda.numpy._numeric import can_cast as ak_can_cast
+
+        if self.dtype != bigint and ak_can_cast(other, self.dtype):
             # If scalar can be losslessly cast to array dtype,
             # do the cast so that return array will have same dtype
             dt = self.dtype.name
             other = self.dtype.type(other)
         if dt not in DTypes:
             raise TypeError(f"Unhandled scalar type: {other} ({type(other)})")
         repMsg = generic_msg(
@@ -616,7 +619,10 @@ def _r_binop(self, other: pdarray, op: str) -> pdarray:
         # pdarray binop scalar
         # If scalar cannot be safely cast, server will infer the return dtype
         dt = resolve_scalar_dtype(other)
-        if self.dtype != bigint and np.can_cast(other, self.dtype):
+        # np.can_cast raises TypeError for Python scalars under NumPy 2, so use arkouda's wrapper.
+        from arkouda.numpy._numeric import can_cast as ak_can_cast
+
+        if self.dtype != bigint and ak_can_cast(other, self.dtype):
             # If scalar can be losslessly cast to array dtype,
             # do the cast so that return array will have same dtype
             dt = self.dtype.name
@@ -4131,23 +4137,22 @@ def fmod(dividend: Union[pdarray, numeric_scalars], divisor: Union[pdarray, nume
         )
     # TODO: handle shape broadcasting for multidimensional arrays
 
+    # The code below creates a command string for fmod2vv, fmod2vs or fmod2sv.
 
-# The code below creates a command string for fmod2vv, fmod2vs or fmod2sv.
- - if isinstance(dividend, pdarray) and isinstance(divisor, pdarray) : + if isinstance(dividend, pdarray) and isinstance(divisor, pdarray): cmdstring = f"fmod2vv<{dividend.dtype},{dividend.ndim},{divisor.dtype}>" - elif isinstance(dividend, pdarray) and not (isinstance(divisor, pdarray)) : - if resolve_scalar_dtype(divisor) in ['float64', 'int64', 'uint64', 'bool'] : - acmd = 'fmod2vs_'+resolve_scalar_dtype(divisor) - else : # this condition *should* be impossible because of the isSupportedNumber check + elif isinstance(dividend, pdarray) and not (isinstance(divisor, pdarray)): + if resolve_scalar_dtype(divisor) in ["float64", "int64", "uint64", "bool"]: + acmd = "fmod2vs_" + resolve_scalar_dtype(divisor) + else: # this condition *should* be impossible because of the isSupportedNumber check raise TypeError(f"Scalar divisor type {resolve_scalar_dtype(divisor)} not allowed in fmod") cmdstring = f"{acmd}<{dividend.dtype},{dividend.ndim}>" - elif not (isinstance(dividend, pdarray) and isinstance(divisor, pdarray)) : - if resolve_scalar_dtype(dividend) in ['float64', 'int64', 'uint64', 'bool'] : - acmd = 'fmod2sv_'+resolve_scalar_dtype(dividend) - else : # this condition *should* be impossible because of the isSupportedNumber check + elif not (isinstance(dividend, pdarray) and isinstance(divisor, pdarray)): + if resolve_scalar_dtype(dividend) in ["float64", "int64", "uint64", "bool"]: + acmd = "fmod2sv_" + resolve_scalar_dtype(dividend) + else: # this condition *should* be impossible because of the isSupportedNumber check raise TypeError(f"Scalar dividend type {resolve_scalar_dtype(dividend)} not allowed in fmod") cmdstring = f"{acmd}<{divisor.dtype},{divisor.ndim}>" # type: ignore[union-attr] @@ -4155,7 +4169,7 @@ def fmod(dividend: Union[pdarray, numeric_scalars], divisor: Union[pdarray, nume m = mod(dividend, divisor) return _create_scalar_array(m) -# We reach here if this was any case other than scalar & scalar + # We reach here if this was any case other than 
scalar & scalar return create_pdarray( cast( diff --git a/arkouda/pdarraycreation.py b/arkouda/pdarraycreation.py index fde8b3fe997..7f04c725903 100644 --- a/arkouda/pdarraycreation.py +++ b/arkouda/pdarraycreation.py @@ -275,7 +275,11 @@ def array( # early out if we would have more uint arrays than can fit in max_bits early_out = (max_bits // 64) + (max_bits % 64 != 0) if max_bits != -1 else float("inf") while any(a != 0) and len(uint_arrays) < early_out: - low, a = a % 2**64, a // 2**64 + if isinstance(a, np.ndarray): + # numpy arrays do not support sizes > 2**64 so this is a short process. + low, a = a, np.zeros_like(a) + else: + low, a = a % 2**64, a // 2**64 uint_arrays.append(array(np.array(low, dtype=np.uint), dtype=akuint64)) return bigint_from_uint_arrays(uint_arrays[::-1], max_bits=max_bits) except TypeError: diff --git a/arkouda/util.py b/arkouda/util.py index e84d7dfa5fe..a0ba1b5f89d 100644 --- a/arkouda/util.py +++ b/arkouda/util.py @@ -425,7 +425,7 @@ def convert_bytes(nbytes, unit="B"): def is_numeric( - arry: Union[pdarray, Strings, Categorical, "Series", "Index"] # noqa: F821 + arry: Union[pdarray, Strings, Categorical, "Series", "Index"], # noqa: F821 ) -> builtins.bool: """ Check if the dtype of the given array is numeric. 
diff --git a/tests/dataframe_test.py b/tests/dataframe_test.py index fd6c1f6b9c5..85b31b20436 100644 --- a/tests/dataframe_test.py +++ b/tests/dataframe_test.py @@ -94,7 +94,7 @@ def build_ak_df_example_numeric_types(): "float64": ak.randint(0, 1, 20, dtype=ak.float64), "int64": ak.randint(0, 10, 20, dtype=ak.int64), "uint64": ak.randint(0, 10, 20, dtype=ak.uint64), - "bigint": ak.randint(0, 10, 20, dtype=ak.uint64) + 2**200, + "bigint": ak.randint(2**200, 2**200 + 10, 20, dtype=ak.uint64), } ) return ak_df @@ -105,7 +105,9 @@ def build_pd_df_duplicates(): userid = [111, 222, 111, 333, 222, 111] item = [0, 1, 0, 2, 1, 0] day = [5, 5, 5, 5, 5, 5] - return pd.DataFrame({"userName": username, "userID": userid, "item": item, "day": day}) + return pd.DataFrame( + {"userName": username, "userID": userid, "item": item, "day": day} + ) @staticmethod def build_ak_df_duplicates(): @@ -139,7 +141,7 @@ def build_pd_df_append(): item = [0, 0, 1, 1, 2, 0, 0, 2] day = [5, 5, 6, 5, 6, 6, 1, 2] amount = [0.5, 0.6, 1.1, 1.2, 4.3, 0.6, 0.5, 5.1] - bi = (np.arange(8) + 2**200).tolist() + bi = np.arange(2**200, 2**200 + 8).tolist() # (np.arange(8) + 2**200).tolist() ui = (np.arange(8).astype(ak.uint64)) + 2**63 return pd.DataFrame( { @@ -209,7 +211,9 @@ def test_dataframe_creation(self, size): "uint": ak.array(pddf["uint"]), "bigint": ak.arange(2**200, 2**200 + size), "bool": ak.array(pddf["bool"]), - "segarray": ak.SegArray.from_multi_array([ak.array(x) for x in pddf["segarray"]]), + "segarray": ak.SegArray.from_multi_array( + [ak.array(x) for x in pddf["segarray"]] + ), } ) assert isinstance(akdf, ak.DataFrame) @@ -362,7 +366,9 @@ def test_boolean_indexing(self): row = df[df["userName"] == "Carol"] assert len(row) == 1 - assert ref_df[ref_df["userName"] == "Carol"].equals(row.to_pandas(retain_index=True)) + assert ref_df[ref_df["userName"] == "Carol"].equals( + row.to_pandas(retain_index=True) + ) def test_column_indexing(self): df = self.build_ak_df() @@ -602,14 +608,20 @@ def 
test_groupby_standard(self): pds = pd.Series( data=np.ones(4, dtype=np.int64), index=pd.Index( - data=np.array(["0.0.0.1", "0.0.0.2", "0.0.0.3", "0.0.0.4"], dtype="