Commit
Closes #3329. Set up hstack and fixed vstack. Created tests for both. Created a new
concatenate on the server side because the old one didn't allow for
multi-dimensional arrays.

1RyanK committed Feb 27, 2025
1 parent 8563fa1 commit 4f878b9
Show file tree
Hide file tree
Showing 6 changed files with 477 additions and 63 deletions.
35 changes: 17 additions & 18 deletions arkouda/pdarrayclass.py
@@ -5,7 +5,7 @@
from functools import reduce
from math import ceil
from sys import modules
from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union, cast
from typing import TYPE_CHECKING, List, Optional, Tuple, Union, cast

import numpy as np
from typeguard import typechecked
@@ -342,8 +342,8 @@ class pdarray:
The number of elements in the array
ndim : int_scalars
The rank of the array (currently only rank 1 arrays supported)
shape : Sequence[int]
A list or tuple containing the sizes of each dimension of the array
shape : Tuple[int, ...]
A tuple containing the sizes of each dimension of the array
itemsize : int_scalars
The size in bytes of each element
"""
@@ -383,7 +383,7 @@ def __init__(
mydtype: Union[np.dtype, str],
size: int_scalars,
ndim: int_scalars,
shape: Sequence[int],
shape: Tuple[int, ...],
itemsize: int_scalars,
max_bits: Optional[int] = None,
) -> None:
@@ -2637,10 +2637,10 @@ def create_pdarray(repMsg: str, max_bits=None) -> pdarray:
ndim = int(fields[4])

if fields[5] == "[]":
shape = []
shape: Tuple[int, ...] = tuple([])
else:
trailing_comma_offset = -2 if fields[5][len(fields[5]) - 2] == "," else -1
shape = [int(el) for el in fields[5][1:trailing_comma_offset].split(",")]
shape = tuple([int(el) for el in fields[5][1:trailing_comma_offset].split(",")])

itemsize = int(fields[6])
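For readers of this hunk: the new parsing turns the server's bracketed shape string into a Tuple[int, ...] instead of a list. A minimal standalone sketch of that logic, assuming the field looks like "[]", "[3,]", or "[3,4]" (the helper name parse_shape_field is illustrative, not part of the commit):

from typing import Tuple

def parse_shape_field(field: str) -> Tuple[int, ...]:
    # "[]" denotes an empty shape
    if field == "[]":
        return tuple()
    # Strip the surrounding brackets; tolerate a trailing comma as in "[3,]"
    trailing_comma_offset = -2 if field[-2] == "," else -1
    return tuple(int(el) for el in field[1:trailing_comma_offset].split(","))

# parse_shape_field("[]") -> (), parse_shape_field("[3,]") -> (3,), parse_shape_field("[3,4]") -> (3, 4)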
except Exception as e:
@@ -4131,31 +4131,30 @@ def fmod(dividend: Union[pdarray, numeric_scalars], divisor: Union[pdarray, nume
)
# TODO: handle shape broadcasting for multidimensional arrays

# The code below creates a command string for fmod2vv, fmod2vs or fmod2sv.

# The code below creates a command string for fmod2vv, fmod2vs or fmod2sv.

if isinstance(dividend, pdarray) and isinstance(divisor, pdarray) :
if isinstance(dividend, pdarray) and isinstance(divisor, pdarray):
cmdstring = f"fmod2vv<{dividend.dtype},{dividend.ndim},{divisor.dtype}>"

elif isinstance(dividend, pdarray) and not (isinstance(divisor, pdarray)) :
if resolve_scalar_dtype(divisor) in ['float64', 'int64', 'uint64', 'bool'] :
acmd = 'fmod2vs_'+resolve_scalar_dtype(divisor)
else : # this condition *should* be impossible because of the isSupportedNumber check
elif isinstance(dividend, pdarray) and not (isinstance(divisor, pdarray)):
if resolve_scalar_dtype(divisor) in ["float64", "int64", "uint64", "bool"]:
acmd = "fmod2vs_" + resolve_scalar_dtype(divisor)
else: # this condition *should* be impossible because of the isSupportedNumber check
raise TypeError(f"Scalar divisor type {resolve_scalar_dtype(divisor)} not allowed in fmod")
cmdstring = f"{acmd}<{dividend.dtype},{dividend.ndim}>"

elif not (isinstance(dividend, pdarray) and isinstance(divisor, pdarray)) :
if resolve_scalar_dtype(dividend) in ['float64', 'int64', 'uint64', 'bool'] :
acmd = 'fmod2sv_'+resolve_scalar_dtype(dividend)
else : # this condition *should* be impossible because of the isSupportedNumber check
elif not (isinstance(dividend, pdarray) and isinstance(divisor, pdarray)):
if resolve_scalar_dtype(dividend) in ["float64", "int64", "uint64", "bool"]:
acmd = "fmod2sv_" + resolve_scalar_dtype(dividend)
else: # this condition *should* be impossible because of the isSupportedNumber check
raise TypeError(f"Scalar dividend type {resolve_scalar_dtype(dividend)} not allowed in fmod")
cmdstring = f"{acmd}<{divisor.dtype},{divisor.ndim}>" # type: ignore[union-attr]

else:
m = mod(dividend, divisor)
return _create_scalar_array(m)

# We reach here if this was any case other than scalar & scalar
# We reach here if this was any case other than scalar & scalar

return create_pdarray(
cast(
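A hedged recap of the branch structure reformatted above, written as a pure function over already-resolved operand info; the name fmod_command_name, the use of None to mark a scalar operand, and the simplified signature are illustrative only, not the module's API:

from typing import Optional

def fmod_command_name(
    dividend_dtype: str, dividend_ndim: Optional[int],
    divisor_dtype: str, divisor_ndim: Optional[int],
) -> str:
    # Two pdarrays -> fmod2vv; pdarray/scalar -> fmod2vs_<scalar dtype>; scalar/pdarray -> fmod2sv_<scalar dtype>.
    if dividend_ndim is not None and divisor_ndim is not None:
        return f"fmod2vv<{dividend_dtype},{dividend_ndim},{divisor_dtype}>"
    if dividend_ndim is not None:
        return f"fmod2vs_{divisor_dtype}<{dividend_dtype},{dividend_ndim}>"
    if divisor_ndim is not None:
        return f"fmod2sv_{dividend_dtype}<{divisor_dtype},{divisor_ndim}>"
    raise ValueError("scalar % scalar is routed through mod() instead")

# e.g. fmod_command_name("float64", 1, "int64", None) -> "fmod2vs_int64<float64,1>"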
217 changes: 193 additions & 24 deletions arkouda/pdarraymanipulation.py
@@ -1,48 +1,67 @@
from typing import Tuple, List, Literal, Union, Optional
from typing import Literal, Optional, Sequence, Union

import numpy as np
from typeguard import typechecked

from arkouda.client import generic_msg
from arkouda.pdarrayclass import pdarray, create_pdarray
from arkouda.numpy.dtypes import bigint
from arkouda.numpy.dtypes import dtype as akdtype
from arkouda.pdarrayclass import create_pdarray, pdarray

import numpy as np

__all__ = ["vstack", "delete"]
__all__ = ["hstack", "vstack", "delete"]


@typechecked
def vstack(
tup: Union[Tuple[pdarray], List[pdarray]],
def hstack(
tup: Sequence[pdarray],
*,
dtype: Optional[Union[type, str]] = None,
dtype: Optional[Union[str, type]] = None,
casting: Literal["no", "equiv", "safe", "same_kind", "unsafe"] = "same_kind",
) -> pdarray:
"""
Stack a sequence of arrays vertically (row-wise).
Stack arrays in sequence horizontally (column wise).
This is equivalent to concatenation along the first axis after 1-D arrays of
shape `(N,)` have been reshaped to `(1,N)`.
This is equivalent to concatenation along the second axis, except for 1-D arrays
where it concatenates along the first axis. Rebuilds arrays divided by ``hsplit``.
This function makes most sense for arrays with up to 3 dimensions. For instance, for pixel-data
with a height (first axis), width (second axis), and r/g/b channels (third axis). The functions
``concatenate``, ``stack`` and ``block`` provide more general stacking and concatenation operations.
Parameters
----------
tup : Tuple[pdarray]
The arrays to be stacked
dtype : Optional[Union[type, str]], optional
The data-type of the output array. If not provided, the output
array will be determined using `np.common_type` on the
input arrays Defaults to None
casting : {"no", "equiv", "safe", "same_kind", "unsafe"], optional
Controls what kind of data casting may occur - currently unused
tup : sequence of pdarray
The arrays must have the same shape along all but the second axis, except 1-D arrays which
can be any length. In the case of a single array_like input, it will be treated as a sequence of
arrays; i.e., each element along the zeroth axis is treated as a separate array.
dtype : str or type, optional
If provided, the destination array will have this type.
casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
Controls what kind of data casting may occur. Defaults to 'same_kind'. Currently unused.
Returns
-------
pdarray
The stacked array
The array formed by stacking the given arrays.
See Also
--------
concatenate, stack, block, vstack, dstack, column_stack, hsplit, unstack
Examples
--------
>>> a = ak.array([1, 2, 3])
>>> b = ak.array([4, 5, 6])
>>> ak.hstack((a, b))
array([1 2 3 4 5 6])
>>> a = ak.array([[1],[2],[3]])
>>> b = ak.array([[4],[5],[6]])
>>> ak.hstack((a, b))
array([array([1 4]) array([2 5]) array([3 6])])
"""

if casting != "same_kind":
# TODO: wasn't clear from the docs what each of the casting options does
# TODO: align with https://numpy.org/doc/stable/glossary.html#term-casting
raise NotImplementedError(f"casting={casting} is not yet supported")

# ensure all arrays have the same number of dimensions
@@ -51,23 +70,173 @@ def vstack(
if a.ndim != ndim:
raise ValueError("all input arrays must have the same number of dimensions")

has_bigint = False
for a in tup:
if a.dtype == bigint:
has_bigint = True
# Should this be min or max?
try:
m_bits = min([a.max_bits for a in tup if a.dtype == bigint and a.max_bits > 0])
except ValueError:
m_bits = -1
break

# establish the dtype of the output array
if has_bigint and dtype is None:
dtype = bigint
if dtype is None:
dtype_ = np.common_type(*[np.empty(0, dtype=a.dtype) for a in tup])
dtype_ = np.result_type(*[np.dtype(a.dtype) for a in tup])
else:
dtype_ = akdtype(dtype)

# cast the input arrays to the output dtype if necessary
arrays = [a.astype(dtype_) if a.dtype != dtype_ else a for a in tup]

if has_bigint:
for i in range(len(arrays)):
arrays[i].max_bits = m_bits

offsets = [0 for _ in range(len(arrays))]

if ndim == 1:
for i in range(1, len(arrays)):
offsets[i] = offsets[i - 1] + arrays[i - 1].shape[0]
return create_pdarray(
generic_msg(
cmd=f"concatenate<{akdtype(dtype_).name},{arrays[0].ndim}>",
args={
"names": list(arrays),
"n": len(arrays),
"axis": 0,
"offsets": offsets,
},
)
)

for i in range(1, len(arrays)):
offsets[i] = offsets[i - 1] + arrays[i - 1].shape[1]

# stack the arrays along the horizontal axis
return create_pdarray(
generic_msg(
cmd=f"concatenate<{akdtype(dtype_).name},{arrays[0].ndim}>",
args={
"names": list(arrays),
"n": len(arrays),
"axis": 1,
"offsets": offsets,
},
)
)
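The offsets computed in hstack above are just an exclusive prefix sum of the input sizes along the concatenation axis (axis 0 for 1-D inputs, axis 1 otherwise). A NumPy-only sketch of that bookkeeping and of the layout the docstring example shows; it is illustrative and does not talk to an Arkouda server:

import numpy as np

def hstack_offsets(arrays):
    # Concatenation axis: 0 for 1-D inputs, 1 otherwise, matching the code above.
    axis = 0 if arrays[0].ndim == 1 else 1
    sizes = [a.shape[axis] for a in arrays]
    # Exclusive prefix sum: where each input block starts in the output.
    return [int(x) for x in np.cumsum([0] + sizes[:-1])]

a = np.array([[1], [2], [3]])
b = np.array([[4], [5], [6]])
print(hstack_offsets([a, b]))  # [0, 1]
print(np.hstack((a, b)))       # rows [1 4], [2 5], [3 6], matching the docstring example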


@typechecked
def vstack(
tup: Sequence[pdarray],
*,
dtype: Optional[Union[str, type]] = None,
casting: Literal["no", "equiv", "safe", "same_kind", "unsafe"] = "same_kind",
) -> pdarray:
"""
Stack arrays in sequence vertically (row wise).
This is equivalent to concatenation along the first axis after
1-D arrays of shape `(N,)` have been reshaped to `(1,N)`. Rebuilds arrays divided by ``vsplit``.
This function makes most sense for arrays with up to 3 dimensions.
For instance, for pixel-data with a height (first axis), width (second axis),
and r/g/b channels (third axis). The functions ``concatenate``, ``stack`` and ``block``
provide more general stacking and concatenation operations.
Parameters
----------
tup : sequence of pdarray
The arrays must have the same shape along all but the first axis. 1-D arrays
must have the same length. In the case of a single array_like input, it will be
treated as a sequence of arrays; i.e., each element along the zeroth axis is treated
as a separate array.
dtype : str or type, optional
If provided, the destination array will have this dtype.
casting : {"no", "equiv", "safe", "same_kind", "unsafe"], optional
Controls what kind of data casting may occur. Defaults to ‘same_kind’. Currently unused.
Returns
-------
pdarray
The array formed by stacking the given arrays, will be at least 2-D.
See Also
--------
concatenate, stack, block, hstack, dstack, column_stack, hsplit, unstack
Examples
--------
>>> a = ak.array([1, 2, 3])
>>> b = ak.array([4, 5, 6])
>>> ak.vstack((a, b))
array([array([1 2 3]) array([4 5 6])])
>>> a = ak.array([[1],[2],[3]])
>>> b = ak.array([[4],[5],[6]])
>>> ak.vstack((a, b))
array([array([1]) array([2]) array([3]) array([4]) array([5]) array([6])])
"""

if casting != "same_kind":
# TODO: align with https://numpy.org/doc/stable/glossary.html#term-casting
raise NotImplementedError(f"casting={casting} is not yet supported")

# From docstring: "This is equivalent to concatenation along the first axis after 1-D arrays
# of shape (N,) have been reshaped to (1,N)."
arrays = [a if a.ndim != 1 else a.reshape((1, len(a))) for a in tup]

# ensure all arrays have the same number of dimensions
ndim = arrays[0].ndim
for a in arrays:
if a.ndim != ndim:
raise ValueError("all input arrays must have the same number of dimensions")

has_bigint = False
for a in arrays:
if a.dtype == bigint:
has_bigint = True
# Should this be min or max?
try:
m_bits = min([a.max_bits for a in arrays if a.dtype == bigint and a.max_bits > 0])
except ValueError:
m_bits = -1
break

# establish the dtype of the output array
if has_bigint and dtype is None:
dtype = bigint
if dtype is None:
dtype_ = np.result_type(*[np.dtype(a.dtype) for a in arrays])
else:
dtype_ = akdtype(dtype)

# cast the input arrays to the output dtype if necessary
arrays = [a.astype(dtype_) if a.dtype != dtype_ else a for a in arrays]

if has_bigint:
for i in range(len(arrays)):
arrays[i].max_bits = m_bits

offsets = [0 for _ in range(len(arrays))]

for i in range(1, len(arrays)):
offsets[i] = offsets[i - 1] + arrays[i - 1].shape[0]

# stack the arrays along the first axis
return create_pdarray(
generic_msg(
cmd=f"stack{ndim}D",
cmd=f"concatenate<{akdtype(dtype_).name},{arrays[0].ndim}>",
args={
"names": list(arrays),
"n": len(arrays),
"axis": 0,
"offsets": offsets,
},
)
)
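As the comment in vstack notes, 1-D inputs are reshaped to (1, N) and then concatenated along axis 0. A NumPy-only sketch of that equivalence, matching the 1-D docstring example above (illustrative; no Arkouda server involved):

import numpy as np

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# vstack of 1-D inputs == reshape each to (1, N), then concatenate along axis 0
stacked = np.concatenate([x.reshape(1, -1) for x in (a, b)], axis=0)
assert np.array_equal(stacked, np.vstack((a, b)))
print(stacked)  # rows [1 2 3] and [4 5 6], the same layout the docstring example shows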
