diff --git a/README.rst b/README.rst
index ae7ffdd..75fb6f5 100644
--- a/README.rst
+++ b/README.rst
@@ -3,7 +3,7 @@ mpi4py-ve
 #########
 *mpi4py-ve* is an extension to *mpi4py*, which provides Python bindings for the Message Passing Interface (MPI).
-This package also supports to communicate array objects of `NLCPy `_ (nlcpy.ndarray) between MPI processes on x86 servers of SX-Aurora TSUBASA systems.
+This package also supports communication of array objects of `NLCPy `_ (nlcpy.ndarray) between MPI processes on x86 servers of SX-Aurora TSUBASA systems. Combining NLCPy with *mpi4py-ve* enables Python scripts to utilize multi-VE computing power.
 The current version of *mpi4py-ve* is based on *mpi4py* version 3.0.3.
 For details of API references, please refer to `mpi4py manual `_.
@@ -14,25 +14,27 @@ Requirements
 Before the installation, the following components are required to be installed on your x86 Node of SX-Aurora TSUBASA.
 
-- `NEC SDK `_
-  - required NEC C/C++ compiler version: >= 3.2.1
-  - required NLC version: >= 2.3.0
+- `Alternative VE Offloading (AVEO) `_
+  - required version: >= 2.13.0
 
-- `VEOS `_
-  - required version: >= 2.11.1
-
-- `NEC MPI `_
-  - required NEC MPI version: >= 2.20.0
+- `NEC MPI `_
+  - required NEC MPI version: >= 2.22.0 (for Mellanox OFED 4.x) or >= 3.1.0 (for Mellanox OFED 5.x)
 
 - `Python `_
   - required version: 3.6, 3.7, or 3.8
 
-- `NLCPy `_
-  - required version: v2.1.1
-
 - `NumPy `_
   - required version: v1.17, v1.18, v1.19, or v1.20
 
+- `NLC (optional) `_
+  - required version: >= 2.3.0
+
+- `NLCPy (optional) `_
+  - required version: >= 2.2.0
+
+Since December 2022, mpi4py-ve has been provided as part of NEC SDK (NEC Software Development Kit for Vector Engine).
+If NEC SDK on your machine has been properly installed or updated since then, mpi4py-ve is available via the /usr/bin/python3 command.
+
 ******************
 Install from wheel
 ******************
@@ -57,16 +59,14 @@ You can install *mpi4py-ve* by executing either of the following commands.
 
     $ pip install 
 
-The shared objects for Vector Engine, which are included in the wheel package, are compiled and tested by using following software:
-
-    +-------------------+---------------+
-    | NEC C/C++ Compiler| Version 3.2.1 |
-    +-------------------+---------------+
-    | NEC MPI           | v2.20.0       |
-    +-------------------+---------------+
-    | NumPy             | v1.19.2       |
-    +-------------------+---------------+
-    | NLCPy             | v2.1.1        |
-    +-------------------+---------------+
+The shared objects for Vector Host, which are included in the wheel package, are compiled with gcc 4.8.5 and tested using the following software:
+
+    +---------+--------------------+
+    | NEC MPI | v2.22.0 and v3.1.0 |
+    +---------+--------------------+
+    | NumPy   | v1.19.2            |
+    +---------+--------------------+
+    | NLCPy   | v2.2.0             |
+    +---------+--------------------+
 
 ***********************************
 Install from source (with building)
 ***********************************
@@ -178,7 +178,7 @@ When using the *mpirun* command:
 
     ::
 
-        $ mpirun -vh -np N $(which python) sample.py
+        $ mpirun -veo -np N $(which python) sample.py
 
 | Here, N is the number of MPI processes that are created on an x86 server.
 | NEC MPI 2.21.0 or later supports the environment variable `NMPI_USE_COMMAND_SEARCH_PATH`.
@@ -187,9 +187,141 @@ When using the *mpirun* command:
 
     ::
 
         $ export NMPI_USE_COMMAND_SEARCH_PATH=ON
-        $ mpirun -vh -np N python sample.py
+        $ mpirun -veo -np N python sample.py
+
+| For details of the mpirun command, refer to `NEC MPI User's Guide `_.
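+
+| The commands above assume a script named sample.py. As a minimal sketch (the array length, dtype, and two-process layout are illustrative assumptions, not part of mpi4py-ve), such a script could look like this:
+
+  .. code-block:: python
+
+     # sample.py: send an NLCPy array from rank 0 to rank 1
+     # run with: mpirun -veo -np 2 python sample.py
+     from mpi4pyve import MPI  # importing MPI initializes it; import before nlcpy
+     import nlcpy as vp
+
+     comm = MPI.COMM_WORLD
+     rank = comm.Get_rank()
+
+     if rank == 0:
+         x = vp.arange(10, dtype='float64')  # nlcpy.ndarray placed on a VE
+         comm.Send(x, dest=1)
+     elif rank == 1:
+         y = vp.empty(10, dtype='float64')
+         comm.Recv(y, source=0)
+         print(y)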
+
+******************
+Execution Examples
+******************
+
+The following examples show how to launch MPI programs that use mpi4py-ve and NLCPy on the SX-Aurora TSUBASA.
+
+| *ncore* : Number of cores per VE.
+| a.py: Python script using mpi4py-ve and NLCPy (a minimal sketch appears at the end of this section).
+|
+
+* Interactive Execution
+
+  * Execution on one VE
+
+    Example of using 4 processes on the local VH and 4 VE processes (*ncore* / 4 OpenMP parallel per process) on VE#0 of the local VH
+
+    ::
+
+      $ mpirun -veo -np 4 python a.py
+
+  * Execution on multiple VEs on a VH
+
+    Example of using 4 processes on the local VH and 4 VE processes (1 process per VE, *ncore* OpenMP parallel per process) on VE#0 to VE#3 of the local VH
+
+    ::
+
+      $ VE_NLCPY_NODELIST=0,1,2,3 mpirun -veo -np 4 python a.py
+
+    Example of using 32 processes on the local VH and 32 VE processes (8 processes per VE, *ncore* / 8 OpenMP parallel per process) on VE#0 to VE#3 of the local VH
+
+    ::
+
+      $ VE_NLCPY_NODELIST=0,1,2,3 mpirun -veo -np 32 python a.py
+
+  * Execution on multiple VEs on multiple VHs
+
+    Example of using a total of 32 processes on two VHs host1 and host2, and a total of 32 VE processes on VE#0 and VE#1 of each VH (8 processes per VE, *ncore* / 8 OpenMP parallel per process)
+
+    ::
+
+      $ VE_NLCPY_NODELIST=0,1 mpirun -hosts host1,host2 -veo -np 32 python a.py
+
+* NQSV Request Execution
+
+  * Execution on a specific VH, on a VE
+
+    Example of using 32 processes on logical VH#0 and 32 VE processes on logical VE#0 to logical VE#3 on logical VH#0 (8 processes per VE, *ncore* / 8 OpenMP parallel per process)
+
+    ::
+
+      #PBS -T necmpi
+      #PBS -b 2                # The number of logical hosts
+      #PBS --venum-lhost=4     # The number of VEs per logical host
+      #PBS --cpunum-lhost=32   # The number of CPUs per logical host
+
+      source /opt/nec/ve/mpi/2.22.0/bin/necmpivars.sh
+      export NMPI_USE_COMMAND_SEARCH_PATH=ON
+      mpirun -host 0 -veo -np 32 python a.py
+
+  * Execution on a specific VH, on a specific VE
-| For details of mpirun command, refer to `NEC MPI User's Guide `_.
+
+    Example of using 16 processes on logical VH#0, 16 VE processes in total on logical VE#0 and logical VE#3 on logical VH#0 (8 processes per VE, *ncore* / 8 OpenMP parallel per process)
+
+    ::
+
+      #PBS -T necmpi
+      #PBS -b 2                # The number of logical hosts
+      #PBS --venum-lhost=4     # The number of VEs per logical host
+      #PBS --cpunum-lhost=16   # The number of CPUs per logical host
+
+      source /opt/nec/ve/mpi/2.22.0/bin/necmpivars.sh
+      export NMPI_USE_COMMAND_SEARCH_PATH=ON
+      VE_NLCPY_NODELIST=0,3 mpirun -host 0 -veo -np 16 python a.py
+
+  * Execution on all assigned VEs
+
+    Example of using 32 processes in total on 4 VHs and using 32 VE processes in total from logical VE#0 to logical VE#7 on each of the VHs (1 process per VE, *ncore* OpenMP parallel per process).
+
+    ::
+
+      #PBS -T necmpi
+      #PBS -b 4                # The number of logical hosts
+      #PBS --venum-lhost=8     # The number of VEs per logical host
+      #PBS --cpunum-lhost=8    # The number of CPUs per logical host
+      #PBS --use-hca=2         # The number of HCAs
+
+      source /opt/nec/ve/mpi/2.22.0/bin/necmpivars.sh
+      export NMPI_USE_COMMAND_SEARCH_PATH=ON
+      mpirun -veo -np 32 python a.py
+
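+| The examples above refer to a.py only by name. As a rough sketch (the array size and the reduction used are illustrative assumptions, not part of mpi4py-ve), such a script could look like this:
+
+  .. code-block:: python
+
+     # a.py: reduce per-process NLCPy arrays across all MPI processes
+     from mpi4pyve import MPI  # import MPI before nlcpy (see Notices below)
+     import nlcpy as vp
+
+     comm = MPI.COMM_WORLD
+     rank = comm.Get_rank()
+
+     x = vp.full(1000000, rank, dtype='float64')  # data on this process's VE
+     total = vp.empty(1000000, dtype='float64')
+     comm.Allreduce(x, total, op=MPI.SUM)         # total = sum over all ranks
+     if rank == 0:
+         print(total[:3])
+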
+
+*********
+Profiling
+*********
+NEC MPI provides a facility for displaying MPI communication information.
+There are two formats of MPI communication information available as follows:
+
++-----------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| Reduced Format  | The maximum, minimum, and average values of MPI communication information of all MPI processes are displayed.                                                                        |
++-----------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| Extended Format | MPI communication information of each MPI process is displayed in the ascending order of their ranks in the communicator MPI_COMM_WORLD after the information in the reduced format. |
++-----------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+You can control the display and format of MPI communication information by setting the environment variable NMPI_COMMINF at runtime as shown in the following table.
+
+The Settings of NMPI_COMMINF:
+
++--------------+-----------------------+
+| NMPI_COMMINF | Displayed Information |
++--------------+-----------------------+
+| NO           | (Default) No Output   |
++--------------+-----------------------+
+| YES          | Reduced Format        |
++--------------+-----------------------+
+| ALL          | Extended Format       |
++--------------+-----------------------+
+
+When using the *mpirun* command:
+
+  ::
+
+    $ export NMPI_COMMINF=ALL
+    $ mpirun -veo -np N python sample.py
+
+***************************************************
+Use mpi4py-ve with homebrew classes (without NLCPy)
+***************************************************
+
+The link below may be useful if you want to use *mpi4py-ve* with homebrew classes (without NLCPy):
+
+* `use mpi4py-ve with homebrew classes (without NLCPy) `_
 
@@ -197,13 +329,62 @@ Other Documents
 ***************
 
 Below links would be useful to understand *mpi4py-ve* in more detail:
 
-* `mpi4py-ve tutorial `_
+* `mpi4py-ve tutorial `_
 
 ***********
 Restriction
 ***********
 
-* The value specified by np must not exceed the number of VE cards.
-* The current version of *mpi4py-ve* does not support some functions that are listed in the section "List of Unsupported Functions" of `mpi4py-ve tutorial `_.
+* The current version of *mpi4py-ve* does not support some functions that are listed in the section "List of Unsupported Functions" of `mpi4py-ve tutorial `_.
+* Communication of type bool between NumPy and NLCPy will fail because the two libraries use a different number of bytes to represent a bool.
+
+*******
+Notices
+*******
+* If you import NLCPy before calling MPI_Init()/MPI_Init_thread(), a runtime error will be raised.
+
+  Not recommended usage: ::
+
+    $ mpirun -veo -np 1 $(which python) -c "import nlcpy; from mpi4pyve import MPI"
+    RuntimeError: NLCPy must be import after MPI initialization
+
+  Recommended usage: ::
+
+    $ mpirun -veo -np 1 $(which python) -c "from mpi4pyve import MPI; import nlcpy"
+
+  MPI_Init() or MPI_Init_thread() is called when you import the MPI module from the mpi4pyve package.
+
+* If you use the Lock/Lock_all functions for one-sided communication on NLCPy array data, you need to insert NLCPy synchronization control yourself.
+
+  Synchronization usage:
+
+  .. 
code-block:: python + + import mpi4pyve + from mpi4pyve import MPI + import nlcpy as vp + + comm = MPI.COMM_WORLD + size = comm.Get_size() + rank = comm.Get_rank() + + array = vp.array(0, dtype=int) + + if rank == 0: + win_n = MPI.Win.Create(array, comm=MPI.COMM_WORLD) + else: + win_n = MPI.Win.Create(None, comm=MPI.COMM_WORLD) + if rank == 0: + array.fill(1) + array.venode.synchronize() + comm.Barrier() + if rank != 0: + comm.Barrier() + win_n.Lock(MPI.LOCK_EXCLUSIVE, 0) + win_n.Get([array, MPI.INT], 0) + win_n.Unlock(0) + assert array == 1 + comm.Barrier() + win_n.Free() ******* License diff --git a/conf/mpiconfig.py b/conf/mpiconfig.py index 4194146..a33b548 100644 --- a/conf/mpiconfig.py +++ b/conf/mpiconfig.py @@ -111,7 +111,7 @@ def setup_library_info(self, options, environ): else: section = mpiopt if not filename: filename = "mpi.cfg" - if not section: section = "mpi" + if not section: section = "necmpi" mach = platform.machine() arch = platform.architecture()[0] diff --git a/coverage_device_test/coverage_device_util.py b/coverage_device_test/coverage_device_util.py new file mode 100644 index 0000000..010de1b --- /dev/null +++ b/coverage_device_test/coverage_device_util.py @@ -0,0 +1,118 @@ +from mpi4pyve import MPI +from numpy.testing import assert_array_equal +from functools import reduce +import os + + +if os.environ.get("MPI4PYVE_TEST_PATTERN") == "small": + _shapes = [5, (2, 20), (2, 10, 20), (2, 10, 20, 4)] +else: + _shapes = [5, 5**10, + (2,), (2, 2), (2, 2, 2), (2, 20), (20, 2), + (2, 10, 20), (10, 2, 20), (10, 20, 2), + (2, 10, 20, 4), (10, 2, 4, 20), (4, 10, 20, 2)] + +np = None +vp = None +_devices = None + +if os.environ.get("MPI4PYVE_TEST_DEVICE") == "vh": + import numpy as _np + _devices = [_np] + np = _np +elif os.environ.get("MPI4PYVE_TEST_DEVICE") == "ve": + import nlcpy as _vp + import numpy as _np + _devices = [_vp] + vp = _vp + np = _np +else: + import nlcpy as _vp + import numpy as _np + _devices = [_np, _vp] + vp = _vp + np = _np + +_dtypes = ['int32', 'int64', + 'uint32', 'uint64', + 'float32', 'float64', + 'complex64', 'complex128', + 'bool'] +_flush_dtypes = ['int32'] +_order = ['C', 'F'] +_patterns = [(dev1, dev2, shape, dtype, order) + for dev1 in _devices + for dev2 in _devices + for shape in _shapes + for dtype in _dtypes + for order in _order] +_flush_test_patterns = [(dev1, dtype, order) + for dev1 in _devices + for dtype in _flush_dtypes + for order in _order] +_default_fill_value = -1 +_rbuf_np_bool_size_adjust = 4 + + +def _get_array(a): + if vp is not None and isinstance(a, vp.ndarray): + return a.get() + return a + + +def _get_type(dtype): + if dtype == 'int32': + return MPI.INT + elif dtype == 'int64': + return MPI.LONG + elif dtype == 'uint32': + return MPI.UNSIGNED + elif dtype == 'uint64': + return MPI.UNSIGNED_LONG + elif dtype == 'float32': + return MPI.FLOAT + elif dtype == 'float64': + return MPI.DOUBLE + elif dtype == 'complex64': + return MPI.COMPLEX + elif dtype == 'complex128': + return MPI.DOUBLE_COMPLEX + elif dtype == 'bool': + return MPI.BOOL + + +def _get_sbuf(dev, shape, dtype, order): + if dtype != 'bool': + if isinstance(shape, tuple) or isinstance(shape, list): + n = reduce((lambda x, y: x * y), shape) + return dev.arange(n, dtype=dtype).reshape(shape, order=order) + else: + return dev.arange(shape, dtype=dtype).reshape(shape, order=order) + else: + return dev.random.randint(0, 2, shape).astype('?', order=order) + + +def _get_rbuf(dev, shape, dtype, order, fromdev=None): + if dtype == 'bool': + val = False + else: + val = 
_default_fill_value + return dev.full(shape, val, dtype=dtype, order=order) + + +def _assert_array(a, desired): + if desired is None: + return np.all(a == 0) + else: + return assert_array_equal(a, desired) + + +IS_MULTI_HOST = None + +try: + if IS_MULTI_HOST is None: + comm = MPI.COMM_WORLD + nodes = comm.allgather(os.environ['MPINODEID']) + IS_MULTI_HOST = (len(list(set(nodes))) != 1) +except KeyError: + pass diff --git a/coverage_device_test/run.sh b/coverage_device_test/run.sh new file mode 100644 index 0000000..6086034 --- /dev/null +++ b/coverage_device_test/run.sh @@ -0,0 +1,126 @@ +#!/bin/bash + +TEST='full' +NP=4 +PYTHON_CMD=python +MPIRUN_CMD=mpirun +TEST_DEVICE='ve_vh' + +function usage() { + echo 'Usage: run.sh [ARGUMENT]...' + echo '' + echo ' ARGUMENT:' + echo ' --test MODE or --test=MODE: specify the test MODE' + echo ' available MODE are [full|small]' + echo ' (default: full)' + echo ' --np NO or --np=NO: specify the total number of processes' + echo ' (default: 4)' + echo ' --python-cmd CMD or --python-cmd=CMD: specify python command' + echo ' (default: python)' + echo ' --mpirun-cmd CMD or --mpirun-cmd=CMD: specify mpirun command' + echo ' (default: mpirun)' + echo ' --device DEVICE or --device=DEVICE: specify test device' + echo ' available DEVICE are [ve_vh|ve|vh]' + echo ' (default: ve_vh)' +} + +while (( $# > 0 )) +do + case $1 in + --test | --test=*) + if [[ "$1" =~ ^--test= ]]; then + TEST=$(echo $1 | sed -e 's/^--test=//') + elif [[ -z "$2" ]] || [[ "$2" =~ ^-+ ]]; then + usage + exit 1 + else + TEST="$2" + shift + fi + if [[ $TEST != "full" ]] && [[ $TEST != "small" ]]; then + usage + exit 1 + fi + ;; + --np | --np=*) + if [[ "$1" =~ ^--np= ]]; then + NP=$(echo $1 | sed -e 's/^--np=//') + elif [[ -z "$2" ]] || [[ "$2" =~ ^-+ ]]; then + usage + exit 1 + else + NP="$2" + shift + fi + if [[ ${NP} =~ ^[0-9]+$ ]]; then + NP=${NP} + else + usage + exit 1 + fi + ;; + --python-cmd | --python-cmd=*) + if [[ "$1" =~ ^--python-cmd= ]]; then + PYTHON_CMD=$(echo $1 | sed -e 's/^--python-cmd=//') + elif [[ -z "$2" ]] || [[ "$2" =~ ^-+ ]]; then + usage + exit 1 + else + PYTHON_CMD="$2" + shift + fi + if [[ -z ${PYTHON_CMD} ]]; then + usage + exit 1 + fi + ;; + --mpirun-cmd | --mpirun-cmd=*) + if [[ "$1" =~ ^--mpirun-cmd= ]]; then + MPIRUN_CMD=$(echo $1 | sed -e 's/^--mpirun-cmd=//') + elif [[ -z "$2" ]] || [[ "$2" =~ ^-+ ]]; then + usage + exit 1 + else + MPIRUN_CMD="$2" + shift + fi + if [[ -z ${MPIRUN_CMD} ]]; then + usage + exit 1 + fi + ;; + --device | --device=*) + if [[ "$1" =~ ^--device= ]]; then + TEST_DEVICE=$(echo $1 | sed -e 's/^--device=//') + elif [[ -z "$2" ]] || [[ "$2" =~ ^-+ ]]; then + usage + exit 1 + else + TEST_DEVICE="$2" + shift + fi + if [[ $TEST_DEVICE != "ve_vh" ]] && [[ $TEST_DEVICE != "ve" ]] && [[ $TEST_DEVICE != "vh" ]]; then + usage + exit 1 + fi + ;; + -h | --help) + usage + exit 1 + ;; + esac + shift +done + + +echo 'VE_NLCPY_NODELIST='${VE_NLCPY_NODELIST} + +export NMPI_USE_COMMAND_SEARCH_PATH=ON +set -x +MPI4PYVE_TEST_PATTERN=${TEST} MPI4PYVE_TEST_DEVICE=${TEST_DEVICE} ${MPIRUN_CMD} -veo -np ${NP} ${PYTHON_CMD} test_coverage_device_comm.py +MPI4PYVE_TEST_PATTERN=${TEST} MPI4PYVE_TEST_DEVICE=${TEST_DEVICE} ${MPIRUN_CMD} -veo -np ${NP} ${PYTHON_CMD} test_coverage_device_file.py +MPI4PYVE_TEST_PATTERN=${TEST} MPI4PYVE_TEST_DEVICE=${TEST_DEVICE} ${MPIRUN_CMD} -veo -np ${NP} ${PYTHON_CMD} test_coverage_device_win.py +MPI4PYVE_TEST_PATTERN=${TEST} MPI4PYVE_TEST_DEVICE=${TEST_DEVICE} ${MPIRUN_CMD} -veo -np ${NP} ${PYTHON_CMD} 
test_coverage_device_datatype.py
+MPI4PYVE_TEST_PATTERN=${TEST} MPI4PYVE_TEST_DEVICE=${TEST_DEVICE} ${MPIRUN_CMD} -veo -np ${NP} ${PYTHON_CMD} test_coverage_device_message.py
+set +x
+
diff --git a/coverage_device_test/test_coverage_device_comm.py b/coverage_device_test/test_coverage_device_comm.py
new file mode 100644
index 0000000..1cb1b99
--- /dev/null
+++ b/coverage_device_test/test_coverage_device_comm.py
@@ -0,0 +1,1618 @@
+import unittest  # NOQA
+from unittest import TestCase  # NOQA
+from parameterized import parameterized  # NOQA
+from mpi4pyve import MPI  # NOQA
+from numpy.testing import (
+    assert_equal,
+)
+import coverage_device_util  # NOQA
+from coverage_device_util import (
+    _patterns, _get_array, _get_type, _get_sbuf,
+    _get_rbuf, _assert_array, vp, np
+)
+
+
+def create_topo_comms(comm):
+    size = comm.Get_size()
+    rank = comm.Get_rank()
+    # Cartesian
+    n = int(size ** (1 / 2.0))
+    m = int(size ** (1 / 3.0))
+    if m * m * m == size:
+        dims = [m, m, m]
+    elif n * n == size:
+        dims = [n, n]
+    else:
+        dims = [size]
+    periods = [True] * len(dims)
+    yield comm.Create_cart(dims, periods=periods)
+    # Graph
+    index, edges = [0], []
+    for i in range(size):
+        pos = index[-1]
+        index.append(pos + 2)
+        edges.append((i - 1) % size)
+        edges.append((i + 1) % size)
+    yield comm.Create_graph(index, edges)
+    # Dist Graph
+    sources = [(rank - 2) % size, (rank - 1) % size]
+    destinations = [(rank + 1) % size, (rank + 2) % size]
+    yield comm.Create_dist_graph_adjacent(sources, destinations)
+
+
+def get_neighbors_count(comm):
+    topo = comm.Get_topology()
+    if topo == MPI.CART:
+        ndim = comm.Get_dim()
+        return 2 * ndim, 2 * ndim
+    if topo == MPI.GRAPH:
+        rank = comm.Get_rank()
+        nneighbors = comm.Get_neighbors_count(rank)
+        return nneighbors, nneighbors
+    if topo == MPI.DIST_GRAPH:
+        indeg, outdeg, w = comm.Get_dist_neighbors_count()
+        return indeg, outdeg
+    return 0, 0
+
+
+class TestComm(unittest.TestCase):
+
+    COMM = MPI.COMM_WORLD
+
+    @parameterized.expand(_patterns)
+    def test_Send_Recv(self, dev1, dev2, shape, dtype, order):
+        if dtype == 'bool' and ((dev1 is vp and dev2 is np) or
+                                (dev1 is np and dev2 is vp)):
+            self.skipTest('Booleans case in vp-to-np is not testable')
+        comm = self.COMM
+        size = comm.Get_size()
+        rank = comm.Get_rank()
+        peer = rank ^ 1
+        if peer >= size:
+            peer = MPI.PROC_NULL
+
+        mtype = _get_type(dtype)
+
+        desired = _get_sbuf(np, shape, dtype, order)
+        comm.Bcast([desired, mtype], root=0)
+        if rank % 2 == 0:
+            x = dev1.array(desired, dtype=dtype, order=order)
+            comm.Send([x, mtype], dest=peer)
+        else:
+            y = _get_rbuf(dev2, shape, dtype, order, dev1)
+            comm.Recv([y, mtype], source=peer)
+
+            self.assertFalse(_assert_array(y, desired))
+
+    @parameterized.expand(_patterns)
+    def test_send_recv_offset(self, dev1, dev2, shape, dtype, order):
+        if dev1 is np or dev2 is np:
+            self.skipTest('buffer is not contiguous case is not testable')
+        if isinstance(shape, tuple):
+            self.skipTest('unsupported tuple offset case is not testable')
+        comm = self.COMM
+        size = comm.Get_size()
+        rank = comm.Get_rank()
+        peer = rank ^ 1
+        if peer >= size:
+            peer = MPI.PROC_NULL
+
+        mtype = _get_type(dtype)
+        offset = 2
+
+        desired = _get_sbuf(np, shape, dtype, order)[offset:]
+        comm.Bcast([desired, mtype], root=0)
+        if rank % 2 == 0:
+            x = dev1.array(desired, dtype=dtype, order=order)
+            comm.Send([x, mtype], dest=peer)
+        else:
+            y = _get_rbuf(dev2, shape - offset, dtype, order, dev1)
+            comm.Recv([y, mtype], source=peer)
+
+            self.assertFalse(_assert_array(y, desired))
+
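+    # Each parameterized case receives (dev1, dev2, shape, dtype, order):
+    # dev1/dev2 are the numpy or nlcpy modules used to build the send and
+    # receive buffers, so e.g. a (np, vp) case exercises a VH-to-VE transfer.
+    # The point-to-point tests pair ranks as peer = rank ^ 1 (0<->1, 2<->3,
+    # ...); with an odd process count the unmatched rank gets MPI.PROC_NULL,
+    # so its Send/Recv calls complete as no-ops.
+
+    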
@parameterized.expand(_patterns) + def test_Sendrecv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + a = _get_rbuf(dev2, shape, dtype, order, dev1) + comm.Sendrecv([x, mtype], dest=peer, sendtag=0, recvbuf=[a, mtype], + source=peer, recvtag=0) + if peer != MPI.PROC_NULL: + self.assertFalse(_assert_array(a, x)) + else: + y = dev1.array(desired, dtype=dtype, order=order) + z = _get_rbuf(dev2, shape, dtype, order, dev1) + comm.Sendrecv([y, mtype], dest=peer, sendtag=0, recvbuf=[z, mtype], + source=peer, recvtag=0) + if peer != MPI.PROC_NULL: + self.assertFalse(_assert_array(z, y)) + + @parameterized.expand(_patterns) + def test_Sendrecv_replace(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + desired_0 = _get_sbuf(np, shape, dtype, order) + desired_1 = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired_0, mtype], root=0) + comm.Bcast([desired_1, mtype], root=1) + + if rank % 2 == 0: + x = dev1.array(desired_1, dtype=dtype, order=order) + comm.Sendrecv_replace([x, mtype], dest=peer, sendtag=0, source=peer, + recvtag=0) + if peer != MPI.PROC_NULL: + self.assertFalse(_assert_array(x, desired_0)) + else: + y = dev2.array(desired_0, dtype=dtype, order=order) + comm.Sendrecv_replace([y, mtype], dest=peer, sendtag=0, source=peer, + recvtag=0) + if peer != MPI.PROC_NULL: + self.assertFalse(_assert_array(y, desired_1)) + + @parameterized.expand(_patterns) + def test_Isend_Recv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + comm.Isend([x, mtype], dest=peer).Wait() + else: + y = _get_rbuf(dev2, shape, dtype, order, dev1) + comm.Recv([y, mtype], source=peer) + + self.assertFalse(_assert_array(y, desired)) + + @parameterized.expand(_patterns) + def test_Send_Irecv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + comm.Send([x, mtype], dest=peer) + else: + y = _get_rbuf(dev2, shape, dtype, order, dev1) + comm.Irecv([y, mtype], 
source=peer).Wait() + + self.assertFalse(_assert_array(y, desired)) + + @parameterized.expand(_patterns) + def test_Send_init(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + req = comm.Send_init([x, mtype], dest=peer) + req.Start() + req.Wait() + else: + y = _get_rbuf(dev2, shape, dtype, order, dev1) + comm.Recv([y, mtype], source=peer) + + self.assertFalse(_assert_array(y, desired)) + + @parameterized.expand(_patterns) + def test_Recv_init(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + req = comm.Send([x, mtype], dest=peer) + else: + y = _get_rbuf(dev2, shape, dtype, order, dev1) + req = comm.Recv_init([y, mtype], source=peer) + req.Start() + req.Wait() + self.assertFalse(_assert_array(y, desired)) + + @parameterized.expand(_patterns) + def test_Rsend_Recv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + comm.Rsend([x, mtype], dest=peer) + else: + y = _get_rbuf(dev2, shape, dtype, order, dev1) + comm.Recv([y, mtype], source=peer) + self.assertFalse(_assert_array(y, desired)) + + @parameterized.expand(_patterns) + def test_Ssend_Recv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + comm.Ssend([x, mtype], dest=peer) + else: + y = _get_rbuf(dev2, shape, dtype, order, dev1) + comm.Recv([y, mtype], source=peer) + self.assertFalse(_assert_array(y, desired)) + + @parameterized.expand(_patterns) + def test_Issend_Recv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + desired = _get_sbuf(np, 
shape, dtype, order)
+        comm.Bcast([desired, mtype], root=0)
+        if rank % 2 == 0:
+            x = dev1.array(desired, dtype=dtype, order=order)
+            comm.Issend([x, mtype], dest=peer).Wait()
+        else:
+            y = _get_rbuf(dev2, shape, dtype, order, dev1)
+            comm.Recv([y, mtype], source=peer)
+            self.assertFalse(_assert_array(y, desired))
+
+    @parameterized.expand(_patterns)
+    def test_Irsend_Recv(self, dev1, dev2, shape, dtype, order):
+        if dtype == 'bool' and ((dev1 is vp and dev2 is np) or
+                                (dev1 is np and dev2 is vp)):
+            self.skipTest('Booleans case in vp-to-np is not testable')
+        comm = self.COMM
+        size = comm.Get_size()
+        rank = comm.Get_rank()
+        peer = rank ^ 1
+        if peer >= size:
+            peer = MPI.PROC_NULL
+
+        mtype = _get_type(dtype)
+        desired = _get_sbuf(np, shape, dtype, order)
+        comm.Bcast([desired, mtype], root=0)
+        if rank % 2 == 0:
+            x = dev1.array(desired, dtype=dtype, order=order)
+            comm.Irsend([x, mtype], dest=peer).Wait()
+        else:
+            y = _get_rbuf(dev2, shape, dtype, order, dev1)
+            comm.Recv([y, mtype], source=peer)
+            self.assertFalse(_assert_array(y, desired))
+
+    @parameterized.expand(_patterns)
+    def test_Ssend_init(self, dev1, dev2, shape, dtype, order):
+        if dtype == 'bool' and ((dev1 is vp and dev2 is np) or
+                                (dev1 is np and dev2 is vp)):
+            self.skipTest('Booleans case in vp-to-np is not testable')
+        comm = self.COMM
+        size = comm.Get_size()
+        rank = comm.Get_rank()
+        peer = rank ^ 1
+        if peer >= size:
+            peer = MPI.PROC_NULL
+
+        mtype = _get_type(dtype)
+        desired = _get_sbuf(np, shape, dtype, order)
+        comm.Bcast([desired, mtype], root=0)
+        if rank % 2 == 0:
+            x = dev1.array(desired, dtype=dtype, order=order)
+            req = comm.Ssend_init([x, mtype], dest=peer)
+            req.Start()
+            req.Wait()
+        else:
+            y = _get_rbuf(dev2, shape, dtype, order, dev1)
+            comm.Recv([y, mtype], source=peer)
+            self.assertFalse(_assert_array(y, desired))
+
+    @parameterized.expand(_patterns)
+    def test_Rsend_init(self, dev1, dev2, shape, dtype, order):
+        if dtype in ('complex64', 'complex128', 'bool'):
+            self.skipTest('Datatype is not testable')
+        comm = self.COMM
+        size = comm.Get_size()
+        rank = comm.Get_rank()
+        peer = rank ^ 1
+        if peer >= size:
+            peer = MPI.PROC_NULL
+
+        mtype = _get_type(dtype)
+        desired = _get_sbuf(np, shape, dtype, order)
+        comm.Bcast([desired, mtype], root=0)
+        if rank % 2 == 0:
+            x = dev1.array(desired, dtype=dtype, order=order)
+            req = comm.Rsend_init([x, mtype], dest=peer)
+            req.Start()
+            req.Wait()
+        else:
+            y = _get_rbuf(dev2, shape, dtype, order, dev1)
+            comm.Recv([y, mtype], source=peer)
+            self.assertFalse(_assert_array(y, desired))
+
+    @parameterized.expand(_patterns)
+    def test_Bcast(self, dev1, dev2, shape, dtype, order):
+        if dtype == 'bool' and ((dev1 is vp and dev2 is np) or
+                                (dev1 is np and dev2 is vp)):
+            self.skipTest('Booleans case in vp-to-np is not testable')
+        comm = self.COMM
+        size = comm.Get_size()
+        rank = comm.Get_rank()
+        mtype = _get_type(dtype)
+
+        for root in range(size):
+            sbuf = _get_sbuf(np, size, dtype, order)
+            comm.Bcast([sbuf, mtype], root=root)
+
+            if rank == root:
+                x = dev1.array(sbuf, dtype=dtype, order=order)
+            else:
+                x = _get_rbuf(dev2, size, dtype, order, dev1)
+
+            comm.Bcast([x, mtype], root=root)
+            self.assertFalse(_assert_array(x, sbuf))
+
+    @parameterized.expand(_patterns)
+    def test_Gather(self, dev1, dev2, shape, dtype, order):
+        if dtype == 'bool' and ((dev1 is vp and dev2 is np) or
+                                (dev1 is np and dev2 is vp)):
+            self.skipTest('Booleans case in vp-to-np is not testable')
+        comm = self.COMM
+        size = comm.Get_size()
+        rank = 
comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + if rank == root: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.full((size, root + 1), -1, dtype=dtype, + order=order) + else: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.array([], dtype=dtype, order=order) + comm.Gather([sbuf, mtype], [rbuf, mtype], root=root) + + if rank == root: + desired = np.full((size, root + 1), root, dtype=dtype, + order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_Gatherv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + if rank == root: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.full((size, root + 1), -1, dtype=dtype, + order=order) + else: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.array([], dtype=dtype) + comm.Gatherv([sbuf, mtype], [rbuf, mtype], root=root) + + if rank == root: + desired = np.full((size, root + 1), root, dtype=dtype, + order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_Scatter(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + if rank == root: + sbuf = dev1.full((size, size), root, dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + else: + sbuf = dev1.array([], dtype=dtype) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + comm.Scatter([sbuf, mtype], [rbuf, mtype], root=root) + + desired = np.full(size, root, dtype=dtype, order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_Scatterv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + if rank == root: + sbuf = dev1.full((size, size), root, dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + else: + sbuf = dev1.array([], dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + comm.Scatterv([sbuf, mtype], [rbuf, mtype], root=root) + + desired = np.full(size, root, dtype=dtype, order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_Allgather(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + mtype = _get_type(dtype) + + for root in range(size): + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.full((size, root + 1), -1, dtype=dtype, order=order) + comm.Allgather([sbuf, mtype], [rbuf, mtype]) + + desired = np.full((size, root + 1), root, dtype=dtype, order=order) + assert_equal(_get_array(rbuf), desired) + + 
@parameterized.expand(_patterns) + def test_Allgatherv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + if isinstance(shape, tuple) or isinstance(shape, list): + self.skipTest('shape case in tuple or list is not testable') + comm = self.COMM + size = comm.Get_size() + # for terminated by signal(9). + if size > 4 and np.isscalar(shape) and shape >= 5**10: + shape = 5**5 + + for root in range(size): + sbuf = _get_sbuf(dev1, shape, dtype, order) + rbuf = _get_rbuf(dev2, size * shape, dtype, order, dev1) + self.COMM.Allgatherv(sbuf, rbuf) + + actual_sbuf = _get_sbuf(np, shape, dtype, order) + actual_rbuf = _get_rbuf(np, size * shape, dtype, order, np) + self.COMM.Allgatherv(actual_sbuf, actual_rbuf) + + @parameterized.expand(_patterns) + def test_Alltoall(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + + for root in range(size): + sbuf = _get_sbuf(dev1, (size, root + 1), dtype, order) + rbuf = _get_rbuf(dev2, (size, root + 1), dtype, order, dev1) + self.COMM.Alltoall(sbuf, rbuf) + + actual_sbuf = np.array(sbuf, dtype=dtype, order=order) + actual_rbuf = _get_rbuf(np, (size, root + 1), dtype, order, np) + self.COMM.Alltoall(actual_sbuf, actual_rbuf) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Alltoallv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + + for root in range(size): + sbuf = _get_sbuf(dev1, (size, size), dtype, order) + rbuf = _get_rbuf(dev2, (size, size), dtype, order, dev1) + self.COMM.Alltoallv(sbuf, rbuf) + + actual_sbuf = np.array(sbuf, dtype=dtype, order=order) + actual_rbuf = _get_rbuf(np, (size, size), dtype, order, np) + self.COMM.Alltoallv(actual_sbuf, actual_rbuf) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Alltoallw(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + mtype = _get_type(dtype) + + sdt, rdt = mtype, mtype + sbuf = _get_sbuf(dev1, (size, 1), dtype, order) + rbuf = _get_rbuf(dev2, (size, 1), dtype, order, dev1) + sdsp = list(range(0, size * sbuf.itemsize, sbuf.itemsize)) + rdsp = list(range(0, size * rbuf.itemsize, rbuf.itemsize)) + smsg = (sbuf, ([1] * size, sdsp), [sdt] * size) + rmsg = (rbuf, ([1] * size, rdsp), [rdt] * size) + self.COMM.Alltoallw(smsg, rmsg) + + actual_sbuf = np.array(sbuf, dtype=dtype, order=order) + actual_rbuf = _get_rbuf(np, (size, 1), dtype, order, np) + actual_sdsp = list(range(0, size * actual_sbuf.itemsize, + actual_sbuf.itemsize)) + actual_rdsp = list(range(0, size * actual_rbuf.itemsize, + actual_rbuf.itemsize)) + actual_smsg = (actual_sbuf, ([1] * size, actual_sdsp), [sdt] * size) + actual_rmsg = (actual_rbuf, ([1] * size, actual_rdsp), [rdt] * size) + self.COMM.Alltoallw(actual_smsg, actual_rmsg) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Reduce(self, dev1, dev2, shape, dtype, order): + if 
dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + mtype = _get_type(dtype) + + for root in range(size): + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + sbuf = dev1.array(range(size), dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + + comm.Reduce([sbuf, mtype], [rbuf, mtype], op, root) + + actual_sbuf = np.array(range(size), dtype=dtype, order=order) + actual_rbuf = np.full(size, -1, dtype=dtype, order=order) + comm.Reduce([actual_sbuf, mtype], [actual_rbuf, mtype], op, + root) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Allreduce(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + mtype = _get_type(dtype) + + for root in range(size): + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + sbuf = dev1.array(range(size), dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + + comm.Allreduce([sbuf, mtype], [rbuf, mtype], op) + + actual_sbuf = np.array(range(size), dtype=dtype, order=order) + actual_rbuf = np.full(size, -1, dtype=dtype, order=order) + comm.Allreduce([actual_sbuf, mtype], [actual_rbuf, mtype], op) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Reduce_scatter(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + rcnt = list(range(1, size + 1)) + sbuf = dev1.array([rank + 1] * sum(rcnt), dtype=dtype, + order=order) + rbuf = dev2.full(rank + 1, -1, dtype=dtype, order=order) + + comm.Reduce_scatter([sbuf, mtype], [rbuf, mtype], None, op) + + actual_sbuf = np.array([rank + 1] * sum(rcnt), dtype=dtype, + order=order) + actual_rbuf = np.full(rank + 1, -1, dtype=dtype, order=order) + comm.Reduce_scatter([actual_sbuf, mtype], + [actual_rbuf, mtype], None, op) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Reduce_scatter_block(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + for rcnt in range(1, size): + sbuf = dev1.array([rank] * rcnt * size, dtype=dtype, + order=order) + rbuf = dev2.full(rcnt, -1, dtype=dtype, order=order) + if op == MPI.PROD: + sbuf = dev1.array([rank + 1] * rcnt * size, + dtype=dtype, order=order) + comm.Reduce_scatter_block([sbuf, mtype], [rbuf, mtype], + op=op) + + actual_sbuf = np.array([rank] * rcnt * size, dtype=dtype, + order=order) + actual_rbuf = np.full(rcnt, -1, dtype=dtype, order=order) + if op == MPI.PROD: + actual_sbuf = dev1.array([rank + 1] * rcnt * size, + dtype=dtype, order=order) + comm.Reduce_scatter_block([actual_sbuf, mtype], + [actual_rbuf, mtype], op=op) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_send_recv(self, dev1, dev2, shape, dtype, order): + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = 
MPI.PROC_NULL + mtype = _get_type(dtype) + + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + comm.send(x, dest=peer) + else: + y = comm.recv(source=peer) + self.assertFalse(_assert_array(y, desired)) + + @parameterized.expand(_patterns) + def test_Ibcast(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + sbuf = _get_sbuf(np, size, dtype, order) + comm.Bcast([sbuf, mtype], root=root) + + if rank == root: + x = dev1.array(sbuf, dtype=dtype, order=order) + else: + x = _get_rbuf(dev2, size, dtype, order, dev1) + + comm.Ibcast([x, mtype], root=root).Wait() + self.assertFalse(_assert_array(x, sbuf)) + + @parameterized.expand(_patterns) + def test_Igather(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + if rank == root: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.full((size, root + 1), -1, dtype=dtype, + order=order) + else: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.array([], dtype=dtype, order=order) + comm.Igather([sbuf, mtype], [rbuf, mtype], root=root).Wait() + + if rank == root: + desired = np.full((size, root + 1), root, dtype=dtype, + order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_Igatherv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + if rank == root: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.full((size, root + 1), -1, dtype=dtype, + order=order) + else: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.array([], dtype=dtype, order=order) + comm.Igatherv([sbuf, mtype], [rbuf, mtype], root=root).Wait() + + if rank == root: + desired = np.full((size, root + 1), root, dtype=dtype, + order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_Iscatter(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + if rank == root: + sbuf = dev1.full((size, size), root, dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + else: + sbuf = dev1.array([], dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + comm.Iscatter([sbuf, mtype], [rbuf, mtype], root=root).Wait() + + desired = np.full(size, root, dtype=dtype, order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_Iscatterv(self, dev1, dev2, shape, dtype, order): + if dtype == 
'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + if rank == root: + sbuf = dev1.full((size, size), root, dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + else: + sbuf = dev1.array([], dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + comm.Iscatterv([sbuf, mtype], [rbuf, mtype], root=root).Wait() + + desired = np.full(size, root, dtype=dtype, order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_Iallgather(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + mtype = _get_type(dtype) + + for root in range(size): + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.full((size, root + 1), -1, dtype=dtype, order=order) + comm.Iallgather([sbuf, mtype], [rbuf, mtype]).Wait() + + desired = np.full((size, root + 1), root, dtype=dtype, order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_Iallgatherv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + mtype = _get_type(dtype) + + for root in range(size): + actual_sbuf = _get_sbuf(np, size, dtype, order) + actual_rbuf = _get_rbuf(np, size * size, dtype, order, np) + self.COMM.Iallgatherv(actual_sbuf, actual_rbuf).Wait() + + sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order) + rbuf = _get_rbuf(dev2, size * size, dtype, order, dev1) + self.COMM.Iallgatherv(sbuf, rbuf).Wait() + + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Ialltoall(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + + for root in range(size): + actual_sbuf = _get_sbuf(np, (size, size), dtype, order) + actual_rbuf = _get_rbuf(np, (size, size), dtype, order, np) + self.COMM.Ialltoall(actual_sbuf, actual_rbuf).Wait() + + sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order) + rbuf = _get_rbuf(dev2, (size, size), dtype, order, dev1) + self.COMM.Ialltoall(sbuf, rbuf).Wait() + + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Ialltoallv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + + for root in range(size): + actual_sbuf = _get_sbuf(np, (size, size), dtype, order) + actual_rbuf = _get_rbuf(np, (size, size), dtype, order, np) + self.COMM.Ialltoallv(actual_sbuf, actual_rbuf).Wait() + sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order) + rbuf = _get_rbuf(dev2, (size, size), dtype, order, dev1) + self.COMM.Ialltoallv(sbuf, rbuf).Wait() + + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Ialltoallw(self, dev1, dev2, shape, 
dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + mtype = _get_type(dtype) + + sdt, rdt = mtype, mtype + + actual_sbuf = _get_sbuf(np, (size, 1), dtype, order) + actual_rbuf = _get_rbuf(np, (size, 1), dtype, order, np) + actual_sdsp = list(range(0, size * actual_sbuf.itemsize, + actual_sbuf.itemsize)) + actual_rdsp = list(range(0, size * actual_rbuf.itemsize, + actual_rbuf.itemsize)) + actual_smsg = (actual_sbuf, ([1] * size, actual_sdsp), [sdt] * size) + actual_rmsg = (actual_rbuf, ([1] * size, actual_rdsp), [rdt] * size) + self.COMM.Ialltoallw(actual_smsg, actual_rmsg).Wait() + + sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order) + rbuf = _get_rbuf(dev2, (size, 1), dtype, order, dev1) + sdsp = list(range(0, size * sbuf.itemsize, sbuf.itemsize)) + rdsp = list(range(0, size * rbuf.itemsize, rbuf.itemsize)) + smsg = (sbuf, ([1] * size, sdsp), [sdt] * size) + rmsg = (rbuf, ([1] * size, rdsp), [rdt] * size) + self.COMM.Ialltoallw(smsg, rmsg).Wait() + + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Ireduce(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + mtype = _get_type(dtype) + + for root in range(size): + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + sbuf = dev1.array(range(size), dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + + comm.Ireduce([sbuf, mtype], [rbuf, mtype], op, root).Wait() + + actual_sbuf = np.array(range(size), dtype=dtype, order=order) + actual_rbuf = np.full(size, -1, dtype=dtype, order=order) + comm.Ireduce([actual_sbuf, mtype], + [actual_rbuf, mtype], op, root).Wait() + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Iallreduce(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + mtype = _get_type(dtype) + + for root in range(size): + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + sbuf = dev1.array(range(size), dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + + comm.Iallreduce([sbuf, mtype], [rbuf, mtype], op).Wait() + + actual_sbuf = np.array(range(size), dtype=dtype, order=order) + actual_rbuf = np.full(size, -1, dtype=dtype, order=order) + comm.Iallreduce([actual_sbuf, mtype], + [actual_rbuf, mtype], op).Wait() + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Ireduce_scatter_block(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + mtype = _get_type(dtype) + + for root in range(size): + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + for rcnt in range(1, size): + sbuf = dev1.array([rank] * rcnt * size, dtype=dtype, + order=order) + rbuf = dev2.full(rcnt, -1, dtype=dtype, order=order) + if op == MPI.PROD: + sbuf = dev1.array([rank + 1] * rcnt * size, + dtype=dtype, order=order) + comm.Ireduce_scatter_block([sbuf, mtype], + [rbuf, mtype], + op=op).Wait() + + actual_sbuf = np.array([rank] * rcnt * size, dtype=dtype, + order=order) + actual_rbuf = np.full(rcnt, -1, dtype=dtype, 
order=order) + if op == MPI.PROD: + actual_sbuf = np.array([rank + 1] * rcnt * size, + dtype=dtype, order=order) + comm.Ireduce_scatter_block([actual_sbuf, mtype], + [actual_rbuf, mtype], + op=op).Wait() + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_ssend_recv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + mtype = _get_type(dtype) + + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + comm.ssend(x, dest=peer) + else: + y = comm.recv(None, source=peer) + self.assertFalse(_assert_array(y, desired)) + + @parameterized.expand(_patterns) + def test_sendrecv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + a = comm.sendrecv(x, peer, source=peer) + if peer != MPI.PROC_NULL: + self.assertFalse(_assert_array(a, x)) + else: + y = dev2.array(desired, dtype=dtype, order=order) + z = comm.sendrecv(y, peer, source=peer) + if peer != MPI.PROC_NULL: + self.assertFalse(_assert_array(z, y)) + + @parameterized.expand(_patterns) + def test_isend_irecv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + mtype = _get_type(dtype) + + # recv buffer size + if not (isinstance(shape, tuple) or isinstance(shape, list)): + if shape >= 5 ** 5: + shape = 5 ** 4 + + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + comm.isend(x, dest=peer).wait() + else: + y = comm.irecv(source=peer).wait() + self.assertFalse(_assert_array(y, desired)) + + @parameterized.expand(_patterns) + def test_issend_recv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + mtype = _get_type(dtype) + + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + if rank % 2 == 0: + x = dev1.array(desired, dtype=dtype, order=order) + comm.issend(x, dest=peer).wait() + else: + y = comm.recv(source=peer) + self.assertFalse(_assert_array(y, desired)) + + @parameterized.expand(_patterns) + def test_bcast(self, dev1, dev2, shape, dtype, order): + comm = self.COMM + size = comm.Get_size() + mtype = _get_type(dtype) + + desired = _get_sbuf(np, shape, dtype, order) + comm.Bcast([desired, mtype], root=0) + + x = 
dev1.array(desired, dtype=dtype, order=order) + comm.bcast(x, root=0) + self.assertFalse(_assert_array(x, desired)) + + @parameterized.expand(_patterns) + def test_gather(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + for root in range(size): + if rank == root: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.full((size, root + 1), -1, dtype=dtype, + order=order) + else: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.array([], dtype=dtype, order=order) + rbuf = comm.gather(sbuf, root=root) + + if rank == root: + desired = np.full((size, root + 1), root, dtype=dtype, + order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_scatter(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + for root in range(size): + if rank == root: + sbuf = dev1.full((size, size), root, dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + else: + sbuf = dev1.array([], dtype=dtype, order=order) + rbuf = dev2.full(size, -1, dtype=dtype, order=order) + rbuf = comm.scatter(sbuf, root=root) + + desired = np.full(size, root, dtype=dtype, order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_allgather(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + for root in range(size): + if rank == root: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.full((size, root + 1), -1, dtype=dtype, + order=order) + else: + sbuf = dev1.full(root + 1, root, dtype=dtype, order=order) + rbuf = dev2.full((size, root + 1), -1, dtype=dtype, + order=order) + rbuf = comm.allgather(sbuf) + + desired = np.full((size, root + 1), root, dtype=dtype, order=order) + assert_equal(_get_array(rbuf), desired) + + @parameterized.expand(_patterns) + def test_alltoall(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + + for root in range(size): + sbuf = dev1.full((size, root + 1), root, dtype=dtype, order=order) + rbuf = self.COMM.alltoall(sbuf) + + actual_sbuf = np.full((size, root + 1), root, dtype=dtype, + order=order) + actual_rbuf = self.COMM.alltoall(actual_sbuf) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_reduce(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + + for root in range(size): + for op in (MPI.SUM, MPI.PROD): + sbuf = dev1.array(range(size), dtype=dtype, order=order) + rbuf = comm.reduce(sbuf, op, root) + + actual_sbuf = np.array(range(size), dtype=dtype, order=order) + actual_rbuf = comm.reduce(actual_sbuf, op, root) + assert_equal(_get_array(rbuf), 
actual_rbuf) + + @parameterized.expand(_patterns) + def test_allreduce(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + + for root in range(size): + for op in (MPI.SUM, MPI.PROD): + sbuf = dev1.array(range(size), dtype=dtype, order=order) + rbuf = comm.allreduce(sbuf, op) + + actual_sbuf = np.array(range(size), dtype=dtype, order=order) + actual_rbuf = comm.allreduce(actual_sbuf, op) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Scan(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + mtype = _get_type(dtype) + size = self.COMM.Get_size() + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + sbuf = _get_sbuf(dev1, size, dtype, order) + rbuf = _get_rbuf(dev2, size, dtype, order, dev1) + comm.Scan([sbuf, mtype], [rbuf, mtype], op) + + actual_sbuf = _get_sbuf(np, size, dtype, order) + actual_rbuf = _get_rbuf(np, size, dtype, order, np) + comm.Scan([actual_sbuf, mtype], [actual_rbuf, mtype], op) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Exscan(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + mtype = _get_type(dtype) + size = self.COMM.Get_size() + + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + sbuf = _get_sbuf(dev1, size, dtype, order) + rbuf = _get_rbuf(dev2, size, dtype, order, dev1) + comm.Exscan([sbuf, mtype], [rbuf, mtype], op) + + actual_sbuf = _get_sbuf(np, size, dtype, order) + actual_rbuf = _get_rbuf(np, size, dtype, order, np) + comm.Exscan([actual_sbuf, mtype], [actual_rbuf, mtype], op) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Iscan(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + + comm = self.COMM + mtype = _get_type(dtype) + size = self.COMM.Get_size() + + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + sbuf = _get_sbuf(dev1, size, dtype, order) + rbuf = _get_rbuf(dev2, size, dtype, order, dev1) + comm.Iscan([sbuf, mtype], [rbuf, mtype], op).Wait() + + actual_sbuf = _get_sbuf(np, size, dtype, order) + actual_rbuf = _get_rbuf(np, size, dtype, order, np) + comm.Iscan([actual_sbuf, mtype], [actual_rbuf, mtype], op).Wait() + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Iexscan(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + mtype = _get_type(dtype) + size = self.COMM.Get_size() + + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + sbuf = _get_sbuf(dev1, size, dtype, order) + rbuf = _get_rbuf(dev2, size, dtype, order, dev1) + comm.Iexscan([sbuf, mtype], [rbuf, mtype], op).Wait() + + actual_sbuf = _get_sbuf(np, size, dtype, order) + actual_rbuf = _get_rbuf(np, size, dtype, order, np) + comm.Iexscan([actual_sbuf, mtype], [actual_rbuf, mtype], op).Wait() + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_scan(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = self.COMM.Get_size() + + for op in 
(MPI.SUM, MPI.PROD): + sbuf = _get_sbuf(dev1, size, dtype, order) + rbuf = comm.scan(sbuf, op) + + actual_sbuf = _get_sbuf(np, size, dtype, order) + actual_rbuf = comm.scan(actual_sbuf, op) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_exscan(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = self.COMM.Get_size() + + for op in (MPI.SUM, MPI.PROD): + sbuf = _get_sbuf(dev1, size, dtype, order) + rbuf = comm.exscan(sbuf, op) + + actual_sbuf = _get_sbuf(np, size, dtype, order) + actual_rbuf = comm.exscan(actual_sbuf, op) + assert_equal(_get_array(rbuf), actual_rbuf) + + @parameterized.expand(_patterns) + def test_Neighbor_allgather(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + for comm in create_topo_comms(self.COMM): + rsize, ssize = get_neighbors_count(comm) + mtype = _get_type(dtype) + + actual_sbuf = _get_sbuf(np, 3, dtype, order) + actual_rbuf = _get_rbuf(np, (rsize, 3), dtype, order, np) + comm.Neighbor_allgather([actual_sbuf, mtype], [actual_rbuf, mtype]) + + sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order) + rbuf = _get_rbuf(dev2, (rsize, 3), dtype, order, dev1) + comm.Neighbor_allgather([sbuf, mtype], [rbuf, mtype]) + assert_equal(_get_array(rbuf), actual_rbuf) + + comm.Free() + + @parameterized.expand(_patterns) + def test_Neighbor_allgatherv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + for comm in create_topo_comms(self.COMM): + rsize, ssize = get_neighbors_count(comm) + mtype = _get_type(dtype) + actual_sbuf = _get_sbuf(np, 3, dtype, order) + actual_rbuf = _get_rbuf(np, (rsize, 3), dtype, order, np) + comm.Neighbor_allgatherv([actual_sbuf, mtype], + [actual_rbuf, mtype]) + + sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order) + rbuf = _get_rbuf(dev2, (rsize, 3), dtype, order, dev1) + comm.Neighbor_allgatherv([sbuf, mtype], [rbuf, mtype]) + + assert_equal(_get_array(rbuf), actual_rbuf) + + comm.Free() + + @parameterized.expand(_patterns) + def test_Ineighbor_allgather(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + for comm in create_topo_comms(self.COMM): + rsize, ssize = get_neighbors_count(comm) + mtype = _get_type(dtype) + + actual_sbuf = _get_sbuf(np, 3, dtype, order) + actual_rbuf = _get_rbuf(np, (rsize, 3), dtype, order, np) + comm.Ineighbor_allgather([actual_sbuf, mtype], + [actual_rbuf, mtype]).Wait() + + sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order) + rbuf = _get_rbuf(dev2, (rsize, 3), dtype, order, dev1) + comm.Ineighbor_allgather([sbuf, mtype], [rbuf, mtype]).Wait() + assert_equal(_get_array(rbuf), actual_rbuf) + + comm.Free() + + @parameterized.expand(_patterns) + def test_Ineighbor_allgatherv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + for comm in create_topo_comms(self.COMM): + rsize, ssize = get_neighbors_count(comm) + mtype = _get_type(dtype) + + actual_sbuf = _get_sbuf(np, 3, dtype, order) + actual_rbuf = 
_get_rbuf(np, (rsize, 3), dtype, order, np) + comm.Ineighbor_allgatherv([actual_sbuf, mtype], + [actual_rbuf, mtype]).Wait() + + sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order) + rbuf = _get_rbuf(dev2, (rsize, 3), dtype, order, dev1) + comm.Ineighbor_allgatherv([sbuf, mtype], [rbuf, mtype]).Wait() + assert_equal(_get_array(rbuf), actual_rbuf) + + comm.Free() + + @parameterized.expand(_patterns) + def test_Neighbor_alltoall(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + for comm in create_topo_comms(self.COMM): + rsize, ssize = get_neighbors_count(comm) + mtype = _get_type(dtype) + + actual_sbuf = _get_sbuf(np, (ssize, 3), dtype, order) + actual_rbuf = _get_rbuf(np, (rsize, 3), dtype, order, np) + comm.Neighbor_alltoall([actual_sbuf, mtype], [actual_rbuf, mtype]) + + sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order) + rbuf = _get_rbuf(dev2, (rsize, 3), dtype, order, dev1) + comm.Neighbor_alltoall([sbuf, mtype], [rbuf, mtype]) + assert_equal(_get_array(rbuf), actual_rbuf) + + comm.Free() + + @parameterized.expand(_patterns) + def test_Neighbor_alltoallv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + for comm in create_topo_comms(self.COMM): + rsize, ssize = get_neighbors_count(comm) + mtype = _get_type(dtype) + actual_sbuf = _get_sbuf(np, (ssize, 3), dtype, order) + actual_rbuf = _get_rbuf(np, (rsize, 3), dtype, order, np) + comm.Neighbor_alltoallv([actual_sbuf, mtype], [actual_rbuf, mtype]) + + sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order) + rbuf = _get_rbuf(dev2, (rsize, 3), dtype, order, dev1) + comm.Neighbor_alltoallv([sbuf, mtype], [rbuf, mtype]) + assert_equal(_get_array(rbuf), actual_rbuf) + + comm.Free() + + @parameterized.expand(_patterns) + def test_Neighbor_alltoallw(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + for comm in create_topo_comms(self.COMM): + rsize, ssize = get_neighbors_count(comm) + mtype = _get_type(dtype) + sdt, rdt = mtype, mtype + + actual_sbuf = _get_sbuf(np, (ssize, 1), dtype, order) + actual_rbuf = _get_rbuf(np, (rsize, 1), dtype, order, np) + actual_sdsp = list(range(0, ssize * actual_sbuf.itemsize, + actual_sbuf.itemsize)) + actual_rdsp = list(range(0, rsize * actual_rbuf.itemsize, + actual_rbuf.itemsize)) + actual_smsg = [actual_sbuf, ([1] * ssize, actual_sdsp), + [sdt] * ssize] + actual_rmsg = (actual_rbuf, ([1] * rsize, actual_rdsp), + [rdt] * rsize) + comm.Neighbor_alltoallw(actual_smsg, actual_rmsg) + + sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order) + rbuf = _get_rbuf(dev2, (rsize, 1), dtype, order, dev1) + sdsp = list(range(0, ssize * sbuf.itemsize, sbuf.itemsize)) + rdsp = list(range(0, rsize * rbuf.itemsize, rbuf.itemsize)) + smsg = [sbuf, ([1] * ssize, sdsp), [sdt] * ssize] + rmsg = (rbuf, ([1] * rsize, rdsp), [rdt] * rsize) + comm.Neighbor_alltoallw(smsg, rmsg) + + assert_equal(_get_array(rbuf), actual_rbuf) + + comm.Free() + + @parameterized.expand(_patterns) + def test_Ineighbor_alltoall(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + for 
comm in create_topo_comms(self.COMM):
+            rsize, ssize = get_neighbors_count(comm)
+            mtype = _get_type(dtype)
+
+            actual_sbuf = _get_sbuf(np, (ssize, 3), dtype, order)
+            actual_rbuf = _get_rbuf(np, (rsize, 3), dtype, order, np)
+            comm.Ineighbor_alltoall([actual_sbuf, mtype],
+                                    [actual_rbuf, mtype]).Wait()
+
+            sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order)
+            rbuf = _get_rbuf(dev2, (rsize, 3), dtype, order, dev1)
+            comm.Ineighbor_alltoall([sbuf, mtype], [rbuf, mtype]).Wait()
+
+            assert_equal(_get_array(rbuf), actual_rbuf)
+
+            comm.Free()
+
+    @parameterized.expand(_patterns)
+    def test_Ineighbor_alltoallv(self, dev1, dev2, shape, dtype, order):
+        if dtype == 'bool' and ((dev1 is vp and dev2 is np) or
+                                (dev1 is np and dev2 is vp)):
+            self.skipTest('Booleans case in vp-to-np is not testable')
+        for comm in create_topo_comms(self.COMM):
+            rsize, ssize = get_neighbors_count(comm)
+            mtype = _get_type(dtype)
+
+            actual_sbuf = _get_sbuf(np, (ssize, 3), dtype, order)
+            actual_rbuf = _get_rbuf(np, (rsize, 3), dtype, order, np)
+            comm.Ineighbor_alltoallv([actual_sbuf, mtype],
+                                     [actual_rbuf, mtype]).Wait()
+
+            sbuf = dev1.array(actual_sbuf, dtype=dtype, order=order)
+            rbuf = _get_rbuf(dev2, (rsize, 3), dtype, order, dev1)
+            comm.Ineighbor_alltoallv([sbuf, mtype], [rbuf, mtype]).Wait()
+
+            assert_equal(_get_array(rbuf), actual_rbuf)
+
+            comm.Free()
+
+    @parameterized.expand(_patterns)
+    def test_Ineighbor_alltoallw(self, dev1, dev2, shape, dtype, order):
+        if dtype == 'bool' and ((dev1 is vp and dev2 is np) or
+                                (dev1 is np and dev2 is vp)):
+            self.skipTest('Booleans case in vp-to-np is not testable')
+        for comm in create_topo_comms(self.COMM):
+            rsize, ssize = get_neighbors_count(comm)
+            mtype = _get_type(dtype)
+            sbuf = _get_sbuf(dev1, (ssize, 1), dtype, order)
+            rbuf = _get_rbuf(dev2, (rsize, 1), dtype, order, dev1)
+            sdt, rdt = mtype, mtype
+            sdsp = list(range(0, ssize * sbuf.itemsize, sbuf.itemsize))
+            rdsp = list(range(0, rsize * rbuf.itemsize, rbuf.itemsize))
+            smsg = [sbuf, ([1] * ssize, sdsp), [sdt] * ssize]
+            rmsg = (rbuf, ([1] * rsize, rdsp), [rdt] * rsize)
+            comm.Ineighbor_alltoallw(smsg, rmsg).Wait()
+
+            actual_sbuf = _get_sbuf(np, (ssize, 1), dtype, order)
+            actual_rbuf = _get_rbuf(np, (rsize, 1), dtype, order, np)
+            actual_sdsp = list(range(0, ssize * actual_sbuf.itemsize,
+                                     actual_sbuf.itemsize))
+            actual_rdsp = list(range(0, rsize * actual_rbuf.itemsize,
+                                     actual_rbuf.itemsize))
+            actual_smsg = [actual_sbuf, ([1] * ssize, actual_sdsp),
+                           [sdt] * ssize]
+            actual_rmsg = (actual_rbuf, ([1] * rsize, actual_rdsp),
+                           [rdt] * rsize)
+            comm.Ineighbor_alltoallw(actual_smsg, actual_rmsg).Wait()
+            assert_equal(_get_array(rbuf), _get_array(actual_rbuf))
+
+            comm.Free()
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/coverage_device_test/test_coverage_device_datatype.py b/coverage_device_test/test_coverage_device_datatype.py
new file mode 100644
index 0000000..8249bb8
--- /dev/null
+++ b/coverage_device_test/test_coverage_device_datatype.py
@@ -0,0 +1,47 @@
+import unittest # NOQA
+from unittest import TestCase # NOQA
+from parameterized import parameterized # NOQA
+from mpi4pyve import MPI # NOQA
+from mpi4pyve import util # NOQA
+from numpy.testing import (
+    assert_array_equal,
+)
+import coverage_device_util # NOQA
+from coverage_device_util import (
+    _patterns, _get_type, _get_sbuf, _get_rbuf, vp, np
+)
+
+
+class TestCoverageDeviceDataType(unittest.TestCase):
+
+    COMM = MPI.COMM_WORLD
+
+    @parameterized.expand(_patterns)
+    def test_PackUnpack(self, dev1, dev2, shape, dtype,
order):
+        if dtype == 'bool' and ((dev1 is vp and dev2 is np) or
+                                (dev1 is np and dev2 is vp)):
+            self.skipTest('Booleans case in vp-to-np is not testable')
+        comm = self.COMM
+        size = comm.Get_size()
+        rank = comm.Get_rank()
+        peer = rank ^ 1
+        if peer >= size:
+            peer = MPI.PROC_NULL
+        mtype = _get_type(dtype)
+
+        desired = _get_sbuf(np, shape, dtype, order)
+        comm.Bcast([desired, mtype], root=0)
+
+        x = dev1.array(desired, dtype=dtype, order=order)
+        y = _get_rbuf(dev2, shape, dtype, order, dev1)
+        size_x = mtype.Pack_size(x.size, self.COMM)
+        size_y = mtype.Pack_size(y.size, self.COMM)
+        z = np.empty(max(size_x, size_y) * 16, dtype='b')
+        mtype.Pack(x, z, 0, self.COMM)
+        mtype.Unpack(z, 0, y, self.COMM)
+
+        assert_array_equal(x, y)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/coverage_device_test/test_coverage_device_file.py b/coverage_device_test/test_coverage_device_file.py
new file mode 100644
index 0000000..e1b8cc1
--- /dev/null
+++ b/coverage_device_test/test_coverage_device_file.py
@@ -0,0 +1,452 @@
+import unittest # NOQA
+from unittest import TestCase # NOQA
+from parameterized import parameterized # NOQA
+from mpi4pyve import MPI # NOQA
+from mpi4pyve import util # NOQA
+from numpy.testing import (
+    assert_equal,
+)
+import sys, os, tempfile # NOQA
+import coverage_device_util # NOQA
+from coverage_device_util import (
+    _patterns, _get_array, _get_type, _get_sbuf, _get_rbuf, vp, np,
+    IS_MULTI_HOST
+)
+
+
+class TestCoverageDeviceFile(unittest.TestCase):
+
+    COMM = MPI.COMM_WORLD
+    FILE = MPI.FILE_NULL
+
+    prefix = 'mpi4pyve'
+    tmpname = './tmp'
+
+    def setUp(self):
+        comm = self.COMM
+        fname = None
+        if comm.Get_rank() == 0:
+            if not os.path.exists(self.tmpname):
+                try:
+                    os.mkdir(self.tmpname)
+                except OSError as e:
+                    if e.errno != 17:  # not errno.EEXIST ("File exists")
+                        raise
+                    pass
+            fd, fname = tempfile.mkstemp(prefix=self.prefix, dir=self.tmpname)
+            os.close(fd)
+        fname = comm.bcast(fname, 0)
+        amode = MPI.MODE_RDWR | MPI.MODE_CREATE
+        amode |= MPI.MODE_DELETE_ON_CLOSE
+        amode |= MPI.MODE_UNIQUE_OPEN
+        info = MPI.INFO_NULL
+        try:
+            self.FILE = MPI.File.Open(comm, fname, amode, info)
+        except Exception:
+            if comm.Get_rank() == 0:
+                os.remove(fname)
+            raise
+
+    def tearDown(self):
+        if self.FILE:
+            self.FILE.Close()
+        self.COMM.Barrier()
+
+    @parameterized.expand(_patterns)
+    def test_ReadWriteAt(self, dev1, dev2, shape, dtype, order):
+        if dtype == 'bool' and ((dev1 is vp and dev2 is np) or
+                                (dev1 is np and dev2 is vp)):
+            self.skipTest('Booleans case in vp-to-np is not testable')
+        comm = self.COMM
+        size = comm.Get_size()
+        rank = comm.Get_rank()
+
+        actual = _get_sbuf(np, shape, dtype, order)
+        comm.Bcast(actual, root=0)
+
+        x = dev1.array(actual, dtype=dtype, order=order)
+        y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1)
+        nbytes = max(x.nbytes, y.nbytes)
+        fh = self.FILE
+        fh.Set_size(0)
+        fh.Set_view(0, _get_type(dtype))
+
+        fh.Write_at(rank * nbytes * 10, x)
+        fh.Sync()
+        comm.Barrier()
+        fh.Sync()
+        fh.Read_at(rank * nbytes * 10, y)
+
+        assert_equal(_get_array(y), actual)
+
+    @parameterized.expand(_patterns)
+    def test_ReadWriteAtAll(self, dev1, dev2, shape, dtype, order):
+        if dtype == 'bool' and ((dev1 is vp and dev2 is np) or
+                                (dev1 is np and dev2 is vp)):
+            self.skipTest('Booleans case in vp-to-np is not testable')
+        comm = self.COMM
+        size = comm.Get_size()
+        rank = comm.Get_rank()
+
+        actual = _get_sbuf(np, shape, dtype, order)
+        comm.Bcast(actual, root=0)
+
+        x = dev1.array(actual, dtype=dtype, order=order)
+        y = _get_rbuf(dev2, shape, dtype,
order, fromdev=dev1) + fh = self.FILE + nbytes = max(x.nbytes, y.nbytes) + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + + fh.Write_at_all(rank * nbytes * 10, x) + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Read_at_all(rank * nbytes * 10, y) + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + def test_IReadIWriteAt(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + + fh.Iwrite_at(rank * nbytes * 10, x).Wait() + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Iread_at(rank * nbytes * 10, y).Wait() + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + def test_IReadIWriteAtAll(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + fh.Iwrite_at_all(rank * nbytes * 10, x).Wait() + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Iread_at_all(rank * nbytes * 10, y).Wait() + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + def test_ReadWrite(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + fh.Seek(rank * nbytes * 10, MPI.SEEK_SET) + fh.Write(x) + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Seek(rank * nbytes * 10, MPI.SEEK_SET) + fh.Read(y) + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + def test_ReadWriteAll(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + fh.Seek(rank * nbytes * 10, MPI.SEEK_SET) + fh.Write_all(x) + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Seek(rank * nbytes * 10, MPI.SEEK_SET) + fh.Read_all(y) + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + def test_IreadIwrite(self, 
dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + fh.Seek(rank * nbytes * 10, MPI.SEEK_SET) + fh.Iwrite(x).Wait() + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Seek(rank * nbytes * 10, MPI.SEEK_SET) + fh.Iread(y).Wait() + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + def test_IreadIwriteAll(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + fh.Seek(rank * nbytes * 10, MPI.SEEK_SET) + fh.Iwrite_all(x).Wait() + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Seek(rank * nbytes * 10, MPI.SEEK_SET) + fh.Iread_all(y).Wait() + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + @unittest.skipIf(IS_MULTI_HOST, 'necmpi-multi-host') + def test_ReadWriteShared(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + fh.Seek_shared(rank * nbytes * 10, MPI.SEEK_SET) + fh.Write_shared(x) + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Seek_shared(rank * nbytes * 10, MPI.SEEK_SET) + fh.Read_shared(y) + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + @unittest.skipIf(IS_MULTI_HOST, 'necmpi-multi-host') + def test_IreadIwriteShared(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + fh.Seek_shared(rank * nbytes * 10, MPI.SEEK_SET) + fh.Iwrite_shared(x).Wait() + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Seek_shared(rank * nbytes * 10, MPI.SEEK_SET) + fh.Iread_shared(y).Wait() + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + @unittest.skipIf(IS_MULTI_HOST, 'necmpi-multi-host') + def test_ReadWriteOrderd(self, dev1, dev2, shape, dtype, order): + 
if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + fh.Seek_shared(rank * nbytes * 10, MPI.SEEK_SET) + fh.Write_ordered(x) + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Seek_shared(rank * nbytes * 10, MPI.SEEK_SET) + fh.Read_ordered(y) + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + def test_ReadWriteAtAllBegin(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + fh.Write_at_all_begin(rank * nbytes * 10, x) + fh.Write_at_all_end(x) + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Read_at_all_begin(rank * nbytes * 10, y) + fh.Read_at_all_end(y) + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + def test_ReadWriteAllBegin(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + fh.Seek(rank * nbytes * 10, MPI.SEEK_SET) + fh.Write_all_begin(x) + fh.Write_all_end(x) + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Seek(rank * nbytes * 10, MPI.SEEK_SET) + fh.Read_all_begin(y) + fh.Read_all_end(y) + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + @unittest.skipIf(IS_MULTI_HOST, 'necmpi-multi-host') + def test_ReadWriteOrderdBegin(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) + or (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + + actual = _get_sbuf(np, shape, dtype, order) + comm.Bcast(actual, root=0) + + x = dev1.array(actual, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype, order, fromdev=dev1) + nbytes = max(x.nbytes, y.nbytes) + fh = self.FILE + fh.Set_size(0) + fh.Set_view(0, _get_type(dtype)) + fh.Seek_shared(rank * nbytes * 10, MPI.SEEK_SET) + fh.Write_ordered_begin(x) + fh.Write_ordered_end(x) + fh.Sync() + comm.Barrier() + fh.Sync() + fh.Seek_shared(rank * nbytes * 10, MPI.SEEK_SET) + fh.Read_ordered_begin(y) + fh.Read_ordered_end(y) + + assert_equal(_get_array(y), actual) + + +if __name__ == '__main__': + unittest.main() diff --git a/coverage_device_test/test_coverage_device_message.py 
b/coverage_device_test/test_coverage_device_message.py new file mode 100644 index 0000000..dfb0c98 --- /dev/null +++ b/coverage_device_test/test_coverage_device_message.py @@ -0,0 +1,72 @@ +import unittest # NOQA +from unittest import TestCase # NOQA +from parameterized import parameterized # NOQA +from mpi4pyve import MPI # NOQA +from mpi4pyve import util # NOQA +from numpy.testing import ( + assert_equal, +) +import coverage_device_util # NOQA +from coverage_device_util import ( + _patterns, _get_array, _get_type, _get_sbuf, _get_rbuf, vp, np +) + + +class TestCoverageDeviceMessage(unittest.TestCase): + + COMM = MPI.COMM_WORLD + + @parameterized.expand(_patterns) + def test_Recv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + + actual = _get_sbuf(np, shape, dtype=dtype, order=order) + comm.Bcast(actual, root=0) + x = dev1.array(actual, dtype=dtype, order=order) + if rank % 2 == 0: + comm.Send([x, mtype], dest=peer) + else: + y = _get_rbuf(dev2, shape, dtype=dtype, order=order) + m = MPI.Message.Probe(comm) + m.Recv(y) + + assert_equal(_get_array(y), actual) + + @parameterized.expand(_patterns) + def test_Irecv(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + mtype = _get_type(dtype) + actual = _get_sbuf(np, shape, dtype=dtype, order=order) + comm.Bcast(actual, root=0) + x = dev1.array(actual, dtype=dtype, order=order) + if rank % 2 == 0: + comm.Send([x, mtype], dest=peer) + else: + y = _get_rbuf(dev2, shape, dtype=dtype, order=order) + m = MPI.Message.Probe(comm) + m.Irecv(y).Wait() + + assert_equal(_get_array(y), actual) + + +if __name__ == '__main__': + unittest.main() diff --git a/coverage_device_test/test_coverage_device_win.py b/coverage_device_test/test_coverage_device_win.py new file mode 100644 index 0000000..6d5efec --- /dev/null +++ b/coverage_device_test/test_coverage_device_win.py @@ -0,0 +1,486 @@ +import unittest # NOQA +from unittest import TestCase # NOQA +from parameterized import parameterized # NOQA +from mpi4pyve import MPI # NOQA +from mpi4pyve import util # NOQA +from numpy.testing import ( + assert_equal, +) +import coverage_device_util # NOQA +from coverage_device_util import ( + _patterns, _get_array, _get_sbuf, _get_rbuf, vp, np, + _flush_test_patterns +) + + +class TestCoverageDeviceWin(unittest.TestCase): + + COMM = MPI.COMM_WORLD + INFO = MPI.INFO_NULL + + def memzero(self, m): + try: + m[:] = 0 + except IndexError: # cffi buffer + m[0:len(m)] = b'\0' * len(m) + + def setUp(self): + nbytes = 5 ** 11 * MPI.DOUBLE.size + try: + self.mpi_memory = MPI.Alloc_mem(nbytes) + self.memory = self.mpi_memory + self.memzero(self.memory) + except MPI.Exception: + import array + self.mpi_memory = None + self.memory = array.array('B', [0] * nbytes) + self.WIN = MPI.Win.Create(self.memory, 1, self.INFO, self.COMM) + try: + self.actual_mpi_memory = MPI.Alloc_mem(nbytes) + self.actual_memory = self.actual_mpi_memory + self.memzero(self.actual_memory) + except MPI.Exception: + import array + self.actual_mpi_memory 
= None + self.actual_memory = array.array('B', [0] * nbytes) + self.actual_WIN = MPI.Win.Create(self.actual_memory, 1, self.INFO, + self.COMM) + + def tearDown(self): + self.WIN.Free() + if self.mpi_memory: + MPI.Free_mem(self.mpi_memory) + self.actual_WIN.Free() + if self.actual_mpi_memory: + MPI.Free_mem(self.actual_mpi_memory) + + @parameterized.expand(_patterns) + def test_PutGet(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + actual_y = _get_sbuf(np, shape, dtype=dtype, order=order) + comm.Bcast(actual_y, root=0) + x = dev1.array(actual_y, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype=dtype, order=order) + target = x.itemsize + self.WIN.Fence() + self.WIN.Put(x, rank, target) + self.WIN.Fence() + self.WIN.Get(y, rank, target) + self.WIN.Fence() + + assert_equal(_get_array(y), actual_y) + + @parameterized.expand(_patterns) + def test_Accumulate(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + x = _get_sbuf(dev1, shape, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype=dtype, order=order) + actual_x = _get_sbuf(np, shape, dtype=dtype, order=order) + actual_y = _get_rbuf(np, shape, dtype=dtype, order=order) + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): + self.WIN.Fence() + self.WIN.Accumulate(x, rank, op=op) + self.WIN.Fence() + self.WIN.Get(y, rank) + self.WIN.Fence() + + self.actual_WIN.Fence() + self.actual_WIN.Accumulate(actual_x, rank, op=op) + self.actual_WIN.Fence() + self.actual_WIN.Get(actual_y, rank) + self.actual_WIN.Fence() + + assert_equal(_get_array(x), actual_x) + assert_equal(_get_array(y), actual_y) + + @parameterized.expand(_patterns) + def test_GetAccumulate(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + x = _get_sbuf(dev1, shape, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype=dtype, order=order) + actual_x = _get_sbuf(np, shape, dtype=dtype, order=order) + actual_y = _get_rbuf(np, shape, dtype=dtype, order=order) + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN, MPI.NO_OP): + self.WIN.Fence() + self.WIN.Put(x, rank) + self.WIN.Get_accumulate(x, y, rank, op=op) + self.WIN.Fence() + self.WIN.Get(y, rank) + self.WIN.Fence() + + self.actual_WIN.Fence() + self.actual_WIN.Put(actual_x, rank) + self.actual_WIN.Get_accumulate(actual_x, actual_y, rank, op=op) + self.actual_WIN.Fence() + self.actual_WIN.Get(actual_y, rank) + self.actual_WIN.Fence() + + assert_equal(_get_array(x), actual_x) + assert_equal(_get_array(y), actual_y) + + @parameterized.expand(_patterns) + def test_Fetch_and_op(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + x = _get_sbuf(dev1, 1, dtype=dtype, order=order) + y = _get_rbuf(dev2, 1, 
dtype=dtype, order=order) + x.fill(1) + y.fill(-1) + actual_x = _get_sbuf(np, 1, dtype=dtype, order=order) + actual_y = _get_rbuf(np, 1, dtype=dtype, order=order) + actual_x.fill(1) + actual_y.fill(-1) + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN, MPI.REPLACE, + MPI.NO_OP): + self.WIN.Fence() + self.WIN.Fetch_and_op(x, y, rank, 0, op=op) + self.WIN.Fence() + + self.actual_WIN.Fence() + self.actual_WIN.Fetch_and_op(actual_x, actual_y, rank, 0, op=op) + self.actual_WIN.Fence() + + assert_equal(_get_array(x), actual_x) + assert_equal(_get_array(y), actual_y) + + @parameterized.expand(_patterns) + def test_Compare_and_swap(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + x = _get_sbuf(dev1, 1, dtype=dtype, order=order) + y = _get_rbuf(dev2, 1, dtype=dtype, order=order) + z = _get_rbuf(dev2, 1, dtype=dtype, order=order) + x.fill(1) + y.fill(0) + z.fill(-1) + self.WIN.Fence() + self.WIN.Compare_and_swap(x, y, z, rank, 0) + self.WIN.Fence() + + actual_x = _get_sbuf(np, 1, dtype=dtype, order=order) + actual_y = _get_rbuf(np, 1, dtype=dtype, order=order) + actual_z = _get_rbuf(np, 1, dtype=dtype, order=order) + actual_x.fill(1) + actual_y.fill(0) + actual_z.fill(-1) + self.actual_WIN.Fence() + self.actual_WIN.Compare_and_swap(actual_x, actual_y, actual_z, rank, 0) + self.actual_WIN.Fence() + + assert_equal(_get_array(x), actual_x) + assert_equal(_get_array(y), actual_y) + assert_equal(_get_array(z), actual_z) + + @parameterized.expand(_patterns) + def test_Rput_Rget(self, dev1, dev2, shape, dtype, order): + if dtype == 'bool' and ((dev1 is vp and dev2 is np) or + (dev1 is np and dev2 is vp)): + self.skipTest('Booleans case in vp-to-np is not testable') + + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + actual_y = _get_sbuf(np, shape, dtype=dtype, order=order) + comm.Bcast(actual_y, root=0) + x = dev1.array(actual_y, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype=dtype, order=order) + self.WIN.Fence() + self.WIN.Rput(x, rank).Wait() + self.WIN.Fence() + self.WIN.Rget(y, rank).Wait() + self.WIN.Fence() + + assert_equal(_get_array(y), actual_y) + + @parameterized.expand(_patterns) + def test_Raccumulate(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + x = _get_sbuf(dev1, shape, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype=dtype, order=order) + actual_x = _get_sbuf(np, shape, dtype=dtype, order=order) + actual_y = _get_rbuf(np, shape, dtype=dtype, order=order) + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN, MPI.REPLACE): + self.WIN.Fence() + x.fill(1) + self.WIN.Rput(x, rank).Wait() + self.WIN.Fence() + self.WIN.Raccumulate(x, rank, op=op).Wait() + self.WIN.Fence() + self.WIN.Rget(y, rank).Wait() + self.WIN.Fence() + + self.actual_WIN.Fence() + actual_x.fill(1) + self.actual_WIN.Rput(actual_x, rank).Wait() + self.actual_WIN.Fence() + self.actual_WIN.Raccumulate(actual_x, rank, op=op).Wait() + self.actual_WIN.Fence() + self.actual_WIN.Rget(actual_y, rank).Wait() + self.actual_WIN.Fence() + + assert_equal(_get_array(x), actual_x) + 
assert_equal(_get_array(y), actual_y) + + @parameterized.expand(_patterns) + def test_Rget_accumulate(self, dev1, dev2, shape, dtype, order): + if dtype in ('complex64', 'complex128', 'bool'): + self.skipTest('Datatype is not testable') + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + x = _get_sbuf(dev1, shape, dtype=dtype, order=order) + y = _get_rbuf(dev2, shape, dtype=dtype, order=order) + actual_x = _get_sbuf(np, shape, dtype=dtype, order=order) + actual_y = _get_rbuf(np, shape, dtype=dtype, order=order) + for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN, MPI.REPLACE, + MPI.NO_OP): + self.WIN.Fence() + x.fill(1) + self.WIN.Rput(x, rank).Wait() + self.WIN.Fence() + self.WIN.Rget_accumulate(x, y, rank, op=op).Wait() + self.WIN.Fence() + self.WIN.Rget(y, rank).Wait() + self.WIN.Fence() + + self.actual_WIN.Fence() + actual_x.fill(1) + self.actual_WIN.Rput(actual_x, rank).Wait() + self.actual_WIN.Fence() + self.actual_WIN.Rget_accumulate(actual_x, actual_y, + rank, op=op).Wait() + self.actual_WIN.Fence() + self.actual_WIN.Rget(actual_y, rank).Wait() + self.actual_WIN.Fence() + + assert_equal(_get_array(x), actual_x) + assert_equal(_get_array(y), actual_y) + + # Fence : No NLCPy synchronization required. + @parameterized.expand(_flush_test_patterns) + def test_Fence_synchronize(self, dev1, dtype, order): + comm = self.COMM + rank = comm.Get_rank() + + n = dev1.array(0, dtype=dtype, order=order) + expect = dev1.array(1, dtype=dtype, order=order) + + if rank == 0: + win_n = MPI.Win.Create(n, comm=MPI.COMM_WORLD) + else: + win_n = MPI.Win.Create(None, comm=MPI.COMM_WORLD) + if rank == 0: + n.fill(1) + win_n.Fence() + if rank != 0: + win_n.Get([n, MPI.INT], 0) + win_n.Fence() + win_n.Free() + assert_equal(_get_array(n), _get_array(expect)) + + # Sync : No NLCPy synchronization required. + @parameterized.expand(_flush_test_patterns) + def test_Sync_synchronize(self, dev1, dtype, order): + comm = self.COMM + rank = comm.Get_rank() + + n = dev1.array(0, dtype=dtype, order=order) + expect = dev1.array(1, dtype=dtype, order=order) + + if rank == 0: + win_n = MPI.Win.Create(n, comm=MPI.COMM_WORLD) + else: + win_n = MPI.Win.Create(None, comm=MPI.COMM_WORLD) + if rank == 0: + n.fill(1) + if dev1 == vp: + n.venode.synchronize() + if rank != 0: + win_n.Lock(MPI.LOCK_EXCLUSIVE, 0) + n.fill(0) + win_n.Sync() + win_n.Get([n, MPI.INT], 0) + win_n.Unlock(0) + comm.Barrier() + win_n.Free() + assert_equal(_get_array(n), _get_array(expect)) + + # Post : No NLCPy synchronization required. + @parameterized.expand(_flush_test_patterns) + def test_Post_synchronize(self, dev1, dtype, order): + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + n = dev1.array(0, dtype=dtype, order=order) + expect = dev1.array(1, dtype=dtype, order=order) + comm_group = comm.Get_group() + + if rank % 2 == 0: + win_n = MPI.Win.Create(n, comm=MPI.COMM_WORLD) + else: + win_n = MPI.Win.Create(None, comm=MPI.COMM_WORLD) + if peer != MPI.PROC_NULL: + group = comm_group.Incl(list([peer])) + if rank % 2 == 0: + n.fill(1) + win_n.Post(group) + win_n.Wait() + else: + win_n.Start(group) + win_n.Get([n, MPI.INT], peer) + win_n.Complete() + group.Free() + win_n.Free() + comm_group.Free() + if peer != MPI.PROC_NULL: + assert_equal(_get_array(n), _get_array(expect)) + + # Start : No NLCPy synchronization required. 
+ @parameterized.expand(_flush_test_patterns) + def test_Start_synchronize(self, dev1, dtype, order): + comm = self.COMM + size = comm.Get_size() + rank = comm.Get_rank() + peer = rank ^ 1 + if peer >= size: + peer = MPI.PROC_NULL + + n = dev1.array(0, dtype=dtype, order=order) + expect = dev1.array(1, dtype=dtype, order=order) + comm_group = comm.Get_group() + + if rank % 2 == 0: + win_n = MPI.Win.Create(n, comm=MPI.COMM_WORLD) + else: + win_n = MPI.Win.Create(None, comm=MPI.COMM_WORLD) + if peer != MPI.PROC_NULL: + group = comm_group.Incl(list([peer])) + if rank % 2 == 0: + win_n.Post(group) + win_n.Wait() + else: + n.fill(1) + win_n.Start(group) + win_n.Put([n, MPI.INT], peer) + win_n.Complete() + group.Free() + win_n.Free() + comm_group.Free() + if peer != MPI.PROC_NULL: + assert_equal(_get_array(n), _get_array(expect)) + + # Lock : NLCPy synchronization required. + @parameterized.expand(_flush_test_patterns) + def test_Lock_synchronize(self, dev1, dtype, order): + comm = self.COMM + rank = comm.Get_rank() + + n = dev1.array(0, dtype=dtype, order=order) + expect = dev1.array(1, dtype=dtype, order=order) + + if rank == 0: + win_n = MPI.Win.Create(n, comm=MPI.COMM_WORLD) + else: + win_n = MPI.Win.Create(None, comm=MPI.COMM_WORLD) + if rank == 0: + n.fill(1) + if dev1 == vp: + n.venode.synchronize() + comm.Barrier() + if rank != 0: + win_n.Lock(MPI.LOCK_EXCLUSIVE, 0) + win_n.Get([n, MPI.INT], 0) + win_n.Unlock(0) + comm.Barrier() + win_n.Free() + assert_equal(_get_array(n), _get_array(expect)) + + # Lock_all : NLCPy synchronization required. + @parameterized.expand(_flush_test_patterns) + def test_Lock_all_synchronize(self, dev1, dtype, order): + comm = self.COMM + rank = comm.Get_rank() + + n = dev1.array(0, dtype=dtype, order=order) + expect = dev1.array(1, dtype=dtype, order=order) + + if rank == 0: + win_n = MPI.Win.Create(n, comm=MPI.COMM_WORLD) + else: + win_n = MPI.Win.Create(None, comm=MPI.COMM_WORLD) + if rank == 0: + n.fill(1) + if dev1 == vp: + n.venode.synchronize() + comm.Barrier() + if rank != 0: + win_n.Lock_all() + win_n.Get([n, MPI.INT], 0) + win_n.Unlock_all() + comm.Barrier() + win_n.Free() + assert_equal(_get_array(n), _get_array(expect)) + + +if __name__ == '__main__': + unittest.main() diff --git a/demo/osu_allgather.py b/demo/OSU/osu_allgather.py similarity index 100% rename from demo/osu_allgather.py rename to demo/OSU/osu_allgather.py diff --git a/demo/osu_alltoall.py b/demo/OSU/osu_alltoall.py similarity index 100% rename from demo/osu_alltoall.py rename to demo/OSU/osu_alltoall.py diff --git a/demo/osu_alltoallv.py b/demo/OSU/osu_alltoallv.py similarity index 100% rename from demo/osu_alltoallv.py rename to demo/OSU/osu_alltoallv.py diff --git a/demo/osu_barrier.py b/demo/OSU/osu_barrier.py similarity index 100% rename from demo/osu_barrier.py rename to demo/OSU/osu_barrier.py diff --git a/demo/osu_bcast.py b/demo/OSU/osu_bcast.py similarity index 100% rename from demo/osu_bcast.py rename to demo/OSU/osu_bcast.py diff --git a/demo/osu_bibw.py b/demo/OSU/osu_bibw.py similarity index 100% rename from demo/osu_bibw.py rename to demo/OSU/osu_bibw.py diff --git a/demo/osu_bw.py b/demo/OSU/osu_bw.py similarity index 100% rename from demo/osu_bw.py rename to demo/OSU/osu_bw.py diff --git a/demo/osu_gather.py b/demo/OSU/osu_gather.py similarity index 100% rename from demo/osu_gather.py rename to demo/OSU/osu_gather.py diff --git a/demo/osu_latency.py b/demo/OSU/osu_latency.py similarity index 100% rename from demo/osu_latency.py rename to demo/OSU/osu_latency.py diff 
--git a/demo/osu_multi_lat.py b/demo/OSU/osu_multi_lat.py similarity index 100% rename from demo/osu_multi_lat.py rename to demo/OSU/osu_multi_lat.py diff --git a/demo/osu_scatter.py b/demo/OSU/osu_scatter.py similarity index 100% rename from demo/osu_scatter.py rename to demo/OSU/osu_scatter.py diff --git a/demo/README.txt b/demo/README.rst similarity index 81% rename from demo/README.txt rename to demo/README.rst index 71c40d9..0fe57d7 100644 --- a/demo/README.txt +++ b/demo/README.rst @@ -1,6 +1,6 @@ Issuing at the command line:: - $ mpiexec -n 5 python helloworld.py + $ mpiexec -veo -n 5 python helloworld.py will launch a five-process run of the Python interpreter and execute the test script ``helloworld.py``, a parallelized version of the diff --git a/demo/compute-pi/README.txt b/demo/compute-pi/README.rst similarity index 100% rename from demo/compute-pi/README.txt rename to demo/compute-pi/README.rst diff --git a/demo/compute-pi/cpi-cco.py b/demo/compute-pi/cpi-cco.py index 325f2f8..11c47d8 100644 --- a/demo/compute-pi/cpi-cco.py +++ b/demo/compute-pi/cpi-cco.py @@ -5,7 +5,7 @@ usage:: - $ mpiexec -n python cpi-buf.py + $ mpiexec -veo -n python cpi-buf.py """ from mpi4pyve import MPI diff --git a/demo/compute-pi/cpi-dpm.py b/demo/compute-pi/cpi-dpm.py deleted file mode 100644 index 619ee7d..0000000 --- a/demo/compute-pi/cpi-dpm.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python -""" -Parallel PI computation using Dynamic Process Management (DPM) -within Python objects exposing memory buffers (requires NumPy). - -usage: - - + parent/child model:: - - $ mpiexec -n 1 python cpi-dpm.py [nchilds] - - + client/server model:: - - $ [xterm -e] mpiexec -n python cpi-dpm.py server [-v] & - $ [xterm -e] mpiexec -n 1 python cpi-dpm.py client [-v] -""" - -import sys -from mpi4pyve import MPI -import numpy as N - -def get_n(): - prompt = "Enter the number of intervals: (0 quits) " - try: - n = int(input(prompt)) - if n < 0: n = 0 - except: - n = 0 - return n - -def view(pi, np=None, wt=None): - from math import pi as PI - prn = sys.stdout.write - if pi is not None: - prn("computed pi is: %.16f\n" % pi) - prn("absolute error: %.16f\n" % abs(pi - PI)) - if np is not None: - prn("computing units: %d processes\n" % np) - if wt is not None: - prn("wall clock time: %g seconds\n" % wt) - sys.stdout.flush() - -def comp_pi(n, comm, root=0): - nprocs = comm.Get_size() - myrank = comm.Get_rank() - n = N.array(n, 'i') - comm.Bcast([n, MPI.INT], root=root) - if n == 0: return 0.0 - h = 1.0 / n; - s = 0.0; - for i in range(myrank, n, nprocs): - x = h * (i + 0.5); - s += 4.0 / (1.0 + x**2); - mypi = s * h - mypi = N.array(mypi, 'd') - pi = N.array(0, 'd') - comm.Reduce([mypi, MPI.DOUBLE], - [pi, MPI.DOUBLE], - root=root, op=MPI.SUM) - return pi - -def master(icomm): - n = get_n() - wt = MPI.Wtime() - n = N.array(n, 'i') - icomm.Send([n, MPI.INT], dest=0) - pi = N.array(0, 'd') - icomm.Recv([pi, MPI.DOUBLE], source=0) - wt = MPI.Wtime() - wt - if n == 0: return - np = icomm.Get_remote_size() - view(pi, np, wt) - -def worker(icomm): - myrank = icomm.Get_rank() - if myrank == 0: - source = dest = 0 - else: - source = dest = MPI.PROC_NULL - n = N.array(0, 'i') - icomm.Recv([n, MPI.INT], source=source) - pi = comp_pi(n, comm=MPI.COMM_WORLD, root=0) - pi = N.array(pi, 'd') - icomm.Send([pi, MPI.DOUBLE], dest=dest) - - -# Parent/Child - -def main_parent(nprocs=1): - assert nprocs > 0 - assert MPI.COMM_WORLD.Get_size() == 1 - icomm = MPI.COMM_WORLD.Spawn(command=sys.executable, - args=[__file__, 'child'], - 
maxprocs=nprocs) - master(icomm) - icomm.Disconnect() - -def main_child(): - icomm = MPI.Comm.Get_parent() - assert icomm != MPI.COMM_NULL - worker(icomm) - icomm.Disconnect() - -# Client/Server - -def main_server(COMM): - nprocs = COMM.Get_size() - myrank = COMM.Get_rank() - - service, port, info = None, None, MPI.INFO_NULL - if myrank == 0: - port = MPI.Open_port(info) - log(COMM, "open port '%s'", port) - service = 'cpi' - MPI.Publish_name(service, port, info) - log(COMM, "service '%s' published.", service) - else: - port = '' - - log(COMM, "waiting for client connection ...") - icomm = COMM.Accept(port, info, root=0) - log(COMM, "client connection accepted.") - - worker(icomm) - - log(COMM, "disconnecting from client ...") - icomm.Disconnect() - log(COMM, "client disconnected.") - - if myrank == 0: - MPI.Unpublish_name(service, port, info) - log(COMM, "service '%s' unpublished", port) - MPI.Close_port(port) - log(COMM, "closed port '%s' ", port) - - -def main_client(COMM): - assert COMM.Get_size() == 1 - - service, info = 'cpi', MPI.INFO_NULL - port = MPI.Lookup_name(service, info) - log(COMM, "service '%s' found in port '%s'.", service, port) - - log(COMM, "connecting to server ...") - icomm = COMM.Connect(port, info, root=0) - log(COMM, "server connected.") - - master(icomm) - - log(COMM, "disconnecting from server ...") - icomm.Disconnect() - log(COMM, "server disconnected.") - - -def main(): - assert len(sys.argv) <= 2 - - if 'server' in sys.argv: - main_server(MPI.COMM_WORLD) - elif 'client' in sys.argv: - main_client(MPI.COMM_WORLD) - elif 'child' in sys.argv: - main_child() - else: - try: nchilds = int(sys.argv[1]) - except: nchilds = 2 - main_parent(nchilds) - - -VERBOSE = False - -def log(COMM, fmt, *args): - if not VERBOSE: return - if COMM.rank != 0: return - sys.stdout.write(fmt % args) - sys.stdout.write('\n') - sys.stdout.flush() - -if __name__ == '__main__': - if '-v' in sys.argv: - VERBOSE = True - sys.argv.remove('-v') - main() diff --git a/demo/compute-pi/cpi-rma.py b/demo/compute-pi/cpi-rma.py index 66e4035..dc6b9f2 100644 --- a/demo/compute-pi/cpi-rma.py +++ b/demo/compute-pi/cpi-rma.py @@ -5,7 +5,7 @@ usage:: - $ mpiexec -n python cpi-rma.py + $ mpiexec -veo -n python cpi-rma.py """ from mpi4pyve import MPI diff --git a/demo/compute-pi/makefile b/demo/compute-pi/makefile index 3ec0dff..4e9cc5d 100644 --- a/demo/compute-pi/makefile +++ b/demo/compute-pi/makefile @@ -1,9 +1,8 @@ .PHONY: test -MPIEXEC=mpiexec -n 1 +MPIEXEC=mpiexec -veo -n 1 PYTHON=python test: echo 100 | ${MPIEXEC} ${PYTHON} cpi-cco.py echo 100 | ${MPIEXEC} ${PYTHON} cpi-rma.py - echo 100 | ${MPIEXEC} ${PYTHON} cpi-dpm.py diff --git a/demo/cython/helloworld.pyx b/demo/cython/helloworld.pyx deleted file mode 100644 index f3f88ea..0000000 --- a/demo/cython/helloworld.pyx +++ /dev/null @@ -1,67 +0,0 @@ -cdef extern from "mpi-compat.h": pass - -# --------- - - -# Python-level module import -# (file: mpi4pyve/MPI.so) - -from mpi4pyve import MPI - -# Python-level objects and code - -size = MPI.COMM_WORLD.Get_size() -rank = MPI.COMM_WORLD.Get_rank() -pname = MPI.Get_processor_name() - -hwmess = "Hello, World! I am process %d of %d on %s." 
-print (hwmess % (rank, size, pname)) - - - -# --------- - - -# Cython-level cimport -# this make available mpi4pyve's Python extension types -# (file: mpi4pyve/include/mpi4pyve/MPI.pxd) - -from mpi4pyve cimport MPI -from mpi4pyve.MPI cimport Intracomm as IntracommType - -# C-level cdef, typed, Python objects - -cdef MPI.Comm WORLD = MPI.COMM_WORLD -cdef IntracommType SELF = MPI.COMM_SELF - - -# --------- - - -# Cython-level cimport with PXD file -# this make available the native MPI C API -# with namespace-protection (stuff accessed as mpi.XXX) -# (file: mpi4pyve/include/mpi4pyve/libmpi.pxd) - -from mpi4pyve cimport libmpi as mpi - -cdef mpi.MPI_Comm world1 = WORLD.ob_mpi - -cdef int ierr1=0 - -cdef int size1 = 0 -ierr1 = mpi.MPI_Comm_size(mpi.MPI_COMM_WORLD, &size1) - -cdef int rank1 = 0 -ierr1 = mpi.MPI_Comm_rank(mpi.MPI_COMM_WORLD, &rank1) - -cdef int rlen1=0 -cdef char pname1[mpi.MPI_MAX_PROCESSOR_NAME] -ierr1 = mpi.MPI_Get_processor_name(pname1, &rlen1) -pname1[rlen1] = 0 # just in case ;-) - -hwmess = "Hello, World! I am process %d of %d on %s." -print (hwmess % (rank1, size1, pname1)) - - -# --------- diff --git a/demo/cython/makefile b/demo/cython/makefile deleted file mode 100644 index 179f542..0000000 --- a/demo/cython/makefile +++ /dev/null @@ -1,31 +0,0 @@ -.PHONY: default -default: build test clean - -PYTHON = python -PYTHON_CONFIG = ${PYTHON} ../python-config - -CYTHON = cython -.PHONY: src -src: helloworld.c -helloworld.c: helloworld.pyx - ${CYTHON} $< - - -MPICC = mpicc -CFLAGS = -fPIC ${shell ${PYTHON_CONFIG} --includes} -LDFLAGS = -shared ${shell ${PYTHON_CONFIG} --libs} -SO = ${shell ${PYTHON_CONFIG} --extension-suffix} -.PHONY: build -build: helloworld${SO} -helloworld${SO}: helloworld.c - ${MPICC} ${CFLAGS} -o $@ $< ${LDFLAGS} - - -.PHONY: test -test: build - ${PYTHON} -c 'import helloworld' - - -.PHONY: clean -clean: - ${RM} helloworld.c helloworld${SO} diff --git a/demo/cython/mpi-compat.h b/demo/cython/mpi-compat.h deleted file mode 100644 index 367c58a..0000000 --- a/demo/cython/mpi-compat.h +++ /dev/null @@ -1,14 +0,0 @@ -/* Author: Lisandro Dalcin */ -/* Contact: dalcinl@gmail.com */ - -#ifndef MPI_COMPAT_H -#define MPI_COMPAT_H - -#include - -#if (MPI_VERSION < 3) && !defined(PyMPI_HAVE_MPI_Message) -typedef void *PyMPI_MPI_Message; -#define MPI_Message PyMPI_MPI_Message -#endif - -#endif/*MPI_COMPAT_H*/ diff --git a/demo/embedding/helloworld.c b/demo/embedding/helloworld.c deleted file mode 100644 index cf5a0b7..0000000 --- a/demo/embedding/helloworld.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * You can use safely use mpi4pyve between multiple - * Py_Initialize()/Py_Finalize() calls ... - * but do not blame me for the memory leaks ;-) - * - */ - -#include -#include - -const char helloworld[] = \ - "from mpi4pyve import MPI \n" - "hwmess = 'Hello, World! I am process %d of %d on %s.' 
\n" - "myrank = MPI.COMM_WORLD.Get_rank() \n" - "nprocs = MPI.COMM_WORLD.Get_size() \n" - "procnm = MPI.Get_processor_name() \n" - "print (hwmess % (myrank, nprocs, procnm)) \n" - ""; - -int main(int argc, char *argv[]) -{ - int i,n=5; - - MPI_Init(&argc, &argv); - - for (i=0; i= (2, 6) else {} - return urlopen(url, **kwargs).read() - -def download_urls_sequential(urls, timeout=60): - url_to_content = {} - for url in urls: - try: - url_to_content[url] = load_url(url, timeout=timeout) - except: - pass - return url_to_content - -def download_urls_with_executor(executor, urls, timeout=60): - if executor is None: return {} - try: - url_to_content = {} - future_to_url = dict((executor.submit(load_url, url, timeout), url) - for url in urls) - for future in as_completed(future_to_url): - try: - url_to_content[future_to_url[future]] = future.result() - except: - pass - return url_to_content - finally: - executor.shutdown() - -def main(): - for meth, fn in [('sequential', - functools.partial(download_urls_sequential, - URLS)), - ('threads', - functools.partial(download_urls_with_executor, - ThreadPoolExecutor(10), URLS)), - ('processes', - functools.partial(download_urls_with_executor, - ProcessPoolExecutor(10), URLS)), - ('mpi4pyve', - functools.partial(download_urls_with_executor, - MPIPoolExecutor(10), URLS))]: - sys.stdout.write('%s: ' % meth.ljust(11)) - sys.stdout.flush() - start = time.time() - url_map = fn() - elapsed = time.time() - start - sys.stdout.write('%5.2f seconds (%2d of %d downloaded)\n' % - (elapsed, len(url_map), len(URLS))) - sys.stdout.flush() - -if __name__ == '__main__': - main() diff --git a/demo/futures/perf_primes.py b/demo/futures/perf_primes.py deleted file mode 100644 index 77ad5d9..0000000 --- a/demo/futures/perf_primes.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -Compare the speed of primes sequentially vs. using futures. 
-""" - -import sys -import time -import math -try: - range = xrange -except NameError: - range = range - -try: - from concurrent.futures import ThreadPoolExecutor -except ImportError: - ThreadPoolExecutor = None -try: - from concurrent.futures import ProcessPoolExecutor -except ImportError: - ProcessPoolExecutor = None - -from mpi4pyve.futures import MPIPoolExecutor - -PRIMES = [ - 112272535095293, - 112582705942171, - 112272535095293, - 115280095190773, - 115797848077099, - 117450548693743, - 993960000099397, -] - -def is_prime(n): - if n % 2 == 0: - return False - sqrt_n = int(math.floor(math.sqrt(n))) - for i in range(3, sqrt_n + 1, 2): - if n % i == 0: - return False - return True - -def sequential(): - return list(map(is_prime, PRIMES)) - -def with_thread_pool_executor(): - if not ThreadPoolExecutor: return None - with ThreadPoolExecutor(4) as executor: - return list(executor.map(is_prime, PRIMES)) - -def with_process_pool_executor(): - if not ProcessPoolExecutor: return None - with ProcessPoolExecutor(4) as executor: - return list(executor.map(is_prime, PRIMES)) - -def with_mpi_pool_executor(): - with MPIPoolExecutor(4) as executor: - return list(executor.map(is_prime, PRIMES)) - -def main(): - for name, fn in [('sequential', sequential), - ('threads', with_thread_pool_executor), - ('processes', with_process_pool_executor), - ('mpi4pyve', with_mpi_pool_executor)]: - sys.stdout.write('%s: ' % name.ljust(11)) - sys.stdout.flush() - start = time.time() - result = fn() - if result is None: - sys.stdout.write(' not available\n') - elif result != [True] * len(PRIMES): - sys.stdout.write(' failed\n') - else: - sys.stdout.write('%5.2f seconds\n' % (time.time() - start)) - sys.stdout.flush() - -if __name__ == '__main__': - main() diff --git a/demo/futures/run_crawl.py b/demo/futures/run_crawl.py deleted file mode 100644 index 72f47b0..0000000 --- a/demo/futures/run_crawl.py +++ /dev/null @@ -1,34 +0,0 @@ -from __future__ import print_function -from __future__ import division - -try: - from urllib.request import urlopen -except ImportError: - from urllib2 import urlopen - -from mpi4pyve.futures import MPIPoolExecutor - -URLS = [ - 'http://www.google.com/', - 'http://www.apple.com/', - 'http://www.ibm.com/', - 'http://www.slashdot.org/', - 'http://www.python.org/', - 'http://www.bing.com/', - 'http://www.facebook.com/', - 'http://www.yahoo.com/', - 'http://www.youtube.com/', - 'http://www.blogger.com/', -] - -def load_url(url): - return url, urlopen(url).read() - -def test_crawl(): - with MPIPoolExecutor(10) as executor: - for url, content in executor.map(load_url, URLS, - timeout=10, unordered=True): - print('%-25s: %6.2f KiB' % (url, len(content)/(1 << 10))) - -if __name__ == '__main__': - test_crawl() diff --git a/demo/futures/run_julia.py b/demo/futures/run_julia.py deleted file mode 100644 index b4221e3..0000000 --- a/demo/futures/run_julia.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import print_function -from __future__ import division -import sys -import time - -from mpi4pyve.futures import MPICommExecutor - -try: - range = xrange -except NameError: - pass - -x0 = -2.0 -x1 = +2.0 -y0 = -1.5 -y1 = +1.5 - -w = 1600 -h = 1200 - -dx = (x1 - x0) / w -dy = (y1 - y0) / h - -def julia(x, y): - c = complex(0, 0.65) - z = complex(x, y) - n = 255 - while abs(z) < 3 and n > 1: - z = z**2 + c - n -= 1 - return n - -def julia_line(k): - line = bytearray(w) - y = y1 - k * dy - for j in range(w): - x = x0 + j * dx - line[j] = julia(x, y) - return line - -def plot(image): - import warnings - 
warnings.simplefilter('ignore', UserWarning) - try: - from matplotlib import pyplot as plt - except ImportError: - return - plt.figure() - plt.imshow(image, aspect='equal', cmap='cubehelix') - plt.axis('off') - try: - plt.draw() - plt.pause(2) - except: - pass - -def test_julia(): - with MPICommExecutor() as executor: - if executor is None: return # worker process - tic = time.time() - image = list(executor.map(julia_line, range(h), chunksize=10)) - toc = time.time() - - print("%s Set %dx%d in %.2f seconds." % ('Julia', w, h, toc-tic)) - if len(sys.argv) > 1 and sys.argv[1] == '-plot': - plot(image) - -if __name__ == '__main__': - test_julia() diff --git a/demo/futures/run_mandelbrot.py b/demo/futures/run_mandelbrot.py deleted file mode 100644 index 9739d07..0000000 --- a/demo/futures/run_mandelbrot.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import print_function -from __future__ import division -import sys -import time - -from mpi4pyve.futures import MPICommExecutor - -try: - range = xrange -except NameError: - pass - -x0 = -2.0 -x1 = +1.0 -y0 = -1.0 -y1 = +1.0 - -w = 750 -h = 500 - -dx = (x1 - x0) / w -dy = (y1 - y0) / h - -def mandelbrot(x, y, maxit=255): - c = complex(x, y) - z = complex(0, 0) - n = 255 - while abs(z) < 2 and n > 1: - z = z**2 + c - n -= 1 - return n - -def mandelbrot_line(k): - line = bytearray(w) - y = y1 - k * dy - for j in range(w): - x = x0 + j * dx - line[j] = mandelbrot(x, y) - return line - -def plot(image): - import warnings - warnings.simplefilter('ignore', UserWarning) - try: - from matplotlib import pyplot as plt - except ImportError: - return - plt.figure() - plt.imshow(image, aspect='equal', cmap='spectral') - plt.axis('off') - try: - plt.draw() - plt.pause(2) - except: - pass - -def test_mandelbrot(): - with MPICommExecutor() as executor: - if executor is None: return # worker process - tic = time.time() - image = list(executor.map(mandelbrot_line, range(h), chunksize=10)) - toc = time.time() - - print("%s Set %dx%d in %.2f seconds." 
% ('Mandelbrot', w, h, toc-tic)) - if len(sys.argv) > 1 and sys.argv[1] == '-plot': - plot(image) - -if __name__ == '__main__': - test_mandelbrot() diff --git a/demo/futures/run_primes.py b/demo/futures/run_primes.py deleted file mode 100644 index 845178b..0000000 --- a/demo/futures/run_primes.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import print_function -import math -try: - range = xrange -except NameError: - range = range - -from mpi4pyve.futures import MPIPoolExecutor - -PRIMES = [ - 112272535095293, - 112582705942171, - 112272535095293, - 115280095190773, - 115797848077099, - 117450548693743, - 993960000099397, -] - -def is_prime(n): - if n % 2 == 0: - return False - sqrt_n = int(math.floor(math.sqrt(n))) - for i in range(3, sqrt_n + 1, 2): - if n % i == 0: - return False - return True - -def test_primes(): - with MPIPoolExecutor(4) as executor: - for number, prime in zip(PRIMES, executor.map(is_prime, PRIMES)): - print('%d is prime: %s' % (number, prime)) - -if __name__ == '__main__': - test_primes() diff --git a/demo/futures/test_futures.py b/demo/futures/test_futures.py deleted file mode 100644 index 936a94c..0000000 --- a/demo/futures/test_futures.py +++ /dev/null @@ -1,1229 +0,0 @@ -import os -import sys -import time -import functools -import unittest - -from mpi4pyve import MPI -from mpi4pyve import futures -try: - from concurrent.futures._base import ( - PENDING, RUNNING, CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED) -except ImportError: - from mpi4pyve.futures._base import ( - PENDING, RUNNING, CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED) - - -SHARED_POOL = futures._lib.SharedPool is not None -WORLD_SIZE = MPI.COMM_WORLD.Get_size() - - -def create_future(state=PENDING, exception=None, result=None): - f = futures.Future() - f._state = state - f._exception = exception - f._result = result - return f - - -PENDING_FUTURE = create_future(state=PENDING) -RUNNING_FUTURE = create_future(state=RUNNING) -CANCELLED_FUTURE = create_future(state=CANCELLED) -CANCELLED_AND_NOTIFIED_FUTURE = create_future(state=CANCELLED_AND_NOTIFIED) -EXCEPTION_FUTURE = create_future(state=FINISHED, exception=OSError()) -SUCCESSFUL_FUTURE = create_future(state=FINISHED, result=42) - - -def mul(x, y): - return x * y - - -def sleep_and_raise(t): - time.sleep(t) - raise Exception('this is an exception') - - -def check_global_var(x): - return global_var == x - - -def check_run_name(name): - return __name__ == name - - -class ExecutorMixin: - worker_count = 2 - - def setUp(self): - self.t1 = time.time() - try: - self.executor = self.executor_type(max_workers=self.worker_count) - except NotImplementedError: - e = sys.exc_info()[1] - self.skipTest(str(e)) - self._prime_executor() - - def tearDown(self): - self.executor.shutdown(wait=True) - dt = time.time() - self.t1 - self.assertLess(dt, 60, 'synchronization issue: test lasted too long') - - def _prime_executor(self): - # Make sure that the executor is ready to do work before running the - # tests. This should reduce the probability of timeouts in the tests. 
- futures = [self.executor.submit(time.sleep, 0) - for _ in range(self.worker_count)] - for f in futures: - f.result() - - -class ProcessPoolMixin(ExecutorMixin): - executor_type = futures.MPIPoolExecutor - - if 'coverage' in sys.modules: - executor_type = functools.partial( - executor_type, - python_args='-m coverage run'.split(), - ) - - -class ProcessPoolInitTest(ProcessPoolMixin, - unittest.TestCase): - - def _prime_executor(self): - pass - - def test_init(self): - self.executor_type() - - def test_init_args(self): - self.executor_type(1) - - def test_init_kwargs(self): - executor = self.executor_type( - python_exe=sys.executable, - max_workers=None, - mpi_info=dict(soft="0:1"), - globals=None, - main=False, - path=[], - wdir=os.getcwd(), - env={}, - ) - futures = [executor.submit(time.sleep, 0) - for _ in range(self.worker_count)] - for f in futures: - f.result() - executor.shutdown() - - def test_init_pyargs(self): - executor_type = futures.MPIPoolExecutor - executor = executor_type(python_args=['-B', '-Wi']) - executor.submit(time.sleep, 0).result() - executor.shutdown() - - @unittest.skipIf(SHARED_POOL, 'shared-pool') - def test_init_globals(self): - executor = self.executor_type(globals=dict(global_var=42)) - future1 = executor.submit(check_global_var, 42) - future2 = executor.submit(check_global_var, 24) - self.assertTrue(future1.result()) - self.assertFalse(future2.result()) - executor.shutdown() - - @unittest.skipIf(SHARED_POOL and WORLD_SIZE == 1, 'shared-pool') - def test_run_name(self): - executor = self.executor_type() - run_name = futures._lib.MAIN_RUN_NAME - future = executor.submit(check_run_name, run_name) - self.assertTrue(future.result(), run_name) - - def test_max_workers_environ(self): - save = os.environ.get('MPI4PY_MAX_WORKERS') - os.environ['MPI4PY_MAX_WORKERS'] = '1' - try: - executor = self.executor_type() - executor.submit(time.sleep, 0).result() - executor.shutdown() - finally: - del os.environ['MPI4PY_MAX_WORKERS'] - if save is not None: - os.environ['MPI4PY_MAX_WORKERS'] = save - - def test_max_workers_negative(self): - for number in (0, -1): - self.assertRaises(ValueError, - self.executor_type, - max_workers=number) - - -class ProcessPoolBootupTest(ProcessPoolMixin, - unittest.TestCase): - - def _prime_executor(self): - pass - - def test_bootup(self): - executor = self.executor_type(1) - executor.bootup() - executor.bootup() - executor.shutdown() - self.assertRaises(RuntimeError, executor.bootup) - - def test_bootup_wait(self): - executor = self.executor_type(1) - executor.bootup(wait=True) - executor.bootup(wait=True) - executor.shutdown(wait=True) - self.assertRaises(RuntimeError, executor.bootup, True) - - def test_bootup_nowait(self): - executor = self.executor_type(1) - executor.bootup(wait=False) - executor.bootup(wait=False) - executor.shutdown(wait=False) - self.assertRaises(RuntimeError, executor.bootup, False) - executor.shutdown(wait=True) - - def test_bootup_nowait_wait(self): - executor = self.executor_type(1) - executor.bootup(wait=False) - executor.bootup(wait=True) - executor.shutdown() - self.assertRaises(RuntimeError, executor.bootup) - - def test_bootup_shutdown_nowait(self): - executor = self.executor_type(1) - executor.bootup(wait=False) - executor.shutdown(wait=False) - worker = executor._pool - del executor - worker.join() - - -class ExecutorShutdownTestMixin: - - def test_run_after_shutdown(self): - self.executor.shutdown() - self.assertRaises(RuntimeError, - self.executor.submit, - pow, 2, 5) - - def test_hang_issue12364(self): - 
fs = [self.executor.submit(time.sleep, 0.01) for _ in range(50)] - self.executor.shutdown() - for f in fs: - f.result() - - -class ProcessPoolShutdownTest(ProcessPoolMixin, - ExecutorShutdownTestMixin, - unittest.TestCase): - - def _prime_executor(self): - pass - - def test_shutdown(self): - executor = self.executor_type(max_workers=1) - self.assertEqual(executor._pool, None) - self.assertEqual(executor._shutdown, False) - executor.submit(mul, 21, 2) - executor.submit(mul, 6, 7) - executor.submit(mul, 3, 14) - self.assertNotEqual(executor._pool.thread, None) - self.assertEqual(executor._shutdown, False) - executor.shutdown(wait=False) - self.assertNotEqual(executor._pool.thread, None) - self.assertEqual(executor._shutdown, True) - executor.shutdown(wait=True) - self.assertEqual(executor._pool, None) - self.assertEqual(executor._shutdown, True) - - def test_init_bootup_shutdown(self): - executor = self.executor_type(max_workers=1) - self.assertEqual(executor._pool, None) - self.assertEqual(executor._shutdown, False) - executor.bootup() - self.assertTrue(executor._pool.event.is_set()) - self.assertEqual(executor._shutdown, False) - executor.shutdown() - self.assertEqual(executor._pool, None) - self.assertEqual(executor._shutdown, True) - - def test_context_manager_shutdown(self): - with self.executor_type(max_workers=1) as e: - self.assertEqual(list(e.map(abs, range(-5, 5))), - [5, 4, 3, 2, 1, 0, 1, 2, 3, 4]) - threads = [e._pool.thread] - queues = [e._pool.queue] - events = [e._pool.event] - - for t in threads: - t.join() - for q in queues: - self.assertRaises(LookupError, q.pop) - for e in events: - self.assertTrue(e.is_set()) - - def test_del_shutdown(self): - executor = self.executor_type(max_workers=1) - list(executor.map(abs, range(-5, 5))) - threads = [executor._pool.thread] - queues = [executor._pool.queue] - events = [executor._pool.event] - if hasattr(sys, 'pypy_version_info'): - executor.shutdown(False) - else: - del executor - - for t in threads: - t.join() - for q in queues: - self.assertRaises(LookupError, q.pop) - for e in events: - self.assertTrue(e.is_set()) - - -class WaitTestMixin: - - def test_first_completed(self): - future1 = self.executor.submit(mul, 21, 2) - future2 = self.executor.submit(time.sleep, 0.2) - - done, not_done = futures.wait( - [CANCELLED_FUTURE, future1, future2], - return_when=futures.FIRST_COMPLETED) - - self.assertEqual(set([future1]), done) - self.assertEqual(set([CANCELLED_FUTURE, future2]), not_done) - - def test_first_completed_some_already_completed(self): - future1 = self.executor.submit(time.sleep, 0.2) - - finished, pending = futures.wait( - [CANCELLED_AND_NOTIFIED_FUTURE, SUCCESSFUL_FUTURE, future1], - return_when=futures.FIRST_COMPLETED) - - self.assertEqual( - set([CANCELLED_AND_NOTIFIED_FUTURE, SUCCESSFUL_FUTURE]), - finished) - self.assertEqual(set([future1]), pending) - - def test_first_exception(self): - future1 = self.executor.submit(mul, 2, 21) - future2 = self.executor.submit(sleep_and_raise, 0.2) - future3 = self.executor.submit(time.sleep, 0.4) - - finished, pending = futures.wait( - [future1, future2, future3], - return_when=futures.FIRST_EXCEPTION) - - self.assertEqual(set([future1, future2]), finished) - self.assertEqual(set([future3]), pending) - - def test_first_exception_some_already_complete(self): - future1 = self.executor.submit(divmod, 21, 0) - future2 = self.executor.submit(time.sleep, 0.2) - - finished, pending = futures.wait( - [SUCCESSFUL_FUTURE, - CANCELLED_FUTURE, - CANCELLED_AND_NOTIFIED_FUTURE, - future1, 
future2], - return_when=futures.FIRST_EXCEPTION) - - self.assertEqual(set([SUCCESSFUL_FUTURE, - CANCELLED_AND_NOTIFIED_FUTURE, - future1]), finished) - self.assertEqual(set([CANCELLED_FUTURE, future2]), pending) - - def test_first_exception_one_already_failed(self): - future1 = self.executor.submit(time.sleep, 0.2) - - finished, pending = futures.wait( - [EXCEPTION_FUTURE, future1], - return_when=futures.FIRST_EXCEPTION) - - self.assertEqual(set([EXCEPTION_FUTURE]), finished) - self.assertEqual(set([future1]), pending) - - def test_all_completed(self): - future1 = self.executor.submit(divmod, 2, 0) - future2 = self.executor.submit(mul, 2, 21) - - finished, pending = futures.wait( - [SUCCESSFUL_FUTURE, - CANCELLED_AND_NOTIFIED_FUTURE, - EXCEPTION_FUTURE, - future1, - future2], - return_when=futures.ALL_COMPLETED) - - self.assertEqual(set([SUCCESSFUL_FUTURE, - CANCELLED_AND_NOTIFIED_FUTURE, - EXCEPTION_FUTURE, - future1, - future2]), finished) - self.assertEqual(set(), pending) - - def test_timeout(self): - future1 = self.executor.submit(mul, 6, 7) - future2 = self.executor.submit(time.sleep, 0.5) - - finished, pending = futures.wait( - [CANCELLED_AND_NOTIFIED_FUTURE, - EXCEPTION_FUTURE, - SUCCESSFUL_FUTURE, - future1, future2], - timeout=0.2, - return_when=futures.ALL_COMPLETED) - - self.assertEqual(set([CANCELLED_AND_NOTIFIED_FUTURE, - EXCEPTION_FUTURE, - SUCCESSFUL_FUTURE, - future1]), finished) - self.assertEqual(set([future2]), pending) - - -class ProcessPoolWaitTest(ProcessPoolMixin, - WaitTestMixin, - unittest.TestCase): - pass - - -class AsCompletedTestMixin: - - def test_no_timeout(self): - future1 = self.executor.submit(mul, 2, 21) - future2 = self.executor.submit(mul, 7, 6) - - completed = set(futures.as_completed( - [CANCELLED_AND_NOTIFIED_FUTURE, - EXCEPTION_FUTURE, - SUCCESSFUL_FUTURE, - future1, future2])) - self.assertEqual(set( - [CANCELLED_AND_NOTIFIED_FUTURE, - EXCEPTION_FUTURE, - SUCCESSFUL_FUTURE, - future1, future2]), - completed) - - def test_zero_timeout(self): - future1 = self.executor.submit(time.sleep, 0.2) - completed_futures = set() - try: - for future in futures.as_completed( - [CANCELLED_AND_NOTIFIED_FUTURE, - EXCEPTION_FUTURE, - SUCCESSFUL_FUTURE, - future1], - timeout=0): - completed_futures.add(future) - except futures.TimeoutError: - pass - - self.assertEqual(set([CANCELLED_AND_NOTIFIED_FUTURE, - EXCEPTION_FUTURE, - SUCCESSFUL_FUTURE]), - completed_futures) - - def test_nonzero_timeout(self): - future1 = self.executor.submit(time.sleep, 0.0) - future2 = self.executor.submit(time.sleep, 0.2) - completed_futures = set() - try: - for future in futures.as_completed( - [CANCELLED_AND_NOTIFIED_FUTURE, - EXCEPTION_FUTURE, - SUCCESSFUL_FUTURE, - future1], - timeout=0.1): - completed_futures.add(future) - except futures.TimeoutError: - pass - - self.assertEqual(set([CANCELLED_AND_NOTIFIED_FUTURE, - EXCEPTION_FUTURE, - SUCCESSFUL_FUTURE, - future1]), - completed_futures) - - def test_duplicate_futures(self): - py_version = sys.version_info[:3] - if py_version[0] == 3 and py_version < (3, 3, 5): return - # Issue 20367. Duplicate futures should not raise exceptions or give - # duplicate responses. 
- future1 = self.executor.submit(time.sleep, 0.1) - completed = [f for f in futures.as_completed([future1, future1])] - self.assertEqual(len(completed), 1) - - -class ProcessPoolAsCompletedTest(ProcessPoolMixin, - AsCompletedTestMixin, - unittest.TestCase): - pass - - -class ExecutorTestMixin: - - def test_submit(self): - future = self.executor.submit(pow, 2, 8) - self.assertEqual(256, future.result()) - - def test_submit_keyword(self): - future = self.executor.submit(mul, 2, y=8) - self.assertEqual(16, future.result()) - future = self.executor.submit(mul, x=2, y=8) - self.assertEqual(16, future.result()) - - def test_submit_cancel(self): - future1 = self.executor.submit(time.sleep, 0.25) - future2 = self.executor.submit(time.sleep, 0) - future2.cancel() - self.assertEqual(None, future1.result()) - self.assertEqual(False, future1.cancelled()) - self.assertEqual(True, future2.cancelled()) - - def test_map(self): - self.assertEqual( - list(self.executor.map(pow, range(10), range(10))), - list(map(pow, range(10), range(10)))) - - def test_starmap(self): - sequence = [(a,a) for a in range(10)] - self.assertEqual( - list(self.executor.starmap(pow, sequence)), - list(map(pow, range(10), range(10)))) - self.assertEqual( - list(self.executor.starmap(pow, iter(sequence))), - list(map(pow, range(10), range(10)))) - - def test_map_exception(self): - i = self.executor.map(divmod, [1, 1, 1, 1], [2, 3, 0, 5]) - self.assertEqual(next(i), (0, 1)) - self.assertEqual(next(i), (0, 1)) - self.assertRaises(ZeroDivisionError, next, i) - - def test_map_timeout(self): - results = [] - try: - for i in self.executor.map(time.sleep, - [0, 0, 1], - timeout=0.25): - results.append(i) - except futures.TimeoutError: - pass - else: - self.fail('expected TimeoutError') - - self.assertEqual([None, None], results) - - def test_map_timeout_one(self): - results = [] - for i in self.executor.map(time.sleep, [0, 0, 0], timeout=1): - results.append(i) - self.assertEqual([None, None, None], results) - - -class ProcessPoolExecutorTest(ProcessPoolMixin, - ExecutorTestMixin, - unittest.TestCase): - - def test_map_chunksize(self): - ref = list(map(pow, range(40), range(40))) - self.assertEqual( - list(self.executor.map(pow, range(40), range(40), chunksize=6)), - ref) - self.assertEqual( - list(self.executor.map(pow, range(40), range(40), chunksize=50)), - ref) - self.assertEqual( - list(self.executor.map(pow, range(40), range(40), chunksize=40)), - ref) - - def bad(): - list(self.executor.map(pow, range(40), range(40), chunksize=-1)) - self.assertRaises(ValueError, bad) - - def test_starmap_chunksize(self): - ref = list(map(pow, range(40), range(40))) - sequence = [(a, a) for a in range(40)] - self.assertEqual( - list(self.executor.starmap(pow, sequence, chunksize=6)), - ref) - self.assertEqual( - list(self.executor.starmap(pow, sequence, chunksize=50)), - ref) - self.assertEqual( - list(self.executor.starmap(pow, sequence, chunksize=40)), - ref) - self.assertEqual( - list(self.executor.starmap(pow, iter(sequence), chunksize=6)), - ref) - self.assertEqual( - list(self.executor.starmap(pow, iter(sequence), chunksize=50)), - ref) - self.assertEqual( - list(self.executor.starmap(pow, iter(sequence), chunksize=40)), - ref) - - def bad(): - list(self.executor.starmap(pow, sequence, chunksize=-1)) - self.assertRaises(ValueError, bad) - - def test_map_unordered(self): - map_unordered = functools.partial(self.executor.map, unordered=True) - self.assertEqual( - set(map_unordered(pow, range(10), range(10))), - set(map(pow, range(10), 
range(10)))) - - def test_map_unordered_timeout(self): - map_unordered = functools.partial(self.executor.map, unordered=True) - num_workers = self.executor._pool.size - results = [] - try: - args = [0.2] + [0]*(num_workers-1) - for i in map_unordered(time.sleep, args, timeout=0.1): - results.append(i) - except futures.TimeoutError: - pass - else: - self.fail('expected TimeoutError') - - self.assertEqual([None]*(num_workers-1), results) - - def test_map_unordered_timeout_one(self): - map_unordered = functools.partial(self.executor.map, unordered=True) - results = [] - for i in map_unordered(time.sleep, [0, 0, 0], timeout=1): - results.append(i) - self.assertEqual([None, None, None], results) - - def test_map_unordered_exception(self): - map_unordered = functools.partial(self.executor.map, unordered=True) - i = map_unordered(divmod, [1, 1, 1, 1], [2, 3, 0, 5]) - try: - self.assertEqual(next(i), (0, 1)) - except ZeroDivisionError: - return - - def test_map_unordered_chunksize(self): - map_unordered = functools.partial(self.executor.map, unordered=True) - ref = set(map(pow, range(40), range(40))) - self.assertEqual( - set(map_unordered(pow, range(40), range(40), chunksize=6)), - ref) - self.assertEqual( - set(map_unordered(pow, range(40), range(40), chunksize=50)), - ref) - self.assertEqual( - set(map_unordered(pow, range(40), range(40), chunksize=40)), - ref) - - def bad(): - set(map_unordered(pow, range(40), range(40), chunksize=-1)) - self.assertRaises(ValueError, bad) - - -class ProcessPoolSubmitTest(unittest.TestCase): - - @unittest.skipIf(MPI.get_vendor()[0] == 'Microsoft MPI', 'msmpi') - def test_multiple_executors(self): - executor1 = futures.MPIPoolExecutor(1).bootup(wait=True) - executor2 = futures.MPIPoolExecutor(1).bootup(wait=True) - executor3 = futures.MPIPoolExecutor(1).bootup(wait=True) - fs1 = [executor1.submit(abs, i) for i in range(100, 200)] - fs2 = [executor2.submit(abs, i) for i in range(200, 300)] - fs3 = [executor3.submit(abs, i) for i in range(300, 400)] - futures.wait(fs3+fs2+fs1) - for i, f in enumerate(fs1): - self.assertEqual(f.result(), i + 100) - for i, f in enumerate(fs2): - self.assertEqual(f.result(), i + 200) - for i, f in enumerate(fs3): - self.assertEqual(f.result(), i + 300) - executor1 = executor2 = executor3 = None - - def test_mpi_serialized_support(self): - futures._lib.setup_mpi_threads() - threading = futures._lib.threading - serialized = futures._lib.serialized - lock_save = serialized.lock - try: - if lock_save is None: - serialized.lock = threading.Lock() - executor = futures.MPIPoolExecutor(1).bootup() - executor.submit(abs, 0).result() - executor.shutdown() - serialized.lock = lock_save - else: - serialized.lock = None - with lock_save: - executor = futures.MPIPoolExecutor(1).bootup() - executor.submit(abs, 0).result() - executor.shutdown() - serialized.lock = lock_save - finally: - serialized.lock = lock_save - - def orig_test_mpi_serialized_support(self): - threading = futures._lib.threading - serialized = futures._lib.serialized - lock_save = serialized.lock - try: - serialized.lock = threading.Lock() - executor = futures.MPIPoolExecutor(1).bootup() - executor.submit(abs, 0).result() - if lock_save is not None: - serialized.lock = None - with lock_save: - executor.submit(abs, 0).result() - serialized.lock = lock_save - executor.submit(abs, 0).result() - executor.shutdown() - if lock_save is not None: - serialized.lock = None - with lock_save: - executor = futures.MPIPoolExecutor(1).bootup() - executor.submit(abs, 0).result() - 
executor.shutdown() - serialized.lock = lock_save - finally: - serialized.lock = lock_save - - def test_shared_executors(self): - if not SHARED_POOL: return - executors = [futures.MPIPoolExecutor() for _ in range(16)] - fs = [] - for i in range(128): - fs.extend(e.submit(abs, i*16+j) - for j, e in enumerate(executors)) - assert sorted(f.result() for f in fs) == list(range(16*128)) - world_size = MPI.COMM_WORLD.Get_size() - num_workers = max(1, world_size - 1) - for e in executors: - self.assertEqual(e._pool.size, num_workers) - del e, executors - - -def inout(arg): - return arg - - -class GoodPickle(object): - - def __init__(self, value=0): - self.value = value - self.pickled = False - self.unpickled = False - - def __getstate__(self): - self.pickled = True - return (self.value,) - - def __setstate__(self, state): - self.unpickled = True - self.value = state[0] - - -class BadPickle(object): - - def __init__(self): - self.pickled = False - - def __getstate__(self): - self.pickled = True - 1/0 - - def __setstate__(self, state): - pass - - -class BadUnpickle(object): - - def __init__(self): - self.pickled = False - - def __getstate__(self): - self.pickled = True - return (None,) - - def __setstate__(self, state): - if state[0] is not None: - raise ValueError - 1/0 - - -@unittest.skipIf(SHARED_POOL and WORLD_SIZE == 1, 'shared-pool') -class ProcessPoolPickleTest(unittest.TestCase): - - def setUp(self): - self.executor = futures.MPIPoolExecutor(1) - - def tearDown(self): - self.executor.shutdown() - - def test_good_pickle(self): - o = GoodPickle(42) - r = self.executor.submit(inout, o).result() - self.assertEqual(o.value, r.value) - self.assertTrue(o.pickled) - self.assertTrue(r.unpickled) - - r = self.executor.submit(GoodPickle, 77).result() - self.assertEqual(r.value, 77) - self.assertTrue(r.unpickled) - - def test_bad_pickle(self): - o = BadPickle() - self.assertFalse(o.pickled) - f = self.executor.submit(inout, o) - self.assertRaises(ZeroDivisionError, f.result) - self.assertTrue(o.pickled) - - f = self.executor.submit(BadPickle) - self.assertRaises(ZeroDivisionError, f.result) - - f = self.executor.submit(abs, 42) - self.assertEqual(f.result(), 42) - - def test_bad_unpickle(self): - o = BadUnpickle() - self.assertFalse(o.pickled) - f = self.executor.submit(inout, o) - self.assertRaises(ZeroDivisionError, f.result) - self.assertTrue(o.pickled) - - f = self.executor.submit(BadUnpickle) - self.assertRaises(ZeroDivisionError, f.result) - - f = self.executor.submit(abs, 42) - self.assertEqual(f.result(), 42) - - -class MPICommExecutorTest(unittest.TestCase): - - MPICommExecutor = futures.MPICommExecutor - - def test_default(self): - with self.MPICommExecutor() as executor: - if executor is not None: - executor.bootup() - future1 = executor.submit(time.sleep, 0) - future2 = executor.submit(time.sleep, 0) - executor.shutdown() - self.assertEqual(None, future1.result()) - self.assertEqual(None, future2.result()) - - def test_self(self): - with self.MPICommExecutor(MPI.COMM_SELF) as executor: - future = executor.submit(time.sleep, 0) - self.assertEqual(None, future.result()) - self.assertEqual(None, future.exception()) - - future = executor.submit(sleep_and_raise, 0) - self.assertRaises(Exception, future.result) - self.assertEqual(Exception, type(future.exception())) - - list(executor.map(time.sleep, [0, 0])) - list(executor.map(time.sleep, [0, 0], timeout=1)) - iterator = executor.map(time.sleep, [0.1, 0], timeout=0) - self.assertRaises(futures.TimeoutError, list, iterator) - - def 
test_args(self): - with self.MPICommExecutor(MPI.COMM_SELF) as executor: - self.assertTrue(executor is not None) - with self.MPICommExecutor(MPI.COMM_SELF, 0) as executor: - self.assertTrue(executor is not None) - - def test_kwargs(self): - with self.MPICommExecutor(comm=MPI.COMM_SELF) as executor: - self.assertTrue(executor is not None) - with self.MPICommExecutor(comm=MPI.COMM_SELF, root=0) as executor: - self.assertTrue(executor is not None) - - @unittest.skipIf(SHARED_POOL, 'shared-pool') - def test_arg_root(self): - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - for root in range(comm.Get_size()): - with self.MPICommExecutor(comm, root) as executor: - if rank != root: - self.assertTrue(executor is None) - with self.MPICommExecutor(root=root) as executor: - if rank != root: - self.assertTrue(executor is None) - - def test_arg_root_bad(self): - size = MPI.COMM_WORLD.Get_size() - self.assertRaises(ValueError, self.MPICommExecutor, root=-size) - self.assertRaises(ValueError, self.MPICommExecutor, root=-1) - self.assertRaises(ValueError, self.MPICommExecutor, root=+size) - - @unittest.skipIf(SHARED_POOL, 'shared-pool') - def test_arg_comm_bad(self): - if MPI.COMM_WORLD.Get_size() == 1: - return - intercomm = futures._lib.comm_split(MPI.COMM_WORLD) - try: - self.assertRaises(ValueError, self.MPICommExecutor, intercomm) - finally: - intercomm.Free() - - def test_with_bad(self): - mpicommexecutor = self.MPICommExecutor(MPI.COMM_SELF) - with mpicommexecutor as executor: - try: - with mpicommexecutor: - pass - except RuntimeError: - pass - else: - self.fail('expected RuntimeError') - - -from mpi4pyve.futures.aplus import ThenableFuture - -class ThenTest(unittest.TestCase): - - assert_ = unittest.TestCase.assertTrue - - def test_not_done(self): - - base_f = ThenableFuture() - new_f = base_f.then() - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f._invoke_callbacks() - self.assert_(new_f.cancelled()) - - def test_cancel(self): - - base_f = ThenableFuture() - new_f = base_f.then() - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f.cancel() - self.assert_(base_f.done()) - self.assert_(new_f.done()) - - self.assert_(base_f.cancelled()) - self.assert_(new_f.cancelled()) - - def test_then_multiple(self): - - base_f = ThenableFuture() - new_f1 = base_f.then() - new_f2 = base_f.then() - new_f3 = base_f.then() - - self.assert_(base_f is not new_f1) - self.assert_(base_f is not new_f2) - self.assert_(base_f is not new_f3) - self.assert_(not base_f.done()) - self.assert_(not new_f1.done()) - self.assert_(not new_f2.done()) - self.assert_(not new_f3.done()) - - base_f.set_result('done') - self.assert_(base_f.done()) - self.assert_(new_f1.done()) - self.assert_(new_f2.done()) - self.assert_(new_f3.done()) - - self.assert_(not new_f1.exception()) - self.assert_(not new_f2.exception()) - self.assert_(not new_f3.exception()) - self.assert_(new_f1.result() == 'done') - self.assert_(new_f2.result() == 'done') - self.assert_(new_f3.result() == 'done') - - def test_no_callbacks_and_success(self): - - base_f = ThenableFuture() - new_f = base_f.then() - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f.set_result('done') - self.assert_(base_f.done()) - self.assert_(new_f.done()) - - self.assert_(not new_f.exception()) - self.assert_(new_f.result() == 'done') - - def test_no_callbacks_and_failure(self): - - class 
MyException(Exception): - pass - - base_f = ThenableFuture() - new_f = base_f.then() - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f.set_exception(MyException('sad')) - self.assert_(base_f.done()) - self.assert_(new_f.done()) - - self.assert_(new_f.exception()) - with self.assertRaises(MyException) as catcher: - new_f.result() - self.assert_(catcher.exception.args[0] == 'sad') - - def test_success_callback_and_success(self): - - base_f = ThenableFuture() - new_f = base_f.then(lambda result: result + ' manipulated') - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f.set_result('done') - self.assert_(base_f.done()) - self.assert_(new_f.done()) - - self.assert_(not new_f.exception()) - self.assert_(new_f.result() == 'done manipulated') - - def test_err_callback_and_failure_repackage(self): - - class MyException(Exception): - pass - - class MyRepackagedException(Exception): - pass - - class NotMatched(Exception): - pass - - def on_failure(ex): - if isinstance(ex, MyException): - return MyRepackagedException(ex.args[0] + ' repackaged') - else: - return NotMatched('?') - - base_f = ThenableFuture() - new_f = base_f.then(None, on_failure) - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f.set_exception(MyException('sad')) - self.assert_(base_f.done()) - self.assert_(new_f.done()) - - self.assert_(new_f.exception()) - with self.assertRaises(MyRepackagedException) as catcher: - new_f.result() - self.assert_(catcher.exception.args[0] == 'sad repackaged') - - def test_err_callback_and_failure_raised(self): - - class MyException(Exception): - pass - - class MyRepackagedException(Exception): - pass - - def raise_something_else(ex): - raise MyRepackagedException(ex.args[0] + ' repackaged') - - base_f = ThenableFuture() - new_f = base_f.then(None, raise_something_else) - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f.set_exception(MyException('sad')) - self.assert_(base_f.done()) - self.assert_(new_f.done()) - - self.assert_(new_f.exception()) - with self.assertRaises(MyRepackagedException) as catcher: - new_f.result() - self.assert_(catcher.exception.args[0] == 'sad repackaged') - - def test_err_callback_convert_to_success(self): - - class MyException(Exception): - pass - - class NotMatched(Exception): - pass - - def on_failure(ex): - if isinstance(ex, MyException): - return ex.args[0] + ' repackaged' - else: - return NotMatched('?') - - base_f = ThenableFuture() - new_f = base_f.catch(on_failure) - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f.set_exception(MyException('sad')) - self.assert_(base_f.done()) - self.assert_(new_f.done()) - - self.assert_(not new_f.exception()) - self.assert_(new_f.result() == 'sad repackaged') - - def test_err_catch_ignore(self): - - base_f = ThenableFuture() - new_f = base_f.catch() - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f.set_exception(Exception('sad')) - self.assert_(base_f.done()) - self.assert_(new_f.done()) - - self.assert_(new_f.exception() is None) - self.assert_(new_f.result() is None) - - def test_success_callback_and_failure_raised(self): - - class MyException(Exception): - pass - - def raise_something_else(value): - raise MyException(value + ' repackaged') - - 
base_f = ThenableFuture() - new_f = base_f.then(raise_something_else) - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f.set_result('sad') - self.assert_(base_f.done()) - self.assert_(new_f.done()) - - self.assert_(new_f.exception()) - with self.assertRaises(MyException) as catcher: - new_f.result() - assert catcher.exception.args[0] == 'sad repackaged' - - def test_chained_success_callback_and_success(self): - - def transform(value): - f = ThenableFuture() - if value < 5: - f.set_result(transform(value+1)) - else: - f.set_result(value) - return f - - base_f = ThenableFuture() - new_f = base_f.then(transform) - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f.set_result(1) - self.assert_(base_f.done()) - self.assert_(new_f.done()) - - self.assert_(not new_f.exception()) - self.assert_(new_f.result() == 5) - - def test_detect_circular_chains(self): - - f1 = ThenableFuture() - f2 = ThenableFuture() - chain = [f1, f2, f1] - - def transform(a): - try: - f = chain.pop(0) - f.set_result(transform(a)) - return f - except IndexError: - return 42 - - base_f = ThenableFuture() - new_f = base_f.then(transform) - - self.assert_(base_f is not new_f) - self.assert_(not base_f.done()) - self.assert_(not new_f.done()) - - base_f.set_result(1) - self.assert_(base_f.done()) - self.assert_(new_f.done()) - - self.assert_(new_f.exception()) - with self.assertRaises(RuntimeError) as catcher: - new_f.result() - assert 'Circular future chain detected' in catcher.exception.args[0] - - -SKIP_POOL_TEST = False -name, version = MPI.get_vendor() -if name == 'Open MPI': - if version < (3,0,0): - SKIP_POOL_TEST = True - if version == (4,0,0): - SKIP_POOL_TEST = True -if name == 'MPICH': - if MPI.COMM_WORLD.Get_attr(MPI.APPNUM) is None: - SKIP_POOL_TEST = True -if name == 'MVAPICH2': - SKIP_POOL_TEST = True -if name == 'MPICH2': - if MPI.COMM_WORLD.Get_attr(MPI.APPNUM) is None: - SKIP_POOL_TEST = True -if name == 'Microsoft MPI': - if version < (8,1,0): - SKIP_POOL_TEST = True - if MPI.COMM_WORLD.Get_attr(MPI.APPNUM) is None: - SKIP_POOL_TEST = True -if name == 'Platform MPI': - SKIP_POOL_TEST = True -if MPI.Get_version() < (2,0): - SKIP_POOL_TEST = True - - -if SHARED_POOL: - del MPICommExecutorTest.test_arg_root - del MPICommExecutorTest.test_arg_comm_bad - del ProcessPoolInitTest.test_init_globals - if WORLD_SIZE == 1: - del ProcessPoolInitTest.test_run_name - del ProcessPoolPickleTest -elif WORLD_SIZE > 1 or SKIP_POOL_TEST: - del ProcessPoolInitTest - del ProcessPoolBootupTest - del ProcessPoolShutdownTest - del ProcessPoolWaitTest - del ProcessPoolAsCompletedTest - del ProcessPoolExecutorTest - del ProcessPoolSubmitTest - del ProcessPoolPickleTest - - -if __name__ == '__main__': - unittest.main() diff --git a/demo/futures/test_service.py b/demo/futures/test_service.py deleted file mode 100644 index fcab708..0000000 --- a/demo/futures/test_service.py +++ /dev/null @@ -1,31 +0,0 @@ -import sys -from mpi4pyve.futures import MPIPoolExecutor - - -def main(): - def getarg(opt, default=None): - try: - return sys.argv[sys.argv.index('--'+opt)+1] - except ValueError: - return default - - options = {} - if '--host' in sys.argv or '--port' in sys.argv: - service = (getarg('host'), getarg('port')) - else: - service = getarg('service') - if '--info' in sys.argv: - info = getarg('info').split(',') - info = dict(entry.split('=') for entry in info if entry) - else: - info = None - - with 
MPIPoolExecutor(service=service, mpi_info=info) as executor:
-        fut1 = executor.submit(abs, +42)
-        fut2 = executor.submit(abs, -42)
-        assert fut1.result(0) == 42
-        assert fut2.result(0) == 42
-
-
-if __name__ == '__main__':
-    main()
diff --git a/demo/futures/test_service.sh b/demo/futures/test_service.sh
deleted file mode 100755
index c9ce451..0000000
--- a/demo/futures/test_service.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-
-PYTHON=${1-${PYTHON-python}}
-MPIEXEC=${MPIEXEC-mpiexec}
-testdir=$(dirname "$0")
-
-set -e
-
-if [ $(command -v mpichversion) ]; then
-    $MPIEXEC -n 1 $PYTHON -m mpi4pyve.futures.server --xyz > /dev/null 2>&1 || true
-    $MPIEXEC -n 2 $PYTHON -m mpi4pyve.futures.server --bind localhost &
-    mpi4pyveserver=$!; sleep 0.25;
-    $MPIEXEC -n 1 $PYTHON $testdir/test_service.py --host localhost
-    wait $mpi4pyveserver
-    $MPIEXEC -n 2 $PYTHON -m mpi4pyve.futures.server --port 31414 --info "a=x,b=y" &
-    mpi4pyveserver=$!; sleep 0.25;
-    $MPIEXEC -n 1 $PYTHON $testdir/test_service.py --port 31414 --info "a=x,b=y"
-    wait $mpi4pyveserver
-fi
-
-if [ $(command -v mpichversion) ] && [ $(command -v hydra_nameserver) ]; then
-    hydra_nameserver &
-    nameserver=$!; sleep 0.25;
-    $MPIEXEC -nameserver localhost -n 2 $PYTHON -m mpi4pyve.futures.server &
-    mpi4pyveserver=$!; sleep 0.25;
-    $MPIEXEC -nameserver localhost -n 1 $PYTHON $testdir/test_service.py
-    wait $mpi4pyveserver
-    $MPIEXEC -nameserver localhost -n 2 $PYTHON -m mpi4pyve.futures.server --service test-service &
-    mpi4pyveserver=$!; sleep 0.25;
-    $MPIEXEC -nameserver localhost -n 1 $PYTHON $testdir/test_service.py --service test-service
-    wait $mpi4pyveserver
-    kill -TERM $nameserver
-    wait $nameserver 2>/dev/null || true
-fi
diff --git a/demo/gemm/README.rst b/demo/gemm/README.rst
new file mode 100644
index 0000000..29547ff
--- /dev/null
+++ b/demo/gemm/README.rst
@@ -0,0 +1,27 @@
+Performs the matrix-matrix operation:
+
+    C = A * B
+
+where A, B, and C are n by n matrices.
+
+This example requires that the following conditions are satisfied:
+
+    1. int(sqrt(nproc)) * int(sqrt(nproc)) == nproc
+    2. n % int(sqrt(nproc)) == 0
+
+Note that this example is not fully optimized for SX-Aurora TSUBASA.
+It is only a prototype that demonstrates GEMM on multiple processes.
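+
+As a quick pre-flight check, both conditions can be verified with a few
+lines of Python before launching the job. This is a minimal sketch added
+for illustration; the helper name ``check_grid`` is not part of the demo::
+
+    import math
+
+    def check_grid(nproc, n):
+        order = int(math.sqrt(nproc))
+        # condition 1: nproc must be a perfect square
+        assert order * order == nproc, 'nproc is not a perfect square'
+        # condition 2: n must be divisible by sqrt(nproc)
+        assert n % order == 0, 'n is not divisible by sqrt(nproc)'
+
+    check_grid(4, 10000)  # matches the examples below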
+
+To run on the VH, issue at the command line::
+
+    $ mpirun -veo -np 4 python gemm.py -dev vh -dtype float -n 10000
+    {'dev': 'vh', 'dtype': 'float', 'n': 10000}
+    elapsed: 4.292237043380737 [sec], GFLOPS: 465.9574901820241
+    result OK
+
+To run on the VE, issue at the command line::
+
+    $ VE_NLCPY_NODELIST=0,1,2,3 mpirun -veo -np 4 python gemm.py -dev ve -dtype float -n 10000
+    {'dev': 've', 'dtype': 'float', 'n': 10000}
+    elapsed: 0.17874383926391602 [sec], TFLOPS: 11.189196831824741
+    result OK
diff --git a/demo/gemm/gemm.py b/demo/gemm/gemm.py
new file mode 100644
index 0000000..5d3107b
--- /dev/null
+++ b/demo/gemm/gemm.py
@@ -0,0 +1,190 @@
+from mpi4pyve import MPI
+from mpi4pyve import util
+import argparse
+import math
+
+DTYPE = 'float32'
+MPI_DTYPE = MPI.FLOAT
+ROOT = 0
+
+class Grid:
+    def __init__(self, nproc, order, cart_comm, row_comm, col_comm, row_pos,
+                 col_pos, rank):
+        self.nproc = nproc
+        self.order = order
+        self.cart_comm = cart_comm
+        self.row_comm = row_comm
+        self.col_comm = col_comm
+        self.row_pos = row_pos
+        self.col_pos = col_pos
+        self.rank = rank
+
+    def __str__(self):
+        return "Grid Process <row:{} col:{} rank:{}>".format(
+            self.row_pos, self.col_pos, self.rank)
+
+
+def create_grid_process(nproc):
+    dims = MPI.Compute_dims(nproc, 2)
+    if dims[0] != dims[1]:
+        raise ValueError('the number of processes is not a perfect square')
+    cart_comm = comm.Create_cart(dims, periods=[True, True], reorder=True)
+    row_comm = cart_comm.Sub([0, 1])
+    col_comm = cart_comm.Sub([1, 0])
+    coords = cart_comm.coords
+    grid = Grid(nproc, dims[0], cart_comm, row_comm, col_comm, coords[0], coords[1],
+                cart_comm.Get_rank())
+    return grid
+
+def create_block_datatype(sizes, subsizes, grid, itemsize):
+    assert subsizes[0] * grid.order == sizes[0]
+    assert subsizes[1] * grid.order == sizes[1]
+    starts = [0, 0]
+    block_type = MPI.Datatype(MPI_DTYPE).Create_subarray(
+        sizes, subsizes, starts, order=MPI.ORDER_C)
+    resized_type = MPI.Datatype(block_type).Create_resized(
+        0, subsizes[1] * itemsize)
+    resized_type.Commit()
+    return resized_type
+
+def scatter_matrix(root_mat, local_mat, grid, n_d, block_type):
+    sendcount = [1 for i in range(grid.nproc)]
+    recvcount = local_mat.size
+    displs = []
+    offset = 0
+    for i in range(grid.order):
+        offset = i * grid.order * n_d
+        for j in range(grid.order):
+            displs.append(offset)
+            offset += 1
+    sendbuf = [root_mat, sendcount, displs, block_type]
+    recvbuf = [local_mat, recvcount, MPI_DTYPE]
+    grid.cart_comm.Scatterv(sendbuf, recvbuf, root=ROOT)
+
+def gather_matrix(root_mat, local_mat, grid, n_d, block_type):
+    recvcount = [1 for i in range(grid.nproc)]
+    sendcount = local_mat.size
+    displs = []
+    offset = 0
+    for i in range(grid.order):
+        offset = i * grid.order * n_d
+        for j in range(grid.order):
+            displs.append(offset)
+            offset += 1
+    sendbuf = [local_mat, sendcount, MPI_DTYPE]
+    recvbuf = [root_mat, recvcount, displs, block_type]
+    grid.cart_comm.Gatherv(sendbuf, recvbuf, root=ROOT)
+
+def matmul(local_A, local_B, local_C, grid):
+    for i in range(grid.order - 1):
+        peer_send = (grid.col_pos + i + 1) % grid.order
+        grid.row_comm.Isend(local_A[grid.col_pos], peer_send)
+    A_recvreqs = [None for i in range(grid.order)]
+    for i in range(grid.order - 1):
+        peer_recv = (grid.col_pos - i - 1 + grid.order) % grid.order
+        req = grid.row_comm.Irecv(local_A[peer_recv], peer_recv)
+        A_recvreqs[peer_recv] = req
+    for i in range(grid.order - 1):
+        peer_send = (grid.row_pos + i + 1) % grid.order
+        grid.col_comm.Isend(local_B[grid.row_pos], peer_send)
+    B_recvreqs = [None for i in range(grid.order)]
+    for i in range(grid.order - 1):
+        peer_recv = (grid.row_pos - i - 1 + grid.order) % grid.order
+        req = grid.col_comm.Irecv(local_B[peer_recv], peer_recv)
+        B_recvreqs[peer_recv] = req
+    for i in range(grid.order):
+        idx = i
+        if A_recvreqs[idx]: A_recvreqs[idx].wait()
+        if B_recvreqs[idx]: B_recvreqs[idx].wait()
+        local_C += local_A[idx] @ local_B[idx]
+
+def scaling(flops):
+    units = [
+        [1e12, 'TFLOPS'], [1e9, 'GFLOPS'], [1e6, 'MFLOPS'],
+        [1e3, 'KFLOPS'], [1, 'FLOPS']]
+    for scale, unit in units:
+        if flops >= scale:
+            break
+    return unit, flops / scale
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-dev', type=str, required=True, choices=['vh', 've'],
+                        help='Execution device')
+    parser.add_argument('-dtype', type=str, required=True, choices=['float', 'double'],
+                        help='Execution data type')
+    parser.add_argument('-n', type=int, required=False, default=10,
+                        help='the number of rows and cols')
+    args = parser.parse_args()
+
+    comm = MPI.COMM_WORLD
+    nproc = comm.Get_size()
+    rank = comm.Get_rank()
+    if rank == 0: print(vars(args))
+
+    # parse arguments
+    if args.dev == 'vh':
+        import numpy as np
+        dev = np
+    elif args.dev == 've':
+        import nlcpy as vp
+        dev = vp
+    if args.dtype == 'float':
+        DTYPE = 'f4'
+        MPI_DTYPE = MPI.FLOAT
+    elif args.dtype == 'double':
+        DTYPE = 'f8'
+        MPI_DTYPE = MPI.DOUBLE
+    else:
+        raise ValueError
+    n = args.n
+
+    # create grid process
+    grid = create_grid_process(nproc)
+
+    # create matrix A, B, C
+    rng = dev.random.default_rng()
+    if grid.rank == ROOT:
+        A = rng.random((n, n), dtype=DTYPE)
+        B = rng.random((n, n), dtype=DTYPE)
+        C = dev.zeros((n, n), dtype=DTYPE)
+    else:
+        A = None
+        B = None
+        C = None
+    if n % grid.order != 0:
+        raise ValueError('n is not evenly divisible by sqrt(nproc)')
+
+    # create local matrix for computation
+    n_d = n // grid.order
+    local_A = [dev.zeros((n_d, n_d), dtype=DTYPE) for _ in range(grid.order)]
+    local_B = [dev.zeros((n_d, n_d), dtype=DTYPE) for _ in range(grid.order)]
+    local_C = dev.zeros((n_d, n_d), dtype=DTYPE)
+    block_type = create_block_datatype(
+        (n, n), (n_d, n_d), grid, dev.dtype(DTYPE).itemsize)
+    scatter_matrix(A, local_A[grid.row_pos], grid, n_d, block_type)
+    scatter_matrix(B, local_B[grid.col_pos], grid, n_d, block_type)
+
+    # execute matmul
+    if dev.__name__ == 'nlcpy':
+        dev.request.flush()
+    grid.cart_comm.Barrier()
+    t0 = MPI.Wtime()
+    matmul(local_A, local_B, local_C, grid)
+    if dev.__name__ == 'nlcpy':
+        dev.request.flush()
+    grid.cart_comm.Barrier()
+    t1 = MPI.Wtime()
+
+    # result check and show perf
+    gather_matrix(C, local_C, grid, n_d, block_type)
+    if grid.rank == 0:
+        elapsed = t1 - t0
+        flops = 2 * n ** 3 / elapsed
+        print("elapsed: {} [sec], {}: {}".format(elapsed, *scaling(flops)))
+        exp = A @ B
+        norm = dev.linalg.norm(C)
+        if dev.all(((C - exp) / norm) < 1e-4):
+            print("result OK")
+        else:
+            print("result NG")
diff --git a/demo/gemm/makefile b/demo/gemm/makefile
new file mode 100644
index 0000000..4098a7c
--- /dev/null
+++ b/demo/gemm/makefile
@@ -0,0 +1,11 @@
+.PHONY: test
+
+PYTHON=python
+
+N=10000
+DTYPE=float
+
+test:
+	echo "GEMM on VH (4 process)" && mpirun -veo -np 4 ${PYTHON} gemm.py -dev vh -dtype ${DTYPE} -n ${N}
+	echo "GEMM on 1VE (1 process)" && VE_NLCPY_NODELIST=0 mpirun -veo -np 1 ${PYTHON} gemm.py -dev ve -dtype ${DTYPE} -n ${N}
+	echo "GEMM on 4VE (4 process)" && VE_NLCPY_NODELIST=0,1,2,3 mpirun -veo -np 4 ${PYTHON} gemm.py -dev ve -dtype ${DTYPE} -n ${N}
diff --git a/demo/gemv/Makefile b/demo/gemv/Makefile
new file mode 100644
index 0000000..4e9cc5d
--- /dev/null
+++ b/demo/gemv/Makefile
@@ -0,0 +1,8 @@
+.PHONY: test
+
+MPIEXEC=mpiexec -veo -n 1
+PYTHON=python
+
+test:
+	${MPIEXEC} ${PYTHON} gemv.py -dev vh -dtype float
+	VE_NLCPY_NODELIST=0 ${MPIEXEC} ${PYTHON} gemv.py -dev ve -dtype float
diff --git a/demo/gemv/README.rst b/demo/gemv/README.rst
new file mode 100644
index 0000000..7a0d021
--- /dev/null
+++ b/demo/gemv/README.rst
@@ -0,0 +1,19 @@
+Performs the matrix-vector operation:
+
+    y = A * x
+
+where y is an m vector, x is an n vector, and A is an m by n matrix.
+
+To run on the VH, issue at the command line::
+
+    $ mpiexec -veo -np 4 python gemv.py -dev vh -dtype float -m 10000 -n 10000 -iter 100
+    {'dev': 'vh', 'dtype': 'float', 'm': 10000, 'n': 10000, 'iter': 100}
+    elapsed: 0.7454090118408203 [sec]
+    Result success
+
+To run on the VE, issue at the command line::
+
+    $ VE_NLCPY_NODELIST=0,1,2,3 mpiexec -veo -np 4 python gemv.py -dev ve -dtype float -m 10000 -n 10000 -iter 100
+    {'dev': 've', 'dtype': 'float', 'm': 10000, 'n': 10000, 'iter': 100}
+    elapsed: 0.012457132339477539 [sec]
+    Result success
diff --git a/demo/gemv/gemv.py b/demo/gemv/gemv.py
new file mode 100644
index 0000000..2fa1a17
--- /dev/null
+++ b/demo/gemv/gemv.py
@@ -0,0 +1,96 @@
+from mpi4pyve import MPI
+import numpy as np
+import nlcpy as vp
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-dev', type=str, required=True, choices=['vh', 've'],
+                    help='Execution device')
+parser.add_argument('-dtype', type=str, required=True, choices=['float', 'double'],
+                    help='Execution data type')
+parser.add_argument('-m', type=int, required=False, default=10,
+                    help='Number of rows of matrix A')
+parser.add_argument('-n', type=int, required=False, default=10,
+                    help='Number of cols of matrix A and length of vector x')
+parser.add_argument('-iter', type=int, required=False, default=10000,
+                    help='Number of iterations for gemv')
+args = parser.parse_args()
+
+# set module
+if args.dev == 'vh':
+    dev = np
+elif args.dev == 've':
+    dev = vp
+else:
+    raise ValueError
+
+comm = MPI.COMM_WORLD
+size = comm.Get_size()
+rank = comm.Get_rank()
+
+if rank == 0: print(vars(args))
+
+m = args.m
+n = args.n
+if args.dtype == 'float':
+    dtype = 'f4'
+    mpi_dtype = MPI.FLOAT
+elif args.dtype == 'double':
+    dtype = 'f8'
+    mpi_dtype = MPI.DOUBLE
+else:
+    raise ValueError
+
+# compute send/recv counts and displacements
+A_count = [0 for _ in range(size)]
+A_displ = [0 for _ in range(size)]
+y_count = [0 for _ in range(size)]
+y_displ = [0 for _ in range(size)]
+for i in range(size):
+    m_s = m * i // size
+    m_e = m * (i + 1) // size
+    if i == rank:
+        m_d = m_e - m_s
+    A_count[i] = (m_e - m_s) * n
+    A_displ[i] = m_s * n
+    y_count[i] = (m_e - m_s)
+    y_displ[i] = m_s
+
+# create matrix A and vector x
+rng = dev.random.default_rng()
+if rank == 0:
+    A = rng.random((m, n), dtype=dtype)
+    x = rng.random(n, dtype=dtype)
+else:
+    A = None
+    x = dev.zeros(n, dtype=dtype)
+A_local = dev.empty((m_d, n), dtype=dtype)
+comm.Scatterv([A, A_count, A_displ, mpi_dtype], [A_local, A_count[rank], mpi_dtype], root=0)  # distribute the rows of matrix A across the processes
+comm.Bcast(x, root=0)  # all processes share the same vector x
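+# Illustration of the partitioning above (the numbers are an assumed example,
+# not computed by this script): with m = 10 and size = 4, the row boundaries
+# m * i // size for i = 0..4 are 0, 2, 5, 7, 10, so the four processes own
+# 2, 3, 2, and 3 rows of A; A_count/A_displ and y_count/y_displ describe
+# exactly these blocks in units of elements.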
+
+# execute gemv
+if dev.__name__ == 'nlcpy':
+    dev.request.flush()
+comm.Barrier()
+t0 = MPI.Wtime()
+for _ in range(args.iter):
+    y_local = A_local @ x  # local gemv
+if dev.__name__ == 'nlcpy':
+    dev.request.flush()
+comm.Barrier()
+t1 = MPI.Wtime()
+
+# gather local vector y into root process
+if rank == 0:
+    y = dev.empty(m, dtype=dtype)
+else:
+    y = None
+comm.Gatherv([y_local, y_count[rank], mpi_dtype], [y, y_count, y_displ, mpi_dtype], root=0)
+
+if rank == 0:
+    print("elapsed:", t1 - t0, "[sec]")
+
+# result check
+if rank == 0:
+    res = dev.all((y - A @ x) / y < 1e-4)
+    print("Result {}".format("success" if res else "failed"))
diff --git a/demo/gemv/makefile b/demo/gemv/makefile
new file mode 100644
index 0000000..b23a5b8
--- /dev/null
+++ b/demo/gemv/makefile
@@ -0,0 +1,17 @@
+.PHONY: test
+
+PYTHON=python
+
+M=10000
+N=10000
+DTYPE=float
+ITER=100
+
+test:
+	echo "GEMV on VH ( 1 process)" && mpirun -veo -np 1 ${PYTHON} gemv.py -dev vh -dtype ${DTYPE} -m ${M} -n ${N} -iter ${ITER}
+	echo "GEMV on VH ( 2 process)" && mpirun -veo -np 2 ${PYTHON} gemv.py -dev vh -dtype ${DTYPE} -m ${M} -n ${N} -iter ${ITER}
+	echo "GEMV on VH ( 4 process)" && mpirun -veo -np 4 ${PYTHON} gemv.py -dev vh -dtype ${DTYPE} -m ${M} -n ${N} -iter ${ITER}
+	echo "GEMV on 1VE ( 1 process)" && VE_NLCPY_NODELIST=0 mpirun -veo -np 1 ${PYTHON} gemv.py -dev ve -dtype ${DTYPE} -m ${M} -n ${N} -iter ${ITER}
+	echo "GEMV on 2VE ( 2 process)" && VE_NLCPY_NODELIST=0,1 mpirun -veo -np 2 ${PYTHON} gemv.py -dev ve -dtype ${DTYPE} -m ${M} -n ${N} -iter ${ITER}
+	echo "GEMV on 4VE ( 4 process)" && VE_NLCPY_NODELIST=0,1,2,3 mpirun -veo -np 4 ${PYTHON} gemv.py -dev ve -dtype ${DTYPE} -m ${M} -n ${N} -iter ${ITER}
+	echo "GEMV on 4VE (32 process)" && VE_NLCPY_NODELIST=0,1,2,3 mpirun -veo -np 32 ${PYTHON} gemv.py -dev ve -dtype ${DTYPE} -m ${M} -n ${N} -iter ${ITER}
diff --git a/demo/helloworld.c b/demo/helloworld.c
deleted file mode 100644
index f19ece7..0000000
--- a/demo/helloworld.c
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <mpi.h>
-#include <stdio.h>
-
-int main(int argc, char *argv[])
-{
-  int size, rank, len;
-  char name[MPI_MAX_PROCESSOR_NAME];
-
-#if defined(MPI_VERSION) && (MPI_VERSION >= 2)
-  int provided;
-  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
-#else
-  MPI_Init(&argc, &argv);
-#endif
-
-  MPI_Comm_size(MPI_COMM_WORLD, &size);
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-  MPI_Get_processor_name(name, &len);
-
-  printf("Hello, World! I am process %d of %d on %s.\n", rank, size, name);
-
-  MPI_Finalize();
-  return 0;
-}
-
-/*
- * Local Variables:
- * mode: C
- * c-basic-offset: 2
- * indent-tabs-mode: nil
- * End:
-*/
diff --git a/demo/helloworld.cxx b/demo/helloworld.cxx
deleted file mode 100644
index e18066e..0000000
--- a/demo/helloworld.cxx
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <mpi.h>
-#include <iostream>
-
-int main(int argc, char *argv[])
-{
-#if defined(MPI_VERSION) && (MPI_VERSION >= 2)
-  MPI::Init_thread(MPI_THREAD_MULTIPLE);
-#else
-  MPI::Init();
-#endif
-
-  int size = MPI::COMM_WORLD.Get_size();
-  int rank = MPI::COMM_WORLD.Get_rank();
-  int len; char name[MPI_MAX_PROCESSOR_NAME];
-  MPI::Get_processor_name(name, len);
-
-  std::cout <<
-    "Hello, World! " <<
-    "I am process " << rank <<
-    " of " << size <<
-    " on " << name <<
-    "."
<< std::endl; - - MPI::Finalize(); - return 0; -} - -// Local Variables: -// mode: C++ -// c-basic-offset: 2 -// indent-tabs-mode: nil -// End: diff --git a/demo/helloworld.f08 b/demo/helloworld.f08 deleted file mode 100644 index 2a62542..0000000 --- a/demo/helloworld.f08 +++ /dev/null @@ -1,23 +0,0 @@ -program main - - use mpi_f08 - implicit none - - integer :: provided, size, rank, len - character (len=MPI_MAX_PROCESSOR_NAME) :: name - - call MPI_Init_thread(MPI_THREAD_MULTIPLE, provided) - - call MPI_Comm_rank(MPI_COMM_WORLD, rank) - call MPI_Comm_size(MPI_COMM_WORLD, size) - call MPI_Get_processor_name(name, len) - - write(*, '(2A,I2,A,I2,3A)') & - 'Hello, World! ', & - 'I am process ', rank, & - ' of ', size, & - ' on ', name(1:len), '.' - - call MPI_Finalize() - -end program main diff --git a/demo/helloworld.f90 b/demo/helloworld.f90 deleted file mode 100644 index 6c454bf..0000000 --- a/demo/helloworld.f90 +++ /dev/null @@ -1,23 +0,0 @@ -program main - - use mpi - implicit none - - integer :: provided, ierr, size, rank, len - character (len=MPI_MAX_PROCESSOR_NAME) :: name - - call MPI_Init_thread(MPI_THREAD_MULTIPLE, provided, ierr) - - call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr) - call MPI_Comm_size(MPI_COMM_WORLD, size, ierr) - call MPI_Get_processor_name(name, len, ierr) - - write(*, '(2A,I2,A,I2,3A)') & - 'Hello, World! ', & - 'I am process ', rank, & - ' of ', size, & - ' on ', name(1:len), '.' - - call MPI_Finalize(ierr) - -end program main diff --git a/demo/init-fini/makefile b/demo/init-fini/makefile deleted file mode 100644 index 5684e34..0000000 --- a/demo/init-fini/makefile +++ /dev/null @@ -1,15 +0,0 @@ -MPIEXEC=mpiexec -NP_FLAG=-n -NP=3 - -PYTHON=python - -.PHONY: test -test: - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test_0.py - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test_1.py - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test_2a.py - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test_2b.py - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test_3.py - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test_4.py - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test_5.py diff --git a/demo/init-fini/runtests.bat b/demo/init-fini/runtests.bat deleted file mode 100644 index cf929c7..0000000 --- a/demo/init-fini/runtests.bat +++ /dev/null @@ -1,13 +0,0 @@ -@echo off -setlocal ENABLEEXTENSIONS - -set PYTHON=python - -@echo on -%PYTHON% test_0.py -%PYTHON% test_1.py -%PYTHON% test_2a.py -%PYTHON% test_2b.py -%PYTHON% test_3.py -%PYTHON% test_4.py -%PYTHON% test_5.py diff --git a/demo/init-fini/runtests.sh b/demo/init-fini/runtests.sh deleted file mode 100755 index 6858d75..0000000 --- a/demo/init-fini/runtests.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh - -MPIEXEC=mpiexec -NP_FLAG=-n -NP=3 - -PYTHON=python - -set -x -$MPIEXEC $NP_FLAG $NP $PYTHON test_0.py -$MPIEXEC $NP_FLAG $NP $PYTHON test_1.py -$MPIEXEC $NP_FLAG $NP $PYTHON test_2a.py -$MPIEXEC $NP_FLAG $NP $PYTHON test_2b.py -$MPIEXEC $NP_FLAG $NP $PYTHON test_3.py -$MPIEXEC $NP_FLAG $NP $PYTHON test_4.py -$MPIEXEC $NP_FLAG $NP $PYTHON test_5.py diff --git a/demo/init-fini/test_0.py b/demo/init-fini/test_0.py deleted file mode 100644 index cb44a5f..0000000 --- a/demo/init-fini/test_0.py +++ /dev/null @@ -1,2 +0,0 @@ -from mpi4pyve import rc -from mpi4pyve import MPI diff --git a/demo/init-fini/test_1.py b/demo/init-fini/test_1.py deleted file mode 100644 index b6eb3af..0000000 --- a/demo/init-fini/test_1.py +++ /dev/null @@ -1,14 +0,0 @@ -from mpi4pyve import rc -rc.initialize = False - -from mpi4pyve import MPI -assert not MPI.Is_initialized() -assert not 
-
-MPI.Init()
-assert MPI.Is_initialized()
-assert not MPI.Is_finalized()
-
-MPI.Finalize()
-assert MPI.Is_initialized()
-assert MPI.Is_finalized()
diff --git a/demo/init-fini/test_2a.py b/demo/init-fini/test_2a.py
deleted file mode 100644
index de88275..0000000
--- a/demo/init-fini/test_2a.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from mpi4pyve import rc
-rc.initialize = False
-
-from mpi4pyve import MPI
-assert not MPI.Is_initialized()
-assert not MPI.Is_finalized()
-
-MPI.Init_thread(MPI.THREAD_MULTIPLE)
-assert MPI.Is_initialized()
-assert not MPI.Is_finalized()
-
-MPI.Finalize()
-assert MPI.Is_initialized()
-assert MPI.Is_finalized()
diff --git a/demo/init-fini/test_2b.py b/demo/init-fini/test_2b.py
deleted file mode 100644
index a7e3548..0000000
--- a/demo/init-fini/test_2b.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from mpi4pyve import rc
-rc.initialize = False
-
-from mpi4pyve import MPI
-assert not MPI.Is_initialized()
-assert not MPI.Is_finalized()
-
-MPI.Init_thread()
-assert MPI.Is_initialized()
-assert not MPI.Is_finalized()
-
-import sys
-name, _ = MPI.get_vendor()
-if name == 'MPICH':
-    assert MPI.Query_thread() == MPI.THREAD_MULTIPLE
-if name == 'MPICH2' and sys.platform[:3] != 'win':
-    assert MPI.Query_thread() == MPI.THREAD_MULTIPLE
-
-MPI.Finalize()
-assert MPI.Is_initialized()
-assert MPI.Is_finalized()
diff --git a/demo/init-fini/test_3.py b/demo/init-fini/test_3.py
deleted file mode 100644
index 8a7d4a1..0000000
--- a/demo/init-fini/test_3.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from mpi4pyve import rc
-rc.finalize = False
-
-from mpi4pyve import MPI
-assert MPI.Is_initialized()
-assert not MPI.Is_finalized()
diff --git a/demo/init-fini/test_4.py b/demo/init-fini/test_4.py
deleted file mode 100644
index 03895d8..0000000
--- a/demo/init-fini/test_4.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from mpi4pyve import rc
-rc.finalize = False
-
-from mpi4pyve import MPI
-assert MPI.Is_initialized()
-assert not MPI.Is_finalized()
-
-MPI.Finalize()
-assert MPI.Is_initialized()
-assert MPI.Is_finalized()
diff --git a/demo/init-fini/test_5.py b/demo/init-fini/test_5.py
deleted file mode 100644
index 3d8ad9f..0000000
--- a/demo/init-fini/test_5.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from mpi4pyve import rc
-del rc.initialize
-del rc.threads
-del rc.thread_level
-del rc.finalize
-
-from mpi4pyve import MPI
-assert MPI.Is_initialized()
-assert not MPI.Is_finalized()
-
-import sys
-name, _ = MPI.get_vendor()
-if name == 'MPICH':
-    assert MPI.Query_thread() == MPI.THREAD_MULTIPLE
-if name == 'MPICH2' and sys.platform[:3] != 'win':
-    assert MPI.Query_thread() == MPI.THREAD_MULTIPLE
diff --git a/demo/libmpi-cffi/apigen.py b/demo/libmpi-cffi/apigen.py
deleted file mode 100644
index 5769b7f..0000000
--- a/demo/libmpi-cffi/apigen.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import sys, os.path as p
-wdir = p.abspath(p.dirname(__file__))
-topdir = p.normpath(p.join(wdir, p.pardir, p.pardir))
-srcdir = p.join(topdir, 'src')
-sys.path.insert(0, p.join(topdir, 'conf'))
-
-from mpiscanner import Scanner
-scanner = Scanner()
-libmpi_pxd = p.join(srcdir, 'mpi4pyve', 'libmpi.pxd')
-scanner.parse_file(libmpi_pxd)
-libmpi_h = p.join(wdir, 'libmpi.h')
-scanner.dump_header_h(libmpi_h)
-
-#try:
-#    from cStringIO import StringIO
-#except ImportError:
-#    from io import StringIO
-#libmpi_h = StringIO()
-#scanner.dump_header_h(libmpi_h)
-#print libmpi_h.read()
-
-libmpi_c = p.join(wdir, 'libmpi.c.in')
-with open(libmpi_c, 'w') as f:
-    f.write("""\
-#include <mpi.h>
-#include "%(srcdir)s/lib-mpi/config.h"
-#include
"%(srcdir)s/lib-mpi/missing.h" -#include "%(srcdir)s/lib-mpi/fallback.h" -#include "%(srcdir)s/lib-mpi/compat.h" -""" % vars()) diff --git a/demo/libmpi-cffi/build.py b/demo/libmpi-cffi/build.py deleted file mode 100644 index 805f890..0000000 --- a/demo/libmpi-cffi/build.py +++ /dev/null @@ -1,54 +0,0 @@ -import os -import cffi - -ffi = cffi.FFI() -with open("libmpi.c.in") as f: - ffi.set_source("libmpi", f.read()) -with open("libmpi.h") as f: - ffi.cdef(f.read()) - -class mpicompiler(object): - - from cffi import ffiplatform - - def __init__(self, cc, ld=None): - self.cc = cc - self.ld = ld if ld else cc - self.ffi_compile = self.ffiplatform.compile - - def __enter__(self): - self.ffiplatform.compile = self.compile - - def __exit__(self, *args): - self.ffiplatform.compile = self.ffi_compile - - def configure(self, compiler): - from distutils.util import split_quoted - from distutils.spawn import find_executable - def fix_command(command, cmd): - if not cmd: return - cmd = split_quoted(cmd) - exe = find_executable(cmd[0]) - if not exe: return - command[0] = exe - command += cmd[1:] - fix_command(compiler.compiler_so, self.cc) - fix_command(compiler.linker_so, self.ld) - - def compile(self, *args, **kargs): - from distutils.command import build_ext - customize_compiler_orig = build_ext.customize_compiler - def customize_compiler(compiler): - customize_compiler_orig(compiler) - self.configure(compiler) - build_ext.customize_compiler = customize_compiler - try: - return self.ffi_compile(*args, **kargs) - finally: - build_ext.customize_compiler = customize_compiler_orig - -if __name__ == '__main__': - cc = os.environ.get('MPICC', 'mpicc') - ld = os.environ.get('MPILD') - with mpicompiler(cc, ld): - ffi.compile() diff --git a/demo/libmpi-cffi/makefile b/demo/libmpi-cffi/makefile deleted file mode 100644 index 2ff6557..0000000 --- a/demo/libmpi-cffi/makefile +++ /dev/null @@ -1,24 +0,0 @@ -.PHONY: default -default: build test clean - -PYTHON = python - -.PHONY: build -build: libmpi.h libmpi.c.in - $(PYTHON) build.py - -libmpi.h libmpi.c.in: - $(PYTHON) apigen.py - -MPIEXEC = mpiexec -NP_FLAG = -n -.PHONY: test -test: build - $(MPIEXEC) $(NP_FLAG) 5 $(PYTHON) test_helloworld.py - $(MPIEXEC) $(NP_FLAG) 4 $(PYTHON) test_ringtest.py - $(MPIEXEC) $(NP_FLAG) 2 $(PYTHON) test_latency.py - -.PHONY: clean -clean: - $(RM) -r libmpi.* - $(RM) -r *py[co] __pycache__ diff --git a/demo/libmpi-cffi/test_helloworld.py b/demo/libmpi-cffi/test_helloworld.py deleted file mode 100644 index 3c3d689..0000000 --- a/demo/libmpi-cffi/test_helloworld.py +++ /dev/null @@ -1,23 +0,0 @@ -from libmpi import ffi, lib - -NULL = ffi.NULL -size_p = ffi.new('int*') -rank_p = ffi.new('int*') -nlen_p = ffi.new('int*') -name_p = ffi.new('char[]', lib.MPI_MAX_PROCESSOR_NAME); - -lib.MPI_Init(NULL, NULL); - -lib.MPI_Comm_size(lib.MPI_COMM_WORLD, size_p) -lib.MPI_Comm_rank(lib.MPI_COMM_WORLD, rank_p) -lib.MPI_Get_processor_name(name_p, nlen_p) - -size = size_p[0] -rank = rank_p[0] -nlen = nlen_p[0] -name = ffi.string(name_p[0:nlen]) - -print("Hello, World! I am process %d of %d on %s." 
- % (rank, size, name)) - -lib.MPI_Finalize() diff --git a/demo/libmpi-cffi/test_latency.py b/demo/libmpi-cffi/test_latency.py deleted file mode 100644 index 09a00f2..0000000 --- a/demo/libmpi-cffi/test_latency.py +++ /dev/null @@ -1,73 +0,0 @@ -# http://mvapich.cse.ohio-state.edu/benchmarks/ - -from libmpi import ffi, lib - -def osu_latency( - BENCHMARH = "MPI Latency Test", - skip = 1000, - loop = 10000, - skip_large = 10, - loop_large = 100, - large_message_size = 8192, - MAX_MSG_SIZE = 1<<22, - ): - - myid = ffi.new('int*') - numprocs = ffi.new('int*') - lib.MPI_Comm_rank(lib.MPI_COMM_WORLD, myid) - lib.MPI_Comm_size(lib.MPI_COMM_WORLD, numprocs) - myid = myid[0] - numprocs = numprocs[0] - - if numprocs != 2: - if myid == 0: - errmsg = "This test requires exactly two processes" - else: - errmsg = None - raise SystemExit(errmsg) - - sbuf = ffi.new('unsigned char[]', MAX_MSG_SIZE) - rbuf = ffi.new('unsigned char[]', MAX_MSG_SIZE) - dtype = lib.MPI_BYTE - tag = 1 - comm = lib.MPI_COMM_WORLD - status = lib.MPI_STATUS_IGNORE - - if myid == 0: - print ('# %s' % (BENCHMARH,)) - if myid == 0: - print ('# %-8s%20s' % ("Size [B]", "Latency [us]")) - - message_sizes = [0] + [2**i for i in range(30)] - for size in message_sizes: - if size > MAX_MSG_SIZE: - break - if size > large_message_size: - skip = skip_large - loop = loop_large - iterations = list(range(loop+skip)) - # - lib.MPI_Barrier(comm) - if myid == 0: - for i in iterations: - if i == skip: - t_start = lib.MPI_Wtime() - lib.MPI_Send(sbuf, size, dtype, 1, tag, comm) - lib.MPI_Recv(rbuf, size, dtype, 1, tag, comm, status) - t_end = lib.MPI_Wtime() - elif myid == 1: - for i in iterations: - lib.MPI_Recv(rbuf, size, dtype, 0, tag, comm, status) - lib.MPI_Send(sbuf, size, dtype, 0, tag, comm) - # - if myid == 0: - latency = (t_end - t_start) * 1e6 / (2 * loop) - print ('%-10d%20.2f' % (size, latency)) - -def main(): - lib.MPI_Init(ffi.NULL, ffi.NULL) - osu_latency() - lib.MPI_Finalize() - -if __name__ == '__main__': - main() diff --git a/demo/libmpi-cffi/test_ringtest.py b/demo/libmpi-cffi/test_ringtest.py deleted file mode 100644 index 7b5dc90..0000000 --- a/demo/libmpi-cffi/test_ringtest.py +++ /dev/null @@ -1,76 +0,0 @@ -from libmpi import ffi, lib - -def ring(comm, count=1, loop=1, skip=0): - - size_p = ffi.new('int*') - rank_p = ffi.new('int*') - lib.MPI_Comm_size(comm, size_p) - lib.MPI_Comm_rank(comm, rank_p) - size = size_p[0] - rank = rank_p[0] - - source = (rank - 1) % size - dest = (rank + 1) % size - sbuf = ffi.new('unsigned char[]', [42]*count) - rbuf = ffi.new('unsigned char[]', [ 0]*count) - - iterations = list(range((loop+skip))) - - if size == 1: - for i in iterations: - if i == skip: - tic = lib.MPI_Wtime() - lib.MPI_Sendrecv(sbuf, count, lib.MPI_BYTE, dest, 0, - rbuf, count, lib.MPI_BYTE, source, 0, - comm, lib.MPI_STATUS_IGNORE) - else: - if rank == 0: - for i in iterations: - if i == skip: - tic = lib.MPI_Wtime() - lib.MPI_Send(sbuf, count, lib.MPI_BYTE, dest, 0, comm) - lib.MPI_Recv(rbuf, count, lib.MPI_BYTE, source, 0, comm, lib.MPI_STATUS_IGNORE) - else: - sbuf = rbuf - for i in iterations: - if i == skip: - tic = lib.MPI_Wtime() - lib.MPI_Recv(rbuf, count, lib.MPI_BYTE, source, 0, comm, lib.MPI_STATUS_IGNORE) - lib.MPI_Send(sbuf, count, lib.MPI_BYTE, dest, 0, comm) - toc = lib.MPI_Wtime() - if rank == 0 and ffi.string(sbuf) != ffi.string(rbuf): - import warnings, traceback - try: - warnings.warn("received message does not match!") - except UserWarning: - traceback.print_exc() - lib.MPI_Abort(comm, 2) - return toc - 
tic - -def ringtest(comm): - - size = ( 1 ) - loop = ( 1 ) - skip = ( 0 ) - - lib.MPI_Barrier(comm) - elapsed = ring(comm, size, loop, skip) - - size_p = ffi.new('int*') - rank_p = ffi.new('int*') - lib.MPI_Comm_size(comm, size_p) - lib.MPI_Comm_rank(comm, rank_p) - comm_size = size_p[0] - comm_rank = rank_p[0] - - if comm_rank == 0: - print ("time for %d loops = %g seconds (%d processes, %d bytes)" - % (loop, elapsed, comm_size, size)) - -def main(): - lib.MPI_Init(ffi.NULL, ffi.NULL) - ringtest(lib.MPI_COMM_WORLD) - lib.MPI_Finalize() - -if __name__ == '__main__': - main() diff --git a/demo/makefile b/demo/makefile index cdc77cf..c1c3e34 100644 --- a/demo/makefile +++ b/demo/makefile @@ -2,26 +2,7 @@ PYTHON=python .PHONY: default default: ${MAKE} PYTHON=${PYTHON} -C compute-pi - ${MAKE} PYTHON=${PYTHON} -C mandelbrot - ${MAKE} PYTHON=${PYTHON} -C nxtval - ${MAKE} PYTHON=${PYTHON} -C reductions - ${MAKE} PYTHON=${PYTHON} -C sequential - ${MAKE} PYTHON=${PYTHON} -C spawning - - ${MAKE} PYTHON=${PYTHON} -C wrap-c - ${MAKE} PYTHON=${PYTHON} -C wrap-f2py - ${MAKE} PYTHON=${PYTHON} -C wrap-swig - ${MAKE} PYTHON=${PYTHON} -C wrap-boost - ${MAKE} PYTHON=${PYTHON} -C wrap-cython - ${MAKE} PYTHON=${PYTHON} -C wrap-ctypes - ${MAKE} PYTHON=${PYTHON} -C wrap-cffi - - ${MAKE} PYTHON=${PYTHON} -C cython - ${MAKE} PYTHON=${PYTHON} -C embedding - ${MAKE} PYTHON=${PYTHON} -C libmpi-cffi - - ${MAKE} PYTHON=${PYTHON} -C mpi-ref-v1 - ${MAKE} PYTHON=${PYTHON} -C init-fini - ${MAKE} PYTHON=${PYTHON} -C threads - - ${MAKE} PYTHON=${PYTHON} -C futures + ${MAKE} PYTHON=${PYTHON} -C gemv + ${MAKE} PYTHON=${PYTHON} -C gemm + ${MAKE} PYTHON=${PYTHON} -C ping-pong + ${MAKE} PYTHON=${PYTHON} -C thermal diff --git a/demo/mandelbrot/makefile b/demo/mandelbrot/makefile deleted file mode 100644 index eb1c254..0000000 --- a/demo/mandelbrot/makefile +++ /dev/null @@ -1,25 +0,0 @@ -.PHONY: default build test clean - -default: build test clean - -build: mandelbrot-worker.exe - -MPIF90=mpif90 -FFLAGS= -O3 -ifneq (${MPI_FORTRAN_MOD_DIR},) -FFLAGS += -I${MPI_FORTRAN_MOD_DIR} -endif -mandelbrot-worker.exe: mandelbrot-worker.f90 - ${MPIF90} ${FFLAGS} -o $@ $< - -PYTHON=python -MPIEXEC=mpiexec -NP_FLAG=-n - -test: build - ${MPIEXEC} ${NP_FLAG} 1 ${PYTHON} mandelbrot-master.py - ${MPIEXEC} ${NP_FLAG} 7 ${PYTHON} mandelbrot.py - ${PYTHON} mandelbrot-seq.py - -clean: - ${RM} mandelbrot-worker.exe diff --git a/demo/mandelbrot/mandelbrot-master.py b/demo/mandelbrot/mandelbrot-master.py deleted file mode 100644 index 913590b..0000000 --- a/demo/mandelbrot/mandelbrot-master.py +++ /dev/null @@ -1,66 +0,0 @@ -from mpi4pyve import MPI -import numpy as np - -x1 = -2.0 -x2 = 1.0 -y1 = -1.0 -y2 = 1.0 - -w = 600 -h = 400 -maxit = 255 - -import os -dirname = os.path.abspath(os.path.dirname(__file__)) -executable = os.path.join(dirname, 'mandelbrot-worker.exe') - -# spawn worker -worker = MPI.COMM_SELF.Spawn(executable, maxprocs=7) -size = worker.Get_remote_size() - -# send parameters -rmsg = np.array([x1, x2, y1, y2], dtype='f') -imsg = np.array([w, h, maxit], dtype='i') -worker.Bcast([rmsg, MPI.REAL], root=MPI.ROOT) -worker.Bcast([imsg, MPI.INTEGER], root=MPI.ROOT) - -# gather results -counts = np.empty(size, dtype='i') -indices = np.empty(h, dtype='i') -cdata = np.empty([h, w], dtype='i') -worker.Gather(sendbuf=None, - recvbuf=[counts, MPI.INTEGER], - root=MPI.ROOT) -worker.Gatherv(sendbuf=None, - recvbuf=[indices, (counts, None), MPI.INTEGER], - root=MPI.ROOT) -worker.Gatherv(sendbuf=None, - recvbuf=[cdata, (counts * w, None), MPI.INTEGER], - 
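# --------------------------------------------------------------------
# The Gatherv calls in this master/worker demo pass an explicit counts
# array and let the displacements default (None means contiguous
# packing in rank order). A self-contained sketch of the same idiom,
# with made-up sizes, runnable as e.g. "mpirun -veo -np 4 python x.py":
#
#     from mpi4pyve import MPI
#     import numpy as np
#
#     comm = MPI.COMM_WORLD
#     rank, size = comm.Get_rank(), comm.Get_size()
#     sendbuf = np.full(rank + 1, rank, dtype='i')  # rank-dependent length
#     counts = np.arange(1, size + 1, dtype='i')    # lengths per rank
#     recvbuf = np.empty(counts.sum(), dtype='i') if rank == 0 else None
#     comm.Gatherv([sendbuf, MPI.INT],
#                  [recvbuf, (counts, None), MPI.INT], root=0)
# --------------------------------------------------------------------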
root=MPI.ROOT) - -# disconnect worker -worker.Disconnect() - -# reconstruct full result -M = np.zeros([h, w], dtype='i') -M[indices, :] = cdata - -# eye candy (requires matplotlib) -if 1: - try: - from matplotlib import pyplot as plt - plt.imshow(M, aspect='equal') - try: - plt.nipy_spectral() - except AttributeError: - plt.spectral() - try: - import signal - def action(*args): raise SystemExit - signal.signal(signal.SIGALRM, action) - signal.alarm(2) - except: - pass - plt.show() - except: - pass diff --git a/demo/mandelbrot/mandelbrot-seq.py b/demo/mandelbrot/mandelbrot-seq.py deleted file mode 100644 index 0904898..0000000 --- a/demo/mandelbrot/mandelbrot-seq.py +++ /dev/null @@ -1,57 +0,0 @@ -import numpy as np -import time - -tic = time.time() - -x1 = -2.0 -x2 = 1.0 -y1 = -1.0 -y2 = 1.0 - -w = 150 -h = 100 -maxit = 127 - -def mandelbrot(x, y, maxit): - c = x + y*1j - z = 0 + 0j - it = 0 - while abs(z) < 2 and it < maxit: - z = z**2 + c - it += 1 - return it - -dx = (x2 - x1) / w -dy = (y2 - y1) / h - -C = np.empty([h, w], dtype='i') -for k in np.arange(h): - y = y1 + k * dy - for j in np.arange(w): - x = x1 + j * dx - C[k, j] = mandelbrot(x, y, maxit) - -M = C - -toc = time.time() -print('wall clock time: %8.2f seconds' % (toc-tic)) - -# eye candy (requires matplotlib) -if 1: - try: - from matplotlib import pyplot as plt - plt.imshow(M, aspect='equal') - try: - plt.nipy_spectral() - except AttributeError: - plt.spectral() - try: - import signal - def action(*args): raise SystemExit - signal.signal(signal.SIGALRM, action) - signal.alarm(2) - except: - pass - plt.show() - except: - pass diff --git a/demo/mandelbrot/mandelbrot-worker.f90 b/demo/mandelbrot/mandelbrot-worker.f90 deleted file mode 100644 index 0bb7d15..0000000 --- a/demo/mandelbrot/mandelbrot-worker.f90 +++ /dev/null @@ -1,96 +0,0 @@ -! $ mpif90 -o mandelbrot.exe mandelbrot.f90 - -program main - - use MPI - implicit none - - integer master, nprocs, myrank, ierr - - real :: rmsg(4), x1, x2, y1, y2 - integer :: imsg(3), w, h, maxit - - integer :: N - integer, allocatable :: I(:) - integer, allocatable :: C(:,:) - integer :: j, k - real :: x, dx, y, dy - - call MPI_Init(ierr) - call MPI_Comm_get_parent(master, ierr) - if (master == MPI_COMM_NULL) then - print *, "parent communicator is MPI_COMM_NULL" - call MPI_Abort(MPI_COMM_WORLD, 1, ierr) - end if - call MPI_Comm_size(master, nprocs, ierr) - call MPI_Comm_rank(master, myrank, ierr) - - ! receive parameters and unpack - call MPI_Bcast(rmsg, 4, MPI_REAL, 0, master, ierr) - call MPI_Bcast(imsg, 3, MPI_INTEGER, 0, master, ierr) - x1 = rmsg(1); x2 = rmsg(2) - y1 = rmsg(3); y2 = rmsg(4) - w = imsg(1); h = imsg(2); maxit = imsg(3) - dx = (x2-x1)/real(w) - dy = (y2-y1)/real(h) - - ! number of lines to compute here - N = h / nprocs - if (modulo(h, nprocs) > myrank) then - N = N + 1 - end if - - ! indices of lines to compute here - allocate( I(0:N-1) ) - I = (/ (k, k=myrank, h-1, nprocs) /) - - ! compute local lines - allocate( C(0:w-1, 0:N-1) ) - do k = 0, N-1 - y = y1 + real(I(k)) * dy - do j = 0, w-1 - x = x1 + real(j) * dx - C(j, k) = mandelbrot(x, y, maxit) - end do - end do - - ! send number of lines computed here - call MPI_Gather(N, 1, MPI_INTEGER, & - MPI_BOTTOM, 0, MPI_BYTE, & - 0, master, ierr) - - ! send indices of lines computed here - call MPI_Gatherv(I, N, MPI_INTEGER, & - MPI_BOTTOM, MPI_BOTTOM, MPI_BOTTOM, MPI_BYTE, & - 0, master, ierr) - - ! 
send data of lines computed here - call MPI_Gatherv(C, N*w, MPI_INTEGER, & - MPI_BOTTOM, MPI_BOTTOM, MPI_BOTTOM, MPI_BYTE, & - 0, master, ierr) - - deallocate(C) - deallocate(I) - - ! we are done - call MPI_Comm_disconnect(master, ierr) - call MPI_Finalize(ierr) - -contains - - function mandelbrot(x, y, maxit) result (it) - implicit none - real, intent(in) :: x, y - integer, intent(in) :: maxit - integer :: it - complex :: z, c - z = cmplx(0, 0) - c = cmplx(x, y) - it = 0 - do while (abs(z) < 2.0 .and. it < maxit) - z = z*z + c - it = it + 1 - end do - end function mandelbrot - -end program main diff --git a/demo/mandelbrot/mandelbrot.py b/demo/mandelbrot/mandelbrot.py deleted file mode 100644 index e558f24..0000000 --- a/demo/mandelbrot/mandelbrot.py +++ /dev/null @@ -1,107 +0,0 @@ -from mpi4pyve import MPI -import numpy as np - -tic = MPI.Wtime() - -x1 = -2.0 -x2 = 1.0 -y1 = -1.0 -y2 = 1.0 - -w = 150 -h = 100 -maxit = 127 - -def mandelbrot(x, y, maxit): - c = x + y*1j - z = 0 + 0j - it = 0 - while abs(z) < 2 and it < maxit: - z = z**2 + c - it += 1 - return it - -comm = MPI.COMM_WORLD -size = comm.Get_size() -rank = comm.Get_rank() - -rmsg = np.empty(4, dtype='f') -imsg = np.empty(3, dtype='i') - -if rank == 0: - rmsg[:] = [x1, x2, y1, y2] - imsg[:] = [w, h, maxit] - -comm.Bcast([rmsg, MPI.FLOAT], root=0) -comm.Bcast([imsg, MPI.INT], root=0) - -x1, x2, y1, y2 = [float(r) for r in rmsg] -w, h, maxit = [int(i) for i in imsg] -dx = (x2 - x1) / w -dy = (y2 - y1) / h - -# number of lines to compute here -N = h // size + (h % size > rank) -N = np.array(N, dtype='i') -# indices of lines to compute here -I = np.arange(rank, h, size, dtype='i') -# compute local lines -C = np.empty([N, w], dtype='i') -for k in np.arange(N): - y = y1 + I[k] * dy - for j in np.arange(w): - x = x1 + j * dx - C[k, j] = mandelbrot(x, y, maxit) -# gather results at root -counts = 0 -indices = None -cdata = None -if rank == 0: - counts = np.empty(size, dtype='i') - indices = np.empty(h, dtype='i') - cdata = np.empty([h, w], dtype='i') -comm.Gather(sendbuf=[N, MPI.INT], - recvbuf=[counts, MPI.INT], - root=0) -comm.Gatherv(sendbuf=[I, MPI.INT], - recvbuf=[indices, (counts, None), MPI.INT], - root=0) -comm.Gatherv(sendbuf=[C, MPI.INT], - recvbuf=[cdata, (counts*w, None), MPI.INT], - root=0) -# reconstruct full result at root -if rank == 0: - M = np.zeros([h,w], dtype='i') - M[indices, :] = cdata - -toc = MPI.Wtime() -wct = comm.gather(toc-tic, root=0) -if rank == 0: - for task, time in enumerate(wct): - print('wall clock time: %8.2f seconds (task %d)' % (time, task)) - def mean(seq): return sum(seq)/len(seq) - print ('all tasks, mean: %8.2f seconds' % mean(wct)) - print ('all tasks, min: %8.2f seconds' % min(wct)) - print ('all tasks, max: %8.2f seconds' % max(wct)) - print ('all tasks, sum: %8.2f seconds' % sum(wct)) - -# eye candy (requires matplotlib) -if rank == 0: - try: - from matplotlib import pyplot as plt - plt.imshow(M, aspect='equal') - try: - plt.nipy_spectral() - except AttributeError: - plt.spectral() - try: - import signal - def action(*args): raise SystemExit - signal.signal(signal.SIGALRM, action) - signal.alarm(2) - except: - pass - plt.show() - except: - pass -MPI.COMM_WORLD.Barrier() diff --git a/demo/mpe-logging/cpilog.py b/demo/mpe-logging/cpilog.py deleted file mode 100644 index 001ed94..0000000 --- a/demo/mpe-logging/cpilog.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python - -# If you want MPE to log MPI calls, you have to add the two lines -# below at the very beginning of your main bootstrap 
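# --------------------------------------------------------------------
# As the surrounding comment in cpilog.py notes, MPE logging only
# captures MPI calls if it is activated before the MPI module is
# first imported. A minimal sketch of the activation pattern these
# demos rely on (the logfile name is arbitrary):
#
#     import mpi4pyve
#     mpi4pyve.profile('mpe', logfile='mylog')  # must come first
#
#     from mpi4pyve import MPI                  # MPI calls now logged
#     MPI.COMM_WORLD.Barrier()
# --------------------------------------------------------------------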
script. -import mpi4pyve -mpi4pyve.profile('mpe', logfile='cpilog') - -# Import the MPI extension module -from mpi4pyve import MPI -if 0: # <- use '1' to disable logging of MPI calls - MPI.Pcontrol(0) - -# Import the 'array' module -from array import array - -# This is just to make the logging -# output a bit more interesting -from time import sleep - -comm = MPI.COMM_WORLD -nprocs = comm.Get_size() -myrank = comm.Get_rank() - -n = array('i', [0]) -pi = array('d', [0]) -mypi = array('d', [0]) - -def comp_pi(n, myrank=0, nprocs=1): - h = 1.0 / n; - s = 0.0; - for i in range(myrank + 1, n + 1, nprocs): - x = h * (i - 0.5); - s += 4.0 / (1.0 + x**2); - return s * h - -comm.Barrier() - -for N in [10000]*10: - - if myrank == 0: - n[0] = N - - comm.Bcast([n, MPI.INT], root=0) - - mypi[0] = comp_pi(n[0], myrank, nprocs) - - comm.Reduce([mypi, MPI.DOUBLE], - [pi, MPI.DOUBLE], - op=MPI.SUM, root=0) - - comm.Barrier() - - sleep(0.01) diff --git a/demo/mpe-logging/makefile b/demo/mpe-logging/makefile deleted file mode 100644 index 31ec83e..0000000 --- a/demo/mpe-logging/makefile +++ /dev/null @@ -1,42 +0,0 @@ -MPIEXEC = mpiexec -PYTHON = python -N = 8 - -.PHONY: default -default: build test clean - - -.PHONY: run-cpilog run-ring run-threads run -run: run-cpilog run-ring run-threads -run-cpilog: - ${MPIEXEC} -n ${N} ${PYTHON} cpilog.py -run-ring: - ${MPIEXEC} -n ${N} ${PYTHON} ring.py -run-threads: - ${MPIEXEC} -n ${N} ${PYTHON} threads.py - -.PHONY: view-cpilog view-ring view-threads view -view: view-cpilog view-ring view-threads -view-cpilog: cpilog.slog2 - jumpshot $< -view-ring: ring.slog2 - jumpshot $< -view-threads: threads.slog2 - jumpshot $< - -cpilog.clog2: run-cpilog -ring.clog2: run-ring -threads.clog2: run-threads -%.slog2: %.clog2 - clog2TOslog2 $< - - -.PHONY: build -build: run - -.PHONY: test -test: - -.PHONY: clean -clean: - ${RM} *.[cs]log2 diff --git a/demo/mpe-logging/ring.py b/demo/mpe-logging/ring.py deleted file mode 100644 index 39eb741..0000000 --- a/demo/mpe-logging/ring.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python -import os -os.environ['MPE_LOGFILE_PREFIX'] = 'ring' -import mpi4pyve -mpi4pyve.profile('mpe') - -from mpi4pyve import MPI -from array import array - -comm = MPI.COMM_WORLD -size = comm.Get_size() -rank = comm.Get_rank() - -src = rank-1 -dest = rank+1 -if rank == 0: - src = size-1 -if rank == size-1: - dest = 0 - -try: - from numpy import zeros - a1 = zeros(1000000, 'd') - a2 = zeros(1000000, 'd') -except ImportError: - from array import array - a1 = array('d', [0]*1000); a1 *= 1000 - a2 = array('d', [0]*1000); a2 *= 1000 - -comm.Sendrecv(sendbuf=a1, recvbuf=a2, - source=src, dest=dest) - -MPI.Request.Waitall([ - comm.Isend(a1, dest=dest), - comm.Irecv(a2, source=src), - ]) diff --git a/demo/mpe-logging/threads.py b/demo/mpe-logging/threads.py deleted file mode 100644 index d4b3587..0000000 --- a/demo/mpe-logging/threads.py +++ /dev/null @@ -1,32 +0,0 @@ -import sys -import mpi4pyve -mpi4pyve.profile('mpe', logfile='threads') - -from mpi4pyve import MPI -from array import array -try: - import threading -except ImportError: - sys.stderr.write("threading module not available\n") - sys.exit(0) - -send_msg = array('i', [7]*1000); send_msg *= 1000 -recv_msg = array('i', [0]*1000); recv_msg *= 1000 - - -def self_send(comm, rank): - comm.Send([send_msg, MPI.INT], dest=rank, tag=0) - -def self_recv(comm, rank): - comm.Recv([recv_msg, MPI.INT], source=rank, tag=0) - -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -send_thread = threading.Thread(target=self_send, 
args=(comm, rank)) -recv_thread = threading.Thread(target=self_recv, args=(comm, rank)) - - -send_thread.start() -recv_thread.start() -recv_thread.join() -send_thread.join() diff --git a/demo/mpi-ref-v1/README.txt b/demo/mpi-ref-v1/README.txt deleted file mode 100644 index 689dc1a..0000000 --- a/demo/mpi-ref-v1/README.txt +++ /dev/null @@ -1,11 +0,0 @@ -@Book{MPI-Ref-V1, - title = {{MPI} - The Complete Reference: Volume 1, The {MPI} Core}, - author = {Marc Snir and Steve Otto and Steven Huss-Lederman - and David Walker and Jack Dongarra}, - edition = {2nd.}, - year = 1998, - publisher = {MIT Press}, - volume = {1, The MPI Core}, - series = {Scientific and Engineering Computation}, - address = {Cambridge, MA, USA}, -} diff --git a/demo/mpi-ref-v1/ex-2.01.py b/demo/mpi-ref-v1/ex-2.01.py deleted file mode 100644 index a42c5d5..0000000 --- a/demo/mpi-ref-v1/ex-2.01.py +++ /dev/null @@ -1,39 +0,0 @@ -## mpiexec -n 2 python ex-2.01.py - -# Process 0 sends a message to process 1 - -# -------------------------------------------------------------------- - -from mpi4pyve import MPI -import array - -if MPI.COMM_WORLD.Get_size() < 2: - raise SystemExit - -# -------------------------------------------------------------------- - -s = "Hello there" - -msg = array.array('c', '\0'*20) -tag = 99 -status = MPI.Status() - -myrank = MPI.COMM_WORLD.Get_rank() - -if myrank == 0: - msg[:len(s)] = array.array('c', s) - MPI.COMM_WORLD.Send([msg, len(s)+1, MPI.CHAR], 1, tag) -elif myrank == 1: - MPI.COMM_WORLD.Recv([msg, 20, MPI.CHAR], 0, tag, status) - -# -------------------------------------------------------------------- - -if myrank == 1: - assert list(msg[:len(s)]) == list(s) - assert msg[len(s)] == '\0' - assert status.source == 0 - assert status.tag == tag - assert status.error == MPI.SUCCESS - assert status.Get_count(MPI.CHAR) == len(s)+1 - -# -------------------------------------------------------------------- diff --git a/demo/mpi-ref-v1/ex-2.08.py b/demo/mpi-ref-v1/ex-2.08.py deleted file mode 100644 index f7809e0..0000000 --- a/demo/mpi-ref-v1/ex-2.08.py +++ /dev/null @@ -1,46 +0,0 @@ -## mpiexec -n 2 python ex-2.08.py - -# An exchange of messages - -# -------------------------------------------------------------------- - -from mpi4pyve import MPI -import array - -if MPI.COMM_WORLD.Get_size() < 2: - raise SystemExit - -# -------------------------------------------------------------------- - -sendbuf = array.array('d', [0]*10) -recvbuf = array.array('d', [0]*10) -tag = 0 -status = MPI.Status() - -myrank = MPI.COMM_WORLD.Get_rank() - -if myrank == 0: - sendbuf[:] = array.array('d', range(len(sendbuf))) - MPI.COMM_WORLD.Send([sendbuf, MPI.DOUBLE], 1, tag) - MPI.COMM_WORLD.Recv([recvbuf, MPI.DOUBLE], 1, tag, status) -elif myrank == 1: - MPI.COMM_WORLD.Recv([recvbuf, MPI.DOUBLE], 0, tag, status) - sendbuf[:] = recvbuf - MPI.COMM_WORLD.Send([sendbuf, MPI.DOUBLE], 0, tag) - -# -------------------------------------------------------------------- - -if myrank == 0: - assert status.source == 1 - assert status.tag == tag - assert status.error == MPI.SUCCESS - assert status.Get_count(MPI.DOUBLE) == len(recvbuf) - assert sendbuf == recvbuf -elif myrank == 1: - assert status.source == 0 - assert status.tag == tag - assert status.error == MPI.SUCCESS - assert status.Get_count(MPI.DOUBLE) == len(recvbuf) - assert sendbuf == recvbuf - -# -------------------------------------------------------------------- diff --git a/demo/mpi-ref-v1/ex-2.16.py b/demo/mpi-ref-v1/ex-2.16.py deleted file mode 100644 index b1f0255..0000000 
--- a/demo/mpi-ref-v1/ex-2.16.py +++ /dev/null @@ -1,71 +0,0 @@ -## mpiexec -n 4 python ex-2.16.py - -# Jacobi code -# version of parallel code using sendrecv and null proceses. - -# -------------------------------------------------------------------- - -from mpi4pyve import MPI -try: - import numpy -except ImportError: - raise SystemExit - -# -------------------------------------------------------------------- - -n = 5 * MPI.COMM_WORLD.Get_size() - -# compute number of processes and myrank -p = MPI.COMM_WORLD.Get_size() -myrank = MPI.COMM_WORLD.Get_rank() - -# compute size of local block -m = n/p -if myrank < (n - p * m): - m = m + 1 - -#compute neighbors -if myrank == 0: - left = MPI.PROC_NULL -else: - left = myrank - 1 -if myrank == p - 1: - right = MPI.PROC_NULL -else: - right = myrank + 1 - -# allocate local arrays -A = numpy.empty((n+2, m+2), dtype='d', order='fortran') -B = numpy.empty((n, m), dtype='d', order='fortran') - -A.fill(1) -A[0, :] = A[-1, :] = 0 -A[:, 0] = A[:, -1] = 0 - -# main loop -converged = False -while not converged: - # compute, B = 0.25 * ( N + S + E + W) - N, S = A[:-2, 1:-1], A[2:, 1:-1] - E, W = A[1:-1, :-2], A[1:-1, 2:] - numpy.add(N, S, B) - numpy.add(E, B, B) - numpy.add(W, B, B) - B *= 0.25 - A[1:-1, 1:-1] = B - # communicate - tag = 0 - MPI.COMM_WORLD.Sendrecv([B[:, -1], MPI.DOUBLE], right, tag, - [A[:, 0], MPI.DOUBLE], left, tag) - MPI.COMM_WORLD.Sendrecv((B[:, 0], MPI.DOUBLE), left, tag, - (A[:, -1], MPI.DOUBLE), right, tag) - # convergence - myconv = numpy.allclose(B, 0) - loc_conv = numpy.asarray(myconv, dtype='i') - glb_conv = numpy.asarray(0, dtype='i') - MPI.COMM_WORLD.Allreduce([loc_conv, MPI.INT], - [glb_conv, MPI.INT], - op=MPI.LAND) - converged = bool(glb_conv) - -# -------------------------------------------------------------------- diff --git a/demo/mpi-ref-v1/ex-2.29.py b/demo/mpi-ref-v1/ex-2.29.py deleted file mode 100644 index 9209350..0000000 --- a/demo/mpi-ref-v1/ex-2.29.py +++ /dev/null @@ -1,44 +0,0 @@ -## mpiexec -n 3 python ex-2.29.py - -# Use a blocking probe to wait for an incoming message - -# -------------------------------------------------------------------- - -from mpi4pyve import MPI -import array - -if MPI.COMM_WORLD.Get_size() < 3: - raise SystemExit - -# -------------------------------------------------------------------- - -comm = MPI.COMM_WORLD -rank = comm.Get_rank() - -if rank == 0: - i = array.array('i', [7]*5) - comm.Send([i, MPI.INT], 2, 0) -elif rank == 1: - x = array.array('f', [7]*5) - comm.Send([x, MPI.FLOAT], 2, 0) -elif rank == 2: - i = array.array('i', [0]*5) - x = array.array('f', [0]*5) - status = MPI.Status() - for j in range(2): - comm.Probe(MPI.ANY_SOURCE, 0, status) - if status.Get_source() == 0: - comm.Recv([i, MPI.INT], 0, 0, status) - else: - comm.Recv([x, MPI.FLOAT], 1, 0, status) - -# -------------------------------------------------------------------- - -if rank == 2: - for v in i: assert v == 7 - for v in x: assert v == 7 - assert status.source in (0, 1) - assert status.tag == 0 - assert status.error == 0 - -# -------------------------------------------------------------------- diff --git a/demo/mpi-ref-v1/ex-2.32.py b/demo/mpi-ref-v1/ex-2.32.py deleted file mode 100644 index ab7f275..0000000 --- a/demo/mpi-ref-v1/ex-2.32.py +++ /dev/null @@ -1,94 +0,0 @@ -# Jacobi computation, using persitent requests - -from mpi4pyve import MPI -try: - import numpy -except ImportError: - raise SystemExit - - -n = 5 * MPI.COMM_WORLD.Get_size() - -# compute number of processes and myrank -p = 
MPI.COMM_WORLD.Get_size() -myrank = MPI.COMM_WORLD.Get_rank() - -# compute size of local block -m = n/p -if myrank < (n - p * m): - m = m + 1 - -#compute neighbors -if myrank == 0: - left = MPI.PROC_NULL -else: - left = myrank - 1 -if myrank == p - 1: - right = MPI.PROC_NULL -else: - right = myrank + 1 - -# allocate local arrays -A = numpy.empty((n+2, m+2), dtype=float, order='fortran') -B = numpy.empty((n, m), dtype=float, order='fortran') - -A.fill(1) -A[0, :] = A[-1, :] = 0 -A[:, 0] = A[:, -1] = 0 - -# create persintent requests -tag = 0 -sreq1 = MPI.COMM_WORLD.Send_init((B[:, 0], MPI.DOUBLE), left, tag) -sreq2 = MPI.COMM_WORLD.Send_init((B[:, -1], MPI.DOUBLE), right, tag) -rreq1 = MPI.COMM_WORLD.Recv_init((A[:, 0], MPI.DOUBLE), left, tag) -rreq2 = MPI.COMM_WORLD.Recv_init((A[:, -1], MPI.DOUBLE), right, tag) -reqlist = [sreq1, sreq2, rreq1, rreq2] - -for req in reqlist: - assert req != MPI.REQUEST_NULL - -# main loop -converged = False -while not converged: - # compute boundary columns - N, S = A[ :-2, 1], A[2:, 1] - E, W = A[1:-1, 0], A[1:-1, 2] - C = B[:, 0] - numpy.add(N, S, C) - numpy.add(C, E, C) - numpy.add(C, W, C) - C *= 0.25 - N, S = A[ :-2, -2], A[2:, -2] - E, W = A[1:-1, -3], A[1:-1, -1] - C = B[:, -1] - numpy.add(N, S, C) - numpy.add(C, E, C) - numpy.add(C, W, C) - C *= 0.25 - # start communication - #MPI.Prequest.Startall(reqlist) - for r in reqlist: - r.Start() - # compute interior - N, S = A[ :-2, 2:-2], A[2, 2:-2] - E, W = A[1:-1, 2:-2], A[1:-1, 2:-2] - C = B[:, 1:-1] - numpy.add(N, S, C) - numpy.add(E, C, C) - numpy.add(W, C, C) - C *= 0.25 - A[1:-1, 1:-1] = B - # complete communication - MPI.Prequest.Waitall(reqlist) - # convergence - myconv = numpy.allclose(B, 0) - loc_conv = numpy.asarray(myconv, dtype='i') - glb_conv = numpy.asarray(0, dtype='i') - MPI.COMM_WORLD.Allreduce([loc_conv, MPI.INT], - [glb_conv, MPI.INT], - op=MPI.LAND) - converged = bool(glb_conv) - -# free persintent requests -for req in reqlist: - req.Free() diff --git a/demo/mpi-ref-v1/ex-2.34.py b/demo/mpi-ref-v1/ex-2.34.py deleted file mode 100644 index e2ee957..0000000 --- a/demo/mpi-ref-v1/ex-2.34.py +++ /dev/null @@ -1,47 +0,0 @@ -## mpiexec -n 2 python ex-2.34.py - -# Use of ready-mode and synchonous-mode - -# -------------------------------------------------------------------- - -from mpi4pyve import MPI -try: - import numpy -except ImportError: - raise SystemExit - -if MPI.COMM_WORLD.Get_size() < 2: - raise SystemExit - -# -------------------------------------------------------------------- - -comm = MPI.COMM_WORLD - -buff = numpy.empty((1000,2), dtype='f', order='fortran') - -rank = comm.Get_rank() - -if rank == 0: - req1 = comm.Irecv([buff[:, 0], MPI.FLOAT], 1, 1) - req2 = comm.Irecv([buff[:, 1], MPI.FLOAT], 1, 2) - status = [MPI.Status(), MPI.Status()] - MPI.Request.Waitall([req1, req2], status) -elif rank == 1: - buff[:, 0] = 5 - buff[:, 1] = 7 - comm.Ssend([buff[:, 1], MPI.FLOAT], 0, 2) - comm.Rsend([buff[:, 0], MPI.FLOAT], 0, 1) - -# -------------------------------------------------------------------- - -all = numpy.all - -if rank == 0: - assert all(buff[:, 0] == 5) - assert all(buff[:, 1] == 7) - assert status[0].source == 1 - assert status[0].tag == 1 - assert status[1].source == 1 - assert status[1].tag == 2 - -# -------------------------------------------------------------------- diff --git a/demo/mpi-ref-v1/ex-2.35.py b/demo/mpi-ref-v1/ex-2.35.py deleted file mode 100644 index d768528..0000000 --- a/demo/mpi-ref-v1/ex-2.35.py +++ /dev/null @@ -1,34 +0,0 @@ -## mpiexec -n 1 python 
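# --------------------------------------------------------------------
# ex-2.35.py below only attaches and detaches buffers; nothing is
# actually sent. A short sketch of how such a buffer pairs with a
# buffered send (sizes illustrative, two or more processes assumed):
#
#     from mpi4pyve import MPI
#     import numpy as np
#
#     comm = MPI.COMM_WORLD
#     bufsize = 10000 + MPI.BSEND_OVERHEAD
#     MPI.Attach_buffer(np.empty(bufsize, dtype='b'))
#     if comm.Get_rank() == 0:
#         comm.Bsend([np.arange(10, dtype='i'), MPI.INT], dest=1)
#     elif comm.Get_rank() == 1:
#         msg = np.empty(10, dtype='i')
#         comm.Recv([msg, MPI.INT], source=0)
#     MPI.Detach_buffer()  # blocks until buffered sends complete
# --------------------------------------------------------------------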
ex-2.35.py - -# Calls to attach and detach buffers - -# -------------------------------------------------------------------- - -from mpi4pyve import MPI -try: - from numpy import empty -except ImportError: - from array import array - def empty(size, dtype): - return array(dtype, [0]*size) - -# -------------------------------------------------------------------- - -BUFSISE = 10000 + MPI.BSEND_OVERHEAD - -buff = empty(BUFSISE, dtype='b') - -MPI.Attach_buffer(buff) - -buff2 = MPI.Detach_buffer() - -MPI.Attach_buffer(buff2) - -MPI.Detach_buffer() - - -# -------------------------------------------------------------------- - -assert len(buff2) == BUFSISE - -# -------------------------------------------------------------------- diff --git a/demo/mpi-ref-v1/ex-3.01.py b/demo/mpi-ref-v1/ex-3.01.py deleted file mode 100644 index 07a8f88..0000000 --- a/demo/mpi-ref-v1/ex-3.01.py +++ /dev/null @@ -1,33 +0,0 @@ -from mpi4pyve import MPI -try: - import numpy -except ImportError: - raise SystemExit - -# send a upper triangular matrix - -N = 10 - -a = numpy.empty((N, N), dtype=float, order='c') -b = numpy.zeros((N, N), dtype=float, order='c') -a.flat = numpy.arange(a.size, dtype=float) - -# compute start and size of each row -i = numpy.arange(N) -blocklen = N - i -disp = N * i + i - -# create datatype for upper triangular part -upper = MPI.DOUBLE.Create_indexed(blocklen, disp) -upper.Commit() - -# send and recv matrix -myrank = MPI.COMM_WORLD.Get_rank() -MPI.COMM_WORLD.Sendrecv((a, 1, upper), myrank, 0, - [b, 1, upper], myrank, 0) - -assert numpy.allclose(numpy.triu(b), numpy.triu(a)) -assert numpy.allclose(numpy.tril(b, -1), numpy.zeros((N,N))) - -upper.Free() - diff --git a/demo/mpi-ref-v1/ex-3.02.py b/demo/mpi-ref-v1/ex-3.02.py deleted file mode 100644 index 686d3ac..0000000 --- a/demo/mpi-ref-v1/ex-3.02.py +++ /dev/null @@ -1,12 +0,0 @@ -from mpi4pyve import MPI - -# Type = { (double, 0), (char, 8) } - -blens = (1, 1) -disps = (0, MPI.DOUBLE.size) -types = (MPI.DOUBLE, MPI.CHAR) - -dtype = MPI.Datatype.Create_struct(blens, disps, types) - -if 'ex-3.02' in __file__: - dtype.Free() diff --git a/demo/mpi-ref-v1/ex-3.03.py b/demo/mpi-ref-v1/ex-3.03.py deleted file mode 100644 index 75df6c3..0000000 --- a/demo/mpi-ref-v1/ex-3.03.py +++ /dev/null @@ -1,6 +0,0 @@ -execfile('ex-3.02.py') - -assert dtype.size == MPI.DOUBLE.size + MPI.CHAR.size -assert dtype.extent >= dtype.size - -dtype.Free() diff --git a/demo/mpi-ref-v1/ex-3.04.py b/demo/mpi-ref-v1/ex-3.04.py deleted file mode 100644 index 11cbcdf..0000000 --- a/demo/mpi-ref-v1/ex-3.04.py +++ /dev/null @@ -1,9 +0,0 @@ -execfile('ex-3.02.py') - -count = 3 -newtype = dtype.Create_contiguous(count) - -assert newtype.extent == dtype.extent * count - -dtype.Free() -newtype.Free() diff --git a/demo/mpi-ref-v1/ex-3.05.py b/demo/mpi-ref-v1/ex-3.05.py deleted file mode 100644 index e00ca4b..0000000 --- a/demo/mpi-ref-v1/ex-3.05.py +++ /dev/null @@ -1,11 +0,0 @@ -execfile('ex-3.02.py') - -count = 2 -blklen = 3 -stride = 4 -newtype = dtype.Create_vector(count, blklen, stride) - -assert newtype.size == dtype.size * count * blklen - -dtype.Free() -newtype.Free() diff --git a/demo/mpi-ref-v1/ex-3.06.py b/demo/mpi-ref-v1/ex-3.06.py deleted file mode 100644 index 922bcc4..0000000 --- a/demo/mpi-ref-v1/ex-3.06.py +++ /dev/null @@ -1,11 +0,0 @@ -execfile('ex-3.02.py') - -count = 3 -blklen = 1 -stride = -2 -newtype = dtype.Create_vector(count, blklen, stride) - -assert newtype.size == dtype.size * count * blklen - -dtype.Free() -newtype.Free() diff --git 
a/demo/mpi-ref-v1/ex-3.07.py b/demo/mpi-ref-v1/ex-3.07.py deleted file mode 100644 index 49243cf..0000000 --- a/demo/mpi-ref-v1/ex-3.07.py +++ /dev/null @@ -1,11 +0,0 @@ -execfile('ex-3.02.py') - -count = 2 -blklen = 3 -stride = 4 * dtype.extent -newtype = dtype.Create_hvector(count, blklen, stride) - -assert newtype.size == dtype.size * count * blklen - -dtype.Free() -newtype.Free() diff --git a/demo/mpi-ref-v1/ex-3.08.py b/demo/mpi-ref-v1/ex-3.08.py deleted file mode 100644 index 0737ba1..0000000 --- a/demo/mpi-ref-v1/ex-3.08.py +++ /dev/null @@ -1,34 +0,0 @@ -from mpi4pyve import MPI -try: - import numpy -except ImportError: - raise SystemExit - -# extract the section a[0:6:2, 0:5:2] and store it in e[:,:] - -a = numpy.empty((6, 5), dtype=float, order='fortran') -e = numpy.empty((3, 3), dtype=float, order='fortran') -a.flat = numpy.arange(a.size, dtype=float) - -lb, sizeofdouble = MPI.DOUBLE.Get_extent() - -# create datatype for a 1D section -oneslice = MPI.DOUBLE.Create_vector(3, 1, 2) - -# create datatype for a 2D section -twoslice = oneslice.Create_hvector(3, 1, 12*sizeofdouble) -twoslice.Commit() - -# send and recv on same process -myrank = MPI.COMM_WORLD.Get_rank() -status = MPI.Status() -MPI.COMM_WORLD.Sendrecv([a, 1, twoslice], myrank, 0, - (e, MPI.DOUBLE), myrank, 0, status) - -assert numpy.allclose(a[::2, ::2], e) -assert status.Get_count(twoslice) == 1 -assert status.Get_count(MPI.DOUBLE) == e.size - -oneslice.Free() -twoslice.Free() - diff --git a/demo/mpi-ref-v1/ex-3.09.py b/demo/mpi-ref-v1/ex-3.09.py deleted file mode 100644 index ceab636..0000000 --- a/demo/mpi-ref-v1/ex-3.09.py +++ /dev/null @@ -1,39 +0,0 @@ -from mpi4pyve import MPI -try: - import numpy -except ImportError: - raise SystemExit - -# transpose a matrix a into b - -a = numpy.empty((100, 100), dtype=float, order='fortran') -b = numpy.empty((100, 100), dtype=float, order='fortran') -a.flat = numpy.arange(a.size, dtype=float) - -lb, sizeofdouble = MPI.DOUBLE.Get_extent() - -# create datatype dor one row -# (vector with 100 double entries and stride 100) -row = MPI.DOUBLE.Create_vector(100, 1, 100) - -# create datatype for matrix in row-major order - -# (one hundred copies of the row datatype, strided one word -# apart; the succesive row datatypes are interleaved) -xpose = row.Create_hvector(100, 1, sizeofdouble) -xpose.Commit() - -# send matrix in row-major order and receive in column major order -abuf = (a, xpose) -bbuf = (b, MPI.DOUBLE) -myrank = MPI.COMM_WORLD.Get_rank() -status = MPI.Status() -MPI.COMM_WORLD.Sendrecv(abuf, myrank, 0, bbuf, myrank, 0, status) - -assert numpy.allclose(a, b.transpose()) -assert status.Get_count(xpose) == 1 -assert status.Get_count(MPI.DOUBLE) == b.size - -row.Free() -xpose.Free() - diff --git a/demo/mpi-ref-v1/ex-3.11.py b/demo/mpi-ref-v1/ex-3.11.py deleted file mode 100644 index 488acef..0000000 --- a/demo/mpi-ref-v1/ex-3.11.py +++ /dev/null @@ -1,8 +0,0 @@ -execfile('ex-3.02.py') - -B = (3, 1) -D = (4, 0) -newtype = dtype.Create_indexed(B, D) - -dtype.Free() -newtype.Free() diff --git a/demo/mpi-ref-v1/ex-3.12.py b/demo/mpi-ref-v1/ex-3.12.py deleted file mode 100644 index f75986f..0000000 --- a/demo/mpi-ref-v1/ex-3.12.py +++ /dev/null @@ -1,8 +0,0 @@ -execfile('ex-3.02.py') - -B = (3, 1) -D = (4 * dtype.extent, 0) -newtype = dtype.Create_hindexed(B, D) - -dtype.Free() -newtype.Free() diff --git a/demo/mpi-ref-v1/ex-3.13.py b/demo/mpi-ref-v1/ex-3.13.py deleted file mode 100644 index 9173bed..0000000 --- a/demo/mpi-ref-v1/ex-3.13.py +++ /dev/null @@ -1,14 +0,0 @@ -from 
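# --------------------------------------------------------------------
# The ex-3.x scripts here construct derived datatypes, but several
# free them without ever communicating. A sketch that commits a struct
# type and moves data with it (single process, self-sendrecv; byte
# buffers sized from the type's extent, layout as in ex-3.02):
#
#     from mpi4pyve import MPI
#     import numpy as np
#
#     blens = (1, 1)
#     disps = (0, MPI.DOUBLE.size)
#     types = (MPI.DOUBLE, MPI.CHAR)
#     dtype = MPI.Datatype.Create_struct(blens, disps, types).Commit()
#
#     comm = MPI.COMM_WORLD
#     sbuf = np.zeros(dtype.extent, dtype='b')
#     rbuf = np.zeros(dtype.extent, dtype='b')
#     comm.Sendrecv([sbuf, 1, dtype], comm.rank, 0,
#                   [rbuf, 1, dtype], comm.rank, 0)
#     dtype.Free()
# --------------------------------------------------------------------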
mpi4pyve import MPI - -blens = (1, 1) -disps = (0, MPI.DOUBLE.size) -types = (MPI.DOUBLE, MPI.CHAR) -type1 = MPI.Datatype.Create_struct(blens, disps, types) - -B = (2, 1, 3) -D = (0, 16, 26) -T = (MPI.FLOAT, type1, MPI.CHAR) -dtype = MPI.Datatype.Create_struct(B, D, T) - -type1.Free() -dtype.Free() diff --git a/demo/mpi-ref-v1/makefile b/demo/mpi-ref-v1/makefile deleted file mode 100644 index 727d33a..0000000 --- a/demo/mpi-ref-v1/makefile +++ /dev/null @@ -1,23 +0,0 @@ -.PHONY: default build test clean test_seq test_mpi - -default: build test clean - -build: - -PYTHON = python -MPIEXEC = mpiexec -NP_FLAG = -n -NP = 3 - -test_seq: - ${MAKE} MPIEXEC= NP_FLAG= NP= test_mpi - -test_mpi: - -@for i in `ls ex-*.py`; do \ - echo ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} $$i; \ - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} $$i; \ - done - -test: test_seq test_mpi - -clean: diff --git a/demo/mpi-ref-v1/runtests.bat b/demo/mpi-ref-v1/runtests.bat deleted file mode 100644 index d75b9a8..0000000 --- a/demo/mpi-ref-v1/runtests.bat +++ /dev/null @@ -1,37 +0,0 @@ -@echo off -setlocal ENABLEEXTENSIONS - -set MPI=Microsoft MPI -set PATH="%ProgramFiles%\%MPI%\bin";%PATH% - -set MPIEXEC=mpiexec -set NP_FLAG=-n -set NP=5 - -set PYTHON=C:\Python27\python.exe -set PYTHON=C:\Python36\python.exe -set PYTHON=python - -@echo on -set MPIEXEC= -set NP_FLAG= -set NP= -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-2.01.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-2.08.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-2.16.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-2.29.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-2.32.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-2.34.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-2.35.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.01.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.02.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.03.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.04.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.05.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.06.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.07.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.08.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.09.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.11.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.12.py -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% ex-3.13.py diff --git a/demo/mpi-ref-v1/runtests.sh b/demo/mpi-ref-v1/runtests.sh deleted file mode 100755 index 25f8da9..0000000 --- a/demo/mpi-ref-v1/runtests.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh - -MPIEXEC=mpiexec -NP_FLAG=-n -NP=3 - -PYTHON=python - -set -x -$MPIEXEC $NP_FLAG $NP $PYTHON ex-2.01.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-2.08.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-2.16.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-2.29.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-2.32.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-2.34.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-2.35.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.01.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.02.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.03.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.04.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.05.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.06.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.07.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.08.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.09.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.11.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.12.py -$MPIEXEC $NP_FLAG $NP $PYTHON ex-3.13.py diff --git a/demo/nxtval/makefile b/demo/nxtval/makefile deleted file mode 100644 index f08b5ca..0000000 --- a/demo/nxtval/makefile +++ /dev/null @@ -1,13 +0,0 @@ -MPIEXEC=mpiexec -NP_FLAG=-n -NP=5 - -PYTHON=python - -.PHONY: test -test: - ${MPIEXEC} ${NP_FLAG} ${NP} 
${PYTHON} nxtval-threads.py - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} nxtval-dynproc.py - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} nxtval-onesided.py - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} nxtval-scalable.py -# ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} nxtval-mpi3.py diff --git a/demo/nxtval/nxtval-dynproc.py b/demo/nxtval/nxtval-dynproc.py deleted file mode 100644 index 015c360..0000000 --- a/demo/nxtval/nxtval-dynproc.py +++ /dev/null @@ -1,77 +0,0 @@ -# -------------------------------------------------------------------- - -from mpi4pyve import MPI -import sys, os - -class Counter(object): - - def __init__(self, comm): - assert not comm.Is_inter() - self.comm = comm.Dup() - # start counter process - script = os.path.abspath(__file__) - if script[-4:] in ('.pyc', '.pyo'): - script = script[:-1] - self.child = self.comm.Spawn(sys.executable, - [script, '--child'], 1) - - def free(self): - self.comm.Barrier() - # stop counter process - rank = self.child.Get_rank() - if rank == 0: - self.child.send(None, 0, 1) - self.child.Disconnect() - # - self.comm.Free() - - def next(self): - # - incr = 1 - self.child.send(incr, 0, 0) - ival = self.child.recv(None, 0, 0) - nxtval = ival - # - return nxtval - -# -------------------------------------------------------------------- - -def _counter_child(): - parent = MPI.Comm.Get_parent() - assert parent != MPI.COMM_NULL - try: - counter = 0 - status = MPI.Status() - any_src, any_tag = MPI.ANY_SOURCE, MPI.ANY_TAG - while True: # server loop - incr = parent.recv(None, any_src, any_tag, status) - if status.tag == 1: break - parent.send(counter, status.source, 0) - counter += incr - finally: - parent.Disconnect() - -if __name__ == '__main__': - if (len(sys.argv) > 1 and - sys.argv[0] == __file__ and - sys.argv[1] == '--child'): - _counter_child() - sys.exit(0) - -# -------------------------------------------------------------------- - -def test(): - vals = [] - counter = Counter(MPI.COMM_WORLD) - for i in range(5): - c = counter.next() - vals.append(c) - counter.free() - # - vals = MPI.COMM_WORLD.allreduce(vals) - assert sorted(vals) == list(range(len(vals))) - -if __name__ == '__main__': - test() - -# -------------------------------------------------------------------- diff --git a/demo/nxtval/nxtval-mpi3.py b/demo/nxtval/nxtval-mpi3.py deleted file mode 100644 index 4922117..0000000 --- a/demo/nxtval/nxtval-mpi3.py +++ /dev/null @@ -1,85 +0,0 @@ -from mpi4pyve import MPI -from array import array as _array -import struct as _struct - -# -------------------------------------------------------------------- - -class Counter(object): - - def __init__(self, comm): - rank = comm.Get_rank() - itemsize = MPI.INT.Get_size() - if rank == 0: - n = 1 - else: - n = 0 - self.win = MPI.Win.Allocate(n*itemsize, itemsize, - MPI.INFO_NULL, comm) - if rank == 0: - mem = self.win.tomemory() - mem[:] = _struct.pack('i', 0) - - def free(self): - self.win.Free() - - def next(self, increment=1): - incr = _array('i', [increment]) - nval = _array('i', [0]) - self.win.Lock(0) - self.win.Get_accumulate([incr, 1, MPI.INT], - [nval, 1, MPI.INT], - 0, op=MPI.SUM) - self.win.Unlock(0) - return nval[0] - -# ----------------------------------------------------------------------------- - -class Mutex(object): - - def __init__(self, comm): - self.counter = Counter(comm) - - def __enter__(self): - self.lock() - return self - - def __exit__(self, *exc): - self.unlock() - return None - - def free(self): - self.counter.free() - - def lock(self): - value = self.counter.next(+1) - while value != 0: - value 
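# --------------------------------------------------------------------
# The Counter/Mutex pair in this demo builds mutual exclusion on top
# of one-sided Get_accumulate. Intended usage, mirroring the test
# functions at the bottom of the file (sketch only):
#
#     from mpi4pyve import MPI
#
#     counter = Counter(MPI.COMM_WORLD)   # class defined in this demo
#     ticket = counter.next()             # globally ordered integer
#     counter.free()
#
#     mutex = Mutex(MPI.COMM_WORLD)       # class defined in this demo
#     with mutex:                         # lock()/unlock() via context mgr
#         pass                            # critical section
#     mutex.free()
# --------------------------------------------------------------------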
= self.counter.next(-1) - value = self.counter.next(+1) - - def unlock(self): - self.counter.next(-1) - -# ----------------------------------------------------------------------------- - -def test_counter(): - vals = [] - counter = Counter(MPI.COMM_WORLD) - for i in range(5): - c = counter.next() - vals.append(c) - counter.free() - - vals = MPI.COMM_WORLD.allreduce(vals) - assert sorted(vals) == list(range(len(vals))) - -def test_mutex(): - mutex = Mutex(MPI.COMM_WORLD) - mutex.lock() - mutex.unlock() - mutex.free() - -if __name__ == '__main__': - test_counter() - test_mutex() - -# ----------------------------------------------------------------------------- diff --git a/demo/nxtval/nxtval-onesided.py b/demo/nxtval/nxtval-onesided.py deleted file mode 100644 index 235c2d0..0000000 --- a/demo/nxtval/nxtval-onesided.py +++ /dev/null @@ -1,72 +0,0 @@ -# -------------------------------------------------------------------- - -from mpi4pyve import MPI -from array import array as _array -import struct as _struct - -class Counter(object): - - def __init__(self, comm): - # - size = comm.Get_size() - rank = comm.Get_rank() - # - itemsize = MPI.INT.Get_size() - if rank == 0: - mem = MPI.Alloc_mem(itemsize*size, MPI.INFO_NULL) - mem[:] = _struct.pack('i', 0) * size - else: - mem = MPI.BOTTOM - self.win = MPI.Win.Create(mem, itemsize, MPI.INFO_NULL, comm) - # - blens = [rank, size-rank-1] - disps = [0, rank+1] - self.dt_get = MPI.INT.Create_indexed(blens, disps).Commit() - # - self.myval = 0 - - def free(self): - self.dt_get.Free() - mem = self.win.tomemory() - self.win.Free() - if mem: MPI.Free_mem(mem) - - def next(self): - # - group = self.win.Get_group() - size = group.Get_size() - rank = group.Get_rank() - group.Free() - # - incr = _array('i', [1]) - vals = _array('i', [0])*size - self.win.Lock(0) - self.win.Accumulate([incr, 1, MPI.INT], 0, - [rank, 1, MPI.INT], MPI.SUM) - self.win.Get([vals, 1, self.dt_get], 0, - [ 0, 1, self.dt_get]) - self.win.Unlock(0) - # - vals[rank] = self.myval - self.myval += 1 - nxtval = sum(vals) - # - return nxtval - -# -------------------------------------------------------------------- - -def test(): - vals = [] - counter = Counter(MPI.COMM_WORLD) - for i in range(5): - c = counter.next() - vals.append(c) - counter.free() - - vals = MPI.COMM_WORLD.allreduce(vals) - assert sorted(vals) == list(range(len(vals))) - -if __name__ == '__main__': - test() - -# -------------------------------------------------------------------- diff --git a/demo/nxtval/nxtval-scalable.py b/demo/nxtval/nxtval-scalable.py deleted file mode 100644 index 675682b..0000000 --- a/demo/nxtval/nxtval-scalable.py +++ /dev/null @@ -1,148 +0,0 @@ -from mpi4pyve import MPI - -# ----------------------------------------------------------------------------- - -import struct as _struct -try: - from numpy import empty as _empty - def _array_new(size, typecode, init=0): - a = _empty(size, typecode) - a.fill(init) - return a - def _array_set(ary, value): - ary.fill(value) - def _array_sum(ary): - return ary.sum() -except ImportError: - from array import array as _array - def _array_new(size, typecode, init=0): - return _array(typecode, [init]) * size - def _array_set(ary, value): - for i, _ in enumerate(ary): - ary[i] = value - def _array_sum(ary): - return sum(ary, 0) - -# ----------------------------------------------------------------------------- - -class Counter(object): - - def __init__(self, comm, init=0): - # - size = comm.Get_size() - rank = comm.Get_rank() - mask = 1 - while mask < size: - mask <<= 
1 - mask >>= 1 - idx = 0 - get_idx = [] - acc_idx = [] - while mask >= 1: - left = idx + 1 - right = idx + (mask<<1) - if rank < mask: - acc_idx.append( left ) - get_idx.append( right ) - idx = left - else: - acc_idx.append( right ) - get_idx.append( left ) - idx = right - rank = rank % mask - mask >>= 1 - # - typecode = 'i' - datatype = MPI.INT - itemsize = datatype.Get_size() - # - root = 0 - rank = comm.Get_rank() - if rank == root: - nlevels = len(get_idx) + 1 - nentries = (1< 0: + part = buf[begin:min(begin + remain, begin + max_count - 1)] + if rank == 0: + comm.Send([part, MPI.DOUBLE], 1) + comm.Recv([part, MPI.DOUBLE], 1) + elif rank == 1: + comm.Recv([part, MPI.DOUBLE], 0) + comm.Send([part, MPI.DOUBLE], 0) + begin += part.size + remain -= part.size + +parser = argparse.ArgumentParser() +parser.add_argument('-dev1', type=str, required=True, choices=['vh', 've']) +parser.add_argument('-dev2', type=str, required=True, choices=['vh', 've']) +parser.add_argument('-n', type=int, required=False, default=20) +parser.add_argument('-loop_count', type=int, required=False, default=10) +args = parser.parse_args() + +# set module +if args.dev1 == 'vh': + dev1 = np +elif args.dev1 == 've': + dev1 = vp +else: + raise ValueError +if args.dev2 == 'vh': + dev2 = np +elif args.dev2 == 've': + dev2 = vp +else: + raise ValueError + +comm = MPI.COMM_WORLD +size = comm.Get_size() +rank = comm.Get_rank() +if rank == 0: print(vars(args)) +if size != 2: + raise ValueError + +loop_count = args.loop_count +if rank == 0: + print('| Data Size (B) | Avg Transfer Time (s) | Bandwidth (GB/s) |') + print('|---------------|-----------------------|------------------|') + +for n in range(0, args.n): + nelem = 1 << n + if rank == 0: + buf = dev1.arange(nelem, dtype='f8') + else: + buf = dev2.empty(nelem, dtype='f8') + comm.Barrier() + t0 = MPI.Wtime() + for i in range(loop_count): + send_recv_helper(buf, comm) + comm.Barrier() + t1 = MPI.Wtime() + elapsed_time = t1 - t0 + avg_transfer_time = elapsed_time / (2.0 * loop_count) + bandwidth = buf.nbytes / (1024 ** 3) / avg_transfer_time + if rank == 0: + print('|{:>15d}|{:23.9f}|{:18.9f}|'.format(buf.nbytes, avg_transfer_time, bandwidth)) + if not np.array_equal(np.asarray(buf), np.arange(nelem, dtype='f8')): + print("Result mismatch (rank = {})".format(rank)) + MPI.Finalize() + exit() + +if rank == 0: + print('|---------------|-----------------------|------------------|') +MPI.Finalize() diff --git a/demo/python-config b/demo/python-config deleted file mode 100755 index 498b414..0000000 --- a/demo/python-config +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python -# -*- python -*- - -import sys, os -import getopt -try: - import sysconfig -except ImportError: - from distutils import sysconfig - -valid_opts = ['help', 'prefix', 'exec-prefix', 'includes', 'libs', 'cflags', - 'ldflags', 'extension-suffix', 'abiflags', 'configdir'] - -def exit_with_usage(code=1): - sys.stderr.write("Usage: %s [%s]\n" % ( - sys.argv[0], '|'.join('--'+opt for opt in valid_opts))) - sys.exit(code) - -try: - opts, args = getopt.getopt(sys.argv[1:], '', valid_opts) -except getopt.error: - exit_with_usage() - -if not opts: - exit_with_usage() - -getvar = sysconfig.get_config_var -pyver = getvar('VERSION') -try: - abiflags = sys.abiflags -except AttributeError: - abiflags = '' - -opt_flags = [flag for (flag, val) in opts] - -if '--help' in opt_flags: - exit_with_usage(code=0) - -for opt in opt_flags: - if opt == '--prefix': - print(getvar('prefix')) - - elif opt == '--exec-prefix': - 
print(getvar('exec_prefix')) - - elif opt in ('--includes', '--cflags'): - try: - include = sysconfig.get_path('include') - platinclude = sysconfig.get_path('platinclude') - except AttributeError: - include = sysconfig.get_python_inc() - platinclude = sysconfig.get_python_inc(plat_specific=True) - flags = ['-I' + include] - if include != platinclude: - flags.append('-I' + platinclude) - if opt == '--cflags': - flags.extend(getvar('CFLAGS').split()) - print(' '.join(flags)) - - elif opt in ('--libs', '--ldflags'): - libs = getvar('LIBS').split() + getvar('SYSLIBS').split() - libs.append('-lpython' + pyver + abiflags) - if opt == '--ldflags': - if not getvar('Py_ENABLE_SHARED'): - libs.insert(0, '-L' + getvar('LIBPL')) - if not getvar('PYTHONFRAMEWORK'): - libs.extend(getvar('LINKFORSHARED').split()) - print(' '.join(libs)) - - elif opt == '--extension-suffix': - ext_suffix = getvar('EXT_SUFFIX') - if ext_suffix is None: - ext_suffix = getvar('SO') - print(ext_suffix) - - elif opt == '--abiflags': - print(abiflags) - - elif opt == '--configdir': - print(getvar('LIBPL')) diff --git a/demo/reductions/makefile b/demo/reductions/makefile deleted file mode 100644 index ef24f38..0000000 --- a/demo/reductions/makefile +++ /dev/null @@ -1,10 +0,0 @@ -MPIEXEC=mpiexec -NP_FLAG=-n -NP=5 - -PYTHON=python - -.PHONY: test -test: - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test_reductions.py -q - ${RM} -r *.py[co] __pycache__ diff --git a/demo/reductions/reductions.py b/demo/reductions/reductions.py deleted file mode 100644 index cff7ca1..0000000 --- a/demo/reductions/reductions.py +++ /dev/null @@ -1,103 +0,0 @@ -from mpi4pyve import MPI - -class Intracomm(MPI.Intracomm): - """ - Intracommunicator class with scalable, point-to-point based - implementations of global reduction operations. 
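# --------------------------------------------------------------------
# A sketch of how this Intracomm subclass is meant to be used,
# mirroring the unit tests in test_reductions.py (the import path
# assumes the script sits next to reductions.py):
#
#     from mpi4pyve import MPI
#     from reductions import Intracomm
#
#     comm = Intracomm(MPI.COMM_WORLD)
#     total = comm.allreduce(sendobj=comm.rank)   # sum over all ranks
#     prefix = comm.scan(sendobj=comm.rank)       # inclusive prefix sum
# --------------------------------------------------------------------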
- """ - - def __new__(cls, comm=None): - return super(Intracomm, cls).__new__(cls, comm) - - def reduce(self, sendobj=None, recvobj=None, op=MPI.SUM, root=0): - size = self.size - rank = self.rank - assert 0 <= root < size - tag = MPI.COMM_WORLD.Get_attr(MPI.TAG_UB)-1 - - recvobj = sendobj - mask = 1 - - while mask < size: - if (mask & rank) != 0: - target = (rank & ~mask) % size - self.send(recvobj, dest=target, tag=tag) - else: - target = (rank | mask) - if target < size: - tmp = self.recv(None, source=target, tag=tag) - recvobj = op(recvobj, tmp) - mask <<= 1 - - if root != 0: - if rank == 0: - self.send(recvobj, dest=root, tag=tag) - elif rank == root: - recvobj = self.recv(None, source=0, tag=tag) - - if rank != root: - recvobj = None - - return recvobj - - def allreduce(self, sendobj=None, recvobj=None, op=MPI.SUM): - recvobj = self.reduce(sendobj, recvobj, op, 0) - recvobj = self.bcast(recvobj, 0) - return recvobj - - def scan(self, sendobj=None, recvobj=None, op=MPI.SUM): - size = self.size - rank = self.rank - tag = MPI.COMM_WORLD.Get_attr(MPI.TAG_UB)-1 - - recvobj = sendobj - partial = sendobj - mask = 1 - - while mask < size: - target = rank ^ mask - if target < size: - tmp = self.sendrecv(partial, dest=target, source=target, - sendtag=tag, recvtag=tag) - if rank > target: - partial = op(tmp, partial) - recvobj = op(tmp, recvobj) - else: - tmp = op(partial, tmp) - partial = tmp - mask <<= 1 - - return recvobj - - def exscan(self, sendobj=None, recvobj=None, op=MPI.SUM): - size = self.size - rank = self.rank - tag = MPI.COMM_WORLD.Get_attr(MPI.TAG_UB)-1 - - recvobj = sendobj - partial = sendobj - mask = 1 - flag = False - - while mask < size: - target = rank ^ mask - if target < size: - tmp = self.sendrecv(partial, dest=target, source=target, - sendtag=tag, recvtag=tag) - if rank > target: - partial = op(tmp, partial) - if rank != 0: - if not flag: - recvobj = tmp - flag = True - else: - recvobj = op(tmp, recvobj) - else: - tmp = op(partial, tmp) - partial = tmp - mask <<= 1 - - if rank == 0: - recvobj = None - - return recvobj diff --git a/demo/reductions/runtests.bat b/demo/reductions/runtests.bat deleted file mode 100644 index 9fd4b1a..0000000 --- a/demo/reductions/runtests.bat +++ /dev/null @@ -1,16 +0,0 @@ -@echo off -setlocal ENABLEEXTENSIONS - -set MPI=Microsoft MPI -set PATH="%ProgramFiles%\%MPI%\bin";%PATH% - -set MPIEXEC=mpiexec -set NP_FLAG=-n -set NP=5 - -set PYTHON=C:\Python27\python.exe -set PYTHON=C:\Python36\python.exe -set PYTHON=python - -@echo on -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% test_reductions.py -q diff --git a/demo/reductions/runtests.sh b/demo/reductions/runtests.sh deleted file mode 100755 index c03dfdd..0000000 --- a/demo/reductions/runtests.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -MPIEXEC=mpiexec -NP_FLAG=-n -NP=5 - -PYTHON=python - -set -x -$MPIEXEC $NP_FLAG $NP $PYTHON test_reductions.py -q diff --git a/demo/reductions/test_reductions.py b/demo/reductions/test_reductions.py deleted file mode 100644 index 3522f1a..0000000 --- a/demo/reductions/test_reductions.py +++ /dev/null @@ -1,210 +0,0 @@ -#import mpi4pyve -#mpi4pyve.profile("mpe") -from mpi4pyve import MPI - -import unittest - -import sys, os -sys.path.insert(0, os.path.dirname(__file__)) -from reductions import Intracomm -del sys.path[0] - -class BaseTest(object): - - def test_reduce(self): - rank = self.comm.rank - size = self.comm.size - for root in range(size): - msg = rank - res = self.comm.reduce(sendobj=msg, root=root) - if self.comm.rank == root: - self.assertEqual(res, 
sum(range(size))) - else: - self.assertEqual(res, None) - - def test_reduce_min(self): - rank = self.comm.rank - size = self.comm.size - for root in range(size): - msg = rank - res = self.comm.reduce(sendobj=msg, op=MPI.MIN, root=root) - if self.comm.rank == root: - self.assertEqual(res, 0) - else: - self.assertEqual(res, None) - - def test_reduce_max(self): - rank = self.comm.rank - size = self.comm.size - for root in range(size): - msg = rank - res = self.comm.reduce(sendobj=msg, op=MPI.MAX, root=root) - if self.comm.rank == root: - self.assertEqual(res, size-1) - else: - self.assertEqual(res, None) - - def test_reduce_minloc(self): - rank = self.comm.rank - size = self.comm.size - for root in range(size): - msg = rank - res = self.comm.reduce(sendobj=(msg, rank), op=MPI.MINLOC, root=root) - if self.comm.rank == root: - self.assertEqual(res, (0, 0)) - else: - self.assertEqual(res, None) - - def test_reduce_maxloc(self): - rank = self.comm.rank - size = self.comm.size - for root in range(size): - msg = rank - res = self.comm.reduce(sendobj=(msg, rank), op=MPI.MAXLOC, root=root) - if self.comm.rank == root: - self.assertEqual(res, (size-1, size-1)) - else: - self.assertEqual(res, None) - - def test_allreduce(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.allreduce(sendobj=msg) - self.assertEqual(res, sum(range(size))) - - def test_allreduce_min(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.allreduce(sendobj=msg, op=MPI.MIN) - self.assertEqual(res, 0) - - def test_allreduce_max(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.allreduce(sendobj=msg, op=MPI.MAX) - self.assertEqual(res, size-1) - - def test_allreduce_minloc(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.allreduce(sendobj=(msg, rank), op=MPI.MINLOC) - self.assertEqual(res, (0, 0)) - - def test_allreduce_maxloc(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.allreduce(sendobj=(msg, rank), op=MPI.MAXLOC) - self.assertEqual(res, (size-1, size-1)) - - def test_scan(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.scan(sendobj=msg) - self.assertEqual(res, sum(list(range(size))[:rank+1])) - - def test_scan_min(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.scan(sendobj=msg, op=MPI.MIN) - self.assertEqual(res, 0) - - def test_scan_max(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.scan(sendobj=msg, op=MPI.MAX) - self.assertEqual(res, rank) - - def test_scan_minloc(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.scan(sendobj=(msg, rank), op=MPI.MINLOC) - self.assertEqual(res, (0, 0)) - - def test_scan_maxloc(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.scan(sendobj=(msg, rank), op=MPI.MAXLOC) - self.assertEqual(res, (rank, rank)) - - def test_exscan(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.exscan(sendobj=msg) - if self.comm.rank == 0: - self.assertEqual(res, None) - else: - self.assertEqual(res, sum(list(range(size))[:rank])) - - def test_exscan_min(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.exscan(sendobj=msg, op=MPI.MIN) - if self.comm.rank == 0: - self.assertEqual(res, None) - else: - self.assertEqual(res, 0) - - def test_exscan_max(self): - rank = self.comm.rank - 
size = self.comm.size - msg = rank - res = self.comm.exscan(sendobj=msg, op=MPI.MAX) - if self.comm.rank == 0: - self.assertEqual(res, None) - else: - self.assertEqual(res, rank-1) - - def test_exscan_minloc(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.exscan(sendobj=(msg, rank), op=MPI.MINLOC) - if self.comm.rank == 0: - self.assertEqual(res, None) - else: - self.assertEqual(res, (0, 0)) - - def test_exscan_maxloc(self): - rank = self.comm.rank - size = self.comm.size - msg = rank - res = self.comm.exscan(sendobj=(msg, rank), op=MPI.MAXLOC) - if self.comm.rank == 0: - self.assertEqual(res, None) - else: - self.assertEqual(res, (rank-1, rank-1)) - -class TestS(BaseTest, unittest.TestCase): - def setUp(self): - self.comm = Intracomm(MPI.COMM_SELF) - -class TestW(BaseTest, unittest.TestCase): - def setUp(self): - self.comm = Intracomm(MPI.COMM_WORLD) - -class TestSD(BaseTest, unittest.TestCase): - def setUp(self): - self.comm = Intracomm(MPI.COMM_SELF.Dup()) - def tearDown(self): - self.comm.Free() - -class TestWD(BaseTest, unittest.TestCase): - def setUp(self): - self.comm = Intracomm(MPI.COMM_WORLD.Dup()) - def tearDown(self): - self.comm.Free() - -if __name__ == "__main__": - unittest.main() diff --git a/demo/sequential/makefile b/demo/sequential/makefile deleted file mode 100644 index ce63ee1..0000000 --- a/demo/sequential/makefile +++ /dev/null @@ -1,10 +0,0 @@ -MPIEXEC=mpiexec -NP_FLAG=-n -NP=5 - -PYTHON=python - -.PHONY: test -test: - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test_seq.py - ${RM} -r *.py[co] __pycache__ diff --git a/demo/sequential/runtests.bat b/demo/sequential/runtests.bat deleted file mode 100644 index b219574..0000000 --- a/demo/sequential/runtests.bat +++ /dev/null @@ -1,16 +0,0 @@ -@echo off -setlocal ENABLEEXTENSIONS - -set MPI=Microsoft MPI -set PATH="%ProgramFiles%\%MPI%\bin";%PATH% - -set MPIEXEC=mpiexec -set NP_FLAG=-n -set NP=5 - -set PYTHON=C:\Python27\python.exe -set PYTHON=C:\Python36\python.exe -set PYTHON=python - -@echo on -%MPIEXEC% %NP_FLAG% %NP% %PYTHON% test_seq.py diff --git a/demo/sequential/runtests.sh b/demo/sequential/runtests.sh deleted file mode 100755 index e030e81..0000000 --- a/demo/sequential/runtests.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -MPIEXEC=mpiexec -NP_FLAG=-n -NP=5 - -PYTHON=python - -set -x -$MPIEXEC $NP_FLAG $NP $PYTHON test_seq.py diff --git a/demo/sequential/seq.py b/demo/sequential/seq.py deleted file mode 100644 index 1abe451..0000000 --- a/demo/sequential/seq.py +++ /dev/null @@ -1,52 +0,0 @@ -class Seq(object): - - """ - Sequential execution - """ - - def __init__(self, comm, ng=1, tag=0): - ng = int(ng) - tag = int(tag) - assert ng >= 1 - assert ng <= comm.Get_size() - self.comm = comm - self.ng = ng - self.tag = tag - - def __enter__(self): - self.begin() - return self - - def __exit__(self, *exc): - self.end() - return None - - def begin(self): - """ - Begin a sequential execution of a section of code - """ - comm = self.comm - size = comm.Get_size() - if size == 1: return - rank = comm.Get_rank() - ng = self.ng - tag = self.tag - if rank != 0: - comm.Recv([None, 'B'], rank - 1, tag) - if rank != (size - 1) and (rank % ng) < (ng - 1): - comm.Send([None, 'B'], rank + 1, tag) - - def end(self): - """ - End a sequential execution of a section of code - """ - comm = self.comm - size = comm.Get_size() - if size == 1: return - rank = comm.Get_rank() - ng = self.ng - tag = self.tag - if rank == (size - 1) or (rank % ng) == (ng - 1): - comm.Send([None, 'B'], (rank + 1) % size, 
tag) - if rank == 0: - comm.Recv([None, 'B'], size - 1, tag) diff --git a/demo/sequential/test_seq.py b/demo/sequential/test_seq.py deleted file mode 100644 index 12810d1..0000000 --- a/demo/sequential/test_seq.py +++ /dev/null @@ -1,22 +0,0 @@ -#import mpi4pyve -#mpi4pyve.profile("mpe") -from mpi4pyve import MPI - -import unittest - -import sys, os -sys.path.insert(0, os.path.dirname(__file__)) -from seq import Seq -del sys.path[0] - -def test(): - size = MPI.COMM_WORLD.Get_size() - rank = MPI.COMM_WORLD.Get_rank() - name = MPI.Get_processor_name() - with Seq(MPI.COMM_WORLD, 1, 10): - print( - "Hello, World! I am process %d of %d on %s." - % (rank, size, name)) - -if __name__ == "__main__": - test() diff --git a/demo/spawning/cpi-master.c b/demo/spawning/cpi-master.c deleted file mode 100644 index 0d84c8c..0000000 --- a/demo/spawning/cpi-master.c +++ /dev/null @@ -1,35 +0,0 @@ -#include -#include -#include -#include - -int main(int argc, char *argv[]) -{ - char cmd[32] = "./cpi-worker-c.exe"; - MPI_Comm worker; - int n; - double pi; - - MPI_Init(&argc, &argv); - - if (argc > 1) strcpy(cmd, argv[1]); - printf("%s -> %s\n", argv[0], cmd); - - MPI_Comm_spawn(cmd, MPI_ARGV_NULL, 5, - MPI_INFO_NULL, 0, - MPI_COMM_SELF, &worker, - MPI_ERRCODES_IGNORE); - - n = 100; - MPI_Bcast(&n, 1, MPI_INT, MPI_ROOT, worker); - - MPI_Reduce(MPI_BOTTOM, &pi, 1, MPI_DOUBLE, - MPI_SUM, MPI_ROOT, worker); - - MPI_Comm_disconnect(&worker); - - printf("pi: %.16f, error: %.16f\n", pi, fabs(M_PI-pi)); - - MPI_Finalize(); - return 0; -} diff --git a/demo/spawning/cpi-master.cxx b/demo/spawning/cpi-master.cxx deleted file mode 100644 index f119e8e..0000000 --- a/demo/spawning/cpi-master.cxx +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -#include -#include - -int main(int argc, char *argv[]) -{ - MPI::Init(); - - char cmd[32] = "./cpi-worker-cxx.exe"; - if (argc > 1) std::strcpy(cmd, argv[1]); - std::printf("%s -> %s\n", argv[0], cmd); - - MPI::Intercomm worker; - worker = MPI::COMM_SELF.Spawn(cmd, MPI::ARGV_NULL, 5, - MPI::INFO_NULL, 0); - - int n = 100; - worker.Bcast(&n, 1, MPI::INT, MPI::ROOT); - - double pi; - worker.Reduce(MPI::BOTTOM, &pi, 1, MPI::DOUBLE, - MPI::SUM, MPI::ROOT); - - worker.Disconnect(); - - std::printf("pi: %.16f, error: %.16f\n", pi, std::fabs(M_PI-pi)); - - MPI::Finalize(); - return 0; -} diff --git a/demo/spawning/cpi-master.f90 b/demo/spawning/cpi-master.f90 deleted file mode 100644 index 30451f9..0000000 --- a/demo/spawning/cpi-master.f90 +++ /dev/null @@ -1,45 +0,0 @@ -PROGRAM main - - USE mpi - implicit none - - real (kind=8), parameter :: PI = 3.1415926535897931D0 - - integer argc - character(len=32) argv(0:1) - - character(len=32) cmd - integer ierr, n, worker - real(kind=8) cpi - - call MPI_INIT(ierr) - - argc = iargc() + 1 - call getarg(0, argv(0)) - call getarg(1, argv(1)) - - cmd = './cpi-worker-f90.exe' - if (argc > 1) then - cmd = argv(1) - end if - write(*,'(A,A,A)') trim(argv(0)), ' -> ', trim(cmd) - - call MPI_COMM_SPAWN(cmd, MPI_ARGV_NULL, 5, & - MPI_INFO_NULL, 0, & - MPI_COMM_SELF, worker, & - MPI_ERRCODES_IGNORE, ierr) - - n = 100 - call MPI_BCAST(n, 1, MPI_INTEGER, & - MPI_ROOT, worker, ierr) - - call MPI_REDUCE(MPI_BOTTOM, cpi, 1, MPI_DOUBLE_PRECISION, & - MPI_SUM, MPI_ROOT, worker, ierr) - - call MPI_COMM_DISCONNECT(worker, ierr) - - write(*,'(A,F18.16,A,F18.16)') 'pi: ', cpi, ', error: ', abs(PI-cpi) - - call MPI_FINALIZE(ierr) - -END PROGRAM main diff --git a/demo/spawning/cpi-master.py b/demo/spawning/cpi-master.py deleted file mode 100644 index 133d602..0000000 
--- a/demo/spawning/cpi-master.py +++ /dev/null @@ -1,23 +0,0 @@ -from mpi4pyve import MPI -from array import array -from math import pi as PI -from sys import argv - -cmd = './cpi-worker-py.exe' -if len(argv) > 1: cmd = argv[1] -print("%s -> %s" % (argv[0], cmd)) - -worker = MPI.COMM_SELF.Spawn(cmd, None, 5) - -n = array('i', [100]) -worker.Bcast([n,MPI.INT], root=MPI.ROOT) - -pi = array('d', [0.0]) -worker.Reduce(sendbuf=None, - recvbuf=[pi, MPI.DOUBLE], - op=MPI.SUM, root=MPI.ROOT) -pi = pi[0] - -worker.Disconnect() - -print('pi: %.16f, error: %.16f' % (pi, abs(PI-pi))) diff --git a/demo/spawning/cpi-worker.c b/demo/spawning/cpi-worker.c deleted file mode 100644 index 7aed62a..0000000 --- a/demo/spawning/cpi-worker.c +++ /dev/null @@ -1,33 +0,0 @@ -#include - -int main(int argc, char *argv[]) -{ - int myrank, nprocs; - int n, i; - double h, s, pi; - MPI_Comm master; - - MPI_Init(&argc, &argv); - - MPI_Comm_get_parent(&master); - MPI_Comm_size(master, &nprocs); - MPI_Comm_rank(master, &myrank); - - MPI_Bcast(&n, 1, MPI_INT, 0, master); - - h = 1.0 / (double) n; - s = 0.0; - for (i = myrank+1; i < n+1; i += nprocs) { - double x = h * (i - 0.5); - s += 4.0 / (1.0 + x*x); - } - pi = s * h; - - MPI_Reduce(&pi, MPI_BOTTOM, 1, MPI_DOUBLE, - MPI_SUM, 0, master); - - MPI_Comm_disconnect(&master); - - MPI_Finalize(); - return 0; -} diff --git a/demo/spawning/cpi-worker.cxx b/demo/spawning/cpi-worker.cxx deleted file mode 100644 index 8d0eae2..0000000 --- a/demo/spawning/cpi-worker.cxx +++ /dev/null @@ -1,29 +0,0 @@ -#include - -int main(int argc, char *argv[]) -{ - MPI::Init(); - - MPI::Intercomm master = MPI::Comm::Get_parent(); - int nprocs = master.Get_size(); - int myrank = master.Get_rank(); - - int n; - master.Bcast(&n, 1, MPI_INT, 0); - - double h = 1.0 / (double) n; - double s = 0.0; - for (int i = myrank+1; i < n+1; i += nprocs) { - double x = h * (i - 0.5); - s += 4.0 / (1.0 + x*x); - } - double pi = s * h; - - master.Reduce(&pi, MPI_BOTTOM, 1, MPI_DOUBLE, - MPI_SUM, 0); - - master.Disconnect(); - - MPI::Finalize(); - return 0; -} diff --git a/demo/spawning/cpi-worker.f90 b/demo/spawning/cpi-worker.f90 deleted file mode 100644 index 9b20955..0000000 --- a/demo/spawning/cpi-worker.f90 +++ /dev/null @@ -1,32 +0,0 @@ -PROGRAM main - - USE mpi - implicit none - - integer ierr - integer n, i, master, myrank, nprocs - real (kind=8) h, s, x, cpi - - call MPI_INIT(ierr) - call MPI_COMM_GET_PARENT(master, ierr) - call MPI_COMM_SIZE(master, nprocs, ierr) - call MPI_COMM_RANK(master, myrank, ierr) - - call MPI_BCAST(n, 1, MPI_INTEGER, & - 0, master, ierr) - - h = 1 / DFLOAT(n) - s = 0.0 - DO i=myrank+1,n,nprocs - x = h * (DFLOAT(i) - 0.5) - s = s + 4.0 / (1.0 + x*x) - END DO - cpi = s * h - - call MPI_REDUCE(cpi, MPI_BOTTOM, 1, MPI_DOUBLE_PRECISION, & - MPI_SUM, 0, master, ierr) - - call MPI_COMM_DISCONNECT(master, ierr) - call MPI_FINALIZE(ierr) - -END PROGRAM main diff --git a/demo/spawning/cpi-worker.py b/demo/spawning/cpi-worker.py deleted file mode 100644 index 8862d54..0000000 --- a/demo/spawning/cpi-worker.py +++ /dev/null @@ -1,24 +0,0 @@ -from mpi4pyve import MPI -from array import array - -master = MPI.Comm.Get_parent() -nprocs = master.Get_size() -myrank = master.Get_rank() - -n = array('i', [0]) -master.Bcast([n, MPI.INT], root=0) -n = n[0] - -h = 1.0 / n -s = 0.0 -for i in range(myrank+1, n+1, nprocs): - x = h * (i - 0.5) - s += 4.0 / (1.0 + x**2) -pi = s * h - -pi = array('d', [pi]) -master.Reduce(sendbuf=[pi, MPI.DOUBLE], - recvbuf=None, - op=MPI.SUM, root=0) - -master.Disconnect() 
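
All of the cpi master/worker pairs above evaluate the same midpoint-rule
quadrature: with h = 1/n and x_i = h * (i - 0.5), pi is approximated as
h * sum over i = 1..n of 4 / (1 + x_i**2), and each worker sums every
nprocs-th term of that series. A minimal single-process sketch of the
same quadrature (plain Python, no MPI; ``comp_pi`` mirrors the helper of
the same name in cpilog.py further below)::

    def comp_pi(n):
        # midpoint rule for the integral of 4/(1+x^2) over [0, 1]
        h = 1.0 / n
        return h * sum(4.0 / (1.0 + (h * (i - 0.5)) ** 2)
                       for i in range(1, n + 1))

    # with n = 100 terms the result is already accurate to about 1e-5
    assert abs(comp_pi(100) - 3.141592653589793) < 1e-4
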
diff --git a/demo/spawning/makefile b/demo/spawning/makefile deleted file mode 100644 index 2017ba7..0000000 --- a/demo/spawning/makefile +++ /dev/null @@ -1,57 +0,0 @@ -.PHONY: default build test clean - -MPIEXEC=mpiexec -n 1 - -default: build test clean - -MASTERS = cpi-master-py.exe cpi-master-c.exe cpi-master-cxx.exe cpi-master-f90.exe -WORKERS = cpi-worker-py.exe cpi-worker-c.exe cpi-worker-cxx.exe cpi-worker-f90.exe - -build: ${MASTERS} ${WORKERS} - -LANGS=py c cxx f90 -test: build - @for i in ${LANGS}; do \ - for j in ${LANGS}; do \ - ${MPIEXEC} ./cpi-master-$$i.exe ./cpi-worker-$$j.exe; \ - done; \ - done - -clean: - ${RM} -r ${MASTERS} ${WORKERS} - - -MPICC=mpicc -MPICXX=mpicxx -MPIF90=mpif90 -ifneq (${MPI_FORTRAN_MOD_DIR},) -FFLAGS += -I${MPI_FORTRAN_MOD_DIR} -endif - -# Python -cpi-master-py.exe: cpi-master.py - echo '#!'`which python` > $@ - cat $< >> $@ - chmod +x $@ -cpi-worker-py.exe: cpi-worker.py - echo '#!'`which python` > $@ - cat $< >> $@ - chmod +x $@ - -# C -cpi-master-c.exe: cpi-master.c - ${MPICC} $< -o $@ -cpi-worker-c.exe: cpi-worker.c - ${MPICC} $< -o $@ - -# C++ -cpi-master-cxx.exe: cpi-master.cxx - ${MPICXX} $< -o $@ -cpi-worker-cxx.exe: cpi-worker.cxx - ${MPICXX} $< -o $@ - -# Fortran 90 -cpi-master-f90.exe: cpi-master.f90 - ${MPIF90} ${FFLAGS} $< -o $@ -cpi-worker-f90.exe: cpi-worker.f90 - ${MPIF90} ${FFLAGS} $< -o $@ diff --git a/demo/test-run/makefile b/demo/test-run/makefile deleted file mode 100644 index ef0bd20..0000000 --- a/demo/test-run/makefile +++ /dev/null @@ -1,18 +0,0 @@ -.PHONY: default -default: build test clean - -PYTHON = python - -.PHONY: build -build: - mkdir -p run-directory/ - cp run-script.py run-directory/__main__.py - zip -qj run-zipfile.zip run-directory/__main__.py - -.PHONY: test -test: - ${PYTHON} test_run.py -v - -.PHONY: clean -clean: - ${RM} -r run-directory run-zipfile.zip diff --git a/demo/test-run/run-script.py b/demo/test-run/run-script.py deleted file mode 100644 index 9fc4075..0000000 --- a/demo/test-run/run-script.py +++ /dev/null @@ -1,43 +0,0 @@ -from mpi4pyve import MPI -import sys, os, optparse - -assert __name__ == '__main__' -from os.path import split, splitext, dirname, realpath -dirname = dirname(__file__) -assert sys.path[0] == realpath(dirname) -if split(__file__)[1] == '__main__.py': - if splitext(dirname)[0] == '.zip': - assert sys.argv[0] == dirname - else: - assert realpath(sys.argv[0]) == realpath(dirname) -else: - assert sys.argv[0] == __file__ - -parser = optparse.OptionParser() -parser.add_option("--rank", action='store', - type='int', dest="rank", default=0) -parser.add_option("--sys-exit", action='store', - type='int', dest="sys_exit", default=None) -parser.add_option("--sys-exit-msg", action="store", - type="string", dest="sys_exit", default=None) -parser.add_option("--exception", action="store", - type="string", dest="exception", default=None) -(options, args) = parser.parse_args() -assert not args - -comm = MPI.COMM_WORLD -if comm.rank == options.rank: - if options.sys_exit: - sys.exit(options.sys_exit) - if options.exception: - raise RuntimeError(options.exception) - -comm.Barrier() -if comm.rank > 0: - comm.Recv([None, 'B'], comm.rank - 1) -print("Hello, World!") -if comm.rank < comm.size - 1: - comm.Send([None, 'B'], comm.rank + 1) -comm.Barrier() - -sys.exit() diff --git a/demo/test-run/test_run.py b/demo/test-run/test_run.py deleted file mode 100644 index 0c7df8b..0000000 --- a/demo/test-run/test_run.py +++ /dev/null @@ -1,173 +0,0 @@ -import sys, os, shlex -import subprocess as sp -import 
unittest -import mpi4pyve - -def find_executable(exe): - from distutils.spawn import find_executable as find_exe - command = shlex.split(exe) - executable = find_exe(command[0]) - if executable: - command[0] = executable - return ' '.join(command) - -def find_mpiexec(mpiexec='mpiexec'): - mpiexec = os.environ.get('MPIEXEC') or mpiexec - mpiexec = find_executable(mpiexec) - if not mpiexec and sys.platform.startswith('win'): - MSMPI_BIN = os.environ.get('MSMPI_BIN', '') - mpiexec = os.path.join(MSMPI_BIN, mpiexec) - mpiexec = find_executable(mpiexec) - if not mpiexec: - mpiexec = find_executable('mpirun') - return mpiexec - -def launcher(np): - mpiexec = find_mpiexec() - python = sys.executable - if 'coverage' in sys.modules: - python += ' -m coverage run -p -m' - module = 'mpi4pyve.run -rc threads=False' - command = '{mpiexec} -n {np} {python} -m {module}' - return shlex.split(command.format(**vars())) - -def execute(np, command, args=''): - env = os.environ.copy() - pypath = os.environ.get('PYTHONPATH', '').split(os.pathsep) - pypath.insert(0, os.path.abspath(os.path.dirname(mpi4pyve.__path__[0]))) - env['PYTHONPATH'] = os.pathsep.join(pypath) - if isinstance(command, str): - command = shlex.split(command) - if isinstance(args, str): - args = shlex.split(args) - cmdline = launcher(np) + command + args - p = sp.Popen(cmdline, stdout=sp.PIPE, stderr=sp.PIPE, env=env, bufsize=0) - stdout, stderr = p.communicate() - return p.returncode, stdout.decode(), stderr.decode() - - -class BaseTestRun(object): - - def assertMPIAbort(self, stdout, stderr): - if not ('MPI_Abort' in stdout or 'MPI_ABORT' in stdout or - 'MPI_Abort' in stderr or 'MPI_ABORT' in stderr): - msg = ("expecting MPI_Abort() message in stdout/stderr:\n" - "[stdout]:\n{0}\n[stderr]:\n{1}\n").format(stdout, stderr) - raise self.failureException(msg) - - -class TestRunScript(BaseTestRun, unittest.TestCase): - pyfile = 'run-script.py' - - def execute(self, args='', np=1): - dirname = os.path.abspath(os.path.dirname(__file__)) - script = os.path.join(dirname, self.pyfile) - return execute(np, script, args) - - def testSuccess(self): - success = 'Hello, World!' 
- for np in (1, 2, 3): - status, stdout, stderr = self.execute(np=np) - self.assertEqual(status, 0) - self.assertEqual(stderr, '') - self.assertEqual(stdout.count(success), np) - - def testException(self): - message = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - excmess = 'RuntimeError: {0}'.format(message) - for np in (1, 2, 3): - for rank in range(0, np): - args = ['--rank', str(rank), '--exception', message] - status, stdout, stderr = self.execute(args, np) - self.assertEqual(status, 1) - self.assertMPIAbort(stdout, stderr) - self.assertTrue(excmess in stderr) - - def testSysExitCode(self): - errcode = 7 - for np in (1, 2, 3): - for r in sorted(set([0, np-1])): - args = ['--rank', str(r), '--sys-exit', str(errcode)] - status, stdout, stderr = self.execute(args, np) - self.assertTrue(status in (errcode, 1)) - self.assertMPIAbort(stdout, stderr) - self.assertTrue('Traceback' not in stderr) - - def testSysExitMess(self): - exitmsg = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - for np in (1, 2, 3): - for r in sorted(set([0, np-1])): - args = ['--rank', str(r), '--sys-exit-msg', exitmsg] - status, stdout, stderr = self.execute(args, np) - self.assertEqual(status, 1) - self.assertMPIAbort(stdout, stderr) - self.assertTrue('Traceback' not in stderr) - self.assertTrue(exitmsg in stderr) - -if os.path.exists(os.path.join(os.path.dirname(__file__), 'run-directory')): - class TestRunDirectory(TestRunScript): - pyfile = 'run-directory' - -if os.path.exists(os.path.join(os.path.dirname(__file__), 'run-zipfile.zip')): - class TestRunZipFile(TestRunScript): - pyfile = 'run-zipfile.zip' - - -class TestRunModule(BaseTestRun, unittest.TestCase): - - def execute(self, module, np=1): - return execute(np, '-m', module) - - def testSuccess(self): - module = 'mpi4pyve.bench --no-threads helloworld' - message = 'Hello, World!' 
-        for np in (1, 2, 3):
-            status, stdout, stderr = self.execute(module, np)
-            self.assertEqual(status, 0)
-            self.assertEqual(stdout.count(message), np)
-            self.assertEqual(stderr, '')
-
-
-class TestRunCommand(BaseTestRun, unittest.TestCase):
-
-    def execute(self, command, np=1):
-        return execute(np, '-c', command)
-
-    def testArgv0(self):
-        command = '"import sys; print(sys.argv[0])"'
-        status, stdout, stderr = self.execute(command, 1)
-        self.assertEqual(status, 0)
-        self.assertEqual(stdout, '-c\n')
-        self.assertEqual(stderr, '')
-
-    def testSuccess(self):
-        command = '"from mpi4pyve import MPI"'
-        for np in (1, 2, 3):
-            status, stdout, stderr = self.execute(command, np)
-            self.assertEqual(status, 0)
-            self.assertEqual(stdout, '')
-            self.assertEqual(stderr, '')
-
-    def testException(self):
-        command = '"from mpi4pyve import MPI; 1/0 if MPI.COMM_WORLD.Get_rank()==0 else 0;"'
-        excmess = 'ZeroDivisionError:'
-        for np in (1, 2, 3):
-            for rank in range(0, np):
-                status, stdout, stderr = self.execute(command, np)
-                self.assertEqual(status, 1)
-                self.assertMPIAbort(stdout, stderr)
-                self.assertTrue(excmess in stderr)
-
-
-if not find_mpiexec():
-    del TestRunScript
-    try: del TestRunDirectory
-    except: pass
-    try: del TestRunZipFile
-    except: pass
-    del TestRunModule
-    del TestRunCommand
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/demo/thermal/README.rst b/demo/thermal/README.rst
new file mode 100644
index 0000000..4e353b8
--- /dev/null
+++ b/demo/thermal/README.rst
@@ -0,0 +1,53 @@
+Performs a 3D thermal simulation
+
+
+Issuing at the command line for VH::
+
+    $ mpirun -veo -np 8 python thermal.py -dev vh
+    loop_cnt:          0, l2_norm:   560.756531
+    loop_cnt:       1000, l2_norm:     3.863911
+    loop_cnt:       2000, l2_norm:     1.941148
+    loop_cnt:       3000, l2_norm:     1.204764
+    loop_cnt:       4000, l2_norm:     0.803408
+    loop_cnt:       5000, l2_norm:     0.552903
+    loop_cnt:       6000, l2_norm:     0.386492
+    loop_cnt:       7000, l2_norm:     0.272347
+    loop_cnt:       8000, l2_norm:     0.192709
+    loop_cnt:       9000, l2_norm:     0.136654
+    loop_cnt:      10000, l2_norm:     0.097005
+    loop_cnt:      11000, l2_norm:     0.068903
+    loop_cnt:      12000, l2_norm:     0.048968
+    loop_cnt:      13000, l2_norm:     0.034811
+    loop_cnt:      14000, l2_norm:     0.024766
+    loop_cnt:      15000, l2_norm:     0.017631
+    loop_cnt:      16000, l2_norm:     0.012578
+    loop_cnt:      17000, l2_norm:     0.009005
+    elapsed: 12.070626020431519
+
+
+Issuing at the command line for VE::
+
+    $ VE_NLCPY_NODELIST=0,1,2,3 mpirun -veo -np 4 python thermal.py -dev ve
+    loop_cnt:          0, l2_norm:   560.756592
+    loop_cnt:       1000, l2_norm:     3.863911
+    loop_cnt:       2000, l2_norm:     1.941150
+    loop_cnt:       3000, l2_norm:     1.204767
+    loop_cnt:       4000, l2_norm:     0.803411
+    loop_cnt:       5000, l2_norm:     0.552902
+    loop_cnt:       6000, l2_norm:     0.386493
+    loop_cnt:       7000, l2_norm:     0.272352
+    loop_cnt:       8000, l2_norm:     0.192710
+    loop_cnt:       9000, l2_norm:     0.136653
+    loop_cnt:      10000, l2_norm:     0.097009
+    loop_cnt:      11000, l2_norm:     0.068909
+    loop_cnt:      12000, l2_norm:     0.048973
+    loop_cnt:      13000, l2_norm:     0.034821
+    loop_cnt:      14000, l2_norm:     0.024770
+    loop_cnt:      15000, l2_norm:     0.017647
+    loop_cnt:      16000, l2_norm:     0.012596
+    loop_cnt:      17000, l2_norm:     0.009027
+    elapsed: 3.367392063140869
+
+
+..
image:: ./img/img_thermal_nlcpy.png + :alt: img_thermal_simulation diff --git a/demo/thermal/img/img_thermal_nlcpy.png b/demo/thermal/img/img_thermal_nlcpy.png new file mode 100644 index 0000000..15a9e1d Binary files /dev/null and b/demo/thermal/img/img_thermal_nlcpy.png differ diff --git a/demo/thermal/img/img_thermal_numpy.png b/demo/thermal/img/img_thermal_numpy.png new file mode 100644 index 0000000..15a9e1d Binary files /dev/null and b/demo/thermal/img/img_thermal_numpy.png differ diff --git a/demo/thermal/makefile b/demo/thermal/makefile new file mode 100644 index 0000000..675dfde --- /dev/null +++ b/demo/thermal/makefile @@ -0,0 +1,7 @@ +.PHONY: test + +PYTHON=python + +test: + echo "Thermal Simulation on VH (8 process)" && mpirun -veo -np 8 ${PYTHON} thermal.py -dev vh + echo "Thermal Simulation on 4VE (4 process)" && VE_NLCPY_NODELIST=0,1,2,3 mpirun -veo -np 4 ${PYTHON} thermal.py -dev ve diff --git a/demo/thermal/thermal.py b/demo/thermal/thermal.py new file mode 100644 index 0000000..9239efb --- /dev/null +++ b/demo/thermal/thermal.py @@ -0,0 +1,257 @@ +from mpi4pyve import MPI +from mpi4pyve import util +from matplotlib import pyplot as plt +import argparse + +NX = 100 # The number of grid points in X-direction. +NY = 100 # The number of grid points in Y-direction. +NZ = 100 # The number of grid points in Z-direction. +DT = 0.001 # The time step interval. +CHECK_INTERVAL = 1000 # The number of time steps for checking convergence. +LX = NX * 1e-3 +LY = NY * 1e-3 +LZ = NZ * 1e-3 +T0 = 20.0 +T1 = 60.0 +T2 = 80.0 +HC = 398.0 / (8960.0 * 385.0) +DTYPE = 'float32' +TOLERANCE = 1e-2 + +def initialize(grid, dev): + grid.fill(T0) + grid[:, :, 0] = T1 * dev.sin( + dev.linspace(0, dev.pi, grid.shape[0]))[:, None] + grid[:, 0, :] = T2 * dev.sin( + dev.linspace(0, dev.pi, grid.shape[0]))[:, None] + +def get_count_and_displs(rank, size): + offset = 0 + count = [] + displs = [] + for r in range(size): + lz_s = NZ * r // size + lz_e = NZ * (r + 1) // size + count.append(lz_e - lz_s + 2) + displs.append(lz_s) + return count, displs + +def create_stencil_kernel(grid_work1, grid_work2, coef, vp): + kernels = [] + dgrid1, dgrid2 = vp.sca.create_descriptor((grid_work1, grid_work2)) + # input: grid_work1, output: grid_work2 + desc = ((dgrid1[0, 0, -1] + dgrid1[0, 0, 1]) * coef[0] + + (dgrid1[0, -1, 0] + dgrid1[0, 1, 0]) * coef[1] + + (dgrid1[-1, 0, 0] + dgrid1[1, 0, 0]) * coef[2] + + dgrid1[0, 0, 0] * coef[3]) + kernels.append(vp.sca.create_kernel(desc, desc_o=dgrid2[0, 0, 0])) + # input: grid_work2, output: grid_work1 + desc = ((dgrid2[0, 0, -1] + dgrid2[0, 0, 1]) * coef[0] + + (dgrid2[0, -1, 0] + dgrid2[0, 1, 0]) * coef[1] + + (dgrid2[-1, 0, 0] + dgrid2[1, 0, 0]) * coef[2] + + dgrid2[0, 0, 0] * coef[3]) + kernels.append(vp.sca.create_kernel(desc, desc_o=dgrid1[0, 0, 0])) + return kernels + +def execute_naive(grid_in, grid_out, coef): + grid_out[1:-1, 1:-1, 1:-1] = ( + (grid_in[1:-1, 1:-1, 0:-2] + grid_in[1:-1, 1:-1, 2:]) * coef[0] + + (grid_in[1:-1, 0:-2, 1:-1] + grid_in[1:-1, 2:, 1:-1]) * coef[1] + + (grid_in[0:-2, 1:-1, 1:-1] + grid_in[2:, 1:-1, 1:-1]) * coef[2] + + grid_in[1:-1, 1:-1, 1:-1] * coef[3]) + return grid_out + +def exchange_data(grid, comm, rank, size): + """ Exchange local boundary data + '-' indicates xy planar. 
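+
+    Each rank holds a z-slab of the grid with one ghost plane on either
+    side (grid[0] and grid[-1]); the two Sendrecv calls below fill those
+    ghost planes from the neighbours' outermost interior planes
+    (grid[1] and grid[-2]).
+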
+ grid_root: ----------- + * transfer to upper process + rank0 : ----- + ^ + | + rank1 : ----- + ^ + | + rank2 : ----- + * transfer to lower process + rank0 : ----- + | + v + rank1 : ----- + | + v + rank2 : ----- + """ + if size == 1: return + # transfer to upper process + if rank == 0: + peer_src = rank + 1 + peer_dst = MPI.PROC_NULL + elif rank == size - 1: + peer_src = MPI.PROC_NULL + peer_dst = rank - 1 + else: + peer_src = rank + 1 + peer_dst = rank - 1 + comm.Sendrecv(grid[1], dest=peer_dst, recvbuf=grid[-1], source=peer_src) + # transfer to lower process + if rank == 0: + peer_src = MPI.PROC_NULL + peer_dst = rank + 1 + elif rank == size - 1: + peer_src = rank - 1 + peer_dst = MPI.PROC_NULL + else: + peer_src = rank - 1 + peer_dst = rank + 1 + comm.Sendrecv(grid[-2], dest=peer_dst, recvbuf=grid[0], source=peer_src) + +def get_l2_norm(grid_work1, grid_work2, comm, dev): + norm_local = dev.power( + grid_work1[1:-1, 1:-1, 1:-1] - grid_work2[1:-1, 1:-1, 1:-1], 2).sum() + l2_norm = dev.zeros_like(norm_local) + comm.Allreduce(norm_local, l2_norm, op=MPI.SUM) + l2_norm = dev.sqrt(l2_norm) + return float(l2_norm) + +def scatter_to_local_grid(grid_root, grid_local, count, displs, comm, rank, size): + if size == 1: + grid_local[...] = grid_root + else: + if rank == 0: + begin = displs[0] + end = begin + count[0] + grid_local[...] = grid_root[begin:end] + for r in range(1, size): + if rank == 0: + begin = displs[r] + end = begin + count[r] + comm.Send(grid_root[begin:end], dest=r) + elif rank == r: + comm.Recv(grid_local, source=0) + +def gather_from_local_grid(grid_root, grid_local, count, displs, comm, rank, size): + if size == 1: + grid_root[...] = grid_local + else: + if rank == 0: + begin = displs[0] + end = begin + count[0] + grid_root[begin:end] = grid_local + for r in range(1, size): + if rank == 0: + begin = displs[r] + end = begin + count[r] + comm.Recv(grid_root[begin:end], source=r) + elif rank == r: + comm.Send(grid_local, dest=0) + +def draw(fig, ax, xx, yy, grid, z, t): + ax.set_xlabel("x[m]") + ax.set_ylabel("y[m]") + ax.set_title("z = {:4.3f} [m], timestep = {:>10d}".format(z, t)) + c = ax.pcolormesh(xx, yy, grid, cmap='coolwarm', vmin=0, vmax=100) + return c + +def thermal(dev): + comm = MPI.COMM_WORLD + size = comm.Get_size() + rank = comm.Get_rank() + dx = LX / (NX + 1) + dy = LY / (NY + 1) + dz = LZ / (NZ + 1) + coef = [ + (HC * DT) / (dx * dx), + (HC * DT) / (dy * dy), + (HC * DT) / (dz * dz), + 1.0 - 2.0 * HC * DT * (1 / (dx * dx) + 1 / (dy * dy) + 1 / (dz * dz)), + ] + mx = NX + 2 + my = NY + 2 + mz = NZ + 2 + # create base grid on root process + if rank == 0: + x = dev.linspace(0, LX, mx) + y = dev.linspace(0, LY, my) + z = dev.linspace(0, LZ, mz) + zz, yy, xx = dev.meshgrid(z, y, x, indexing='ij') + grid_root = dev.empty((mz, my, mx), dtype=DTYPE) + initialize(grid_root, dev) + else: + grid_root = None + # draw initial grid + if rank == 0: + fig, axes = plt.subplots(3, 2, figsize=(9, 9), constrained_layout=True) + zstep = dev.linspace(0, mz, 5, dtype=int)[1:-1] + for i, ax in enumerate(axes[:, 0]): + zind = int(zstep[i]) + c = draw(fig, ax, xx[zind, :, :], yy[zind, :, :], + grid_root[zind, :, :], float(LZ * zind / mz), 0) + # create local grid + count, displs = get_count_and_displs(rank, size) + lz_d = count[rank] + grid_work1 = dev.empty((lz_d, my, mx), dtype=DTYPE) + grid_work2 = dev.empty((lz_d, my, mx), dtype=DTYPE) + scatter_to_local_grid(grid_root, grid_work1, count, displs, comm, rank, size) + grid_work2[...] 
= grid_work1 + + if dev.__name__ == 'nlcpy': + # create stencil kernels + kernels = create_stencil_kernel(grid_work1, grid_work2, coef, dev) + + # execute difference method + comm.Barrier() + t0 = MPI.Wtime() + loop_cnt = 0 + while True: + if dev.__name__ == 'nlcpy': + grid = kernels[loop_cnt % 2].execute() + else: + grid = execute_naive( + grid_work1 if loop_cnt % 2 == 0 else grid_work2, + grid_work2 if loop_cnt % 2 == 0 else grid_work1, + coef) + exchange_data(grid, comm, rank, size) + if loop_cnt % CHECK_INTERVAL == 0: # check convergence + l2_norm = get_l2_norm(grid_work1, grid_work2, comm, dev) + if rank == 0: print("loop_cnt: {:>10d}, l2_norm: {:>12.6f}".format(loop_cnt, l2_norm)) + if l2_norm < TOLERANCE: + break + loop_cnt += 1 + comm.Barrier() + t1 = MPI.Wtime() + if rank == 0: + print("elapsed:", t1 - t0) + + gather_from_local_grid(grid_root, grid, count, displs, comm, rank, size) + # draw latest grid and save figure + if rank == 0: + for i, ax in enumerate(axes[:, 1]): + zind = int(zstep[i]) + c = draw(fig, ax, xx[zind, :, :], yy[zind, :, :], + grid_root[zind, :, :], float(LZ * zind / mz), loop_cnt) + fig.colorbar(c, ax=axes[:, 1], location='bottom', label='T[$^{\circ}$C]') + plt.savefig('img_thermal_{}.png'.format(dev.__name__)) + + if dev.__name__ == 'nlcpy': + # destroy stencil kernels + for kern in kernels: + vp.sca.destroy_kernel(kern) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('-dev', type=str, required=True, choices=['vh', 've'], + help='Execution device') + args = parser.parse_args() + + # set module + if args.dev == 'vh': + import numpy as np + dev = np + elif args.dev == 've': + import nlcpy as vp + dev = vp + else: + raise ValueError + + thermal(dev) diff --git a/demo/threads/makefile b/demo/threads/makefile deleted file mode 100644 index 845e13d..0000000 --- a/demo/threads/makefile +++ /dev/null @@ -1,12 +0,0 @@ -.PHONY: default build test clean - -default: build test clean - -PYTHON=python - -build: - -test: - ${PYTHON} sendrecv.py - -clean: \ No newline at end of file diff --git a/demo/threads/sendrecv.py b/demo/threads/sendrecv.py deleted file mode 100644 index a843b03..0000000 --- a/demo/threads/sendrecv.py +++ /dev/null @@ -1,48 +0,0 @@ -from mpi4pyve import MPI -import sys - -if MPI.Query_thread() < MPI.THREAD_MULTIPLE: - sys.stderr.write("MPI does not provide enough thread support\n") - sys.exit(0) - -try: - import threading -except ImportError: - sys.stderr.write("threading module not available\n") - sys.exit(0) - -try: - import numpy -except ImportError: - sys.stderr.write("NumPy package not available\n") - sys.exit(0) - -send_msg = numpy.arange(1000000, dtype='i') -recv_msg = numpy.zeros_like(send_msg) - -start_event = threading.Event() - -def self_send(): - start_event.wait() - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - comm.Send([send_msg, MPI.INT], dest=rank, tag=0) - -def self_recv(): - start_event.wait() - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - comm.Recv([recv_msg, MPI.INT], source=rank, tag=0) - -send_thread = threading.Thread(target=self_send) -recv_thread = threading.Thread(target=self_recv) - -for t in (recv_thread, send_thread): - t.start() -assert not numpy.allclose(send_msg, recv_msg) - -start_event.set() - -for t in (recv_thread, send_thread): - t.join() -assert numpy.allclose(send_msg, recv_msg) diff --git a/demo/vampirtrace/cpilog.py b/demo/vampirtrace/cpilog.py deleted file mode 100644 index 3b119e5..0000000 --- a/demo/vampirtrace/cpilog.py +++ /dev/null @@ -1,52 +0,0 @@ 
-#!/usr/bin/env python - -# If you want VampirTrace to log MPI calls, you have to add the two -# lines below at the very beginning of your main bootstrap script. -import mpi4pyve -mpi4pyve.rc.threads = False -mpi4pyve.profile('vt', logfile='cpilog') - -# Import the MPI extension module -from mpi4pyve import MPI - -# Import the 'array' module -from array import array - -# This is just to make the logging -# output a bit more interesting -from time import sleep - -comm = MPI.COMM_WORLD -nprocs = comm.Get_size() -myrank = comm.Get_rank() - -n = array('i', [0]) -pi = array('d', [0]) -mypi = array('d', [0]) - -def comp_pi(n, myrank=0, nprocs=1): - h = 1.0 / n; - s = 0.0; - for i in range(myrank + 1, n + 1, nprocs): - x = h * (i - 0.5); - s += 4.0 / (1.0 + x**2); - return s * h - -comm.Barrier() - -for N in [10000]*10: - - if myrank == 0: - n[0] = N - - comm.Bcast([n, MPI.INT], root=0) - - mypi[0] = comp_pi(n[0], myrank, nprocs) - - comm.Reduce([mypi, MPI.DOUBLE], - [pi, MPI.DOUBLE], - op=MPI.SUM, root=0) - - comm.Barrier() - - sleep(0.01) diff --git a/demo/vampirtrace/makefile b/demo/vampirtrace/makefile deleted file mode 100644 index fcafdb5..0000000 --- a/demo/vampirtrace/makefile +++ /dev/null @@ -1,37 +0,0 @@ -MPIEXEC = mpiexec -PYTHON = python -N = 8 - -.PHONY: default -default: build test clean - - -.PHONY: run-cpilog run-ring run-threads run -run: run-cpilog run-ring run-threads -run-cpilog: - ${MPIEXEC} -n ${N} ${PYTHON} cpilog.py -run-ring: - ${MPIEXEC} -n ${N} ${PYTHON} ring.py -run-threads: - ${MPIEXEC} -n ${N} ${PYTHON} threads.py - -.PHONY: view-cpilog view-ring view-threads view -view: view-cpilog view-ring view-threads -view-cpilog: cpilog.otf -view-ring: ring.otf -view-threads: threads.otf - -cpilog.otf: run-cpilog -ring.otf: run-ring -threads.otf: run-threads - -.PHONY: build -build: - -.PHONY: test -test: run - -.PHONY: clean -clean: - ${RM} *.otf *.uctl *.*.def.z *.*.events.z *.*.marker.z - ${RM} *.thumb *.*.def *.*.events diff --git a/demo/vampirtrace/ring.py b/demo/vampirtrace/ring.py deleted file mode 100644 index eee0ea1..0000000 --- a/demo/vampirtrace/ring.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python - -# If you want VampirTrace to log MPI calls, you have to add the two -# lines below at the very beginning of your main bootstrap script. 
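-# (The profiling hook must be installed before the MPI extension
-# module is first imported, so these lines cannot be moved below
-# the 'from mpi4pyve import MPI' statement.)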
-import mpi4pyve -mpi4pyve.rc.threads = False -mpi4pyve.profile('vt-mpi', logfile='ring') - -from mpi4pyve import MPI - -comm = MPI.COMM_WORLD -size = comm.Get_size() -rank = comm.Get_rank() - -src = rank-1 -dest = rank+1 -if rank == 0: - src = size-1 -if rank == size-1: - dest = 0 - -try: - from numpy import zeros - a1 = zeros(1000000, 'd') - a2 = zeros(1000000, 'd') -except ImportError: - from array import array - a1 = array('d', [0]*1000); a1 *= 1000 - a2 = array('d', [0]*1000); a2 *= 1000 - -comm.Sendrecv(sendbuf=a1, recvbuf=a2, - source=src, dest=dest) - -MPI.Request.Waitall([ - comm.Isend(a1, dest=dest), - comm.Irecv(a2, source=src), - ]) diff --git a/demo/vampirtrace/threads.py b/demo/vampirtrace/threads.py deleted file mode 100644 index d2f8d32..0000000 --- a/demo/vampirtrace/threads.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python - -import mpi4pyve -mpi4pyve.rc.threads = True -mpi4pyve.rc.thread_level = "funneled" -mpi4pyve.profile('vt-hyb', logfile='threads') - -from mpi4pyve import MPI -from threading import Thread - -MPI.COMM_WORLD.Barrier() - -# Understanding the Python GIL -# David Beazley, http://www.dabeaz.com -# PyCon 2010, Atlanta, Georgia -# http://www.dabeaz.com/python/UnderstandingGIL.pdf - -# Consider this trivial CPU-bound function -def countdown(n): - while n > 0: - n -= 1 - -# Run it once with a lot of work -COUNT = 10000000 # 10 millon -tic = MPI.Wtime() -countdown(COUNT) -toc = MPI.Wtime() -print ("sequential: %f seconds" % (toc-tic)) - -# Now, subdivide the work across two threads -t1 = Thread(target=countdown, args=(COUNT//2,)) -t2 = Thread(target=countdown, args=(COUNT//2,)) -tic = MPI.Wtime() -for t in (t1, t2): t.start() -for t in (t1, t2): t.join() -toc = MPI.Wtime() -print ("threaded: %f seconds" % (toc-tic)) diff --git a/demo/wrap-boost/helloworld.cxx b/demo/wrap-boost/helloworld.cxx deleted file mode 100644 index f4598d4..0000000 --- a/demo/wrap-boost/helloworld.cxx +++ /dev/null @@ -1,49 +0,0 @@ -#include -#include - -static void sayhello(MPI_Comm comm) -{ - if (comm == MPI_COMM_NULL) { - std::cout << "You passed MPI_COMM_NULL !!!" << std::endl; - return; - } - int size; - MPI_Comm_size(comm, &size); - int rank; - MPI_Comm_rank(comm, &rank); - int plen; char pname[MPI_MAX_PROCESSOR_NAME]; - MPI_Get_processor_name(pname, &plen); - std::cout << - "Hello, World! " << - "I am process " << rank << - " of " << size << - " on " << pname << - "." 
<< std::endl; -} - - -#include -#include -using namespace boost::python; - -static void hw_sayhello(object py_comm) -{ - PyObject* py_obj = py_comm.ptr(); - MPI_Comm *comm_p = PyMPIComm_Get(py_obj); - if (comm_p == NULL) throw_error_already_set(); - sayhello(*comm_p); -} - -BOOST_PYTHON_MODULE(helloworld) -{ - if (import_mpi4pyve() < 0) return; /* Python 2.X */ - - def("sayhello", hw_sayhello); -} - - -/* - * Local Variables: - * mode: C++ - * End: - */ diff --git a/demo/wrap-boost/makefile b/demo/wrap-boost/makefile deleted file mode 100644 index 95ad41f..0000000 --- a/demo/wrap-boost/makefile +++ /dev/null @@ -1,31 +0,0 @@ -.PHONY: default -default: build test clean - -PYTHON = python -PYTHON_CONFIG = ${PYTHON} ../python-config -MPI4PY_INCLUDE = ${shell ${PYTHON} -c 'import mpi4pyve; print( mpi4pyve.get_include() )'} -BOOST_INCS = -BOOST_LIBS = -lboost_python - - -MPICXX = mpicxx -CXXFLAGS = -fPIC ${shell ${PYTHON_CONFIG} --includes} ${BOOST_INCS} -LDFLAGS = -shared ${shell ${PYTHON_CONFIG} --libs} ${BOOST_LIBS} -SO = ${shell ${PYTHON_CONFIG} --extension-suffix} -.PHONY: build -build: helloworld${SO} -helloworld${SO}: helloworld.cxx - ${MPICXX} ${CXXFLAGS} -I${MPI4PY_INCLUDE} -o $@ $< ${LDFLAGS} - - -MPIEXEC = mpiexec -NP_FLAG = -n -NP = 5 -.PHONY: test -test: build - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test.py - - -.PHONY: clean -clean: - ${RM} helloworld${SO} diff --git a/demo/wrap-boost/test.py b/demo/wrap-boost/test.py deleted file mode 100644 index bf1ab30..0000000 --- a/demo/wrap-boost/test.py +++ /dev/null @@ -1,15 +0,0 @@ -from mpi4pyve import MPI -import helloworld as hw - -null = MPI.COMM_NULL -hw.sayhello(null) - -comm = MPI.COMM_WORLD -hw.sayhello(comm) - -try: - hw.sayhello(list()) -except: - pass -else: - assert 0, "exception not raised" diff --git a/demo/wrap-c/helloworld.c b/demo/wrap-c/helloworld.c deleted file mode 100644 index f3ac3e6..0000000 --- a/demo/wrap-c/helloworld.c +++ /dev/null @@ -1,112 +0,0 @@ -#define MPICH_SKIP_MPICXX 1 -#define OMPI_SKIP_MPICXX 1 -#include -#include - -/* -------------------------------------------------------------------------- */ - -static void -sayhello(MPI_Comm comm) { - int size, rank; - char pname[MPI_MAX_PROCESSOR_NAME]; int len; - if (comm == MPI_COMM_NULL) { - printf("You passed MPI_COMM_NULL !!!\n"); - return; - } - MPI_Comm_size(comm, &size); - MPI_Comm_rank(comm, &rank); - MPI_Get_processor_name(pname, &len); - pname[len] = 0; - printf("Hello, World! 
I am process %d of %d on %s.\n", - rank, size, pname); -} - -/* -------------------------------------------------------------------------- */ - -static PyObject * -hw_sayhello(PyObject *self, PyObject *args) -{ - PyObject *py_comm = NULL; - MPI_Comm *comm_p = NULL; - - if (!PyArg_ParseTuple(args, "O:sayhello", &py_comm)) - return NULL; - - comm_p = PyMPIComm_Get(py_comm); - if (comm_p == NULL) - return NULL; - - sayhello(*comm_p); - - Py_INCREF(Py_None); - return Py_None; -} - -static struct PyMethodDef hw_methods[] = { - {"sayhello", (PyCFunction)hw_sayhello, METH_VARARGS, NULL}, - {NULL, NULL, 0, NULL} /* sentinel */ -}; - -#if PY_MAJOR_VERSION < 3 -/* --- Python 2 --- */ - -PyMODINIT_FUNC inithelloworld(void) -{ - PyObject *m = NULL; - - /* Initialize mpi4pyve C-API */ - if (import_mpi4pyve() < 0) goto bad; - - /* Module initialization */ - m = Py_InitModule("helloworld", hw_methods); - if (m == NULL) goto bad; - - return; - - bad: - return; -} - -#else -/* --- Python 3 --- */ - -static struct PyModuleDef hw_module = { - PyModuleDef_HEAD_INIT, - "helloworld", /* m_name */ - NULL, /* m_doc */ - -1, /* m_size */ - hw_methods /* m_methods */, - NULL, /* m_reload */ - NULL, /* m_traverse */ - NULL, /* m_clear */ - NULL /* m_free */ -}; - -PyMODINIT_FUNC -PyInit_helloworld(void) -{ - PyObject *m = NULL; - - /* Initialize mpi4pyve's C-API */ - if (import_mpi4pyve() < 0) goto bad; - - /* Module initialization */ - m = PyModule_Create(&hw_module); - if (m == NULL) goto bad; - - return m; - - bad: - return NULL; -} - -#endif - -/* -------------------------------------------------------------------------- */ - -/* - Local variables: - c-basic-offset: 2 - indent-tabs-mode: nil - End: -*/ diff --git a/demo/wrap-c/makefile b/demo/wrap-c/makefile deleted file mode 100644 index 32adff2..0000000 --- a/demo/wrap-c/makefile +++ /dev/null @@ -1,29 +0,0 @@ -.PHONY: default -default: build test clean - -PYTHON = python -PYTHON_CONFIG = ${PYTHON} ../python-config -MPI4PY_INCLUDE = ${shell ${PYTHON} -c 'import mpi4pyve; print( mpi4pyve.get_include() )'} - - -MPICC = mpicc -CFLAGS = -fPIC ${shell ${PYTHON_CONFIG} --includes} -LDFLAGS = -shared ${shell ${PYTHON_CONFIG} --libs} -SO = ${shell ${PYTHON_CONFIG} --extension-suffix} -.PHONY: build -build: helloworld${SO} -helloworld${SO}: helloworld.c - ${MPICC} ${CFLAGS} -I${MPI4PY_INCLUDE} -o $@ $< ${LDFLAGS} - - -MPIEXEC = mpiexec -NP_FLAG = -n -NP = 5 -.PHONY: test -test: build - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test.py - - -.PHONY: clean -clean: - ${RM} helloworld${SO} diff --git a/demo/wrap-c/test.py b/demo/wrap-c/test.py deleted file mode 100644 index bf1ab30..0000000 --- a/demo/wrap-c/test.py +++ /dev/null @@ -1,15 +0,0 @@ -from mpi4pyve import MPI -import helloworld as hw - -null = MPI.COMM_NULL -hw.sayhello(null) - -comm = MPI.COMM_WORLD -hw.sayhello(comm) - -try: - hw.sayhello(list()) -except: - pass -else: - assert 0, "exception not raised" diff --git a/demo/wrap-cffi/helloworld.c b/demo/wrap-cffi/helloworld.c deleted file mode 100644 index ba121d4..0000000 --- a/demo/wrap-cffi/helloworld.c +++ /dev/null @@ -1,27 +0,0 @@ -#define MPICH_SKIP_MPICXX 1 -#define OMPI_SKIP_MPICXX 1 -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif -extern void sayhello(MPI_Comm); -#ifdef __cplusplus -} -#endif - -void sayhello(MPI_Comm comm) { - int size, rank; - char pname[MPI_MAX_PROCESSOR_NAME]; int len; - if (comm == MPI_COMM_NULL) { - printf("You passed MPI_COMM_NULL !!!\n"); - return; - } - MPI_Comm_size(comm, &size); - MPI_Comm_rank(comm, &rank); - 
MPI_Get_processor_name(pname, &len); - pname[len] = 0; - printf("Hello, World! I am process %d of %d on %s.\n", - rank, size, pname); -} diff --git a/demo/wrap-cffi/helloworld.py b/demo/wrap-cffi/helloworld.py deleted file mode 100644 index 267fccc..0000000 --- a/demo/wrap-cffi/helloworld.py +++ /dev/null @@ -1,21 +0,0 @@ -from mpi4pyve import MPI -import cffi -import os - -_libdir = os.path.dirname(__file__) - -ffi = cffi.FFI() -if MPI._sizeof(MPI.Comm) == ffi.sizeof('int'): - _mpi_comm_t = 'int' -else: - _mpi_comm_t = 'void*' -ffi.cdef(""" -typedef %(_mpi_comm_t)s MPI_Comm; -void sayhello(MPI_Comm); -""" % vars()) -lib = ffi.dlopen(os.path.join(_libdir, "libhelloworld.so")) - -def sayhello(comm): - comm_ptr = MPI._addressof(comm) - comm_val = ffi.cast('MPI_Comm*', comm_ptr)[0] - lib.sayhello(comm_val) diff --git a/demo/wrap-cffi/makefile b/demo/wrap-cffi/makefile deleted file mode 100644 index e7eb200..0000000 --- a/demo/wrap-cffi/makefile +++ /dev/null @@ -1,24 +0,0 @@ -.PHONY: default -default: build test clean - -PYTHON = python - -MPICC = mpicc -LIBNAME = libhelloworld.so -.PHONY: build -build: ${LIBNAME} -${LIBNAME}: helloworld.c - ${MPICC} -shared -o $@ $< - - -MPIEXEC = mpiexec -NP_FLAG = -n -NP = 5 -.PHONY: test -test: build - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test.py - - -.PHONY: clean -clean: - ${RM} -r ${LIBNAME} *.pyc __pycache__ diff --git a/demo/wrap-cffi/test.py b/demo/wrap-cffi/test.py deleted file mode 100644 index bf1ab30..0000000 --- a/demo/wrap-cffi/test.py +++ /dev/null @@ -1,15 +0,0 @@ -from mpi4pyve import MPI -import helloworld as hw - -null = MPI.COMM_NULL -hw.sayhello(null) - -comm = MPI.COMM_WORLD -hw.sayhello(comm) - -try: - hw.sayhello(list()) -except: - pass -else: - assert 0, "exception not raised" diff --git a/demo/wrap-ctypes/helloworld.c b/demo/wrap-ctypes/helloworld.c deleted file mode 100644 index ba121d4..0000000 --- a/demo/wrap-ctypes/helloworld.c +++ /dev/null @@ -1,27 +0,0 @@ -#define MPICH_SKIP_MPICXX 1 -#define OMPI_SKIP_MPICXX 1 -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif -extern void sayhello(MPI_Comm); -#ifdef __cplusplus -} -#endif - -void sayhello(MPI_Comm comm) { - int size, rank; - char pname[MPI_MAX_PROCESSOR_NAME]; int len; - if (comm == MPI_COMM_NULL) { - printf("You passed MPI_COMM_NULL !!!\n"); - return; - } - MPI_Comm_size(comm, &size); - MPI_Comm_rank(comm, &rank); - MPI_Get_processor_name(pname, &len); - pname[len] = 0; - printf("Hello, World! 
I am process %d of %d on %s.\n", - rank, size, pname); -} diff --git a/demo/wrap-ctypes/helloworld.py b/demo/wrap-ctypes/helloworld.py deleted file mode 100644 index 275942a..0000000 --- a/demo/wrap-ctypes/helloworld.py +++ /dev/null @@ -1,18 +0,0 @@ -from mpi4pyve import MPI -import ctypes -import os - -_libdir = os.path.dirname(__file__) - -if MPI._sizeof(MPI.Comm) == ctypes.sizeof(ctypes.c_int): - MPI_Comm = ctypes.c_int -else: - MPI_Comm = ctypes.c_void_p -_lib = ctypes.CDLL(os.path.join(_libdir, "libhelloworld.so")) -_lib.sayhello.restype = None -_lib.sayhello.argtypes = [MPI_Comm] - -def sayhello(comm): - comm_ptr = MPI._addressof(comm) - comm_val = MPI_Comm.from_address(comm_ptr) - _lib.sayhello(comm_val) diff --git a/demo/wrap-ctypes/makefile b/demo/wrap-ctypes/makefile deleted file mode 100644 index e7eb200..0000000 --- a/demo/wrap-ctypes/makefile +++ /dev/null @@ -1,24 +0,0 @@ -.PHONY: default -default: build test clean - -PYTHON = python - -MPICC = mpicc -LIBNAME = libhelloworld.so -.PHONY: build -build: ${LIBNAME} -${LIBNAME}: helloworld.c - ${MPICC} -shared -o $@ $< - - -MPIEXEC = mpiexec -NP_FLAG = -n -NP = 5 -.PHONY: test -test: build - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test.py - - -.PHONY: clean -clean: - ${RM} -r ${LIBNAME} *.pyc __pycache__ diff --git a/demo/wrap-ctypes/test.py b/demo/wrap-ctypes/test.py deleted file mode 100644 index bf1ab30..0000000 --- a/demo/wrap-ctypes/test.py +++ /dev/null @@ -1,15 +0,0 @@ -from mpi4pyve import MPI -import helloworld as hw - -null = MPI.COMM_NULL -hw.sayhello(null) - -comm = MPI.COMM_WORLD -hw.sayhello(comm) - -try: - hw.sayhello(list()) -except: - pass -else: - assert 0, "exception not raised" diff --git a/demo/wrap-cython/helloworld.pyx b/demo/wrap-cython/helloworld.pyx deleted file mode 100644 index 4ec2671..0000000 --- a/demo/wrap-cython/helloworld.pyx +++ /dev/null @@ -1,23 +0,0 @@ -cdef extern from "mpi-compat.h": pass - -cimport mpi4pyve.MPI as MPI -from mpi4pyve.libmpi cimport * - -cdef extern from "stdio.h": - int printf(char*, ...) - -cdef void c_sayhello(MPI_Comm comm): - cdef int size, rank, plen - cdef char pname[MPI_MAX_PROCESSOR_NAME] - if comm == MPI_COMM_NULL: - printf(b"You passed MPI_COMM_NULL !!!%s", b"\n") - return - MPI_Comm_size(comm, &size) - MPI_Comm_rank(comm, &rank) - MPI_Get_processor_name(pname, &plen) - printf(b"Hello, World! 
I am process %d of %d on %s.\n", - rank, size, pname) - -def sayhello(MPI.Comm comm not None ): - cdef MPI_Comm c_comm = comm.ob_mpi - c_sayhello(c_comm) diff --git a/demo/wrap-cython/makefile b/demo/wrap-cython/makefile deleted file mode 100644 index b8c3ad1..0000000 --- a/demo/wrap-cython/makefile +++ /dev/null @@ -1,35 +0,0 @@ -.PHONY: default -default: build test clean - -PYTHON = python -PYTHON_CONFIG = ${PYTHON} ../python-config - - -CYTHON = cython -.PHONY: src -src: helloworld.c -helloworld.c: helloworld.pyx - ${CYTHON} $< - - -MPICC = mpicc -CFLAGS = -fPIC ${shell ${PYTHON_CONFIG} --includes} -LDFLAGS = -shared ${shell ${PYTHON_CONFIG} --libs} -SO = ${shell ${PYTHON_CONFIG} --extension-suffix} -.PHONY: build -build: helloworld${SO} -helloworld${SO}: helloworld.c - ${MPICC} ${CFLAGS} -o $@ $< ${LDFLAGS} - - -MPIEXEC = mpiexec -NP_FLAG = -n -NP = 5 -.PHONY: test -test: build - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test.py - - -.PHONY: clean -clean: - ${RM} helloworld.c helloworld${SO} diff --git a/demo/wrap-cython/mpi-compat.h b/demo/wrap-cython/mpi-compat.h deleted file mode 100644 index 367c58a..0000000 --- a/demo/wrap-cython/mpi-compat.h +++ /dev/null @@ -1,14 +0,0 @@ -/* Author: Lisandro Dalcin */ -/* Contact: dalcinl@gmail.com */ - -#ifndef MPI_COMPAT_H -#define MPI_COMPAT_H - -#include - -#if (MPI_VERSION < 3) && !defined(PyMPI_HAVE_MPI_Message) -typedef void *PyMPI_MPI_Message; -#define MPI_Message PyMPI_MPI_Message -#endif - -#endif/*MPI_COMPAT_H*/ diff --git a/demo/wrap-cython/test.py b/demo/wrap-cython/test.py deleted file mode 100644 index 9d93d76..0000000 --- a/demo/wrap-cython/test.py +++ /dev/null @@ -1,22 +0,0 @@ -from mpi4pyve import MPI -import helloworld as hw - -null = MPI.COMM_NULL -hw.sayhello(null) - -comm = MPI.COMM_WORLD -hw.sayhello(comm) - -try: - hw.sayhello(None) -except: - pass -else: - assert 0, "exception not raised" - -try: - hw.sayhello(list()) -except: - pass -else: - assert 0, "exception not raised" diff --git a/demo/wrap-f2py/helloworld.f90 b/demo/wrap-f2py/helloworld.f90 deleted file mode 100644 index 51661c4..0000000 --- a/demo/wrap-f2py/helloworld.f90 +++ /dev/null @@ -1,31 +0,0 @@ -! -! $ f2py --f90exec=mpif90 -m helloworld -c helloworld.f90 -! - -subroutine sayhello(comm) - use mpi - implicit none - integer :: comm - integer :: rank, size, nlen, ierr - character (len=MPI_MAX_PROCESSOR_NAME) :: pname - if (comm == MPI_COMM_NULL) then - print *, 'You passed MPI_COMM_NULL !!!' - return - end if - call MPI_Comm_rank(comm, rank, ierr) - call MPI_Comm_size(comm, size, ierr) - call MPI_Get_processor_name(pname, nlen, ierr) - print *, 'Hello, World!', & - ' I am process ', rank, & - ' of ', size, & - ' on ', pname(1:nlen), '.' -end subroutine sayhello - -! program main -! use mpi -! implicit none -! integer ierr -! call MPI_Init(ierr) -! call sayhello(MPI_COMM_WORLD) -! call MPI_Finalize(ierr) -! 
end program main diff --git a/demo/wrap-f2py/makefile b/demo/wrap-f2py/makefile deleted file mode 100644 index 5de785a..0000000 --- a/demo/wrap-f2py/makefile +++ /dev/null @@ -1,30 +0,0 @@ -.PHONY: default -default: build test clean - -PYTHON = python -PYTHON_CONFIG = ${PYTHON} ../python-config - - -SO = ${shell ${PYTHON_CONFIG} --extension-suffix} -MPIF90 = mpif90 -F2PY = f2py -ifneq (${MPI_FORTRAN_MOD_DIR},) -F2PYFLAGS += --f90flags=-I${MPI_FORTRAN_MOD_DIR} -endif -.PHONY: build -build: helloworld${SO} -helloworld${SO}: helloworld.f90 - ${F2PY} ${F2PYFLAGS} --f90exec=${MPIF90} -m helloworld -c $< - - -MPIEXEC = mpiexec -NP_FLAG = -n -NP = 5 -.PHONY: test -test: build - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test.py - - -.PHONY: clean -clean: - ${RM} helloworld${SO} diff --git a/demo/wrap-f2py/test.py b/demo/wrap-f2py/test.py deleted file mode 100644 index 304c1c1..0000000 --- a/demo/wrap-f2py/test.py +++ /dev/null @@ -1,17 +0,0 @@ -from mpi4pyve import MPI -import helloworld as hw - -null = MPI.COMM_NULL -fnull = null.py2f() -hw.sayhello(fnull) - -comm = MPI.COMM_WORLD -fcomm = comm.py2f() -hw.sayhello(fcomm) - -try: - hw.sayhello(list()) -except: - pass -else: - assert 0, "exception not raised" diff --git a/demo/wrap-swig/helloworld.i b/demo/wrap-swig/helloworld.i deleted file mode 100644 index 30c64e6..0000000 --- a/demo/wrap-swig/helloworld.i +++ /dev/null @@ -1,38 +0,0 @@ -%module helloworld - -%{ - -#define MPICH_SKIP_MPICXX 1 -#define OMPI_SKIP_MPICXX 1 - -#include -#include - -void sayhello(MPI_Comm comm) { - int size, rank; - char pname[MPI_MAX_PROCESSOR_NAME]; int len; - if (comm == MPI_COMM_NULL) { - printf("You passed MPI_COMM_NULL !!!\n"); - return; - } - MPI_Comm_size(comm, &size); - MPI_Comm_rank(comm, &rank); - MPI_Get_processor_name(pname, &len); - pname[len] = 0; - printf("Hello, World! 
I am process %d of %d on %s.\n", - rank, size, pname); -} - -%} - -%include mpi4pyve/mpi4pyve.i - -%mpi4pyve_typemap(Comm, MPI_Comm); - -void sayhello(MPI_Comm comm); - -/* - * Local Variables: - * mode: C - * End: - */ diff --git a/demo/wrap-swig/makefile b/demo/wrap-swig/makefile deleted file mode 100644 index 70fd805..0000000 --- a/demo/wrap-swig/makefile +++ /dev/null @@ -1,36 +0,0 @@ -.PHONY: default -default: build test clean - -PYTHON = python -PYTHON_CONFIG = ${PYTHON} ../python-config -MPI4PY_INCLUDE = ${shell ${PYTHON} -c 'import mpi4pyve; print( mpi4pyve.get_include() )'} - - -SWIG = swig -SWIG_PY = ${SWIG} -python -.PHONY: src -src: helloworld_wrap.c -helloworld_wrap.c: helloworld.i - ${SWIG_PY} -I${MPI4PY_INCLUDE} -o $@ $< - -MPICC = mpicc -CFLAGS = -fPIC ${shell ${PYTHON_CONFIG} --includes} -LDFLAGS = -shared ${shell ${PYTHON_CONFIG} --libs} -SO = ${shell ${PYTHON_CONFIG} --extension-suffix} -.PHONY: build -build: _helloworld${SO} -_helloworld${SO}: helloworld_wrap.c - ${MPICC} ${CFLAGS} -I${MPI4PY_INCLUDE} -o $@ $< ${LDFLAGS} - - -MPIEXEC = mpiexec -NP_FLAG = -n -NP = 5 -.PHONY: test -test: build - ${MPIEXEC} ${NP_FLAG} ${NP} ${PYTHON} test.py - - -.PHONY: clean -clean: - ${RM} helloworld_wrap.c helloworld.py* _helloworld${SO} diff --git a/demo/wrap-swig/test.py b/demo/wrap-swig/test.py deleted file mode 100644 index bf1ab30..0000000 --- a/demo/wrap-swig/test.py +++ /dev/null @@ -1,15 +0,0 @@ -from mpi4pyve import MPI -import helloworld as hw - -null = MPI.COMM_NULL -hw.sayhello(null) - -comm = MPI.COMM_WORLD -hw.sayhello(comm) - -try: - hw.sayhello(list()) -except: - pass -else: - assert 0, "exception not raised" diff --git a/docs/index.rst b/docs/index.rst index cab5270..bdd3741 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -22,7 +22,7 @@ This package supports: interface* (NLCPy arrays, builtin bytes/string/array objects) + point-to-point (blocking/nonbloking/persistent send & receive) - + collective (broadcast, block/vector scatter & gather, reductions) + + collective (broadcast, block scatter & gather, reductions, vector scatter & gather) * Process groups and communication domains @@ -35,14 +35,14 @@ This package supports: + blocking/nonbloking & collective/noncollective + individual/shared file pointers & explicit offset -This package has NOT supported the following functions yet: - * One-sided operations + remote memory access (put, get, accumulate) + passive target syncronization (start/complete & post/wait) + active target syncronization (lock & unlock) +This package has NOT supported the following functions yet: + * Dynamic process management + spawn & spawn multiple @@ -50,8 +50,8 @@ This package has NOT supported the following functions yet: + name publishing & lookup -List of Supprted Functions --------------------------- +List of Supported Functions +--------------------------- A list of supported functions is shown below. @@ -67,8 +67,6 @@ Alltoall All to All Scatter/Gather, send data from all to all proce Alltoallv All to All Scatter/Gather Vector, send data from all to all processes in a group providing different amount of data and displacements. Alltoallw Generalized All-to-All communication allowing different counts, displacements and datatypes for each partner. Bcast Broadcast a message from one process to all other processes in a group. -Bsend Blocking send in buffered mode. -Bsend_init Persistent request for a send in buffered mode. Gather Gather together values from a group of processes. 
Gatherv Gather Vector, gather data to one process from all other processes in a group providing different amount of data and displacements at the receiving sides. Iallgather Nonblocking Gather to All. @@ -78,7 +76,6 @@ Ialltoall Nonblocking All to All Scatter/Gather. Ialltoallv Nonblocking All to All Scatter/Gather Vector. Ialltoallw Nonblocking Generalized All-to-All. Ibcast Nonblocking Broadcast. -Ibsend Nonblocking send in buffered mode. Igather Nonblocking Gather. Igatherv Nonblocking Gather Vector. Irecv Nonblocking receive. @@ -109,9 +106,7 @@ allgather Gather to All. allreduce Reduce to All. alltoall All to All Scatter/Gather. bcast Broadcast. -bsend Send in buffered mode. gather Gather. -ibsend Nonblocking send in buffered mode. irecv Nonblocking receive. isend Nonblocking send. issend Nonblocking send in synchronous mode. @@ -155,14 +150,6 @@ neighbor_allgather Neighbor Gather to All. neighbor_alltoall Neighbor All to All Scatter/Gather. ===================== =============================================================================================================================================================== -* MPI (Miscellanea) - -===================== =============================================================================================================================================================== -Name Summary -===================== =============================================================================================================================================================== -Attach_buffer Attach a user-provided buffer for sending in buffered mode. -===================== =============================================================================================================================================================== - * MPI.Request Class (Request handle) ===================== =============================================================================================================================================================== @@ -188,23 +175,13 @@ irecv Nonblocking receive of matched message. recv Blocking receive of matched message. ===================== =============================================================================================================================================================== -* MPI.Op Class (Ancillay / Operation object) - -===================== =============================================================================================================================================================== -Name Summary -===================== =============================================================================================================================================================== -Reduce_local Apply a reduction operator to local data. -===================== =============================================================================================================================================================== - * MPI.Datatype Class (Ancillay / Datatype object) ===================== =============================================================================================================================================================== Name Summary ===================== =============================================================================================================================================================== Pack Pack into contiguous memory according to datatype. 
-Pack_external Pack into contiguous memory according to datatype, using a portable data representation (external32).
Unpack Unpack from contiguous memory according to datatype.
-Unpack_external Unpack from contiguous memory according to datatype, using a portable data representation (external32).
===================== ===============================================================================================================================================================

* MPI.File Class (Parallel input/output)
@@ -248,8 +225,52 @@ Write_ordered_end Complete a split collective write using shared file pointe
Write_shared Write using shared file pointer.
===================== ===============================================================================================================================================================
-List of Unsupprted Functions
----------------------------
+* MPI.Win Class (One-sided operations)
+
+===================== ===============================================================================================================================================================
+Name Summary
+===================== ===============================================================================================================================================================
+Accumulate Accumulate data into the target process.
+Compare_and_swap Perform one-sided atomic compare-and-swap.
+Fetch_and_op Perform one-sided read-modify-write.
+Get Get data from a memory window on a remote process.
+Get_accumulate Fetch-and-accumulate data into the target process.
+Put Put data into a memory window on a remote process.
+Raccumulate Fetch-and-accumulate data into the target process.
+Rget Get data from a memory window on a remote process.
+Rget_accumulate Accumulate data into the target process using remote memory access.
+Rput Put data into a memory window on a remote process.
+===================== ===============================================================================================================================================================
+
+List of mpi4py-ve Original Functions
+------------------------------------
+
+* veo (VE Offloading operations)
+
++-------------------------------+-----------------------------------------------------------------------------------------------------+
+| Name | Summary |
++===============================+=====================================================================================================+
+| alloc_hmem(proc_handle, size) | Allocate a VE memory buffer or a VH memory buffer which users can use as heterogeneous memory. |
+| | |
+| | Parameters: |
+| | proc_handle: pointer |
+| | VEO process handle |
+| | size: int |
+| | size in bytes |
+| | |
+| | Returns: |
+| | addr: int |
+| | VEMVA address with the identifier |
++-------------------------------+-----------------------------------------------------------------------------------------------------+
+| free_hmem(addr) | Free a VE memory buffer. |
+| | |
+| | Parameters: |
+| | addr: int |
+| | VEMVA address |
++-------------------------------+-----------------------------------------------------------------------------------------------------+
+
+List of Unsupported Functions
+-----------------------------
The current version of *mpi4py-ve* does not support the following functions. Please note that "NotImplementedError" occurs if your Python script calls them.
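For example, a minimal sketch of what calling one of these functions looks like in practice (an illustrative snippet, assuming at least two MPI processes; ``bsend`` appears in the table below):

::

    from mpi4pyve import MPI

    comm = MPI.COMM_WORLD
    try:
        # bsend is one of the unsupported buffered-mode functions
        comm.bsend(42, dest=(comm.Get_rank() + 1) % comm.Get_size())
    except NotImplementedError:
        print('buffered-mode send is not supported by mpi4py-ve')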
@@ -258,6 +279,11 @@
===================== ===============================================================================================================================================================
Name Summary
===================== ===============================================================================================================================================================
+Bsend Blocking send in buffered mode.
+Bsend_init Persistent request for a send in buffered mode.
+Ibsend Nonblocking send in buffered mode.
+bsend Send in buffered mode.
+ibsend Nonblocking send in buffered mode.
Accept Accept a request to form a new intercommunicator.
Connect Make a request to form a new intercommunicator.
Close_port Close a port.
@@ -268,21 +294,39 @@ Publish_name Publish a service name.
Unpublish_name Unpublish a service name.
===================== ===============================================================================================================================================================
-* MPI.Win Class (One-sided operations)
+* MPI (Miscellanea)
===================== ===============================================================================================================================================================
Name Summary
===================== ===============================================================================================================================================================
-Accumulate Accumulate data into the target process.
-Compare_and_swap Perform one-sided atomic compare-and-swap.
-Fetch_and_op Perform one-sided read-modify-write.
-Get Get data from a memory window on a remote process.
-Get_accumulate Fetch-and-accumulate data into the target process.
-Put Put data into a memory window on a remote process.
-Raccumulate Fetch-and-accumulate data into the target process.
-Rget Get data from a memory window on a remote process.
-Rget_accumulate Accumulate data into the target process using remote memory access.
-Rput Put data into a memory window on a remote process.
+Attach_buffer Attach a user-provided buffer for sending in buffered mode.
+===================== ===============================================================================================================================================================
+
+* MPI.Op Class (Ancillary / Operation object)
+
+===================== ===============================================================================================================================================================
+Name Summary
+===================== ===============================================================================================================================================================
+Reduce_local Apply a reduction operator to local data.
+===================== ===============================================================================================================================================================
+
+* MPI.Datatype Class (Ancillary / Datatype object)
+
+===================== ===============================================================================================================================================================
+Name Summary
+===================== ===============================================================================================================================================================
+Pack_external Pack into contiguous memory according to datatype, using a portable data representation (external32).
+Unpack_external Unpack from contiguous memory according to datatype, using a portable data representation (external32).
+===================== ===============================================================================================================================================================
+
+* mpi4pyve.futures package (MPIPoolExecutor / MPICommExecutor)
+
+===================== ===============================================================================================================================================================
+Name Summary
+===================== ===============================================================================================================================================================
+MPIPoolExecutor The MPIPoolExecutor class uses a pool of MPI processes to execute calls asynchronously.
+MPICommExecutor Context manager for MPIPoolExecutor.
+ This context manager splits an MPI (intra) communicator comm into two disjoint sets: a single master process and the remaining worker processes.
===================== ===============================================================================================================================================================

Exception Handling
@@ -306,7 +350,7 @@ Assume this code is stored in a standard Python script file and run with mpirun

::

- $ mpirun -vh -np 2 $(which python) ZeroDivisionError.py
+ $ mpirun -veo -np 2 $(which python) ZeroDivisionError.py

Process 0 raises **ZeroDivisionError** exception before performing a send call to process 1. As the exception is not handled, the Python interpreter running in process 0 will proceed to exit with non-zero status. However, as *mpi4py-ve* installed a finalizer hook to call *MPI_Finalize()* before exit, process 0 will block waiting for other processes to also enter the *MPI_Finalize()* call. Meanwhile, process 1 will block waiting for a message to arrive from process 0, thus never reaching to *MPI_Finalize()*. The whole MPI execution environment is irremediably in a deadlock state.
@@ -314,7 +358,7 @@ To alleviate this issue, *mpi4py-ve* offers a simple, alternative command line e

::

- $ mpirun -vh -np 2 $(which python) -m mpi4pyve ZeroDivisionError.py
+ $ mpirun -veo -np 2 $(which python) -m mpi4pyve ZeroDivisionError.py

This is a mimic of the option **-m mpi4py** described in the `mpi4py manual (mpi4py.run) `_.
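For reference, the ``ZeroDivisionError.py`` script referred to above is not reproduced in this document; a minimal sketch with the described behavior (assuming two or more MPI processes) could look like:

::

    from mpi4pyve import MPI

    comm = MPI.COMM_WORLD
    assert comm.Get_size() >= 2

    if comm.Get_rank() == 0:
        1 / 0                    # raises ZeroDivisionError before the send
        comm.send(None, dest=1)  # never reached
    elif comm.Get_rank() == 1:
        comm.recv(source=0)      # deadlocks without the -m mpi4pyve runner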
diff --git a/docs/vai_spec_example.rst b/docs/vai_spec_example.rst
new file mode 100644
index 0000000..9f567b4
--- /dev/null
+++ b/docs/vai_spec_example.rst
@@ -0,0 +1,231 @@
+###################################################
+Use mpi4py-ve with homebrew classes (without NLCPy)
+###################################################
+
+*mpi4py-ve* allows objects with the *__ve_array_interface__* attribute to be specified as arguments to the communication API.
+
+******************************
+VE Array Interface (Version 1)
+******************************
+The *VE Array Interface* (or VAI) is created for interoperability between different implementations
+of VE array-like objects in various projects. The idea is borrowed from the `NumPy array interface `_
+and `CUDA Array Interface `_.
+
+------------------------------
+Python Interface Specification
+------------------------------
+
+ Note
+
+ Experimental feature. Specification may change.
+
+The ``__ve_array_interface__`` attribute returns a dictionary ( ``dict`` ) that must contain the
+following entries:
+
+* **shape**: ``(integer, ...)``
+ A tuple of ``int`` (or ``long`` ) representing the size of each dimension.
+
+* **typestr**: ``str``
+ The type string. This has the same definition as ``typestr`` in the `numpy array interface `_.
+
+* **data**: ``(integer, boolean)``
+ The data is a 2-tuple. The first element is the data pointer to VEO HMEM (Heterogeneous
+ Memory) as a Python ``int`` (or ``long`` ). For zero-size arrays, use ``0`` here. The second element
+ is the read-only flag as a Python ``bool`` .
+
+* **version**: ``integer``
+ An integer for the version of the interface being exported. The current version is *1*.
+
+The following entries are optional:
+
+* **strides**: ``None`` or ``(integer, ...)``
+ If **strides** is not given, or it is ``None`` , the array is in C-contiguous layout. Otherwise, a tuple
+ of ``int`` (or ``long`` ) is explicitly given for representing the number of bytes to skip to access
+ the next element at each dimension.
+
+* **descr**:
+ This is for describing more complicated types. This follows the same specification as in
+ the `numpy array interface `_.
+
+* **mask**: ``None`` or object exposing the ``__ve_array_interface__``
+ If ``None`` then all values in **data** are valid. All elements of the mask array should be
+ interpreted only as true or not true indicating which elements of this array are valid. This
+ has the same definition as ``mask`` in the `numpy array interface `_.
+
+ Note
+
+ mpi4py-ve does not currently support working with masked VE arrays and will raise
+ an exception if one is passed to a function.
+
+* **veo_ctxt**: ``None`` or ``integer``
+ The pointer to ``veo_thr_ctxt`` as a Python ``int`` (or ``long``).
+
+*************************************************
+Example code for mpi4py-ve using homebrew classes
+*************************************************
+
+-----------
+source code
+-----------
+* mpi_send_recv.py: Main script to communicate between objects that have the ``__ve_array_interface__`` attribute.
+
+.. code-block:: python
+
+    from mpi4pyve import MPI
+    from mpi4pyve import util
+    import numpy as np
+    import veo_Py_wrapper
+
+    comm = MPI.COMM_WORLD
+    size = comm.Get_size()
+    rank = comm.Get_rank()
+
+    veo = veo_Py_wrapper.Veo(rank)            # create VE process
+    x = np.array([123, 456, 789], dtype=int)  # create buffer on VH
+    x_hmem = veo.alloc_hmem(x.dtype, x.size)  # create buffer on VE
+
+    if rank == 0:
+        x_hmem.set_value(x)  # set value into VE buffer
+        comm.Send(x_hmem, dest=1)
+        comm.Recv(x_hmem, source=1)
+    elif rank == 1:
+        comm.Recv(x_hmem, source=0)
+        comm.Send(x_hmem, dest=0)
+    comm.Barrier()
+
+    res = np.all(x == x_hmem.get_value())  # result check
+    print('Result {} (rank={})'.format('Success' if res else 'Failed', rank))
+
+    del x_hmem
+
+* veo_Py_wrapper.py: Sub script to call veo C APIs and to create an object that has the ``__ve_array_interface__`` attribute.
+
+.. code-block:: python
+
+    from mpi4pyve import veo
+    import ctypes
+    import numpy as np
+    import atexit
+
+    _veo_proc_destroyed = False
+
+
+    class VEMem(object):
+        """
+        Object that controls VE memory
+        """
+
+        def __init__(self, libveo, proc, ctxt, dtype, nelem):
+            """
+            Allocate VE memory
+            """
+            self.libveo = libveo
+            self.proc = proc
+            self.ctxt = ctxt
+            self.dtype = dtype
+            self.nelem = nelem
+            self.nbytes = dtype.itemsize * nelem
+            self.hmem = veo.alloc_hmem(self.proc, self.nbytes)
+
+        def __del__(self):
+            """
+            Free VE memory
+            """
+            if not _veo_proc_destroyed:
+                veo.free_hmem(self.hmem)
+
+        def set_value(self, val):
+            """
+            Set value into VE memory
+            """
+            val = np.asarray(val, dtype=self.dtype)
+            if val.size != self.nelem:
+                raise ValueError
+            src = ctypes.c_void_p(val.ctypes.data)
+            dst = ctypes.c_void_p(self.hmem)
+            ret = self.libveo.veo_hmemcpy(dst, src, self.nbytes)
+            if ret:
+                raise RuntimeError("ret = %d" % ret)
+
+        def get_value(self):
+            """
+            Retrieve value from VE memory
+            """
+            vhbuf = np.empty(self.nelem, dtype=self.dtype)
+            dst = ctypes.c_void_p(vhbuf.ctypes.data)
+            src = ctypes.c_void_p(self.hmem)
+            ret = self.libveo.veo_hmemcpy(dst, src, self.nbytes)
+            if ret:
+                raise RuntimeError("ret = %d" % ret)
+            return vhbuf
+
+        @property
+        def __ve_array_interface__(self):
+            """
+            VE array interface for interoperating Python VE libraries.
+            """
+            return {
+                'shape': (self.nelem,),
+                'typestr': self.dtype.str,
+                'version': 1,
+                'strides': None,
+                'data': (self.hmem, False)}
+
+    class Veo(object):
+
+        def __init__(self, venode, libpath='/opt/nec/ve/veos/lib64/libveo.so.1'):
+            # Load shared object
+            self.libveo = ctypes.cdll.LoadLibrary(libpath)
+
+            #
+            # Register argument types and return type for veo C APIs.
+            #
+            # veo_proc_create
+            self.libveo.veo_proc_create.argtypes = (ctypes.c_int32,)
+            self.libveo.veo_proc_create.restype = ctypes.c_uint64
+            # veo_context_open
+            self.libveo.veo_context_open.argtypes = (ctypes.c_void_p,)
+            self.libveo.veo_context_open.restype = ctypes.c_uint64
+            # veo_hmemcpy
+            self.libveo.veo_hmemcpy.argtypes = (ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t)
+            self.libveo.veo_hmemcpy.restype = ctypes.c_int32
+            # veo_proc_destroy
+            self.libveo.veo_proc_destroy.argtypes = (ctypes.c_void_p,)
+            self.libveo.veo_proc_destroy.restype = ctypes.c_int32
+            # veo_context_close
+            self.libveo.veo_context_close.argtypes = (ctypes.c_void_p,)
+            self.libveo.veo_context_close.restype = ctypes.c_int32
+
+            #
+            # Call veo C APIs for initialization.
+ # + self.proc = self.libveo.veo_proc_create(ctypes.c_int32(venode)) + self.ctxt = self.libveo.veo_context_open(ctypes.c_void_p(self.proc)) + + def finalize(libveo, ctxt, proc): + # Close veo context and destroy veo process. + libveo.veo_context_close(ctypes.c_void_p(ctxt)) + libveo.veo_proc_destroy(ctypes.c_void_p(proc)) + global _veo_proc_destroyed + _veo_proc_destroyed = True + + # Register function that calls at exit time. + atexit.register(finalize, self.libveo, self.ctxt, self.proc) + + def alloc_hmem(self, dtype, nelem): + return VEMem(self.libveo, self.proc, self.ctxt, dtype, nelem) + +| The above example uses ctypes to call veo C APIs from a Python script, although there are other ways to call them. +| e.g.) ctypes, cython, pybind, Python C API, etc. + +--------- +Execution +--------- + +:: + + $ mpirun -veo -np 2 python mpi_send_recv.py + Result Success (rank=0) + Result Success (rank=1) + + diff --git a/mpi.cfg b/mpi.cfg index 9885167..455c882 100644 --- a/mpi.cfg +++ b/mpi.cfg @@ -188,5 +188,6 @@ include_dirs = misc/mpiuni:$PETSC_DIR/include:$PETSC_DIR/$PETSC_ARCH/include mpi_dir = $NMPI_ROOT mpicc = %(mpi_dir)s/bin64/mpincc mpicxx = %(mpi_dir)s/bin64/mpinc++ -extra_link_args = -vh -L/opt/nec/ve/veos/lib64 -Wl,-rpath=/opt/nec/ve/veos/lib64 -lveo -shared-mpi -extra_compile_args = -vh +extra_link_args = -vh -L/opt/nec/ve/veos/lib64 -Wl,-rpath=/opt/nec/ve/veos/lib64 -lveo -shared-mpi -mpiprof +extra_compile_args = -vh -mpiprof +define_macros = MPI4PYVE_NEC_MPI=1 diff --git a/nlcpy_test/109_gather.py b/nlcpy_test/109_gather.py index 7cc8df4..cca96db 100644 --- a/nlcpy_test/109_gather.py +++ b/nlcpy_test/109_gather.py @@ -19,9 +19,8 @@ print("x = ",x) if rank == root: - print("type(x[0]) = ",type(x[0])) - print("type(x[1]) = ",type(x[1])) - print("type(x[2]) = ",type(x[2])) + for i in range(rank): + print("type(x[{}]) = ".format(i),type(x[i])) import sys try: for y in x: diff --git a/nlcpy_test/111_allgather.py b/nlcpy_test/111_allgather.py index cbba581..742aefd 100644 --- a/nlcpy_test/111_allgather.py +++ b/nlcpy_test/111_allgather.py @@ -17,9 +17,8 @@ print("allgather done") print("x = ",x) -print("type(x[0]) = ",type(x[0])) -print("type(x[1]) = ",type(x[1])) -print("type(x[2]) = ",type(x[2])) +for i in range(rank): + print("type(x[{}]) = ".format(i),type(x[i])) import sys try: for y in x: diff --git a/nlcpy_test/112_alltoall.py b/nlcpy_test/112_alltoall.py index 45dc14b..a2c8b18 100644 --- a/nlcpy_test/112_alltoall.py +++ b/nlcpy_test/112_alltoall.py @@ -16,9 +16,8 @@ print("allgather done") print("x = ",x) -print("type(x[0]) = ",type(x[0])) -print("type(x[1]) = ",type(x[1])) -print("type(x[2]) = ",type(x[2])) +for i in range(rank): + print("type(x[{}]) = ".format(i),type(x[i])) import sys try: for y in x: diff --git a/nlcpy_test/121_AttachDetach.py b/nlcpy_test/121_AttachDetach.py new file mode 100644 index 0000000..eb18b0b --- /dev/null +++ b/nlcpy_test/121_AttachDetach.py @@ -0,0 +1,17 @@ +from mpi4pyve import MPI +import numpy as np +import nlcpy as vp +import sys + +comm = MPI.COMM_WORLD +size = comm.Get_size() +rank = comm.Get_rank() + +if rank == 0: + print("rank = ",rank) + + x = vp.arange(200000, dtype=int) + print(x.__ve_array_interface__) + MPI.Attach_buffer(x) + MPI.Detach_buffer() + diff --git a/nlcpy_test/200_Send_Recv_Offset.py b/nlcpy_test/200_Send_Recv_Offset.py new file mode 100644 index 0000000..18b319d --- /dev/null +++ b/nlcpy_test/200_Send_Recv_Offset.py @@ -0,0 +1,32 @@ +from mpi4pyve import MPI +import numpy as np +import nlcpy as vp + +comm = 
MPI.COMM_WORLD +size = comm.Get_size() +rank = comm.Get_rank() + +if rank == 0: + print("rank = ",rank) + + #x = vp.arange(10)[::2] + x = vp.arange(10)[2:] + print("x = ",x) + print("type(x) = ",type(x)) + comm.Send([x, MPI.INT], dest=1) + +elif rank == 1: + print("rank = ",rank) + + y = vp.empty(10-2, dtype=int) + comm.Recv([y, MPI.INT]) + print("y = ",y) + print("type(y) = ",type(y)) + + import sys + try: + y + if not isinstance(y, vp.core.core.ndarray): + print("NG : ", __file__, file=sys.stderr) + except NameError: + print("Failure test case : ", __file__, file=sys.stderr) diff --git a/nlcpy_test/214_Bcast.py b/nlcpy_test/214_Bcast.py index 06d4168..1f4a2d3 100644 --- a/nlcpy_test/214_Bcast.py +++ b/nlcpy_test/214_Bcast.py @@ -9,7 +9,7 @@ if rank == 0: x = vp.array([1,2,3], dtype=int) else: - x = vp.empty(3, dtype=int) + x = vp.zeros(3, dtype=int) print("rank = ",rank) diff --git a/nlcpy_test/217_Scatter.py b/nlcpy_test/217_Scatter.py index a42264f..851e5b8 100644 --- a/nlcpy_test/217_Scatter.py +++ b/nlcpy_test/217_Scatter.py @@ -15,7 +15,7 @@ else: x = None -y = vp.empty(3, dtype=int) +y = vp.zeros(3, dtype=int) print("x = ",x) diff --git a/nlcpy_test/218_Scatterv.py b/nlcpy_test/218_Scatterv.py index ce43d45..96edfb9 100644 --- a/nlcpy_test/218_Scatterv.py +++ b/nlcpy_test/218_Scatterv.py @@ -21,7 +21,7 @@ else: x = None -y = vp.empty(3, dtype=int) +y = vp.zeros(3, dtype=int) diff --git a/nlcpy_test/219_Allgather.py b/nlcpy_test/219_Allgather.py index e9e617b..001185e 100644 --- a/nlcpy_test/219_Allgather.py +++ b/nlcpy_test/219_Allgather.py @@ -9,7 +9,7 @@ print("rank = ",rank) x = vp.array([(rank+1)**2 ,rank], dtype=int) -y = vp.empty((size, 2), dtype=int) +y = vp.zeros((size, 2), dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/220_Allgatherv.py b/nlcpy_test/220_Allgatherv.py index 38c6b21..481ea71 100644 --- a/nlcpy_test/220_Allgatherv.py +++ b/nlcpy_test/220_Allgatherv.py @@ -9,7 +9,7 @@ print("rank = ",rank) x = vp.array([(rank+1)**2 ,rank], dtype=int) -y = vp.empty((size, 2), dtype=int) +y = vp.zeros((size, 2), dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/221_Alltoall.py b/nlcpy_test/221_Alltoall.py index 8c2911a..2b582f1 100644 --- a/nlcpy_test/221_Alltoall.py +++ b/nlcpy_test/221_Alltoall.py @@ -9,7 +9,7 @@ print("rank = ",rank) x = vp.arange(size**2, dtype=int).reshape(size, size) * (rank + 1) -y = vp.empty((size, size), dtype=int) +y = vp.zeros((size, size), dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/222_Alltoallv.py b/nlcpy_test/222_Alltoallv.py index ab96793..92a9245 100644 --- a/nlcpy_test/222_Alltoallv.py +++ b/nlcpy_test/222_Alltoallv.py @@ -9,7 +9,7 @@ print("rank = ",rank) x = vp.arange(size**2, dtype=int).reshape(size, size) * (rank + 1) -y = vp.empty((size, size), dtype=int) +y = vp.zeros((size, size), dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/224_Reduce.py b/nlcpy_test/224_Reduce.py index 41cd186..75d572f 100644 --- a/nlcpy_test/224_Reduce.py +++ b/nlcpy_test/224_Reduce.py @@ -10,7 +10,7 @@ print("rank = ",rank) x = vp.array([(rank+1)**2 , rank], dtype=int) -y = vp.empty(2, dtype=int) +y = vp.zeros(2, dtype=int) print("x = ",x) print("type(x) = ",type(x)) diff --git a/nlcpy_test/225_Allreduce.py b/nlcpy_test/225_Allreduce.py index c6d8f75..03a2768 100644 --- a/nlcpy_test/225_Allreduce.py +++ b/nlcpy_test/225_Allreduce.py @@ -10,7 +10,7 @@ print("rank = ",rank) x = 
vp.array([(rank+1)**2 , rank], dtype=int) -y = vp.empty(2, dtype=int) +y = vp.zeros(2, dtype=int) print("x = ",x) print("type(x) = ",type(x)) diff --git a/nlcpy_test/226_Reduce_scatter_block.py b/nlcpy_test/226_Reduce_scatter_block.py index 2e2b030..b5e29da 100644 --- a/nlcpy_test/226_Reduce_scatter_block.py +++ b/nlcpy_test/226_Reduce_scatter_block.py @@ -10,7 +10,7 @@ print("rank = ",rank) x = vp.arange(size, dtype=int) * (rank + 1) -y = vp.empty(1, dtype=int) +y = vp.zeros(1, dtype=int) print("x = ",x) print("type(x) = ",type(x)) diff --git a/nlcpy_test/227_Reduce_scatter.py b/nlcpy_test/227_Reduce_scatter.py index 2f8734e..def86b5 100644 --- a/nlcpy_test/227_Reduce_scatter.py +++ b/nlcpy_test/227_Reduce_scatter.py @@ -10,7 +10,7 @@ print("rank = ",rank) x = vp.arange(size, dtype=int) * (rank + 1) -y = vp.empty(1, dtype=int) +y = vp.zeros(1, dtype=int) print("x = ",x) print("type(x) = ",type(x)) diff --git a/nlcpy_test/233_Iallgather.py b/nlcpy_test/233_Iallgather.py index 7a5eea1..fe27fc6 100644 --- a/nlcpy_test/233_Iallgather.py +++ b/nlcpy_test/233_Iallgather.py @@ -10,7 +10,7 @@ #x = vp.array([(rank+1)**2 ,rank], dtype=int) x = vp.array([1,2,3], dtype=int) -y = vp.empty((3,3), dtype=int) +y = vp.empty((size,3), dtype=int) print("x = ",x) print("type(x) = ",type(x)) diff --git a/nlcpy_test/234_Iallgatherv.py b/nlcpy_test/234_Iallgatherv.py index 15739d2..e584f19 100644 --- a/nlcpy_test/234_Iallgatherv.py +++ b/nlcpy_test/234_Iallgatherv.py @@ -10,7 +10,7 @@ #x = vp.array([(rank+1)**2 ,rank], dtype=int) x = vp.array([1,2,3], dtype=int) -y = vp.empty((3,3), dtype=int) +y = vp.empty((size,3), dtype=int) print("x = ",x) print("type(x) = ",type(x)) diff --git a/nlcpy_test/240_Ireduce_scatter_block.py b/nlcpy_test/240_Ireduce_scatter_block.py index 41f2847..673e0d1 100644 --- a/nlcpy_test/240_Ireduce_scatter_block.py +++ b/nlcpy_test/240_Ireduce_scatter_block.py @@ -11,7 +11,7 @@ #x = vp.arange(size, dtype=int) * (rank + 1) x = vp.array([[rank+1 for i in range(size)] for j in range(size) ], dtype=int) -y = vp.empty(3, dtype=int) +y = vp.empty(size, dtype=int) print("x = ",x) print("type(x) = ",type(x)) diff --git a/nlcpy_test/241_Ireduce_scatter.py b/nlcpy_test/241_Ireduce_scatter.py index d883480..5d61e98 100644 --- a/nlcpy_test/241_Ireduce_scatter.py +++ b/nlcpy_test/241_Ireduce_scatter.py @@ -11,7 +11,7 @@ #x = vp.arange(size, dtype=int) * (rank + 1) x = vp.array([[rank+1 for i in range(size)] for j in range(size) ], dtype=int) -y = vp.empty(3, dtype=int) +y = vp.empty(size, dtype=int) print("x = ",x) print("type(x) = ",type(x)) diff --git a/nlcpy_test/300_Scan.py b/nlcpy_test/300_Scan.py index 6325aa3..9e4c4bd 100644 --- a/nlcpy_test/300_Scan.py +++ b/nlcpy_test/300_Scan.py @@ -9,7 +9,7 @@ print("rank = ",rank) x = vp.array([1,2,3], dtype=int) * (rank+1) -y = vp.empty(3, dtype=int) +y = vp.zeros(3, dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/301_Exscan.py b/nlcpy_test/301_Exscan.py index 65e2605..6184e71 100644 --- a/nlcpy_test/301_Exscan.py +++ b/nlcpy_test/301_Exscan.py @@ -9,7 +9,7 @@ print("rank = ",rank) x = vp.array([1,2,3], dtype=int) * (rank+1) -y = vp.empty(3, dtype=int) +y = vp.zeros(3, dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/302_Iscan.py b/nlcpy_test/302_Iscan.py index 005b22f..cc42da6 100644 --- a/nlcpy_test/302_Iscan.py +++ b/nlcpy_test/302_Iscan.py @@ -9,7 +9,7 @@ print("rank = ",rank) x = vp.array([1,2,3], dtype=int) * (rank+1) -y = vp.empty(3, dtype=int) +y = 
vp.zeros(3, dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/303_Iexscan.py b/nlcpy_test/303_Iexscan.py index 0b619e9..44691df 100644 --- a/nlcpy_test/303_Iexscan.py +++ b/nlcpy_test/303_Iexscan.py @@ -9,7 +9,7 @@ print("rank = ",rank) x = vp.array([1,2,3], dtype=int) * (rank+1) -y = vp.empty(3, dtype=int) +y = vp.zeros(3, dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/306_Neighbor_allgather.py b/nlcpy_test/306_Neighbor_allgather.py index a0b1a2c..71be247 100644 --- a/nlcpy_test/306_Neighbor_allgather.py +++ b/nlcpy_test/306_Neighbor_allgather.py @@ -2,14 +2,15 @@ import numpy as np import nlcpy as vp -comm = MPI.COMM_WORLD.Create_cart((3,)) + +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() print("rank = ",rank) x = vp.array([(rank+1)**2 ,rank], dtype=int) -y = vp.empty((2, 2), dtype=int) +y = vp.zeros((2, 2), dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/307_Neighbor_allgatherv.py b/nlcpy_test/307_Neighbor_allgatherv.py index d648a78..f29e9d7 100644 --- a/nlcpy_test/307_Neighbor_allgatherv.py +++ b/nlcpy_test/307_Neighbor_allgatherv.py @@ -2,14 +2,14 @@ import numpy as np import nlcpy as vp -comm = MPI.COMM_WORLD.Create_cart((3,)) +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() print("rank = ",rank) x = vp.array([(rank+1)**2 ,rank], dtype=int) -y = vp.empty((2, 2), dtype=int) +y = vp.zeros((2, 2), dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/308_Neighbor_alltoall.py b/nlcpy_test/308_Neighbor_alltoall.py index c734a6a..fa80a1e 100644 --- a/nlcpy_test/308_Neighbor_alltoall.py +++ b/nlcpy_test/308_Neighbor_alltoall.py @@ -2,14 +2,14 @@ import numpy as np import nlcpy as vp -comm = MPI.COMM_WORLD.Create_cart((3,)) +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() print("rank = ",rank) x = vp.array([(rank+1)**2 ,rank], dtype=int) -y = vp.empty((2,), dtype=int) +y = vp.zeros((2,), dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/309_Neighbor_alltoallv.py b/nlcpy_test/309_Neighbor_alltoallv.py index b62776e..0727184 100644 --- a/nlcpy_test/309_Neighbor_alltoallv.py +++ b/nlcpy_test/309_Neighbor_alltoallv.py @@ -2,14 +2,14 @@ import numpy as np import nlcpy as vp -comm = MPI.COMM_WORLD.Create_cart((3,)) +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() print("rank = ",rank) x = vp.array([(rank+1)**2 ,rank], dtype=int) -y = vp.empty((2,), dtype=int) +y = vp.zeros((2,), dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/310_Neighbor_alltoallw.py b/nlcpy_test/310_Neighbor_alltoallw.py index e67df45..ec623ca 100644 --- a/nlcpy_test/310_Neighbor_alltoallw.py +++ b/nlcpy_test/310_Neighbor_alltoallw.py @@ -3,7 +3,7 @@ import nlcpy as vp dim = 3 -comm = MPI.COMM_WORLD.Create_cart((dim,)) +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() diff --git a/nlcpy_test/311_Ineighbor_allgather.py b/nlcpy_test/311_Ineighbor_allgather.py index 5343fb0..238950f 100644 --- a/nlcpy_test/311_Ineighbor_allgather.py +++ b/nlcpy_test/311_Ineighbor_allgather.py @@ -2,14 +2,14 @@ import numpy as np import nlcpy as vp -comm = 
MPI.COMM_WORLD.Create_cart((3,)) +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() print("rank = ",rank) x = vp.array([(rank+1)**2 ,rank], dtype=int) -y = vp.empty((2, 2), dtype=int) +y = vp.zeros((2, 2), dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/312_Ineighbor_allgatherv.py b/nlcpy_test/312_Ineighbor_allgatherv.py index f1eca84..f31e94a 100644 --- a/nlcpy_test/312_Ineighbor_allgatherv.py +++ b/nlcpy_test/312_Ineighbor_allgatherv.py @@ -2,14 +2,14 @@ import numpy as np import nlcpy as vp -comm = MPI.COMM_WORLD.Create_cart((3,)) +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() print("rank = ",rank) x = vp.array([(rank+1)**2 ,rank], dtype=int) -y = vp.empty((2, 2), dtype=int) +y = vp.zeros((2, 2), dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) diff --git a/nlcpy_test/313_Ineighbor_alltoall.py b/nlcpy_test/313_Ineighbor_alltoall.py index ed32684..995af40 100644 --- a/nlcpy_test/313_Ineighbor_alltoall.py +++ b/nlcpy_test/313_Ineighbor_alltoall.py @@ -3,7 +3,7 @@ import nlcpy as vp dim = 3 -comm = MPI.COMM_WORLD.Create_cart((dim,)) +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() diff --git a/nlcpy_test/314_Ineighbor_alltoallv.py b/nlcpy_test/314_Ineighbor_alltoallv.py index ff78fb8..63fe412 100644 --- a/nlcpy_test/314_Ineighbor_alltoallv.py +++ b/nlcpy_test/314_Ineighbor_alltoallv.py @@ -3,7 +3,7 @@ import nlcpy as vp dim = 3 -comm = MPI.COMM_WORLD.Create_cart((dim,)) +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() diff --git a/nlcpy_test/315_Ineighbor_alltoallw.py b/nlcpy_test/315_Ineighbor_alltoallw.py index 2811e42..071bb98 100644 --- a/nlcpy_test/315_Ineighbor_alltoallw.py +++ b/nlcpy_test/315_Ineighbor_alltoallw.py @@ -3,7 +3,7 @@ import nlcpy as vp dim = 3 -comm = MPI.COMM_WORLD.Create_cart((dim,)) +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() diff --git a/nlcpy_test/316_neighbor_allgather.py b/nlcpy_test/316_neighbor_allgather.py index ebaccf6..1354e78 100644 --- a/nlcpy_test/316_neighbor_allgather.py +++ b/nlcpy_test/316_neighbor_allgather.py @@ -2,7 +2,7 @@ import numpy as np import nlcpy as vp -comm = MPI.COMM_WORLD.Create_cart((3,)) +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() diff --git a/nlcpy_test/317_neighbor_alltoall.py b/nlcpy_test/317_neighbor_alltoall.py index 9f110e8..f45212f 100644 --- a/nlcpy_test/317_neighbor_alltoall.py +++ b/nlcpy_test/317_neighbor_alltoall.py @@ -2,7 +2,7 @@ import numpy as np import nlcpy as vp -comm = MPI.COMM_WORLD.Create_cart((3,)) +comm = MPI.COMM_WORLD.Create_cart((MPI.COMM_WORLD.Get_size(),)) size = comm.Get_size() rank = comm.Get_rank() diff --git a/nlcpy_test/500_Reduce_local.py b/nlcpy_test/500_Reduce_local.py index aaf0b78..1a8f019 100644 --- a/nlcpy_test/500_Reduce_local.py +++ b/nlcpy_test/500_Reduce_local.py @@ -12,7 +12,7 @@ op = MPI.SUM x = vp.array([(rank+1)**2 , rank], dtype=int) -y = vp.empty(2, dtype=int) +y = vp.zeros(2, dtype=int) print("x = ",x) print("type(x) = ",type(x)) diff --git a/nlcpy_test/600_Pack_Unpack.py b/nlcpy_test/600_Pack_Unpack.py index b86a1b9..6c4a426 100644 --- a/nlcpy_test/600_Pack_Unpack.py +++ b/nlcpy_test/600_Pack_Unpack.py @@ -12,11 +12,11 @@ datatype = 
MPI.INT x = vp.array([(rank+1)**2 , rank], dtype=int) -y = vp.empty(2, dtype=int) +y = vp.zeros(2, dtype=int) size1 = datatype.Pack_size(len(x), comm) size2 = datatype.Pack_size(len(y), comm) -tmpbuf = vp.empty(size1 + size2 + 1, dtype=int) +tmpbuf = vp.zeros(size1 + size2 + 1, dtype=int) print("x = ",x) print("type(x) = ",type(x)) diff --git a/nlcpy_test/601_Pack_Unpack_external.py b/nlcpy_test/601_Pack_Unpack_external.py index 52ca71a..578adf2 100644 --- a/nlcpy_test/601_Pack_Unpack_external.py +++ b/nlcpy_test/601_Pack_Unpack_external.py @@ -13,11 +13,11 @@ EXT32 = 'external32' x = vp.array([(rank+1)**2 , rank], dtype=int) -y = vp.empty(2, dtype=int) +y = vp.zeros(2, dtype=int) size1 = datatype.Pack_external_size(EXT32, x.size) size2 = datatype.Pack_external_size(EXT32, len(y)) -tmpbuf = vp.empty(size1 + size2 + 1, dtype=int) +tmpbuf = vp.zeros(size1 + size2 + 1, dtype=int) print("x = ",x) print("type(x) = ",type(x)) diff --git a/nlcpy_test/713_IReadWrite_AtAll.py b/nlcpy_test/713_IReadWrite_AtAll.py index 9f2e7de..e0f1673 100644 --- a/nlcpy_test/713_IReadWrite_AtAll.py +++ b/nlcpy_test/713_IReadWrite_AtAll.py @@ -9,21 +9,21 @@ fh = get_fh() fh.Set_size(0) -fh.Set_view(0, MPI.INT) +fh.Set_view(rank*12, MPI.INT) x = vp.array([1,2,3], dtype=int) -y = vp.empty(3, dtype=int) +y = vp.zeros(3, dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) print("type(y) = ",type(y)) -fh.Iwrite_at_all(3, x).Wait() +fh.Iwrite_at_all(rank*12, x).Wait() fh.Sync() comm.Barrier() fh.Sync() -fh.Iread_at_all(3, y).Wait() +fh.Iread_at_all(rank*12, y).Wait() comm.Barrier() print("Iwrite_at_all-Iread_at_all done") diff --git a/nlcpy_test/714_IReadWrite_All.py b/nlcpy_test/714_IReadWrite_All.py index 647369a..79e2354 100644 --- a/nlcpy_test/714_IReadWrite_All.py +++ b/nlcpy_test/714_IReadWrite_All.py @@ -9,23 +9,25 @@ fh = get_fh() fh.Set_size(0) -fh.Set_view(0, MPI.INT) +fh.Set_view(rank*12, MPI.INT) x = vp.array([1,2,3], dtype=int) -y = vp.empty(3, dtype=int) +y = vp.zeros(3, dtype=int) print("x = ",x) print("type(x) = ",type(x)) print("y = ",y) print("type(y) = ",type(y)) -fh.Seek(3, MPI.SEEK_SET) -fh.Iwrite_all(x).Wait() +fh.Seek(rank*12, MPI.SEEK_SET) +req = fh.Iwrite_all(x) +req.Wait() fh.Sync() comm.Barrier() fh.Sync() -fh.Seek(3, MPI.SEEK_SET) -fh.Iread_all(y).Wait() +fh.Seek(rank*12, MPI.SEEK_SET) +req = fh.Iread_all(y) +req.Wait() comm.Barrier() print("Iwrite_all-Iread_all done") diff --git a/nlcpy_test/pingpong_elapse.py b/nlcpy_test/pingpong_elapse.py new file mode 100644 index 0000000..b5a875c --- /dev/null +++ b/nlcpy_test/pingpong_elapse.py @@ -0,0 +1,46 @@ +from mpi4pyve import MPI +import nlcpy as vp +import time + +comm = MPI.COMM_WORLD +size = comm.Get_size() +rank = comm.Get_rank() + +tag1 = 10 +tag2 = 20 +loop_count = 50 + +comm.barrier() +status = MPI.Status() + +for N in range(0,28): + A = 1 << N + V = vp.zeros(A, dtype=float) + + # Warm-up + for i in range(5): + if rank == 0: + comm.Send([V, MPI.DOUBLE], 1, tag1) + comm.Recv([V, MPI.DOUBLE], 1, tag2, status=status) + elif rank == 1: + comm.Recv([V, MPI.DOUBLE], 0, tag1, status=status) + comm.Send([V, MPI.DOUBLE], 0, tag2) + + comm.barrier() + + t0 = MPI.Wtime() + for i in range(loop_count): + if rank == 0: + comm.Send([V, MPI.DOUBLE], 1, tag1) + comm.Recv([V, MPI.DOUBLE], 1, tag2, status=status) + elif rank == 1: + comm.Recv([V, MPI.DOUBLE], 0, tag1, status=status) + comm.Send([V, MPI.DOUBLE], 0, tag2) + t1 = MPI.Wtime() + elapsed_time = t1 - t0 + num_B = 8*A + B_in_GB = 1 << 30 + num_GB = num_B / B_in_GB + 
avg_time_per_transfer = elapsed_time / (2.0 * loop_count) + if rank == 0: + print('Transfer size (B): {:>10d}, Transfer Time (s): {:15.9f}, Bandwidth (GB/s): {:15.9f}'.format(num_B, avg_time_per_transfer, num_GB/avg_time_per_transfer)) diff --git a/rpm/Makefile b/rpm/Makefile new file mode 100644 index 0000000..f69147c --- /dev/null +++ b/rpm/Makefile @@ -0,0 +1,57 @@ +.PHONY:mpi4py-ve + +all:mpi4py-ve + +BASEDIR = .. + +# Parameters passed into the spec file +PLAT = el8 +ARCH = x86_64 +VERSION_PYTHON = python3.6 +VERSION_MPI4PYVE = 1.0.0 +VERSION_RPM = 1 +RELEASE = 1 + +DISTDIR = $(BASEDIR)/dist +RPMDIR = $(DISTDIR)/rpm/RPMBUILD +SPECS = $(RPMDIR)/SPECS +RPMS = $(RPMDIR)/RPMS +SOURCES = $(RPMDIR)/SOURCES + +# convert 'python3.x -> python3x' +$(eval REQUIRES_PYTHON = $(shell echo $(VERSION_PYTHON) | sed -e s"/\.//")) + +PRODUCT = nec-$(REQUIRES_PYTHON)-mpi4py-ve-$(VERSION_MPI4PYVE) +SRC_MPI4PYVE = nec-$(REQUIRES_PYTHON)-mpi4py-ve-$(VERSION_MPI4PYVE)-$(VERSION_RPM)-$(RELEASE) +RPM_MPI4PYVE = $(SRC_MPI4PYVE).$(PLAT).$(ARCH).rpm + +# Options of rpmbuild command +$(eval SPECDEF =--define "_topdir `cd ..;pwd`") +$(eval SPECDEF +=--define "python_version $(VERSION_PYTHON)") +$(eval SPECDEF +=--define "mpi4py_ve_version $(VERSION_MPI4PYVE)") +$(eval SPECDEF +=--define "rpm_version $(VERSION_RPM)") +$(eval SPECDEF +=--define "mpi4py_ve_platform $(PLAT)") +$(eval SPECDEF +=--define "mpi4py_ve_arch $(ARCH)") +$(eval SPECDEF +=--define "rpm_release $(RELEASE)") +$(eval SPECDEF +=--define "product $(PRODUCT)") + +mpi4py-ve: $(RPMS)/$(ARCH)/$(RPM_MPI4PYVE) + +# Creates Specfiles +$(SPECS)/$(RPM_MPI4PYVE:%.rpm=%.spec): + mkdir -p $(SPECS) $(RPMS) $(SOURCES) + cp -p SPECS/nec-python3-mpi4py-ve $@ + +# Creates RPM files +$(RPMS)/$(ARCH)/$(RPM_MPI4PYVE): $(SPECS)/$(RPM_MPI4PYVE:%.rpm=%.spec) + $(eval SRC = $(SRC_MPI4PYVE)) + $(eval SRCDIR = $(SOURCES)/$(SRC)) + rm -rf $(SRCDIR) + mkdir -p $(SRCDIR) + cp -p $(DISTDIR)/mpi4py_ve*.whl $(SRCDIR)/ + cd $(SRCDIR) && unzip mpi4py_ve*.whl && rm -f mpi4py_ve*.whl + cd $(SOURCES) && tar zcvf $(SRC).tar.gz $(SRC) + cd $(SPECS) && rpmbuild --bb $(SPECDEF) $(^F) + +clean: + rm -rf $(RPMDIR) diff --git a/rpm/SPECS/nec-python3-mpi4py-ve b/rpm/SPECS/nec-python3-mpi4py-ve new file mode 100644 index 0000000..2b0557d --- /dev/null +++ b/rpm/SPECS/nec-python3-mpi4py-ve @@ -0,0 +1,113 @@ +%define __os_install_post %{nil} +%define debug_package %{nil} + +%define mpi4py_ve_topdir /opt/nec/ve/mpi4py-ve +%define mpi4py_ve_libdir %{mpi4py_ve_topdir}/%{mpi4py_ve_version}/lib/%{python_version} + +Name: %{product} +Version: %{rpm_version} +Release: %{rpm_release}.%{mpi4py_ve_platform} +Summary: Message Passing Interface Python library for SX-Aurora TSUBASA +Group: Development/Libraries +Vendor: NEC Corporation +License: BSD-2-Clause +URL: https://github.com/SX-Aurora/mpi4py-ve +BuildArch: %{mpi4py_ve_arch} +# +Source0: %{product}-%{rpm_version}-%{rpm_release}.tar.gz +# +BuildRoot: %(mktemp -ud %{_tmppath}/%{product}-XXXXXX) + +AutoReqProv: no + +%description +mpi4py-ve is an extension to mpi4py for SX-Aurora TSUBASA systems. 
+ + +################################################## +## +%prep +## +################################################## +# install Libraries +%__rm -rf $RPM_BUILD_ROOT + +%setup -q -n %{product}-%{rpm_version}-%{rpm_release} + +################################################## +## +%build +## +################################################## + +################################################## +## +%install +## +################################################## +# create install directories +install -d --mode=755 %{buildroot}%{mpi4py_ve_libdir} +cp -r mpi4pyve %{buildroot}%{mpi4py_ve_libdir}/ +find %{buildroot}%{mpi4py_ve_libdir} -name "*.so" -type f | xargs chmod 755 +find %{buildroot}%{mpi4py_ve_libdir} ! -name "*.so" -type f | xargs chmod 644 + +############################################## +## +%clean +## +############################################## +%__rm -rf %{buildroot} + +############################################## +## +%files +## +############################################## +%defattr(-,root,root,-) + +%{mpi4py_ve_libdir} + +############################################## +## +%post +## +############################################## +if [ ! -d /usr/lib64/%{python_version}/site-packages ]; then + install -m 755 -d /usr/lib64/%{python_version}/site-packages +fi +cd /usr/lib64/%{python_version}/site-packages/ +if [ ! -e mpi4py-ve.pth ]; then + touch mpi4py-ve.pth + chmod 644 mpi4py-ve.pth +fi +mpi4py_ve_path=`ls -d /opt/nec/ve/mpi4py-ve/*.*.*/lib/%{python_version} | sort -rV | head -1` +echo ${mpi4py_ve_path} > mpi4py-ve.pth + +############################################## +## +%postun +## +############################################## +if [ $1 == 0 ]; then + rm -rf %{mpi4py_ve_libdir}/mpi4py-ve + rmdir --ignore-fail-on-non-empty %{mpi4py_ve_libdir} > /dev/null 2>&1 + rmdir --ignore-fail-on-non-empty %{mpi4py_ve_topdir}/%{mpi4py_ve_version}/lib > /dev/null 2>&1 + rmdir --ignore-fail-on-non-empty %{mpi4py_ve_topdir}/%{mpi4py_ve_version} > /dev/null 2>&1 + rmdir --ignore-fail-on-non-empty %{mpi4py_ve_topdir} > /dev/null 2>&1 + if ls -d /opt/nec/ve/mpi4py-ve/*.*.*/lib/%{python_version}/mpi4py_ve > /dev/null 2>&1; then + mpi4py_ve_path=`ls -d /opt/nec/ve/mpi4py-ve/*.*.*/lib/%{python_version} | sort -rV | head -1` + echo ${mpi4py_ve_path} > /usr/lib64/%{python_version}/site-packages/mpi4py-ve.pth + else + rm -f /usr/lib64/%{python_version}/site-packages/mpi4py-ve.pth + rmdir --ignore-fail-on-non-empty /usr/lib64/%{python_version}/site-packages > /dev/null 2>&1 + rmdir --ignore-fail-on-non-empty /usr/lib64/%{python_version} > /dev/null 2>&1 + fi +fi + +############################################## +## +%changelog +## +############################################## +* Tue Oct 25 2022 NEC Corporation +- First version of this spec file. 
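Note that the %post and %postun scriptlets above select the highest installed version with ``ls -d ... | sort -rV | head -1`` and write it to ``mpi4py-ve.pth``. A rough Python equivalent of that version selection, shown only as an illustrative sketch (the helper name is hypothetical):

::

    import glob
    import re

    def newest_mpi4py_ve_libdir(python_version='python3.6'):
        # Hypothetical helper mirroring `ls -d ... | sort -rV | head -1`:
        # pick the highest X.Y.Z version directory, compared numerically.
        paths = glob.glob('/opt/nec/ve/mpi4py-ve/*.*.*/lib/' + python_version)
        def version_key(path):
            m = re.search(r'/mpi4py-ve/(\d+)\.(\d+)\.(\d+)/', path)
            if m is None:
                return (0, 0, 0)
            return tuple(int(g) for g in m.groups())
        return max(paths, key=version_key) if paths else None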
diff --git a/setup.py b/setup.py index c789571..7604b78 100644 --- a/setup.py +++ b/setup.py @@ -94,7 +94,6 @@ def description(): } metadata['provides'] = ['mpi4py_ve'] -metadata['install_requires'] = ['nlcpy>=2.1.1'] # -------------------------------------------------------------------- # Extension modules @@ -453,8 +452,10 @@ def run_setup(): if not has_src or has_git or has_hg: setup_args['setup_requires'] = ['Cython>='+CYTHON] # - setup(packages = ['mpi4pyve', 'mpi4pyve.futures'], - package_dir = {'mpi4pyve' : 'src/mpi4pyve'}, + setup(packages = ['mpi4pyve', 'mpi4pyve.futures', 'mpi4pyve.util', 'mpi4pyve.veo'], + package_dir = {'mpi4pyve' : 'src/mpi4pyve', + 'mpi4pyve.util' : 'src/mpi4pyve/util', + 'mpi4pyve.veo' : 'src/mpi4pyve/veo'}, package_data = {'mpi4pyve' : ['*.pxd', 'include/mpi4pyve/*.h', 'include/mpi4pyve/*.i', diff --git a/src/mpi4pyve/MPI.pxd b/src/mpi4pyve/MPI.pxd index 6098ce0..544789c 100644 --- a/src/mpi4pyve/MPI.pxd +++ b/src/mpi4pyve/MPI.pxd @@ -95,8 +95,6 @@ ctypedef public api class Request [ cdef MPI_Request ob_mpi cdef unsigned flags cdef object ob_buf - cdef object numpy_arr - cdef object nlcpy_arr ctypedef public api class Prequest(Request) [ type PyMPIPrequest_Type, diff --git a/src/mpi4pyve/MPI/Comm.pyx b/src/mpi4pyve/MPI/Comm.pyx index 1db9525..c39b5f0 100644 --- a/src/mpi4pyve/MPI/Comm.pyx +++ b/src/mpi4pyve/MPI/Comm.pyx @@ -53,11 +53,8 @@ # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -import numpy -import nlcpy import mpi4pyve -include "NLCPy.pyx" -include "Notimpl.pyx" + # Communicator Comparisons # ------------------------ @@ -320,7 +317,6 @@ cdef class Comm: # Blocking Send and Receive Operations # ------------------------------------ - @send_for_nlcpy_array def Send(self, buf, int dest, int tag=0): """ Blocking send @@ -334,7 +330,6 @@ cdef class Comm: smsg.buf, smsg.count, smsg.dtype, dest, tag, self.ob_mpi) ) - @recv_buffer_for_nlcpy_array(arg_idx=1) def Recv(self, buf, int source=ANY_SOURCE, int tag=ANY_TAG, Status status=None): """ @@ -350,7 +345,6 @@ cdef class Comm: # Send-Receive # ------------ - @sendrecv_buffer_kwarg_for_nlcpy_array def Sendrecv(self, sendbuf, int dest, int sendtag=0, recvbuf=None, int source=ANY_SOURCE, int recvtag=ANY_TAG, Status status=None): @@ -373,7 +367,6 @@ cdef class Comm: rmsg.buf, rmsg.count, rmsg.dtype, source, recvtag, self.ob_mpi, statusp) ) - @recv_buffer_for_nlcpy_array(arg_idx=1) def Sendrecv_replace(self, buf, int dest, int sendtag=0, int source=ANY_SOURCE, int recvtag=ANY_TAG, Status status=None): @@ -401,7 +394,6 @@ cdef class Comm: # Nonblocking Communications # -------------------------- - @send_for_nlcpy_array def Isend(self, buf, int dest, int tag=0): """ Nonblocking send @@ -414,20 +406,12 @@ cdef class Comm: request.ob_buf = smsg return request - @nb_recv_for_nlcpy_array(arg_idx=1) - def Irecv(self, buf, int source=ANY_SOURCE, int tag=ANY_TAG, - numpy_arr=None, nlcpy_arr=None): + def Irecv(self, buf, int source=ANY_SOURCE, int tag=ANY_TAG): """ Nonblocking receive """ cdef _p_msg_p2p rmsg = message_p2p_recv(buf, source) - cdef Request request - if numpy_arr is None: - request = Request.__new__(Request) - else: - request = Request.__new__(Request, - numpy_arr=numpy_arr, - nlcpy_arr=nlcpy_arr) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Irecv( rmsg.buf, rmsg.count, rmsg.dtype, source, tag, self.ob_mpi, &request.ob_mpi) ) @@ -494,7 +478,6 @@ cdef class Comm: # Persistent Communication # ------------------------ - @send_for_nlcpy_array def 
Send_init(self, buf, int dest, int tag=0): """ Create a persistent request for a standard send @@ -507,20 +490,12 @@ cdef class Comm: request.ob_buf = smsg return request - @nb_recv_for_nlcpy_array(arg_idx=1) - def Recv_init(self, buf, int source=ANY_SOURCE, int tag=ANY_TAG, - numpy_arr=None, nlcpy_arr=None): + def Recv_init(self, buf, int source=ANY_SOURCE, int tag=ANY_TAG): """ Create a persistent request for a receive """ cdef _p_msg_p2p rmsg = message_p2p_recv(buf, source) - cdef Prequest request - if numpy_arr is None: - request = Prequest.__new__(Prequest) - else: - request = Prequest.__new__(Prequest, - numpy_arr=numpy_arr, - nlcpy_arr=nlcpy_arr) + cdef Prequest request = Prequest.__new__(Prequest) with nogil: CHKERR( MPI_Recv_init( rmsg.buf, rmsg.count, rmsg.dtype, source, tag, self.ob_mpi, &request.ob_mpi) ) @@ -532,7 +507,7 @@ cdef class Comm: # Blocking calls - @send_for_nlcpy_array + @raise_notimpl_for_vai_buffer def Bsend(self, buf, int dest, int tag=0): """ Blocking send in buffered mode @@ -542,7 +517,6 @@ cdef class Comm: smsg.buf, smsg.count, smsg.dtype, dest, tag, self.ob_mpi) ) - @send_for_nlcpy_array def Ssend(self, buf, int dest, int tag=0): """ Blocking send in synchronous mode @@ -552,7 +526,6 @@ cdef class Comm: smsg.buf, smsg.count, smsg.dtype, dest, tag, self.ob_mpi) ) - @send_for_nlcpy_array def Rsend(self, buf, int dest, int tag=0): """ Blocking send in ready mode @@ -564,7 +537,7 @@ cdef class Comm: # Nonblocking calls - @send_for_nlcpy_array + @raise_notimpl_for_vai_buffer def Ibsend(self, buf, int dest, int tag=0): """ Nonblocking send in buffered mode @@ -577,7 +550,6 @@ cdef class Comm: request.ob_buf = smsg return request - @send_for_nlcpy_array def Issend(self, buf, int dest, int tag=0): """ Nonblocking send in synchronous mode @@ -590,7 +562,6 @@ cdef class Comm: request.ob_buf = smsg return request - @send_for_nlcpy_array def Irsend(self, buf, int dest, int tag=0): """ Nonblocking send in ready mode @@ -605,7 +576,7 @@ cdef class Comm: # Persistent Requests - @send_for_nlcpy_array + @raise_notimpl_for_vai_buffer def Bsend_init(self, buf, int dest, int tag=0): """ Persistent request for a send in buffered mode @@ -618,7 +589,6 @@ cdef class Comm: request.ob_buf = smsg return request - @send_for_nlcpy_array def Ssend_init(self, buf, int dest, int tag=0): """ Persistent request for a send in synchronous mode @@ -631,7 +601,6 @@ cdef class Comm: request.ob_buf = smsg return request - @send_for_nlcpy_array def Rsend_init(self, buf, int dest, int tag=0): """ Persistent request for a send in ready mode @@ -659,7 +628,6 @@ cdef class Comm: # Global Communication Functions # ------------------------------ - @recv_buffer_for_nlcpy_array(arg_idx=1) def Bcast(self, buf, int root=0): """ Broadcast a message from one process @@ -671,7 +639,6 @@ cdef class Comm: m.sbuf, m.scount, m.stype, root, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Gather(self, sendbuf, recvbuf, int root=0): """ Gather together values from a group of processes @@ -683,7 +650,6 @@ cdef class Comm: m.rbuf, m.rcount, m.rtype, root, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Gatherv(self, sendbuf, recvbuf, int root=0): """ Gather Vector, gather data to one process from all other @@ -697,7 +663,6 @@ cdef class Comm: m.rbuf, m.rcounts, m.rdispls, m.rtype, root, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Scatter(self, sendbuf, recvbuf, int root=0): """ Scatter data from one process @@ -710,7 +675,6 @@ cdef class Comm: 
m.rbuf, m.rcount, m.rtype, root, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Scatterv(self, sendbuf, recvbuf, int root=0): """ Scatter Vector, scatter data from one process to all other @@ -724,7 +688,6 @@ cdef class Comm: m.rbuf, m.rcount, m.rtype, root, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Allgather(self, sendbuf, recvbuf): """ Gather to All, gather data from all processes and @@ -737,7 +700,6 @@ cdef class Comm: m.rbuf, m.rcount, m.rtype, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Allgatherv(self, sendbuf, recvbuf): """ Gather to All Vector, gather data from all processes and @@ -751,7 +713,6 @@ cdef class Comm: m.rbuf, m.rcounts, m.rdispls, m.rtype, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Alltoall(self, sendbuf, recvbuf): """ All to All Scatter/Gather, send data from all to all @@ -764,7 +725,6 @@ cdef class Comm: m.rbuf, m.rcount, m.rtype, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Alltoallv(self, sendbuf, recvbuf): """ All to All Scatter/Gather Vector, send data from all to all @@ -778,7 +738,6 @@ cdef class Comm: m.rbuf, m.rcounts, m.rdispls, m.rtype, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Alltoallw(self, sendbuf, recvbuf): """ Generalized All-to-All communication allowing different @@ -795,7 +754,6 @@ cdef class Comm: # Global Reduction Operations # --------------------------- - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Reduce(self, sendbuf, recvbuf, Op op=SUM, int root=0): """ Reduce @@ -806,7 +764,6 @@ cdef class Comm: m.sbuf, m.rbuf, m.rcount, m.rtype, op.ob_mpi, root, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Allreduce(self, sendbuf, recvbuf, Op op=SUM): """ All Reduce @@ -817,7 +774,6 @@ cdef class Comm: m.sbuf, m.rbuf, m.rcount, m.rtype, op.ob_mpi, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Reduce_scatter_block(self, sendbuf, recvbuf, Op op=SUM): """ Reduce-Scatter Block (regular, non-vector version) @@ -828,7 +784,6 @@ cdef class Comm: m.sbuf, m.rbuf, m.rcount, m.rtype, op.ob_mpi, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Reduce_scatter(self, sendbuf, recvbuf, recvcounts=None, Op op=SUM): """ Reduce-Scatter (vector version) @@ -851,44 +806,26 @@ cdef class Comm: with nogil: CHKERR( MPI_Ibarrier(self.ob_mpi, &request.ob_mpi) ) return request - @nb_recv_for_nlcpy_array(arg_idx=1) - def Ibcast(self, buf, int root=0, numpy_arr=None, nlcpy_arr=None): + def Ibcast(self, buf, int root=0): """ Nonblocking Broadcast """ cdef _p_msg_cco m = message_cco() m.for_bcast(buf, root, self.ob_mpi) - cdef Request request - if numpy_arr is None: - request = Request.__new__(Request) - else: - request = Request.__new__(Request, - numpy_arr=numpy_arr, - nlcpy_arr=nlcpy_arr) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Ibcast( m.sbuf, m.scount, m.stype, root, self.ob_mpi, &request.ob_mpi) ) request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Igather(self, sendbuf, recvbuf, int root=0, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Igather(self, sendbuf, recvbuf, int root=0): """ Nonblocking Gather """ cdef _p_msg_cco m = message_cco() m.for_gather(0, sendbuf, recvbuf, root, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - 
numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Igather( m.sbuf, m.scount, m.stype, m.rbuf, m.rcount, m.rtype, @@ -896,24 +833,13 @@ cdef class Comm: request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Igatherv(self, sendbuf, recvbuf, int root=0, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Igatherv(self, sendbuf, recvbuf, int root=0): """ Nonblocking Gather Vector """ cdef _p_msg_cco m = message_cco() m.for_gather(1, sendbuf, recvbuf, root, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Igatherv( m.sbuf, m.scount, m.stype, m.rbuf, m.rcounts, m.rdispls, m.rtype, @@ -921,24 +847,13 @@ cdef class Comm: request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Iscatter(self, sendbuf, recvbuf, int root=0, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Iscatter(self, sendbuf, recvbuf, int root=0): """ Nonblocking Scatter """ cdef _p_msg_cco m = message_cco() m.for_scatter(0, sendbuf, recvbuf, root, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Iscatter( m.sbuf, m.scount, m.stype, m.rbuf, m.rcount, m.rtype, @@ -946,24 +861,13 @@ cdef class Comm: request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Iscatterv(self, sendbuf, recvbuf, int root=0, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Iscatterv(self, sendbuf, recvbuf, int root=0): """ Nonblocking Scatter Vector """ cdef _p_msg_cco m = message_cco() m.for_scatter(1, sendbuf, recvbuf, root, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Iscatterv( m.sbuf, m.scounts, m.sdispls, m.stype, m.rbuf, m.rcount, m.rtype, @@ -971,24 +875,13 @@ cdef class Comm: request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Iallgather(self, sendbuf, recvbuf, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Iallgather(self, sendbuf, recvbuf): """ Nonblocking Gather to All """ cdef _p_msg_cco m = message_cco() m.for_allgather(0, sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - 
numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Iallgather( m.sbuf, m.scount, m.stype, m.rbuf, m.rcount, m.rtype, @@ -996,48 +889,26 @@ cdef class Comm: request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Iallgatherv(self, sendbuf, recvbuf, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Iallgatherv(self, sendbuf, recvbuf): """ Nonblocking Gather to All Vector """ cdef _p_msg_cco m = message_cco() m.for_allgather(1, sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Iallgatherv( m.sbuf, m.scount, m.stype, m.rbuf, m.rcounts, m.rdispls, m.rtype, self.ob_mpi, &request.ob_mpi) ) return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Ialltoall(self, sendbuf, recvbuf, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Ialltoall(self, sendbuf, recvbuf): """ Nonblocking All to All Scatter/Gather """ cdef _p_msg_cco m = message_cco() m.for_alltoall(0, sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Ialltoall( m.sbuf, m.scount, m.stype, m.rbuf, m.rcount, m.rtype, @@ -1045,24 +916,13 @@ cdef class Comm: request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Ialltoallv(self, sendbuf, recvbuf, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Ialltoallv(self, sendbuf, recvbuf): """ Nonblocking All to All Scatter/Gather Vector """ cdef _p_msg_cco m = message_cco() m.for_alltoall(1, sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Ialltoallv( m.sbuf, m.scounts, m.sdispls, m.stype, m.rbuf, m.rcounts, m.rdispls, m.rtype, @@ -1070,24 +930,13 @@ cdef class Comm: request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Ialltoallw(self, sendbuf, recvbuf, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Ialltoallw(self, sendbuf, recvbuf): """ Nonblocking Generalized All-to-All """ cdef _p_msg_ccow m = message_ccow() m.for_alltoallw(sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, 
recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Ialltoallw( m.sbuf, m.scounts, m.sdispls, m.stypes, m.rbuf, m.rcounts, m.rdispls, m.rtypes, @@ -1095,94 +944,50 @@ cdef class Comm: request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Ireduce(self, sendbuf, recvbuf, Op op=SUM, int root=0, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Ireduce(self, sendbuf, recvbuf, Op op=SUM, int root=0): """ Nonblocking Reduce """ cdef _p_msg_cco m = message_cco() m.for_reduce(sendbuf, recvbuf, root, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Ireduce( m.sbuf, m.rbuf, m.rcount, m.rtype, op.ob_mpi, root, self.ob_mpi, &request.ob_mpi) ) return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Iallreduce(self, sendbuf, recvbuf, Op op=SUM, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Iallreduce(self, sendbuf, recvbuf, Op op=SUM): """ Nonblocking All Reduce """ cdef _p_msg_cco m = message_cco() m.for_allreduce(sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Iallreduce( m.sbuf, m.rbuf, m.rcount, m.rtype, op.ob_mpi, self.ob_mpi, &request.ob_mpi) ) return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Ireduce_scatter_block(self, sendbuf, recvbuf, Op op=SUM, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Ireduce_scatter_block(self, sendbuf, recvbuf, Op op=SUM): """ Nonblocking Reduce-Scatter Block (regular, non-vector version) """ cdef _p_msg_cco m = message_cco() m.for_reduce_scatter_block(sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Ireduce_scatter_block( m.sbuf, m.rbuf, m.rcount, m.rtype, op.ob_mpi, self.ob_mpi, &request.ob_mpi) ) return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Ireduce_scatter(self, sendbuf, recvbuf, recvcounts=None, Op op=SUM, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Ireduce_scatter(self, sendbuf, recvbuf, recvcounts=None, Op op=SUM,): """ Nonblocking Reduce-Scatter (vector version) """ cdef _p_msg_cco m = message_cco() m.for_reduce_scatter(sendbuf, recvbuf, recvcounts, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr 
is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Ireduce_scatter( m.sbuf, m.rbuf, m.rcounts, m.rtype, op.ob_mpi, self.ob_mpi, &request.ob_mpi) ) @@ -1405,25 +1210,22 @@ cdef class Comm: # Python Communication # -------------------- # - @send_for_nlcpy_array def send(self, obj, int dest, int tag=0): """Send""" cdef MPI_Comm comm = self.ob_mpi return PyMPI_send(obj, dest, tag, comm) # - @send_for_nlcpy_array + @raise_notimpl_for_vai_buffer def bsend(self, obj, int dest, int tag=0): """Send in buffered mode""" cdef MPI_Comm comm = self.ob_mpi return PyMPI_bsend(obj, dest, tag, comm) # - @send_for_nlcpy_array def ssend(self, obj, int dest, int tag=0): """Send in synchronous mode""" cdef MPI_Comm comm = self.ob_mpi return PyMPI_ssend(obj, dest, tag, comm) # - @recv_for_nlcpy_array def recv(self, buf=None, int source=ANY_SOURCE, int tag=ANY_TAG, Status status=None): """Receive""" @@ -1431,8 +1233,6 @@ cdef class Comm: cdef MPI_Status *statusp = arg_Status(status) return PyMPI_recv(buf, source, tag, comm, statusp) # - @recv_for_nlcpy_array - @send_for_nlcpy_array def sendrecv(self, sendobj, int dest, int sendtag=0, recvbuf=None, int source=ANY_SOURCE, int recvtag=ANY_TAG, Status status=None): @@ -1443,7 +1243,6 @@ cdef class Comm: recvbuf, source, recvtag, comm, statusp) # - @send_for_nlcpy_array def isend(self, obj, int dest, int tag=0): """Nonblocking send""" cdef MPI_Comm comm = self.ob_mpi @@ -1451,7 +1250,7 @@ cdef class Comm: request.ob_buf = PyMPI_isend(obj, dest, tag, comm, &request.ob_mpi) return request # - @send_for_nlcpy_array + @raise_notimpl_for_vai_buffer def ibsend(self, obj, int dest, int tag=0): """Nonblocking send in buffered mode""" cdef MPI_Comm comm = self.ob_mpi @@ -1459,7 +1258,6 @@ cdef class Comm: request.ob_buf = PyMPI_ibsend(obj, dest, tag, comm, &request.ob_mpi) return request # - @send_for_nlcpy_array def issend(self, obj, int dest, int tag=0): """Nonblocking send in synchronous mode""" cdef MPI_Comm comm = self.ob_mpi @@ -1467,18 +1265,10 @@ cdef class Comm: request.ob_buf = PyMPI_issend(obj, dest, tag, comm, &request.ob_mpi) return request # - @nb_recv_for_nlcpy_array(arg_idx=1) - def irecv(self, buf=None, int source=ANY_SOURCE, int tag=ANY_TAG, - numpy_arr=None, nlcpy_arr=None): + def irecv(self, buf=None, int source=ANY_SOURCE, int tag=ANY_TAG): """Nonblocking receive""" cdef MPI_Comm comm = self.ob_mpi - cdef Request request - if numpy_arr is None: - request = Request.__new__(Request) - else: - request = Request.__new__(Request, - numpy_arr=numpy_arr, - nlcpy_arr=nlcpy_arr) + cdef Request request = Request.__new__(Request) request.ob_buf = PyMPI_irecv(buf, source, tag, comm, &request.ob_mpi) return request # @@ -1523,51 +1313,37 @@ cdef class Comm: cdef MPI_Comm comm = self.ob_mpi return PyMPI_barrier(comm) # - @recv_for_nlcpy_array - @send_for_nlcpy_array def bcast(self, obj, int root=0): """Broadcast""" cdef MPI_Comm comm = self.ob_mpi return PyMPI_bcast(obj, root, comm) # - @recv_for_nlcpy_array - @send_for_nlcpy_array def gather(self, sendobj, int root=0): """Gather""" cdef MPI_Comm comm = self.ob_mpi return PyMPI_gather(sendobj, root, comm) # - @recv_for_nlcpy_array - @send_for_nlcpy_array def scatter(self, sendobj, int root=0): """Scatter""" cdef MPI_Comm comm = self.ob_mpi return 
PyMPI_scatter(sendobj, root, comm) # - @recv_for_nlcpy_array - @send_for_nlcpy_array def allgather(self, sendobj): """Gather to All""" cdef MPI_Comm comm = self.ob_mpi return PyMPI_allgather(sendobj, comm) # - @recv_for_nlcpy_array - @send_for_nlcpy_array def alltoall(self, sendobj): """All to All Scatter/Gather""" cdef MPI_Comm comm = self.ob_mpi return PyMPI_alltoall(sendobj, comm) # - @recv_for_nlcpy_array - @send_for_nlcpy_array def reduce(self, sendobj, op=SUM, int root=0): """Reduce""" if op is None: op = SUM cdef MPI_Comm comm = self.ob_mpi return PyMPI_reduce(sendobj, op, root, comm) # - @recv_for_nlcpy_array - @send_for_nlcpy_array def allreduce(self, sendobj, op=SUM): """Reduce to All""" if op is None: op = SUM @@ -1731,7 +1507,6 @@ cdef class Intracomm(Comm): # Inclusive Scan - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Scan(self, sendbuf, recvbuf, Op op=SUM): """ Inclusive Scan @@ -1744,7 +1519,6 @@ cdef class Intracomm(Comm): # Exclusive Scan - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Exscan(self, sendbuf, recvbuf, Op op=SUM): """ Exclusive Scan @@ -1757,47 +1531,25 @@ cdef class Intracomm(Comm): # Nonblocking - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Iscan(self, sendbuf, recvbuf, Op op=SUM, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Iscan(self, sendbuf, recvbuf, Op op=SUM): """ Inclusive Scan """ cdef _p_msg_cco m = message_cco() m.for_scan(sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Iscan( m.sbuf, m.rbuf, m.rcount, m.rtype, op.ob_mpi, self.ob_mpi, &request.ob_mpi) ) return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Iexscan(self, sendbuf, recvbuf, Op op=SUM, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Iexscan(self, sendbuf, recvbuf, Op op=SUM): """ Exclusive Scan """ cdef _p_msg_cco m = message_cco() m.for_exscan(sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Iexscan( m.sbuf, m.rbuf, m.rcount, m.rtype, op.ob_mpi, self.ob_mpi, &request.ob_mpi) ) @@ -1805,16 +1557,12 @@ cdef class Intracomm(Comm): # Python Communication # - @recv_for_nlcpy_array - @send_for_nlcpy_array def scan(self, sendobj, op=SUM): """Inclusive Scan""" if op is None: op = SUM cdef MPI_Comm comm = self.ob_mpi return PyMPI_scan(sendobj, op, comm) # - @recv_for_nlcpy_array - @send_for_nlcpy_array def exscan(self, sendobj, op=SUM): """Exclusive Scan""" if op is None: op = SUM @@ -2013,7 +1761,6 @@ cdef class Topocomm(Intracomm): # Neighborhood Collectives # ------------------------ - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Neighbor_allgather(self, sendbuf, recvbuf): """ Neighbor Gather to All @@ -2025,7 +1772,6 @@ cdef class Topocomm(Intracomm): m.rbuf, m.rcount, m.rtype, self.ob_mpi) ) - 
@sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Neighbor_allgatherv(self, sendbuf, recvbuf): """ Neighbor Gather to All Vector @@ -2037,7 +1783,6 @@ cdef class Topocomm(Intracomm): m.rbuf, m.rcounts, m.rdispls, m.rtype, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Neighbor_alltoall(self, sendbuf, recvbuf): """ Neighbor All-to-All @@ -2049,7 +1794,6 @@ cdef class Topocomm(Intracomm): m.rbuf, m.rcount, m.rtype, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Neighbor_alltoallv(self, sendbuf, recvbuf): """ Neighbor All-to-All Vector @@ -2061,7 +1805,6 @@ cdef class Topocomm(Intracomm): m.rbuf, m.rcounts, m.rdispls, m.rtype, self.ob_mpi) ) - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Neighbor_alltoallw(self, sendbuf, recvbuf): """ Neighbor All-to-All Generalized @@ -2076,24 +1819,13 @@ cdef class Topocomm(Intracomm): # Nonblocking Neighborhood Collectives # ------------------------------------ - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Ineighbor_allgather(self, sendbuf, recvbuf, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Ineighbor_allgather(self, sendbuf, recvbuf): """ Nonblocking Neighbor Gather to All """ cdef _p_msg_cco m = message_cco() m.for_neighbor_allgather(0, sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Ineighbor_allgather( m.sbuf, m.scount, m.stype, m.rbuf, m.rcount, m.rtype, @@ -2101,24 +1833,13 @@ cdef class Topocomm(Intracomm): request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Ineighbor_allgatherv(self, sendbuf, recvbuf, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Ineighbor_allgatherv(self, sendbuf, recvbuf): """ Nonblocking Neighbor Gather to All Vector """ cdef _p_msg_cco m = message_cco() m.for_neighbor_allgather(1, sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Ineighbor_allgatherv( m.sbuf, m.scount, m.stype, m.rbuf, m.rcounts, m.rdispls, m.rtype, @@ -2126,24 +1847,13 @@ cdef class Topocomm(Intracomm): request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Ineighbor_alltoall(self, sendbuf, recvbuf, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Ineighbor_alltoall(self, sendbuf, recvbuf): """ Nonblocking Neighbor All-to-All """ cdef _p_msg_cco m = message_cco() m.for_neighbor_alltoall(0, sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = 
Request.__new__(Request) with nogil: CHKERR( MPI_Ineighbor_alltoall( m.sbuf, m.scount, m.stype, m.rbuf, m.rcount, m.rtype, @@ -2151,24 +1861,13 @@ cdef class Topocomm(Intracomm): request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Ineighbor_alltoallv(self, sendbuf, recvbuf, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Ineighbor_alltoallv(self, sendbuf, recvbuf): """ Nonblocking Neighbor All-to-All Vector """ cdef _p_msg_cco m = message_cco() m.for_neighbor_alltoall(1, sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Ineighbor_alltoallv( m.sbuf, m.scounts, m.sdispls, m.stype, m.rbuf, m.rcounts, m.rdispls, m.rtype, @@ -2176,24 +1875,13 @@ cdef class Topocomm(Intracomm): request.ob_buf = m return request - @nb_sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) - def Ineighbor_alltoallw(self, sendbuf, recvbuf, - send_nlcpy_arr=None, send_numpy_arr=None, - recv_nlcpy_arr=None, recv_numpy_arr=None): + def Ineighbor_alltoallw(self, sendbuf, recvbuf): """ Nonblocking Neighbor All-to-All Generalized """ cdef _p_msg_ccow m = message_ccow() m.for_neighbor_alltoallw(sendbuf, recvbuf, self.ob_mpi) - cdef Request request - if send_nlcpy_arr is None and recv_nlcpy_arr is None: - request = Request.__new__(Request) - else: - numpy_arrays=[send_numpy_arr, recv_numpy_arr] - nlcpy_arrays=[send_nlcpy_arr, recv_nlcpy_arr] - request = Request.__new__(Request, - numpy_arr=numpy_arrays, - nlcpy_arr=nlcpy_arrays) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Ineighbor_alltoallw( m.sbuf, m.scounts, m.sdisplsA, m.stypes, m.rbuf, m.rcounts, m.rdisplsA, m.rtypes, @@ -2203,15 +1891,11 @@ cdef class Topocomm(Intracomm): # Python Communication # - @recv_for_nlcpy_array - @send_for_nlcpy_array def neighbor_allgather(self, sendobj): """Neighbor Gather to All""" cdef MPI_Comm comm = self.ob_mpi return PyMPI_neighbor_allgather(sendobj, comm) # - @recv_for_nlcpy_array - @send_for_nlcpy_array def neighbor_alltoall(self, sendobj): """Neighbor All to All Scatter/Gather""" cdef MPI_Comm comm = self.ob_mpi @@ -2613,8 +2297,7 @@ COMM_WORLD = __COMM_WORLD__ #: World communicator handle BSEND_OVERHEAD = MPI_BSEND_OVERHEAD #: Upper bound of memory overhead for sending in buffered mode - -@send_for_nlcpy_array +@raise_notimpl_for_vai_buffer def Attach_buffer(buf): """ Attach a user-provided buffer for diff --git a/src/mpi4pyve/MPI/Datatype.pyx b/src/mpi4pyve/MPI/Datatype.pyx index 9bf47fc..99fca08 100644 --- a/src/mpi4pyve/MPI/Datatype.pyx +++ b/src/mpi4pyve/MPI/Datatype.pyx @@ -95,7 +95,6 @@ COMBINER_F90_COMPLEX = MPI_COMBINER_F90_COMPLEX COMBINER_F90_INTEGER = MPI_COMBINER_F90_INTEGER -include "NLCPy.pyx" cdef class Datatype: @@ -689,7 +688,6 @@ cdef class Datatype: # Pack and Unpack # --------------- - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) def Pack(self, inbuf, outbuf, int position, Comm comm): """ Pack into contiguous memory according to datatype. 
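With the decorator layer removed above, every nonblocking call now returns a plain MPI request: there is no hidden NumPy staging array to copy back into an NLCPy buffer on completion. A minimal usage sketch, assuming NLCPy ndarrays are accepted directly through the buffer interface (the ring pattern and sizes below are illustrative, not taken from this patch):

::

    import nlcpy
    from mpi4pyve import MPI

    comm = MPI.COMM_WORLD
    rank, size = comm.Get_rank(), comm.Get_size()
    sbuf = nlcpy.full(8, rank, dtype='i8')  # resides in VE memory
    rbuf = nlcpy.empty(8, dtype='i8')

    # Irecv completes in place; Wait/Waitall no longer perform the
    # deferred numpy -> nlcpy write-back that the old wrappers did
    reqs = [comm.Isend(sbuf, dest=(rank + 1) % size),
            comm.Irecv(rbuf, source=(rank - 1) % size)]
    MPI.Request.Waitall(reqs)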
@@ -708,7 +706,6 @@ cdef class Datatype: &position, comm.ob_mpi) ) return position - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 3)) def Unpack(self, inbuf, int position, outbuf, Comm comm): """ Unpack from contiguous memory according to datatype. @@ -740,7 +737,7 @@ cdef class Datatype: # Canonical Pack and Unpack # ------------------------- - @sendrecv_buffer_for_nlcpy_array(arg_idx=(2, 3)) + @raise_notimpl_for_vai_buffer def Pack_external(self, datarep, inbuf, outbuf, Aint position): """ Pack into contiguous memory according to datatype, @@ -762,7 +759,7 @@ cdef class Datatype: obptr, oblen, &position) ) return position - @sendrecv_buffer_for_nlcpy_array(arg_idx=(2, 4)) + @raise_notimpl_for_vai_buffer def Unpack_external(self, datarep, inbuf, Aint position, outbuf): """ Unpack from contiguous memory according to datatype, diff --git a/src/mpi4pyve/MPI/File.pyx b/src/mpi4pyve/MPI/File.pyx index bd35df6..36c3060 100644 --- a/src/mpi4pyve/MPI/File.pyx +++ b/src/mpi4pyve/MPI/File.pyx @@ -283,7 +283,6 @@ cdef class File: # Data Access with Explicit Offsets # --------------------------------- - @recv_buffer_for_nlcpy_array(arg_idx=2) def Read_at(self, Offset offset, buf, Status status=None): """ Read using explicit offset @@ -293,7 +292,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read_at( self.ob_mpi, offset, m.buf, m.count, m.dtype, statusp) ) - @recv_buffer_for_nlcpy_array(arg_idx=2) def Read_at_all(self, Offset offset, buf, Status status=None): """ Collective read using explicit offset @@ -303,7 +301,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read_at_all( self.ob_mpi, offset, m.buf, m.count, m.dtype, statusp) ) - @send_for_nlcpy_array def Write_at(self, Offset offset, buf, Status status=None): """ Write using explicit offset @@ -313,7 +310,6 @@ cdef class File: with nogil: CHKERR( MPI_File_write_at( self.ob_mpi, offset, m.buf, m.count, m.dtype, statusp) ) - @send_for_nlcpy_array def Write_at_all(self, Offset offset, buf, Status status=None): """ Collective write using explicit offset @@ -323,58 +319,40 @@ cdef class File: with nogil: CHKERR( MPI_File_write_at_all( self.ob_mpi, offset, m.buf, m.count, m.dtype, statusp) ) - @nb_recv_for_nlcpy_array(arg_idx=2) - def Iread_at(self, Offset offset, buf, - numpy_arr=None, nlcpy_arr=None): + def Iread_at(self, Offset offset, buf): """ Nonblocking read using explicit offset """ cdef _p_msg_io m = message_io_read(buf) - cdef Request request - if numpy_arr is None: - request = Request.__new__(Request) - else: - request = Request.__new__(Request, - numpy_arr=numpy_arr, - nlcpy_arr=nlcpy_arr) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_File_iread_at( self.ob_mpi, offset, m.buf, m.count, m.dtype, &request.ob_mpi) ) request.ob_buf = m return request - @nb_recv_for_nlcpy_array(arg_idx=2) - def Iread_at_all(self, Offset offset, buf, - numpy_arr=None, nlcpy_arr=None): + def Iread_at_all(self, Offset offset, buf): """ Nonblocking collective read using explicit offset """ cdef _p_msg_io m = message_io_read(buf) - cdef Request request - if numpy_arr is None: - request = Request.__new__(Request) - else: - request = Request.__new__(Request, - numpy_arr=numpy_arr, - nlcpy_arr=nlcpy_arr) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_File_iread_at_all( self.ob_mpi, offset, m.buf, m.count, m.dtype, &request.ob_mpi) ) request.ob_buf = m return request - @send_for_nlcpy_array def Iwrite_at(self, Offset offset, buf): """ Nonblocking write using explicit offset """ cdef _p_msg_io m = 
message_io_write(buf) - cdef Request request = Request.__new__(Request) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_File_iwrite_at( self.ob_mpi, offset, m.buf, m.count, m.dtype, &request.ob_mpi) ) request.ob_buf = m return request - @send_for_nlcpy_array def Iwrite_at_all(self, Offset offset, buf): """ Nonblocking collective write using explicit offset @@ -389,7 +367,6 @@ cdef class File: # Data Access with Individual File Pointers # ----------------------------------------- - @recv_buffer_for_nlcpy_array(arg_idx=1) def Read(self, buf, Status status=None): """ Read using individual file pointer @@ -399,7 +376,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read( self.ob_mpi, m.buf, m.count, m.dtype, statusp) ) - @recv_buffer_for_nlcpy_array(arg_idx=1) def Read_all(self, buf, Status status=None): """ Collective read using individual file pointer @@ -409,7 +385,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read_all( self.ob_mpi, m.buf, m.count, m.dtype, statusp) ) - @send_for_nlcpy_array def Write(self, buf, Status status=None): """ Write using individual file pointer @@ -419,7 +394,6 @@ cdef class File: with nogil: CHKERR( MPI_File_write( self.ob_mpi, m.buf, m.count, m.dtype, statusp) ) - @send_for_nlcpy_array def Write_all(self, buf, Status status=None): """ Collective write using individual file pointer @@ -429,57 +403,39 @@ cdef class File: with nogil: CHKERR( MPI_File_write_all( self.ob_mpi, m.buf, m.count, m.dtype, statusp) ) - @nb_recv_for_nlcpy_array(arg_idx=1) - def Iread(self, buf, - numpy_arr=None, nlcpy_arr=None): + def Iread(self, buf): """ Nonblocking read using individual file pointer """ cdef _p_msg_io m = message_io_read(buf) - cdef Request request - if numpy_arr is None: - request = Request.__new__(Request) - else: - request = Request.__new__(Request, - numpy_arr=numpy_arr, - nlcpy_arr=nlcpy_arr) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_File_iread( self.ob_mpi, m.buf, m.count, m.dtype, &request.ob_mpi) ) request.ob_buf = m return request - @nb_recv_for_nlcpy_array(arg_idx=1) - def Iread_all(self, buf, - numpy_arr=None, nlcpy_arr=None): + def Iread_all(self, buf): """ Nonblocking collective read using individual file pointer """ cdef _p_msg_io m = message_io_read(buf) - cdef Request request - if numpy_arr is None: - request = Request.__new__(Request) - else: - request = Request.__new__(Request, - numpy_arr=numpy_arr, - nlcpy_arr=nlcpy_arr) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_File_iread_all( self.ob_mpi, m.buf, m.count, m.dtype, &request.ob_mpi) ) request.ob_buf = m return request - @send_for_nlcpy_array def Iwrite(self, buf): """ Nonblocking write using individual file pointer """ cdef _p_msg_io m = message_io_write(buf) - cdef Request request = Request.__new__(Request) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_File_iwrite( self.ob_mpi, m.buf, m.count, m.dtype, &request.ob_mpi) ) request.ob_buf = m return request - @send_for_nlcpy_array def Iwrite_all(self, buf): """ Nonblocking collective write using individual file pointer @@ -519,7 +475,6 @@ cdef class File: # Data Access with Shared File Pointers # ------------------------------------- - @recv_buffer_for_nlcpy_array(arg_idx=1) def Read_shared(self, buf, Status status=None): """ Read using shared file pointer @@ -529,7 +484,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read_shared( self.ob_mpi, m.buf, m.count, m.dtype, statusp) ) - @send_for_nlcpy_array def Write_shared(self, 
buf, Status status=None): """ Write using shared file pointer @@ -539,38 +493,28 @@ cdef class File: with nogil: CHKERR( MPI_File_write_shared( self.ob_mpi, m.buf, m.count, m.dtype, statusp) ) - @nb_recv_for_nlcpy_array(arg_idx=1) - def Iread_shared(self, buf, - numpy_arr=None, nlcpy_arr=None): + def Iread_shared(self, buf): """ Nonblocking read using shared file pointer """ cdef _p_msg_io m = message_io_read(buf) - cdef Request request - if numpy_arr is None: - request = Request.__new__(Request) - else: - request = Request.__new__(Request, - numpy_arr=numpy_arr, - nlcpy_arr=nlcpy_arr) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_File_iread_shared( self.ob_mpi, m.buf, m.count, m.dtype, &request.ob_mpi) ) request.ob_buf = m return request - @send_for_nlcpy_array def Iwrite_shared(self, buf): """ Nonblocking write using shared file pointer """ cdef _p_msg_io m = message_io_write(buf) - cdef Request request = Request.__new__(Request) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_File_iwrite_shared( self.ob_mpi, m.buf, m.count, m.dtype, &request.ob_mpi) ) request.ob_buf = m return request - @recv_buffer_for_nlcpy_array(arg_idx=1) def Read_ordered(self, buf, Status status=None): """ Collective read using shared file pointer @@ -580,7 +524,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read_ordered( self.ob_mpi, m.buf, m.count, m.dtype, statusp) ) - @send_for_nlcpy_array def Write_ordered(self, buf, Status status=None): """ Collective write using shared file pointer @@ -612,7 +555,6 @@ cdef class File: # explicit offset - @recv_buffer_for_nlcpy_array(arg_idx=2) def Read_at_all_begin(self, Offset offset, buf): """ Start a split collective read using explicit offset @@ -621,7 +563,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read_at_all_begin( self.ob_mpi, offset, m.buf, m.count, m.dtype) ) - @recv_buffer_for_nlcpy_array(arg_idx=1) def Read_at_all_end(self, buf, Status status=None): """ Complete a split collective read using explicit offset @@ -631,7 +572,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read_at_all_end( self.ob_mpi, m.buf, statusp) ) - @send_for_nlcpy_array def Write_at_all_begin(self, Offset offset, buf): """ Start a split collective write using explicit offset @@ -640,7 +580,6 @@ cdef class File: with nogil: CHKERR( MPI_File_write_at_all_begin( self.ob_mpi, offset, m.buf, m.count, m.dtype) ) - @send_for_nlcpy_array def Write_at_all_end(self, buf, Status status=None): """ Complete a split collective write using explicit offset @@ -652,7 +591,6 @@ cdef class File: # individual file pointer - @recv_buffer_for_nlcpy_array(arg_idx=1) def Read_all_begin(self, buf): """ Start a split collective read @@ -662,7 +600,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read_all_begin( self.ob_mpi, m.buf, m.count, m.dtype) ) - @recv_buffer_for_nlcpy_array(arg_idx=1) def Read_all_end(self, buf, Status status=None): """ Complete a split collective read @@ -673,7 +610,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read_all_end( self.ob_mpi, m.buf, statusp) ) - @send_for_nlcpy_array def Write_all_begin(self, buf): """ Start a split collective write @@ -683,7 +619,6 @@ cdef class File: with nogil: CHKERR( MPI_File_write_all_begin( self.ob_mpi, m.buf, m.count, m.dtype) ) - @send_for_nlcpy_array def Write_all_end(self, buf, Status status=None): """ Complete a split collective write @@ -696,7 +631,6 @@ cdef class File: # shared file pointer - @recv_buffer_for_nlcpy_array(arg_idx=1) def Read_ordered_begin(self, buf): """ Start a 
split collective read @@ -706,7 +640,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read_ordered_begin( self.ob_mpi, m.buf, m.count, m.dtype) ) - @recv_buffer_for_nlcpy_array(arg_idx=1) def Read_ordered_end(self, buf, Status status=None): """ Complete a split collective read @@ -717,7 +650,6 @@ cdef class File: with nogil: CHKERR( MPI_File_read_ordered_end( self.ob_mpi, m.buf, statusp) ) - @send_for_nlcpy_array def Write_ordered_begin(self, buf): """ Start a split collective write using @@ -727,7 +659,6 @@ cdef class File: with nogil: CHKERR( MPI_File_write_ordered_begin( self.ob_mpi, m.buf, m.count, m.dtype) ) - @send_for_nlcpy_array def Write_ordered_end(self, buf, Status status=None): """ Complete a split collective write diff --git a/src/mpi4pyve/MPI/MPI.pyx b/src/mpi4pyve/MPI/MPI.pyx index cb80d09..cbc82b9 100644 --- a/src/mpi4pyve/MPI/MPI.pyx +++ b/src/mpi4pyve/MPI/MPI.pyx @@ -64,6 +64,7 @@ include "atimport.pxi" bootstrap() initialize() +set_mpi_local_size() include "asstring.pxi" include "asbuffer.pxi" @@ -127,6 +128,7 @@ WIN_MODEL = MPI_WIN_MODEL include "Exception.pyx" include "Errhandler.pyx" +include "Notimpl.pyx" include "Datatype.pyx" include "Status.pyx" include "Request.pyx" @@ -137,6 +139,8 @@ include "Group.pyx" include "Comm.pyx" include "Win.pyx" include "File.pyx" +include "Util.pyx" +include "Veo.pyx" # Memory Allocation @@ -168,6 +172,7 @@ def Init(): """ CHKERR( MPI_Init(NULL, NULL) ) initialize() + set_mpi_local_size() def Finalize(): """ @@ -191,13 +196,18 @@ THREAD_SERIALIZED = MPI_THREAD_SERIALIZED THREAD_MULTIPLE = MPI_THREAD_MULTIPLE #: Multiple threads may call MPI -def Init_thread(int required=THREAD_MULTIPLE): +def Init_thread(int required=THREAD_SERIALIZED): """ Initialize the MPI execution environment """ cdef int provided = MPI_THREAD_SINGLE + + if required == THREAD_MULTIPLE: + PyErr_WarnEx(UserWarning, b"MPI_THREAD_MULTIPLE cannot be used with NEC MPI", 1) + CHKERR( MPI_Init_thread(NULL, NULL, required, &provided) ) initialize() + set_mpi_local_size() return provided def Query_thread(): diff --git a/src/mpi4pyve/MPI/Message.pyx b/src/mpi4pyve/MPI/Message.pyx index a0818b8..fc7169a 100644 --- a/src/mpi4pyve/MPI/Message.pyx +++ b/src/mpi4pyve/MPI/Message.pyx @@ -117,7 +117,6 @@ cdef class Message: # Matched receives # ---------------- - @recv_buffer_for_nlcpy_array(arg_idx=1) def Recv(self, buf, Status status=None): """ Blocking receive of matched message @@ -134,8 +133,7 @@ cdef class Message: if self is not __MESSAGE_NO_PROC__: self.ob_mpi = message - @nb_recv_for_nlcpy_array(arg_idx=1) - def Irecv(self, buf, numpy_arr=None, nlcpy_arr=None): + def Irecv(self, buf): """ Nonblocking receive of matched message """ @@ -144,13 +142,7 @@ cdef class Message: if message == MPI_MESSAGE_NO_PROC: source = MPI_PROC_NULL cdef _p_msg_p2p rmsg = message_p2p_recv(buf, source) - cdef Request request - if numpy_arr is None: - request = Request.__new__(Request) - else: - request = Request.__new__(Request, - numpy_arr=numpy_arr, - nlcpy_arr=nlcpy_arr) + cdef Request request = Request.__new__(Request) with nogil: CHKERR( MPI_Imrecv( rmsg.buf, rmsg.count, rmsg.dtype, &message, &request.ob_mpi) ) @@ -184,7 +176,6 @@ cdef class Message: if flag == 0: return None return message # - @recv_for_nlcpy_array def recv(self, Status status=None): """Blocking receive of matched message""" cdef object rmsg = self.ob_buf diff --git a/src/mpi4pyve/MPI/NLCPy.pyx b/src/mpi4pyve/MPI/NLCPy.pyx deleted file mode 100644 index 25249f0..0000000 --- a/src/mpi4pyve/MPI/NLCPy.pyx +++ /dev/null 
@@ -1,312 +0,0 @@ -### mpi4py-ve License ## -# -# Copyright (c) 2022, NEC Corporation. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, -# are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, this -# list of conditions and the following disclaimer listed in this license in the -# documentation and/or other materials provided with the distribution. -# -# The copyright holders provide no reassurances that the source code provided does not -# infringe any patent, copyright, or any other intellectual property rights of third -# parties. The copyright holders disclaim any liability to any recipient for claims -# brought against recipient by any third party for infringement of that parties -# intellectual property rights. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -# SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANYTHEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# - -import numpy -import nlcpy -import mpi4pyve - - -def _replace_nlcpy_to_numpy(args): - if args is None: - return args - _type = type(args) - _args = list(args) - for i, arg in enumerate(_args): - if isinstance(arg, nlcpy.core.core.ndarray): - _args[i] = numpy.asarray(arg) - elif isinstance(arg, (list, tuple)): - _args[i] = _replace_nlcpy_to_numpy(arg) - return _type(_args) - - -def _replace_nlcpy_to_numpy_kwargs(kwargs): - for k in kwargs.keys(): - if isinstance(kwargs[k], nlcpy.core.core.ndarray): - kwargs[k] = numpy.asarray(kwargs[k]) - elif isinstance(kwargs[k], (list, tuple)): - kwargs[k] = _replace_nlcpy_to_numpy(kwargs[k]) - return kwargs - - -def _undo_numpy_to_nlcpy(args): - if args is None: - return args - _type = type(args) - _args = list(args) - for i, arg in enumerate(_args): - if isinstance(arg, numpy.ndarray): - _args[i] = nlcpy.asarray(arg) - elif isinstance(arg, (list, tuple)): - _args[i] = _undo_numpy_to_nlcpy(arg) - return _type(_args) - - -def _undo_numpy_to_nlcpy_kwargs(kwargs): - for k in kwargs.keys(): - if isinstance(kwargs[k], nlcpy.core.core.ndarray): - kwargs[k] = nlcpy.asarray(kwargs[k]) - elif isinstance(kwargs[k], (list, tuple)): - kwargs[k] = _undo_numpy_to_nlcpy(kwargs[k]) - return kwargs - - -def send_for_nlcpy_array(send_func): - def _get_numpy_array_wrapper(*args, **kwargs): - args = _replace_nlcpy_to_numpy(args) - kwargs = _replace_nlcpy_to_numpy_kwargs(kwargs) - return send_func(*args, **kwargs) - return _get_numpy_array_wrapper - - -def recv_for_nlcpy_array(recv_func): - def _get_nlcpy_array_wrapper(*args, **kwargs): - result = recv_func(*args, **kwargs) - if isinstance(result, (list, tuple)): - result = _undo_numpy_to_nlcpy(result) - elif isinstance(result, numpy.ndarray): - result = nlcpy.asarray(result) - return result - return _get_nlcpy_array_wrapper - - -def nb_recv_for_nlcpy_array(arg_idx): - def _nb_recv_for_nlcpy_array(recv_func): - def _get_nlcpy_array_wrapper(*args, **kwargs): - if len(args) > arg_idx: - if isinstance(args[arg_idx], (list, tuple)): - if isinstance(args[arg_idx][0], nlcpy.core.core.ndarray): - kwargs["nlcpy_arr"] = args[arg_idx][0] - args = _replace_nlcpy_to_numpy(args) - kwargs["numpy_arr"] = args[arg_idx][0] - elif isinstance(args[arg_idx], nlcpy.core.core.ndarray): - kwargs["nlcpy_arr"] = args[arg_idx] - args = _replace_nlcpy_to_numpy(args) - kwargs["numpy_arr"] = args[arg_idx] - return recv_func(*args, **kwargs) - return _get_nlcpy_array_wrapper - return _nb_recv_for_nlcpy_array - - -def recv_buffer_for_nlcpy_array(arg_idx): - def _recv_buffer_for_nlcpy_array(recv_func): - def _get_nlcpy_array_wrapper(*args, **kwargs): - tmp_array, tmp_buf, tmp_buftype = None, None, None - if len(args) > arg_idx: - if isinstance(args[arg_idx], (list, tuple)): - if isinstance(args[arg_idx][0], nlcpy.core.core.ndarray): - tmp_buftype = type(args[arg_idx]) - tmp_buf = list(args[arg_idx]) - tmp_array = args[arg_idx][0] - elif isinstance(args[arg_idx], nlcpy.core.core.ndarray): - tmp_array = args[arg_idx] - args = _replace_nlcpy_to_numpy(args) - recv_func(*args, **kwargs) - if tmp_array is not None: - args = _undo_numpy_to_nlcpy(args) - if tmp_buf is not None: - tmp_array[:len(args[arg_idx][0])] = args[arg_idx][0] - tmp_buf[0] = tmp_array - _args = list(args) - _args[arg_idx] = tmp_buftype(tmp_buf) - args = tuple(_args) - else: - tmp_array[:len(args[arg_idx])] = args[arg_idx] - _args = list(args) - _args[arg_idx] = tmp_array - args = tuple(_args) - return _get_nlcpy_array_wrapper - return _recv_buffer_for_nlcpy_array - - -def 
sendrecv_buffer_for_nlcpy_array(arg_idx): - def _sendrecv_buffer_for_nlcpy_array(sendrecv_func): - def _get_nlcpy_array_wrapper(*args, **kwargs): - tmp_send_array, tmp_send_buf, tmp_send_buftype = None, None, None - tmp_recv_array, tmp_recv_buf, tmp_recv_buftype = None, None, None - if len(args) > arg_idx[0]: - if isinstance(args[arg_idx[0]], (list, tuple)): - if isinstance(args[arg_idx[0]][0], nlcpy.core.core.ndarray): - tmp_send_buftype = type(args[arg_idx[0]]) - tmp_send_buf = list(args[arg_idx[0]]) - tmp_send_array = args[arg_idx[0]][0] - elif isinstance(args[arg_idx[0]], nlcpy.core.core.ndarray): - tmp_send_array = args[arg_idx[0]] - if len(args) > arg_idx[1]: - if isinstance(args[arg_idx[1]], (list, tuple)): - if isinstance(args[arg_idx[1]][0], nlcpy.core.core.ndarray): - tmp_recv_buftype = type(args[arg_idx[1]]) - tmp_recv_buf = list(args[arg_idx[1]]) - tmp_recv_array = args[arg_idx[1]][0] - elif isinstance(args[arg_idx[1]], nlcpy.core.core.ndarray): - tmp_recv_array = args[arg_idx[1]] - args = _replace_nlcpy_to_numpy(args) - result = sendrecv_func(*args, **kwargs) - if tmp_send_array is not None or tmp_recv_array is not None: - if tmp_send_array is not None and tmp_send_buf is None: - tmp_send_array[:len(args[arg_idx[0]])] = args[arg_idx[0]] - tmp_send_buf = tmp_send_array - elif tmp_send_array is not None: - tmp_send_array[:len(args[arg_idx[0]][0])] = args[arg_idx[0]][0] - tmp_send_buf[0] = tmp_send_array - tmp_send_buf = tmp_send_buftype(tmp_send_buf) - if tmp_recv_array is not None and tmp_recv_buf is None: - tmp_recv_array[:len(args[arg_idx[1]])] = args[arg_idx[1]] - tmp_recv_buf = tmp_recv_array - elif tmp_recv_array is not None: - tmp_recv_array[:len(args[arg_idx[1]][0])] = args[arg_idx[1]][0] - tmp_recv_buf[0] = tmp_recv_array - tmp_recv_buf = tmp_recv_buftype(tmp_recv_buf) - args = tuple((args[0], tmp_send_buf, tmp_recv_buf)) - return result - return _get_nlcpy_array_wrapper - return _sendrecv_buffer_for_nlcpy_array - - -def nb_sendrecv_buffer_for_nlcpy_array(arg_idx): - def _nb_sendrecv_buffer_for_nlcpy_array(recv_func): - def _get_nlcpy_array_wrapper(*args, **kwargs): - if (isinstance(args[arg_idx[0]], (list, tuple)) or - (not isinstance(args[arg_idx[0]], - (list, tuple, nlcpy.core.core.ndarray)) and - ((args[arg_idx[0]] is None) or - (args[arg_idx[0]] == mpi4pyve.MPI.IN_PLACE)))) and\ - (isinstance(args[arg_idx[1]], (list, tuple)) or - (not isinstance(args[arg_idx[1]], - (list, tuple, nlcpy.core.core.ndarray)) and - ((args[arg_idx[1]] is None) or - (args[arg_idx[1]] == mpi4pyve.MPI.IN_PLACE)))): - kwargs["send_nlcpy_arr"] = args[arg_idx[0]][0]\ - if (isinstance(args[arg_idx[0]], (list, tuple)) and - len(args[arg_idx[0]]) > 0 and - isinstance(args[arg_idx[0]][0], - nlcpy.core.core.ndarray)) else None - kwargs["recv_nlcpy_arr"] = args[arg_idx[1]][0]\ - if (isinstance(args[arg_idx[1]], (list, tuple)) and - len(args[arg_idx[1]]) > 0 and - isinstance(args[arg_idx[1]][0], - nlcpy.core.core.ndarray)) else None - args = _replace_nlcpy_to_numpy(args) - kwargs["send_numpy_arr"] = args[arg_idx[0]][0]\ - if kwargs["send_nlcpy_arr"] is not None else None - kwargs["recv_numpy_arr"] = args[arg_idx[1]][0]\ - if kwargs["recv_nlcpy_arr"] is not None else None - - elif (isinstance(args[arg_idx[0]], (list, tuple)) and - isinstance(args[arg_idx[1]], nlcpy.core.core.ndarray)): - kwargs["send_nlcpy_arr"] = args[arg_idx[0]][0]\ - if isinstance(args[arg_idx[0]][0], nlcpy.core.core.ndarray) else None - kwargs["recv_nlcpy_arr"] = args[arg_idx[1]]\ - if isinstance(args[arg_idx[1]], 
nlcpy.core.core.ndarray) else None - args = _replace_nlcpy_to_numpy(args) - kwargs["send_numpy_arr"] = args[arg_idx[0]][0]\ - if kwargs["send_nlcpy_arr"] is not None else None - kwargs["recv_numpy_arr"] = args[arg_idx[1]]\ - if kwargs["recv_nlcpy_arr"] is not None else None - - elif (isinstance(args[arg_idx[0]], nlcpy.core.core.ndarray) and - isinstance(args[arg_idx[1]], (list, tuple))): - kwargs["send_nlcpy_arr"] = args[arg_idx[0]]\ - if isinstance(args[arg_idx[0]], nlcpy.core.core.ndarray) else None - kwargs["recv_nlcpy_arr"] = args[arg_idx[1]][0]\ - if isinstance(args[arg_idx[1]][0], nlcpy.core.core.ndarray) else None - args = _replace_nlcpy_to_numpy(args) - kwargs["send_numpy_arr"] = args[arg_idx[0]]\ - if kwargs["send_nlcpy_arr"] is not None else None - kwargs["recv_numpy_arr"] = args[arg_idx[1]][0]\ - if kwargs["recv_nlcpy_arr"] is not None else None - - elif (isinstance(args[arg_idx[0]], nlcpy.core.core.ndarray) or - args[arg_idx[0]] is None or - args[arg_idx[0]] == mpi4pyve.MPI.IN_PLACE) and\ - (isinstance(args[arg_idx[1]], nlcpy.core.core.ndarray) or - args[arg_idx[1]] is None or args[arg_idx[1]] == mpi4pyve.MPI.IN_PLACE): - kwargs["send_nlcpy_arr"] = args[arg_idx[0]]\ - if isinstance(args[arg_idx[0]], nlcpy.core.core.ndarray) else None - kwargs["recv_nlcpy_arr"] = args[arg_idx[1]]\ - if isinstance(args[arg_idx[1]], nlcpy.core.core.ndarray) else None - args = _replace_nlcpy_to_numpy(args) - kwargs["send_numpy_arr"] = args[arg_idx[0]]\ - if kwargs["send_nlcpy_arr"] is not None else None - kwargs["recv_numpy_arr"] = args[arg_idx[1]]\ - if kwargs["recv_nlcpy_arr"] is not None else None - - return recv_func(*args, **kwargs) - return _get_nlcpy_array_wrapper - return _nb_sendrecv_buffer_for_nlcpy_array - - -def sendrecv_buffer_kwarg_for_nlcpy_array(recv_func): - def _get_nlcpy_array_wrapper(*args, **kwargs): - tmp_send_array = None - tmp_send_buf = None - tmp_recv_array = None - tmp_recv_buf = None - sendbuf, sendkey = (kwargs['sendbuf'], True)\ - if 'sendbuf' in kwargs else (args[0], False) - recvbuf, recvkey = (kwargs['recvbuf'], True)\ - if 'recvbuf' in kwargs else (args[4], False)\ - if len(args) > 4 else (None, False) - - if isinstance(sendbuf, list): - if isinstance(sendbuf[0], nlcpy.core.core.ndarray): - tmp_send_array = sendbuf[0] - tmp_send_buf = sendbuf - elif isinstance(sendbuf, nlcpy.core.core.ndarray): - tmp_send_array = sendbuf - - if recvbuf is not None: - if isinstance(recvbuf, list): - if isinstance(recvbuf[0], nlcpy.core.core.ndarray): - tmp_recv_array = recvbuf[0] - tmp_recv_buf = recvbuf - elif isinstance(recvbuf, nlcpy.core.core.ndarray): - tmp_recv_array = recvbuf - args = _replace_nlcpy_to_numpy(args) - kwargs = _replace_nlcpy_to_numpy_kwargs(kwargs) - - recv_func(*args, **kwargs) - - sendfact = kwargs['sendbuf'] if sendkey else args[0] - recvfact = kwargs['recvbuf'] if recvkey else args[4]\ - if len(args) > 4 else None - args = _undo_numpy_to_nlcpy(args) - kwargs = _undo_numpy_to_nlcpy_kwargs(kwargs) - if tmp_send_array is not None: - sendfact = tmp_send_array if tmp_send_buf is None else tmp_send_buf # NOQA - if tmp_recv_array is not None: - if tmp_recv_buf is not None: - tmp_recv_array[:len(recvfact[0])] = recvfact[0] - tmp_recv_buf[0] = tmp_recv_array - recvfact = tmp_recv_buf - else: - tmp_recv_array[:len(recvfact)] = recvfact - recvfact = tmp_recv_array - return - return _get_nlcpy_array_wrapper diff --git a/src/mpi4pyve/MPI/Notimpl.pyx b/src/mpi4pyve/MPI/Notimpl.pyx index 90dc254..d11cd74 100644 --- a/src/mpi4pyve/MPI/Notimpl.pyx +++ 
b/src/mpi4pyve/MPI/Notimpl.pyx @@ -28,37 +28,37 @@ # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # -import nlcpy +def _check_vai_buffer(obj): + try: return hasattr(obj, '__ve_array_interface__') + except: return False - -def _find_nlcpy(args): +def _find_vai_buffer(args): for arg in args: - _raise_nlcpy_array(arg) + _raise_vai_buffer(arg) return - -def _find_nlcpy_kwargs(kwargs): +def _find_vai_buffer_kwargs(kwargs): for k in kwargs.keys(): - _raise_nlcpy_array(kwargs[k]) + _raise_vai_buffer(kwargs[k]) return -def _raise_nlcpy_array(arg): - if isinstance(arg, nlcpy.core.core.ndarray): - raise NotImplementedError('NLCPy array is not implemented yet.') +def _raise_vai_buffer(arg): + if _check_vai_buffer(arg): + raise NotImplementedError('__ve_array_interface__ is not implemented yet.') elif isinstance(arg, (list, tuple)): - _find_nlcpy(arg) + _find_vai_buffer(arg) elif isinstance(arg, dict): - _find_nlcpy_kwargs(arg) + _find_vai_buffer_kwargs(arg) return -def check_for_nlcpy_array(func): - def _raise_nlcpy_array_wrapper(*args, **kwargs): - _find_nlcpy(args) - _find_nlcpy_kwargs(kwargs) +def raise_notimpl_for_vai_buffer(func): + def _raise_vai_buffer_wrapper(*args, **kwargs): + _find_vai_buffer(args) + _find_vai_buffer_kwargs(kwargs) return func(*args, **kwargs) - return _raise_nlcpy_array_wrapper + return _raise_vai_buffer_wrapper def raise_notimpl_for_necmpi(func): diff --git a/src/mpi4pyve/MPI/Op.pyx b/src/mpi4pyve/MPI/Op.pyx index 0ee9983..4e05427 100644 --- a/src/mpi4pyve/MPI/Op.pyx +++ b/src/mpi4pyve/MPI/Op.pyx @@ -53,7 +53,6 @@ # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # - cdef class Op: """ @@ -138,7 +137,7 @@ cdef class Op: def __get__(self): return self.Is_commutative() - @sendrecv_buffer_for_nlcpy_array(arg_idx=(1, 2)) + @raise_notimpl_for_vai_buffer def Reduce_local(self, inbuf, inoutbuf): """ Apply a reduction operator to local data diff --git a/src/mpi4pyve/MPI/Request.pyx b/src/mpi4pyve/MPI/Request.pyx index e113dcb..ed34ea2 100644 --- a/src/mpi4pyve/MPI/Request.pyx +++ b/src/mpi4pyve/MPI/Request.pyx @@ -53,59 +53,13 @@ # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
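As the Notimpl.pyx hunk above shows, the rejection decorator now keys on the presence of `__ve_array_interface__` rather than on the concrete NLCPy ndarray type, and it walks nested lists, tuples, and dicts before the wrapped call runs, so any VE-buffer provider is rejected uniformly. A sketch of the user-visible effect on a still-unsupported path such as buffered-mode send, assuming NLCPy ndarrays expose that attribute:

::

    import nlcpy
    from mpi4pyve import MPI

    comm = MPI.COMM_WORLD
    x = nlcpy.zeros(4, dtype='f4')
    assert hasattr(x, '__ve_array_interface__')

    try:
        comm.Bsend(x, dest=0)  # decorated: raises before any MPI call
    except NotImplementedError:
        pass  # buffered mode currently requires host (e.g. NumPy) buffers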
# -def _replace_nlcpy_to_numpy(args): - _type = type(args) - _args = list(args) - for i, arg in enumerate(_args): - if isinstance(arg, nlcpy.core.core.ndarray): - _args[i] = numpy.asarray(arg) - elif isinstance(arg, (list, tuple)): - _args[i] = _replace_nlcpy_to_numpy(arg) - return _type(_args) - - -def _undo_numpy_to_nlcpy(args): - _type = type(args) - _args = list(args) - for i, arg in enumerate(_args): - if isinstance(arg, numpy.ndarray): - _args[i] = nlcpy.asarray(arg) - elif isinstance(arg, (list, tuple)): - _args[i] = _undo_numpy_to_nlcpy(arg) - return _type(_args) - - -cdef wait_for_nlcpy_array(wait_func): - def _get_nlcpy_array_wrapper(*args, **kwargs): - result = wait_func(*args, **kwargs) - if isinstance(result, (list, tuple)): - result = _undo_numpy_to_nlcpy(result) - elif isinstance(result, numpy.ndarray): - result = nlcpy.asarray(result) - return result - return _get_nlcpy_array_wrapper - - -def _get_numpy_to_nlcpy(args): - if isinstance(args, (list, tuple)): - return _undo_numpy_to_nlcpy(args) - elif isinstance(args, numpy.ndarray): - return nlcpy.asarray(args) - else: - return args - - cdef class Request: """ Request """ - def __cinit__(self, Request request=None, numpy_arr=None, nlcpy_arr=None): - if numpy_arr is not None: - self.numpy_arr = numpy_arr - self.nlcpy_arr = nlcpy_arr - + def __cinit__(self, Request request=None): self.ob_mpi = MPI_REQUEST_NULL if request is None: return self.ob_mpi = request.ob_mpi @@ -130,16 +84,6 @@ cdef class Request: # Completion Operations # --------------------- - def _trans_nlcpy_array(self): - if self.numpy_arr is not None: - if not isinstance(self.numpy_arr, list): - self.nlcpy_arr[:] = self.numpy_arr - else: - for i in range(0, len(self.numpy_arr)): - if not self.numpy_arr[i] is None: - self.nlcpy_arr[i][:] = self.numpy_arr[i] - return - def Wait(self, Status status=None): """ Wait for a send or receive to complete @@ -150,8 +94,6 @@ cdef class Request: &self.ob_mpi, statusp) ) if self.ob_mpi == MPI_REQUEST_NULL: self.ob_buf = None - - self._trans_nlcpy_array() return True def Test(self, Status status=None): @@ -164,8 +106,6 @@ cdef class Request: &self.ob_mpi, &flag, statusp) ) if self.ob_mpi == MPI_REQUEST_NULL: self.ob_buf = None - - self._trans_nlcpy_array() return flag def Free(self): @@ -202,8 +142,6 @@ cdef class Request: with nogil: CHKERR( MPI_Waitany( count, irequests, &index, statusp) ) finally: - for req in requests: - req._trans_nlcpy_array() release_rs(requests, None, count, irequests, NULL) return index @@ -223,8 +161,6 @@ cdef class Request: with nogil: CHKERR( MPI_Testany( count, irequests, &index, &flag, statusp) ) finally: - for req in requests: - req._trans_nlcpy_array() release_rs(requests, None, count, irequests, NULL) # return (index, flag) @@ -244,8 +180,6 @@ cdef class Request: with nogil: CHKERR( MPI_Waitall( count, irequests, istatuses) ) finally: - for req in requests: - req._trans_nlcpy_array() release_rs(requests, statuses, count, irequests, istatuses) return True @@ -265,8 +199,6 @@ cdef class Request: with nogil: CHKERR( MPI_Testall( count, irequests, &flag, istatuses) ) finally: - for req in requests: - req._trans_nlcpy_array() release_rs(requests, statuses, count, irequests, istatuses) return flag @@ -287,8 +219,6 @@ cdef class Request: with nogil: CHKERR( MPI_Waitsome( incount, irequests, &outcount, iindices, istatuses) ) finally: - for req in requests: - req._trans_nlcpy_array() release_rs(requests, statuses, incount, irequests, istatuses) # cdef int i = 0 @@ -314,8 +244,6 @@ cdef class Request: 
with nogil: CHKERR( MPI_Testsome( incount, irequests, &outcount, iindices, istatuses) ) finally: - for req in requests: - req._trans_nlcpy_array() release_rs(requests, statuses, incount, irequests, istatuses) # cdef int i = 0 @@ -356,23 +284,19 @@ cdef class Request: # Python Communication # -------------------- # - @wait_for_nlcpy_array def wait(self, Status status=None): """ Wait for a send or receive to complete """ cdef msg = PyMPI_wait(self, status) - self._trans_nlcpy_array() return msg # - @wait_for_nlcpy_array def test(self, Status status=None): """ Test for the completion of a send or receive """ cdef int flag = 0 cdef msg = PyMPI_test(self, &flag, status) - self._trans_nlcpy_array() return (flag, msg) # @classmethod @@ -382,10 +306,6 @@ cdef class Request: """ cdef int index = MPI_UNDEFINED cdef msg = PyMPI_waitany(requests, &index, status) - - for req in requests: - req._trans_nlcpy_array() - msg = _get_numpy_to_nlcpy(msg) return (index, msg) # @classmethod @@ -396,10 +316,6 @@ cdef class Request: cdef int index = MPI_UNDEFINED cdef int flag = 0 cdef msg = PyMPI_testany(requests, &index, &flag, status) - - for req in requests: - req._trans_nlcpy_array() - msg = _get_numpy_to_nlcpy(msg) return (index, flag, msg) # @classmethod @@ -408,10 +324,6 @@ cdef class Request: Wait for all previously initiated requests to complete """ cdef msg = PyMPI_waitall(requests, statuses) - - for req in requests: - req._trans_nlcpy_array() - msg = _get_numpy_to_nlcpy(msg) return msg # @classmethod @@ -421,10 +333,6 @@ cdef class Request: """ cdef int flag = 0 cdef msg = PyMPI_testall(requests, &flag, statuses) - - for req in requests: - req._trans_nlcpy_array() - msg = _get_numpy_to_nlcpy(msg) return (flag, msg) @@ -434,11 +342,7 @@ cdef class Prequest(Request): Persistent request """ - def __cinit__(self, Request request=None, numpy_arr=None, nlcpy_arr=None): - if numpy_arr is not None: - self.numpy_arr = numpy_arr - self.nlcpy_arr = nlcpy_arr - + def __cinit__(self, Request request=None): if self.ob_mpi == MPI_REQUEST_NULL: return (request) diff --git a/src/mpi4pyve/MPI/Util.pyx b/src/mpi4pyve/MPI/Util.pyx new file mode 100644 index 0000000..89c3867 --- /dev/null +++ b/src/mpi4pyve/MPI/Util.pyx @@ -0,0 +1,25 @@ +cdef extern long nmpi_aveo_dma_count[3] +cdef extern long nmpi_aveo_dma_size[3] +cdef extern double nmpi_aveo_dma_time[3] + +def _get_dma_count(): + return [nmpi_aveo_dma_count[0], + nmpi_aveo_dma_count[1], + nmpi_aveo_dma_count[2],] + +def _get_dma_size(): + return [nmpi_aveo_dma_size[0], + nmpi_aveo_dma_size[1], + nmpi_aveo_dma_size[2],] + +def _get_dma_time(): + return [nmpi_aveo_dma_time[0], + nmpi_aveo_dma_time[1], + nmpi_aveo_dma_time[2],] + +def _nmpi_aveo_dma_clear(): + for i in range(3): + nmpi_aveo_dma_count[i] = 0 + nmpi_aveo_dma_size[i] = 0 + nmpi_aveo_dma_time[i] = 0 + diff --git a/src/mpi4pyve/MPI/Veo.pyx b/src/mpi4pyve/MPI/Veo.pyx new file mode 100644 index 0000000..4aca2db --- /dev/null +++ b/src/mpi4pyve/MPI/Veo.pyx @@ -0,0 +1,98 @@ +### mpi4py-ve License ## +# +# Copyright (c) 2022, NEC Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, this +# list of conditions and the following disclaimer listed in this license in the +# documentation and/or other materials provided with the distribution. +# +# The copyright holders provide no reassurances that the source code provided does not +# infringe any patent, copyright, or any other intellectual property rights of third +# parties. The copyright holders disclaim any liability to any recipient for claims +# brought against recipient by any third party for infringement of that parties +# intellectual property rights. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +# SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANYTHEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + + +from libc.stdint cimport * + +cdef extern from "<dlfcn.h>" nogil: + void *dlopen(const char *, int) + char *dlerror() + void *dlsym(void *, const char *) + int dlclose(void *) + int RTLD_LAZY + int RTLD_NOW + int RTLD_GLOBAL + int RTLD_LOCAL + +cdef int (*hooked_veo_alloc_hmem)(void *, void **, const size_t) +cdef int (*hooked_veo_free_hmem)(void *) + + +cdef _get_veo_sym(): + global hooked_veo_alloc_hmem, hooked_veo_free_hmem + cdef void *hdl_veo = NULL + cdef void *hdl_mpi = NULL + cdef char *err = NULL + + hdl_veo = dlopen('libmpi_veo.so.1', RTLD_NOW) + err = dlerror() + if err != NULL: + raise RuntimeError(err) + hooked_veo_alloc_hmem = \ + <int (*)(void *, void **, const size_t)>dlsym( + hdl_veo, 'veo_alloc_hmem') + err = dlerror() + if err != NULL: + raise RuntimeError(err) + hooked_veo_free_hmem = \ + <int (*)(void *)>dlsym(hdl_veo, 'veo_free_hmem') + err = dlerror() + if err != NULL: + raise RuntimeError(err) + +cdef int _hooked_alloc_hmem(void* proc, uint64_t* addr, const size_t size): + global hooked_veo_alloc_hmem + if hooked_veo_alloc_hmem == NULL: + _get_veo_sym() + cdef void *vemem = NULL + cdef int ret = 0 + ret = hooked_veo_alloc_hmem(proc, &vemem, size) + addr[0] = <uint64_t>vemem + return ret + +cdef int _hooked_free_hmem(uint64_t addr): + global hooked_veo_free_hmem + if hooked_veo_free_hmem == NULL: + _get_veo_sym() + cdef int ret = 0 + ret = hooked_veo_free_hmem(<void *>addr) + return ret + + +def _alloc_hmem(uint64_t proc_handle, size_t size): + cdef uint64_t hmem_addr = 0 + if _hooked_alloc_hmem(<void *>proc_handle, &hmem_addr, size): + raise MemoryError("Out of memory on VE") + return (hmem_addr) + + +def _free_hmem(uint64_t addr): + if _hooked_free_hmem(addr): + raise RuntimeError("veo_free_hmem failed") diff --git a/src/mpi4pyve/MPI/Win.pyx b/src/mpi4pyve/MPI/Win.pyx index db054d4..dd71a75 100644 --- a/src/mpi4pyve/MPI/Win.pyx +++ b/src/mpi4pyve/MPI/Win.pyx @@ -53,6 +53,8 @@ # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
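Together with the public wrappers this patch adds later in ``mpi4pyve/veo/veo.py``, the allocation hooks above can be exercised from Python as in the sketch below. One stated assumption: a valid VEO process handle has to come from whichever library owns the VE process; the ``proc_handle = 0`` here is a placeholder, and calling the sketch with it would simply make the allocation fail. ::

    from mpi4pyve import veo

    # Assumption: a real VEO process handle is obtained elsewhere;
    # 0 is a placeholder, not a working handle.
    proc_handle = 0

    # veo_alloc_hmem is called under the hood; MemoryError is raised
    # if the allocation fails, RuntimeError if the later free fails.
    addr = veo.alloc_hmem(proc_handle, 8 * 1024)  # 8 KiB of heterogeneous memory
    try:
        pass  # hand `addr` (a VEMVA address) to code expecting VE memory
    finally:
        veo.free_hmem(addr)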
# +import sys + # Create flavors # -------------- WIN_FLAVOR_CREATE = MPI_WIN_FLAVOR_CREATE @@ -79,7 +81,13 @@ LOCK_EXCLUSIVE = MPI_LOCK_EXCLUSIVE LOCK_SHARED = MPI_LOCK_SHARED -include "Notimpl.pyx" +def _request_flush(): + try: + if 'nlcpy' in sys.modules: + vp = sys.modules['nlcpy'] + vp.request.flush() + except Exception as e: + pass cdef class Win: @@ -402,7 +410,6 @@ cdef class Win: # Communication Operations # ------------------------ - @check_for_nlcpy_array def Put(self, origin, int target_rank, target=None): """ Put data into a memory window on a remote process. @@ -415,7 +422,6 @@ cdef class Win: msg.tdisp, msg.tcount, msg.ttype, self.ob_mpi) ) - @check_for_nlcpy_array def Get(self, origin, int target_rank, target=None): """ Get data from a memory window on a remote process. @@ -428,7 +434,6 @@ cdef class Win: msg.tdisp, msg.tcount, msg.ttype, self.ob_mpi) ) - @check_for_nlcpy_array def Accumulate(self, origin, int target_rank, target=None, Op op=SUM): """ @@ -442,7 +447,6 @@ cdef class Win: msg.tdisp, msg.tcount, msg.ttype, op.ob_mpi, self.ob_mpi) ) - @check_for_nlcpy_array def Get_accumulate(self, origin, result, int target_rank, target=None, Op op=SUM): """ @@ -457,7 +461,6 @@ cdef class Win: msg.tdisp, msg.tcount, msg.ttype, op.ob_mpi, self.ob_mpi) ) - @check_for_nlcpy_array def Fetch_and_op(self, origin, result,int target_rank, Aint target_disp=0, Op op=SUM): """ @@ -470,7 +473,6 @@ cdef class Win: target_rank, target_disp, op.ob_mpi, self.ob_mpi) ) - @check_for_nlcpy_array def Compare_and_swap(self, origin, compare, result, int target_rank, Aint target_disp=0): """ @@ -485,7 +487,6 @@ cdef class Win: # Request-based RMA Communication Operations # ------------------------------------------ - @check_for_nlcpy_array def Rput(self, origin, int target_rank, target=None): """ Put data into a memory window on a remote process. @@ -501,7 +502,6 @@ cdef class Win: request.ob_buf = msg return request - @check_for_nlcpy_array def Rget(self, origin, int target_rank, target=None): """ Get data from a memory window on a remote process. 
@@ -517,7 +517,6 @@ cdef class Win: request.ob_buf = msg return request - @check_for_nlcpy_array def Raccumulate(self, origin, int target_rank, target=None, Op op=SUM): """ @@ -534,7 +533,6 @@ cdef class Win: request.ob_buf = msg return request - @check_for_nlcpy_array def Rget_accumulate(self, origin, result, int target_rank, target=None, Op op=SUM): """ @@ -563,6 +561,7 @@ cdef class Win: """ Perform an MPI fence synchronization on a window """ + _request_flush() with nogil: CHKERR( MPI_Win_fence(assertion, self.ob_mpi) ) # General Active Target Synchronization @@ -572,6 +571,7 @@ cdef class Win: """ Start an RMA access epoch for MPI """ + _request_flush() with nogil: CHKERR( MPI_Win_start( group.ob_mpi, assertion, self.ob_mpi) ) @@ -585,6 +585,7 @@ cdef class Win: """ Start an RMA exposure epoch """ + _request_flush() with nogil: CHKERR( MPI_Win_post( group.ob_mpi, assertion, self.ob_mpi) ) @@ -609,6 +610,7 @@ cdef class Win: """ Begin an RMA access epoch at the target process """ + _request_flush() with nogil: CHKERR( MPI_Win_lock( lock_type, rank, assertion, self.ob_mpi) ) @@ -622,6 +624,7 @@ cdef class Win: """ Begin an RMA access epoch at all processes """ + _request_flush() with nogil: CHKERR( MPI_Win_lock_all(assertion, self.ob_mpi) ) def Unlock_all(self): @@ -661,6 +664,7 @@ cdef class Win: """ Synchronize public and private copies of the given window """ + _request_flush() with nogil: CHKERR( MPI_Win_sync(self.ob_mpi) ) diff --git a/src/mpi4pyve/MPI/asbuffer.pxi b/src/mpi4pyve/MPI/asbuffer.pxi index be7979a..36b70e2 100644 --- a/src/mpi4pyve/MPI/asbuffer.pxi +++ b/src/mpi4pyve/MPI/asbuffer.pxi @@ -132,10 +132,18 @@ cdef int Py27_GetBuffer(object obj, Py_buffer *view, int flags) except -1: #------------------------------------------------------------------------------ +include "asvaibuf.pxi" + cdef int PyMPI_GetBuffer(object obj, Py_buffer *view, int flags) except -1: - if PYPY: return PyPy_GetBuffer(obj, view, flags) - if PY2: return Py27_GetBuffer(obj, view, flags) - return PyObject_GetBuffer(obj, view, flags) + try: + if PYPY: return PyPy_GetBuffer(obj, view, flags) + if PY2: return Py27_GetBuffer(obj, view, flags) + return PyObject_GetBuffer(obj, view, flags) + except BaseException: + try: return Py_GetVAIBuffer(obj, view, flags) + except NotImplementedError: pass + except BaseException: raise + raise #------------------------------------------------------------------------------ diff --git a/src/mpi4pyve/MPI/asvaibuf.pxi b/src/mpi4pyve/MPI/asvaibuf.pxi new file mode 100644 index 0000000..66170ca --- /dev/null +++ b/src/mpi4pyve/MPI/asvaibuf.pxi @@ -0,0 +1,128 @@ +cdef inline int ve_is_contig(tuple shape, + tuple strides, + Py_ssize_t itemsize, + char order) except -1: + cdef Py_ssize_t i, ndim = len(shape) + cdef Py_ssize_t start, step, index + if order == c'F': + start = 0 + step = 1 + else: + start = ndim - 1 + step = -1 + for i from 0 <= i < ndim: + index = start + step * i + if itemsize != strides[index]: + return 0 + itemsize *= shape[index] + return 1 + +cdef inline char* ve_get_format(char typekind, Py_ssize_t itemsize) nogil: + if typekind == c'b': + if itemsize == sizeof(char): return b"?" + if itemsize == (sizeof(char)*4): return b"?" # NLCPy's bool is 4byte. 
+ if typekind == c'i': + if itemsize == sizeof(char): return b"b" + if itemsize == sizeof(short): return b"h" + if itemsize == sizeof(int): return b"i" + if itemsize == sizeof(long): return b"l" + if itemsize == sizeof(long long): return b"q" + if typekind == c'u': + if itemsize == sizeof(char): return b"B" + if itemsize == sizeof(short): return b"H" + if itemsize == sizeof(int): return b"I" + if itemsize == sizeof(long): return b"L" + if itemsize == sizeof(long long): return b"Q" + if typekind == c'f': + if itemsize == sizeof(float)//2: return b"e" + if itemsize == sizeof(float): return b"f" + if itemsize == sizeof(double): return b"d" + if itemsize == sizeof(long double): return b"g" + if typekind == c'c': + if itemsize == 2*sizeof(float)//2: return b"Ze" + if itemsize == 2*sizeof(float): return b"Zf" + if itemsize == 2*sizeof(double): return b"Zd" + if itemsize == 2*sizeof(long double): return b"Zg" + return BYTE_FMT + +#------------------------------------------------------------------------------ + +cdef int Py_CheckVAIBuffer(object obj): + try: return hasattr(obj, '__ve_array_interface__') + except: return 0 + +cdef int Py_GetVAIBuffer(object obj, Py_buffer *view, int flags) except -1: + cdef dict ve_array_interface + cdef tuple data + cdef str typestr + cdef tuple shape + cdef tuple strides + cdef list descr + cdef object dev_ptr, mask + cdef void *buf = NULL + cdef bint readonly = 0 + cdef Py_ssize_t s, size = 1 + cdef Py_ssize_t itemsize = 1 + cdef char typekind = c'u' + cdef bint fixnull = 0 + + try: + ve_array_interface = obj.__ve_array_interface__ + except AttributeError: + raise NotImplementedError("missing VE array interface") + + # mandatory + data = ve_array_interface['data'] + typestr = ve_array_interface['typestr'] + shape = tuple(ve_array_interface['shape']) + + # optional + strides = None if ve_array_interface.get('strides') is None else \ + tuple(ve_array_interface.get('strides')) + descr = ve_array_interface.get('descr') + mask = ve_array_interface.get('mask') + + dev_ptr, readonly = data + for s in shape: size *= s + if dev_ptr is None and size == 0: dev_ptr = 0 # XXX + buf = PyLong_AsVoidPtr(dev_ptr) + typekind = ord(typestr[1]) + itemsize = int(typestr[2:]) + + if mask is not None: + raise BufferError( + "__ve_array_interface__: " + "cannot handle masked arrays" + ) + if size < 0: + raise BufferError( + "__ve_array_interface__: " + "buffer with negative size (shape:%s, size:%d)" + % (shape, size) + ) + if (strides is not None and + not ve_is_contig(shape, strides, itemsize, c'C') and + not ve_is_contig(shape, strides, itemsize, c'F')): + raise BufferError( + "__ve_array_interface__: " + "buffer is not contiguous (shape:%s, strides:%s, itemsize:%d)" + % (shape, strides, itemsize) + ) + if descr is not None and (len(descr) != 1 or descr[0] != ('', typestr)): + PyErr_WarnEx(RuntimeWarning, + b"__ve_array_interface__: " + b"ignoring 'descr' key", 1) + + if PYPY and readonly and ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE): + raise BufferError("Object is not writable") + + fixnull = (buf == NULL and size == 0) + if fixnull: buf = &fixnull + PyBuffer_FillInfo(view, obj, buf, size*itemsize, readonly, flags) + if fixnull: view.buf = NULL + + if (flags & PyBUF_FORMAT) == PyBUF_FORMAT: + view.format = ve_get_format(typekind, itemsize) + if view.format != BYTE_FMT: + view.itemsize = itemsize + return 0 diff --git a/src/mpi4pyve/MPI/atimport.pxi b/src/mpi4pyve/MPI/atimport.pxi index 089d179..c276c5e 100644 --- a/src/mpi4pyve/MPI/atimport.pxi +++ b/src/mpi4pyve/MPI/atimport.pxi 
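To make the accepted input concrete, the sketch below shows a minimal provider of ``__ve_array_interface__`` that the ``Py_GetVAIBuffer`` parser above would handle: ``data``, ``typestr``, and ``shape`` are mandatory; ``strides`` may be ``None`` for a contiguous buffer; a ``descr`` other than ``[('', typestr)]`` is ignored with a warning; and a non-``None`` ``mask`` raises ``BufferError``. The class and the device address are illustrative only. ::

    class VEBuffer:
        # Illustrative stand-in for an object backed by VE memory.
        def __init__(self, ve_addr, nelems):
            self.ve_addr = ve_addr  # device address as a Python int (hypothetical)
            self.nelems = nelems

        @property
        def __ve_array_interface__(self):
            return {
                'data': (self.ve_addr, False),  # (address, readonly flag)
                'typestr': '<f8',               # typekind 'f', itemsize 8
                'shape': (self.nelems,),
                'strides': None,                # None: contiguous layout
                'descr': [('', '<f8')],         # optional; must match typestr
                'mask': None,                   # anything else is rejected
            }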
@@ -91,7 +91,7 @@ ctypedef struct Options: cdef Options options options.initialize = 1 options.threads = 1 -options.thread_level = MPI_THREAD_MULTIPLE +options.thread_level = MPI_THREAD_SERIALIZED options.finalize = 1 options.fast_reduce = 1 options.recv_mprobe = 1 @@ -106,7 +106,7 @@ cdef int getOptions(Options* opts) except -1: cdef object rc opts.initialize = 1 opts.threads = 1 - opts.thread_level = MPI_THREAD_MULTIPLE + opts.thread_level = MPI_THREAD_SERIALIZED opts.finalize = 1 opts.fast_reduce = 1 opts.recv_mprobe = 1 @@ -116,7 +116,7 @@ cdef int getOptions(Options* opts) except -1: # cdef object initialize = True cdef object threads = True - cdef object thread_level = 'multiple' + cdef object thread_level = 'serialized' cdef object finalize = None cdef object fast_reduce = True cdef object recv_mprobe = True @@ -273,6 +273,71 @@ def _set_abort_status(object status): except: abort_status = 1 if status else 0 +def print_option(): + print('initialize :', options.initialize) + print('threads :', options.threads) + print('thread_level :', options.thread_level) + print('finalize :', options.finalize) + print('fast_reduce :', options.fast_reduce) + print('recv_mprobe :', options.recv_mprobe) + print('errors :', options.errors) + +# ----------------------------------------------------------------------------- + +# Number of processes assigned to each VH when started with multiple VH. +import os +from libc.stdlib cimport malloc, free +from libc.string cimport strcmp + +cdef int get_mpi_local_size_from_nodeid(int nodeid): + cdef int local_size = 0 + cdef int size + comm = MPI_COMM_WORLD + MPI_Comm_size(comm, &size) + cdef int* nodes_nodeid = malloc(sizeof(int) * size) + MPI_Allgather(&nodeid, 1, MPI_INT, nodes_nodeid, 1, MPI_INT, comm) + for rank in range(0, size): + if nodeid == nodes_nodeid[rank]: + local_size += 1 + free(nodes_nodeid) + return local_size + + +cdef int get_mpi_local_size_from_processname(): + cdef int local_size = 0 + cdef char processor_name[MPI_MAX_PROCESSOR_NAME + 1] + cdef int resultlen + cdef int size + comm = MPI_COMM_WORLD + MPI_Comm_size(comm, &size) + MPI_Get_processor_name(processor_name, &resultlen) + cdef char* nodes_processor_name = malloc(sizeof(processor_name) * size) + MPI_Allgather(processor_name, sizeof(processor_name), MPI_CHAR, + nodes_processor_name, sizeof(processor_name) ,MPI_CHAR, comm) + for rank in range(0, size): + if strcmp( processor_name , &nodes_processor_name[rank * sizeof(processor_name)]) == 0: + local_size += 1 + free(nodes_processor_name) + return local_size + +cdef void set_mpi_local_size(): + if not mpi_active(): return + os.environ["_MPI4PYVE_MPI_INITIALIZED"] = '1' + + cdef int nodeid = -1 + try: + nodeid = int(os.environ['MPINODEID']) + except: + pass + + cdef int local_size = 0 + if nodeid >= 0: + local_size = get_mpi_local_size_from_nodeid(nodeid) + else: + local_size = get_mpi_local_size_from_processname() + os.environ["_MPI4PYVE_MPI_LOCAL_SIZE"] = str(local_size) + + # ----------------------------------------------------------------------------- # Vile hack for raising a exception and not contaminate the traceback diff --git a/src/mpi4pyve/MPI/msgbuffer.pxi b/src/mpi4pyve/MPI/msgbuffer.pxi index 5ffdef2..02f155e 100644 --- a/src/mpi4pyve/MPI/msgbuffer.pxi +++ b/src/mpi4pyve/MPI/msgbuffer.pxi @@ -26,6 +26,9 @@ cdef inline int is_buffer(object ob): else: return PyObject_CheckBuffer(ob) or _Py2_IsBuffer(ob) +cdef inline int is_vai_buffer(object ob): + return Py_CheckVAIBuffer(ob) + cdef inline int is_datatype(object ob): if 
isinstance(ob, Datatype): return 1 if PY3: @@ -143,6 +146,8 @@ cdef _p_message message_simple(object msg, (o_buf, o_count, o_displ, o_type) = msg else: raise ValueError("message: expecting 2 to 4 items") + elif is_vai_buffer(msg): + o_buf = msg elif PYPY: o_buf = msg else: @@ -255,6 +260,8 @@ cdef _p_message message_vector(object msg, (o_buf, o_counts, o_displs, o_type) = msg else: raise ValueError("message: expecting 2 to 4 items") + elif is_vai_buffer(msg): + o_buf = msg elif PYPY: o_buf = msg else: diff --git a/src/mpi4pyve/__init__.py b/src/mpi4pyve/__init__.py index 9267a20..3b022ce 100644 --- a/src/mpi4pyve/__init__.py +++ b/src/mpi4pyve/__init__.py @@ -77,7 +77,7 @@ bindings. """ -__version__ = '0.1.0b1' +__version__ = '1.0.0' __author__ = 'NEC (dev-nlcpy@sxarr.jp.nec.com)' __credits__ = 'NEC Corporation' @@ -142,7 +142,7 @@ def rc(**kargs): # pylint: disable=invalid-name rc.initialize = True rc.threads = True -rc.thread_level = 'multiple' +rc.thread_level = 'serialized' rc.finalize = None rc.fast_reduce = True rc.recv_mprobe = True diff --git a/src/mpi4pyve/futures/pool.py b/src/mpi4pyve/futures/pool.py index bed9ad5..2e89ec0 100644 --- a/src/mpi4pyve/futures/pool.py +++ b/src/mpi4pyve/futures/pool.py @@ -1,5 +1,58 @@ -# Author: Lisandro Dalcin -# Contact: dalcinl@gmail.com +### mpi4py-ve License ## +# +# Copyright (c) 2022, NEC Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this +# list of conditions and the following disclaimer listed in this license in the +# documentation and/or other materials provided with the distribution. +# +# The copyright holders provide no reassurances that the source code provided does not +# infringe any patent, copyright, or any other intellectual property rights of third +# parties. The copyright holders disclaim any liability to any recipient for claims +# brought against recipient by any third party for infringement of that parties +# intellectual property rights. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +# SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANYTHEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTE: This code is derived from mpi4py written by Lisandro Dalcin. +# +### mpi4py License ## +# +# Copyright (c) 2019, Lisandro Dalcin. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + """Implements MPIPoolExecutor.""" import time @@ -13,12 +66,20 @@ from . import _lib +def raise_notimpl_for_necmpi(name): + def _decorator(func): + def _raise_wrapper(*args, **kwargs): + raise NotImplementedError('%s on mpi4py-ve is not implemented yet.' + % name) + return _raise_wrapper + return _decorator class MPIPoolExecutor(Executor): """MPI-based asynchronous executor.""" Future = Future + @raise_notimpl_for_necmpi('MPIPoolExecutor') def __init__(self, max_workers=None, **kwargs): """Initialize a new MPIPoolExecutor instance. @@ -264,6 +325,7 @@ class MPICommExecutor(object): # pylint: disable=too-few-public-methods + @raise_notimpl_for_necmpi('MPICommExecutor') def __init__(self, comm=None, root=0, **kwargs): """Initialize a new MPICommExecutor instance. diff --git a/src/mpi4pyve/util/__init__.py b/src/mpi4pyve/util/__init__.py new file mode 100644 index 0000000..491798d --- /dev/null +++ b/src/mpi4pyve/util/__init__.py @@ -0,0 +1,3 @@ +from mpi4pyve.util.nmpi_aveo_dma import nmpi_aveo_dma +nmpi_aveo_dma = nmpi_aveo_dma() + diff --git a/src/mpi4pyve/util/nmpi_aveo_dma.py b/src/mpi4pyve/util/nmpi_aveo_dma.py new file mode 100644 index 0000000..6a15efe --- /dev/null +++ b/src/mpi4pyve/util/nmpi_aveo_dma.py @@ -0,0 +1,62 @@ +from mpi4pyve import MPI + +class nmpi_aveo_dma(object): + def __init__(self): + self.__count = None + self.__size = None + self.__time = None + + @property + def count(self): + self.__count = MPI._get_dma_count() + return self.__count + + @property + def size(self): + self.__size = MPI._get_dma_size() + return self.__size + + @property + def time(self): + self.__time = MPI._get_dma_time() + return self.__time + + def clear(self): + MPI._nmpi_aveo_dma_clear() + + def show_stats(self): + (count, size, time) = (self.count, self.size, self.time) + + sta = [] + sta.append(['dma_count', str(count[0]), str(count[1]), str(count[2])]) + sta.append(['dma_size', str(size[0]), str(size[1]), str(size[2])]) + sta.append(['dma_time', str(time[0]), str(time[1]), str(time[2])]) + + maxname = 0 + max_ve_ve = 0 + max_ve_vh = 0 + max_vh_ve = 0 + for val in sta: + if maxname < len(val[0]): + maxname = len(val[0]) + if max_ve_ve < len(val[1]): + max_ve_ve = len(val[1]) + if max_ve_vh < len(val[2]): + max_ve_vh = len(val[2]) + if max_vh_ve < len(val[3]): + max_vh_ve = len(val[3]) + + if len(sta) > 0: + sp1 = max(10, maxname) + sp2 = max(10, max_ve_ve) + sp3 = max(10, max_ve_vh) + sp4 = max(10, max_vh_ve) + prval = "Info %s VE->VE %s VE->VH %s VH->VE %s" % (sp1*' ', sp2*' ', sp3*' ', sp4*' ') + print(prval + "\n" + "-"*(sp1+4) + " " + "-"*(sp2+6)
+ " " + "-"*(sp3+6) + " " + "-"*(sp4+6)) + + for val in sta: + print("%s %s %s %s %s %s %s %s" % (val[0], ' '*(sp1-len(val[0])+4), + val[1], ' '*(sp2-len(val[1])+6), + val[2], ' '*(sp3-len(val[2])+6), + val[3], ' '*(sp4-len(val[3])+6))) + diff --git a/src/mpi4pyve/veo/__init__.py b/src/mpi4pyve/veo/__init__.py new file mode 100644 index 0000000..94ea7fb --- /dev/null +++ b/src/mpi4pyve/veo/__init__.py @@ -0,0 +1,32 @@ +### mpi4py-ve License ## +# +# Copyright (c) 2022, NEC Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this +# list of conditions and the following disclaimer listed in this license in the +# documentation and/or other materials provided with the distribution. +# +# The copyright holders provide no reassurances that the source code provided does not +# infringe any patent, copyright, or any other intellectual property rights of third +# parties. The copyright holders disclaim any liability to any recipient for claims +# brought against recipient by any third party for infringement of that parties +# intellectual property rights. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +# SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANYTHEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTE: This code is derived from mpi4py written by Lisandro Dalcin. +# +from mpi4pyve.veo.veo import * # NOQA diff --git a/src/mpi4pyve/veo/veo.py b/src/mpi4pyve/veo/veo.py new file mode 100644 index 0000000..90788e7 --- /dev/null +++ b/src/mpi4pyve/veo/veo.py @@ -0,0 +1,59 @@ +### mpi4py-ve License ## +# +# Copyright (c) 2022, NEC Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this +# list of conditions and the following disclaimer listed in this license in the +# documentation and/or other materials provided with the distribution. +# +# The copyright holders provide no reassurances that the source code provided does not +# infringe any patent, copyright, or any other intellectual property rights of third +# parties. The copyright holders disclaim any liability to any recipient for claims +# brought against recipient by any third party for infringement of that parties +# intellectual property rights. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +# SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANYTHEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +from mpi4pyve import MPI + + +def alloc_hmem(proc_handle, size): + """ Allocate a VE memory buffer or a VH memory buffer which users can use as heterogeneous memory. + + Parameters + ---------- + proc_handle: pointer + VEO process handle + size: int + size in bytes + + Returns + ------- + addr: int + VEMVA address with the identifier + """ + return MPI._alloc_hmem(proc_handle, size) + + +def free_hmem(addr): + """ Free a VE memory buffer. + + Parameters + ---------- + addr: int + VEMVA address + """ + MPI._free_hmem(addr) diff --git a/src/pympivendor.h index b14b4db..7bf2dd0 100644 --- a/src/pympivendor.h +++ b/src/pympivendor.h @@ -1,5 +1,59 @@ -/* Author: Lisandro Dalcin */ -/* Contact: dalcinl@gmail.com */ +/* +### mpi4py-ve License ## +# +# Copyright (c) 2022, NEC Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this +# list of conditions and the following disclaimer listed in this license in the +# documentation and/or other materials provided with the distribution. +# +# The copyright holders provide no reassurances that the source code provided does not +# infringe any patent, copyright, or any other intellectual property rights of third +# parties. The copyright holders disclaim any liability to any recipient for claims +# brought against recipient by any third party for infringement of that parties +# intellectual property rights. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +# SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANYTHEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTE: This code is derived from mpi4py written by Lisandro Dalcin. +# +### mpi4py License ## +# +# Copyright (c) 2019, Lisandro Dalcin. All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +*/ static int PyMPI_Get_vendor(const char **vendor_name, int *version_major, @@ -111,6 +165,10 @@ static int PyMPI_Get_vendor(const char **vendor_name, micro = LAM_RELEASE_VERSION; #endif +#elif defined(MPI4PYVE_NEC_MPI) && (MPI4PYVE_NEC_MPI == 1) + + name = "NEC MPI"; + #endif if (vendor_name) *vendor_name = name; diff --git a/test/nlcpy_ndarray_wrapper.py b/test/nlcpy_ndarray_wrapper.py new file mode 100644 index 0000000..08071cd --- /dev/null +++ b/test/nlcpy_ndarray_wrapper.py @@ -0,0 +1,16 @@ +import nlcpy + +class nlcpy_ndarray_wrapper(nlcpy.ndarray): + def __init__(self, shape, dtype=float, strides=None, order='C'): + super().__init__(shape, dtype, strides, order) + self.read_only_flag = False + + def set_read_only_flag(self, read_only_flag): + self.read_only_flag = read_only_flag + + @property + def __ve_array_interface__(self): + vai = super().__ve_array_interface__ + vai['data'] = (vai['data'][0], self.read_only_flag) + return vai + diff --git a/test/test_cco_buf.py b/test/test_cco_buf.py index f8420a3..468888d 100644 --- a/test/test_cco_buf.py +++ b/test/test_cco_buf.py @@ -110,6 +110,9 @@ def testReduce(self): for root in range(size): for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): sbuf = array(range(size), typecode) + # @unittest.skip('necmpi') + if hasattr(sbuf.as_mpi(), '__ve_array_interface__'): + continue rbuf = array(-1, typecode, size) self.COMM.Reduce(sbuf.as_mpi(), rbuf.as_mpi(), @@ -587,6 +590,9 @@ def testReduceLocal(self): for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): size = 5 sbuf = array(range(1,size+1), typecode) + # @unittest.skip('necmpi') + if hasattr(sbuf.as_mpi(), '__ve_array_interface__'): + continue rbuf = array(range(0,size+0), typecode) try: op.Reduce_local(sbuf.as_mpi(), rbuf.as_mpi()) @@ -605,6 +611,9 @@ def testReduceLocal(self): for array in arrayimpl.ArrayTypes: for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN): sbuf = array(range(3), "i") + # @unittest.skip('necmpi') + if hasattr(sbuf.as_mpi(), '__ve_array_interface__'): + continue rbuf = array(range(3), "i") def f(): op.Reduce_local(sbuf.as_mpi_c(2), rbuf.as_mpi_c(3)) diff --git a/test/test_io.py b/test/test_io.py index b255db0..a5c86ae 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -17,7 +17,12 @@ def setUp(self): fname = 
None if comm.Get_rank() == 0: if not os.path.exists(self.tmpname): - os.mkdir(self.tmpname) + try: + os.mkdir(self.tmpname) + except OSError as e: + if e.errno != 17: # not File exists + raise + pass fd, fname = tempfile.mkstemp(prefix=self.prefix,dir=self.tmpname) os.close(fd) fname = comm.bcast(fname, 0) @@ -112,7 +117,7 @@ def testReadWrite(self): self.assertEqual(value, 42) self.assertEqual(rbuf[-1], -1) comm.Barrier() - + def testIReadIWrite(self): comm = self.COMM size = comm.Get_size() @@ -228,10 +233,10 @@ def testIReadIWriteAtAll(self): for array in arrayimpl.ArrayTypes: for typecode in arrayimpl.TypeMap: etype = arrayimpl.TypeMap[typecode] - fh.Set_size(0) - fh.Set_view(0, etype) count = 13 wbuf = array(42, typecode, count) + fh.Set_size(0) + fh.Set_view(rank*wbuf.itemsize*count, etype) fh.Iwrite_at_all(count*rank, wbuf.as_raw()).Wait() fh.Sync() comm.Barrier() @@ -306,10 +311,10 @@ def testIReadIWriteAll(self): for array in arrayimpl.ArrayTypes: for typecode in arrayimpl.TypeMap: etype = arrayimpl.TypeMap[typecode] - fh.Set_size(0) - fh.Set_view(0, etype) count = 13 wbuf = array(42, typecode, count) + fh.Set_size(0) + fh.Set_view(rank*count*wbuf.itemsize, etype) fh.Seek(count*rank, MPI.SEEK_SET) fh.Iwrite_all(wbuf.as_raw()).Wait() fh.Sync() @@ -352,7 +357,7 @@ def testReadWriteAllBeginEnd(self): self.assertEqual(value, 42) self.assertEqual(rbuf[-1], -1) comm.Barrier() - + def testReadWriteOrdered(self): comm = self.COMM size = comm.Get_size() diff --git a/test/test_p2p_buf.py b/test/test_p2p_buf.py index 75f2ef0..364dd93 100644 --- a/test/test_p2p_buf.py +++ b/test/test_p2p_buf.py @@ -35,7 +35,7 @@ def testSendrecvReplace(self): self.COMM.Sendrecv_replace(buf.as_mpi(), dest, 0, source, 0) for value in buf: self.assertEqual(value, source) - + def testSendRecv(self): size = self.COMM.Get_size() rank = self.COMM.Get_rank() @@ -46,36 +46,38 @@ def testSendRecv(self): sbuf = array( s, typecode, s) rbuf = array(-1, typecode, s) mem = array( 0, typecode, 2*(s+MPI.BSEND_OVERHEAD)).as_raw() - if size == 1: - MPI.Attach_buffer(mem) - rbuf = sbuf - MPI.Detach_buffer() - elif rank == 0: - MPI.Attach_buffer(mem) - self.COMM.Ibsend(sbuf.as_mpi(), 1, 0).Wait() - self.COMM.Bsend(sbuf.as_mpi(), 1, 0) - MPI.Detach_buffer() - self.COMM.Send(sbuf.as_mpi(), 1, 0) - self.COMM.Ssend(sbuf.as_mpi(), 1, 0) - self.COMM.Recv(rbuf.as_mpi(), 1, 0) - self.COMM.Recv(rbuf.as_mpi(), 1, 0) - self.COMM.Recv(rbuf.as_mpi(), 1, 0) - self.COMM.Recv(rbuf.as_mpi(), 1, 0) - elif rank == 1: - self.COMM.Recv(rbuf.as_mpi(), 0, 0) - self.COMM.Recv(rbuf.as_mpi(), 0, 0) - self.COMM.Recv(rbuf.as_mpi(), 0, 0) - self.COMM.Recv(rbuf.as_mpi(), 0, 0) - MPI.Attach_buffer(mem) - self.COMM.Ibsend(sbuf.as_mpi(), 0, 0).Wait() - self.COMM.Bsend(sbuf.as_mpi(), 0, 0) - MPI.Detach_buffer() - self.COMM.Send(sbuf.as_mpi(), 0, 0) - self.COMM.Ssend(sbuf.as_mpi(), 0, 0) - else: - rbuf = sbuf - for value in rbuf: - self.assertEqual(value, s) + # @unittest.skip('necmpi') + if not hasattr(mem, '__ve_array_interface__'): + if size == 1: + MPI.Attach_buffer(mem) + rbuf = sbuf + MPI.Detach_buffer() + elif rank == 0: + MPI.Attach_buffer(mem) + self.COMM.Ibsend(sbuf.as_mpi(), 1, 0).Wait() + self.COMM.Bsend(sbuf.as_mpi(), 1, 0) + MPI.Detach_buffer() + self.COMM.Send(sbuf.as_mpi(), 1, 0) + self.COMM.Ssend(sbuf.as_mpi(), 1, 0) + self.COMM.Recv(rbuf.as_mpi(), 1, 0) + self.COMM.Recv(rbuf.as_mpi(), 1, 0) + self.COMM.Recv(rbuf.as_mpi(), 1, 0) + self.COMM.Recv(rbuf.as_mpi(), 1, 0) + elif rank == 1: + self.COMM.Recv(rbuf.as_mpi(), 0, 0) + 
self.COMM.Recv(rbuf.as_mpi(), 0, 0) + self.COMM.Recv(rbuf.as_mpi(), 0, 0) + self.COMM.Recv(rbuf.as_mpi(), 0, 0) + MPI.Attach_buffer(mem) + self.COMM.Ibsend(sbuf.as_mpi(), 0, 0).Wait() + self.COMM.Bsend(sbuf.as_mpi(), 0, 0) + MPI.Detach_buffer() + self.COMM.Send(sbuf.as_mpi(), 0, 0) + self.COMM.Ssend(sbuf.as_mpi(), 0, 0) + else: + rbuf = sbuf + for value in rbuf: + self.assertEqual(value, s) # rank = self.COMM.Get_rank() sbuf = array( s, typecode, s) @@ -386,27 +388,29 @@ def testPersistent(self): self.assertEqual(value, -1) # mem = array( 0, typecode, s+MPI.BSEND_OVERHEAD).as_raw() - sbuf = array( s, typecode, s) - rbuf = array(-1, typecode, s+xs) - MPI.Attach_buffer(mem) - sendreq = self.COMM.Bsend_init(sbuf.as_mpi(), dest, 0) - recvreq = self.COMM.Recv_init(rbuf.as_mpi(), source, 0) - sendreq.Start() - recvreq.Start() - sendreq.Wait() - recvreq.Wait() - MPI.Detach_buffer() - self.assertNotEqual(sendreq, MPI.REQUEST_NULL) - self.assertNotEqual(recvreq, MPI.REQUEST_NULL) - sendreq.Free() - recvreq.Free() - self.assertEqual(sendreq, MPI.REQUEST_NULL) - self.assertEqual(recvreq, MPI.REQUEST_NULL) - for value in rbuf[:s]: - self.assertEqual(value, s) - for value in rbuf[s:]: - self.assertEqual(value, -1) - # + # @unittest.skip('necmpi') + if not hasattr(mem, '__ve_array_interface__'): + sbuf = array( s, typecode, s) + rbuf = array(-1, typecode, s+xs) + MPI.Attach_buffer(mem) + sendreq = self.COMM.Bsend_init(sbuf.as_mpi(), dest, 0) + recvreq = self.COMM.Recv_init(rbuf.as_mpi(), source, 0) + sendreq.Start() + recvreq.Start() + sendreq.Wait() + recvreq.Wait() + MPI.Detach_buffer() + self.assertNotEqual(sendreq, MPI.REQUEST_NULL) + self.assertNotEqual(recvreq, MPI.REQUEST_NULL) + sendreq.Free() + recvreq.Free() + self.assertEqual(sendreq, MPI.REQUEST_NULL) + self.assertEqual(recvreq, MPI.REQUEST_NULL) + for value in rbuf[:s]: + self.assertEqual(value, s) + for value in rbuf[s:]: + self.assertEqual(value, -1) + rank = self.COMM.Get_rank() sbuf = array( s, typecode, s) rbuf = array(-1, typecode, s+xs) diff --git a/test/test_p2p_buf_s.py b/test/test_p2p_buf_s.py index d27316e..cc00891 100644 --- a/test/test_p2p_buf_s.py +++ b/test/test_p2p_buf_s.py @@ -31,6 +31,9 @@ def test_send_recv(self): sbuf = array( s, typecode, s) # rbuf = array(-1, typecode, s) mem = array( 0, typecode, 2*(s+MPI.BSEND_OVERHEAD)).as_raw() + # @unittest.skip('necmpi') + if hasattr(mem, '__ve_array_interface__'): + continue if size == 1: MPI.Attach_buffer(mem) rbuf = sbuf diff --git a/test/test_p2p_obj_array.py b/test/test_p2p_obj_array.py index ac709fb..bc52c99 100644 --- a/test/test_p2p_obj_array.py +++ b/test/test_p2p_obj_array.py @@ -375,6 +375,7 @@ def testISSendAndRecv(self): self.assertFalse(req) self.assertTrue(np.array_equal(rmess, smess)) + @unittest.skip('necmpi') def testIRecvAndBSend(self): comm = self.COMM rank = comm.Get_rank() @@ -385,18 +386,19 @@ def testIRecvAndBSend(self): src = dst = rank req1 = comm.irecv(None, src, 1) req2 = comm.irecv(None, src, 2) - req3 = comm.irecv(None, src, 3) - comm.bsend(smess, dst, 3) + #req3 = comm.irecv(None, src, 3) + #comm.bsend(smess, dst, 3) comm.bsend(smess, dst, 2) comm.bsend(smess, dst, 1) - self.assertTrue(np.array_equal(smess, req3.wait())) + #self.assertTrue(np.array_equal(smess, req3.wait())) self.assertTrue(np.array_equal(smess, req2.wait())) self.assertTrue(np.array_equal(smess, req1.wait())) - comm.bsend(smess, MPI.PROC_NULL, 3) + #comm.bsend(smess, MPI.PROC_NULL, 3) finally: MPI.Detach_buffer() MPI.Free_mem(buf) - + + @unittest.skip('necmpi') def 
testIRecvAndIBSend(self): comm = self.COMM rank = comm.Get_rank() diff --git a/test/test_pack.py b/test/test_pack.py index 64cf259..f9221dc 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -74,6 +74,9 @@ def testPackUnpackExternal(self): iarray1 = array(127, typecode1, items).as_raw() else: iarray1 = array(255, typecode1, items).as_raw() + # @unittest.skip('necmpi') + if hasattr(iarray1, '__ve_array_interface__'): + continue iarray2 = array(range(items), typecode2).as_raw() oarray1 = array(-1, typecode1, items).as_raw() oarray2 = array(-1, typecode2, items).as_raw() diff --git a/test/test_rma.py b/test/test_rma.py index 02204de..b41ec08 100644 --- a/test/test_rma.py +++ b/test/test_rma.py @@ -95,6 +95,7 @@ def testAccumulate(self): group.Free() for array in arrayimpl.ArrayTypes: for typecode in arrayimpl.TypeMap: + if typecode in 'FDG': continue for count in range(10): for rank in range(size): sbuf = array(range(count), typecode) @@ -122,6 +123,7 @@ def testGetAccumulate(self): try: try: self.WIN.Get_accumulate([obuf, 0, MPI.BYTE], [rbuf, 0, MPI.BYTE], rank) + self.WIN.Fence() finally: MPI.Free_mem(obuf) MPI.Free_mem(rbuf) @@ -130,8 +132,10 @@ def testGetAccumulate(self): self.WIN.Fence() for array in arrayimpl.ArrayTypes: for typecode in arrayimpl.TypeMap: + if typecode in 'FDG': continue + if rank != 0: continue for count in range(10): - for rank in range(size): + for lrank in range(size): ones = array([1]*count, typecode) sbuf = array(range(count), typecode) rbuf = array(-1, typecode, count+1) @@ -139,16 +143,16 @@ def testGetAccumulate(self): for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN, MPI.REPLACE, MPI.NO_OP): - self.WIN.Lock(rank) - self.WIN.Put(ones.as_mpi(), rank) - self.WIN.Flush(rank) + self.WIN.Lock(lrank) + self.WIN.Put(ones.as_mpi(), lrank) + self.WIN.Flush(lrank) self.WIN.Get_accumulate(sbuf.as_mpi(), rbuf.as_mpi_c(count), - rank, op=op) - self.WIN.Flush(rank) - self.WIN.Get(gbuf.as_mpi_c(count), rank) - self.WIN.Flush(rank) - self.WIN.Unlock(rank) + lrank, op=op) + self.WIN.Flush(lrank) + self.WIN.Get(gbuf.as_mpi_c(count), lrank) + self.WIN.Flush(lrank) + self.WIN.Unlock(lrank) # for i in range(count): self.assertEqual(sbuf[i], i) @@ -158,39 +162,51 @@ def testGetAccumulate(self): self.assertEqual(gbuf[-1], -1) def testFetchAndOp(self): + typemap = MPI._typedict group = self.WIN.Get_group() size = group.Get_size() rank = group.Get_rank() group.Free() self.WIN.Fence() - #obuf = MPI.Alloc_mem(1); memzero(obuf) - #rbuf = MPI.Alloc_mem(1); memzero(rbuf) - #try: - # try: - # self.WIN.Fetch_and_op([obuf, 1, MPI.BYTE], [rbuf, 1, MPI.BYTE], rank) - # finally: - # MPI.Free_mem(obuf) - # MPI.Free_mem(rbuf) - #except NotImplementedError: - # self.skipTest('mpi-win-fetch_and_op') - #self.WIN.Fence() + blen = MPI.INT.Get_size() + obuf = MPI.Alloc_mem(blen); memzero(obuf) + rbuf = MPI.Alloc_mem(blen); memzero(rbuf) + try: + try: + self.WIN.Fetch_and_op( + [obuf, 1, MPI.INT], + [rbuf, 1, MPI.INT], + rank) + self.WIN.Fence() + finally: + MPI.Free_mem(obuf) + MPI.Free_mem(rbuf) + except NotImplementedError: + self.skipTest('mpi-win-fetch_and_op') + self.WIN.Fence() for array in arrayimpl.ArrayTypes: for typecode in arrayimpl.TypeMap: + if typecode in 'FDG': continue + if rank != 0 : continue obuf = array(+1, typecode) rbuf = array(-1, typecode, 2) + datatype = typemap[typecode] for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN, MPI.REPLACE, MPI.NO_OP): - for rank in range(size): + for lrank in range(size): for disp in range(3): - self.WIN.Lock(rank) + self.WIN.Lock(lrank) 
self.WIN.Fetch_and_op(obuf.as_mpi(), rbuf.as_mpi_c(1), - rank, disp, op=op) - self.WIN.Unlock(rank) + lrank, + disp * datatype.size, + op=op) + self.WIN.Unlock(lrank) self.assertEqual(rbuf[1], -1) def testCompareAndSwap(self): + typemap = MPI._typedict group = self.WIN.Get_group() size = group.Get_size() rank = group.Get_rank() @@ -205,6 +221,7 @@ def testCompareAndSwap(self): [cbuf, 1, MPI.BYTE], [rbuf, 1, MPI.BYTE], rank, 0) + self.WIN.Fence() finally: MPI.Free_mem(obuf) MPI.Free_mem(cbuf) @@ -215,17 +232,21 @@ def testCompareAndSwap(self): for array in arrayimpl.ArrayTypes: for typecode in arrayimpl.TypeMap: if typecode in 'fdg': continue + if typecode in 'FDG': continue + if rank !=0 : continue obuf = array(+1, typecode) cbuf = array( 0, typecode) rbuf = array(-1, typecode, 2) - for rank in range(size): + datatype = typemap[typecode] + for lrank in range(size): for disp in range(3): - self.WIN.Lock(rank) + self.WIN.Lock(lrank) self.WIN.Compare_and_swap(obuf.as_mpi(), cbuf.as_mpi(), rbuf.as_mpi_c(1), - rank, disp) - self.WIN.Unlock(rank) + lrank, + disp * datatype.size) + self.WIN.Unlock(lrank) self.assertEqual(rbuf[1], -1) def testPutProcNull(self): @@ -418,7 +439,6 @@ def testFlush(self): class TestRMASelf(BaseTestRMA, unittest.TestCase): COMM = MPI.COMM_SELF -@unittest.skipIf(multihost.IS_MULTI_HOST, 'necmpi-multi-host') class TestRMAWorld(BaseTestRMA, unittest.TestCase): COMM = MPI.COMM_WORLD diff --git a/test/test_rma_nb.py b/test/test_rma_nb.py index 404ffd7..1f5100a 100644 --- a/test/test_rma_nb.py +++ b/test/test_rma_nb.py @@ -45,51 +45,57 @@ def tearDown(self): def testPutGet(self): group = self.WIN.Get_group() size = group.Get_size() + rank = group.Get_rank() group.Free() for array in arrayimpl.ArrayTypes: for typecode in arrayimpl.TypeMap: + if typecode in 'FDG': continue + if rank != 0: continue for count in range(self.COUNT_MIN, 10): - for rank in range(size): - sbuf = array([rank]*count, typecode) + for lrank in range(size): + sbuf = array([lrank]*count, typecode) rbuf = array(-1, typecode, count+1) - self.WIN.Fence() - self.WIN.Lock(rank) - r = self.WIN.Rput(sbuf.as_mpi(), rank) + #self.WIN.Fence() + self.WIN.Lock(lrank) + r = self.WIN.Rput(sbuf.as_mpi(), lrank) r.Wait() - self.WIN.Flush(rank) - r = self.WIN.Rget(rbuf.as_mpi_c(count), rank) + self.WIN.Flush(lrank) + r = self.WIN.Rget(rbuf.as_mpi_c(count), lrank) r.Wait() - self.WIN.Unlock(rank) + self.WIN.Unlock(lrank) for i in range(count): - self.assertEqual(sbuf[i], rank) - self.assertEqual(rbuf[i], rank) - self.assertEqual(rbuf[-1], -1) + self.assertEqual(sbuf[i], lrank) + self.assertEqual(rbuf[i], lrank) + self.assertEqual(int(rbuf[-1]), -1) @unittest.skipMPI('openmpi(>=1.10.0,<1.11.0)') def testAccumulate(self): group = self.WIN.Get_group() size = group.Get_size() + rank = group.Get_rank() group.Free() for array in arrayimpl.ArrayTypes: for typecode in arrayimpl.TypeMap: + if typecode in 'FDG': continue + if rank != 0: continue for count in range(self.COUNT_MIN, 10): - for rank in range(size): + for lrank in range(size): ones = array([1]*count, typecode) sbuf = array(range(count), typecode) rbuf = array(-1, typecode, count+1) for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN, MPI.REPLACE): - self.WIN.Lock(rank) - self.WIN.Put(ones.as_mpi(), rank) - self.WIN.Flush(rank) + self.WIN.Lock(lrank) + self.WIN.Put(ones.as_mpi(), lrank) + self.WIN.Flush(lrank) r = self.WIN.Raccumulate(sbuf.as_mpi(), - rank, op=op) + lrank, op=op) r.Wait() - self.WIN.Flush(rank) - r = self.WIN.Rget(rbuf.as_mpi_c(count), rank) + 
self.WIN.Flush(lrank) + r = self.WIN.Rget(rbuf.as_mpi_c(count), lrank) r.Wait() - self.WIN.Unlock(rank) + self.WIN.Unlock(lrank) # for i in range(count): self.assertEqual(sbuf[i], i) @@ -100,11 +106,14 @@ def testAccumulate(self): def testGetAccumulate(self): group = self.WIN.Get_group() size = group.Get_size() + rank = group.Get_rank() group.Free() for array in arrayimpl.ArrayTypes: for typecode in arrayimpl.TypeMap: + if typecode in 'FDG': continue + if rank != 0: continue for count in range(self.COUNT_MIN, 10): - for rank in range(size): + for lrank in range(size): ones = array([1]*count, typecode) sbuf = array(range(count), typecode) rbuf = array(-1, typecode, count+1) @@ -112,17 +121,17 @@ def testGetAccumulate(self): for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN, MPI.REPLACE, MPI.NO_OP): - self.WIN.Lock(rank) - self.WIN.Put(ones.as_mpi(), rank) - self.WIN.Flush(rank) + self.WIN.Lock(lrank) + self.WIN.Put(ones.as_mpi(), lrank) + self.WIN.Flush(lrank) r = self.WIN.Rget_accumulate(sbuf.as_mpi(), rbuf.as_mpi_c(count), - rank, op=op) + lrank, op=op) r.Wait() - self.WIN.Flush(rank) - r = self.WIN.Rget(gbuf.as_mpi_c(count), rank) + self.WIN.Flush(lrank) + r = self.WIN.Rget(gbuf.as_mpi_c(count), lrank) r.Wait() - self.WIN.Unlock(rank) + self.WIN.Unlock(lrank) # for i in range(count): self.assertEqual(sbuf[i], i) @@ -175,7 +184,6 @@ class TestRMASelf(BaseTestRMA, unittest.TestCase): @unittest.skipMPI('MPI(<3.0)') @unittest.skipMPI('openmpi(<1.8.1)') @unittest.skipMPI('MPICH2(<1.5.0)') -@unittest.skipIf(multihost.IS_MULTI_HOST, 'necmpi-multi-host') class TestRMAWorld(BaseTestRMA, unittest.TestCase): COMM = MPI.COMM_WORLD diff --git a/test/test_rma_nb_nlcpy_notimpl.py b/test/test_rma_nb_nlcpy_notimpl.py deleted file mode 100644 index e83a7c4..0000000 --- a/test/test_rma_nb_nlcpy_notimpl.py +++ /dev/null @@ -1,200 +0,0 @@ -from mpi4pyve import MPI -import mpiunittest as unittest -import nlcpy_only_arrayimpl as arrayimpl -import sys - -pypy_lt_53 = (hasattr(sys, 'pypy_version_info') and - sys.pypy_version_info < (5, 3)) - -def mkzeros(n): - if pypy_lt_53: - return b'\0' * n - return bytearray(n) - -def memzero(m): - try: - m[:] = 0 - except IndexError: # cffi buffer - m[0:len(m)] = b'\0'*len(m) - -class BaseTestRMA(object): - - COMM = MPI.COMM_NULL - INFO = MPI.INFO_NULL - - COUNT_MIN = 0 - - def setUp(self): - nbytes = 100*MPI.DOUBLE.size - try: - self.mpi_memory = MPI.Alloc_mem(nbytes) - self.memory = self.mpi_memory - memzero(self.memory) - except MPI.Exception: - import array - self.mpi_memory = None - self.memory = array.array('B',[0]*nbytes) - self.WIN = MPI.Win.Create(self.memory, 1, self.INFO, self.COMM) - - def tearDown(self): - self.WIN.Free() - if self.mpi_memory: - MPI.Free_mem(self.mpi_memory) - - def testPutGet(self): - group = self.WIN.Get_group() - size = group.Get_size() - group.Free() - for array in arrayimpl.ArrayTypes: - for typecode in arrayimpl.TypeMap: - for count in range(self.COUNT_MIN, 10): - for rank in range(size): - sbuf = array([rank]*count, typecode) - rbuf = array(-1, typecode, count+1) - self.WIN.Fence() - self.WIN.Lock(rank) - self.assertRaises(NotImplementedError, lambda: - self.WIN.Rput(sbuf.as_mpi(), rank).Wait() - ) - self.WIN.Flush(rank) - self.assertRaises(NotImplementedError, lambda: - self.WIN.Rget(rbuf.as_mpi_c(count), rank).Wait() - ) - self.WIN.Unlock(rank) - #for i in range(count): - # self.assertEqual(sbuf[i], rank) - # self.assertEqual(rbuf[i], rank) - #self.assertEqual(rbuf[-1], -1) - - @unittest.skipMPI('openmpi(>=1.10.0,<1.11.0)') - def 
-        group = self.WIN.Get_group()
-        size = group.Get_size()
-        group.Free()
-        for array in arrayimpl.ArrayTypes:
-            for typecode in arrayimpl.TypeMap:
-                for count in range(self.COUNT_MIN, 10):
-                    for rank in range(size):
-                        ones = array([1]*count, typecode)
-                        sbuf = array(range(count), typecode)
-                        rbuf = array(-1, typecode, count+1)
-                        for op in (MPI.SUM, MPI.PROD,
-                                   MPI.MAX, MPI.MIN,
-                                   MPI.REPLACE):
-                            self.WIN.Lock(rank)
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Put(ones.as_mpi(), rank)
-                            )
-                            self.WIN.Flush(rank)
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Raccumulate(sbuf.as_mpi(),
-                                                     rank, op=op).Wait()
-                            )
-                            self.WIN.Flush(rank)
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Rget(rbuf.as_mpi_c(count), rank).Wait()
-                            )
-                            self.WIN.Unlock(rank)
-                            #
-                            #for i in range(count):
-                            #    self.assertEqual(sbuf[i], i)
-                            #    self.assertEqual(rbuf[i], op(1, i))
-                            #self.assertEqual(rbuf[-1], -1)
-
-    @unittest.skipMPI('openmpi(>=1.10,<1.11)')
-    def testGetAccumulate(self):
-        group = self.WIN.Get_group()
-        size = group.Get_size()
-        group.Free()
-        for array in arrayimpl.ArrayTypes:
-            for typecode in arrayimpl.TypeMap:
-                for count in range(self.COUNT_MIN, 10):
-                    for rank in range(size):
-                        ones = array([1]*count, typecode)
-                        sbuf = array(range(count), typecode)
-                        rbuf = array(-1, typecode, count+1)
-                        gbuf = array(-1, typecode, count+1)
-                        for op in (MPI.SUM, MPI.PROD,
-                                   MPI.MAX, MPI.MIN,
-                                   MPI.REPLACE, MPI.NO_OP):
-                            self.WIN.Lock(rank)
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Put(ones.as_mpi(), rank)
-                            )
-                            self.WIN.Flush(rank)
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Rget_accumulate(sbuf.as_mpi(),
-                                                         rbuf.as_mpi_c(count),
-                                                         rank, op=op).Wait()
-                            )
-                            self.WIN.Flush(rank)
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Rget(gbuf.as_mpi_c(count), rank).Wait()
-                            )
-                            self.WIN.Unlock(rank)
-                            #
-                            #for i in range(count):
-                            #    self.assertEqual(sbuf[i], i)
-                            #    self.assertEqual(rbuf[i], 1)
-                            #    self.assertEqual(gbuf[i], op(1, i))
-                            #self.assertEqual(rbuf[-1], -1)
-                            #self.assertEqual(gbuf[-1], -1)
-
-    def testPutProcNull(self):
-        rank = self.COMM.Get_rank()
-        self.WIN.Lock(rank)
-        r = self.WIN.Rput(None, MPI.PROC_NULL, None)
-        r.Wait()
-        self.WIN.Unlock(rank)
-
-    def testGetProcNull(self):
-        rank = self.COMM.Get_rank()
-        self.WIN.Lock(rank)
-        r = self.WIN.Rget(None, MPI.PROC_NULL, None)
-        r.Wait()
-        self.WIN.Unlock(rank)
-
-    def testAccumulateProcNullReplace(self):
-        rank = self.COMM.Get_rank()
-        zeros = mkzeros(8)
-        self.WIN.Lock(rank)
-        r = self.WIN.Raccumulate([zeros, MPI.INT], MPI.PROC_NULL, None, MPI.REPLACE)
-        r.Wait()
-        r = self.WIN.Raccumulate([zeros, MPI.INT], MPI.PROC_NULL, None, MPI.REPLACE)
-        r.Wait()
-        self.WIN.Unlock(rank)
-
-    def testAccumulateProcNullSum(self):
-        rank = self.COMM.Get_rank()
-        zeros = mkzeros(8)
-        self.WIN.Lock(rank)
-        r = self.WIN.Raccumulate([zeros, MPI.INT], MPI.PROC_NULL, None, MPI.SUM)
-        r.Wait()
-        r = self.WIN.Raccumulate([None, MPI.INT], MPI.PROC_NULL, None, MPI.SUM)
-        r.Wait()
-        self.WIN.Unlock(rank)
-
-
-@unittest.skipMPI('MPI(<3.0)')
-@unittest.skipMPI('openmpi(<1.8.1)')
-@unittest.skipMPI('MPICH2(<1.5.0)')
-class TestRMASelf(BaseTestRMA, unittest.TestCase):
-    COMM = MPI.COMM_SELF
-
-#@unittest.skipMPI('MPI(<3.0)')
-#@unittest.skipMPI('openmpi(<1.8.1)')
-#@unittest.skipMPI('MPICH2(<1.5.0)')
-#class TestRMAWorld(BaseTestRMA, unittest.TestCase):
-#    COMM = MPI.COMM_WORLD
-
-
-SpectrumMPI = MPI.get_vendor()[0] == 'Spectrum MPI'
-try:
-    if SpectrumMPI: raise NotImplementedError
-    MPI.Win.Create(None, 1, MPI.INFO_NULL, MPI.COMM_SELF).Free()
-except (NotImplementedError, MPI.Exception):
-    unittest.disable(BaseTestRMA, 'mpi-rma-nb')
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/test_rma_nlcpy_notimpl.py b/test/test_rma_nlcpy_notimpl.py
deleted file mode 100644
index b28d0b7..0000000
--- a/test/test_rma_nlcpy_notimpl.py
+++ /dev/null
@@ -1,459 +0,0 @@
-from mpi4pyve import MPI
-import mpiunittest as unittest
-import nlcpy_only_arrayimpl as arrayimpl
-import sys
-
-pypy_lt_53 = (hasattr(sys, 'pypy_version_info') and
-              sys.pypy_version_info < (5, 3))
-
-def mkzeros(n):
-    if pypy_lt_53:
-        return b'\0' * n
-    return bytearray(n)
-
-def memzero(m):
-    try:
-        m[:] = 0
-    except IndexError: # cffi buffer
-        m[0:len(m)] = b'\0'*len(m)
-
-class BaseTestRMA(object):
-
-    COMM = MPI.COMM_NULL
-    INFO = MPI.INFO_NULL
-
-    def setUp(self):
-        nbytes = 100*MPI.DOUBLE.size
-        try:
-            self.mpi_memory = MPI.Alloc_mem(nbytes)
-            self.memory = self.mpi_memory
-            memzero(self.memory)
-        except MPI.Exception:
-            import array
-            self.mpi_memory = None
-            self.memory = array.array('B',[0]*nbytes)
-        self.WIN = MPI.Win.Create(self.memory, 1, self.INFO, self.COMM)
-
-    def tearDown(self):
-        self.WIN.Free()
-        if self.mpi_memory:
-            MPI.Free_mem(self.mpi_memory)
-
-    def testPutGet(self):
-        typemap = MPI._typedict
-        group = self.WIN.Get_group()
-        size = group.Get_size()
-        group.Free()
-        for array in arrayimpl.ArrayTypes:
-            for typecode in arrayimpl.TypeMap:
-                for count in range(10):
-                    for rank in range(size):
-                        sbuf = array(range(count), typecode)
-                        rbuf = array(-1, typecode, count+1)
-                        #
-                        self.WIN.Fence()
-                        self.assertRaises(NotImplementedError, lambda:
-                            self.WIN.Put(sbuf.as_mpi(), rank)
-                        )
-                        self.WIN.Fence()
-                        self.assertRaises(NotImplementedError, lambda:
-                            self.WIN.Get(rbuf.as_mpi_c(count), rank)
-                        )
-                        self.WIN.Fence()
-                        #for i in range(count):
-                        #    self.assertEqual(sbuf[i], i)
-                        #    self.assertNotEqual(rbuf[i], -1)
-                        #self.assertEqual(rbuf[-1], -1)
-                        #
-                        sbuf = array(range(count), typecode)
-                        rbuf = array(-1, typecode, count+1)
-                        target = sbuf.itemsize
-                        self.WIN.Fence()
-                        self.assertRaises(NotImplementedError, lambda:
-                            self.WIN.Put(sbuf.as_mpi(), rank, target)
-                        )
-                        self.WIN.Fence()
-                        self.assertRaises(NotImplementedError, lambda:
-                            self.WIN.Get(rbuf.as_mpi_c(count), rank, target)
-                        )
-                        self.WIN.Fence()
-                        #for i in range(count):
-                        #    self.assertEqual(sbuf[i], i)
-                        #    self.assertNotEqual(rbuf[i], -1)
-                        #self.assertEqual(rbuf[-1], -1)
-                        #
-                        sbuf = array(range(count), typecode)
-                        rbuf = array(-1, typecode, count+1)
-                        datatype = typemap[typecode]
-                        target = (sbuf.itemsize, count, datatype)
-                        self.WIN.Fence()
-                        self.assertRaises(NotImplementedError, lambda:
-                            self.WIN.Put(sbuf.as_mpi(), rank, target)
-                        )
-                        self.WIN.Fence()
-                        self.assertRaises(NotImplementedError, lambda:
-                            self.WIN.Get(rbuf.as_mpi_c(count), rank, target)
-                        )
-                        self.WIN.Fence()
-                        #for i in range(count):
-                        #    self.assertEqual(sbuf[i], i)
-                        #    self.assertNotEqual(rbuf[i], -1)
-                        #self.assertEqual(rbuf[-1], -1)
-
-    def testAccumulate(self):
-        group = self.WIN.Get_group()
-        size = group.Get_size()
-        group.Free()
-        for array in arrayimpl.ArrayTypes:
-            for typecode in arrayimpl.TypeMap:
-                for count in range(10):
-                    for rank in range(size):
-                        sbuf = array(range(count), typecode)
-                        rbuf = array(-1, typecode, count+1)
-                        for op in (MPI.SUM, MPI.PROD, MPI.MAX, MPI.MIN):
-                            self.WIN.Fence()
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Accumulate(sbuf.as_mpi(), rank, op=op)
-                            )
-                            self.WIN.Fence()
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Get(rbuf.as_mpi_c(count), rank)
-                            )
-                            self.WIN.Fence()
-                            #for i in range(count):
-                            #    self.assertEqual(sbuf[i], i)
-                            #    self.assertNotEqual(rbuf[i], -1)
-                            #self.assertEqual(rbuf[-1], -1)
-
-    @unittest.skipMPI('openmpi(>=1.10,<1.11)')
-    def testGetAccumulate(self):
-        group = self.WIN.Get_group()
-        size = group.Get_size()
-        rank = group.Get_rank()
-        group.Free()
-        self.WIN.Fence()
-        obuf = MPI.Alloc_mem(1); memzero(obuf)
-        rbuf = MPI.Alloc_mem(1); memzero(rbuf)
-        try:
-            try:
-                self.WIN.Get_accumulate([obuf, 0, MPI.BYTE], [rbuf, 0, MPI.BYTE], rank)
-            finally:
-                MPI.Free_mem(obuf)
-                MPI.Free_mem(rbuf)
-        except NotImplementedError:
-            self.skipTest('mpi-win-get_accumulate')
-        self.WIN.Fence()
-        for array in arrayimpl.ArrayTypes:
-            for typecode in arrayimpl.TypeMap:
-                for count in range(10):
-                    for rank in range(size):
-                        ones = array([1]*count, typecode)
-                        sbuf = array(range(count), typecode)
-                        rbuf = array(-1, typecode, count+1)
-                        gbuf = array(-1, typecode, count+1)
-                        for op in (MPI.SUM, MPI.PROD,
-                                   MPI.MAX, MPI.MIN,
-                                   MPI.REPLACE, MPI.NO_OP):
-                            self.WIN.Lock(rank)
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Put(ones.as_mpi(), rank)
-                            )
-                            self.WIN.Flush(rank)
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Get_accumulate(sbuf.as_mpi(),
-                                                        rbuf.as_mpi_c(count),
-                                                        rank, op=op)
-                            )
-                            self.WIN.Flush(rank)
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Get(gbuf.as_mpi_c(count), rank)
-                            )
-                            self.WIN.Flush(rank)
-                            self.WIN.Unlock(rank)
-                            #
-                            #for i in range(count):
-                            #    self.assertEqual(sbuf[i], i)
-                            #    self.assertEqual(rbuf[i], 1)
-                            #    self.assertEqual(gbuf[i], op(1, i))
-                            #self.assertEqual(rbuf[-1], -1)
-                            #self.assertEqual(gbuf[-1], -1)
-
-    def testFetchAndOp(self):
-        group = self.WIN.Get_group()
-        size = group.Get_size()
-        rank = group.Get_rank()
-        group.Free()
-        #self.WIN.Fence()
-        #obuf = MPI.Alloc_mem(1); memzero(obuf)
-        #rbuf = MPI.Alloc_mem(1); memzero(rbuf)
-        #try:
-        #    try:
-        #        self.WIN.Fetch_and_op([obuf, 1, MPI.BYTE], [rbuf, 1, MPI.BYTE], rank)
-        #    finally:
-        #        MPI.Free_mem(obuf)
-        #        MPI.Free_mem(rbuf)
-        #except NotImplementedError:
-        #    self.skipTest('mpi-win-fetch_and_op')
-        #self.WIN.Fence()
-        for array in arrayimpl.ArrayTypes:
-            for typecode in arrayimpl.TypeMap:
-                obuf = array(+1, typecode)
-                rbuf = array(-1, typecode, 2)
-                for op in (MPI.SUM, MPI.PROD,
-                           MPI.MAX, MPI.MIN,
-                           MPI.REPLACE, MPI.NO_OP):
-                    for rank in range(size):
-                        for disp in range(3):
-                            self.WIN.Lock(rank)
-                            self.assertRaises(NotImplementedError, lambda:
-                                self.WIN.Fetch_and_op(obuf.as_mpi(),
-                                                      rbuf.as_mpi_c(1),
-                                                      rank, disp, op=op)
-                            )
-                            self.WIN.Unlock(rank)
-                            #self.assertEqual(rbuf[1], -1)
-
-    def testCompareAndSwap(self):
-        group = self.WIN.Get_group()
-        size = group.Get_size()
-        rank = group.Get_rank()
-        group.Free()
-        self.WIN.Fence()
-        obuf = MPI.Alloc_mem(1); memzero(obuf)
-        cbuf = MPI.Alloc_mem(1); memzero(cbuf)
-        rbuf = MPI.Alloc_mem(1); memzero(rbuf)
-        try:
-            try:
-                self.WIN.Compare_and_swap([obuf, 1, MPI.BYTE],
-                                          [cbuf, 1, MPI.BYTE],
-                                          [rbuf, 1, MPI.BYTE],
-                                          rank, 0)
-            finally:
-                MPI.Free_mem(obuf)
-                MPI.Free_mem(cbuf)
-                MPI.Free_mem(rbuf)
-        except NotImplementedError:
-            self.skipTest('mpi-win-compare_and_swap')
-        self.WIN.Fence()
-        for array in arrayimpl.ArrayTypes:
-            for typecode in arrayimpl.TypeMap:
-                if typecode in 'fdg': continue
-                obuf = array(+1, typecode)
-                cbuf = array( 0, typecode)
-                rbuf = array(-1, typecode, 2)
-                for rank in range(size):
-                    for disp in range(3):
-                        self.WIN.Lock(rank)
-                        self.assertRaises(NotImplementedError, lambda:
-                            self.WIN.Compare_and_swap(obuf.as_mpi(),
-                                                      cbuf.as_mpi(),
-                                                      rbuf.as_mpi_c(1),
-                                                      rank, disp)
-                        )
-                        self.WIN.Unlock(rank)
-                        #self.assertEqual(rbuf[1], -1)
-
-    def testPutProcNull(self):
-        self.WIN.Fence()
-        self.WIN.Put(None, MPI.PROC_NULL, None)
-        self.WIN.Fence()
-
-    def testGetProcNull(self):
-        self.WIN.Fence()
-        self.WIN.Get(None, MPI.PROC_NULL, None)
-        self.WIN.Fence()
-
-    def testAccumulateProcNullReplace(self):
-        self.WIN.Fence()
-        zeros = mkzeros(8)
-        self.WIN.Fence()
-        self.WIN.Accumulate([zeros, MPI.INT], MPI.PROC_NULL, None, MPI.REPLACE)
-        self.WIN.Fence()
-        self.WIN.Accumulate([zeros, MPI.INT], MPI.PROC_NULL, None, MPI.REPLACE)
-        self.WIN.Fence()
-
-    def testAccumulateProcNullSum(self):
-        self.WIN.Fence()
-        zeros = mkzeros(8)
-        self.WIN.Fence()
-        self.WIN.Accumulate([zeros, MPI.INT], MPI.PROC_NULL, None, MPI.SUM)
-        self.WIN.Fence()
-        self.WIN.Accumulate([None, MPI.INT], MPI.PROC_NULL, None, MPI.SUM)
-        self.WIN.Fence()
-
-    def testGetAccumulateProcNull(self):
-        obuf = [mkzeros(8), 0, MPI.INT]
-        rbuf = [mkzeros(8), 0, MPI.INT]
-        self.WIN.Fence()
-        try:
-            self.WIN.Get_accumulate(obuf, rbuf, MPI.PROC_NULL)
-        except NotImplementedError:
-            self.skipTest('mpi-win-get_accumulate')
-        self.WIN.Fence()
-
-    ##def testFetchAndOpProcNull(self):
-    ##    obuf = cbuf = rbuf = None
-    ##    self.WIN.Fence()
-    ##    try:
-    ##        self.WIN.Fetch_and_op(obuf, rbuf, MPI.PROC_NULL, 0)
-    ##    except NotImplementedError:
-    ##        self.skipTest('mpi-win-fetch_and_op')
-    ##    self.WIN.Fence()
-
-    ##def testCompareAndSwapProcNull(self):
-    ##    obuf = cbuf = rbuf = None
-    ##    self.WIN.Fence()
-    ##    try:
-    ##        self.WIN.Compare_and_swap(obuf, cbuf, rbuf, MPI.PROC_NULL, 0)
-    ##    except NotImplementedError:
-    ##        self.skipTest('mpi-win-compare_and_swap')
-    ##    self.WIN.Fence()
-
-    def testFence(self):
-        win = self.WIN
-        LMODE = [0, MPI.MODE_NOSTORE, MPI.MODE_NOPUT,
-                 MPI.MODE_NOSTORE|MPI.MODE_NOPUT]
-        GMODE = [0, MPI.MODE_NOPRECEDE, MPI.MODE_NOSUCCEED]
-        win.Fence()
-        for lmode in LMODE:
-            for gmode in GMODE:
-                assertion = lmode | gmode
-                win.Fence(assertion)
-        win.Fence()
-
-    @unittest.skipMPI('openmpi(==1.8.1)')
-    def testFenceAll(self):
-        win = self.WIN
-        assertion = 0
-        modes = [0,
-                 MPI.MODE_NOSTORE,
-                 MPI.MODE_NOPUT,
-                 MPI.MODE_NOPRECEDE,
-                 MPI.MODE_NOSUCCEED]
-        win.Fence()
-        for mode in modes:
-            win.Fence(mode)
-            assertion |= mode
-            win.Fence(assertion)
-        win.Fence()
-
-    @unittest.skipMPI('openmpi(==1.8.6)')
-    def testStartComplete(self):
-        self.WIN.Start(MPI.GROUP_EMPTY)
-        self.WIN.Complete()
-
-    @unittest.skipMPI('openmpi(==1.8.6)')
-    def testPostWait(self):
-        self.WIN.Post(MPI.GROUP_EMPTY)
-        self.WIN.Wait()
-
-    @unittest.skipMPI('openmpi(==1.8.7)')
-    @unittest.skipMPI('openmpi(==1.8.6)')
-    def testStartCompletePostWait(self):
-        win = self.WIN
-        wingroup = win.Get_group()
-        size = wingroup.Get_size()
-        rank = wingroup.Get_rank()
-        if size < 2: return wingroup.Free()
-        if rank == 0:
-            group = wingroup.Excl([0])
-            win.Start(group)
-            win.Complete()
-            win.Post(group)
-            win.Wait()
-            group.Free()
-        else:
-            group = wingroup.Incl([0])
-            win.Post(group)
-            win.Wait()
-            win.Start(group)
-            win.Complete()
-            group.Free()
-        wingroup.Free()
-
-    @unittest.skipMPI('openmpi(==1.8.7)')
-    @unittest.skipMPI('openmpi(==1.8.6)')
-    def testStartCompletePostTest(self):
-        comm = self.COMM
-        win = self.WIN
-        wingroup = win.Get_group()
-        size = wingroup.Get_size()
-        rank = wingroup.Get_rank()
-        if size < 2: return wingroup.Free()
-        if rank == 0:
-            group = wingroup.Excl([0])
-            win.Start(group)
-            comm.Barrier()
-            win.Complete()
-            comm.Barrier()
-            group.Free()
-        else:
-            group = wingroup.Incl([0])
-            win.Post(group)
-            flag = win.Test()
-            self.assertFalse(flag)
-            comm.Barrier()
-            comm.Barrier()
-            flag = win.Test()
-            self.assertTrue(flag)
-            group.Free()
-        wingroup.Free()
-
-    @unittest.skipMPI('MPI(<3.0)')
-    def testSync(self):
-        win = self.WIN
-        comm = self.COMM
-        rank = comm.Get_rank()
-        win.Lock(rank)
-        win.Sync()
-        win.Unlock(rank)
-        comm.Barrier()
-
-    @unittest.skipMPI('MPI(<3.0)')
-    def testFlush(self):
-        win = self.WIN
-        comm = self.COMM
-        size = comm.Get_size()
-        rank = comm.Get_rank()
-        #
-        for i in range(size):
-            win.Lock(i)
-            win.Flush(i)
-            win.Unlock(i)
-        comm.Barrier()
-        for i in range(size):
-            if i == rank:
-                win.Lock_all()
-                win.Flush_all()
-                win.Unlock_all()
-            comm.Barrier()
-        #
-        for i in range(size):
-            win.Lock(i)
-            win.Flush_local(i)
-            win.Unlock(i)
-        comm.Barrier()
-        for i in range(size):
-            if i == rank:
-                win.Lock_all()
-                win.Flush_local_all()
-                win.Unlock_all()
-            comm.Barrier()
-
-class TestRMASelf(BaseTestRMA, unittest.TestCase):
-    COMM = MPI.COMM_SELF
-
-#class TestRMAWorld(BaseTestRMA, unittest.TestCase):
-#    COMM = MPI.COMM_WORLD
-
-
-SpectrumMPI = MPI.get_vendor()[0] == 'Spectrum MPI'
-try:
-    if SpectrumMPI: raise NotImplementedError
-    MPI.Win.Create(None, 1, MPI.INFO_NULL, MPI.COMM_SELF).Free()
-except (NotImplementedError, MPI.Exception):
-    unittest.disable(BaseTestRMA, 'mpi-rma')
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/test/test_vai.py b/test/test_vai.py
new file mode 100644
index 0000000..63bd918
--- /dev/null
+++ b/test/test_vai.py
@@ -0,0 +1,103 @@
+from mpi4pyve import MPI
+import mpiunittest as unittest
+import nlcpy as vp
+import numpy as np
+import nlcpy_ndarray_wrapper
+
+class BaseTestVAIBuf(object):
+
+    COMM = MPI.COMM_NULL
+
+    def testSendrecv(self):
+        size = self.COMM.Get_size()
+        rank = self.COMM.Get_rank()
+        dest = (rank + 1) % size
+        source = (rank - 1) % size
+        if size < 1: return
+
+        if rank == 0:
+            sbuf = vp.arange(10)[2:]
+            rbuf = vp.array(vp.zeros(8), dtype='int')
+            self.COMM.Sendrecv(sbuf, 0, 0,
+                               rbuf, 0, 0)
+            self.assertEqual(np.allclose(sbuf, rbuf), True)
+        else:
+            pass
+
+    def testSendRecv(self):
+        size = self.COMM.Get_size()
+        rank = self.COMM.Get_rank()
+        sbuf = vp.arange(10)[2:]
+        rbuf = vp.array(vp.zeros(8), dtype='int')
+        if size < 2: return
+        if rank == 0:
+            self.COMM.Send(sbuf, dest=1)
+        elif rank == 1:
+            self.COMM.Recv(rbuf, source=0)
+            self.assertEqual(np.allclose(sbuf, rbuf), True)
+        else:
+            pass
+
+    def testPickledSendrecv(self):
+        size = self.COMM.Get_size()
+        rank = self.COMM.Get_rank()
+        dest = (rank + 1) % size
+        source = (rank - 1) % size
+        if size < 1: return
+
+        if rank == 0:
+            sbuf = vp.arange(10)[2:]
+            rbuf = vp.array(vp.zeros(8), dtype='int')
+            rbuf = self.COMM.sendrecv(sbuf, dest=0, source=0)
+            self.assertEqual(np.allclose(sbuf, rbuf), True)
+        else:
+            pass
+
+    def testPickledSendRecv(self):
+        size = self.COMM.Get_size()
+        rank = self.COMM.Get_rank()
+        sbuf = vp.arange(10)[2:]
+        rbuf = vp.array(vp.zeros(8), dtype='int')
+        if size < 2: return
+        if rank == 0:
+            self.COMM.send(sbuf, dest=1)
+        elif rank == 1:
+            rbuf = self.COMM.recv()
+            self.assertEqual(np.allclose(sbuf, rbuf), True)
+        else:
+            pass
+
+    def testVAIReadOnly(self):
+        size = self.COMM.Get_size()
+        rank = self.COMM.Get_rank()
+        sbuf = nlcpy_ndarray_wrapper.nlcpy_ndarray_wrapper(shape=(8))
+        sbuf.fill(0)
+        rbuf = nlcpy_ndarray_wrapper.nlcpy_ndarray_wrapper(shape=(8))
+        rbuf.fill(0)
+        if size < 2: return
+        if rank == 0:
+            self.COMM.Send(sbuf, dest=1)
+        elif rank == 1:
+            self.COMM.Recv(rbuf, source=0)
+            self.assertEqual(np.allclose(sbuf, rbuf), True)
+        else:
+            pass
+        sbuf.set_read_only_flag(True)
+        rbuf.set_read_only_flag(True)
+        if rank == 0:
+            #self.COMM.Send(sbuf, dest=1)
+            pass
+        elif rank == 1:
+            with self.assertRaises(BufferError):
+                self.COMM.Recv(rbuf, source=0)
+        else:
+            pass
+
+class TestVAIBufSelf(BaseTestVAIBuf, unittest.TestCase):
+    COMM = MPI.COMM_SELF
+
+class TestVAIBufWorld(BaseTestVAIBuf, unittest.TestCase):
+    COMM = MPI.COMM_WORLD
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_vai_bool_array.py b/test/test_vai_bool_array.py
new file mode 100644
index 0000000..7ca15ee
--- /dev/null
+++ b/test/test_vai_bool_array.py
@@ -0,0 +1,60 @@
+from mpi4pyve import MPI
+import mpiunittest as unittest
+import nlcpy as vp
+import numpy as np
+import nlcpy_ndarray_wrapper
+
+class BaseTestVAIBuf(object):
+
+    COMM = MPI.COMM_NULL
+
+    def testSendRecvbool(self):
+        size = self.COMM.Get_size()
+        rank = self.COMM.Get_rank()
+        sbuf = vp.array([True, True, True], dtype='bool')
+        rbuf = vp.array([False, False, False], dtype='bool')
+        if size < 2: return
+        if rank == 0:
+            self.COMM.Send(sbuf, dest=1)
+        elif rank == 1:
+            self.COMM.Recv(rbuf, source=0)
+            self.assertEqual(np.allclose(sbuf, rbuf), True)
+        else:
+            pass
+
+    def testSendRecvMPIBOOL(self):
+        size = self.COMM.Get_size()
+        rank = self.COMM.Get_rank()
+        sbuf = vp.array([True, True, True], dtype='bool')
+        rbuf = vp.array([False, False, False], dtype='bool')
+        if size < 2: return
+        if rank == 0:
+            self.COMM.Send([sbuf, MPI.BOOL], dest=1)
+        elif rank == 1:
+            self.COMM.Recv([rbuf, MPI.BOOL], source=0)
+            self.assertEqual(np.allclose(sbuf, rbuf), True)
+        else:
+            pass
+
+    def testPickledbool(self):
+        size = self.COMM.Get_size()
+        rank = self.COMM.Get_rank()
+        sbuf = vp.array([True, True, True], dtype='bool')
+        rbuf = vp.array([False, False, False], dtype='bool')
+        if size < 2: return
+        if rank == 0:
+            self.COMM.send(sbuf, dest=1)
+        elif rank == 1:
+            rbuf = self.COMM.recv()
+            self.assertEqual(np.allclose(sbuf, rbuf), True)
+        else:
+            pass
+
+class TestVAIBufSelf(BaseTestVAIBuf, unittest.TestCase):
+    COMM = MPI.COMM_SELF
+
+class _TestVAIBufWorld(BaseTestVAIBuf, unittest.TestCase):
+    COMM = MPI.COMM_WORLD
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_vai_notimpl.py b/test/test_vai_notimpl.py
new file mode 100644
index 0000000..5adc67a
--- /dev/null
+++ b/test/test_vai_notimpl.py
@@ -0,0 +1,46 @@
+from mpi4pyve import MPI
+import mpiunittest as unittest
+import nlcpy
+import numpy
+
+class TestVAI_Notimpl(unittest.TestCase):
+
+    def testRestriction(self):
+        src = nlcpy.arange(10)
+        dst = nlcpy.arange(10)
+        np_src = numpy.arange(10)
+        np_dst = numpy.arange(10)
+
+        with self.assertRaises(NotImplementedError):
+            MPI.Attach_buffer(src)
+
+        comm = MPI.COMM_SELF
+        with self.assertRaises(NotImplementedError):
+            comm.Bsend(src, 0)
+        with self.assertRaises(NotImplementedError):
+            comm.Ibsend(src, 0)
+        with self.assertRaises(NotImplementedError):
+            comm.Bsend_init(src, 0)
+        with self.assertRaises(NotImplementedError):
+            comm.bsend(src, 0)
+        with self.assertRaises(NotImplementedError):
+            comm.ibsend(src, 0)
+
+        op = MPI.SUM
+        with self.assertRaises(NotImplementedError):
+            op.Reduce_local(src, np_src)
+        with self.assertRaises(NotImplementedError):
+            op.Reduce_local(np_src, src)
+
+        data_type = MPI.INT
+        with self.assertRaises(NotImplementedError):
+            data_type.Pack_external('external32', src, np_src, 0)
+        with self.assertRaises(NotImplementedError):
+            data_type.Pack_external('external32', np_src, src, 0)
+        with self.assertRaises(NotImplementedError):
+            data_type.Unpack_external('external32', src, 0, np_src)
+        with self.assertRaises(NotImplementedError):
+            data_type.Unpack_external('external32', np_src, 0, src)
+
+if __name__ == '__main__':
+    unittest.main()
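
The new tests above pass nlcpy.ndarray objects directly to the buffer-style (uppercase) communication methods. The following is a minimal sketch of that usage pattern outside the test harness; the file name ``vai_send_recv.py``, the array length, and the ``int64`` dtype are illustrative assumptions, not part of this patch.

::

    # vai_send_recv.py -- hypothetical example, not part of this patch.
    # Rank 0 sends an NLCPy array (nlcpy.ndarray) to rank 1 via the
    # buffer-based Send/Recv methods, mirroring testSendRecv in
    # test_vai.py above. Run under two or more MPI processes.
    from mpi4pyve import MPI
    import nlcpy as vp

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    if rank == 0:
        sbuf = vp.arange(8, dtype='int64')   # array data resides on the VE
        comm.Send(sbuf, dest=1)
    elif rank == 1:
        rbuf = vp.zeros(8, dtype='int64')    # matching dtype and length
        comm.Recv(rbuf, source=0)
        assert (rbuf == vp.arange(8, dtype='int64')).all()

Note that, as test_vai_notimpl.py records, buffered-mode operations (Bsend and related calls), Reduce_local, and Pack_external/Unpack_external raise NotImplementedError for VE arrays, so scripts following this sketch should stay on the standard send/receive and collective paths.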