Skip to content

Commit

Permalink
Implementation of corrcoef (#2139)
Browse files Browse the repository at this point in the history
* Implementation of corrcoef

* Apply review comments

* Fix docs

---------

Co-authored-by: Anton <[email protected]>
  • Loading branch information
AlexanderKalistratov and antonwolfy authored Nov 12, 2024
1 parent 44c1ed3 commit ba5fb31
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 14 deletions.
122 changes: 122 additions & 0 deletions dpnp/dpnp_iface_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
"amin",
"average",
"bincount",
"corrcoef",
"correlate",
"cov",
"max",
Expand Down Expand Up @@ -360,6 +361,127 @@ def bincount(x1, weights=None, minlength=0):
return call_origin(numpy.bincount, x1, weights=weights, minlength=minlength)


def corrcoef(x, y=None, rowvar=True, *, dtype=None):
"""
Return Pearson product-moment correlation coefficients.
For full documentation refer to :obj:`numpy.corrcoef`.
Parameters
----------
x : {dpnp.ndarray, usm_ndarray}
A 1-D or 2-D array containing multiple variables and observations.
Each row of `x` represents a variable, and each column a single
observation of all those variables. Also see `rowvar` below.
y : {None, dpnp.ndarray, usm_ndarray}, optional
An additional set of variables and observations. `y` has the same
shape as `x`.
Default: ``None``.
rowvar : {bool}, optional
If `rowvar` is ``True``, then each row represents a variable,
with observations in the columns. Otherwise, the relationship
is transposed: each column represents a variable, while the rows
contain observations.
Default: ``True``.
dtype : {None, dtype}, optional
Data-type of the result.
Default: ``None``.
Returns
-------
R : {dpnp.ndarray}
The correlation coefficient matrix of the variables.
See Also
--------
:obj:`dpnp.cov` : Covariance matrix.
Examples
--------
In this example we generate two random arrays, ``xarr`` and ``yarr``, and
compute the row-wise and column-wise Pearson correlation coefficients,
``R``. Since `rowvar` is true by default, we first find the row-wise
Pearson correlation coefficients between the variables of ``xarr``.
>>> import dpnp as np
>>> np.random.seed(123)
>>> xarr = np.random.rand(3, 3).astype(np.float32)
>>> xarr
array([[7.2858386e-17, 2.2066992e-02, 3.9520904e-01],
[4.8012391e-01, 5.9377134e-01, 4.5147297e-01],
[9.0728188e-01, 9.9387854e-01, 5.8399546e-01]], dtype=float32)
>>> R1 = np.corrcoef(xarr)
>>> R1
array([[ 0.99999994, -0.6173796 , -0.9685411 ],
[-0.6173796 , 1. , 0.7937219 ],
[-0.9685411 , 0.7937219 , 0.9999999 ]], dtype=float32)
If we add another set of variables and observations ``yarr``, we can
compute the row-wise Pearson correlation coefficients between the
variables in ``xarr`` and ``yarr``.
>>> yarr = np.random.rand(3, 3).astype(np.float32)
>>> yarr
array([[0.17615308, 0.65354985, 0.15716429],
[0.09373496, 0.2123185 , 0.84086883],
[0.9011005 , 0.45206687, 0.00225109]], dtype=float32)
>>> R2 = np.corrcoef(xarr, yarr)
>>> R2
array([[ 0.99999994, -0.6173796 , -0.968541 , -0.48613155, 0.9951523 ,
-0.8900264 ],
[-0.6173796 , 1. , 0.7937219 , 0.9875833 , -0.53702235,
0.19083664],
[-0.968541 , 0.7937219 , 0.9999999 , 0.6883078 , -0.9393724 ,
0.74857277],
[-0.48613152, 0.9875833 , 0.6883078 , 0.9999999 , -0.39783284,
0.0342579 ],
[ 0.9951523 , -0.53702235, -0.9393725 , -0.39783284, 0.99999994,
-0.9305482 ],
[-0.89002645, 0.19083665, 0.7485727 , 0.0342579 , -0.9305482 ,
1. ]], dtype=float32)
Finally if we use the option ``rowvar=False``, the columns are now
being treated as the variables and we will find the column-wise Pearson
correlation coefficients between variables in ``xarr`` and ``yarr``.
>>> R3 = np.corrcoef(xarr, yarr, rowvar=False)
>>> R3
array([[ 1. , 0.9724453 , -0.9909503 , 0.8104691 , -0.46436927,
-0.1643624 ],
[ 0.9724453 , 1. , -0.9949381 , 0.6515728 , -0.6580445 ,
0.07012729],
[-0.99095035, -0.994938 , 1. , -0.72450536, 0.5790461 ,
0.03047091],
[ 0.8104691 , 0.65157276, -0.72450536, 1. , 0.14243561,
-0.71102554],
[-0.4643693 , -0.6580445 , 0.57904613, 0.1424356 , 0.99999994,
-0.79727215],
[-0.1643624 , 0.07012729, 0.03047091, -0.7110255 , -0.7972722 ,
0.99999994]], dtype=float32)
"""

out = dpnp.cov(x, y, rowvar, dtype=dtype)
if out.ndim == 0:
# scalar covariance
# nan if incorrect value (nan, inf, 0), 1 otherwise
return out / out

d = dpnp.diag(out)

stddev = dpnp.sqrt(d.real)
out /= stddev[:, None]
out /= stddev[None, :]

# Clip real and imaginary parts to [-1, 1]. This does not guarantee
# abs(a[i,j]) <= 1 for complex arrays, but is the best we can do without
# excessive work.
dpnp.clip(out.real, -1, 1, out=out.real)
if dpnp.iscomplexobj(out):
dpnp.clip(out.imag, -1, 1, out=out.imag)

return out


def correlate(x1, x2, mode="valid"):
"""
Cross-correlation of two 1-dimensional sequences.
Expand Down
5 changes: 0 additions & 5 deletions tests/skipped_tests.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -310,11 +310,6 @@ tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_bo
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit_2

tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_diag_exception
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_y

tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[higher]
Expand Down
5 changes: 0 additions & 5 deletions tests/skipped_tests_gpu.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,6 @@ tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_bo
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit
tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit_2

tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_diag_exception
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_y

tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[linear]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[lower]
tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_defaults[higher]
Expand Down
55 changes: 55 additions & 0 deletions tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,61 @@ def test_std_error(self):
dpnp.std(ia, ddof="1")


class TestCorrcoef:
@pytest.mark.usefixtures(
"suppress_divide_invalid_numpy_warnings",
"suppress_dof_numpy_warnings",
)
@pytest.mark.parametrize("dtype", get_all_dtypes())
@pytest.mark.parametrize("rowvar", [True, False])
def test_corrcoef(self, dtype, rowvar):
dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dtype)
np_array = dpnp.asnumpy(dp_array)

expected = numpy.corrcoef(np_array, rowvar=rowvar)
result = dpnp.corrcoef(dp_array, rowvar=rowvar)

assert_dtype_allclose(result, expected)

@pytest.mark.usefixtures(
"suppress_divide_invalid_numpy_warnings",
"suppress_dof_numpy_warnings",
"suppress_mean_empty_slice_numpy_warnings",
)
@pytest.mark.parametrize("shape", [(2, 0), (0, 2)])
def test_corrcoef_empty(self, shape):
dp_array = dpnp.empty(shape, dtype=dpnp.int64)
np_array = dpnp.asnumpy(dp_array)

result = dpnp.corrcoef(dp_array)
expected = numpy.corrcoef(np_array)
assert_dtype_allclose(result, expected)

@pytest.mark.usefixtures("suppress_complex_warning")
@pytest.mark.parametrize("dt_in", get_all_dtypes(no_bool=True))
@pytest.mark.parametrize("dt_out", get_float_complex_dtypes())
def test_corrcoef_dtype(self, dt_in, dt_out):
dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dt_in)
np_array = dpnp.asnumpy(dp_array)

expected = numpy.corrcoef(np_array, dtype=dt_out)
result = dpnp.corrcoef(dp_array, dtype=dt_out)
assert expected.dtype == result.dtype
assert_allclose(result, expected, rtol=1e-6)

@pytest.mark.usefixtures(
"suppress_divide_invalid_numpy_warnings",
"suppress_dof_numpy_warnings",
)
def test_corrcoef_scalar(self):
dp_array = dpnp.array(5)
np_array = dpnp.asnumpy(dp_array)

result = dpnp.corrcoef(dp_array)
expected = numpy.corrcoef(np_array)
assert_dtype_allclose(result, expected)


@pytest.mark.usefixtures("allow_fall_back_on_numpy")
class TestBincount:
@pytest.mark.parametrize(
Expand Down
6 changes: 6 additions & 0 deletions tests/test_sycl_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ def test_meshgrid(device):
pytest.param("ceil", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
pytest.param("conjugate", [[1.0 + 1.0j, 0.0], [0.0, 1.0 + 1.0j]]),
pytest.param("copy", [1.0, 2.0, 3.0]),
pytest.param("corrcoef", [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]),
pytest.param(
"cos", [-dpnp.pi / 2, -dpnp.pi / 4, 0.0, dpnp.pi / 4, dpnp.pi / 2]
),
Expand Down Expand Up @@ -693,6 +694,11 @@ def test_reduce_hypot(device):
pytest.param("append", [1, 2, 3], [4, 5, 6]),
pytest.param("arctan2", [-1, +1, +1, -1], [-1, -1, +1, +1]),
pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]),
pytest.param(
"corrcoef",
[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
[[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]],
),
pytest.param("cross", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]),
pytest.param("digitize", [0.2, 6.4, 3.0], [0.0, 1.0, 2.5, 4.0]),
pytest.param(
Expand Down
6 changes: 6 additions & 0 deletions tests/test_usm_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,7 @@ def test_norm(usm_type, ord, axis):
pytest.param("cbrt", [1, 8, 27]),
pytest.param("ceil", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]),
pytest.param("conjugate", [[1.0 + 1.0j, 0.0], [0.0, 1.0 + 1.0j]]),
pytest.param("corrcoef", [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]),
pytest.param(
"cos", [-dp.pi / 2, -dp.pi / 4, 0.0, dp.pi / 4, dp.pi / 2]
),
Expand Down Expand Up @@ -685,6 +686,11 @@ def test_1in_1out(func, data, usm_type):
pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]),
pytest.param("cross", [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]),
pytest.param("digitize", [0.2, 6.4, 3.0], [0.0, 1.0, 2.5, 4.0]),
pytest.param(
"corrcoef",
[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
[[0.7, 0.8, 0.9], [1.0, 1.1, 1.2]],
),
# dpnp.dot has 3 different implementations based on input arrays dtype
# checking all of them
pytest.param("dot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]),
Expand Down
8 changes: 4 additions & 4 deletions tests/third_party/cupy/statistics_tests/test_correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,26 @@

class TestCorrcoef(unittest.TestCase):
@testing.for_all_dtypes()
@testing.numpy_cupy_allclose()
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
def test_corrcoef(self, xp, dtype):
a = testing.shaped_arange((2, 3), xp, dtype)
return xp.corrcoef(a)

@testing.for_all_dtypes()
@testing.numpy_cupy_allclose()
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
def test_corrcoef_diag_exception(self, xp, dtype):
a = testing.shaped_arange((1, 3), xp, dtype)
return xp.corrcoef(a)

@testing.for_all_dtypes()
@testing.numpy_cupy_allclose()
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
def test_corrcoef_y(self, xp, dtype):
a = testing.shaped_arange((2, 3), xp, dtype)
y = testing.shaped_arange((2, 3), xp, dtype)
return xp.corrcoef(a, y=y)

@testing.for_all_dtypes()
@testing.numpy_cupy_allclose()
@testing.numpy_cupy_allclose(type_check=has_support_aspect64())
def test_corrcoef_rowvar(self, xp, dtype):
a = testing.shaped_arange((2, 3), xp, dtype)
y = testing.shaped_arange((2, 3), xp, dtype)
Expand Down

0 comments on commit ba5fb31

Please sign in to comment.