Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CUDA: enable cuda support v1 - EAGER with GDR COPY #20

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 84 additions & 1 deletion config/m4/cuda.m4
Original file line number Diff line number Diff line change
@@ -1 +1,84 @@
AM_CONDITIONAL([HAVE_CUDA], [true])
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#

#
# Check for CUDA support
#
cuda_happy="no"

AC_ARG_WITH([cuda],
[AS_HELP_STRING([--with-cuda=(DIR)], [Enable the use of CUDA (default is no).])],
[], [with_cuda=no])

AS_IF([test "x$with_cuda" != "xno"],

[AS_IF([test "x$with_cuda" == "x" || test "x$with_cuda" == "xguess" || test "x$with_cuda" == "xyes"],
[
AC_MSG_NOTICE([CUDA path was not specified. Guessing ...])
with_cuda=/usr/local/cuda
],
[:])
AC_CHECK_HEADERS([$with_cuda/include/cuda.h $with_cuda/include/cuda_runtime.h],
[AC_CHECK_DECLS([cuPointerGetAttribute],
[cuda_happy="yes"],
[AC_MSG_WARN([CUDA runtime not detected. Disable.])
cuda_happy="no"],
[#include <$with_cuda/include/cuda.h>])
AS_IF([test "x$cuda_happy" == "xyes"],
[AC_DEFINE([HAVE_CUDA], 1, [Enable CUDA support])
AC_SUBST(CUDA_CPPFLAGS, "-I$with_cuda/include/ ")
AC_SUBST(CUDA_CFLAGS, "-I$with_cuda/include/ ")
AC_SUBST(CUDA_LDFLAGS, "-lcudart -lcuda -L$with_cuda/lib64")
CFLAGS="$CFLAGS $CUDA_CFLAGS"
CPPFLAGS="$CPPFLAGS $CUDA_CPPFLAGS"
LDFLAGS="$LDFLAGS $CUDA_LDFLAGS"],
[])],
[AC_MSG_WARN([CUDA not found])
AC_DEFINE([HAVE_CUDA], [0], [Disable the use of CUDA])])],
[AC_MSG_WARN([CUDA was explicitly disabled])
AC_DEFINE([HAVE_CUDA], [0], [Disable the use of CUDA])]
)


AM_CONDITIONAL([HAVE_CUDA], [test "x$cuda_happy" != xno])

AC_ARG_WITH([gdrcopy],
[AS_HELP_STRING([--with-gdrcopy=(DIR)], [Enable the use of GDR_COPY (default is no).])],
[], [with_gdrcopy=no])

AS_IF([test "x$with_gdrcopy" != "xno"],

[AS_IF([test "x$with_gdrcopy" == "x" || test "x$with_gdrcopy" == "xguess" || test "x$with_gdrcopy" == "xyes"],
[
AC_MSG_NOTICE([GDR_COPY path was not specified. Guessing ...])
with_gdrcopy=/usr/local/gdrcopy
],
[:])
AC_CHECK_HEADERS([$with_gdrcopy/include/gdrapi.h],
[AC_CHECK_DECLS([gdr_pin_buffer],
[gdrcopy_happy="yes"],
[AC_MSG_WARN([GDR_COPY runtime not detected. Disable.])
gdrcopy_happy="no"],
[#include <$with_gdrcopy/include/gdrapi.h>])
AS_IF([test "x$gdrcopy_happy" == "xyes"],
[AC_DEFINE([HAVE_GDR_COPY], 1, [Enable GDR_COPY support])
AC_SUBST(GDR_COPY_CPPFLAGS, "-I$with_gdrcopy/include/ ")
AC_SUBST(GDR_COPY_CFLAGS, "-I$with_gdrcopy/include/ ")
AC_SUBST(GDR_COPY_LDFLAGS, "-lgdrapi -L$with_gdrcopy/lib64")
CFLAGS="$CFLAGS $GDR_COPY_CFLAGS"
CPPFLAGS="$CPPFLAGS $GDR_COPY_CPPFLAGS"
LDFLAGS="$LDFLAGS $GDR_COPY_LDFLAGS"],
[])],
[AC_MSG_WARN([GDR_COPY not found])
AC_DEFINE([HAVE_GDR_COPY], [0], [Disable the use of GDR_COPY])])],
[AC_MSG_WARN([GDR_COPY was explicitly disabled])
AC_DEFINE([HAVE_GDR_COPY], [0], [Disable the use of GDR_COPY])]
)


AM_CONDITIONAL([HAVE_GDR_COPY], [test "x$gdrcopy_happy" != xno])

AC_DEFINE([HAVE_CUDA_GDR], [1], [Eanble GPU Direct RDMA])]
AM_CONDITIONAL([HAVE_CUDA_GDR], [1])
25 changes: 19 additions & 6 deletions src/uct/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,27 @@ endif

if HAVE_CUDA
noinst_HEADERS += \
cuda/cuda_md.h \
cuda/cuda_iface.h \
cuda/cuda_ep.h
cuda/cuda_copy/cuda_copy_md.h \
cuda/cuda_copy/cuda_copy_iface.h \
cuda/cuda_copy/cuda_copy_ep.h

libuct_la_SOURCES += \
cuda/cuda_md.c \
cuda/cuda_iface.c \
cuda/cuda_ep.c
cuda/cuda_copy/cuda_copy_md.c \
cuda/cuda_copy/cuda_copy_iface.c \
cuda/cuda_copy/cuda_copy_ep.c

if HAVE_GDR_COPY
noinst_HEADERS += \
cuda/gdr_copy/gdr_copy_md.h \
cuda/gdr_copy/gdr_copy_iface.h \
cuda/gdr_copy/gdr_copy_ep.h

libuct_la_SOURCES += \
cuda/gdr_copy/gdr_copy_md.c \
cuda/gdr_copy/gdr_copy_iface.c \
cuda/gdr_copy/gdr_copy_ep.c
endif

endif

if HAVE_ROCM
Expand Down
28 changes: 27 additions & 1 deletion src/uct/api/uct.h
Original file line number Diff line number Diff line change
Expand Up @@ -377,11 +377,24 @@ enum {
UCT_MD_FLAG_RKEY_PTR = UCS_BIT(6), /**< MD supports direct access to
remote memory via a pointer that
is returned by @ref uct_rkey_ptr */
UCT_MD_FLAG_SOCKADDR = UCS_BIT(7) /**< MD support for client-server
UCT_MD_FLAG_SOCKADDR = UCS_BIT(7), /**< MD support for client-server
connection establishment via
sockaddr */
UCT_MD_FLAG_ADDR_DN = UCS_BIT(8) /**< MD supports memory addr domain
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change to _MRM_DETECT

detection */
};

/*
* @ingroup UCT_MD
* @brief Memory addr domains
*/
typedef enum {
UCT_MD_ADDR_DOMAIN_CUDA = 0, /**< NVIDIA CUDA domain */
UCT_MD_ADDR_DOMAIN_DEFAULT, /**< Default system domain */
UCT_MD_ADDR_DOMAIN_LAST = UCT_MD_ADDR_DOMAIN_DEFAULT
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check LAST


} uct_addr_domain_t;


/**
* @ingroup UCT_MD
Expand Down Expand Up @@ -630,6 +643,7 @@ struct uct_md_attr {
size_t max_alloc; /**< Maximal allocation size */
size_t max_reg; /**< Maximal registration size */
uint64_t flags; /**< UCT_MD_FLAG_xx */
uct_addr_domain_t addr_dn; /**< Supported addr domain */
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change to uct_memory_domain_t

Copy link
Owner Author

@bureddy bureddy Oct 9, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or memory_type , memory_kind

} cap;

uct_linear_growth_t reg_cost; /**< Memory registration cost estimation
Expand Down Expand Up @@ -1413,6 +1427,18 @@ ucs_status_t uct_md_mem_reg(uct_md_h md, void *address, size_t length,
ucs_status_t uct_md_mem_dereg(uct_md_h md, uct_mem_h memh);


/**
* @ingroup UCT_MD
* @brief Detect memory on the memory domain.
*
* Detect memory on the memory domain.
* Return UCS_OK if address belongs to MDs address domain
*
* @param [in] md Memory domain to register memory on.
* @param [in] address Memory address to detect.
*/
ucs_status_t uct_md_mem_detect(uct_md_h md, void *addr);
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add to ucx_info to show memory detect support


/**
* @ingroup UCT_MD
* @brief Allocate memory for zero-copy communications and remote access.
Expand Down
5 changes: 5 additions & 0 deletions src/uct/base/uct_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -543,3 +543,8 @@ int uct_md_is_sockaddr_accessible(uct_md_h md, const ucs_sock_addr_t *sockaddr,
{
return md->ops->is_sockaddr_accessible(md, sockaddr, mode);
}

ucs_status_t uct_md_mem_detect(uct_md_h md, void *addr)
{
return md->ops->mem_detect(md, addr);
}
1 change: 1 addition & 0 deletions src/uct/base/uct_md.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ struct uct_md_ops {

int (*is_sockaddr_accessible)(uct_md_h md, const ucs_sock_addr_t *sockaddr,
uct_sockaddr_accessibility_t mode);
ucs_status_t (*mem_detect)(uct_md_h md, void *addr);
};


Expand Down
62 changes: 62 additions & 0 deletions src/uct/cuda/cuda_copy/cuda_copy_ep.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
* See file LICENSE for terms.
*/

#include "cuda_copy_ep.h"
#include "cuda_copy_iface.h"

#include <uct/base/uct_log.h>
#include <ucs/debug/memtrack.h>
#include <ucs/type/class.h>


static UCS_CLASS_INIT_FUNC(uct_cuda_copy_ep_t, uct_iface_t *tl_iface,
const uct_device_addr_t *dev_addr,
const uct_iface_addr_t *iface_addr)
{
uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_copy_iface_t);
UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super)
return UCS_OK;
}

static UCS_CLASS_CLEANUP_FUNC(uct_cuda_copy_ep_t)
{
}

UCS_CLASS_DEFINE(uct_cuda_copy_ep_t, uct_base_ep_t)
UCS_CLASS_DEFINE_NEW_FUNC(uct_cuda_copy_ep_t, uct_ep_t, uct_iface_t*,
const uct_device_addr_t *, const uct_iface_addr_t *);
UCS_CLASS_DEFINE_DELETE_FUNC(uct_cuda_copy_ep_t, uct_ep_t);


ucs_status_t uct_cuda_copy_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
uint64_t remote_addr, uct_rkey_t rkey,
uct_completion_t *comp)
{
ucs_status_t status;
uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_copy_iface_t);

uct_cuda_copy_event_desc_t *cuda_event = ucs_mpool_get(&iface->cuda_event_desc);

status = CUDA_FUNC(cudaMemcpyAsync((void *)remote_addr, iov[0].buffer, iov[0].length, cudaMemcpyDeviceToHost, iface->stream_d2h));
if (UCS_OK != status) {
ucs_error("cudaMemcpyAsync Failed ");
return UCS_ERR_IO_ERROR;
}
status = CUDA_FUNC(cudaEventRecord(cuda_event->event, iface->stream_d2h));
if (UCS_OK != status) {
ucs_error("cudaEventRecord Failed ");
return UCS_ERR_IO_ERROR;
}
cuda_event->comp = comp;

ucs_queue_push(&iface->pending_event_q, &cuda_event->queue);

ucs_info("cuda async issued :%p buffer:%p len:%ld", cuda_event, iov[0].buffer, iov[0].length);

return UCS_INPROGRESS;

}

33 changes: 33 additions & 0 deletions src/uct/cuda/cuda_copy/cuda_copy_ep.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/**
* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
* See file LICENSE for terms.
*/

#ifndef UCT_SYSV_EP_H
#define UCT_SYSV_EP_H

#include <uct/api/uct.h>
#include <uct/base/uct_iface.h>
#include <ucs/type/class.h>


typedef struct uct_cuda_copy_ep_addr {
int ep_id;
} uct_cuda_copy_ep_addr_t;

typedef struct uct_cuda_copy_ep {
uct_base_ep_t super;
struct uct_cuda_copy_ep *next;
} uct_cuda_copy_ep_t;

UCS_CLASS_DECLARE_NEW_FUNC(uct_cuda_copy_ep_t, uct_ep_t, uct_iface_t*,
const uct_device_addr_t *, const uct_iface_addr_t *);
UCS_CLASS_DECLARE_DELETE_FUNC(uct_cuda_copy_ep_t, uct_ep_t);

ucs_status_t uct_cuda_copy_ep_get_zcopy(uct_ep_h tl_ep,
const uct_iov_t *iov, size_t iovcnt,
uint64_t remote_addr, uct_rkey_t rkey,
uct_completion_t *comp);

#endif
Loading