-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CUDA: enable cuda support v1 - EAGER with GDR COPY #20
base: master
Are you sure you want to change the base?
Changes from 5 commits
a1b6607
87d29ef
ff3bcd0
4ed2a7d
87ecdd7
9cbca08
4c1417b
369d416
2915cdb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,84 @@ | ||
AM_CONDITIONAL([HAVE_CUDA], [true]) | ||
# | ||
# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. | ||
# See file LICENSE for terms. | ||
# | ||
|
||
# | ||
# Check for CUDA support | ||
# | ||
cuda_happy="no" | ||
|
||
AC_ARG_WITH([cuda], | ||
[AS_HELP_STRING([--with-cuda=(DIR)], [Enable the use of CUDA (default is no).])], | ||
[], [with_cuda=no]) | ||
|
||
AS_IF([test "x$with_cuda" != "xno"], | ||
|
||
[AS_IF([test "x$with_cuda" == "x" || test "x$with_cuda" == "xguess" || test "x$with_cuda" == "xyes"], | ||
[ | ||
AC_MSG_NOTICE([CUDA path was not specified. Guessing ...]) | ||
with_cuda=/usr/local/cuda | ||
], | ||
[:]) | ||
AC_CHECK_HEADERS([$with_cuda/include/cuda.h $with_cuda/include/cuda_runtime.h], | ||
[AC_CHECK_DECLS([cuPointerGetAttribute], | ||
[cuda_happy="yes"], | ||
[AC_MSG_WARN([CUDA runtime not detected. Disable.]) | ||
cuda_happy="no"], | ||
[#include <$with_cuda/include/cuda.h>]) | ||
AS_IF([test "x$cuda_happy" == "xyes"], | ||
[AC_DEFINE([HAVE_CUDA], 1, [Enable CUDA support]) | ||
AC_SUBST(CUDA_CPPFLAGS, "-I$with_cuda/include/ ") | ||
AC_SUBST(CUDA_CFLAGS, "-I$with_cuda/include/ ") | ||
AC_SUBST(CUDA_LDFLAGS, "-lcudart -lcuda -L$with_cuda/lib64") | ||
CFLAGS="$CFLAGS $CUDA_CFLAGS" | ||
CPPFLAGS="$CPPFLAGS $CUDA_CPPFLAGS" | ||
LDFLAGS="$LDFLAGS $CUDA_LDFLAGS"], | ||
[])], | ||
[AC_MSG_WARN([CUDA not found]) | ||
AC_DEFINE([HAVE_CUDA], [0], [Disable the use of CUDA])])], | ||
[AC_MSG_WARN([CUDA was explicitly disabled]) | ||
AC_DEFINE([HAVE_CUDA], [0], [Disable the use of CUDA])] | ||
) | ||
|
||
|
||
AM_CONDITIONAL([HAVE_CUDA], [test "x$cuda_happy" != xno]) | ||
|
||
AC_ARG_WITH([gdrcopy], | ||
[AS_HELP_STRING([--with-gdrcopy=(DIR)], [Enable the use of GDR_COPY (default is no).])], | ||
[], [with_gdrcopy=no]) | ||
|
||
AS_IF([test "x$with_gdrcopy" != "xno"], | ||
|
||
[AS_IF([test "x$with_gdrcopy" == "x" || test "x$with_gdrcopy" == "xguess" || test "x$with_gdrcopy" == "xyes"], | ||
[ | ||
AC_MSG_NOTICE([GDR_COPY path was not specified. Guessing ...]) | ||
with_gdrcopy=/usr/local/gdrcopy | ||
], | ||
[:]) | ||
AC_CHECK_HEADERS([$with_gdrcopy/include/gdrapi.h], | ||
[AC_CHECK_DECLS([gdr_pin_buffer], | ||
[gdrcopy_happy="yes"], | ||
[AC_MSG_WARN([GDR_COPY runtime not detected. Disable.]) | ||
gdrcopy_happy="no"], | ||
[#include <$with_gdrcopy/include/gdrapi.h>]) | ||
AS_IF([test "x$gdrcopy_happy" == "xyes"], | ||
[AC_DEFINE([HAVE_GDR_COPY], 1, [Enable GDR_COPY support]) | ||
AC_SUBST(GDR_COPY_CPPFLAGS, "-I$with_gdrcopy/include/ ") | ||
AC_SUBST(GDR_COPY_CFLAGS, "-I$with_gdrcopy/include/ ") | ||
AC_SUBST(GDR_COPY_LDFLAGS, "-lgdrapi -L$with_gdrcopy/lib64") | ||
CFLAGS="$CFLAGS $GDR_COPY_CFLAGS" | ||
CPPFLAGS="$CPPFLAGS $GDR_COPY_CPPFLAGS" | ||
LDFLAGS="$LDFLAGS $GDR_COPY_LDFLAGS"], | ||
[])], | ||
[AC_MSG_WARN([GDR_COPY not found]) | ||
AC_DEFINE([HAVE_GDR_COPY], [0], [Disable the use of GDR_COPY])])], | ||
[AC_MSG_WARN([GDR_COPY was explicitly disabled]) | ||
AC_DEFINE([HAVE_GDR_COPY], [0], [Disable the use of GDR_COPY])] | ||
) | ||
|
||
|
||
AM_CONDITIONAL([HAVE_GDR_COPY], [test "x$gdrcopy_happy" != xno]) | ||
|
||
AC_DEFINE([HAVE_CUDA_GDR], [1], [Eanble GPU Direct RDMA])] | ||
AM_CONDITIONAL([HAVE_CUDA_GDR], [1]) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -377,11 +377,24 @@ enum { | |
UCT_MD_FLAG_RKEY_PTR = UCS_BIT(6), /**< MD supports direct access to | ||
remote memory via a pointer that | ||
is returned by @ref uct_rkey_ptr */ | ||
UCT_MD_FLAG_SOCKADDR = UCS_BIT(7) /**< MD support for client-server | ||
UCT_MD_FLAG_SOCKADDR = UCS_BIT(7), /**< MD support for client-server | ||
connection establishment via | ||
sockaddr */ | ||
UCT_MD_FLAG_ADDR_DN = UCS_BIT(8) /**< MD supports memory addr domain | ||
detection */ | ||
}; | ||
|
||
/* | ||
* @ingroup UCT_MD | ||
* @brief Memory addr domains | ||
*/ | ||
typedef enum { | ||
UCT_MD_ADDR_DOMAIN_CUDA = 0, /**< NVIDIA CUDA domain */ | ||
UCT_MD_ADDR_DOMAIN_DEFAULT, /**< Default system domain */ | ||
UCT_MD_ADDR_DOMAIN_LAST = UCT_MD_ADDR_DOMAIN_DEFAULT | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check LAST |
||
|
||
} uct_addr_domain_t; | ||
|
||
|
||
/** | ||
* @ingroup UCT_MD | ||
|
@@ -630,6 +643,7 @@ struct uct_md_attr { | |
size_t max_alloc; /**< Maximal allocation size */ | ||
size_t max_reg; /**< Maximal registration size */ | ||
uint64_t flags; /**< UCT_MD_FLAG_xx */ | ||
uct_addr_domain_t addr_dn; /**< Supported addr domain */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. change to uct_memory_domain_t There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. or memory_type , memory_kind |
||
} cap; | ||
|
||
uct_linear_growth_t reg_cost; /**< Memory registration cost estimation | ||
|
@@ -1413,6 +1427,18 @@ ucs_status_t uct_md_mem_reg(uct_md_h md, void *address, size_t length, | |
ucs_status_t uct_md_mem_dereg(uct_md_h md, uct_mem_h memh); | ||
|
||
|
||
/** | ||
* @ingroup UCT_MD | ||
* @brief Detect memory on the memory domain. | ||
* | ||
* Detect memory on the memory domain. | ||
* Return UCS_OK if address belongs to MDs address domain | ||
* | ||
* @param [in] md Memory domain to register memory on. | ||
* @param [in] address Memory address to detect. | ||
*/ | ||
ucs_status_t uct_md_mem_detect(uct_md_h md, void *addr); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add to ucx_info to show memory detect support |
||
|
||
/** | ||
* @ingroup UCT_MD | ||
* @brief Allocate memory for zero-copy communications and remote access. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/** | ||
* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. | ||
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. | ||
* See file LICENSE for terms. | ||
*/ | ||
|
||
#include "cuda_copy_ep.h" | ||
#include "cuda_copy_iface.h" | ||
|
||
#include <uct/base/uct_log.h> | ||
#include <ucs/debug/memtrack.h> | ||
#include <ucs/type/class.h> | ||
|
||
|
||
static UCS_CLASS_INIT_FUNC(uct_cuda_copy_ep_t, uct_iface_t *tl_iface, | ||
const uct_device_addr_t *dev_addr, | ||
const uct_iface_addr_t *iface_addr) | ||
{ | ||
uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_copy_iface_t); | ||
UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super) | ||
return UCS_OK; | ||
} | ||
|
||
static UCS_CLASS_CLEANUP_FUNC(uct_cuda_copy_ep_t) | ||
{ | ||
} | ||
|
||
UCS_CLASS_DEFINE(uct_cuda_copy_ep_t, uct_base_ep_t) | ||
UCS_CLASS_DEFINE_NEW_FUNC(uct_cuda_copy_ep_t, uct_ep_t, uct_iface_t*, | ||
const uct_device_addr_t *, const uct_iface_addr_t *); | ||
UCS_CLASS_DEFINE_DELETE_FUNC(uct_cuda_copy_ep_t, uct_ep_t); | ||
|
||
|
||
ucs_status_t uct_cuda_copy_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, | ||
uint64_t remote_addr, uct_rkey_t rkey, | ||
uct_completion_t *comp) | ||
{ | ||
ucs_status_t status; | ||
uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_copy_iface_t); | ||
|
||
uct_cuda_copy_event_desc_t *cuda_event = ucs_mpool_get(&iface->cuda_event_desc); | ||
|
||
status = CUDA_FUNC(cudaMemcpyAsync((void *)remote_addr, iov[0].buffer, iov[0].length, cudaMemcpyDeviceToHost, iface->stream_d2h)); | ||
if (UCS_OK != status) { | ||
ucs_error("cudaMemcpyAsync Failed "); | ||
return UCS_ERR_IO_ERROR; | ||
} | ||
status = CUDA_FUNC(cudaEventRecord(cuda_event->event, iface->stream_d2h)); | ||
if (UCS_OK != status) { | ||
ucs_error("cudaEventRecord Failed "); | ||
return UCS_ERR_IO_ERROR; | ||
} | ||
cuda_event->comp = comp; | ||
|
||
ucs_queue_push(&iface->pending_event_q, &cuda_event->queue); | ||
|
||
ucs_info("cuda async issued :%p buffer:%p len:%ld", cuda_event, iov[0].buffer, iov[0].length); | ||
|
||
return UCS_INPROGRESS; | ||
|
||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
/** | ||
* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. | ||
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. | ||
* See file LICENSE for terms. | ||
*/ | ||
|
||
#ifndef UCT_SYSV_EP_H | ||
#define UCT_SYSV_EP_H | ||
|
||
#include <uct/api/uct.h> | ||
#include <uct/base/uct_iface.h> | ||
#include <ucs/type/class.h> | ||
|
||
|
||
typedef struct uct_cuda_copy_ep_addr { | ||
int ep_id; | ||
} uct_cuda_copy_ep_addr_t; | ||
|
||
typedef struct uct_cuda_copy_ep { | ||
uct_base_ep_t super; | ||
struct uct_cuda_copy_ep *next; | ||
} uct_cuda_copy_ep_t; | ||
|
||
UCS_CLASS_DECLARE_NEW_FUNC(uct_cuda_copy_ep_t, uct_ep_t, uct_iface_t*, | ||
const uct_device_addr_t *, const uct_iface_addr_t *); | ||
UCS_CLASS_DECLARE_DELETE_FUNC(uct_cuda_copy_ep_t, uct_ep_t); | ||
|
||
ucs_status_t uct_cuda_copy_ep_get_zcopy(uct_ep_h tl_ep, | ||
const uct_iov_t *iov, size_t iovcnt, | ||
uint64_t remote_addr, uct_rkey_t rkey, | ||
uct_completion_t *comp); | ||
|
||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
change to _MRM_DETECT