-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CUDA: enable cuda support v1 - EAGER with GDR COPY #20
base: master
Are you sure you want to change the base?
Conversation
bureddy
commented
Sep 26, 2017
•
edited
Loading
edited
5a4d70e
to
b3bde76
Compare
- for pipeline rndv transfers
b3bde76
to
2915cdb
Compare
@@ -0,0 +1,22 @@ | |||
/** | |||
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
copyright year 2017?
ucm_debug("cudamem test: got 0x%x out of 0x%x, total: 0x%x", out_events, events, | ||
installed_events); | ||
|
||
/* Return success iff we caught all wanted events */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
iff->if
#include <pthread.h> | ||
|
||
|
||
#define MAP_FAILED ((void*)-1) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is it required? MAP_FAILED is defined in <sys/mman.h>, and MAP_FAILED is not used here
iface_attr = &worker->ifaces[rsc_index].attr; | ||
|
||
domain_config->tag.eager.max_short = iface_attr->cap.am.max_short; | ||
//TODO: zcopy thrshold should be based on the ep AM lane capability with domain addr(i.e can UCT do zcopy from domain) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thrshold -> threshold
@@ -106,6 +109,17 @@ typedef struct ucp_ep_rma_config { | |||
} ucp_ep_rma_config_t; | |||
|
|||
|
|||
#define UCP_IS_DEFAULT_ADDR_DOMAIN(_addr_dn_h) (_addr_dn_h == &ucp_addr_dn_dummy_handle) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(_addr_dn_h == &ucp_addr_dn_dummy_handle) -> ((_addr_dn_h) == &ucp_addr_dn_dummy_handle)
return UCS_OK; | ||
} | ||
|
||
while(1) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
space after while:
while (1) {
rreq->recv.length, &rreq->recv.state, | ||
data + hdr_len, recv_len, 1); | ||
status = ucp_dt_unpack(rreq, rreq->recv.datatype, rreq->recv.buffer, | ||
rreq->recv.length, &rreq->recv.state, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
allignment
|
||
dn_md_map = addr_dn_h->md_map; | ||
|
||
while(1) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
coding guide line: space after while: while (1) {
uct_cuda_copy_event_desc_t *cuda_event; | ||
cudaError_t result = cudaSuccess; | ||
|
||
ucs_queue_for_each_safe(cuda_event, iter, &iface->pending_event_q, queue) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
coding guideline: ucs_queue_for_each_safe() {
ucs_error("cudaEventCreateWithFlags Failed"); | ||
} | ||
} | ||
void uct_cuda_copy_event_desc_cleanup(ucs_mpool_t *mp, void *obj) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you don't call these functions directly by name from anywhere, may be declare it as 'static'?
@@ -630,6 +643,7 @@ struct uct_md_attr { | |||
size_t max_alloc; /**< Maximal allocation size */ | |||
size_t max_reg; /**< Maximal registration size */ | |||
uint64_t flags; /**< UCT_MD_FLAG_xx */ | |||
uct_addr_domain_t addr_dn; /**< Supported addr domain */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
change to uct_memory_domain_t
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
or memory_type , memory_kind
typedef enum { | ||
UCT_MD_ADDR_DOMAIN_CUDA = 0, /**< NVIDIA CUDA domain */ | ||
UCT_MD_ADDR_DOMAIN_DEFAULT, /**< Default system domain */ | ||
UCT_MD_ADDR_DOMAIN_LAST = UCT_MD_ADDR_DOMAIN_DEFAULT |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
check LAST
connection establishment via | ||
sockaddr */ | ||
UCT_MD_FLAG_ADDR_DN = UCS_BIT(8) /**< MD supports memory addr domain |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
change to _MRM_DETECT
* @param [in] md Memory domain to register memory on. | ||
* @param [in] address Memory address to detect. | ||
*/ | ||
ucs_status_t uct_md_mem_detect(uct_md_h md, void *addr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add to ucx_info to show memory detect support
/*TODO: return if no MDs with address domain detect */ | ||
|
||
for (md_index = 0; md_index < context->num_mds; ++md_index) { | ||
if (context->tl_mds[md_index].attr.cap.flags & UCT_MD_FLAG_ADDR_DN) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove cap flag and use UCT_MD_ADDR_DOMAIN_DEFAULT
@@ -78,6 +78,7 @@ typedef void (*ucp_request_callback_t)(ucp_request_t *req); | |||
struct ucp_request { | |||
ucs_status_t status; /* Operation status */ | |||
uint16_t flags; /* Request flags */ | |||
ucp_addr_dn_h addr_dn_h; /* Memory domain handle */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
embeded fields into req struct
ucp_ep_config(ep)->tag.eager.max_short, | ||
ucp_ep_config(ep)->tag.eager.zcopy_thresh, | ||
ucs_likely(UCP_IS_DEFAULT_ADDR_DOMAIN(addr_dn_h)) ? | ||
ucp_ep_config(ep)->tag.eager.max_short : |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
array of eager threshold per domain
|
||
UCP_THREAD_CS_ENTER_CONDITIONAL(&ep->worker->mt_lock); | ||
|
||
ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s cb %p", | ||
buffer, count, tag, ucp_ep_peer_name(ep), cb); | ||
|
||
if (ucs_likely(UCP_DT_IS_CONTIG(datatype))) { | ||
ucp_addr_domain_detect_mds(ep->worker->context, (void *)buffer, &addr_dn_h); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
global context value to check if detect is needed or not