Skip to content

Commit

Permalink
Better deal with null codelet and nowhere tasks in trace
Browse files Browse the repository at this point in the history
  • Loading branch information
nfurmento committed Dec 11, 2023
1 parent 1a31468 commit 9cb9ce1
Show file tree
Hide file tree
Showing 11 changed files with 362 additions and 182 deletions.
62 changes: 34 additions & 28 deletions src/common/fxt.h
Original file line number Diff line number Diff line change
Expand Up @@ -824,39 +824,43 @@ do { \
/*
 * Trace the start of the codelet body of `job` on worker `workerid`.
 *
 * Nothing is emitted unless at least one of the TASK, TASK_VERBOSE, DATA or
 * TASK_VERBOSE_EXTRA key masks is set in fut_active. Otherwise:
 *  - a _STARPU_FUT_START_CODELET_BODY probe records the job id, scheduling
 *    context, worker id, memory node, the value of _starpu_gettid(), and a
 *    flag that is 1 when the task has no codelet or is STARPU_NOWHERE;
 *  - when the task has a codelet, each of its buffers gets an optional
 *    textual description (if the interface provides `describe`), a
 *    _STARPU_FUT_CODELET_DATA_HANDLE probe, and a NUMA-location probe;
 *  - a _STARPU_FUT_CODELET_DETAILS probe is emitted, except for null-codelet
 *    and nowhere tasks.
 *
 * job       : job being started; evaluated several times, so it must be
 *             free of side effects.
 * nimpl     : implementation index forwarded to the size/footprint helpers.
 * perf_arch : performance-model arch, or NULL (size and hash are then 0).
 * workerid  : worker id, or -1 when there is no worker; in that case the
 *             memory node and the NUMA location are reported as -1 and no
 *             worker lookup is performed.
 */
#define _STARPU_TRACE_START_CODELET_BODY(job, nimpl, perf_arch, workerid) \
do { \
	if(STARPU_UNLIKELY((_STARPU_FUT_KEYMASK_TASK|_STARPU_FUT_KEYMASK_TASK_VERBOSE|_STARPU_FUT_KEYMASK_DATA|_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA) & fut_active)) { \
		int mem_node = (workerid) == -1 ? -1 : (int)starpu_worker_get_memory_node(workerid); \
		int codelet_null = (job)->task->cl == NULL; \
		int nowhere = ((job)->task->where == STARPU_NOWHERE) || ((job)->task->cl != NULL && (job)->task->cl->where == STARPU_NOWHERE); \
		FUT_FULL_PROBE6(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_START_CODELET_BODY, (job)->job_id, ((job)->task)->sched_ctx, workerid, mem_node, _starpu_gettid(), (codelet_null == 1 || nowhere == 1)); \
		if ((job)->task->cl) \
		{ \
			const int __nbuffers = STARPU_TASK_GET_NBUFFERS((job)->task); \
			char __buf[FXT_MAX_PARAMS*sizeof(long)]; \
			int __i; \
			for (__i = 0; __i < __nbuffers; __i++) \
			{ \
				starpu_data_handle_t __handle = STARPU_TASK_GET_HANDLE((job)->task, __i); \
				void *__interface = _STARPU_TASK_GET_INTERFACES((job)->task)[__i]; \
				if (__interface && __handle->ops->describe) \
				{ \
					__handle->ops->describe(__interface, __buf, sizeof(__buf)); \
					_STARPU_FUT_FULL_PROBE2STR(_STARPU_FUT_KEYMASK_DATA, _STARPU_FUT_CODELET_DATA, workerid, _starpu_gettid(), __buf); \
				} \
				FUT_FULL_PROBE4(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_CODELET_DATA_HANDLE, (job)->job_id, (__handle), _starpu_data_get_size(__handle), STARPU_TASK_GET_MODE((job)->task, __i)); \
				/* Regarding the memory location: \
				 * - if the data interface doesn't provide to_pointer operation, NULL will be returned \
				 *   and the location will be -1, which is fine; \
				 * - we have to check whether the memory is on an actual NUMA node (and not on GPU \
				 *   memory, for instance); \
				 * - looking at memory location before executing the task isn't the best choice: \
				 *   the page can be not allocated yet. A solution would be to get the memory \
				 *   location at the end of the task, but there is no FxT probe where we iterate over \
				 *   handles, after task execution. \
				 * - without a worker (workerid < 0) no worker type can be queried, so the \
				 *   location is reported as -1 as well. \
				 * */ \
				FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_TASK_VERBOSE_EXTRA, _STARPU_FUT_CODELET_DATA_HANDLE_NUMA_ACCESS, (job)->job_id, (__i), (workerid) >= 0 && starpu_worker_get_memory_node_kind(starpu_worker_get_type(workerid)) == STARPU_CPU_RAM && starpu_task_get_current_data_node(__i) >= 0 ? starpu_get_memory_location_bitmap(starpu_data_handle_to_pointer(__handle, (unsigned) starpu_task_get_current_data_node(__i)), starpu_data_get_size(__handle)) : -1); \
			} \
		} \
		if (!(codelet_null == 1 || nowhere == 1)) \
		{ \
			const size_t __job_size = ((perf_arch) == NULL) ? 0 : _starpu_job_get_data_size((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job)); \
			const uint32_t __job_hash = ((perf_arch) == NULL) ? 0 : _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job)); \
			/* Use the macro parameter (job), not a caller-scope variable, for the task size. */ \
			FUT_FULL_PROBE8(_STARPU_FUT_KEYMASK_TASK_VERBOSE, _STARPU_FUT_CODELET_DETAILS, ((job)->task)->sched_ctx, __job_size, __job_hash, (job)->task->flops / 1000 / ((job)->task->cl && (job)->task->cl->type != STARPU_SEQ ? (job)->task_size : 1), (job)->task->tag_id, workerid, ((job)->job_id), _starpu_gettid()); \
		} \
	} \
} while(0)
Expand All @@ -868,7 +872,9 @@ do { \
const uint32_t job_hash = (perf_arch == NULL) ? 0 : _starpu_compute_buffers_footprint((job)->task->cl?(job)->task->cl->model:NULL, perf_arch, nimpl, (job)); \
char _archname[32]=""; \
if (perf_arch) starpu_perfmodel_get_arch_name(perf_arch, _archname, 32, 0); \
_STARPU_FUT_FULL_PROBE5STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_END_CODELET_BODY, (job)->job_id, (job_size), (job_hash), workerid, _starpu_gettid(), _archname); \
int nowhere = ((job)->task->where == STARPU_NOWHERE) || ((job)->task->cl != NULL && (job)->task->cl->where == STARPU_NOWHERE); \
int codelet_null = (job)->task->cl == NULL; \
_STARPU_FUT_FULL_PROBE6STR(_STARPU_FUT_KEYMASK_TASK, _STARPU_FUT_END_CODELET_BODY, (job)->job_id, (job_size), (job_hash), workerid, _starpu_gettid(), (codelet_null == 1 || nowhere == 1), _archname); \
} \
} while(0)

Expand Down Expand Up @@ -1374,7 +1380,7 @@ do { \
FUT_FULL_PROBE2(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_MEMORY_FULL,size,_starpu_gettid());

/*
 * Record a _STARPU_FUT_DATA_LOAD event under the DSM key mask, carrying the
 * worker id, the size, and the value of _starpu_gettid().
 * The expansion already ends with ';', like the neighbouring trace macros.
 */
#define _STARPU_TRACE_DATA_LOAD(workerid,size) \
	FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_DATA_LOAD, workerid, size, _starpu_gettid());

/*
 * Record a _STARPU_FUT_START_UNPARTITION_ON_TID event under the DSM key mask.
 * The probe arguments are, in order: memnode, the value of _starpu_gettid(),
 * and handle. The expansion already ends with ';'.
 */
#define _STARPU_TRACE_START_UNPARTITION(handle, memnode) \
FUT_FULL_PROBE3(_STARPU_FUT_KEYMASK_DSM, _STARPU_FUT_START_UNPARTITION_ON_TID, memnode, _starpu_gettid(), handle);
Expand Down
7 changes: 5 additions & 2 deletions src/core/sched_policy.c
Original file line number Diff line number Diff line change
Expand Up @@ -668,8 +668,11 @@ int _starpu_repush_task(struct _starpu_job *j)
_starpu_set_current_task(NULL);
}

_STARPU_TRACE_START_CODELET_BODY(j, 0, NULL, 0);
_STARPU_TRACE_END_CODELET_BODY(j, 0, NULL, 0);
{
int worker_id = starpu_worker_get_id();
_STARPU_TRACE_START_CODELET_BODY(j, 0, NULL, worker_id);
_STARPU_TRACE_END_CODELET_BODY(j, 0, NULL, worker_id);
}

if (task->cl && task->cl->specific_nodes)
{
Expand Down
34 changes: 19 additions & 15 deletions src/debug/traces/anim.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* StarPU --- Runtime system for heterogeneous multicore architectures.
*
* Copyright (C) 2015-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
* Copyright (C) 2015-2023 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
* Copyright (C) 2015 Anthony Simonet
*
* StarPU is free software; you can redistribute it and/or modify
Expand Down Expand Up @@ -106,20 +106,24 @@ void _starpu_fxt_component_dump(FILE *file)
fxt_component_dump(file, fxt_component_root(), 0);
}

static void fxt_worker_print(FILE *file, struct starpu_fxt_options *options, int workerid, unsigned comp_workerid, unsigned depth)
static void fxt_worker_print(FILE *file, struct starpu_fxt_options *options, long unsigned int tid, int workerid, unsigned comp_workerid, unsigned depth)
{
fprintf(file, "\t\t\t%*s<table><tr><td class='worker_box%s'><center>%s\n", 2*depth, "",
(int) comp_workerid == workerid ? "_sched":"",
options->worker_names[comp_workerid]);
if (_starpu_last_codelet_symbol[comp_workerid][0])
fprintf(file, "\t\t\t%*s<table><tr><td class='run_task'>%s</td></tr></table>\n", 2*(depth+1), "", _starpu_last_codelet_symbol[comp_workerid]);

struct _thread_info *thread_info = NULL;
HASH_FIND(hh, _thread_infos, &tid, sizeof(tid), thread_info);

if (thread_info && thread_info->symbol[0])
fprintf(file, "\t\t\t%*s<table><tr><td class='run_task'>%s</td></tr></table>\n", 2*(depth+1), "", thread_info->symbol);
else
fprintf(file, "\t\t\t%*s<table><tr><td class='fake_task'></td></tr></table>\n", 2*(depth+1), "");
fprintf(file, "\t\t\t%*s</center></td></tr>\n", 2*depth, "");
fprintf(file, "\t\t\t%*s</table>", 2*depth, "");
}

static void fxt_component_print(FILE *file, struct starpu_fxt_options *options, int workerid, struct component *from, struct component *to, struct component *comp, unsigned depth)
static void fxt_component_print(FILE *file, struct starpu_fxt_options *options, long unsigned int tid, int workerid, struct component *from, struct component *to, struct component *comp, unsigned depth)
{
unsigned i, n;
unsigned ntasks = comp->ntasks + comp->npriotasks;
Expand Down Expand Up @@ -184,7 +188,7 @@ static void fxt_component_print(FILE *file, struct starpu_fxt_options *options,
if (comp->children[i]->parent == comp)
{
fprintf(file, "\t\t\t%*s<td>\n", 2*depth, "");
fxt_component_print(file, options, workerid, from, to, comp->children[i], depth+1);
fxt_component_print(file, options, tid, workerid, from, to, comp->children[i], depth+1);
fprintf(file, "\t\t\t%*s</td>\n", 2*depth, "");
}
fprintf(file, "\t\t\t%*s</tr>\n", 2*depth, "");
Expand All @@ -194,18 +198,18 @@ static void fxt_component_print(FILE *file, struct starpu_fxt_options *options,
{
fprintf(file, "\t\t\t%*s<tr>\n", 2*depth, "");
fprintf(file, "\t\t\t%*s<td>\n", 2*depth, "");
fxt_worker_print(file, options, workerid, comp->workerid, depth+1);
fxt_worker_print(file, options, tid, workerid, comp->workerid, depth+1);
fprintf(file, "\t\t\t%*s</td>\n", 2*depth, "");
fprintf(file, "\t\t\t%*s</tr>\n", 2*depth, "");
}

fprintf(file, "\t\t\t%*s</table>", 2*depth, "");
}

/*
 * Print the component tree, starting from fxt_component_root() at depth 0,
 * wrapped in a <center> element.
 *
 * tid, workerid, from and to are forwarded unchanged to
 * fxt_component_print().
 */
void _starpu_fxt_component_print(FILE *file, struct starpu_fxt_options *options, long unsigned int tid, int workerid, struct component *from, struct component *to)
{
	fprintf(file, "<center>\n");
	fxt_component_print(file, options, tid, workerid, from, to, fxt_component_root(), 0);
	fprintf(file, "</center>\n");
}

Expand Down Expand Up @@ -357,15 +361,15 @@ void _starpu_fxt_component_print_header(FILE *file)
fprintf(file, "\t<body>\n");
}

static void fxt_component_print_step(FILE *file, struct starpu_fxt_options *options, double timestamp, int workerid, unsigned push, struct component *from, struct component *to)
static void fxt_component_print_step(FILE *file, struct starpu_fxt_options *options, double timestamp, long unsigned int tid, int workerid, unsigned push, struct component *from, struct component *to)
{
fprintf(file, "\t\t<div id='et%u' style='display:%s;'><center><!-- Étape %u -->\n",
global_state, global_state > 1 ? "none":"block", global_state);
fprintf(file, "\t\t<p>Time %f, %u submitted %u ready, %s</p>\n", timestamp, nsubmitted, curq_size-nflowing, push?"push":"pull");
//fprintf(file, "\t\t\t<tt><pre>\n");
//_starpu_fxt_component_dump(file);
//fprintf(file, "\t\t\t</pre></tt>\n");
_starpu_fxt_component_print(file, options, workerid, from, to);
_starpu_fxt_component_print(file, options, tid, workerid, from, to);
fprintf(file,"\t\t</center></div>");

global_state++;
Expand Down Expand Up @@ -394,7 +398,7 @@ void _starpu_fxt_component_update_ntasks(unsigned _nsubmitted, unsigned _curq_si
curq_size = _curq_size;
}

void _starpu_fxt_component_push(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task STARPU_ATTRIBUTE_UNUSED, unsigned prio)
void _starpu_fxt_component_push(FILE *output, struct starpu_fxt_options *options, double timestamp, long unsigned int tid, int workerid, uint64_t from, uint64_t to, uint64_t task STARPU_ATTRIBUTE_UNUSED, unsigned prio)
{
struct component *from_p = NULL, *to_p = NULL;

Expand Down Expand Up @@ -429,10 +433,10 @@ void _starpu_fxt_component_push(FILE *output, struct starpu_fxt_options *options
}

// fprintf(stderr,"push from %s to %s\n", from_p?from_p->name:"none", to_p?to_p->name:"none");
fxt_component_print_step(output, options, timestamp, workerid, 1, from_p, to_p);
fxt_component_print_step(output, options, timestamp, tid, workerid, 1, from_p, to_p);
}

void _starpu_fxt_component_pull(FILE *output, struct starpu_fxt_options *options, double timestamp, int workerid, uint64_t from, uint64_t to, uint64_t task STARPU_ATTRIBUTE_UNUSED, unsigned prio)
void _starpu_fxt_component_pull(FILE *output, struct starpu_fxt_options *options, double timestamp, long unsigned int tid, int workerid, uint64_t from, uint64_t to, uint64_t task STARPU_ATTRIBUTE_UNUSED, unsigned prio)
{
struct component *from_p = NULL, *to_p = NULL;

Expand Down Expand Up @@ -467,7 +471,7 @@ void _starpu_fxt_component_pull(FILE *output, struct starpu_fxt_options *options
nflowing--;

// fprintf(stderr,"pull from %s to %s\n", from_p?from_p->name:"none", to_p?to_p->name:"none");
fxt_component_print_step(output, options, timestamp, workerid, 0, from_p, to_p);
fxt_component_print_step(output, options, timestamp, tid, workerid, 0, from_p, to_p);
}

void _starpu_fxt_component_finish(FILE *file)
Expand Down
Loading

0 comments on commit 9cb9ce1

Please sign in to comment.