diff --git a/README.md b/README.md index d0e8290e..9cb73cf0 100644 --- a/README.md +++ b/README.md @@ -71,38 +71,51 @@ seq [out]: Order of element `i` in processor `part[i]` after partition ```C typedef struct { // General options - int partitioner; // Partition algo: 0 - RSB, 1 - RCB, 2 - RIB (Default: 0) - int verbose_level; // Verbose level: 0, 1, 2, .. etc (Default: 1) - int profile_level; // Profile level: 0, 1, 2, .. etc (Default: 1) - int two_level; // Enable two level partitioning (Default: 0) + int partitioner; // Partition algo: 0 - RSB, 1 - RCB, 2 - RIB (Default: 0) + int tagged; // Tagged partitioning: 0 - No, 1 - Yes (Default: 0) + int levels; // Number of levels: 1, or 2 (Default: 2) + int find_disconnected_comps; // Find number of components: 0 - No, 1 - Yes + // (Default: 1) int repair; // Repair disconnected components: 0 - No, 1 - Yes (Default: 0) - // RSB common (Lanczos + MG) options + int verbose_level; // Verbose level: 0, 1, 2, .. etc (Default: 1) + int profile_level; // Profile level: 0, 1, 2, .. etc (Default: 0) + // RSB common (Lanczos and MG) options int rsb_algo; // RSB algo: 0 - Lanczos, 1 - MG (Default: 0) int rsb_pre; // RSB pre-partition : 0 - None, 1 - RCB , 2 - RIB (Default: 1) int rsb_max_iter; // Max iterations in Lanczos / MG (Default: 50) int rsb_max_passes; // Max Lanczos restarts / Inverse iterations (Default: 50) double rsb_tol; // Tolerance for Lanczos or RQI (Default: 1e-5) + int rsb_dump_stats; // Dump partition statistics to a text file. // RSB MG specific options int rsb_mg_grammian; // MG Grammian: 0 or 1 (Default: 0) int rsb_mg_factor; // MG Coarsening factor (Default: 2, should be > 1) - int rsb_mg_sagg; // MG smooth aggregation: 0 or 1 (Default: 0) } parrsb_options; ``` -You can use `parrsb_default_options` struct instance to pass default options -to `parrsb_part_mesh` routine. 
All of these options can be controlled at runtime -by setting up the relevant environment variable (named as `PARRSB_`) -to the corresponding value as well. Enviornment variable values will override -what is passed to `parrsb_part_mesh` routine. +Users can use the `parrsb_default_options` struct instance to pass default options +to `parrsb_part_mesh` routine. -Below is a list of some of environment variables: +All of these options can be controlled at runtime by setting the relevant +environment variable (named `PARRSB_`) as well. Environment variable +values will override what is passed to `parrsb_part_mesh` routine. + +Below is the full list of environment variables: ``` PARRSB_PARTITIONER +PARRSB_TAGGED +PARRSB_LEVELS +PARRSB_FIND_DISCONNECTED_COMPONENTS +PARRSB_REPAIR PARRSB_VERBOSE_LEVEL PARRSB_PROFILE_LEVEL -PARRSB_TWO_LEVEL -PARRSB_REPAIR + PARRSB_RSB_ALGO PARRSB_RSB_PRE +PARRSB_RSB_MAX_ITER +PARRSB_RSB_MAX_PASSES +PARRSB_RSB_TOL + +PARRSB_RSB_MG_GRAMMIAN +PARRSB_RSB_MG_FACTOR ``` diff --git a/src/parRSB.h b/src/parRSB.h index 7a71ca6f..2ca8977e 100644 --- a/src/parRSB.h +++ b/src/parRSB.h @@ -22,7 +22,9 @@ typedef struct { // General options int partitioner; // Partition algo: 0 - RSB, 1 - RCB, 2 - RIB (Default: 0) int tagged; // Tagged partitioning: 0 - No, 1 - Yes (Default: 0) - int levels; // Number of levels (levels: 1, 2) + int levels; // Number of levels: 1, or 2 (Default: 2) + int find_disconnected_comps; // Find number of components: 0 - No, 1 - Yes + // (Default: 1) int repair; // Repair disconnected components: 0 - No, 1 - Yes (Default: 0) int verbose_level; // Verbose level: 0, 1, 2, .. etc (Default: 1) int profile_level; // Profile level: 0, 1, 2, .. 
etc (Default: 0) diff --git a/src/parrsb-impl.h b/src/parrsb-impl.h index f792dbb1..fbd638f4 100644 --- a/src/parrsb-impl.h +++ b/src/parrsb-impl.h @@ -47,7 +47,7 @@ struct rsb_element { }; void rsb(struct array *elements, int nv, const parrsb_options *const options, - const struct comm comms[3], buffer *bfr); + const struct comm *comms, buffer *bfr); //------------------------------------------------------------------------------ // Find number of components. diff --git a/src/parrsb.c b/src/parrsb.c index 2d7ac084..7fc68780 100644 --- a/src/parrsb.c +++ b/src/parrsb.c @@ -28,6 +28,7 @@ parrsb_options parrsb_default_options = { .partitioner = 0, .tagged = 0, .levels = 2, + .find_disconnected_comps = 1, .repair = 0, .verbose_level = 1, .profile_level = 0, @@ -59,6 +60,8 @@ static void update_options(parrsb_options *const options) { UPDATE_OPTION(partitioner, "PARRSB_PARTITIONER", 1); UPDATE_OPTION(tagged, "PARRSB_TAGGED", 1); UPDATE_OPTION(levels, "PARRSB_LEVELS", 1); + UPDATE_OPTION(find_disconnected_comps, "PARRSB_FIND_DISCONNECTED_COMPONENTS", + 1); UPDATE_OPTION(repair, "PARRSB_REPAIR", 1); UPDATE_OPTION(verbose_level, "PARRSB_VERBOSE_LEVEL", 1); UPDATE_OPTION(profile_level, "PARRSB_PROFILE_LEVEL", 1); @@ -67,7 +70,6 @@ static void update_options(parrsb_options *const options) { UPDATE_OPTION(rsb_max_iter, "PARRSB_RSB_MAX_ITER", 1); UPDATE_OPTION(rsb_max_passes, "PARRSB_RSB_MAX_PASSES", 1); UPDATE_OPTION(rsb_tol, "PARRSB_RSB_TOL", 0); - UPDATE_OPTION(rsb_dump_stats, "PARRSB_DUMP_STATS", 1); UPDATE_OPTION(rsb_mg_grammian, "PARRSB_RSB_MG_GRAMMIAN", 1); UPDATE_OPTION(rsb_mg_factor, "PARRSB_RSB_MG_FACTOR", 1); @@ -82,6 +84,8 @@ static void print_options(const struct comm *c, PRINT_OPTION(partitioner, "PARRSB_PARTITIONER", "%d"); PRINT_OPTION(tagged, "PARRSB_TAGGED", "%d"); PRINT_OPTION(levels, "PARRSB_LEVELS", "%d"); + PRINT_OPTION(find_disconnected_comps, "PARRSB_FIND_DISCONNECTED_COMPONENTS", + "%d"); PRINT_OPTION(repair, "PARRSB_REPAIR", "%d"); 
PRINT_OPTION(verbose_level, "PARRSB_VERBOSE_LEVEL", "%d"); PRINT_OPTION(profile_level, "PARRSB_PROFILE_LEVEL", "%d"); @@ -90,7 +94,6 @@ static void print_options(const struct comm *c, PRINT_OPTION(rsb_max_iter, "PARRSB_RSB_MAX_ITER", "%d"); PRINT_OPTION(rsb_max_passes, "PARRSB_RSB_MAX_PASSES", "%d"); PRINT_OPTION(rsb_tol, "PARRSB_RSB_TOL", "%lf"); - PRINT_OPTION(rsb_dump_stats, "PARRSB_DUMP_STATS", "%d"); PRINT_OPTION(rsb_mg_grammian, "PARRSB_RSB_MG_GRAMMIAN", "%d"); PRINT_OPTION(rsb_mg_factor, "PARRSB_RSB_MG_FACTOR", "%d"); @@ -104,8 +107,7 @@ static size_t load_balance(struct array *elist, uint nel, int nv, slong out[2][1], wrk[2][1], in = nel; comm_scan(out, c, gs_long, gs_add, &in, 1, wrk); slong start = out[0][0], nelg = out[1][0]; - parrsb_print(c, verbose, "load_balance: start = %lld nelg = %lld", start, - nelg); + parrsb_print(c, verbose, "load_balance: nelg = %lld", nelg); uint nstar = nelg / c->np, nrem = nelg - nstar * c->np; slong lower = (nstar + 1) * nrem; @@ -224,10 +226,7 @@ static void initialize_levels(struct comm *const comms, int *const levels_in, // Hardcode the maximum number of levels to two for now. 
sint levels = 2; - uint sizes[2] = {num_nodes, 1}; - *levels_in = levels = MIN(levels, *levels_in); - if (levels > 1) comm_dup(&comms[levels - 1], &nc); comm_free(&nc); @@ -264,9 +263,10 @@ static void parrsb_part_mesh_v0(int *part, const long long *const vtx, const uint levels = options->levels; assert(levels <= sizeof(comms) / sizeof(comms[0])); initialize_levels(comms, &options->levels, &ca, verbose); - parrsb_print(c, verbose, - "parrsb_part_mesh_v0: Levels: requested = %d, enabled = %d", - levels, options->levels); + parrsb_print( + c, verbose, + "parrsb_part_mesh_v0: levels requested = %d, levels enabled = %d", + levels, options->levels); } parrsb_print(c, verbose, "parrsb_part_mesh_v0: running partitioner ..."); diff --git a/src/rsb.c b/src/rsb.c index e148e80d..5edacfb7 100644 --- a/src/rsb.c +++ b/src/rsb.c @@ -43,6 +43,21 @@ static void test_component_versions(struct array *elements, struct comm *lc, crystal_free(&cr); } +static void check_disconnected_components(const int i, const struct comm *gc, + void *bfr) { + sint minc = (sint)metric_get_value(i, RSB_COMPONENTS_NCOMP), maxc = minc; + comm_allreduce(gc, gs_int, gs_min, &minc, 1, (void *)bfr); + comm_allreduce(gc, gs_int, gs_max, &maxc, 1, (void *)bfr); + + if (maxc > 1 && gc->id == 0) { + fprintf(stderr, + "Warning: Partition created %d/%d (min/max) disconnected " + "components in Level=%d!\n", + minc, maxc, i); + fflush(stderr); + } +} + static void check_rsb_partition(const struct comm *gc, const parrsb_options *const opts) { int max_levels = log2ll(gc->np); @@ -72,33 +87,26 @@ static void check_rsb_partition(const struct comm *gc, double final = metric_get_value(i, TOL_FNL); comm_allreduce(&c, gs_double, gs_min, &final, 1, (void *)bfr); if (c.id == 0) { - printf("Warning: Lanczos reached a residual of %lf (target: %lf) " - "after %d x %d iterations in Level=%d!\n", - final, target, mpass, miter, i); - fflush(stdout); + fprintf(stderr, + "Warning: Lanczos reached a residual of %lf (target: %lf) " + 
"after %d x %d iterations in Level=%d!\n", + final, target, mpass, miter, i); + fflush(stderr); } } else if (opts->rsb_algo == 1) { if (c.id == 0) { - printf("Warning: Inverse iteration didn't converge after %d " - "iterations in Level = %d\n", - mpass, i); - fflush(stdout); + fprintf(stderr, + "Warning: Inverse iteration didn't converge after %d " + "iterations in Level = %d\n", + mpass, i); + fflush(stderr); } } } comm_free(&c); - sint minc, maxc; - minc = maxc = (sint)metric_get_value(i, RSB_COMPONENTS_NCOMP); - comm_allreduce(gc, gs_int, gs_min, &minc, 1, (void *)bfr); - comm_allreduce(gc, gs_int, gs_max, &maxc, 1, (void *)bfr); - - if (maxc > 1 && gc->id == 0) { - printf("Warning: Partition created %d/%d (min/max) disconnected " - "components in Level=%d!\n", - minc, maxc, i); - fflush(stdout); - } + if (opts->find_disconnected_comps == 1) + check_disconnected_components(i, gc, (void *)bfr); } } @@ -221,7 +229,7 @@ static int balance_partitions(struct array *elements, unsigned nv, } static sint get_bin(const struct comm *const lc, const uint level, - const uint levels, const struct comm comms[3]) { + const uint levels, const struct comm *comms) { sint psize = lc->np, pid = lc->id; if (level < levels - 1) { sint out[2][1], wrk[2][1], in = (comms[level + 1].id == 0); @@ -234,7 +242,7 @@ static sint get_bin(const struct comm *const lc, const uint level, } static uint get_level_cuts(const uint level, const uint levels, - const struct comm comms[3]) { + const struct comm *comms) { uint n = comms[level].np; if (level < levels - 1) { sint size = (comms[level + 1].id == 0), wrk; @@ -252,7 +260,7 @@ static uint get_level_cuts(const uint level, const uint levels, } void rsb(struct array *elements, int nv, const parrsb_options *const options, - const struct comm comms[3], buffer *bfr) { + const struct comm *comms, buffer *bfr) { const unsigned levels = options->levels; const sint verbose = options->verbose_level; const uint ndim = (nv == 8) ? 
3 : 2; @@ -309,15 +317,16 @@ void rsb(struct array *elements, int nv, const parrsb_options *const options, comm_split(&lc, bin, lc.id, &tc); // Find the number of disconnected components. + if (options->find_disconnected_comps == 0) goto bisect_and_balance; parrsb_print(gc, verbose - 1, "\trsb: level = %d, cut = %d, Components ...", level + 1, cut + 1); metric_tic(&lc, RSB_COMPONENTS); - const uint ncomp = - get_components_v2(NULL, elements, nv, &tc, bfr, verbose - 2); + uint ncomp = get_components_v2(NULL, elements, nv, &tc, bfr, verbose - 2); metric_acc(RSB_COMPONENTS_NCOMP, ncomp); metric_toc(&lc, RSB_COMPONENTS); + bisect_and_balance: // Bisect and balance. parrsb_print(gc, verbose - 1, "\trsb: level = %d, cut = %d, Balance ...", level + 1, cut + 1);