Skip to content

Commit

Permalink
Add PARRSB_FIND_DISCONNECTED_COMPONENTS to turn-off disconnected comp…
Browse files Browse the repository at this point in the history
…onents calculation (#74)
  • Loading branch information
thilinarmtb authored Apr 29, 2024
1 parent b8b6f44 commit fdd4d07
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 50 deletions.
41 changes: 27 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,38 +71,51 @@ seq [out]: Order of element `i` in processor `part[i]` after partition
```C
typedef struct {
// General options
int partitioner; // Partition algo: 0 - RSB, 1 - RCB, 2 - RIB (Default: 0)
int verbose_level; // Verbose level: 0, 1, 2, .. etc (Default: 1)
int profile_level; // Profile level: 0, 1, 2, .. etc (Default: 1)
int two_level; // Enable two level partitioning (Default: 0)
int partitioner; // Partition algo: 0 - RSB, 1 - RCB, 2 - RIB (Default: 0)
int tagged; // Tagged partitioning: 0 - No, 1 - Yes (Default: 0)
int levels; // Number of levels: 1, or 2 (Default: 2)
int find_disconnected_comps; // Find number of components: 0 - No, 1 - Yes
// (Default: 1)
int repair; // Repair disconnected components: 0 - No, 1 - Yes (Default: 0)
// RSB common (Lanczos + MG) options
int verbose_level; // Verbose level: 0, 1, 2, .. etc (Default: 1)
int profile_level; // Profile level: 0, 1, 2, .. etc (Default: 0)
// RSB common (Lanczos and MG) options
int rsb_algo; // RSB algo: 0 - Lanczos, 1 - MG (Default: 0)
int rsb_pre; // RSB pre-partition : 0 - None, 1 - RCB , 2 - RIB (Default: 1)
int rsb_max_iter; // Max iterations in Lanczos / MG (Default: 50)
int rsb_max_passes; // Max Lanczos restarts / Inverse iterations (Default: 50)
double rsb_tol; // Tolerance for Lanczos or RQI (Default: 1e-5)
int rsb_dump_stats; // Dump partition statistics to a text file.
// RSB MG specific options
int rsb_mg_grammian; // MG Grammian: 0 or 1 (Default: 0)
int rsb_mg_factor; // MG Coarsening factor (Default: 2, should be > 1)
int rsb_mg_sagg; // MG smooth aggregation: 0 or 1 (Default: 0)
} parrsb_options;
```

You can use `parrsb_default_options` struct instance to pass default options
to `parrsb_part_mesh` routine. All of these options can be controlled at runtime
by setting up the relevant environment variable (named as `PARRSB_<OPT_NAME>`)
to the corresponding value as well. Environment variable values will override
what is passed to `parrsb_part_mesh` routine.
Users can use the `parrsb_default_options` struct instance to pass default
options to the `parrsb_part_mesh` routine.

Below is a list of some of environment variables:
All of these options can also be controlled at runtime by setting the relevant
environment variable (named `PARRSB_<OPT_NAME>`). Environment variable values
override what is passed to the `parrsb_part_mesh` routine.

Below is a list of the environment variables:

```
PARRSB_PARTITIONER
PARRSB_TAGGED
PARRSB_LEVELS
PARRSB_FIND_DISCONNECTED_COMPONENTS
PARRSB_REPAIR
PARRSB_VERBOSE_LEVEL
PARRSB_PROFILE_LEVEL
PARRSB_TWO_LEVEL
PARRSB_REPAIR
PARRSB_RSB_ALGO
PARRSB_RSB_PRE
PARRSB_RSB_MAX_ITER
PARRSB_RSB_MAX_PASSES
PARRSB_RSB_TOL
PARRSB_RSB_MG_GRAMMIAN
PARRSB_RSB_MG_FACTOR
```
4 changes: 3 additions & 1 deletion src/parRSB.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ typedef struct {
// General options
int partitioner; // Partition algo: 0 - RSB, 1 - RCB, 2 - RIB (Default: 0)
int tagged; // Tagged partitioning: 0 - No, 1 - Yes (Default: 0)
int levels; // Number of levels (levels: 1, 2)
int levels; // Number of levels: 1, or 2 (Default: 2)
int find_disconnected_comps; // Find number of components: 0 - No, 1 - Yes
// (Default: 1)
int repair; // Repair disconnected components: 0 - No, 1 - Yes (Default: 0)
int verbose_level; // Verbose level: 0, 1, 2, .. etc (Default: 1)
int profile_level; // Profile level: 0, 1, 2, .. etc (Default: 0)
Expand Down
2 changes: 1 addition & 1 deletion src/parrsb-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ struct rsb_element {
};

void rsb(struct array *elements, int nv, const parrsb_options *const options,
const struct comm comms[3], buffer *bfr);
const struct comm *comms, buffer *bfr);

//------------------------------------------------------------------------------
// Find number of components.
Expand Down
20 changes: 10 additions & 10 deletions src/parrsb.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ parrsb_options parrsb_default_options = {
.partitioner = 0,
.tagged = 0,
.levels = 2,
.find_disconnected_comps = 1,
.repair = 0,
.verbose_level = 1,
.profile_level = 0,
Expand Down Expand Up @@ -59,6 +60,8 @@ static void update_options(parrsb_options *const options) {
UPDATE_OPTION(partitioner, "PARRSB_PARTITIONER", 1);
UPDATE_OPTION(tagged, "PARRSB_TAGGED", 1);
UPDATE_OPTION(levels, "PARRSB_LEVELS", 1);
UPDATE_OPTION(find_disconnected_comps, "PARRSB_FIND_DISCONNECTED_COMPONENTS",
1);
UPDATE_OPTION(repair, "PARRSB_REPAIR", 1);
UPDATE_OPTION(verbose_level, "PARRSB_VERBOSE_LEVEL", 1);
UPDATE_OPTION(profile_level, "PARRSB_PROFILE_LEVEL", 1);
Expand All @@ -67,7 +70,6 @@ static void update_options(parrsb_options *const options) {
UPDATE_OPTION(rsb_max_iter, "PARRSB_RSB_MAX_ITER", 1);
UPDATE_OPTION(rsb_max_passes, "PARRSB_RSB_MAX_PASSES", 1);
UPDATE_OPTION(rsb_tol, "PARRSB_RSB_TOL", 0);
UPDATE_OPTION(rsb_dump_stats, "PARRSB_DUMP_STATS", 1);
UPDATE_OPTION(rsb_mg_grammian, "PARRSB_RSB_MG_GRAMMIAN", 1);
UPDATE_OPTION(rsb_mg_factor, "PARRSB_RSB_MG_FACTOR", 1);

Expand All @@ -82,6 +84,8 @@ static void print_options(const struct comm *c,
PRINT_OPTION(partitioner, "PARRSB_PARTITIONER", "%d");
PRINT_OPTION(tagged, "PARRSB_TAGGED", "%d");
PRINT_OPTION(levels, "PARRSB_LEVELS", "%d");
PRINT_OPTION(find_disconnected_comps, "PARRSB_FIND_DISCONNECTED_COMPONENTS",
"%d");
PRINT_OPTION(repair, "PARRSB_REPAIR", "%d");
PRINT_OPTION(verbose_level, "PARRSB_VERBOSE_LEVEL", "%d");
PRINT_OPTION(profile_level, "PARRSB_PROFILE_LEVEL", "%d");
Expand All @@ -90,7 +94,6 @@ static void print_options(const struct comm *c,
PRINT_OPTION(rsb_max_iter, "PARRSB_RSB_MAX_ITER", "%d");
PRINT_OPTION(rsb_max_passes, "PARRSB_RSB_MAX_PASSES", "%d");
PRINT_OPTION(rsb_tol, "PARRSB_RSB_TOL", "%lf");
PRINT_OPTION(rsb_dump_stats, "PARRSB_DUMP_STATS", "%d");
PRINT_OPTION(rsb_mg_grammian, "PARRSB_RSB_MG_GRAMMIAN", "%d");
PRINT_OPTION(rsb_mg_factor, "PARRSB_RSB_MG_FACTOR", "%d");

Expand All @@ -104,8 +107,7 @@ static size_t load_balance(struct array *elist, uint nel, int nv,
slong out[2][1], wrk[2][1], in = nel;
comm_scan(out, c, gs_long, gs_add, &in, 1, wrk);
slong start = out[0][0], nelg = out[1][0];
parrsb_print(c, verbose, "load_balance: start = %lld nelg = %lld", start,
nelg);
parrsb_print(c, verbose, "load_balance: nelg = %lld", nelg);

uint nstar = nelg / c->np, nrem = nelg - nstar * c->np;
slong lower = (nstar + 1) * nrem;
Expand Down Expand Up @@ -224,10 +226,7 @@ static void initialize_levels(struct comm *const comms, int *const levels_in,

// Hardcode the maximum number of levels to two for now.
sint levels = 2;
uint sizes[2] = {num_nodes, 1};

*levels_in = levels = MIN(levels, *levels_in);

if (levels > 1) comm_dup(&comms[levels - 1], &nc);
comm_free(&nc);

Expand Down Expand Up @@ -264,9 +263,10 @@ static void parrsb_part_mesh_v0(int *part, const long long *const vtx,
const uint levels = options->levels;
assert(levels <= sizeof(comms) / sizeof(comms[0]));
initialize_levels(comms, &options->levels, &ca, verbose);
parrsb_print(c, verbose,
"parrsb_part_mesh_v0: Levels: requested = %d, enabled = %d",
levels, options->levels);
parrsb_print(
c, verbose,
"parrsb_part_mesh_v0: levels requested = %d, levels enabled = %d",
levels, options->levels);
}

parrsb_print(c, verbose, "parrsb_part_mesh_v0: running partitioner ...");
Expand Down
57 changes: 33 additions & 24 deletions src/rsb.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,21 @@ static void test_component_versions(struct array *elements, struct comm *lc,
crystal_free(&cr);
}

static void check_disconnected_components(const int i, const struct comm *gc,
void *bfr) {
sint minc = (sint)metric_get_value(i, RSB_COMPONENTS_NCOMP), maxc = minc;
comm_allreduce(gc, gs_int, gs_min, &minc, 1, (void *)bfr);
comm_allreduce(gc, gs_int, gs_max, &maxc, 1, (void *)bfr);

if (maxc > 1 && gc->id == 0) {
fprintf(stderr,
"Warning: Partition created %d/%d (min/max) disconnected "
"components in Level=%d!\n",
minc, maxc, i);
fflush(stderr);
}
}

static void check_rsb_partition(const struct comm *gc,
const parrsb_options *const opts) {
int max_levels = log2ll(gc->np);
Expand Down Expand Up @@ -72,33 +87,26 @@ static void check_rsb_partition(const struct comm *gc,
double final = metric_get_value(i, TOL_FNL);
comm_allreduce(&c, gs_double, gs_min, &final, 1, (void *)bfr);
if (c.id == 0) {
printf("Warning: Lanczos reached a residual of %lf (target: %lf) "
"after %d x %d iterations in Level=%d!\n",
final, target, mpass, miter, i);
fflush(stdout);
fprintf(stderr,
"Warning: Lanczos reached a residual of %lf (target: %lf) "
"after %d x %d iterations in Level=%d!\n",
final, target, mpass, miter, i);
fflush(stderr);
}
} else if (opts->rsb_algo == 1) {
if (c.id == 0) {
printf("Warning: Inverse iteration didn't converge after %d "
"iterations in Level = %d\n",
mpass, i);
fflush(stdout);
fprintf(stderr,
"Warning: Inverse iteration didn't converge after %d "
"iterations in Level = %d\n",
mpass, i);
fflush(stderr);
}
}
}
comm_free(&c);

sint minc, maxc;
minc = maxc = (sint)metric_get_value(i, RSB_COMPONENTS_NCOMP);
comm_allreduce(gc, gs_int, gs_min, &minc, 1, (void *)bfr);
comm_allreduce(gc, gs_int, gs_max, &maxc, 1, (void *)bfr);

if (maxc > 1 && gc->id == 0) {
printf("Warning: Partition created %d/%d (min/max) disconnected "
"components in Level=%d!\n",
minc, maxc, i);
fflush(stdout);
}
if (opts->find_disconnected_comps == 1)
check_disconnected_components(i, gc, (void *)bfr);
}
}

Expand Down Expand Up @@ -221,7 +229,7 @@ static int balance_partitions(struct array *elements, unsigned nv,
}

static sint get_bin(const struct comm *const lc, const uint level,
const uint levels, const struct comm comms[3]) {
const uint levels, const struct comm *comms) {
sint psize = lc->np, pid = lc->id;
if (level < levels - 1) {
sint out[2][1], wrk[2][1], in = (comms[level + 1].id == 0);
Expand All @@ -234,7 +242,7 @@ static sint get_bin(const struct comm *const lc, const uint level,
}

static uint get_level_cuts(const uint level, const uint levels,
const struct comm comms[3]) {
const struct comm *comms) {
uint n = comms[level].np;
if (level < levels - 1) {
sint size = (comms[level + 1].id == 0), wrk;
Expand All @@ -252,7 +260,7 @@ static uint get_level_cuts(const uint level, const uint levels,
}

void rsb(struct array *elements, int nv, const parrsb_options *const options,
const struct comm comms[3], buffer *bfr) {
const struct comm *comms, buffer *bfr) {
const unsigned levels = options->levels;
const sint verbose = options->verbose_level;
const uint ndim = (nv == 8) ? 3 : 2;
Expand Down Expand Up @@ -309,15 +317,16 @@ void rsb(struct array *elements, int nv, const parrsb_options *const options,
comm_split(&lc, bin, lc.id, &tc);

// Find the number of disconnected components.
if (options->find_disconnected_comps == 0) goto bisect_and_balance;
parrsb_print(gc, verbose - 1,
"\trsb: level = %d, cut = %d, Components ...", level + 1,
cut + 1);
metric_tic(&lc, RSB_COMPONENTS);
const uint ncomp =
get_components_v2(NULL, elements, nv, &tc, bfr, verbose - 2);
uint ncomp = get_components_v2(NULL, elements, nv, &tc, bfr, verbose - 2);
metric_acc(RSB_COMPONENTS_NCOMP, ncomp);
metric_toc(&lc, RSB_COMPONENTS);

bisect_and_balance:
// Bisect and balance.
parrsb_print(gc, verbose - 1, "\trsb: level = %d, cut = %d, Balance ...",
level + 1, cut + 1);
Expand Down

0 comments on commit fdd4d07

Please sign in to comment.