Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sparse GPU OPFLOW Model: Assemble Jacobian and Hessian on device #90

Draft
wants to merge 35 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
d3e0ac0
OPFLOW: initial implementation of RAJA/HiOp sparse GPU-based solver
May 8, 2021
80e8599
Spit out sparse Jacobian indexes for debugging
wperkins Sep 18, 2023
8670724
Count equality Jacobian nonzeros and assign bus/gen/load locations
wperkins Sep 19, 2023
0287bd7
Equality Jacobian nonzero count matches
wperkins Sep 19, 2023
02d6619
Inequality Jacobian nonzero count matches
wperkins Sep 19, 2023
33902a5
Log event begin/end unbalanced in equality Jacobian for some reason
wperkins Oct 17, 2023
cd0bd76
Checkpoint - Equality Jacobian assembly runs
wperkins Oct 24, 2023
8acb1d9
Checkpoint: equality Jacobian indexes correct (w/o lines)
wperkins Oct 26, 2023
3bf98af
Add line to/from locations for sparse equality Jacobian
wperkins Oct 26, 2023
335ddd1
GPU computed sparse equality Jacobian indexes correct
wperkins Oct 26, 2023
94ddd33
Add some additional line information for sparse Jacobian assembly
wperkins Oct 27, 2023
46efbde
Sparse GPU equality Jacobian assembly is coded and running (results u…
wperkins Oct 27, 2023
84a45be
Streamline debug printing of equality Jacobian
wperkins Oct 27, 2023
fae9cd9
Equality Jacobian is correct, but apparently needs to be reordered
wperkins Oct 30, 2023
6890b32
Inequality Jacobian indexes are correct (for lines)
wperkins Oct 31, 2023
d60a5ff
Inequality Jacobian values for line limits added
wperkins Oct 31, 2023
523932d
Minor changes to inequality Jacobian line limit values
wperkins Oct 31, 2023
fd65166
Sort Jacobian indexes, but not values.
wperkins Nov 8, 2023
067469f
Correctly reorder Jacobian values, but using the wrong method
wperkins Nov 8, 2023
8fb2e2c
Actually use GPU-computed Jacobian; reduce steps to reorder Jacobian …
wperkins Nov 8, 2023
fe0b6ea
Print (CPU) Hessian non-zero count, indexes, and values
wperkins Nov 9, 2023
fcf9074
Add Hessian indexes to line params; make sure Hessian index is allocated
wperkins Nov 9, 2023
8a2c4d3
Attempt to count Hessian non-zeros (wrong, of course)
wperkins Nov 9, 2023
eaa1cf0
Temporarily capture and report Hessian row/col indexes used
wperkins Nov 14, 2023
d241781
GPU Hessian indexes match model but not resulting matrix?
wperkins Nov 14, 2023
d15445b
First shot at GPU computed Hessian values (untested)
wperkins Nov 14, 2023
ca85561
Sort Hessian indexes and permute values; clean up allocations
wperkins Nov 14, 2023
8e11b60
Add some minor debug messages about Hessian size
wperkins Nov 17, 2023
86f787d
Allocate Hessian differently to avoid unnecessary structure
wperkins Nov 20, 2023
e3905c7
Add an entry for generator reactive power even if it's unused
wperkins Nov 20, 2023
1e78787
GPU assembled Hessian non-zero count and indexes are correct
wperkins Nov 20, 2023
86ff70d
Mark Hessian contributions with a code
wperkins Nov 28, 2023
718359f
Checkpoint: GPU Hessian values incorrect -- new approach maybe?
wperkins Nov 28, 2023
baa1c9d
Fix Hessian indexing problems in line limits
wperkins Nov 28, 2023
f05b8d7
Apply pre-commmit fixes
wperkins Nov 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion src/opflow/interface/opflow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1792,7 +1792,19 @@ PetscErrorCode OPFLOWSetUp(OPFLOW opflow) {
}

/* Create Hessian */
ierr = PSCreateMatrix(opflow->ps, &opflow->Hes);
// ierr = PSCreateMatrix(opflow->ps, &opflow->Hes);
// CHKERRQ(ierr);

ierr = MatCreate(opflow->comm->type, &opflow->Hes);
CHKERRQ(ierr);
ierr =
MatSetSizes(opflow->Hes, opflow->nx, opflow->nx, opflow->Nx, opflow->Nx);
CHKERRQ(ierr);
ierr = MatSetFromOptions(opflow->Hes);
CHKERRQ(ierr);
ierr = MatSeqAIJSetPreallocation(opflow->Hes, 6, NULL);
CHKERRQ(ierr);
ierr = MatSetOption(opflow->Hes, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
CHKERRQ(ierr);

/* Create natural to sparse dense ordering mapping (needed for some models)
Expand Down
73 changes: 63 additions & 10 deletions src/opflow/model/power_bal_hiop/paramsrajahiop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ int BUSParamsRajaHiop::destroy(OPFLOW opflow) {
h_allocator_.deallocate(bl);
h_allocator_.deallocate(xidx);
h_allocator_.deallocate(gidx);
h_allocator_.deallocate(jacsp_idx);
h_allocator_.deallocate(jacsq_idx);
h_allocator_.deallocate(hesssp_idx);
if (opflow->include_powerimbalance_variables) {
h_allocator_.deallocate(xidxpimb);
h_allocator_.deallocate(powerimbalance_penalty);
h_allocator_.deallocate(jacsp_idx);
h_allocator_.deallocate(jacsq_idx);
}

#ifdef EXAGO_ENABLE_GPU
Expand All @@ -40,11 +41,12 @@ int BUSParamsRajaHiop::destroy(OPFLOW opflow) {
d_allocator_.deallocate(bl_dev_);
d_allocator_.deallocate(xidx_dev_);
d_allocator_.deallocate(gidx_dev_);
d_allocator_.deallocate(jacsp_idx_dev_);
d_allocator_.deallocate(jacsq_idx_dev_);
d_allocator_.deallocate(hesssp_idx_dev_);
if (opflow->include_powerimbalance_variables) {
d_allocator_.deallocate(xidxpimb_dev_);
d_allocator_.deallocate(powerimbalance_penalty_dev_);
d_allocator_.deallocate(jacsp_idx_dev_);
d_allocator_.deallocate(jacsq_idx_dev_);
}
#endif

Expand Down Expand Up @@ -74,10 +76,11 @@ int BUSParamsRajaHiop::copy(OPFLOW opflow) {

resmgr.copy(xidx_dev_, xidx);
resmgr.copy(gidx_dev_, gidx);
resmgr.copy(jacsp_idx_dev_, jacsp_idx);
resmgr.copy(jacsq_idx_dev_, jacsq_idx);
resmgr.copy(hesssp_idx_dev_, hesssp_idx);
if (opflow->include_powerimbalance_variables) {
resmgr.copy(xidxpimb_dev_, xidxpimb);
resmgr.copy(jacsp_idx_dev_, jacsp_idx);
resmgr.copy(jacsq_idx_dev_, jacsq_idx);
resmgr.copy(powerimbalance_penalty_dev_, powerimbalance_penalty);
}
#else
Expand All @@ -95,6 +98,7 @@ int BUSParamsRajaHiop::copy(OPFLOW opflow) {
gidx_dev_ = gidx;
jacsp_idx_dev_ = jacsp_idx;
jacsq_idx_dev_ = jacsq_idx;
hesssp_idx_dev_ = hesssp_idx;
powerimbalance_penalty_dev_ = powerimbalance_penalty;
#endif
return 0;
Expand Down Expand Up @@ -126,11 +130,12 @@ int BUSParamsRajaHiop::allocate(OPFLOW opflow) {
xidx = paramAlloc<int>(h_allocator_, nbus);
gidx = paramAlloc<int>(h_allocator_, nbus);

jacsp_idx = paramAlloc<int>(h_allocator_, nbus);
jacsq_idx = paramAlloc<int>(h_allocator_, nbus);
hesssp_idx = paramAlloc<int>(h_allocator_, nbus);
if (opflow->include_powerimbalance_variables) {
xidxpimb = paramAlloc<int>(h_allocator_, nbus);
powerimbalance_penalty = paramAlloc<double>(h_allocator_, nbus);
jacsp_idx = paramAlloc<int>(h_allocator_, nbus);
jacsq_idx = paramAlloc<int>(h_allocator_, nbus);
}

/* Memzero arrays */
Expand Down Expand Up @@ -194,11 +199,12 @@ int BUSParamsRajaHiop::allocate(OPFLOW opflow) {
xidx_dev_ = paramAlloc<int>(d_allocator_, nbus);
gidx_dev_ = paramAlloc<int>(d_allocator_, nbus);

jacsp_idx_dev_ = paramAlloc<int>(d_allocator_, nbus);
jacsq_idx_dev_ = paramAlloc<int>(d_allocator_, nbus);
hesssp_idx_dev_ = paramAlloc<int>(d_allocator_, nbus);
if (opflow->include_powerimbalance_variables) {
xidxpimb_dev_ = paramAlloc<int>(d_allocator_, nbus);
powerimbalance_penalty_dev_ = paramAlloc<double>(d_allocator_, nbus);
jacsp_idx_dev_ = paramAlloc<int>(d_allocator_, nbus);
jacsq_idx_dev_ = paramAlloc<int>(d_allocator_, nbus);
}
#endif
return 0;
Expand Down Expand Up @@ -226,11 +232,17 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) {

resmgr.copy(geqidxf_dev_, geqidxf);
resmgr.copy(geqidxt_dev_, geqidxt);
resmgr.copy(busf_idx_dev_, busf_idx);
resmgr.copy(bust_idx_dev_, bust_idx);
resmgr.copy(jacf_idx_dev_, jacf_idx);
resmgr.copy(jact_idx_dev_, jact_idx);
resmgr.copy(hesssp_idx_dev_, hesssp_idx);

if (opflow->nlinesmon) {
resmgr.copy(gineqidx_dev_, gineqidx);
resmgr.copy(gbineqidx_dev_, gbineqidx);
resmgr.copy(linelimidx_dev_, linelimidx);
resmgr.copy(jac_ieq_idx_dev_, jac_ieq_idx);
}
#else
Gff_dev_ = Gff;
Expand All @@ -246,10 +258,16 @@ int LINEParamsRajaHiop::copy(OPFLOW opflow) {
xidxt_dev_ = xidxt;
geqidxf_dev_ = geqidxf;
geqidxt_dev_ = geqidxt;
busf_idx_dev_ = busf_idx;
bust_idx_dev_ = bust_idx;
jacf_idx_dev_ = jacf_idx;
jact_idx_dev_ = jact_idx;
hesssp_idx_dev_ = hesssp_idx;
if (opflow->nlinesmon) {
gineqidx_dev_ = gineqidx;
gbineqidx_dev_ = gbineqidx;
linelimidx_dev_ = linelimidx;
jac_ieq_idx_dev_ = jac_ieq_idx;
}
#endif
return 0;
Expand All @@ -272,11 +290,17 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) {

h_allocator_.deallocate(geqidxf);
h_allocator_.deallocate(geqidxt);
h_allocator_.deallocate(busf_idx);
h_allocator_.deallocate(bust_idx);
h_allocator_.deallocate(jacf_idx);
h_allocator_.deallocate(jact_idx);
h_allocator_.deallocate(hesssp_idx);

if (opflow->nlinesmon) {
h_allocator_.deallocate(gineqidx);
h_allocator_.deallocate(gbineqidx);
h_allocator_.deallocate(linelimidx);
h_allocator_.deallocate(jac_ieq_idx);
}

#ifdef EXAGO_ENABLE_GPU
Expand All @@ -296,11 +320,17 @@ int LINEParamsRajaHiop::destroy(OPFLOW opflow) {

d_allocator_.deallocate(geqidxf_dev_);
d_allocator_.deallocate(geqidxt_dev_);
d_allocator_.deallocate(busf_idx_dev_);
d_allocator_.deallocate(bust_idx_dev_);
d_allocator_.deallocate(jacf_idx_dev_);
d_allocator_.deallocate(jact_idx_dev_);
d_allocator_.deallocate(hesssp_idx_dev_);

if (opflow->nlinesmon) {
d_allocator_.deallocate(gineqidx_dev_);
d_allocator_.deallocate(gbineqidx_dev_);
d_allocator_.deallocate(linelimidx_dev_);
d_allocator_.deallocate(jac_ieq_idx_dev_);
}
#endif

Expand Down Expand Up @@ -344,10 +374,17 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) {
geqidxf = paramAlloc<int>(h_allocator_, nlineON);
geqidxt = paramAlloc<int>(h_allocator_, nlineON);

busf_idx = paramAlloc<int>(h_allocator_, nlineON);
bust_idx = paramAlloc<int>(h_allocator_, nlineON);
jacf_idx = paramAlloc<int>(h_allocator_, nlineON);
jact_idx = paramAlloc<int>(h_allocator_, nlineON);
hesssp_idx = paramAlloc<int>(h_allocator_, nlineON);

if (opflow->nlinesmon) {
linelimidx = paramAlloc<int>(h_allocator_, nlinelim);
gineqidx = paramAlloc<int>(h_allocator_, nlinelim);
gbineqidx = paramAlloc<int>(h_allocator_, nlinelim);
jac_ieq_idx = paramAlloc<int>(h_allocator_, nlinelim);
}

PetscInt j = 0;
Expand Down Expand Up @@ -386,11 +423,17 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) {
*/
geqidxf[linei] = busf->starteqloc;
geqidxt[linei] = bust->starteqloc;
busf_idx[linei] = ps->busext2intmap[line->fbus];
bust_idx[linei] = ps->busext2intmap[line->tbus];
jacf_idx[linei] = 0;
jact_idx[linei] = 0;
hesssp_idx[linei] = 0;

if (j < opflow->nlinesmon && opflow->linesmon[j] == i) {
gbineqidx[j] = opflow->nconeq + line->startineqloc;
gineqidx[j] = line->startineqloc;
linelimidx[j] = linei;
jac_ieq_idx[j] = 0;
j++;
}

Expand All @@ -416,10 +459,17 @@ int LINEParamsRajaHiop::allocate(OPFLOW opflow) {
geqidxf_dev_ = paramAlloc<int>(d_allocator_, nlineON);
geqidxt_dev_ = paramAlloc<int>(d_allocator_, nlineON);

busf_idx_dev_ = paramAlloc<int>(d_allocator_, nlineON);
bust_idx_dev_ = paramAlloc<int>(d_allocator_, nlineON);
jacf_idx_dev_ = paramAlloc<int>(d_allocator_, nlineON);
jact_idx_dev_ = paramAlloc<int>(d_allocator_, nlineON);
hesssp_idx_dev_ = paramAlloc<int>(d_allocator_, nlineON);

if (opflow->nconineq) {
gineqidx_dev_ = paramAlloc<int>(d_allocator_, nlinelim);
gbineqidx_dev_ = paramAlloc<int>(d_allocator_, nlinelim);
linelimidx_dev_ = paramAlloc<int>(d_allocator_, nlinelim);
jac_ieq_idx_dev_ = paramAlloc<int>(d_allocator_, nlinelim);
}
#endif
return 0;
Expand Down Expand Up @@ -781,10 +831,12 @@ void PbpolModelRajaHiop::destroy(OPFLOW opflow) {

auto &resmgr = umpire::ResourceManager::getInstance();
umpire::Allocator h_allocator_ = resmgr.getAllocator("HOST");
umpire::Allocator d_allocator_ = resmgr.getAllocator("DEVICE");

h_allocator_.deallocate(i_jaceq);
h_allocator_.deallocate(j_jaceq);
h_allocator_.deallocate(val_jaceq);
d_allocator_.deallocate(idx_jaceq_dev_);

h_allocator_.deallocate(i_hess);
h_allocator_.deallocate(j_hess);
Expand All @@ -794,6 +846,7 @@ void PbpolModelRajaHiop::destroy(OPFLOW opflow) {
h_allocator_.deallocate(i_jacineq);
h_allocator_.deallocate(j_jacineq);
h_allocator_.deallocate(val_jacineq);
d_allocator_.deallocate(idx_jacineq_dev_);
}
}
#endif
Expand Down
24 changes: 22 additions & 2 deletions src/opflow/model/power_bal_hiop/paramsrajahiop.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,13 @@ struct LINEParamsRajaHiop {
constraint bound */
int *linelimidx; /* Indices for subset of lines that have finite limits */

int *busf_idx; /* From bus index */
int *bust_idx; /* To bus index */
int *jacf_idx; /* Location number in the sparse Jacobian (from) */
int *jact_idx; /* Location number in the sparse Jacobian (to) */
int *jac_ieq_idx; /* Location number in sparse inequality Jacobian */
int *hesssp_idx; /* Location number in sparse Hessian */

// Device data
double *Gff_dev_; /* From side self conductance */
double *Bff_dev_; /* From side self susceptance */
Expand All @@ -219,6 +226,13 @@ struct LINEParamsRajaHiop {
int *
linelimidx_dev_; /* Indices for subset of lines that have finite limits */

int *busf_idx_dev_; /* From bus index */
int *bust_idx_dev_; /* To bus index */
int *jacf_idx_dev_; /* Location number in the sparse Jacobian (from) */
int *jact_idx_dev_; /* Location number in the sparse Jacobian (to) */
int *jac_ieq_idx_dev_; /* Location number in sparse inequality Jacobian */
int *hesssp_idx_dev_; /* Location number in sparse Hessian */

int allocate(OPFLOW);
int destroy(OPFLOW);
int copy(OPFLOW);
Expand All @@ -237,6 +251,7 @@ struct PbpolModelRajaHiop : public _p_FormPBPOLRAJAHIOP {
i_jaceq = j_jaceq = i_jacineq = j_jacineq = NULL;
i_hess = j_hess = NULL;
val_jaceq = val_jacineq = val_hess = NULL;
idx_jaceq_dev_ = idx_jacineq_dev_ = idx_hess_dev_ = NULL;
}

void destroy(OPFLOW opflow);
Expand All @@ -252,9 +267,14 @@ struct PbpolModelRajaHiop : public _p_FormPBPOLRAJAHIOP {
// GPU sparse model)
int *i_jaceq,
*j_jaceq; // Row and column indices for equality constrained Jacobian
int *idx_jaceq_dev_; // Permuted triplet indexes for equality constrained
// Jacobian (on-device)
int *i_jacineq,
*j_jacineq; // Row and column indices for inequality constrained Jacobain
int *i_hess, *j_hess; // Row and column indices for hessian
int *idx_jacineq_dev_; // Permuted triplet indexes for inequality constrained
// Jacobian (on-device)
int *i_hess, *j_hess; // Row and column indices for hessian
double *val_jaceq, *val_jacineq,
*val_hess; // values for equality, inequality jacobians and hessian
*val_hess; // values for equality, inequality jacobians and hessian
int *idx_hess_dev_; // Permuted triplet indexes for Hessian (on-device)
};
Loading
Loading