Skip to content

Commit

Permalink
...EJB
Browse files Browse the repository at this point in the history
  • Loading branch information
ebylaska committed Jul 16, 2024
1 parent c995097 commit f53df0a
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 56 deletions.
4 changes: 3 additions & 1 deletion Nwpw/band/lib/cpsp/CPseudopotential.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2232,6 +2232,7 @@ double CPseudopotential::e_nonlocal(double *psi)

for (auto nbq=0; nbq<(mypneb->nbrillq); ++ nbq)
{
double weight = mypneb->pbrill_weight(nbq);
int nbq1 = nbq + 1;

// Copy psi to device
Expand Down Expand Up @@ -2302,9 +2303,10 @@ double CPseudopotential::e_nonlocal(double *psi)

std::complex<double> ztmp = ZDOTC_PWDFT(ntmp, zsw1, one, zsw2, one);

esum += ztmp.real();
esum += ztmp.real()*weight;
mypneb->c3db::mygdevice.T_free();
}

}

esum = parall->SumAll(2,esum);
Expand Down
113 changes: 83 additions & 30 deletions Nwpw/nwpwlib/C3dB/CGrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,32 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0,
Gmin = sqrt(ggmin);

// for printing grid values
nidb_print = new (std::nothrow) int[nbrillouin]();
nwave_all_print = new (std::nothrow) int[nbrillouin]();
//nidb_print = new (std::nothrow) int[nbrillouin]();
//nwave_all_print = new (std::nothrow) int[nbrillouin]();
//nidb_print.resize(nbrillouin,0);
//nwave_all_print.resize(nbrillouin,0);

// aligned Memory
nidb = new (std::nothrow) int[nbrillq+1]();
nidb2 = new (std::nothrow) int[nbrillq+1]();
nwave = new (std::nothrow) int[nbrillq+1]();
nwave_all = new (std::nothrow) int[nbrillq+1]();
nwave_entire = new (std::nothrow) int[nbrillq+1]();
//nidb = new (std::nothrow) int[nbrillq+1]();
//nidb2 = new (std::nothrow) int[nbrillq+1]();
//nwave = new (std::nothrow) int[nbrillq+1]();
//nwave_all = new (std::nothrow) int[nbrillq+1]();
//nwave_entire = new (std::nothrow) int[nbrillq+1]();


// aligned Memory
//nidb = new (std::nothrow) int[nbrillq+1]();
//nidb2 = new (std::nothrow) int[nbrillq+1]();
//nwave = new (std::nothrow) int[nbrillq+1]();
//nwave_all = new (std::nothrow) int[nbrillq+1]();
//nwave_entire = new (std::nothrow) int[nbrillq+1]();

nidb.resize(nbrillq + 1);
nidb2.resize(nbrillq + 1);
nwave.resize(nbrillq + 1);
nwave_all.resize(nbrillq + 1);
nwave_entire.resize(nbrillq + 1);
Gpack.resize(nbrillq + 1);

p_weight = new (std::nothrow) double[nbrillq];
p_kvector = new (std::nothrow) double[3*nbrillq];
Expand All @@ -117,12 +134,16 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0,

//std::size_t aligned_size = (nfft3d * sizeof(int) + Alignment - 1) & ~(Alignment - 1);
std::size_t aligned_size = (nfft3d);
masker = new (std::nothrow) int*[nbrillq+1]();
packarray = new (std::nothrow) int*[nbrillq+1]();
//masker = new (std::nothrow) int*[nbrillq+1]();
//packarray = new (std::nothrow) int*[nbrillq+1]();
masker.resize(nbrillq+1);
packarray.resize(nbrillq+1);
for (auto nbq=0; nbq<=nbrillq; ++nbq)
{
masker[nbq] = new (std::nothrow) int[aligned_size]();
packarray[nbq] = new (std::nothrow) int[aligned_size]();
//masker[nbq] = new (std::nothrow) int[aligned_size]();
//packarray[nbq] = new (std::nothrow) int[aligned_size]();
masker[nbq].resize(aligned_size, 0.0);
packarray[nbq].resize(aligned_size, 0.0);
for (auto k=0; k<(nfft3d); ++k)
{
masker[nbq][k] = 1;
Expand Down Expand Up @@ -226,7 +247,7 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0,
nidb[nbq] = nidb2[nbq] + (nwave_out[nbq] - nwave_in[nbq]);
nwave_all[nbq] = nidb2[nbq];
}
c3db::parall->Vector_ISumAll(1, nbrillq+1, nwave_all);
c3db::parall->Vector_ISumAll(1, nbrillq+1, nwave_all.data());

nidb1_max = 0;
for (auto nbq=0; nbq<nbrillq; ++nbq)
Expand All @@ -236,7 +257,9 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0,
zero_row3 = new (std::nothrow) bool*[nbrillq+1];
zero_row2 = new (std::nothrow) bool*[nbrillq+1];
zero_slab23 = new (std::nothrow) bool*[nbrillq+1];
Gpack = new (std::nothrow) double*[nbrillq+1];
//zero_row3.resize(nbrillq+1);
//zero_row2.resize(nbrillq+1);
//zero_slab23.resize(nbrillq+1);

if (maptype == 1)
{
Expand All @@ -248,6 +271,29 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0,
zero_row3[nbq] = new (std::nothrow) bool[(nx * nq)];
zero_row2[nbq] = new (std::nothrow) bool[(nx * nq)];
zero_slab23[nbq] = new (std::nothrow) bool[nx];
//zero_row3[nbq].resize(nx*nq,false);
//zero_row2[nbq].resize(nx*nq,false);
//zero_slab23[nbq].resize(nx,false);
}

//zero_arow3 = new bool[(nx*ny + Alignment - 1) & ~(Alignment - 1)];
zero_arow3 = new bool[(nx*ny)];
for (auto nb=0; nb<=nbrillq; ++nb)
{
if (nb == 0)
{
ggcut = lattice->eggcut();
kv[0] = 0.0;
kv[1] = 0.0;
kv[2] = 0.0;
}
else
{
ggcut = lattice->wggcut();
kv[0] = p_kvector[3*(nb-1)];
kv[1] = p_kvector[3*(nb-1)+1];
kv[2] = p_kvector[3*(nb-1)+2];
}
}

//zero_arow3 = new bool[(nx*ny + Alignment - 1) & ~(Alignment - 1)];
Expand Down Expand Up @@ -357,6 +403,9 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0,
zero_row3[nbq] = new (std::nothrow) bool[(nq3)]();
zero_row2[nbq] = new (std::nothrow) bool[(nq2)]();
zero_slab23[nbq] = new (std::nothrow) bool[nx]();
//zero_row3[nbq].resize(nq3,false);
//zero_row2[nbq].resize(nq2,false);
//zero_slab23[nbq].resize(nx,false);
}

//zero_arow2 = new (std::nothrow) bool[(nx*nz + Alignment - 1) & ~(Alignment - 1)]();
Expand Down Expand Up @@ -461,9 +510,11 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0,
}

//Gpack[nbq] = new (std::nothrow) double[(3*(nidb[nbq]) + Alignment - 1) & ~(Alignment - 1)]();
//Gpack[nbq] = (std::nothrow) double[(3*(nidb[nbq]))]();
//Gpack[nbq] = new (std::nothrow) double[(3*(nidb[nbq]))]();
for (auto nbq=0; nbq<=nbrillq; ++nbq)
Gpack[nbq] = new (std::nothrow) double[(3*(nidb[nbq]))]();
Gpack[nbq].resize(3*nidb[nbq], 0.0);

double *Gtmp = new (std::nothrow) double[nfft3d]();

int one = 1;
Expand All @@ -474,7 +525,7 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0,
std::memcpy(Gtmp,Garray+i*nfft3d,nfft3d*sizeof(double));

this->r_pack(nb,Gtmp);
this->rr_pack_copy(nb,Gtmp,Gpack[nb]+i*(nidb[nb]));
this->rr_pack_copy(nb,Gtmp,Gpack[nb].data()+i*(nidb[nb]));
}
}

Expand Down Expand Up @@ -515,16 +566,18 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0,
}

/* initialize print_grid */
std::memset(nidb_print,0,nbrillouin*sizeof(int));
std::memset(nwave_all_print,0,nbrillouin*sizeof(int));
//std::memset(nidb_print.data(),0,nbrillouin*sizeof(int));
//std::memset(nwave_all_print.data(),0,nbrillouin*sizeof(int));
nidb_print.resize(nbrillouin,0);
nwave_all_print.resize(nbrillouin,0);
for (auto nbq=0; nbq<nbrillq; ++nbq)
{
auto nb = k1db::ktoptok(nbq);
nidb_print[nb] = nidb[1+nbq];
nwave_all_print[nb] = nwave_all[1+nbq];
}
c3db::parall->Vector_ISumAll(3,nbrillouin,nidb_print);
c3db::parall->Vector_ISumAll(3,nbrillouin,nwave_all_print);
c3db::parall->Vector_ISumAll(3,nbrillouin,nidb_print.data());
c3db::parall->Vector_ISumAll(3,nbrillouin,nwave_all_print.data());


/* initialize pfft3 queues */
Expand Down Expand Up @@ -590,7 +643,7 @@ void CGrid::c_unpack(const int nb, double *a)

std::memcpy(c3db::c3db_tmp1,a,nn*sizeof(double));
std::memset(a, 0, 2*nfft3d*sizeof(double));
c_bindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a);
c_bindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a);

}

Expand All @@ -605,7 +658,7 @@ void CGrid::c_pack(const int nb, double *a)
std::memcpy(c3db::c3db_tmp1,a,2*nfft3d*sizeof(double));
std::memset(a, 0,2*nfft3d*sizeof(double));

c_aindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a);
c_aindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a);

if (balanced)
mybalance->c_balance(nb, a);
Expand Down Expand Up @@ -773,7 +826,7 @@ void CGrid::tt_pack_copy(const int nb, const double *a, double *b)
********************************/
void CGrid::tt_pack_copy(const int nb, const double *a, double *b) {
// Ensure nb is within bounds
if (nb < 0 || nb >= 4) {
if (nb < 0 ) {
std::cerr << "Error: 'nb' index out of bounds in tt_pack_copy" << std::endl;
return;
}
Expand Down Expand Up @@ -897,7 +950,7 @@ void CGrid::r_unpack(const int nb, double *a)
std::memcpy(c3db::c3db_tmp1, a, nn * sizeof(double));
std::memset(a, 0, nfft3d*sizeof(double));

t_bindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a);
t_bindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a);
}

/********************************
Expand All @@ -914,7 +967,7 @@ void CGrid::t_unpack(const int nb, double *a)
std::memcpy(c3db::c3db_tmp1, a, nn * sizeof(double));
std::memset(a, 0, nfft3d*sizeof(double));

t_bindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a);
t_bindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a);
}


Expand All @@ -929,7 +982,7 @@ void CGrid::r_pack(const int nb, double *a)
std::memcpy(c3db::c3db_tmp1,a,nfft3d*sizeof(double));
std::memset(a,0,nfft3d*sizeof(double));

t_aindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a);
t_aindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a);

if (balanced)
mybalance->r_balance(nb, a);
Expand All @@ -945,7 +998,7 @@ void CGrid::t_pack(const int nb, double *a)
std::memcpy(c3db::c3db_tmp1,a,nfft3d*sizeof(double));
std::memset(a,0,nfft3d*sizeof(double));

t_aindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a);
t_aindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a);

if (balanced)
mybalance->t_balance(nb, a);
Expand Down Expand Up @@ -1001,7 +1054,7 @@ void CGrid::i_pack(const int nb, int *a)
for (auto i=0; i<nfft3d; ++i) tmp[i] = a[i];
for (auto i=0; i<nfft3d; ++i) a[i] = 0;

i_aindexcopy(nidb2[nb], packarray[nb], tmp, a);
i_aindexcopy(nidb2[nb], packarray[nb].data(), tmp, a);

if (balanced)
mybalance->i_balance(nb, a);
Expand Down Expand Up @@ -1440,7 +1493,7 @@ void CGrid::c_unpack_mid(const int nffts, const int nb, double *tmp1, double *tm
std::memset(tmp1, 0, nffts*2*nfft3d*sizeof(double));

for (auto s=0; s<nffts; ++s)
c_bindexcopy((nidb2[nb]), packarray[nb], tmp2+s*2*nfft3d, tmp1+s*2*nfft3d);
c_bindexcopy((nidb2[nb]), packarray[nb].data(), tmp2+s*2*nfft3d, tmp1+s*2*nfft3d);
}

/********************************
Expand Down Expand Up @@ -3208,7 +3261,7 @@ void CGrid::c_pack_start(const int nffts, const int nb, double *a, double *tmp1,
std::memset(a, 0, nffts*2*nfft3d * sizeof(double));

for (auto s=0; s<nffts; ++s)
c_aindexcopy(nidb2[nb], packarray[nb], tmp1 + s*n2ft3d, a + s*n2ft3d);
c_aindexcopy(nidb2[nb], packarray[nb].data(), tmp1 + s*n2ft3d, a + s*n2ft3d);

if (balanced)
mybalance->c_balance_start(nffts, nb, a, request_indx, msgtype);
Expand Down
45 changes: 27 additions & 18 deletions Nwpw/nwpwlib/C3dB/CGrid.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "k1db.hpp"
#include <cmath>
#include <complex>
#include <vector>

namespace pwdft {

Expand All @@ -28,16 +29,25 @@ class CGrid : public k1db, public c3db {


/* G grid data */
double *Garray, **Gpack;

//double *Garray, **Gpack;
double *Garray;
std::vector<std::vector<double>> Gpack;
double Gmax, Gmin;
int **masker, **packarray;
int *nwave, *nwave_entire, *nwave_all, *nidb, *nidb2, nidb1_max;
int *nwave_all_print, *nidb_print;

//int **masker, **packarray;
//int *nwave, *nwave_entire, *nwave_all, *nidb, *nidb2, nidb1_max;
//int *nwave_all_print, *nidb_print;
std::vector<std::vector<int>> masker,packarray;
std::vector<int> nwave_all_print,nidb_print;
std::vector<int> nwave,nwave_entire,nwave_all,nidb,nidb2;
int nidb1_max;

double *p_kvector, *p_weight;

/* pfft data */
bool **zero_row2, **zero_row3, **zero_slab23;
//std::vector<std::vector<bool>> zero_row2,zero_row3,zero_slab23;

/* pfft_queue data */
int nffts_max = 1;
Expand Down Expand Up @@ -74,27 +84,26 @@ class CGrid : public k1db, public c3db {
delete [] p_weight;
delete [] p_kvector;
delete [] Garray;
delete [] nwave;
delete [] nwave_entire;
delete [] nwave_all;
delete [] nidb;
delete [] nidb2;
delete [] nwave_all_print;
delete [] nidb_print;
//delete [] nwave;
//delete [] nwave_entire;
//delete [] nwave_all;
//delete [] nidb;
//delete [] nidb2;
//delete [] nwave_all_print;
//delete [] nidb_print;

for (auto nb=0; nb<=nbrillq; ++nb)
{
delete [] Gpack[nb];
delete [] masker[nb];
delete [] packarray[nb];
//delete [] Gpack[nb];
//delete [] masker[nb];
//delete [] packarray[nb];
delete [] zero_row3[nb];
delete [] zero_row2[nb];
delete [] zero_slab23[nb];
}

delete [] Gpack;
delete [] masker;
delete [] packarray;
//delete [] masker;
//delete [] packarray;
delete [] zero_row3;
delete [] zero_row2;
delete [] zero_slab23;
Expand All @@ -121,7 +130,7 @@ class CGrid : public k1db, public c3db {
}

double *Gxyz(const int i) { return Garray + i*nfft3d; }
double *Gpackxyz(const int nb, const int i) { return Gpack[nb] + i*(nidb[nb]); }
double *Gpackxyz(const int nb, const int i) { return Gpack[nb].data() + i*(nidb[nb]); }
double Gmax_ray() { return Gmax; }
double Gmin_ray() { return Gmin; }
double dGmin_ray() { return 0.01 * Gmin; }
Expand Down
2 changes: 1 addition & 1 deletion Nwpw/nwpwlib/C3dB/Cneb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2627,7 +2627,7 @@ void Cneb::ggw_lambda(double dte, double *psi1, double *psi2, double *lmbda)
} // for loop - ms

/* correction due to contraint */
fwf_Multiply(-1, psi1+nbq*shift2, lmbda + nbq*2*(ne[0]*ne[0]+ne[1]*ne[1]), rdte, psi2+nbq*shift2, rone);
Cneb::fwf_Multiply(-1, psi1+nbq*shift2, lmbda + nbq*2*(ne[0]*ne[0]+ne[1]*ne[1]), rdte, psi2+nbq*shift2, rone);
}

}
Expand Down
3 changes: 0 additions & 3 deletions Nwpw/nwpwlib/C3dB/Mapping3c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,6 @@ Mapping3c::Mapping3c(const int mapin, const int npin, const int taskidin,
/* makes expand and contract routines trivial parallel */
}

std::cout << "nx=" << nx << " ny=" << ny << " nz=" << nz << std::endl;
std::cout << "nq1=" << nq1 << " nq2=" << nq2 << " nq3=" << nq3 << std::endl;

nfft3d = nx * nq1;
if ((ny * nq2) > nfft3d)
Expand All @@ -249,7 +247,6 @@ Mapping3c::Mapping3c(const int mapin, const int npin, const int taskidin,
n2ft3d = 2*nfft3d;
nfft3d_map = nz * nq3;
n2ft3d_map = 2*nx * nq1;
std::cout << "nfft3d=" << nfft3d << " nfft3d_map=" << nfft3d_map << std::endl;

nrft3d = nx * nqr1;
if ((ny * nqr2) > nfft3d)
Expand Down
Loading

0 comments on commit f53df0a

Please sign in to comment.