From f53df0a4565e33aa2a6be7c038442c72a8f340ae Mon Sep 17 00:00:00 2001 From: eric bylaska Date: Tue, 16 Jul 2024 13:46:12 -0700 Subject: [PATCH] ...EJB --- Nwpw/band/lib/cpsp/CPseudopotential.cpp | 4 +- Nwpw/nwpwlib/C3dB/CGrid.cpp | 113 ++++++++++++++++------ Nwpw/nwpwlib/C3dB/CGrid.hpp | 45 +++++---- Nwpw/nwpwlib/C3dB/Cneb.cpp | 2 +- Nwpw/nwpwlib/C3dB/Mapping3c.cpp | 3 - Nwpw/nwpwlib/psp_library/Psp1d_Hamann.cpp | 6 +- 6 files changed, 117 insertions(+), 56 deletions(-) diff --git a/Nwpw/band/lib/cpsp/CPseudopotential.cpp b/Nwpw/band/lib/cpsp/CPseudopotential.cpp index a6fadfc0..1ae6a4bf 100644 --- a/Nwpw/band/lib/cpsp/CPseudopotential.cpp +++ b/Nwpw/band/lib/cpsp/CPseudopotential.cpp @@ -2232,6 +2232,7 @@ double CPseudopotential::e_nonlocal(double *psi) for (auto nbq=0; nbq<(mypneb->nbrillq); ++ nbq) { + double weight = mypneb->pbrill_weight(nbq); int nbq1 = nbq + 1; // Copy psi to device @@ -2302,9 +2303,10 @@ double CPseudopotential::e_nonlocal(double *psi) std::complex ztmp = ZDOTC_PWDFT(ntmp, zsw1, one, zsw2, one); - esum += ztmp.real(); + esum += ztmp.real()*weight; mypneb->c3db::mygdevice.T_free(); } + } esum = parall->SumAll(2,esum); diff --git a/Nwpw/nwpwlib/C3dB/CGrid.cpp b/Nwpw/nwpwlib/C3dB/CGrid.cpp index 50cb0a7b..7d22c5a7 100644 --- a/Nwpw/nwpwlib/C3dB/CGrid.cpp +++ b/Nwpw/nwpwlib/C3dB/CGrid.cpp @@ -94,15 +94,32 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0, Gmin = sqrt(ggmin); // for printing grid values - nidb_print = new (std::nothrow) int[nbrillouin](); - nwave_all_print = new (std::nothrow) int[nbrillouin](); + //nidb_print = new (std::nothrow) int[nbrillouin](); + //nwave_all_print = new (std::nothrow) int[nbrillouin](); + //nidb_print.resize(nbrillouin,0); + //nwave_all_print.resize(nbrillouin,0); // aligned Memory - nidb = new (std::nothrow) int[nbrillq+1](); - nidb2 = new (std::nothrow) int[nbrillq+1](); - nwave = new (std::nothrow) int[nbrillq+1](); - nwave_all = new (std::nothrow) int[nbrillq+1](); - nwave_entire = new (std::nothrow) int[nbrillq+1](); + //nidb = new (std::nothrow) int[nbrillq+1](); + //nidb2 = new (std::nothrow) int[nbrillq+1](); + //nwave = new (std::nothrow) int[nbrillq+1](); + //nwave_all = new (std::nothrow) int[nbrillq+1](); + //nwave_entire = new (std::nothrow) int[nbrillq+1](); + + + // aligned Memory + //nidb = new (std::nothrow) int[nbrillq+1](); + //nidb2 = new (std::nothrow) int[nbrillq+1](); + //nwave = new (std::nothrow) int[nbrillq+1](); + //nwave_all = new (std::nothrow) int[nbrillq+1](); + //nwave_entire = new (std::nothrow) int[nbrillq+1](); + + nidb.resize(nbrillq + 1); + nidb2.resize(nbrillq + 1); + nwave.resize(nbrillq + 1); + nwave_all.resize(nbrillq + 1); + nwave_entire.resize(nbrillq + 1); + Gpack.resize(nbrillq + 1); p_weight = new (std::nothrow) double[nbrillq]; p_kvector = new (std::nothrow) double[3*nbrillq]; @@ -117,12 +134,16 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0, //std::size_t aligned_size = (nfft3d * sizeof(int) + Alignment - 1) & ~(Alignment - 1); std::size_t aligned_size = (nfft3d); - masker = new (std::nothrow) int*[nbrillq+1](); - packarray = new (std::nothrow) int*[nbrillq+1](); + //masker = new (std::nothrow) int*[nbrillq+1](); + //packarray = new (std::nothrow) int*[nbrillq+1](); + masker.resize(nbrillq+1); + packarray.resize(nbrillq+1); for (auto nbq=0; nbq<=nbrillq; ++nbq) { - masker[nbq] = new (std::nothrow) int[aligned_size](); - packarray[nbq] = new (std::nothrow) int[aligned_size](); + //masker[nbq] = new (std::nothrow) int[aligned_size](); + //packarray[nbq] = new (std::nothrow) int[aligned_size](); + masker[nbq].resize(aligned_size, 0.0); + packarray[nbq].resize(aligned_size, 0.0); for (auto k=0; k<(nfft3d); ++k) { masker[nbq][k] = 1; @@ -226,7 +247,7 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0, nidb[nbq] = nidb2[nbq] + (nwave_out[nbq] - nwave_in[nbq]); nwave_all[nbq] = nidb2[nbq]; } - c3db::parall->Vector_ISumAll(1, nbrillq+1, nwave_all); + c3db::parall->Vector_ISumAll(1, nbrillq+1, nwave_all.data()); nidb1_max = 0; for (auto nbq=0; nbqeggcut(); + kv[0] = 0.0; + kv[1] = 0.0; + kv[2] = 0.0; + } + else + { + ggcut = lattice->wggcut(); + kv[0] = p_kvector[3*(nb-1)]; + kv[1] = p_kvector[3*(nb-1)+1]; + kv[2] = p_kvector[3*(nb-1)+2]; + } } //zero_arow3 = new bool[(nx*ny + Alignment - 1) & ~(Alignment - 1)]; @@ -357,6 +403,9 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0, zero_row3[nbq] = new (std::nothrow) bool[(nq3)](); zero_row2[nbq] = new (std::nothrow) bool[(nq2)](); zero_slab23[nbq] = new (std::nothrow) bool[nx](); + //zero_row3[nbq].resize(nq3,false); + //zero_row2[nbq].resize(nq2,false); + //zero_slab23[nbq].resize(nx,false); } //zero_arow2 = new (std::nothrow) bool[(nx*nz + Alignment - 1) & ~(Alignment - 1)](); @@ -461,9 +510,11 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0, } //Gpack[nbq] = new (std::nothrow) double[(3*(nidb[nbq]) + Alignment - 1) & ~(Alignment - 1)](); + //Gpack[nbq] = (std::nothrow) double[(3*(nidb[nbq]))](); + //Gpack[nbq] = new (std::nothrow) double[(3*(nidb[nbq]))](); for (auto nbq=0; nbq<=nbrillq; ++nbq) - Gpack[nbq] = new (std::nothrow) double[(3*(nidb[nbq]))](); - + Gpack[nbq].resize(3*nidb[nbq], 0.0); + double *Gtmp = new (std::nothrow) double[nfft3d](); int one = 1; @@ -474,7 +525,7 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0, std::memcpy(Gtmp,Garray+i*nfft3d,nfft3d*sizeof(double)); this->r_pack(nb,Gtmp); - this->rr_pack_copy(nb,Gtmp,Gpack[nb]+i*(nidb[nb])); + this->rr_pack_copy(nb,Gtmp,Gpack[nb].data()+i*(nidb[nb])); } } @@ -515,16 +566,18 @@ CGrid::CGrid(Parallel *inparall, Lattice *inlattice, int mapping0, int balance0, } /* initialize print_grid */ - std::memset(nidb_print,0,nbrillouin*sizeof(int)); - std::memset(nwave_all_print,0,nbrillouin*sizeof(int)); + //std::memset(nidb_print.data(),0,nbrillouin*sizeof(int)); + //std::memset(nwave_all_print.data(),0,nbrillouin*sizeof(int)); + nidb_print.resize(nbrillouin,0); + nwave_all_print.resize(nbrillouin,0); for (auto nbq=0; nbqVector_ISumAll(3,nbrillouin,nidb_print); - c3db::parall->Vector_ISumAll(3,nbrillouin,nwave_all_print); + c3db::parall->Vector_ISumAll(3,nbrillouin,nidb_print.data()); + c3db::parall->Vector_ISumAll(3,nbrillouin,nwave_all_print.data()); /* initialize pfft3 queues */ @@ -590,7 +643,7 @@ void CGrid::c_unpack(const int nb, double *a) std::memcpy(c3db::c3db_tmp1,a,nn*sizeof(double)); std::memset(a, 0, 2*nfft3d*sizeof(double)); - c_bindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a); + c_bindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a); } @@ -605,7 +658,7 @@ void CGrid::c_pack(const int nb, double *a) std::memcpy(c3db::c3db_tmp1,a,2*nfft3d*sizeof(double)); std::memset(a, 0,2*nfft3d*sizeof(double)); - c_aindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a); + c_aindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a); if (balanced) mybalance->c_balance(nb, a); @@ -773,7 +826,7 @@ void CGrid::tt_pack_copy(const int nb, const double *a, double *b) ********************************/ void CGrid::tt_pack_copy(const int nb, const double *a, double *b) { // Ensure nb is within bounds - if (nb < 0 || nb >= 4) { + if (nb < 0 ) { std::cerr << "Error: 'nb' index out of bounds in tt_pack_copy" << std::endl; return; } @@ -897,7 +950,7 @@ void CGrid::r_unpack(const int nb, double *a) std::memcpy(c3db::c3db_tmp1, a, nn * sizeof(double)); std::memset(a, 0, nfft3d*sizeof(double)); - t_bindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a); + t_bindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a); } /******************************** @@ -914,7 +967,7 @@ void CGrid::t_unpack(const int nb, double *a) std::memcpy(c3db::c3db_tmp1, a, nn * sizeof(double)); std::memset(a, 0, nfft3d*sizeof(double)); - t_bindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a); + t_bindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a); } @@ -929,7 +982,7 @@ void CGrid::r_pack(const int nb, double *a) std::memcpy(c3db::c3db_tmp1,a,nfft3d*sizeof(double)); std::memset(a,0,nfft3d*sizeof(double)); - t_aindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a); + t_aindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a); if (balanced) mybalance->r_balance(nb, a); @@ -945,7 +998,7 @@ void CGrid::t_pack(const int nb, double *a) std::memcpy(c3db::c3db_tmp1,a,nfft3d*sizeof(double)); std::memset(a,0,nfft3d*sizeof(double)); - t_aindexcopy(nidb2[nb],packarray[nb],c3db::c3db_tmp1,a); + t_aindexcopy(nidb2[nb],packarray[nb].data(),c3db::c3db_tmp1,a); if (balanced) mybalance->t_balance(nb, a); @@ -1001,7 +1054,7 @@ void CGrid::i_pack(const int nb, int *a) for (auto i=0; ii_balance(nb, a); @@ -1440,7 +1493,7 @@ void CGrid::c_unpack_mid(const int nffts, const int nb, double *tmp1, double *tm std::memset(tmp1, 0, nffts*2*nfft3d*sizeof(double)); for (auto s=0; sc_balance_start(nffts, nb, a, request_indx, msgtype); diff --git a/Nwpw/nwpwlib/C3dB/CGrid.hpp b/Nwpw/nwpwlib/C3dB/CGrid.hpp index c0de6532..0a218ce3 100644 --- a/Nwpw/nwpwlib/C3dB/CGrid.hpp +++ b/Nwpw/nwpwlib/C3dB/CGrid.hpp @@ -18,6 +18,7 @@ #include "k1db.hpp" #include #include +#include namespace pwdft { @@ -28,16 +29,25 @@ class CGrid : public k1db, public c3db { /* G grid data */ - double *Garray, **Gpack; + + //double *Garray, **Gpack; + double *Garray; + std::vector> Gpack; double Gmax, Gmin; - int **masker, **packarray; - int *nwave, *nwave_entire, *nwave_all, *nidb, *nidb2, nidb1_max; - int *nwave_all_print, *nidb_print; + + //int **masker, **packarray; + //int *nwave, *nwave_entire, *nwave_all, *nidb, *nidb2, nidb1_max; + //int *nwave_all_print, *nidb_print; + std::vector> masker,packarray; + std::vector nwave_all_print,nidb_print; + std::vector nwave,nwave_entire,nwave_all,nidb,nidb2; + int nidb1_max; double *p_kvector, *p_weight; /* pfft data */ bool **zero_row2, **zero_row3, **zero_slab23; + //std::vector> zero_row2,zero_row3,zero_slab23; /* pfft_queue data */ int nffts_max = 1; @@ -74,27 +84,26 @@ class CGrid : public k1db, public c3db { delete [] p_weight; delete [] p_kvector; delete [] Garray; - delete [] nwave; - delete [] nwave_entire; - delete [] nwave_all; - delete [] nidb; - delete [] nidb2; - delete [] nwave_all_print; - delete [] nidb_print; + //delete [] nwave; + //delete [] nwave_entire; + //delete [] nwave_all; + //delete [] nidb; + //delete [] nidb2; + //delete [] nwave_all_print; + //delete [] nidb_print; for (auto nb=0; nb<=nbrillq; ++nb) { - delete [] Gpack[nb]; - delete [] masker[nb]; - delete [] packarray[nb]; + //delete [] Gpack[nb]; + //delete [] masker[nb]; + //delete [] packarray[nb]; delete [] zero_row3[nb]; delete [] zero_row2[nb]; delete [] zero_slab23[nb]; } - delete [] Gpack; - delete [] masker; - delete [] packarray; + //delete [] masker; + //delete [] packarray; delete [] zero_row3; delete [] zero_row2; delete [] zero_slab23; @@ -121,7 +130,7 @@ class CGrid : public k1db, public c3db { } double *Gxyz(const int i) { return Garray + i*nfft3d; } - double *Gpackxyz(const int nb, const int i) { return Gpack[nb] + i*(nidb[nb]); } + double *Gpackxyz(const int nb, const int i) { return Gpack[nb].data() + i*(nidb[nb]); } double Gmax_ray() { return Gmax; } double Gmin_ray() { return Gmin; } double dGmin_ray() { return 0.01 * Gmin; } diff --git a/Nwpw/nwpwlib/C3dB/Cneb.cpp b/Nwpw/nwpwlib/C3dB/Cneb.cpp index a54415eb..f158ba01 100644 --- a/Nwpw/nwpwlib/C3dB/Cneb.cpp +++ b/Nwpw/nwpwlib/C3dB/Cneb.cpp @@ -2627,7 +2627,7 @@ void Cneb::ggw_lambda(double dte, double *psi1, double *psi2, double *lmbda) } // for loop - ms /* correction due to contraint */ - fwf_Multiply(-1, psi1+nbq*shift2, lmbda + nbq*2*(ne[0]*ne[0]+ne[1]*ne[1]), rdte, psi2+nbq*shift2, rone); + Cneb::fwf_Multiply(-1, psi1+nbq*shift2, lmbda + nbq*2*(ne[0]*ne[0]+ne[1]*ne[1]), rdte, psi2+nbq*shift2, rone); } } diff --git a/Nwpw/nwpwlib/C3dB/Mapping3c.cpp b/Nwpw/nwpwlib/C3dB/Mapping3c.cpp index 2b12269f..0e57325e 100644 --- a/Nwpw/nwpwlib/C3dB/Mapping3c.cpp +++ b/Nwpw/nwpwlib/C3dB/Mapping3c.cpp @@ -238,8 +238,6 @@ Mapping3c::Mapping3c(const int mapin, const int npin, const int taskidin, /* makes expand and contract routines trivial parallel */ } - std::cout << "nx=" << nx << " ny=" << ny << " nz=" << nz << std::endl; - std::cout << "nq1=" << nq1 << " nq2=" << nq2 << " nq3=" << nq3 << std::endl; nfft3d = nx * nq1; if ((ny * nq2) > nfft3d) @@ -249,7 +247,6 @@ Mapping3c::Mapping3c(const int mapin, const int npin, const int taskidin, n2ft3d = 2*nfft3d; nfft3d_map = nz * nq3; n2ft3d_map = 2*nx * nq1; - std::cout << "nfft3d=" << nfft3d << " nfft3d_map=" << nfft3d_map << std::endl; nrft3d = nx * nqr1; if ((ny * nqr2) > nfft3d) diff --git a/Nwpw/nwpwlib/psp_library/Psp1d_Hamann.cpp b/Nwpw/nwpwlib/psp_library/Psp1d_Hamann.cpp index 33403b1f..2f63a7fe 100644 --- a/Nwpw/nwpwlib/psp_library/Psp1d_Hamann.cpp +++ b/Nwpw/nwpwlib/psp_library/Psp1d_Hamann.cpp @@ -1257,9 +1257,9 @@ void Psp1d_Hamann::cpp_generate_nonlocal_spline(CGrid *mygrid, int nbq, double * /* generate vnl */ //mygrid->t_pack_nzero(nbq,nprj,vnl); mygrid->t_pack_max_nzero(nprj,vnl); - gx = mygrid->Gpackxyz(nbq,0); - gy = mygrid->Gpackxyz(nbq,1); - gz = mygrid->Gpackxyz(nbq,2); + gx = mygrid->Gpackxyz(1+nbq,0); + gy = mygrid->Gpackxyz(1+nbq,1); + gz = mygrid->Gpackxyz(1+nbq,2); for (auto k=0; k