diff --git a/source/module_basis/module_ao/parallel_2d.h b/source/module_basis/module_ao/parallel_2d.h index d3c88d5783..8aeea9792f 100644 --- a/source/module_basis/module_ao/parallel_2d.h +++ b/source/module_basis/module_ao/parallel_2d.h @@ -15,6 +15,7 @@ class Parallel_2D ~Parallel_2D() = default; Parallel_2D& operator=(Parallel_2D&& rhs) = default; + Parallel_2D(Parallel_2D&& rhs) = default; /// number of local rows int get_row_size() const diff --git a/source/module_lr/esolver_lrtd_lcao.cpp b/source/module_lr/esolver_lrtd_lcao.cpp index 4c87eadb40..fb9ad4670e 100644 --- a/source/module_lr/esolver_lrtd_lcao.cpp +++ b/source/module_lr/esolver_lrtd_lcao.cpp @@ -67,6 +67,21 @@ inline void redirect_log(const bool& out_alllog) } } +inline int cal_nupdown_form_occ(const ModuleBase::matrix& wg) +{ // only for nspin=2 + const int& nk = wg.nr / 2; + auto occ_sum_k = [&](const int& is, const int& ib)->double { double o = 0.0; for (int ik = 0;ik < nk;++ik) { o += wg(is * nk + ik, ib); } return o;}; + int nupdown = 0; + for (int ib = 0;ib < wg.nc;++ib) + { + const int nu = static_cast(std::lround(occ_sum_k(0, ib))); + const int nd = static_cast(std::lround(occ_sum_k(1, ib))); + if (!(nu + nd)) { break; } + nupdown += nu - nd; + } + return nupdown; +} + template void LR::ESolver_LR::parameter_check()const { @@ -87,32 +102,25 @@ template void LR::ESolver_LR::set_dimension() { this->nspin = PARAM.inp.nspin; - if (nspin == 2) { std::cout << "** Assuming the spin-up and spin-down states are degenerate. **" << std::endl; -} this->nstates = input.lr_nstates; this->nbasis = PARAM.globalv.nlocal; // calculate the number of occupied and unoccupied states // which determines the basis size of the excited states this->nocc_max = LR_Util::cal_nocc(LR_Util::cal_nelec(ucell)); - this->nocc = std::max(1, std::min(input.nocc, this->nocc_max)); - this->nvirt = PARAM.inp.nbands - this->nocc_max; //nbands-nocc - if (input.nvirt > this->nvirt) { - GlobalV::ofs_warning << "ESolver_LR: input nvirt is too large to cover by nbands, set nvirt = nbands - nocc = " << this->nvirt << std::endl; - } else if (input.nvirt > 0) { this->nvirt = input.nvirt; -} - this->nbands = this->nocc + this->nvirt; - this->npairs = this->nocc * this->nvirt; + this->nocc_in = std::max(1, std::min(input.nocc, this->nocc_max)); + this->nvirt_in = PARAM.inp.nbands - this->nocc_max; //nbands-nocc + if (input.nvirt > this->nvirt_in) { GlobalV::ofs_warning << "ESolver_LR: input nvirt is too large to cover by nbands, set nvirt = nbands - nocc = " << this->nvirt_in << std::endl; } + else if (input.nvirt > 0) { this->nvirt_in = input.nvirt; } + this->nbands = this->nocc_in + this->nvirt_in; this->nk = this->kv.get_nks() / this->nspin; - if (this->nstates > this->nocc * this->nvirt * this->nk) { - throw std::invalid_argument("ESolver_LR: nstates > nocc*nvirt*nk"); -} - + this->nocc = { nocc_in, nocc_in }; + this->nvirt = { nvirt_in, nvirt_in }; + this->npairs = { nocc[0] * nvirt[0], nocc[1] * nvirt[1] }; GlobalV::ofs_running << "Setting LR-TDDFT parameters: " << std::endl; - GlobalV::ofs_running << "number of occupied bands: " << this->nocc << std::endl; - GlobalV::ofs_running << "number of virtual bands: " << this->nvirt << std::endl; + GlobalV::ofs_running << "number of occupied bands: " << nocc_in << std::endl; + GlobalV::ofs_running << "number of virtual bands: " << nvirt_in << std::endl; GlobalV::ofs_running << "number of Atom orbitals (LCAO-basis size): " << this->nbasis << std::endl; GlobalV::ofs_running << "number of KS bands: " << this->eig_ks.nc << std::endl; - GlobalV::ofs_running << "number of electron-hole pairs (2-particle basis size): " << this->npairs << std::endl; GlobalV::ofs_running << "number of excited states to be solved: " << this->nstates << std::endl; if (input.ri_hartree_benchmark == "aims" && !input.aims_nbasis.empty()) { @@ -121,6 +129,20 @@ void LR::ESolver_LR::set_dimension() } } +template +void LR::ESolver_LR::reset_dim_spin2() +{ + if (nspin != 2) { return; } + if (nupdown == 0) { std::cout << "** Assuming the spin-up and spin-down states are degenerate. **" << std::endl; } + else + { + nupdown > 0 ? ((nocc[1] -= nupdown) && (nvirt[1] += nupdown)) : ((nocc[0] += nupdown) && (nvirt[0] -= nupdown)); + // npairs = { nocc[0] * nvirt[0], nocc[1] * nvirt[1] }; + std::cout << "** Solve the spin-up and spin-down states separately for open-shell system. **" << std::endl; + } + if (nstates > std::min(npairs[0], npairs[1]) * nk) { throw std::invalid_argument("ESolver_LR: nstates > nocc*nvirt*nk"); } +} + template LR::ESolver_LR::ESolver_LR(ModuleESolver::ESolver_KS_LCAO&& ks_sol, const Input_para& inp, UnitCell& ucell) @@ -171,7 +193,7 @@ LR::ESolver_LR::ESolver_LR(ModuleESolver::ESolver_KS_LCAO&& ks_sol { this->psi_ks = new psi::Psi(this->kv.get_nks(), this->paraC_.get_col_size(), this->paraC_.get_row_size()); this->eig_ks.create(this->kv.get_nks(), this->nbands); - const int start_band = this->nocc_max - this->nocc; + const int start_band = this->nocc_max - std::max(nocc[0], nocc[1]); for (int ik = 0;ik < this->kv.get_nks();++ik) { Cpxgemr2d(this->nbasis, this->nbands, &(*ks_sol.psi)(ik, 0, 0), 1, start_band + 1, ks_sol.pv.desc_wfc, @@ -182,6 +204,11 @@ LR::ESolver_LR::ESolver_LR(ModuleESolver::ESolver_KS_LCAO&& ks_sol #else move_gs(); #endif + if (nspin == 2) + { + this->nupdown = cal_nupdown_form_occ(ks_sol.pelec->wg); + reset_dim_spin2(); + } //grid integration this->gt_ = std::move(ks_sol.GridT); @@ -275,6 +302,11 @@ LR::ESolver_LR::ESolver_LR(const Input_para& inp, UnitCell& ucell) : inpu this->paraMat_.ncol_bands, this->paraMat_.get_row_size()); this->read_ks_wfc(); + if (nspin == 2) + { + this->nupdown = cal_nupdown_form_occ(this->pelec->wg); + reset_dim_spin2(); + } LR_Util::setup_2d_division(this->paraC_, 1, this->nbasis, this->nbands #ifdef __MPI @@ -409,10 +441,10 @@ void LR::ESolver_LR::runner(int istep, UnitCell& cell) #ifdef __EXX this->exx_lri, this->exx_info.info_global.hybrid_alpha, #endif - this->gint_, this->pot[is], this->kv, & this->paraX_, & this->paraC_, & this->paraMat_, + this->gint_, this->pot[is], this->kv, this->paraX_, & this->paraC_, & this->paraMat_, spin_type[is], input.ri_hartree_benchmark, (input.ri_hartree_benchmark == "aims" ? input.aims_nbasis : std::vector({}))); // solve the Casida equation - HSolverLR hsol(nk, this->npairs, is, this->input.out_wfc_lr); + HSolverLR hsol(nk, this->npairs[is], is, this->input.out_wfc_lr); hsol.set_diagethr(hsol.diag_ethr, 0, 0, std::max(1e-13, this->input.lr_thr)); hsol.solve(phamilt, *this->X[is], this->pelec, this->input.lr_solver/*, !std::set({ "hf", "hse" }).count(this->xc_kernel)*/); //whether the kernel is Hermitian @@ -450,8 +482,8 @@ void LR::ESolver_LR::after_all_runners() for (int is = 0;is < this->nspin;++is) { LR_Spectrum spectrum(&this->pelec->ekb.c[is * this->nstates], *this->X[is], - this->nspin, this->nbasis, this->nocc, this->nvirt, this->gint_, *this->pw_rho, *this->psi_ks, - this->ucell, this->kv, this->paraX_, this->paraC_, this->paraMat_); + this->nspin, this->nbasis, this->nocc[is], this->nvirt[is], this->gint_, *this->pw_rho, *this->psi_ks, + this->ucell, this->kv, this->paraX_[is], this->paraC_, this->paraMat_); spectrum.oscillator_strength(); spectrum.transition_analysis(is); spectrum.optical_absorption(freq, input.abs_broadening, is); @@ -463,11 +495,16 @@ template void LR::ESolver_LR::setup_eigenvectors_X() { ModuleBase::TITLE("ESolver_LR", "setup_eigenvectors_X"); - LR_Util::setup_2d_division(this->paraX_, 1, this->nvirt, this->nocc + for (int is = 0;is < nspin;++is) + { + Parallel_2D px; + LR_Util::setup_2d_division(px, /*nb2d=*/1, this->nvirt[is], this->nocc[is] #ifdef __MPI - , this->paraC_.blacs_ctxt + , this->paraC_.blacs_ctxt #endif - );//nvirt - row, nocc - col + );//nvirt - row, nocc - col + this->paraX_.emplace_back(std::move(px)); + } this->X.resize(this->nspin); const std::vector spin_types = { "Spin Singlet", "Spin Triplet" }; // if spectrum-only, read the LR-eigenstates from file and return @@ -486,7 +523,7 @@ void LR::ESolver_LR::setup_eigenvectors_X() { this->X[is] = std::make_shared>(this->nk, this->nstates, - this->paraX_.get_local_size(), + this->paraX_[is].get_local_size(), nullptr, false); // band(state)-first this->X[is]->zero_out(); @@ -499,37 +536,40 @@ template void LR::ESolver_LR::set_X_initial_guess() { // set the initial guess of X - // if (E_{lumo}-E_{homo-1} < E_{lumo+1}-E{homo}), mode = 0, else 1(smaller first) - bool ix_mode = false; //default - if (this->eig_ks.nc > nocc + 1 && nocc >= 2 && eig_ks(0, nocc) - eig_ks(0, nocc - 2) - 1e-5 > eig_ks(0, nocc + 1) - eig_ks(0, nocc - 1)) { ix_mode = true; } - GlobalV::ofs_running << "setting the initial guess of X: " << std::endl; - if (nocc >= 2 && eig_ks.nc > nocc) { GlobalV::ofs_running << "E_{lumo}-E_{homo-1}=" << eig_ks(0, nocc) - eig_ks(0, nocc - 2) << std::endl; } - if (nocc >= 1 && eig_ks.nc > nocc + 1) { GlobalV::ofs_running << "E_{lumo+1}-E{homo}=" << eig_ks(0, nocc + 1) - eig_ks(0, nocc - 1) << std::endl; } - GlobalV::ofs_running << "mode of X-index: " << ix_mode << std::endl; - - /// global index map between (i,c) and ix - ModuleBase::matrix ioiv2ix; - std::vector> ix2ioiv; - std::pair>> indexmap = - LR_Util::set_ix_map_diagonal(ix_mode, nocc, nvirt); - - ioiv2ix = std::move(std::get<0>(indexmap)); - ix2ioiv = std::move(std::get<1>(indexmap)); - - // use unit vectors as the initial guess - // for (int i = 0; i < std::min(this->nstates * PARAM.inp.pw_diag_ndim, nocc * nvirt); i++) for (int is = 0;is < this->nspin;++is) { + const int& no = this->nocc[is]; + const int& nv = this->nvirt[is]; + const int& np = this->npairs[is]; + const Parallel_2D& px = this->paraX_[is]; + + // if (E_{lumo}-E_{homo-1} < E_{lumo+1}-E{homo}), mode = 0, else 1(smaller first) + bool ix_mode = false; //default + if (this->eig_ks.nc > no + 1 && no >= 2 && eig_ks(is, no) - eig_ks(is, no - 2) - 1e-5 > eig_ks(is, no + 1) - eig_ks(is, no - 1)) { ix_mode = true; } + GlobalV::ofs_running << "setting the initial guess of X: " << std::endl; + if (no >= 2 && eig_ks.nc > no) { GlobalV::ofs_running << "E_{lumo}-E_{homo-1}=" << eig_ks(is, no) - eig_ks(is, no - 2) << std::endl; } + if (no >= 1 && eig_ks.nc > no + 1) { GlobalV::ofs_running << "E_{lumo+1}-E{homo}=" << eig_ks(is, no + 1) - eig_ks(is, no - 1) << std::endl; } + GlobalV::ofs_running << "mode of X-index: " << ix_mode << std::endl; + + /// global index map between (i,c) and ix + ModuleBase::matrix ioiv2ix; + std::vector> ix2ioiv; + std::pair>> indexmap = + LR_Util::set_ix_map_diagonal(ix_mode, no, nv); + + ioiv2ix = std::move(std::get<0>(indexmap)); + ix2ioiv = std::move(std::get<1>(indexmap)); + for (int s = 0; s < nstates; ++s) { this->X[is]->fix_b(s); - int ipair = s % (npairs); + int ipair = s % np; int occ_global = std::get<0>(ix2ioiv[ipair]); // occ int virt_global = std::get<1>(ix2ioiv[ipair]); // virt - int ik = s / (npairs); - if (this->paraX_.in_this_processor(virt_global, occ_global)) - (*X[is])(ik, this->paraX_.global2local_col(occ_global) * this->paraX_.get_row_size() - + this->paraX_.global2local_row(virt_global)) + int ik = s / np; + if (px.in_this_processor(virt_global, occ_global)) + (*X[is])(ik, px.global2local_col(occ_global) * px.get_row_size() + + px.global2local_row(virt_global)) = (static_cast(1.0) / static_cast(nk)); } this->X[is]->fix_b(0); //recover the pointer @@ -566,8 +606,8 @@ void LR::ESolver_LR::read_ks_wfc() { #ifdef __EXX int ncore = 0; - std::vector eig_ks_vec = RI_Benchmark::read_aims_ebands(PARAM.globalv.global_readin_dir + "band_out", nocc, nvirt, ncore); - std::cout << "ncore=" << ncore << ", nocc=" << nocc << ", nvirt=" << nvirt << ", nbands=" << this->nbands << std::endl; + std::vector eig_ks_vec = RI_Benchmark::read_aims_ebands(PARAM.globalv.global_readin_dir + "band_out", nocc_in, nvirt_in, ncore); + std::cout << "ncore=" << ncore << ", nocc=" << nocc_in << ", nvirt=" << nvirt_in << ", nbands=" << this->nbands << std::endl; std::cout << "eig_ks_vec.size()=" << eig_ks_vec.size() << std::endl; if(eig_ks_vec.size() != this->nbands) {ModuleBase::WARNING_QUIT("ESolver_LR", "read_aims_ebands failed.");}; for (int i = 0;i < nbands;++i) { this->pelec->ekb(0, i) = eig_ks_vec[i]; } @@ -577,7 +617,7 @@ void LR::ESolver_LR::read_ks_wfc() #endif } else if (!ModuleIO::read_wfc_nao(PARAM.globalv.global_readin_dir, this->paraMat_, *this->psi_ks, this->pelec, - /*skip_bands=*/this->nocc_max - this->nocc)) { + /*skip_bands=*/this->nocc_max - this->nocc_in)) { ModuleBase::WARNING_QUIT("ESolver_LR", "read ground-state wavefunction failed."); } this->eig_ks = std::move(this->pelec->ekb); diff --git a/source/module_lr/esolver_lrtd_lcao.h b/source/module_lr/esolver_lrtd_lcao.h index f9db98d765..61dfe6fe9f 100644 --- a/source/module_lr/esolver_lrtd_lcao.h +++ b/source/module_lr/esolver_lrtd_lcao.h @@ -68,17 +68,20 @@ namespace LR /// @brief Excited state info. size: nstates * nks * (nocc(local) * nvirt (local)) std::vector>> X; - int nocc = 1; + std::vector nocc = { 1, 1 }; ///< number of occupied orbitals for each spin used in the calculation + int nocc_in = 1; ///< nocc read from input (adjusted by nelec): max(spin-up, spindown) int nocc_max = 1; ///< nelec/2 - int nvirt = 1; + std::vector nvirt = { 1, 1 }; ///< number of virtual orbitals for each spin used in the calculation + int nvirt_in = 1; ///< nvirt read from input (adjusted by nelec): min(spin-up, spindown) int nbands = 2; int nbasis = 2; /// n_occ*nvirt, the basis size of electron-hole pair representation - int npairs = 1; + std::vector npairs = { 1, 1 }; /// how many 2-particle states to be solved int nstates = 1; int nspin = 1; int nk = 1; + int nupdown = 0; std::string xc_kernel; Grid_Technique gt_; @@ -90,7 +93,7 @@ namespace LR /// @brief variables for parallel distribution of KS orbitals Parallel_2D paraC_; /// @brief variables for parallel distribution of excited states - Parallel_2D paraX_; + std::vector paraX_; /// @brief variables for parallel distribution of matrix in AO representation Parallel_Orbitals paraMat_; @@ -111,6 +114,8 @@ namespace LR void parameter_check() const; /// @brief set nocc, nvirt, nbasis, npairs and nstates void set_dimension(); + /// reset nocc, nvirt, npairs after read ground-state wavefunction when nspin=2 + void reset_dim_spin2(); #ifdef __EXX /// Tdata of Exx_LRI is same as T, for the reason, see operator_lr_exx.h diff --git a/source/module_lr/hamilt_casida.cpp b/source/module_lr/hamilt_casida.cpp index 08a00bb09e..c6c509d1b7 100644 --- a/source/module_lr/hamilt_casida.cpp +++ b/source/module_lr/hamilt_casida.cpp @@ -5,21 +5,24 @@ namespace LR std::vector HamiltCasidaLR::matrix() { ModuleBase::TITLE("HamiltCasidaLR", "matrix"); - int npairs = this->nocc * this->nvirt; + const int no = this->nocc[0]; + const int nv = this->nvirt[0]; + const auto& px = this->pX[0]; + int npairs = no * nv; std::vector Amat_full(this->nk * npairs * this->nk * npairs, 0.0); for (int ik = 0;ik < this->nk;++ik) - for (int j = 0;j < nocc;++j) - for (int b = 0;b < nvirt;++b) + for (int j = 0;j < no;++j) + for (int b = 0;b < nv;++b) {//calculate A^{ai} for each bj - int bj = j * nvirt + b; + int bj = j * nv + b; int kbj = ik * npairs + bj; - psi::Psi X_bj(1, 1, this->nk * this->pX->get_local_size()); // k1-first, like in iterative solver + psi::Psi X_bj(1, 1, this->nk * px.get_local_size()); // k1-first, like in iterative solver X_bj.zero_out(); - // X_bj(0, 0, lj * this->pX->get_row_size() + lb) = this->one(); - int lj = this->pX->global2local_col(j); - int lb = this->pX->global2local_row(b); - if (this->pX->in_this_processor(b, j)) X_bj(0, 0, ik * this->pX->get_local_size() + lj * this->pX->get_row_size() + lb) = this->one(); - psi::Psi A_aibj(1, 1, this->nk * this->pX->get_local_size()); // k1-first + // X_bj(0, 0, lj * px.get_row_size() + lb) = this->one(); + int lj = px.global2local_col(j); + int lb = px.global2local_row(b); + if (px.in_this_processor(b, j)) X_bj(0, 0, ik * px.get_local_size() + lj * px.get_row_size() + lb) = this->one(); + psi::Psi A_aibj(1, 1, this->nk * px.get_local_size()); // k1-first A_aibj.zero_out(); hamilt::Operator* node(this->ops); @@ -32,9 +35,9 @@ namespace LR A_aibj.fix_kb(0, 0); #ifdef __MPI for (int ik_ai = 0;ik_ai < this->nk;++ik_ai) - LR_Util::gather_2d_to_full(*this->pX, &A_aibj.get_pointer()[ik_ai * this->pX->get_local_size()], + LR_Util::gather_2d_to_full(px, &A_aibj.get_pointer()[ik_ai * px.get_local_size()], Amat_full.data() + kbj * this->nk * npairs /*col, bj*/ + ik_ai * npairs/*row, ai*/, - false, this->nvirt, this->nocc); + false, nv, no); #endif } // output Amat diff --git a/source/module_lr/hamilt_casida.h b/source/module_lr/hamilt_casida.h index 8330716377..63ee792b65 100644 --- a/source/module_lr/hamilt_casida.h +++ b/source/module_lr/hamilt_casida.h @@ -19,8 +19,8 @@ namespace LR HamiltCasidaLR(std::string& xc_kernel, const int& nspin, const int& naos, - const int& nocc, - const int& nvirt, + const std::vector& nocc, + const std::vector& nvirt, const UnitCell& ucell_in, const std::vector& orb_cutoff, Grid_Driver& gd_in, @@ -33,7 +33,7 @@ namespace LR TGint* gint_in, std::weak_ptr pot_in, const K_Vectors& kv_in, - Parallel_2D* pX_in, + std::vector& pX_in, Parallel_2D* pc_in, Parallel_Orbitals* pmat_in, const std::string& spin_type, @@ -46,7 +46,7 @@ namespace LR this->DM_trans.resize(1); this->DM_trans[0] = LR_Util::make_unique>(&kv_in, pmat_in, nspin); // add the diag operator (the first one) - this->ops = new OperatorLRDiag(eig_ks.c, pX_in, nk, nocc, nvirt); + this->ops = new OperatorLRDiag(eig_ks.c, &pX[0], nk, nocc[0], nvirt[0]); //add Hxc operator #ifdef __EXX using TAC = std::pair>; @@ -77,7 +77,7 @@ namespace LR } if (!std::set({ "rpa", "hf" }).count(xc_kernel)) { throw std::runtime_error("ri_hartree_benchmark is only supported for xc_kernel rpa and hf"); } RI_Benchmark::OperatorRIHartree* ri_hartree_op - = new RI_Benchmark::OperatorRIHartree(ucell_in, naos, nocc, nvirt, *psi_ks_in, + = new RI_Benchmark::OperatorRIHartree(ucell_in, naos, nocc[0], nvirt[0], *psi_ks_in, Cs_read, Vs_read, ri_hartree_benchmark == "aims", aims_nbasis); this->ops->add(ri_hartree_op); } @@ -102,8 +102,8 @@ namespace LR exx_lri_in.lock()->reset_Vs(Vs_read); } // std::cout << "exx_alpha=" << exx_alpha << std::endl; // the default value of exx_alpha is 0.25 when dft_functional is pbe or hse - hamilt::Operator* lr_exx = new OperatorLREXX(nspin, naos, nocc, nvirt, ucell_in, psi_ks_in, - this->DM_trans, exx_lri_in, kv_in, pX_in, pc_in, pmat_in, + hamilt::Operator* lr_exx = new OperatorLREXX(nspin, naos, nocc[0], nvirt[0], ucell_in, psi_ks_in, + this->DM_trans, exx_lri_in, kv_in, &pX_in[0], pc_in, pmat_in, xc_kernel == "hf" ? 1.0 : exx_alpha, //alpha ri_hartree_benchmark != "none"/*whether to cal_dm_trans first here*/, aims_nbasis); @@ -122,10 +122,10 @@ namespace LR virtual std::vector matrix() override; private: - int nocc; - int nvirt; + const std::vector& nocc; + const std::vector& nvirt; int nk; - Parallel_2D* pX = nullptr; + std::vector& pX; T one(); /// transition density matrix in AO representation /// Hxc only: size=1, calculate on the same address for each bands diff --git a/source/module_lr/operator_casida/operator_lr_hxc.cpp b/source/module_lr/operator_casida/operator_lr_hxc.cpp index f9bc15f2f1..79b782daca 100644 --- a/source/module_lr/operator_casida/operator_lr_hxc.cpp +++ b/source/module_lr/operator_casida/operator_lr_hxc.cpp @@ -14,6 +14,11 @@ inline double conj(double a) { return a; } inline std::complex conj(std::complex a) { return std::conj(a); } +template +inline psi::Psi get_psi_spin(const psi::Psi& psi_in, const int& is, const int& nk) +{ + return psi::Psi(&psi_in(is * nk, 0, 0), psi_in, nk, psi_in.get_nbands()); +} namespace LR { @@ -23,14 +28,16 @@ namespace LR ModuleBase::TITLE("OperatorLRHxc", "act"); assert(nbands <= psi_in.get_nbands()); const int& nk = this->kv.get_nks() / this->nspin; + const int& sl = ispin_ks[0]; + const int& sr = ispin_ks[1]; //print // if (this->first_print) LR_Util::print_psi_kfirst(*psi_ks, "psi_ks"); this->init_DM_trans(nbands, this->DM_trans); // initialize transion density matrix - psi::Psi psi_in_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_in, nk, this->pX->get_local_size()); - psi::Psi psi_out_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_out, nk, this->pX->get_local_size()); + psi::Psi psi_in_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_in, nk, pX[sr].get_local_size()); + psi::Psi psi_out_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_out, nk, pX[sr].get_local_size()); const int& lgd = gint->gridt->lgd; for (int ib = 0;ib < nbands;++ib) @@ -45,10 +52,10 @@ namespace LR // 1. transition density matrix #ifdef __MPI - std::vector dm_trans_2d = cal_dm_trans_pblas(psi_in_bfirst, *pX, *psi_ks, *pc, naos, nocc, nvirt, *pmat); + std::vector dm_trans_2d = cal_dm_trans_pblas(psi_in_bfirst, pX[sr], get_psi_spin(*psi_ks, sr, nk), *pc, naos, nocc[sr], nvirt[sr], *pmat); if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, *pmat); #else - std::vector dm_trans_2d = cal_dm_trans_blas(psi_in_bfirst, *psi_ks, nocc, nvirt); + std::vector dm_trans_2d = cal_dm_trans_blas(psi_in_bfirst, get_psi_spin(*psi_ks, sr, nk), nocc[sr], nvirt[sr]); if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); #endif // tensor to vector, then set DMK @@ -80,9 +87,9 @@ namespace LR // 5. [AX]^{Hxc}_{ai}=\sum_{\mu,\nu}c^*_{a,\mu,}V^{Hxc}_{\mu,\nu}c_{\nu,i} #ifdef __MPI - cal_AX_pblas(v_hxc_2d, *this->pmat, *this->psi_ks, *this->pc, naos, nocc, nvirt, *this->pX, psi_out_bfirst); + cal_AX_pblas(v_hxc_2d, *this->pmat, get_psi_spin(*psi_ks, sl, nk), *this->pc, naos, nocc[sl], nvirt[sl], this->pX[sl], psi_out_bfirst); #else - cal_AX_blas(v_hxc_2d, *this->psi_ks, nocc, nvirt, psi_out_bfirst); + cal_AX_blas(v_hxc_2d, get_psi_spin(*psi_ks, sl, nk), nocc[sl], nvirt[sl], psi_out_bfirst); #endif // if (this->first_print) LR_Util::print_psi_bandfirst(psi_out_bfirst, "5.AX", ib); } diff --git a/source/module_lr/operator_casida/operator_lr_hxc.h b/source/module_lr/operator_casida/operator_lr_hxc.h index 1c5ac17591..5bc2313f98 100644 --- a/source/module_lr/operator_casida/operator_lr_hxc.h +++ b/source/module_lr/operator_casida/operator_lr_hxc.h @@ -16,8 +16,8 @@ namespace LR //when nspin=2, nks is 2 times of real number of k-points. else (nspin=1 or 4), nks is the real number of k-points OperatorLRHxc(const int& nspin, const int& naos, - const int& nocc, - const int& nvirt, + const std::vector& nocc, + const std::vector& nvirt, const psi::Psi* psi_ks_in, std::vector>>& DM_trans_in, typename TGint::type* gint_in, @@ -26,13 +26,14 @@ namespace LR const std::vector& orb_cutoff, Grid_Driver& gd_in, const K_Vectors& kv_in, - Parallel_2D* pX_in, + std::vector& pX_in, Parallel_2D* pc_in, - Parallel_Orbitals* pmat_in) + Parallel_Orbitals* pmat_in, + const std::vector& ispin_ks = { 0, 0 }) : nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt), psi_ks(psi_ks_in), DM_trans(DM_trans_in), gint(gint_in), pot(pot_in), ucell(ucell_in), orb_cutoff_(orb_cutoff), gd(gd_in), kv(kv_in), - pX(pX_in), pc(pc_in), pmat(pmat_in) + pX(pX_in), pc(pc_in), pmat(pmat_in), ispin_ks(ispin_ks) { ModuleBase::TITLE("OperatorLRHxc", "OperatorLRHxc"); this->cal_type = hamilt::calculation_type::lcao_gint; @@ -98,8 +99,9 @@ namespace LR const int& nspin; const int nspin_solve = 1; ///< in singlet-triplet calculation, the Casida equation is solved respectively so nspin_solve in a single problem is 1 const int& naos; - const int& nocc; - const int& nvirt; + const std::vector& nocc; + const std::vector& nvirt; + const std::vector ispin_ks = { 0,0 }; ///< the index of spin of psi_ks used in {AX, DM_trans} const K_Vectors& kv; /// ground state wavefunction const psi::Psi* psi_ks = nullptr; @@ -112,7 +114,7 @@ namespace LR /// parallel info Parallel_2D* pc = nullptr; - Parallel_2D* pX = nullptr; + std::vector& pX; Parallel_Orbitals* pmat = nullptr; std::weak_ptr pot;