diff --git a/source/module_lr/utils/ri_benchmark.cpp b/source/module_lr/utils/ri_benchmark.cpp new file mode 100644 index 0000000000..722629d08e --- /dev/null +++ b/source/module_lr/utils/ri_benchmark.cpp @@ -0,0 +1,239 @@ +#include "ri_benchmark.h" +#include "module_base/module_container/base/third_party/blas.h" +#include "module_base/blas_connector.h" +namespace RI_Benchmark +{ + // std::cout << "the size of Cs:" << std::endl; + // for (auto& it1: Cs) + // { + // for (auto& it2: it1.second) + // { + // std::cout << "iat1=" << it1.first << ", iat2= " << it2.first.first << std::endl; + // auto& ts = it2.second.shape; + // std::cout << "Tensor shape:" << ts[0] << " " << ts[1] << " " << ts[2] << std::endl; // abf, nw1, nw2 + // } + // } + + template + inline int count_nao_from_Cs(const TLRI& Cs_ao) + { + int naos = 0; + for (const auto& it2 : Cs_ao.at(0)) + { + assert(it2.second.shape.size() == 3); + naos += it2.second.shape[2]; + } + return naos + } + + /// @brief slice the psi from wfc_ks of all the k-points, some bands, some basis functions + /// @return + template + inline std::vector slice_psi(const psi::Psi& wfc_ks, + const bool& ibstart, + const int& nb, + const int& iwstart, + const int& nw) + { + std::vector psi_slice(wfc_ks.get_nk() * nb * nw); + int i = 0; + for (int ik = 0; ik < wfc_ks.get_nk(); ++ik) + for (int ib = ibstart; ib < nb; ++ib) + for (int iw = iwstart; iw < nw; ++iw) + psi_slice[i++] = wfc_ks(ik, ib, iw); + return psi_slice; + } + + template + TLRI cal_Cs_mo(const UnitCell& ucell, + const TLRI& Cs_ao, + const psi::Psi& wfc_ks, + const int& nocc, + const int& nvirt, + const int& occ_first) + { + assert(wfc_ks.get_nk() == 1); // currently only gamma-only is supported + assert(nocc + nvirt <= wfc_ks.get_nbands()); + const int& naos = cs.get_nbasis(); + assert(count_nao_from_Cs(Cs_ao) == naos); // wfc_ks must be global + + TLRI Cs_mo; + int iw1 = 0, iw2 = 0; + for (auto& it1 : Cs_ao[i]) + { + const int& iat1 = it1.first; + const int& it1 = ucell.iat2it[iat1]; + const int& nw1 = ucell.atoms[it1].nw; + for (auto& it2 : it1.second) + { + const int& iat2 = it2.first.first; + const int& it2 = ucell.iat2it[iat2]; + const int& nw2 = ucell.atoms[it2].nw; + + const auto& tensor_ao = it2.second; + const int& nabf = tensor_ao.shape[0]; + assert(tensor_ao.shape.size() == 3); // abf, nw1, nw2 + + std:: : vector psi_a1 = occ_first ? slice_psi(wfc_ks, 0, nocc, iw1, nw1) + : slice_psi(wfc_ks, nocc, nvirt, iw1, nw1); // virt + std:: : vector psi_a2 = occ_first ? slice_psi(wfc_ks, nocc, nvirt, iw2, nw2) + : slice_psi(wfc_ks, 0, nocc, iw2, nw2); // occ + + Cs_mo[i][it.first][it2.first] = RI::Tensor({ nabf,nocc,nvirt }); + for (int iabf = 0; iabf < nabf; ++iabf) + { + const auto ptr = &tensor_ao(iabf, 0, 0); + std::vector tmp(nw1 * (occ_first ? nvirt : nocc)); + // caution: Cs are row-major (ia2 contiguous) + if (occ_first) + { + gemm('T', 'N', nvirt, nw1, nw2, 1.0, psi_a2.data(), nw2, ptr, nw2, 0.0, tmp.data(), nvirt); + gemm('N', 'N', nvirt, nocc, nw1, 1.0, tmp.data(), nvirt, psi_a1.data(), nw1, 0.0, Cs_mo[i][it.first][it2.first](iabf, 0, 0), nvirt); + } + else + { + gemm('T', 'N', nw1, nocc, nw2, 1.0, ptr, nw2, psi_a2.data(), nw2, 0.0, tmp.data(), nw1); + gemm('T', 'N', nvirt, nocc, nw1, 1.0, psi_a1.data(), nw1, tmp.data(), nw1, 0.0, Cs_mo[i][it.first][it2.first](iabf, 0, 0), nvirt); + } + } + iw1 += nw1; + iw2 += nw2; + } + } + return Cs_mo; + } + + template + std::vector cal_Amat_full(const TLRI& Cs_a, + const TLRI& Cs_b, + const TLRI& Vs) + { + assert(Cs_a.second.second.shape.size() == 3); // abf, nocc, nvirt + assert(Cs_b.second.second.shape.size() == 3); // abf, nocc, nvirt + assert(Vs.second.second.shape.size() == 2); // abf, abf + + const int& npairs = Cs_a.second.second.shape[1] * Cs_a.second.second.shape[2]; + std::vector Amat_full(npairs * npairs, 0.0); + + for (auto& itv1 : Vs) + { + const int& iat1 = itv1.first; + for (auto& itv2 : itv1.second) + { + const int& iat2 = itv2.first.first; + const auto& tensor_v = itv2.second; // (nabf2, nabf1), T + for (auto& itca2 : Cs_a.at(iat1)) + { + const int& iat3 = itca2.first.first; + const auto& tensor_ca = itca2.second; // (nvirt*nocc, nabf1), N + for (auto& itcb2 : Cs_b.at(iat2)) + { + const int& iat4 = itcb2.first.first; + const auto& tensor_cb = itcb2.second; // (nvirt*nocc, nabf2), T + const int& nabf1 = tensor_v.shape[0]; + const int& nabf2 = tensor_v.shape[1]; + assert(tensor_ca.shape[0] == nabf1); //abf1 + assert(tensor_cb.shape[0] == nabf2); //abf2 + std::vector tmp(npairs * nabf1); + gemm('T', 'T', nabf1, npairs, nabf2, 1.0, tensor_v.ptr(), nabf2, tensor_cb.ptr(), npairs, 0.0, tmp.data(), nabf1); + gemm('N', 'N', npairs, npairs, nabf1, 1.0, tensor_ca.ptr(), npairs, tmp.data(), nabf1, 1.0, Amat_full.data(), npairs); + } + } + } + } + return Amat_full; + } + + template + TLRIX cal_CsX(const UnitCell& ucell, + const TLRI& Cs_ao, + const psi::Psi& wfc_ks, + const psi::Psi& X_istate, + const int& nocc) + { + assert(X_istate.get_nk() == 1); // currently only gamma-only is supported + const int& npairs = X_istate.get_nbasis(); + assert(npairs % nocc == 0); + const int& nvirt = npairs / nocc; + const TLRI& Cs_mo = cal_Cs_mo(ucell, Cs_ao, wfc_ks, nocc, nvirt,/*jb: occ_first=*/true); + + TLRIX CsX; + for (auto& it1 : Cs_mo) + { + const int& iat1 = it1.first; + for (auto& it2 : it1.second) + { + const int& iat2 = it2.first.first; + auto& tensor_c = it2.second; + const int& nabf = tensor_c.shape[0]; + + std::vector CX(nabf); + for (int iabf = 0;iabf < nabf;++iabf) + CX[iabf] = dot(X.get_nbasis(), &tensor_c(iabf, 0, 0), 1, X_istate.get_pointer(), 1); + CsX[iat1][it2.first] = CX; + } + } + return CsX; + } + + template + std::vector cal_AX(const TLRI& Cs_a, + const TLRIX& Cs_bX, + const TLRI& Vs) + { + assert(Cs_a.second.second.shape.size() == 3); // abf, nocc, nvirt + assert(Vs.second.second.shape.size() == 2); // abf, abf + + const int& npairs = Cs_a.second.second.shape[1] * Cs_a.second.second.shape[2]; + std::vector AX(npairs, 0.0); + + for (auto& itv1 : Vs) + { + const int& iat1 = itv1.first; + for (auto& itv2 : itv1.second) + { + const int& iat2 = itv2.first.first; + const auto& tensor_v = itv2.second; // (nabf2, nabf1), T + for (auto& itca2 : Cs_a.at(iat1)) + { + const int& iat3 = itca2.first.first; + const auto& tensor_ca = itca2.second; // (nvirt*nocc, nabf1), N + for (auto& itcb2 : Cs_bX.at(iat2)) + { + const int& iat4 = itcb2.first.first; + const auto& vector_cb = itcb2.second; // (nvirt*nocc, nabf2), T + const int& nabf1 = tensor_v.shape[0]; + const int& nabf2 = tensor_v.shape[1]; + assert(tensor_ca.shape[0] == nabf1); //abf1 + assert(vector_cb.size() == nabf2); //abf2 + std::vector tmp(nabf1); + gemv('T', nabf1, nabf2, 1.0, tensor_v.ptr(), nabf2, vector_cb.data(), 1, 0.0, tmp.data(), 1); + gemv('N', npairs, nabf1, 1.0, tensor_ca.ptr(), npairs, tmp.data(), 1, 1.0, AX.data(), 1); + } + } + } + } + return Amat_full; + } + + template + void benchmark_driver_A(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc, + const int& nocc, const int& nvirt) + { + const int nks = 1; + const int& npairs = nocc * nvirt; + const TLRI& Cs_ao = read_Cs_ao(file_Cs); + const TLRI& Vs = read_Vs_abf(file_Vs); + const int nbands = LR_Util:: + const psi::Psi&wfc_ks = read_wfc_ks(file_kswfc); + + const TLRI& Cs_mo_ai = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/false); + const TLRI& Cs_mo_jb = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/true); + const std::vector& Amat_full = cal_Amat_full(Cs_mo_ai, Cs_mo_jb, Vs); + std::vector eigenvalue(npairs); + LR_Util::diag_lapack(nks * npairs, Amat_full.data(), eigenvalue.data()); + std::cout << "eigenvalues:" << std::endl; + for (int i = 0; i < npairs; ++i) { std::cout << eigenvalue[i] << " " << std::endl; } + + } +} diff --git a/source/module_lr/utils/ri_benchmark.h b/source/module_lr/utils/ri_benchmark.h new file mode 100644 index 0000000000..c34ba83d18 --- /dev/null +++ b/source/module_lr/utils/ri_benchmark.h @@ -0,0 +1,63 @@ +#pragma once +#include "module_cell/unitcell.h" +#include "module_psi/psi.h" + +#include +namespace RI_Benchmark +{ + using TC = std::array; + using TAC = std::pair; + + template + using TLRI = std::map>>; + template + using TLRIX = std::map>>; + + template + void benchmark_driver_A(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc, const int nocc, const int nvirt); + template + void benchmark_driver_AX(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc, const int nocc, const int nvirt); + + // 1. full-matrix version + + /// calculate $Cs_{Ua\alpha,Vi}^{mo} = \sum_{\mu\nu} C_{U\mu\alpha,V\nu}^{ao} c^*_{\mu a} c_{\nu i}$ + /// if occ_first, calculate $Cs_{Ui\alpha,Va}^{mo} = \sum_{\mu\nu} C_{U\mu\alpha,V\nu}^{ao} c^*_{\mu i} c_{\nu a}$ + template + TLRI cal_Cs_mo(const UnitCell& ucell, + const TLRI& Cs_ao, + const psi::Psi& wfc_ks, + const int& nocc, + const int& nvirt, + const int& occ_first = false); + + /// A=CVC, sum over atom quads + template + std::vector cal_Amat_full(const TLRI& Cs_a, + const TLRI& Cs_b, + const TLRI& Vs); + + // 2. AX version + template + TLRIX cal_CsX(const UnitCell& ucell, + const TLRI& Cs_ao, + const psi::Psi& wfc_ks, + const psi::Psi& X, + const int& nocc); + + /// AX=CVCX, sum over atom quads + template + std::vector cal_AX(const TLRI& Cs_a, + const TLRIX& Cs_bX, + const TLRI& Vs); + + // read/write + template + TLRI read_Cs_ao(const std::string& file_path); + template + TLRI read_Vs_abf(const std::string& file_path); + template + void write_Vs_abf(const TLRI& Vs, const std::string& file_path); + // template + // psi::Psi read_wfc_ks(const std::string& file_path); + +} // namespace LR_Util \ No newline at end of file diff --git a/source/module_lr/utils/ri_benchmark_driver.cpp b/source/module_lr/utils/ri_benchmark_driver.cpp new file mode 100644 index 0000000000..2da4216fe5 --- /dev/null +++ b/source/module_lr/utils/ri_benchmark_driver.cpp @@ -0,0 +1,27 @@ +#include "ri_benchmark.h" +#include "lr_util.h" +namespace RI_Benchmark +{ + template + void benchmark_driver_A(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc, + const int homo, const int nocc, const int nvirt) + { + assert(nocc > 0 && nocc < homo); + const int nks = 1; + const int& npairs = nocc * nvirt; + const TLRI& Cs_ao = read_Cs_ao(file_Cs); + const TLRI& Vs = read_Vs_abf(file_Vs); + + // read range: [homo-nocc, homo+nvirt] + const psi::Psi& wfc_ks = read_wfc_ks(file_kswfc); + + const TLRI& Cs_mo_ai = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/false); + const TLRI& Cs_mo_jb = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/true); + const std::vector& Amat_full = cal_Amat_full(Cs_mo_ai, Cs_mo_jb, Vs); + std::vector eigenvalue(npairs); + LR_Util::diag_lapack(nks * npairs, Amat_full.data(), eigenvalue.data()); + std::cout << "eigenvalues:" << std::endl; + for (int i = 0; i < npairs; ++i) { std::cout << eigenvalue[i] << " " << std::endl; } + + } +} \ No newline at end of file diff --git a/source/module_lr/utils/ri_benchmark_io.cpp b/source/module_lr/utils/ri_benchmark_io.cpp new file mode 100644 index 0000000000..caeb2673bb --- /dev/null +++ b/source/module_lr/utils/ri_benchmark_io.cpp @@ -0,0 +1,82 @@ +#include "ri_benchmark.h" +#include "module_base/abfs-vector3_order.h" + +namespace RI_Benchmark +{ + template + TLRI read_Cs_ao(const std::string& file_path) + { + const int natom, ncell, ia1, ia2, ic_1, ic_2, ic_3, nw1, nw2, nabf; + std::ifstream infile; + infile.open(file_path); + infile >> natom >> ncell; // no use of ncell + + TLRI Cs; + while (infile.peek() != EOF) + { + infile >> ia1 >> ia2 >> ic_1 >> ic_2 >> ic_3 >> nw1 >> nw2 >> nabf; + ModuleBase::Vector3_Order box(ic_1, ic_2, ic_3); + RI::Tensor tensor_cs({ nabf, na1, na2 }); + for (int i = 0; i != nw1; i++) + for (int j = 0; j != nw2; j++) + for (int mu = 0; mu != nabf; mu++) + infile >> tensor_cs(mu, i, j); + // no screening for data-structure consistency + // if (loc_atp_index.count(ia1) && (*cs_ptr).absmax() >= threshold) + Cs[ia1][{ia2, box}] = tensor_cs; + // else ++cs_discard; + } + return Cs; + } + + template + TLRI read_Vs_abf(const std::string& file_path) + { + const int natom, ncell, ia1, ia2, ic_1, ic_2, ic_3, nabf1, nabf2; + std::ifstream infile; + infile.open(file_path); + infile >> natom >> ncell; // no use of ncell + + TLRI Vs; + while (infile.peek() != EOF) + { + infile >> ia1 >> ia2 >> ic_1 >> ic_2 >> ic_3 >> nabf1 >> nabf2; + ModuleBase::Vector3_Order box(ic_1, ic_2, ic_3); + RI::Tensor tensor_cs({ n_mu,n_nu }); + for (int i = 0; i != nabf1; i++) + for (int j = 0; j != nabf2; j++) + infile >> tensor_cs(i, j); + // no screening for data-structure consistency + // if (loc_atp_index.count(ia1) && (*cs_ptr).absmax() >= threshold) + Vs[ia1][{ia2, box}] = tensor_cs; + // else ++cs_discard; + } + return Vs; + } + + template + void write_Vs_abf(const TLRI& Vs, const std::string& file_path) + { + std::ofstream outfile; + outfile.open(file_path); + outfile << Vs.size() << " " << Vs.at(0).size() / Vs.size() << std::endl; //natom, ncell + for (const auto& it1 : Vs) + { + const int& ia1 = it1.first; + for (const auto& it2 : it1.second) + { + const int& ia2 = it2.first.first; + const auto& box = it2.first.second; + const auto& tensor_v = it2.second; + outfile << ia1 << " " << ia2 << " " << box[0] << " " << box[1] << " " << box[2] << std::endl; + outfile << << tensor_v.shape[0] << " " << tensor_v.shape[1] << std::endl; + for (int i = 0; i != tensor_v.shape[0]; i++) + { + for (int j = 0; j != tensor_v.shape[1]; j++) + outfile << tensor_v(i, j) << " "; + outfile << std::endl; + } + } + } + } +} \ No newline at end of file