Skip to content

Commit

Permalink
ri benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
maki49 committed Jul 15, 2024
1 parent 98f0682 commit 21d409a
Show file tree
Hide file tree
Showing 4 changed files with 411 additions and 0 deletions.
239 changes: 239 additions & 0 deletions source/module_lr/utils/ri_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
#include "ri_benchmark.h"
#include "module_base/module_container/base/third_party/blas.h"
#include "module_base/blas_connector.h"
namespace RI_Benchmark
{
// std::cout << "the size of Cs:" << std::endl;
// for (auto& it1: Cs)
// {
// for (auto& it2: it1.second)
// {
// std::cout << "iat1=" << it1.first << ", iat2= " << it2.first.first << std::endl;
// auto& ts = it2.second.shape;
// std::cout << "Tensor shape:" << ts[0] << " " << ts[1] << " " << ts[2] << std::endl; // abf, nw1, nw2
// }
// }

template <typename TR>
inline int count_nao_from_Cs(const TLRI<TR>& Cs_ao)
{
int naos = 0;
for (const auto& it2 : Cs_ao.at(0))
{
assert(it2.second.shape.size() == 3);
naos += it2.second.shape[2];
}
return naos
}

/// @brief slice the psi from wfc_ks of all the k-points, some bands, some basis functions
/// @return
template <typename TK>
inline std::vector<TK> slice_psi(const psi::Psi<TK>& wfc_ks,
const bool& ibstart,
const int& nb,
const int& iwstart,
const int& nw)
{
std::vector<TK> psi_slice(wfc_ks.get_nk() * nb * nw);
int i = 0;
for (int ik = 0; ik < wfc_ks.get_nk(); ++ik)
for (int ib = ibstart; ib < nb; ++ib)
for (int iw = iwstart; iw < nw; ++iw)
psi_slice[i++] = wfc_ks(ik, ib, iw);
return psi_slice;
}

template <typename TK, typename TR>
TLRI<TK> cal_Cs_mo(const UnitCell& ucell,
const TLRI<TR>& Cs_ao,
const psi::Psi<TK>& wfc_ks,
const int& nocc,
const int& nvirt,
const int& occ_first)
{
assert(wfc_ks.get_nk() == 1); // currently only gamma-only is supported
assert(nocc + nvirt <= wfc_ks.get_nbands());
const int& naos = cs.get_nbasis();
assert(count_nao_from_Cs(Cs_ao) == naos); // wfc_ks must be global

TLRI<TK> Cs_mo;
int iw1 = 0, iw2 = 0;
for (auto& it1 : Cs_ao[i])
{
const int& iat1 = it1.first;
const int& it1 = ucell.iat2it[iat1];
const int& nw1 = ucell.atoms[it1].nw;
for (auto& it2 : it1.second)
{
const int& iat2 = it2.first.first;
const int& it2 = ucell.iat2it[iat2];
const int& nw2 = ucell.atoms[it2].nw;

const auto& tensor_ao = it2.second;
const int& nabf = tensor_ao.shape[0];
assert(tensor_ao.shape.size() == 3); // abf, nw1, nw2

std:: : vector<TK> psi_a1 = occ_first ? slice_psi(wfc_ks, 0, nocc, iw1, nw1)
: slice_psi(wfc_ks, nocc, nvirt, iw1, nw1); // virt
std:: : vector<TK> psi_a2 = occ_first ? slice_psi(wfc_ks, nocc, nvirt, iw2, nw2)
: slice_psi(wfc_ks, 0, nocc, iw2, nw2); // occ

Cs_mo[i][it.first][it2.first] = RI::Tensor<TK>({ nabf,nocc,nvirt });
for (int iabf = 0; iabf < nabf; ++iabf)
{
const auto ptr = &tensor_ao(iabf, 0, 0);
std::vector<TK> tmp(nw1 * (occ_first ? nvirt : nocc));
// caution: Cs are row-major (ia2 contiguous)
if (occ_first)
{
gemm('T', 'N', nvirt, nw1, nw2, 1.0, psi_a2.data(), nw2, ptr, nw2, 0.0, tmp.data(), nvirt);
gemm('N', 'N', nvirt, nocc, nw1, 1.0, tmp.data(), nvirt, psi_a1.data(), nw1, 0.0, Cs_mo[i][it.first][it2.first](iabf, 0, 0), nvirt);
}
else
{
gemm('T', 'N', nw1, nocc, nw2, 1.0, ptr, nw2, psi_a2.data(), nw2, 0.0, tmp.data(), nw1);
gemm('T', 'N', nvirt, nocc, nw1, 1.0, psi_a1.data(), nw1, tmp.data(), nw1, 0.0, Cs_mo[i][it.first][it2.first](iabf, 0, 0), nvirt);
}
}
iw1 += nw1;
iw2 += nw2;
}
}
return Cs_mo;
}

template <typename TK, typename TR>
std::vector<TK> cal_Amat_full(const TLRI<TK>& Cs_a,
const TLRI<TK>& Cs_b,
const TLRI<TR>& Vs)
{
assert(Cs_a.second.second.shape.size() == 3); // abf, nocc, nvirt
assert(Cs_b.second.second.shape.size() == 3); // abf, nocc, nvirt
assert(Vs.second.second.shape.size() == 2); // abf, abf

const int& npairs = Cs_a.second.second.shape[1] * Cs_a.second.second.shape[2];
std::vector<TK> Amat_full(npairs * npairs, 0.0);

for (auto& itv1 : Vs)
{
const int& iat1 = itv1.first;
for (auto& itv2 : itv1.second)
{
const int& iat2 = itv2.first.first;
const auto& tensor_v = itv2.second; // (nabf2, nabf1), T
for (auto& itca2 : Cs_a.at(iat1))
{
const int& iat3 = itca2.first.first;
const auto& tensor_ca = itca2.second; // (nvirt*nocc, nabf1), N
for (auto& itcb2 : Cs_b.at(iat2))
{
const int& iat4 = itcb2.first.first;
const auto& tensor_cb = itcb2.second; // (nvirt*nocc, nabf2), T
const int& nabf1 = tensor_v.shape[0];
const int& nabf2 = tensor_v.shape[1];
assert(tensor_ca.shape[0] == nabf1); //abf1
assert(tensor_cb.shape[0] == nabf2); //abf2
std::vector<TK> tmp(npairs * nabf1);
gemm('T', 'T', nabf1, npairs, nabf2, 1.0, tensor_v.ptr(), nabf2, tensor_cb.ptr(), npairs, 0.0, tmp.data(), nabf1);
gemm('N', 'N', npairs, npairs, nabf1, 1.0, tensor_ca.ptr(), npairs, tmp.data(), nabf1, 1.0, Amat_full.data(), npairs);
}
}
}
}
return Amat_full;
}

template <typename TK, typename TR>
TLRIX<TK> cal_CsX(const UnitCell& ucell,
const TLRI<TR>& Cs_ao,
const psi::Psi<TK>& wfc_ks,
const psi::Psi<TK>& X_istate,
const int& nocc)
{
assert(X_istate.get_nk() == 1); // currently only gamma-only is supported
const int& npairs = X_istate.get_nbasis();
assert(npairs % nocc == 0);
const int& nvirt = npairs / nocc;
const TLRI<TK>& Cs_mo = cal_Cs_mo(ucell, Cs_ao, wfc_ks, nocc, nvirt,/*jb: occ_first=*/true);

TLRIX<TK> CsX;
for (auto& it1 : Cs_mo)
{
const int& iat1 = it1.first;
for (auto& it2 : it1.second)
{
const int& iat2 = it2.first.first;
auto& tensor_c = it2.second;
const int& nabf = tensor_c.shape[0];

std::vector<TK> CX(nabf);
for (int iabf = 0;iabf < nabf;++iabf)
CX[iabf] = dot(X.get_nbasis(), &tensor_c(iabf, 0, 0), 1, X_istate.get_pointer(), 1);
CsX[iat1][it2.first] = CX;
}
}
return CsX;
}

template <typename TK, typename TR>
std::vector<TK> cal_AX(const TLRI<TK>& Cs_a,
const TLRIX<TK>& Cs_bX,
const TLRI<TR>& Vs)
{
assert(Cs_a.second.second.shape.size() == 3); // abf, nocc, nvirt
assert(Vs.second.second.shape.size() == 2); // abf, abf

const int& npairs = Cs_a.second.second.shape[1] * Cs_a.second.second.shape[2];
std::vector<TK> AX(npairs, 0.0);

for (auto& itv1 : Vs)
{
const int& iat1 = itv1.first;
for (auto& itv2 : itv1.second)
{
const int& iat2 = itv2.first.first;
const auto& tensor_v = itv2.second; // (nabf2, nabf1), T
for (auto& itca2 : Cs_a.at(iat1))
{
const int& iat3 = itca2.first.first;
const auto& tensor_ca = itca2.second; // (nvirt*nocc, nabf1), N
for (auto& itcb2 : Cs_bX.at(iat2))
{
const int& iat4 = itcb2.first.first;
const auto& vector_cb = itcb2.second; // (nvirt*nocc, nabf2), T
const int& nabf1 = tensor_v.shape[0];
const int& nabf2 = tensor_v.shape[1];
assert(tensor_ca.shape[0] == nabf1); //abf1
assert(vector_cb.size() == nabf2); //abf2
std::vector<TK> tmp(nabf1);
gemv('T', nabf1, nabf2, 1.0, tensor_v.ptr(), nabf2, vector_cb.data(), 1, 0.0, tmp.data(), 1);
gemv('N', npairs, nabf1, 1.0, tensor_ca.ptr(), npairs, tmp.data(), 1, 1.0, AX.data(), 1);
}
}
}
}
return Amat_full;
}

template <typename TK, typename TR>
void benchmark_driver_A<TK, TR>(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc,
const int& nocc, const int& nvirt)
{
const int nks = 1;
const int& npairs = nocc * nvirt;
const TLRI<TR>& Cs_ao = read_Cs_ao<TR>(file_Cs);
const TLRI<TR>& Vs = read_Vs_abf<TR>(file_Vs);
const int nbands = LR_Util::
const psi::Psi<TK>&wfc_ks = read_wfc_ks<TK>(file_kswfc);

const TLRI<TK>& Cs_mo_ai = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/false);
const TLRI<TK>& Cs_mo_jb = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/true);
const std::vector<TK>& Amat_full = cal_Amat_full(Cs_mo_ai, Cs_mo_jb, Vs);
std::vector<Real> eigenvalue(npairs);
LR_Util::diag_lapack(nks * npairs, Amat_full.data(), eigenvalue.data());
std::cout << "eigenvalues:" << std::endl;
for (int i = 0; i < npairs; ++i) { std::cout << eigenvalue[i] << " " << std::endl; }

}
}
63 changes: 63 additions & 0 deletions source/module_lr/utils/ri_benchmark.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#pragma once
#include "module_cell/unitcell.h"
#include "module_psi/psi.h"

#include <RI/global/Tensor.h>
namespace RI_Benchmark
{
using TC = std::array<int, 3>;
using TAC = std::pair<int, TC>;

template <typename T>
using TLRI = std::map<int, std::map<TAC, RI::Tensor<T>>>;
template<typename T>
using TLRIX = std::map<int, std::map<TAC, std::vector<T>>>;

template <typename TK, typename TR>
void benchmark_driver_A(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc, const int nocc, const int nvirt);
template <typename TK, typename TR>
void benchmark_driver_AX(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc, const int nocc, const int nvirt);

// 1. full-matrix version

/// calculate $Cs_{Ua\alpha,Vi}^{mo} = \sum_{\mu\nu} C_{U\mu\alpha,V\nu}^{ao} c^*_{\mu a} c_{\nu i}$
/// if occ_first, calculate $Cs_{Ui\alpha,Va}^{mo} = \sum_{\mu\nu} C_{U\mu\alpha,V\nu}^{ao} c^*_{\mu i} c_{\nu a}$
template <typename TK, typename TR>
TLRI<TK> cal_Cs_mo(const UnitCell& ucell,
const TLRI<TR>& Cs_ao,
const psi::Psi<TK>& wfc_ks,
const int& nocc,
const int& nvirt,
const int& occ_first = false);

/// A=CVC, sum over atom quads
template <typename TK, typename TR>
std::vector<TK> cal_Amat_full(const TLRI<TK>& Cs_a,
const TLRI<TK>& Cs_b,
const TLRI<TR>& Vs);

// 2. AX version
template <typename TK, typename TR>
TLRIX<TK> cal_CsX(const UnitCell& ucell,
const TLRI<TR>& Cs_ao,
const psi::Psi<TK>& wfc_ks,
const psi::Psi<TK>& X,
const int& nocc);

/// AX=CVCX, sum over atom quads
template <typename TK, typename TR>
std::vector<TK> cal_AX(const TLRI<TK>& Cs_a,
const TLRIX<TK>& Cs_bX,
const TLRI<TR>& Vs);

// read/write
template <typename TR>
TLRI<TR> read_Cs_ao(const std::string& file_path);
template <typename TR>
TLRI<TR> read_Vs_abf(const std::string& file_path);
template <typename TR>
void write_Vs_abf(const TLRI<TR>& Vs, const std::string& file_path);
// template <typename TK>
// psi::Psi<TK> read_wfc_ks(const std::string& file_path);

} // namespace LR_Util
27 changes: 27 additions & 0 deletions source/module_lr/utils/ri_benchmark_driver.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include "ri_benchmark.h"
#include "lr_util.h"
namespace RI_Benchmark
{
template <typename TK, typename TR>
void benchmark_driver_A<TK, TR>(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc,
const int homo, const int nocc, const int nvirt)
{
assert(nocc > 0 && nocc < homo);
const int nks = 1;
const int& npairs = nocc * nvirt;
const TLRI<TR>& Cs_ao = read_Cs_ao<TR>(file_Cs);
const TLRI<TR>& Vs = read_Vs_abf<TR>(file_Vs);

// read range: [homo-nocc, homo+nvirt]
const psi::Psi<TK>& wfc_ks = read_wfc_ks<TK>(file_kswfc);

const TLRI<TK>& Cs_mo_ai = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/false);
const TLRI<TK>& Cs_mo_jb = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/true);
const std::vector<TK>& Amat_full = cal_Amat_full(Cs_mo_ai, Cs_mo_jb, Vs);
std::vector<Real> eigenvalue(npairs);
LR_Util::diag_lapack(nks * npairs, Amat_full.data(), eigenvalue.data());
std::cout << "eigenvalues:" << std::endl;
for (int i = 0; i < npairs; ++i) { std::cout << eigenvalue[i] << " " << std::endl; }

}
}
Loading

0 comments on commit 21d409a

Please sign in to comment.