ri benchmark

maki49 · Jul 15, 2024 · 21d409a · 21d409a
1 parent 98f0682
commit 21d409a
Show file tree

Hide file tree

Showing 4 changed files with 411 additions and 0 deletions.
diff --git a/source/module_lr/utils/ri_benchmark.cpp b/source/module_lr/utils/ri_benchmark.cpp
@@ -0,0 +1,239 @@
+#include "ri_benchmark.h"
+#include "module_base/module_container/base/third_party/blas.h"
+#include "module_base/blas_connector.h"
+namespace RI_Benchmark
+{
+    // std::cout << "the size of Cs:" << std::endl;
+    // for (auto& it1: Cs)
+    // {
+    //     for (auto& it2: it1.second)
+    //     {
+    //         std::cout << "iat1=" << it1.first << ", iat2= " << it2.first.first << std::endl;
+    //         auto& ts = it2.second.shape;
+    //         std::cout << "Tensor shape:" << ts[0] << " " << ts[1] << " " << ts[2] << std::endl; // abf, nw1, nw2
+    //     }
+    // }
+
+    template <typename TR>
+    inline int count_nao_from_Cs(const TLRI<TR>& Cs_ao)
+    {
+        int naos = 0;
+        for (const auto& it2 : Cs_ao.at(0))
+        {
+            assert(it2.second.shape.size() == 3);
+            naos += it2.second.shape[2];
+        }
+        return naos
+    }
+
+    /// @brief slice the psi from wfc_ks of all the k-points, some bands, some basis functions
+    /// @return
+    template <typename TK>
+    inline std::vector<TK> slice_psi(const psi::Psi<TK>& wfc_ks,
+        const bool& ibstart,
+        const int& nb,
+        const int& iwstart,
+        const int& nw)
+    {
+        std::vector<TK> psi_slice(wfc_ks.get_nk() * nb * nw);
+        int i = 0;
+        for (int ik = 0; ik < wfc_ks.get_nk(); ++ik)
+            for (int ib = ibstart; ib < nb; ++ib)
+                for (int iw = iwstart; iw < nw; ++iw)
+                    psi_slice[i++] = wfc_ks(ik, ib, iw);
+        return psi_slice;
+    }
+
+    template <typename TK, typename TR>
+    TLRI<TK> cal_Cs_mo(const UnitCell& ucell,
+        const TLRI<TR>& Cs_ao,
+        const psi::Psi<TK>& wfc_ks,
+        const int& nocc,
+        const int& nvirt,
+        const int& occ_first)
+    {
+        assert(wfc_ks.get_nk() == 1);   // currently only gamma-only is supported
+        assert(nocc + nvirt <= wfc_ks.get_nbands());
+        const int& naos = cs.get_nbasis();
+        assert(count_nao_from_Cs(Cs_ao) == naos); // wfc_ks must be global
+
+        TLRI<TK> Cs_mo;
+        int iw1 = 0, iw2 = 0;
+        for (auto& it1 : Cs_ao[i])
+        {
+            const int& iat1 = it1.first;
+            const int& it1 = ucell.iat2it[iat1];
+            const int& nw1 = ucell.atoms[it1].nw;
+            for (auto& it2 : it1.second)
+            {
+                const int& iat2 = it2.first.first;
+                const int& it2 = ucell.iat2it[iat2];
+                const int& nw2 = ucell.atoms[it2].nw;
+
+                const auto& tensor_ao = it2.second;
+                const int& nabf = tensor_ao.shape[0];
+                assert(tensor_ao.shape.size() == 3); // abf, nw1, nw2
+
+                std:: : vector<TK> psi_a1 = occ_first ? slice_psi(wfc_ks, 0, nocc, iw1, nw1)
+                    : slice_psi(wfc_ks, nocc, nvirt, iw1, nw1); // virt
+                std:: : vector<TK> psi_a2 = occ_first ? slice_psi(wfc_ks, nocc, nvirt, iw2, nw2)
+                    : slice_psi(wfc_ks, 0, nocc, iw2, nw2); // occ
+
+                Cs_mo[i][it.first][it2.first] = RI::Tensor<TK>({ nabf,nocc,nvirt });
+                for (int iabf = 0; iabf < nabf; ++iabf)
+                {
+                    const auto ptr = &tensor_ao(iabf, 0, 0);
+                    std::vector<TK> tmp(nw1 * (occ_first ? nvirt : nocc));
+                    // caution: Cs are row-major  (ia2 contiguous)
+                    if (occ_first)
+                    {
+                        gemm('T', 'N', nvirt, nw1, nw2, 1.0, psi_a2.data(), nw2, ptr, nw2, 0.0, tmp.data(), nvirt);
+                        gemm('N', 'N', nvirt, nocc, nw1, 1.0, tmp.data(), nvirt, psi_a1.data(), nw1, 0.0, Cs_mo[i][it.first][it2.first](iabf, 0, 0), nvirt);
+                    }
+                    else
+                    {
+                        gemm('T', 'N', nw1, nocc, nw2, 1.0, ptr, nw2, psi_a2.data(), nw2, 0.0, tmp.data(), nw1);
+                        gemm('T', 'N', nvirt, nocc, nw1, 1.0, psi_a1.data(), nw1, tmp.data(), nw1, 0.0, Cs_mo[i][it.first][it2.first](iabf, 0, 0), nvirt);
+                    }
+                }
+                iw1 += nw1;
+                iw2 += nw2;
+            }
+        }
+        return Cs_mo;
+    }
+
+    template <typename TK, typename TR>
+    std::vector<TK> cal_Amat_full(const TLRI<TK>& Cs_a,
+        const TLRI<TK>& Cs_b,
+        const TLRI<TR>& Vs)
+    {
+        assert(Cs_a.second.second.shape.size() == 3); // abf, nocc, nvirt
+        assert(Cs_b.second.second.shape.size() == 3); // abf, nocc, nvirt
+        assert(Vs.second.second.shape.size() == 2); // abf, abf
+
+        const int& npairs = Cs_a.second.second.shape[1] * Cs_a.second.second.shape[2];
+        std::vector<TK> Amat_full(npairs * npairs, 0.0);
+
+        for (auto& itv1 : Vs)
+        {
+            const int& iat1 = itv1.first;
+            for (auto& itv2 : itv1.second)
+            {
+                const int& iat2 = itv2.first.first;
+                const auto& tensor_v = itv2.second; // (nabf2, nabf1), T
+                for (auto& itca2 : Cs_a.at(iat1))
+                {
+                    const int& iat3 = itca2.first.first;
+                    const auto& tensor_ca = itca2.second; // (nvirt*nocc, nabf1), N
+                    for (auto& itcb2 : Cs_b.at(iat2))
+                    {
+                        const int& iat4 = itcb2.first.first;
+                        const auto& tensor_cb = itcb2.second; // (nvirt*nocc, nabf2), T
+                        const int& nabf1 = tensor_v.shape[0];
+                        const int& nabf2 = tensor_v.shape[1];
+                        assert(tensor_ca.shape[0] == nabf1);   //abf1
+                        assert(tensor_cb.shape[0] == nabf2); //abf2
+                        std::vector<TK> tmp(npairs * nabf1);
+                        gemm('T', 'T', nabf1, npairs, nabf2, 1.0, tensor_v.ptr(), nabf2, tensor_cb.ptr(), npairs, 0.0, tmp.data(), nabf1);
+                        gemm('N', 'N', npairs, npairs, nabf1, 1.0, tensor_ca.ptr(), npairs, tmp.data(), nabf1, 1.0, Amat_full.data(), npairs);
+                    }
+                }
+            }
+        }
+        return Amat_full;
+    }
+
+    template <typename TK, typename TR>
+    TLRIX<TK> cal_CsX(const UnitCell& ucell,
+        const TLRI<TR>& Cs_ao,
+        const psi::Psi<TK>& wfc_ks,
+        const psi::Psi<TK>& X_istate,
+        const int& nocc)
+    {
+        assert(X_istate.get_nk() == 1);   // currently only gamma-only is supported
+        const int& npairs = X_istate.get_nbasis();
+        assert(npairs % nocc == 0);
+        const int& nvirt = npairs / nocc;
+        const TLRI<TK>& Cs_mo = cal_Cs_mo(ucell, Cs_ao, wfc_ks, nocc, nvirt,/*jb: occ_first=*/true);
+
+        TLRIX<TK> CsX;
+        for (auto& it1 : Cs_mo)
+        {
+            const int& iat1 = it1.first;
+            for (auto& it2 : it1.second)
+            {
+                const int& iat2 = it2.first.first;
+                auto& tensor_c = it2.second;
+                const int& nabf = tensor_c.shape[0];
+
+                std::vector<TK> CX(nabf);
+                for (int iabf = 0;iabf < nabf;++iabf)
+                    CX[iabf] = dot(X.get_nbasis(), &tensor_c(iabf, 0, 0), 1, X_istate.get_pointer(), 1);
+                CsX[iat1][it2.first] = CX;
+            }
+        }
+        return CsX;
+    }
+
+    template <typename TK, typename TR>
+    std::vector<TK> cal_AX(const TLRI<TK>& Cs_a,
+        const TLRIX<TK>& Cs_bX,
+        const TLRI<TR>& Vs)
+    {
+        assert(Cs_a.second.second.shape.size() == 3); // abf, nocc, nvirt
+        assert(Vs.second.second.shape.size() == 2); // abf, abf
+
+        const int& npairs = Cs_a.second.second.shape[1] * Cs_a.second.second.shape[2];
+        std::vector<TK> AX(npairs, 0.0);
+
+        for (auto& itv1 : Vs)
+        {
+            const int& iat1 = itv1.first;
+            for (auto& itv2 : itv1.second)
+            {
+                const int& iat2 = itv2.first.first;
+                const auto& tensor_v = itv2.second; // (nabf2, nabf1), T
+                for (auto& itca2 : Cs_a.at(iat1))
+                {
+                    const int& iat3 = itca2.first.first;
+                    const auto& tensor_ca = itca2.second; // (nvirt*nocc, nabf1), N
+                    for (auto& itcb2 : Cs_bX.at(iat2))
+                    {
+                        const int& iat4 = itcb2.first.first;
+                        const auto& vector_cb = itcb2.second; // (nvirt*nocc, nabf2), T
+                        const int& nabf1 = tensor_v.shape[0];
+                        const int& nabf2 = tensor_v.shape[1];
+                        assert(tensor_ca.shape[0] == nabf1);   //abf1
+                        assert(vector_cb.size() == nabf2); //abf2
+                        std::vector<TK> tmp(nabf1);
+                        gemv('T', nabf1, nabf2, 1.0, tensor_v.ptr(), nabf2, vector_cb.data(), 1, 0.0, tmp.data(), 1);
+                        gemv('N', npairs, nabf1, 1.0, tensor_ca.ptr(), npairs, tmp.data(), 1, 1.0, AX.data(), 1);
+                    }
+                }
+            }
+        }
+        return Amat_full;
+    }
+
+    template <typename TK, typename TR>
+    void benchmark_driver_A<TK, TR>(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc,
+        const int& nocc, const int& nvirt)
+    {
+        const int nks = 1;
+        const int& npairs = nocc * nvirt;
+        const TLRI<TR>& Cs_ao = read_Cs_ao<TR>(file_Cs);
+        const TLRI<TR>& Vs = read_Vs_abf<TR>(file_Vs);
+        const int nbands = LR_Util::
+            const psi::Psi<TK>&wfc_ks = read_wfc_ks<TK>(file_kswfc);
+
+        const TLRI<TK>& Cs_mo_ai = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/false);
+        const TLRI<TK>& Cs_mo_jb = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/true);
+        const std::vector<TK>& Amat_full = cal_Amat_full(Cs_mo_ai, Cs_mo_jb, Vs);
+        std::vector<Real> eigenvalue(npairs);
+        LR_Util::diag_lapack(nks * npairs, Amat_full.data(), eigenvalue.data());
+        std::cout << "eigenvalues:" << std::endl;
+        for (int i = 0; i < npairs; ++i) { std::cout << eigenvalue[i] << "  " << std::endl; }
+
+    }
+}
diff --git a/source/module_lr/utils/ri_benchmark.h b/source/module_lr/utils/ri_benchmark.h
@@ -0,0 +1,63 @@
+#pragma once
+#include "module_cell/unitcell.h"
+#include "module_psi/psi.h"
+
+#include <RI/global/Tensor.h>
+namespace RI_Benchmark
+{
+    using TC = std::array<int, 3>;
+    using TAC = std::pair<int, TC>;
+
+    template <typename T>
+    using TLRI = std::map<int, std::map<TAC, RI::Tensor<T>>>;
+    template<typename T>
+    using TLRIX = std::map<int, std::map<TAC, std::vector<T>>>;
+
+    template <typename TK, typename TR>
+    void benchmark_driver_A(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc, const int nocc, const int nvirt);
+    template <typename TK, typename TR>
+    void benchmark_driver_AX(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc, const int nocc, const int nvirt);
+
+    // 1. full-matrix version
+
+    /// calculate $Cs_{Ua\alpha,Vi}^{mo} = \sum_{\mu\nu} C_{U\mu\alpha,V\nu}^{ao} c^*_{\mu a} c_{\nu i}$
+    /// if occ_first, calculate $Cs_{Ui\alpha,Va}^{mo} = \sum_{\mu\nu} C_{U\mu\alpha,V\nu}^{ao} c^*_{\mu i} c_{\nu a}$
+    template <typename TK, typename TR>
+    TLRI<TK> cal_Cs_mo(const UnitCell& ucell,
+        const TLRI<TR>& Cs_ao,
+        const psi::Psi<TK>& wfc_ks,
+        const int& nocc,
+        const int& nvirt,
+        const int& occ_first = false);
+
+    /// A=CVC, sum over atom quads
+    template <typename TK, typename TR>
+    std::vector<TK> cal_Amat_full(const TLRI<TK>& Cs_a,
+        const TLRI<TK>& Cs_b,
+        const TLRI<TR>& Vs);
+
+    // 2. AX version
+    template <typename TK, typename TR>
+    TLRIX<TK> cal_CsX(const UnitCell& ucell,
+        const TLRI<TR>& Cs_ao,
+        const psi::Psi<TK>& wfc_ks,
+        const psi::Psi<TK>& X,
+        const int& nocc);
+
+    /// AX=CVCX, sum over atom quads
+    template <typename TK, typename TR>
+    std::vector<TK> cal_AX(const TLRI<TK>& Cs_a,
+        const TLRIX<TK>& Cs_bX,
+        const TLRI<TR>& Vs);
+
+    // read/write
+    template <typename TR>
+    TLRI<TR> read_Cs_ao(const std::string& file_path);
+    template <typename TR>
+    TLRI<TR> read_Vs_abf(const std::string& file_path);
+    template <typename TR>
+    void write_Vs_abf(const TLRI<TR>& Vs, const std::string& file_path);
+    // template <typename TK>
+    // psi::Psi<TK> read_wfc_ks(const std::string& file_path);
+
+} // namespace LR_Util
diff --git a/source/module_lr/utils/ri_benchmark_driver.cpp b/source/module_lr/utils/ri_benchmark_driver.cpp
@@ -0,0 +1,27 @@
+#include "ri_benchmark.h"
+#include "lr_util.h"
+namespace RI_Benchmark
+{
+    template <typename TK, typename TR>
+    void benchmark_driver_A<TK, TR>(std::string& file_Cs, std::string& file_Vs, std::string& file_kswfc,
+        const int homo, const int nocc, const int nvirt)
+    {
+        assert(nocc > 0 && nocc < homo);
+        const int nks = 1;
+        const int& npairs = nocc * nvirt;
+        const TLRI<TR>& Cs_ao = read_Cs_ao<TR>(file_Cs);
+        const TLRI<TR>& Vs = read_Vs_abf<TR>(file_Vs);
+
+        // read range: [homo-nocc, homo+nvirt]
+        const psi::Psi<TK>& wfc_ks = read_wfc_ks<TK>(file_kswfc);
+
+        const TLRI<TK>& Cs_mo_ai = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/false);
+        const TLRI<TK>& Cs_mo_jb = cal_Cs_mo(Cs_ao, wfc_ks, nocc, nvirt, /*occ_first=*/true);
+        const std::vector<TK>& Amat_full = cal_Amat_full(Cs_mo_ai, Cs_mo_jb, Vs);
+        std::vector<Real> eigenvalue(npairs);
+        LR_Util::diag_lapack(nks * npairs, Amat_full.data(), eigenvalue.data());
+        std::cout << "eigenvalues:" << std::endl;
+        for (int i = 0; i < npairs; ++i) { std::cout << eigenvalue[i] << "  " << std::endl; }
+
+    }
+}