diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index cc7679e68f..ec0973f055 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -409,7 +409,7 @@
     - [lr\_solverl](#lr_solver)
     - [lr\_thr](#lr_thr)
     - [nvirt](#nvirt)
-    - [nstates](#nstates)
+    - [lr_nstates](#lr_nstates)
     - [abs\_wavelen\_range](#abs_wavelen_range)
     - [out\_wfc\_lr](#out_wfc_lr)
 [back to top](#full-list-of-input-keywords)
@@ -3767,10 +3767,10 @@ Currently supported: `RPA`, `LDA`, `PBE`, `HSE`, `HF`.
 ### lr_solver
 
 - **Type**: String
-- **Description**: The method to solve the Casida equation in LR-TDDFT.
+- **Description**: The method to solve the Casida equation $AX=\Omega X$ in LR-TDDFT under Tamm-Dancoff approximation (TDA), where $A_{ai,bj}=(\epsilon_a-\epsilon_i)\delta_{ij}\delta_{ab}+(ai|f_{Hxc}|bj)+\alpha_{EX}(ab|ij)$ is the particle-hole excitation matrix and $X$ is the transition amplitude.
   - `dav`: Construct $AX$ and diagonalize the Hamiltonian matrix iteratively with Davidson algorithm.
   - `lapack`: Construct the full $A$ matrix and directly diagonalize with LAPACK.
-  - `spectrum`: Caluclate absorption spectrum only without solving Casida equation. The `OUT.${suffix}/` directory should contain the
+  - `spectrum`: Calculate absorption spectrum only without solving Casida equation. The `OUT.${suffix}/` directory should contain the
   files for LR-TDDFT eigenstates and eigenvalues, i.e. `Excitation_Energy.dat` and `Excitation_Amplitude_${processor_rank}.dat`
    output by setting `out_wfc_lr` to true.
 - **Default**: dav
@@ -3778,8 +3778,8 @@ Currently supported: `RPA`, `LDA`, `PBE`, `HSE`, `HF`.
 ### lr_thr
 
 - **Type**: Real
-- **Description**: The convergence threshold of iterative diagonalization fo LR-TDDFT.
-- **Default**: 1e-6
+- **Description**: The convergence threshold of iterative diagonalization solver fo LR-TDDFT. It is a pure-math number with the same as [pw_diag_thr](#pw_diag_thr), but since the Casida equation is a one-shot eigenvalue problem, it is also the convergence threshold of LR-TDDFT.
+- **Default**: 1e-2
 
 ### nvirt
 
@@ -3787,7 +3787,7 @@ Currently supported: `RPA`, `LDA`, `PBE`, `HSE`, `HF`.
 - **Description**: The number of virtual orbitals used in the LR-TDDFT calculation.
 - **Default**: 1
 
-### nstates
+### lr_nstates
 
 - **Type**: Integer
 - **Description**:  The number of 2-particle states to be solved
diff --git a/examples/lr-tddft/lcao_H2O/INPUT b/examples/lr-tddft/lcao_H2O/INPUT
index ba485b4088..06d7347b74 100644
--- a/examples/lr-tddft/lcao_H2O/INPUT
+++ b/examples/lr-tddft/lcao_H2O/INPUT
@@ -25,7 +25,7 @@ mixing_type             pulay
 mixing_beta             0.4
 mixing_gg0 0
 
-nstates 2
+lr_nstates 2
 xc_kernel lda
 lr_solver dav
 lr_thr 1e-6
diff --git a/examples/lr-tddft/lcao_Si2/INPUT b/examples/lr-tddft/lcao_Si2/INPUT
index fd7f27185f..cac5e5cca3 100644
--- a/examples/lr-tddft/lcao_Si2/INPUT
+++ b/examples/lr-tddft/lcao_Si2/INPUT
@@ -24,7 +24,7 @@ smearing_sigma                   0.02
 mixing_type             pulay
 mixing_beta             0.4
 
-nstates 10 # for test/debug, you can try a smaller one like 2
+lr_nstates 10 # for test/debug, you can try a smaller one like 2
 xc_kernel lda
 lr_solver dav
 lr_thr 1e-2
diff --git a/source/driver_run.cpp b/source/driver_run.cpp
index 97de81e840..73edede478 100644
--- a/source/driver_run.cpp
+++ b/source/driver_run.cpp
@@ -7,13 +7,6 @@
 #include "module_md/run_md.h"
 #include "module_io/para_json.h"
 
-#ifdef __LCAO
-#include "module_beyonddft/esolver_lrtd_lcao.h"
-#endif
-extern "C"
-{
-#include "module_base/blacs_connector.h"
-}
 /**
  * @brief This is the driver function which defines the workflow of ABACUS calculations.
  * It relies on the class Esolver, which is a class that organizes workflows of single point calculations.
@@ -31,9 +24,8 @@ void Driver::driver_run(void)
     ModuleBase::TITLE("Driver", "driver_line");
     ModuleBase::timer::tick("Driver", "driver_line");
 
-    //! 1: initialize the ESolver 
-    ModuleESolver::ESolver *p_esolver = nullptr;
-    ModuleESolver::init_esolver(p_esolver);
+    //! 1: initialize the ESolver
+    ModuleESolver::ESolver* p_esolver = nullptr;
 
     //! 2: setup cell and atom information
 
@@ -49,18 +41,9 @@ void Driver::driver_run(void)
     // delete ucell as a GlobalC in near future
     GlobalC::ucell.setup_cell(GlobalV::stru_file, GlobalV::ofs_running);
 
-    //! 3: initialize Esolver and fill json-structure 
-#ifdef __LCAO
-    if (GlobalV::ESOLVER_TYPE == "lr")
-        // use constructor rather than Init function to initialize reference (instead of pointers) to ucell
-        if (GlobalV::GAMMA_ONLY_LOCAL)
-            p_esolver = new ModuleESolver::ESolver_LRTD<double, double>(INPUT, GlobalC::ucell);
-        else if (GlobalV::NSPIN < 4)
-            p_esolver = new ModuleESolver::ESolver_LRTD<std::complex<double>, double>(INPUT, GlobalC::ucell);
-        else
-            throw std::runtime_error("LR-TDDFT is not implemented for spin polarized case");
-    else
-#endif
+    ModuleESolver::init_esolver(p_esolver, INPUT, GlobalC::ucell);
+
+    //! 3: initialize Esolver and fill json-structure
     p_esolver->before_all_runners(INPUT, GlobalC::ucell);
 
     // this Json part should be moved to before_all_runners, mohan 2024-05-12
@@ -99,33 +82,8 @@ void Driver::driver_run(void)
     //! 5: clean up esolver
     p_esolver->after_all_runners();
 
-#ifdef __LCAO
-    //---------beyond DFT: set up the next ESolver---------
-    if (INPUT.esolver_type == "ks-lr")
-    {
-        std::cout << "setting up the esolver for excited state" << std::endl;
-        // ModuleESolver::ESolver_KS_LCAO* p_esolver_lcao_tmp = dynamic_cast<ModuleESolver::ESolver_KS_LCAO<double, double>*>(p_esolver);
-        ModuleESolver::ESolver* p_esolver_lr = nullptr;
-        if (INPUT.gamma_only)
-            p_esolver_lr = new ModuleESolver::ESolver_LRTD<double, double>(std::move(*dynamic_cast<ModuleESolver::ESolver_KS_LCAO<double, double>*>(p_esolver)), INPUT, GlobalC::ucell);
-        else
-            p_esolver_lr = new ModuleESolver::ESolver_LRTD<std::complex<double>, double>(std::move(*dynamic_cast<ModuleESolver::ESolver_KS_LCAO<std::complex<double>, double>*>(p_esolver)), INPUT, GlobalC::ucell);
-        ModuleESolver::clean_esolver(p_esolver);
-        p_esolver_lr->runner(0, GlobalC::ucell);
-        p_esolver_lr->after_all_runners();
-
-        std::cout << "before clean lr" << std::endl;
-        ModuleESolver::clean_esolver(p_esolver_lr);
-        std::cout << "after clean lr" << std::endl;
-    } //----------------------beyond DFT------------------------
-    else
-        ModuleESolver::clean_esolver(p_esolver);
-
-    if (INPUT.basis_type == "lcao")
-        Cblacs_exit(1); // clean up blacs after all the esolvers are cleaned up without closing MPI
-#else
     ModuleESolver::clean_esolver(p_esolver);
-#endif
+
     ModuleBase::timer::tick("Driver", "driver_line");
     return;
 }
diff --git a/source/module_beyonddft/dm_trans/dm_trans_parallel.cpp b/source/module_beyonddft/dm_trans/dm_trans_parallel.cpp
index 5709f79a2e..cc3a88f26e 100644
--- a/source/module_beyonddft/dm_trans/dm_trans_parallel.cpp
+++ b/source/module_beyonddft/dm_trans/dm_trans_parallel.cpp
@@ -9,137 +9,205 @@ namespace hamilt
     //output: col first, consistent with blas
     // c: nao*nbands in para2d, nbands*nao in psi  (row-para and constructed: nao)
     // X: nvirt*nocc in para2d, nocc*nvirt in psi (row-para and constructed: nvirt)
-    template<>std::vector<container::Tensor> cal_dm_trans_pblas(
-        const psi::Psi<double>& X_istate,
-        const Parallel_2D& px,
-        const psi::Psi<double>& c,
-        const Parallel_2D& pc,
-        int naos,
-        int nocc,
-        int nvirt,
-        Parallel_2D& pmat,
-        const bool renorm_k,
-        const int nspin)
-    {
-        ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_pblas");
-        assert(px.comm_2D == pc.comm_2D);
-        assert(px.blacs_ctxt == pc.blacs_ctxt);
+template <>
+std::vector<container::Tensor> cal_dm_trans_pblas(const psi::Psi<double>& X_istate,
+                                                  const Parallel_2D& px,
+                                                  const psi::Psi<double>& c,
+                                                  const Parallel_2D& pc,
+                                                  const int naos,
+                                                  const int nocc,
+                                                  const int nvirt,
+                                                  Parallel_2D& pmat,
+                                                  const bool renorm_k,
+                                                  const int nspin)
+{
+    ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_pblas");
+    assert(px.comm_2D == pc.comm_2D);
+    assert(px.blacs_ctxt == pc.blacs_ctxt);
 
-        if (pmat.comm_2D != px.comm_2D || pmat.blacs_ctxt != px.blacs_ctxt)
-            LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.comm_2D, px.blacs_ctxt);
-        else assert(pmat.get_local_size() > 0);
+    if (pmat.comm_2D != px.comm_2D || pmat.blacs_ctxt != px.blacs_ctxt)
+        LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.comm_2D, px.blacs_ctxt);
+    else
+        assert(pmat.get_local_size() > 0);
 
-        int nks = c.get_nk();
-        assert(nks == X_istate.get_nk());
+    int nks = c.get_nk();
+    assert(nks == X_istate.get_nk());
 
-        std::vector<container::Tensor> dm_trans(nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { pmat.get_col_size(), pmat.get_row_size() }));
-        for (int isk = 0;isk < nks;++isk)
-        {
-            c.fix_k(isk);
-            X_istate.fix_k(isk);
-            int i1 = 1;
-            int ivirt = nocc + 1;
-            char transa = 'N';
-            char transb = 'T';
-            const double alpha = 1.0;
-            const double beta = 0;
+    std::vector<container::Tensor> dm_trans(
+        nks,
+        container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, {pmat.get_col_size(), pmat.get_row_size()}));
+    for (int isk = 0; isk < nks; ++isk)
+    {
+        c.fix_k(isk);
+        X_istate.fix_k(isk);
+        int i1 = 1;
+        int ivirt = nocc + 1;
+        char transa = 'N';
+        char transb = 'T';
+        const double alpha = 1.0;
+        const double beta = 0;
 
-            // 1. [X*C_occ^T]^T=C_occ*X^T
-            Parallel_2D pXc;    //nvirt*naos
-            LR_Util::setup_2d_division(pXc, px.get_block_size(), naos, nvirt, px.comm_2D, px.blacs_ctxt);
-            container::Tensor Xc(DAT::DT_DOUBLE, DEV::CpuDevice, { pXc.get_col_size(), pXc.get_row_size() });//row is "inside"(memory contiguity) for pblas
-            Xc.zero();
-            pdgemm_(&transa, &transb, &naos, &nvirt, &nocc,
-                &alpha, c.get_pointer(), &i1, &i1, pc.desc,
-                X_istate.get_pointer(), &i1, &i1, px.desc,
-                &beta, Xc.data<double>(), &i1, &i1, pXc.desc);
+        // 1. [X*C_occ^T]^T=C_occ*X^T
+        Parallel_2D pXc; // nvirt*naos
+        LR_Util::setup_2d_division(pXc, px.get_block_size(), naos, nvirt, px.comm_2D, px.blacs_ctxt);
+        container::Tensor Xc(DAT::DT_DOUBLE,
+                             DEV::CpuDevice,
+                             {pXc.get_col_size(), pXc.get_row_size()}); // row is "inside"(memory contiguity) for pblas
+        Xc.zero();
+        pdgemm_(&transa,
+                &transb,
+                &naos,
+                &nvirt,
+                &nocc,
+                &alpha,
+                c.get_pointer(),
+                &i1,
+                &i1,
+                pc.desc,
+                X_istate.get_pointer(),
+                &i1,
+                &i1,
+                px.desc,
+                &beta,
+                Xc.data<double>(),
+                &i1,
+                &i1,
+                pXc.desc);
 
-            // 2. C_virt*[X*C_occ^T]
-            pdgemm_(&transa, &transb, &naos, &naos, &nvirt,
-                &alpha, c.get_pointer(), &i1, &ivirt, pc.desc,
-                Xc.data<double>(), &i1, &i1, pXc.desc,
-                &beta, dm_trans[isk].data<double>(), &i1, &i1, pmat.desc);
-        }
-        return dm_trans;
+        // 2. C_virt*[X*C_occ^T]
+        pdgemm_(&transa,
+                &transb,
+                &naos,
+                &naos,
+                &nvirt,
+                &alpha,
+                c.get_pointer(),
+                &i1,
+                &ivirt,
+                pc.desc,
+                Xc.data<double>(),
+                &i1,
+                &i1,
+                pXc.desc,
+                &beta,
+                dm_trans[isk].data<double>(),
+                &i1,
+                &i1,
+                pmat.desc);
     }
-    template<> std::vector<container::Tensor> cal_dm_trans_pblas(
-        const psi::Psi<std::complex<double>>& X_istate,
-        const Parallel_2D& px,
-        const psi::Psi<std::complex<double>>& c,
-        const Parallel_2D& pc,
-        int naos,
-        int nocc,
-        int nvirt,
-        Parallel_2D& pmat,
-        const bool renorm_k,
-        const int nspin)
-    {
-        ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_pblas");
-        assert(px.comm_2D == pc.comm_2D);
-        assert(px.blacs_ctxt == pc.blacs_ctxt);
+    return dm_trans;
+}
+template <>
+std::vector<container::Tensor> cal_dm_trans_pblas(const psi::Psi<std::complex<double>>& X_istate,
+                                                  const Parallel_2D& px,
+                                                  const psi::Psi<std::complex<double>>& c,
+                                                  const Parallel_2D& pc,
+                                                  const int naos,
+                                                  const int nocc,
+                                                  const int nvirt,
+                                                  Parallel_2D& pmat,
+                                                  const bool renorm_k,
+                                                  const int nspin)
+{
+    ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_pblas");
+    assert(px.comm_2D == pc.comm_2D);
+    assert(px.blacs_ctxt == pc.blacs_ctxt);
 
-        if (pmat.comm_2D != px.comm_2D || pmat.blacs_ctxt != px.blacs_ctxt)
-            LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.comm_2D, px.blacs_ctxt);
-        else assert(pmat.get_local_size() > 0);
+    if (pmat.comm_2D != px.comm_2D || pmat.blacs_ctxt != px.blacs_ctxt)
+        LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.comm_2D, px.blacs_ctxt);
+    else
+        assert(pmat.get_local_size() > 0);
 
-        int nks = c.get_nk();
-        assert(nks == X_istate.get_nk());
+    int nks = c.get_nk();
+    assert(nks == X_istate.get_nk());
 
-        std::vector<container::Tensor> dm_trans(nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { pmat.get_col_size(), pmat.get_row_size() }));
-        for (int isk = 0;isk < nks;++isk)
-        {
-            c.fix_k(isk);
-            X_istate.fix_k(isk);
-            int i1 = 1;
-            int ivirt = nocc + 1;
+    std::vector<container::Tensor> dm_trans(
+        nks,
+        container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, {pmat.get_col_size(), pmat.get_row_size()}));
+    for (int isk = 0; isk < nks; ++isk)
+    {
+        c.fix_k(isk);
+        X_istate.fix_k(isk);
+        int i1 = 1;
+        int ivirt = nocc + 1;
 
-            // ============== C_virt * X * C_occ^\dagger=============
-            // char transa = 'N';
-            // char transb = 'C';
-            // // 1. [X*C_occ^\dagger]^\dagger=C_occ*X^\dagger
-            // Parallel_2D pXc;
-            // LR_Util::setup_2d_division(pXc, px.get_block_size(), naos, nvirt, px.comm_2D, px.blacs_ctxt);
-            // container::Tensor Xc(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { pXc.get_col_size(), pXc.get_row_size() });//row is "inside"(memory contiguity) for pblas
-            // Xc.zero();
-            // const std::complex<double> alpha(1.0, 0.0);
-            // const std::complex<double> beta(0.0, 0.0);
-            // pzgemm_(&transa, &transb, &naos, &nvirt, &nocc,
-            //     &alpha, c.get_pointer(), &i1, &i1, pc.desc,
-            //     X_istate.get_pointer(), &i1, &i1, px.desc,
-            //     &beta, Xc.data<std::complex<double>>(), &i1, &i1, pXc.desc);
+        // ============== C_virt * X * C_occ^\dagger=============
+        // char transa = 'N';
+        // char transb = 'C';
+        // // 1. [X*C_occ^\dagger]^\dagger=C_occ*X^\dagger
+        // Parallel_2D pXc;
+        // LR_Util::setup_2d_division(pXc, px.get_block_size(), naos, nvirt, px.comm_2D, px.blacs_ctxt);
+        // container::Tensor Xc(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { pXc.get_col_size(), pXc.get_row_size()
+        // });//row is "inside"(memory contiguity) for pblas Xc.zero(); const std::complex<double> alpha(1.0, 0.0);
+        // const std::complex<double> beta(0.0, 0.0);
+        // pzgemm_(&transa, &transb, &naos, &nvirt, &nocc,
+        //     &alpha, c.get_pointer(), &i1, &i1, pc.desc,
+        //     X_istate.get_pointer(), &i1, &i1, px.desc,
+        //     &beta, Xc.data<std::complex<double>>(), &i1, &i1, pXc.desc);
 
-            // // 2. C_virt*[X*C_occ^\dagger]
-            // pzgemm_(&transa, &transb, &naos, &naos, &nvirt,
-            //     &alpha, c.get_pointer(), &i1, &ivirt, pc.desc,
-            //     Xc.data<std::complex<double>>(), &i1, &i1, pXc.desc,
-            //     &beta, dm_trans[isk].data<std::complex<double>>(), &i1, &i1, pmat.desc);
+        // // 2. C_virt*[X*C_occ^\dagger]
+        // pzgemm_(&transa, &transb, &naos, &naos, &nvirt,
+        //     &alpha, c.get_pointer(), &i1, &ivirt, pc.desc,
+        //     Xc.data<std::complex<double>>(), &i1, &i1, pXc.desc,
+        //     &beta, dm_trans[isk].data<std::complex<double>>(), &i1, &i1, pmat.desc);
 
-            // ============== [C_virt * X * C_occ^\dagger]^T=============
-            // ============== = [C_occ^* * X^T * C_virt^T]^T=============
-            // 1. X*C_occ^\dagger
-            char transa = 'N';
-            char transb = 'C';
-            Parallel_2D pXc;
-            LR_Util::setup_2d_division(pXc, px.get_block_size(), nvirt, naos, px.comm_2D, px.blacs_ctxt);
-            container::Tensor Xc(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { pXc.get_col_size(), pXc.get_row_size() });//row is "inside"(memory contiguity) for pblas
-            Xc.zero();
-            std::complex<double> alpha(1.0, 0.0);
-            const std::complex<double> beta(0.0, 0.0);
-            pzgemm_(&transa, &transb, &nvirt, &naos, &nocc,
-                &alpha, X_istate.get_pointer(), &i1, &i1, px.desc,
-                c.get_pointer(), &i1, &i1, pc.desc,
-                &beta, Xc.data<std::complex<double>>(), &i1, &i1, pXc.desc);
+        // ============== [C_virt * X * C_occ^\dagger]^T=============
+        // ============== = [C_occ^* * X^T * C_virt^T]^T=============
+        // 1. X*C_occ^\dagger
+        char transa = 'N';
+        char transb = 'C';
+        Parallel_2D pXc;
+        LR_Util::setup_2d_division(pXc, px.get_block_size(), nvirt, naos, px.comm_2D, px.blacs_ctxt);
+        container::Tensor Xc(DAT::DT_COMPLEX_DOUBLE,
+                             DEV::CpuDevice,
+                             {pXc.get_col_size(), pXc.get_row_size()}); // row is "inside"(memory contiguity) for pblas
+        Xc.zero();
+        std::complex<double> alpha(1.0, 0.0);
+        const std::complex<double> beta(0.0, 0.0);
+        pzgemm_(&transa,
+                &transb,
+                &nvirt,
+                &naos,
+                &nocc,
+                &alpha,
+                X_istate.get_pointer(),
+                &i1,
+                &i1,
+                px.desc,
+                c.get_pointer(),
+                &i1,
+                &i1,
+                pc.desc,
+                &beta,
+                Xc.data<std::complex<double>>(),
+                &i1,
+                &i1,
+                pXc.desc);
 
-            // 2. [X*C_occ^\dagger]^TC_virt^T
-            alpha.real(renorm_k ? 1.0 / static_cast<double>(nks) : 1.0);
-            transa = transb = 'T';
-            pzgemm_(&transa, &transb, &naos, &naos, &nvirt,
-                &alpha, Xc.data<std::complex<double>>(), &i1, &i1, pXc.desc,
-                c.get_pointer(), &i1, &ivirt, pc.desc,
-                &beta, dm_trans[isk].data<std::complex<double>>(), &i1, &i1, pmat.desc);
-        }
-        return dm_trans;
+        // 2. [X*C_occ^\dagger]^TC_virt^T
+        alpha.real(renorm_k ? 1.0 / static_cast<double>(nks) : 1.0);
+        transa = transb = 'T';
+        pzgemm_(&transa,
+                &transb,
+                &naos,
+                &naos,
+                &nvirt,
+                &alpha,
+                Xc.data<std::complex<double>>(),
+                &i1,
+                &i1,
+                pXc.desc,
+                c.get_pointer(),
+                &i1,
+                &ivirt,
+                pc.desc,
+                &beta,
+                dm_trans[isk].data<std::complex<double>>(),
+                &i1,
+                &i1,
+                pmat.desc);
     }
-
+    return dm_trans;
+}
 }
 #endif
diff --git a/source/module_beyonddft/esolver_lrtd_lcao.cpp b/source/module_beyonddft/esolver_lrtd_lcao.cpp
index 54c90ff15a..619eab6e3d 100644
--- a/source/module_beyonddft/esolver_lrtd_lcao.cpp
+++ b/source/module_beyonddft/esolver_lrtd_lcao.cpp
@@ -74,9 +74,11 @@ void ModuleESolver::ESolver_LRTD<T, TR>::parameter_check()
     if (this->nspin != 1 && this->nspin != 2)
         throw std::invalid_argument("LR-TDDFT only supports nspin = 1 or 2 now");
 }
-template<typename T, typename TR>
+template <typename T, typename TR>
 ModuleESolver::ESolver_LRTD<T, TR>::ESolver_LRTD(ModuleESolver::ESolver_KS_LCAO<T, TR>&& ks_sol,
-    Input& inp, UnitCell& ucell) : input(inp), ucell(ucell)
+                                                 Input& inp,
+                                                 UnitCell& ucell)
+    : input(inp), ucell(ucell)
 {
     redirect_log(inp.out_alllog);
     ModuleBase::TITLE("ESolver_LRTD", "ESolver_LRTD");
@@ -103,7 +105,7 @@ ModuleESolver::ESolver_LRTD<T, TR>::ESolver_LRTD(ModuleESolver::ESolver_KS_LCAO<
 
     //allocate 2-particle state and setup 2d division
     this->nbasis = GlobalV::NLOCAL;
-    this->nstates = inp.nstates;
+    this->nstates = inp.lr_nstates;
     this->init_X(inp.nvirt);
 
     //grid integration
@@ -147,8 +149,7 @@ ModuleESolver::ESolver_LRTD<T, TR>::ESolver_LRTD(ModuleESolver::ESolver_KS_LCAO<
     this->pelec = new elecstate::ElecStateLCAO<T>();
 }
 
-
-template<typename T, typename TR>
+template <typename T, typename TR>
 ModuleESolver::ESolver_LRTD<T, TR>::ESolver_LRTD(Input& inp, UnitCell& ucell) : input(inp), ucell(ucell)
 {
     redirect_log(inp.out_alllog);
@@ -205,7 +206,7 @@ ModuleESolver::ESolver_LRTD<T, TR>::ESolver_LRTD(Input& inp, UnitCell& ucell) :
     this->read_ks_wfc();
 
     //allocate 2-particle state and setup 2d division
-    this->nstates = inp.nstates;
+    this->nstates = inp.lr_nstates;
     this->init_X(inp.nvirt);
     this->pelec = new elecstate::ElecState();
 
diff --git a/source/module_beyonddft/esolver_lrtd_lcao.h b/source/module_beyonddft/esolver_lrtd_lcao.h
index d623607aad..7908e561d3 100644
--- a/source/module_beyonddft/esolver_lrtd_lcao.h
+++ b/source/module_beyonddft/esolver_lrtd_lcao.h
@@ -79,11 +79,11 @@ namespace ModuleESolver
         /// @brief Excited state info. size: nstates * nks * (nocc(local) * nvirt (local))
         psi::Psi<T>* X;
 
-        int nocc;
-        int nvirt;
-        int nbasis;
+        int nocc = 1;
+        int nvirt = 1;
+        int nbasis = 2;
         /// n_occ*n_unocc, the basis size of electron-hole pair representation
-        int npairs;
+        int npairs = 1;
         /// how many 2-particle states to be solved
         int nstates = 1;
         int nspin = 1;
diff --git a/source/module_beyonddft/move_gint.hpp b/source/module_beyonddft/move_gint.hpp
index 0cd1c4d89c..cf37175c2b 100644
--- a/source/module_beyonddft/move_gint.hpp
+++ b/source/module_beyonddft/move_gint.hpp
@@ -79,193 +79,4 @@ Gint_k& Gint_k::operator=(Gint_k&& rhs)
     if (this == &rhs)return *this;
     this->Gint::operator=(std::move(rhs));
     return *this;
-}
-
-Grid_MeshK& Grid_MeshK::operator=(Grid_MeshK&& rhs)
-{
-    if (this == &rhs)return *this;
-    
-    this->maxu1 = rhs.maxu1;
-    this->maxu2 = rhs.maxu2;
-    this->maxu3 = rhs.maxu3;
-    this->minu1 = rhs.minu1;
-    this->minu2 = rhs.minu2;
-    this->minu3 = rhs.minu3;
-    this->nu1 = rhs.nu1;
-    this->nu2 = rhs.nu2;
-    this->nu3 = rhs.nu3;
-    this->nutot = rhs.nutot;
-
-    this->ucell_index2x = std::move(rhs.ucell_index2x);
-    this->ucell_index2y = std::move(rhs.ucell_index2y);
-    this->ucell_index2z = std::move(rhs.ucell_index2z);
-
-    this->max_ucell_para = std::move(rhs.max_ucell_para);
-    this->min_ucell_para = std::move(rhs.min_ucell_para);
-    this->num_ucell_para = std::move(rhs.num_ucell_para);
-    return *this;
-}
-
-Grid_MeshCell& Grid_MeshCell::operator=(Grid_MeshCell&& rhs)
-{
-    if (this == &rhs)return *this;
-    this->Grid_MeshK::operator=(std::move(rhs));
-    
-    this->ncx = rhs.ncx;
-    this->ncy = rhs.ncy;
-    this->ncz = rhs.ncz;
-    this->ncxyz = rhs.ncxyz;
-    this->bx = rhs.bx;
-    this->by = rhs.by;
-    this->bz = rhs.bz;
-    this->bxyz = rhs.bxyz;
-    this->nbx = rhs.nbx;
-    this->nby = rhs.nby;
-    this->nbz = rhs.nbz;
-    this->nbxyz = rhs.nbxyz;
-    this->nbxx = rhs.nbxx;
-    this->nbzp_start = rhs.nbzp_start;
-    this->nbzp = rhs.nbzp;
-
-    this->meshcell_vec1 = std::move(rhs.meshcell_vec1);
-    this->meshcell_vec2 = std::move(rhs.meshcell_vec2);
-    this->meshcell_vec3 = std::move(rhs.meshcell_vec3);
-
-    this->meshcell_latvec0 = std::move(rhs.meshcell_latvec0);
-    this->meshcell_GT = std::move(rhs.meshcell_GT);
-    this->meshcell_pos = std::move(rhs.meshcell_pos);
-    return *this;
-}
-
-Grid_BigCell& Grid_BigCell::operator=(Grid_BigCell&& rhs)
-{
-    if (this == &rhs)return *this;
-    this->Grid_MeshCell::operator=(std::move(rhs));
-    this->orbital_rmax = rhs.orbital_rmax;
-    this->nat = rhs.nat;
-    this->dxe = rhs.dxe;
-    this->dye = rhs.dye;
-    this->dze = rhs.dze;
-    this->nxe = rhs.nxe;
-    this->nye = rhs.nye;
-    this->nze = rhs.nze;
-    this->nxyze = rhs.nxyze;
-
-    this->bigcell_vec1 = std::move(rhs.bigcell_vec1);
-    this->bigcell_vec2 = std::move(rhs.bigcell_vec2);
-    this->bigcell_vec3 = std::move(rhs.bigcell_vec3);
-    this->bigcell_latvec0 = std::move(rhs.bigcell_latvec0);
-    this->bigcell_GT = std::move(rhs.bigcell_GT);
-
-    this->tau_in_bigcell = std::move(rhs.tau_in_bigcell);
-    this->index_atom = std::move(rhs.index_atom);
-    return *this;
-}
-
-Grid_MeshBall& Grid_MeshBall::operator=(Grid_MeshBall&& rhs)
-{
-    if (this == &rhs)return *this;
-    this->Grid_BigCell::operator=(std::move(rhs));
-
-    this->meshball_radius = rhs.meshball_radius;
-    this->meshball_ncells = rhs.meshball_ncells;
-
-    this->index_ball = std::move(rhs.index_ball);
-    this->meshball_positions = std::move(rhs.meshball_positions);
-    return *this;
-}
-
-Grid_Technique& Grid_Technique::operator=(Grid_Technique&& rhs)
-{
-    if (this == &rhs)return *this;
-    this->Grid_MeshBall::operator=(std::move(rhs));
-    // so many members to move
-    // following the order rather than by type
-    this->how_many_atoms = std::move(rhs.how_many_atoms);
-
-    this->max_atom = rhs.max_atom;
-    this->total_atoms_on_grid = rhs.total_atoms_on_grid;
-
-    this->start_ind = std::move(rhs.start_ind);
-    this->bcell_start = std::move(rhs.bcell_start);
-    this->which_atom = std::move(rhs.which_atom);
-    this->which_bigcell = std::move(rhs.which_bigcell);
-    this->which_unitcell = std::move(rhs.which_unitcell);
-
-    this->lnat = rhs.lnat;
-    this->lgd = rhs.lgd;
-
-    this->in_this_processor = std::move(rhs.in_this_processor);
-    this->trace_iat = std::move(rhs.trace_iat);
-    this->trace_lo = std::move(rhs.trace_lo);
-
-    this->nnrg = rhs.nnrg;
-    this->allocate_find_R2 = rhs.allocate_find_R2;
-
-    this->nlocdimg = std::move(rhs.nlocdimg);
-    this->nlocstartg = std::move(rhs.nlocstartg);
-    this->nad = std::move(rhs.nad);
-
-    this->find_R2 = std::move(rhs.find_R2);
-    this->find_R2_sorted_index = std::move(rhs.find_R2_sorted_index);
-    this->find_R2st = std::move(rhs.find_R2st);
-
-    this->ucell = rhs.ucell;
-    this->orb = rhs.orb;
-
-    this->nwmax = rhs.nwmax;
-    this->nr_max = rhs.nr_max;
-    this->ntype = rhs.ntype;
-    this->dr_uniform = rhs.dr_uniform;
-
-    this->rcuts = std::move(rhs.rcuts);
-    this->psi_u = std::move(rhs.psi_u);
-    this->dpsi_u = std::move(rhs.dpsi_u);
-    this->d2psi_u = std::move(rhs.d2psi_u);
-
-    this->nnrg_index = std::move(rhs.nnrg_index);
-
-    this->maxB1 = rhs.maxB1;
-    this->maxB2 = rhs.maxB2;
-    this->maxB3 = rhs.maxB3;
-    this->minB1 = rhs.minB1;
-    this->minB2 = rhs.minB2;
-    this->minB3 = rhs.minB3;
-    this->nB1 = rhs.nB1;
-    this->nB2 = rhs.nB2;
-    this->nB3 = rhs.nB3;
-    this->nbox = rhs.nbox;
-    return *this;
-}
-
-#include "module_cell/klist.h"
-K_Vectors& K_Vectors::operator=(K_Vectors&& rhs)
-{
-    if (this == &rhs)return *this;
-    this->kvec_c = std::move(rhs.kvec_c);
-    this->kvec_d = std::move(rhs.kvec_d);
-    this->kvec_d_ibz = std::move(rhs.kvec_d_ibz);
-    this->wk = std::move(rhs.wk);
-    this->wk_ibz = std::move(rhs.wk_ibz);
-    this->ngk = std::move(rhs.ngk);
-    this->isk = std::move(rhs.isk);
-    this->ibz2bz = std::move(rhs.ibz2bz);
-    this->nks = rhs.nks;
-    this->nkstot = rhs.nkstot;
-    this->nkstot_ibz = rhs.nkstot_ibz;
-    this->nkstot_full = rhs.nkstot_full;
-
-    this->nspin = rhs.nspin;
-    this->kc_done = rhs.kc_done;
-    this->kd_done = rhs.kd_done;
-    this->k_kword = rhs.k_kword;
-    this->k_nkstot = rhs.k_nkstot;
-    this->is_mp = rhs.is_mp;
-
-    for (int i = 0;i < 3;++i)
-    {
-        this->nmp[i] = rhs.nmp[i];
-        this->koffset[i] = rhs.koffset[i];
-    }
-    return *this;
 }
\ No newline at end of file
diff --git a/source/module_beyonddft/operator_casida/operator_lr_hxc.h b/source/module_beyonddft/operator_casida/operator_lr_hxc.h
index 93e1ae9604..d513ad609d 100644
--- a/source/module_beyonddft/operator_casida/operator_lr_hxc.h
+++ b/source/module_beyonddft/operator_casida/operator_lr_hxc.h
@@ -90,7 +90,6 @@ namespace hamilt
         template<typename TR>
         void init_DM_trans(const int& nbands, std::vector<elecstate::DensityMatrix<T, TR>*>& DM_trans)const
         {
-            if (DM_trans[0] == nullptr) DM_trans[0]->init_DMR(*this->hR);
             // LR_Util::print_DMR(*this->DM_trans[0], ucell.nat, "DMR[ib=" + std::to_string(0) + "]");
             if (this->next_op != nullptr)
             {
diff --git a/source/module_beyonddft/utils/lr_util.hpp b/source/module_beyonddft/utils/lr_util.hpp
index 1c9e54fc22..a6a1295c9b 100644
--- a/source/module_beyonddft/utils/lr_util.hpp
+++ b/source/module_beyonddft/utils/lr_util.hpp
@@ -86,20 +86,37 @@ namespace LR_Util
         }
     };
 
+    inline double get_conj(const double& x)
+    {
+        return x;
+    }
+    inline std::complex<double> get_conj(const std::complex<double>& x)
+    {
+        return std::conj(x);
+    }
     template <typename T>
     void matsym(const T* in, const int n, T* out)
     {
-        for (int i = 0;i < n;++i)  out[i * n + i] = in[i * n + i];
+        for (int i = 0; i < n; ++i)
+            out[i * n + i] = 0.5 * in[i * n + i] + 0.5 * get_conj(in[i * n + i]);
         for (int i = 0;i < n;++i)
             for (int j = i + 1;j < n;++j)
-                out[i * n + j] = out[j * n + i] = 0.5 * (in[i * n + j] + in[j * n + i]);
+            {
+                out[i * n + j] = 0.5 * (in[i * n + j] + get_conj(in[j * n + i]));
+                out[j * n + i] = get_conj(out[i * n + j]);
+            }
     }
     template <typename T>
     void matsym(T* inout, const int n)
     {
+        for (int i = 0; i < n; ++i)
+            inout[i * n + i] = 0.5 * (inout[i * n + i] + get_conj(inout[i * n + i]));
         for (int i = 0;i < n;++i)
             for (int j = i + 1;j < n;++j)
-                inout[i * n + j] = inout[j * n + i] = 0.5 * (inout[i * n + j] + inout[j * n + i]);
+            {
+                inout[i * n + j] = 0.5 * (inout[i * n + j] + get_conj(inout[j * n + i]));
+                inout[j * n + i] = get_conj(inout[i * n + j]);
+            }
     }
 
     /// psi(nk=1, nbands=nb, nk * nbasis) -> psi(nb, nk, nbasis) without memory copy
diff --git a/source/module_cell/klist.h b/source/module_cell/klist.h
index d8e46f97a0..7381cd5a05 100644
--- a/source/module_cell/klist.h
+++ b/source/module_cell/klist.h
@@ -27,7 +27,7 @@ class K_Vectors
     K_Vectors();
     ~K_Vectors();
     K_Vectors& operator=(const K_Vectors&) = default;
-    K_Vectors& operator=(K_Vectors&& rhs);
+    K_Vectors& operator=(K_Vectors&& rhs) = default;
 
     /**
      * @brief Set up the k-points for the system.
diff --git a/source/module_esolver/esolver.cpp b/source/module_esolver/esolver.cpp
index b2c07707d3..51ff321c9b 100644
--- a/source/module_esolver/esolver.cpp
+++ b/source/module_esolver/esolver.cpp
@@ -5,6 +5,11 @@
 #ifdef __LCAO
 #include "esolver_ks_lcao.h"
 #include "esolver_ks_lcao_tddft.h"
+#include "module_beyonddft/esolver_lrtd_lcao.h"
+extern "C"
+{
+#include "module_base/blacs_connector.h"
+}
 #endif
 #include "esolver_dp.h"
 #include "esolver_lj.h"
@@ -114,7 +119,7 @@ std::string determine_type(void)
 
 
 //Some API to operate E_Solver
-void init_esolver(ESolver*& p_esolver)
+void init_esolver(ESolver*& p_esolver, Input& input, UnitCell& ucell)
 {
 	//determine type of esolver based on INPUT information
 	std::string esolver_type = determine_type();
@@ -146,25 +151,75 @@ void init_esolver(ESolver*& p_esolver)
         }
     }
 #ifdef __LCAO
-    else if (esolver_type == "ksdft_lcao" || "ksdft_lr_lcao")
-	{
-		if (GlobalV::GAMMA_ONLY_LOCAL)
-		{
-			p_esolver = new ESolver_KS_LCAO<double, double>();
-		}
-		else if (GlobalV::NSPIN < 4)
-		{
-			p_esolver = new ESolver_KS_LCAO<std::complex<double>, double>();
-		}
-		else
-		{
-			p_esolver = new ESolver_KS_LCAO<std::complex<double>, std::complex<double>>();
-		}
-	}
-	else if (esolver_type == "ksdft_lcao_tddft")
-	{
-		p_esolver = new ESolver_KS_LCAO_TDDFT();
-	}
+    else if (esolver_type == "ksdft_lcao")
+    {
+        if (GlobalV::GAMMA_ONLY_LOCAL)
+        {
+            p_esolver = new ESolver_KS_LCAO<double, double>();
+        }
+        else if (GlobalV::NSPIN < 4)
+        {
+            p_esolver = new ESolver_KS_LCAO<std::complex<double>, double>();
+        }
+        else
+        {
+            p_esolver = new ESolver_KS_LCAO<std::complex<double>, std::complex<double>>();
+        }
+    }
+    else if (esolver_type == "ksdft_lcao_tddft")
+    {
+        p_esolver = new ESolver_KS_LCAO_TDDFT();
+    }
+    else if (esolver_type == "lr_lcao")
+    {
+        // use constructor rather than Init function to initialize reference (instead of pointers) to ucell
+        if (GlobalV::GAMMA_ONLY_LOCAL)
+            p_esolver = new ModuleESolver::ESolver_LRTD<double, double>(input, ucell);
+        else if (GlobalV::NSPIN < 4)
+            p_esolver = new ModuleESolver::ESolver_LRTD<std::complex<double>, double>(input, ucell);
+        else
+            throw std::runtime_error("LR-TDDFT is not implemented for spin polarized case");
+    }
+    else if (esolver_type == "ksdft_lr_lcao")
+    {
+        // initialize the 1st ESolver_KS
+        if (GlobalV::GAMMA_ONLY_LOCAL)
+        {
+            p_esolver = new ESolver_KS_LCAO<double, double>();
+        }
+        else if (GlobalV::NSPIN < 4)
+        {
+            p_esolver = new ESolver_KS_LCAO<std::complex<double>, double>();
+        }
+        else
+        {
+            p_esolver = new ESolver_KS_LCAO<std::complex<double>, std::complex<double>>();
+        }
+        p_esolver->before_all_runners(input, ucell);
+        p_esolver->runner(0, ucell); // scf-only
+        // force and stress is not needed currently,
+        // they will be supported after the analytical gradient
+        // of LR-TDDFT is implemented.
+
+        p_esolver->after_all_runners();
+        std::cout << "setting up the esolver for excited state" << std::endl;
+        // initialize the 2nd ESolver_LR at the temporary pointer
+        ModuleESolver::ESolver* p_esolver_lr = nullptr;
+        if (INPUT.gamma_only)
+            p_esolver_lr = new ModuleESolver::ESolver_LRTD<double, double>(
+                std::move(*dynamic_cast<ModuleESolver::ESolver_KS_LCAO<double, double>*>(p_esolver)),
+                INPUT,
+                ucell);
+        else
+            p_esolver_lr = new ModuleESolver::ESolver_LRTD<std::complex<double>, double>(
+                std::move(*dynamic_cast<ModuleESolver::ESolver_KS_LCAO<std::complex<double>, double>*>(p_esolver)),
+                INPUT,
+                ucell);
+        // clean the 1st ESolver_KS and swap the pointer
+        ModuleESolver::clean_esolver(p_esolver, false); // do not call Cblacs_exit, remain it for the 2nd ESolver
+        p_esolver = p_esolver_lr;
+        p_esolver_lr = nullptr;
+    }
 #endif
 	else if (esolver_type == "sdft_pw")
 	{
@@ -184,10 +239,12 @@ void init_esolver(ESolver*& p_esolver)
 	}
 }
 
-
-void clean_esolver(ESolver*& pesolver)
+void clean_esolver(ESolver*& pesolver, const bool lcao_cblacs_exit)
 {
 	delete pesolver;
+#ifdef __LCAO
+    if (lcao_cblacs_exit)
+        Cblacs_exit(1);
+#endif
 }
-
 }
diff --git a/source/module_esolver/esolver.h b/source/module_esolver/esolver.h
index af7d870b7e..0548d2ac00 100644
--- a/source/module_esolver/esolver.h
+++ b/source/module_esolver/esolver.h
@@ -84,9 +84,9 @@ std::string determine_type(void);
  *
  * @param [in, out] p_esolver A pointer to an ESolver object that will be initialized.
  */
-void init_esolver(ESolver*& p_esolver);
+void init_esolver(ESolver*& p_esolver, Input& input, UnitCell& ucell);
 
-void clean_esolver(ESolver*& pesolver);
+void clean_esolver(ESolver*& pesolver, const bool lcao_cblacs_exit = false);
 
 } // namespace ModuleESolver
 
diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.h b/source/module_hamilt_lcao/module_gint/grid_bigcell.h
index 9698a79598..87960e7e18 100644
--- a/source/module_hamilt_lcao/module_gint/grid_bigcell.h
+++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.h
@@ -18,10 +18,10 @@ class Grid_BigCell: public Grid_MeshCell
         std::vector<std::vector<double>> tau_in_bigcell;
 
         /// move operator for the next ESolver to directly use its infomation
-        Grid_BigCell& operator=(Grid_BigCell&& rhs);
+        Grid_BigCell& operator=(Grid_BigCell&& rhs) = default;
 
-	protected:
-		// get the max radius of all orbitals
+      protected:
+        // get the max radius of all orbitals
 		// which will use to generate grid expansion,
 		// and the meshball.
 		double orbital_rmax;
diff --git a/source/module_hamilt_lcao/module_gint/grid_meshball.h b/source/module_hamilt_lcao/module_gint/grid_meshball.h
index 43c4c768e9..571d59126e 100644
--- a/source/module_hamilt_lcao/module_gint/grid_meshball.h
+++ b/source/module_hamilt_lcao/module_gint/grid_meshball.h
@@ -12,9 +12,9 @@ class Grid_MeshBall : public Grid_BigCell
         std::vector<std::vector<double>> meshball_positions;
 
         /// move operator for the next ESolver to directly use its infomation
-        Grid_MeshBall& operator=(Grid_MeshBall&& rhs);
+        Grid_MeshBall& operator=(Grid_MeshBall&& rhs) = default;
 
-	protected:
+      protected:
 		// number of meshcells in meshball.
 		int meshball_ncells=0;
 		// used in index2normal
diff --git a/source/module_hamilt_lcao/module_gint/grid_meshcell.h b/source/module_hamilt_lcao/module_gint/grid_meshcell.h
index c4a2d2d436..71d57b5e88 100644
--- a/source/module_hamilt_lcao/module_gint/grid_meshcell.h
+++ b/source/module_hamilt_lcao/module_gint/grid_meshcell.h
@@ -27,12 +27,10 @@ class Grid_MeshCell: public Grid_MeshK
 	std::vector<double> meshcell_vec2;
 	std::vector<double> meshcell_vec3;
 
-    /// @brief move operator for the next ESolver to directly use its infomation
-    /// @param rhs
-    /// @return *this
-    Grid_MeshCell& operator=(Grid_MeshCell&& rhs);
+    /// move operator for the next ESolver to directly use its infomation
+    Grid_MeshCell& operator=(Grid_MeshCell&& rhs) = default;
 
-	void set_grid_dim(
+    void set_grid_dim(
 			const int &ncx_in,
 			const int &ncy_in,
 			const int &ncz_in,
diff --git a/source/module_hamilt_lcao/module_gint/grid_meshk.h b/source/module_hamilt_lcao/module_gint/grid_meshk.h
index e13e7d8364..c541a0b20f 100644
--- a/source/module_hamilt_lcao/module_gint/grid_meshk.h
+++ b/source/module_hamilt_lcao/module_gint/grid_meshk.h
@@ -22,9 +22,9 @@ class Grid_MeshK
         int cal_Rindex(const int& u1, const int& u2, const int& u3)const;
 
         /// move operator for the next ESolver to directly use its infomation
-        Grid_MeshK& operator=(Grid_MeshK&& rhs);
+        Grid_MeshK& operator=(Grid_MeshK&& rhs) = default;
 
-	protected:
+      protected:
 		// the max and the min unitcell.
 		int maxu1;
 		int maxu2;
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h
index f490e6a154..47e5253c0c 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.h
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.h
@@ -24,7 +24,7 @@ class Grid_Technique : public Grid_MeshBall
     ~Grid_Technique();
 
     /// move operator for the next ESolver to directly use its infomation
-    Grid_Technique& operator=(Grid_Technique&& rhs);
+    Grid_Technique& operator=(Grid_Technique&& rhs) = default;
     //------------------------------------
     // 1: Info about atom number on grid.
     //------------------------------------
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index 13e9d9fb08..29f33b28b3 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -697,7 +697,7 @@ void Input::Default(void)
     //==========================================================
     //    beyond-dft   
     //==========================================================
-    nstates = 0;
+    lr_nstates = 0;
     nvirt = 0;
     xc_kernel = "LDA";
     lr_solver = "dav";
@@ -2565,9 +2565,9 @@ bool Input::Read(const std::string& fn)
         //----------------------------------------------------------------------------------
         // beyond dft
         //----------------------------------------------------------------------------------
-        else if (strcmp("nstates", word) == 0)
+        else if (strcmp("lr_nstates", word) == 0)
         {
-            read_value(ifs, nstates);
+            read_value(ifs, lr_nstates);
         }
         else if (strcmp("nvirt", word) == 0)
         {
@@ -4039,13 +4039,16 @@ void Input::Bcast()
     //----------------------------------------------------------------------------------
     //    beyond dft
     //----------------------------------------------------------------------------------
-    Parallel_Common::bcast_int(nstates);
+    Parallel_Common::bcast_int(lr_nstates);
     Parallel_Common::bcast_int(nvirt);
     Parallel_Common::bcast_string(xc_kernel);
     Parallel_Common::bcast_string(lr_solver);
     Parallel_Common::bcast_double(lr_thr);
     Parallel_Common::bcast_double(abs_broadening);
     if (abs_wavelen_range.size())Parallel_Common::bcast_double(abs_wavelen_range.data(), abs_wavelen_range.size());
+    Parallel_Common::bcast_double(abs_broadening);
+    Parallel_Common::bcast_bool(out_wfc_lr);
+
     return;
 }
 #endif
diff --git a/source/module_io/input.h b/source/module_io/input.h
index 04c2513b32..1c95cb35fc 100644
--- a/source/module_io/input.h
+++ b/source/module_io/input.h
@@ -658,7 +658,7 @@ class Input
     //==========================================================
     //Beyond DFT
     //==========================================================
-    int nstates;      // the number of 2-particle states to be solved
+    int lr_nstates;   // the number of 2-particle states to be solved
     int nvirt;        // the number of virtual orbitals to form the 2-particle basis (nocc + nvirt <= nbands)
     std::string xc_kernel; // xc kernel for LR-TDDFT
     std::string lr_solver; // the solver for LR-TDDFT
diff --git a/source/module_io/test/input_test.cpp b/source/module_io/test/input_test.cpp
index 2912cdc42a..508bad5834 100644
--- a/source/module_io/test/input_test.cpp
+++ b/source/module_io/test/input_test.cpp
@@ -387,7 +387,7 @@ TEST_F(InputTest, Default)
     EXPECT_DOUBLE_EQ(INPUT.alpha_trial, 0.01);
     EXPECT_DOUBLE_EQ(INPUT.sccut, 3.0);
     EXPECT_EQ(INPUT.sc_file, "none");
-    EXPECT_EQ(INPUT.nstates, 0);
+    EXPECT_EQ(INPUT.lr_nstates, 0);
     EXPECT_EQ(INPUT.nvirt, 0);
     EXPECT_EQ(INPUT.xc_kernel, "LDA");
     EXPECT_EQ(INPUT.lr_solver, "dav");