diff --git a/source/module_basis/module_ao/parallel_2d.h b/source/module_basis/module_ao/parallel_2d.h
index d3c88d5783..8aeea9792f 100644
--- a/source/module_basis/module_ao/parallel_2d.h
+++ b/source/module_basis/module_ao/parallel_2d.h
@@ -15,6 +15,7 @@ class Parallel_2D
     ~Parallel_2D() = default;
 
     Parallel_2D& operator=(Parallel_2D&& rhs) = default;
+    Parallel_2D(Parallel_2D&& rhs) = default;
 
     /// number of local rows
     int get_row_size() const
diff --git a/source/module_lr/esolver_lrtd_lcao.cpp b/source/module_lr/esolver_lrtd_lcao.cpp
index 4c87eadb40..fb9ad4670e 100644
--- a/source/module_lr/esolver_lrtd_lcao.cpp
+++ b/source/module_lr/esolver_lrtd_lcao.cpp
@@ -67,6 +67,21 @@ inline void redirect_log(const bool& out_alllog)
     }
 }
 
+inline int cal_nupdown_form_occ(const ModuleBase::matrix& wg)
+{   // only for nspin=2
+    const int& nk = wg.nr / 2;
+    auto occ_sum_k = [&](const int& is, const int& ib)->double { double o = 0.0; for (int ik = 0;ik < nk;++ik) { o += wg(is * nk + ik, ib); } return o;};
+    int nupdown = 0;
+    for (int ib = 0;ib < wg.nc;++ib)
+    {
+        const int nu = static_cast<int>(std::lround(occ_sum_k(0, ib)));
+        const int nd = static_cast<int>(std::lround(occ_sum_k(1, ib)));
+        if (!(nu + nd)) { break; }
+        nupdown += nu - nd;
+    }
+    return nupdown;
+}
+
 template<typename T, typename TR>
 void LR::ESolver_LR<T, TR>::parameter_check()const
 {
@@ -87,32 +102,25 @@ template<typename T, typename TR>
 void LR::ESolver_LR<T, TR>::set_dimension()
 {
     this->nspin = PARAM.inp.nspin;
-    if (nspin == 2) { std::cout << "** Assuming the spin-up and spin-down states are degenerate. **" << std::endl;
-}
     this->nstates = input.lr_nstates;
     this->nbasis = PARAM.globalv.nlocal;
     // calculate the number of occupied and unoccupied states
     // which determines the basis size of the excited states
     this->nocc_max = LR_Util::cal_nocc(LR_Util::cal_nelec(ucell));
-    this->nocc = std::max(1, std::min(input.nocc, this->nocc_max));
-    this->nvirt = PARAM.inp.nbands - this->nocc_max;   //nbands-nocc
-    if (input.nvirt > this->nvirt) {
-        GlobalV::ofs_warning << "ESolver_LR: input nvirt is too large to cover by nbands, set nvirt = nbands - nocc = " << this->nvirt << std::endl;
-    } else if (input.nvirt > 0) { this->nvirt = input.nvirt;
-}
-    this->nbands = this->nocc + this->nvirt;
-    this->npairs = this->nocc * this->nvirt;
+    this->nocc_in = std::max(1, std::min(input.nocc, this->nocc_max));
+    this->nvirt_in = PARAM.inp.nbands - this->nocc_max;   //nbands-nocc
+    if (input.nvirt > this->nvirt_in) { GlobalV::ofs_warning << "ESolver_LR: input nvirt is too large to cover by nbands, set nvirt = nbands - nocc = " << this->nvirt_in << std::endl; }
+    else if (input.nvirt > 0) { this->nvirt_in = input.nvirt; }
+    this->nbands = this->nocc_in + this->nvirt_in;
     this->nk = this->kv.get_nks() / this->nspin;
-    if (this->nstates > this->nocc * this->nvirt * this->nk) {
-        throw std::invalid_argument("ESolver_LR: nstates > nocc*nvirt*nk");
-}
-
+    this->nocc = { nocc_in, nocc_in };
+    this->nvirt = { nvirt_in, nvirt_in };
+    this->npairs = { nocc[0] * nvirt[0], nocc[1] * nvirt[1] };
     GlobalV::ofs_running << "Setting LR-TDDFT parameters: " << std::endl;
-    GlobalV::ofs_running << "number of occupied bands: " << this->nocc << std::endl;
-    GlobalV::ofs_running << "number of virtual bands: " << this->nvirt << std::endl;
+    GlobalV::ofs_running << "number of occupied bands: " << nocc_in << std::endl;
+    GlobalV::ofs_running << "number of virtual bands: " << nvirt_in << std::endl;
     GlobalV::ofs_running << "number of Atom orbitals (LCAO-basis size): " << this->nbasis << std::endl;
     GlobalV::ofs_running << "number of KS bands: " << this->eig_ks.nc << std::endl;
-    GlobalV::ofs_running << "number of electron-hole pairs (2-particle basis size): " << this->npairs << std::endl;
     GlobalV::ofs_running << "number of excited states to be solved: " << this->nstates << std::endl;
     if (input.ri_hartree_benchmark == "aims" && !input.aims_nbasis.empty())
     {
@@ -121,6 +129,20 @@ void LR::ESolver_LR<T, TR>::set_dimension()
     }
 }
 
+template<typename T, typename TR>
+void LR::ESolver_LR<T, TR>::reset_dim_spin2()
+{
+    if (nspin != 2) { return; }
+    if (nupdown == 0) { std::cout << "** Assuming the spin-up and spin-down states are degenerate. **" << std::endl; }
+    else
+    {
+        nupdown > 0 ? ((nocc[1] -= nupdown) && (nvirt[1] += nupdown)) : ((nocc[0] += nupdown) && (nvirt[0] -= nupdown));
+        // npairs = { nocc[0] * nvirt[0], nocc[1] * nvirt[1] };
+        std::cout << "** Solve the spin-up and spin-down states separately for open-shell system. **" << std::endl;
+    }
+    if (nstates > std::min(npairs[0], npairs[1]) * nk) { throw std::invalid_argument("ESolver_LR: nstates > nocc*nvirt*nk"); }
+}
+
 template <typename T, typename TR>
 LR::ESolver_LR<T, TR>::ESolver_LR(ModuleESolver::ESolver_KS_LCAO<T, TR>&& ks_sol,
     const Input_para& inp, UnitCell& ucell)
@@ -171,7 +193,7 @@ LR::ESolver_LR<T, TR>::ESolver_LR(ModuleESolver::ESolver_KS_LCAO<T, TR>&& ks_sol
     {
         this->psi_ks = new psi::Psi<T>(this->kv.get_nks(), this->paraC_.get_col_size(), this->paraC_.get_row_size());
         this->eig_ks.create(this->kv.get_nks(), this->nbands);
-        const int start_band = this->nocc_max - this->nocc;
+        const int start_band = this->nocc_max - std::max(nocc[0], nocc[1]);
         for (int ik = 0;ik < this->kv.get_nks();++ik)
         {
             Cpxgemr2d(this->nbasis, this->nbands, &(*ks_sol.psi)(ik, 0, 0), 1, start_band + 1, ks_sol.pv.desc_wfc,
@@ -182,6 +204,11 @@ LR::ESolver_LR<T, TR>::ESolver_LR(ModuleESolver::ESolver_KS_LCAO<T, TR>&& ks_sol
 #else
     move_gs();
 #endif
+    if (nspin == 2)
+    {
+        this->nupdown = cal_nupdown_form_occ(ks_sol.pelec->wg);
+        reset_dim_spin2();
+    }
 
     //grid integration
     this->gt_ = std::move(ks_sol.GridT);
@@ -275,6 +302,11 @@ LR::ESolver_LR<T, TR>::ESolver_LR(const Input_para& inp, UnitCell& ucell) : inpu
         this->paraMat_.ncol_bands,
         this->paraMat_.get_row_size());
     this->read_ks_wfc();
+    if (nspin == 2)
+    {
+        this->nupdown = cal_nupdown_form_occ(this->pelec->wg);
+        reset_dim_spin2();
+    }
 
     LR_Util::setup_2d_division(this->paraC_, 1, this->nbasis, this->nbands
 #ifdef __MPI
@@ -409,10 +441,10 @@ void LR::ESolver_LR<T, TR>::runner(int istep, UnitCell& cell)
 #ifdef __EXX
                 this->exx_lri, this->exx_info.info_global.hybrid_alpha,
 #endif
-                this->gint_, this->pot[is], this->kv, & this->paraX_, & this->paraC_, & this->paraMat_, 
+                this->gint_, this->pot[is], this->kv, this->paraX_, & this->paraC_, & this->paraMat_,
                 spin_type[is], input.ri_hartree_benchmark, (input.ri_hartree_benchmark == "aims" ? input.aims_nbasis : std::vector<int>({})));
             // solve the Casida equation
-            HSolverLR<T> hsol(nk, this->npairs, is, this->input.out_wfc_lr);
+            HSolverLR<T> hsol(nk, this->npairs[is], is, this->input.out_wfc_lr);
             hsol.set_diagethr(hsol.diag_ethr, 0, 0, std::max(1e-13, this->input.lr_thr));
             hsol.solve(phamilt, *this->X[is], this->pelec, this->input.lr_solver/*,
                 !std::set<std::string>({ "hf", "hse" }).count(this->xc_kernel)*/);  //whether the kernel is Hermitian
@@ -450,8 +482,8 @@ void LR::ESolver_LR<T, TR>::after_all_runners()
     for (int is = 0;is < this->nspin;++is)
     {
         LR_Spectrum<T> spectrum(&this->pelec->ekb.c[is * this->nstates], *this->X[is],
-            this->nspin, this->nbasis, this->nocc, this->nvirt, this->gint_, *this->pw_rho, *this->psi_ks,
-            this->ucell, this->kv, this->paraX_, this->paraC_, this->paraMat_);
+            this->nspin, this->nbasis, this->nocc[is], this->nvirt[is], this->gint_, *this->pw_rho, *this->psi_ks,
+            this->ucell, this->kv, this->paraX_[is], this->paraC_, this->paraMat_);
         spectrum.oscillator_strength();
         spectrum.transition_analysis(is);
         spectrum.optical_absorption(freq, input.abs_broadening, is);
@@ -463,11 +495,16 @@ template<typename T, typename TR>
 void LR::ESolver_LR<T, TR>::setup_eigenvectors_X()
 {
     ModuleBase::TITLE("ESolver_LR", "setup_eigenvectors_X");
-    LR_Util::setup_2d_division(this->paraX_, 1, this->nvirt, this->nocc
+    for (int is = 0;is < nspin;++is)
+    {
+        Parallel_2D px;
+        LR_Util::setup_2d_division(px, /*nb2d=*/1, this->nvirt[is], this->nocc[is]
 #ifdef __MPI
-        , this->paraC_.blacs_ctxt
+            , this->paraC_.blacs_ctxt
 #endif
-    );//nvirt - row, nocc - col 
+        );//nvirt - row, nocc - col 
+        this->paraX_.emplace_back(std::move(px));
+    }
     this->X.resize(this->nspin);
     const std::vector<std::string> spin_types = { "Spin Singlet", "Spin Triplet" };
     // if spectrum-only, read the LR-eigenstates from file and return
@@ -486,7 +523,7 @@ void LR::ESolver_LR<T, TR>::setup_eigenvectors_X()
         {
             this->X[is] = std::make_shared<psi::Psi<T>>(this->nk,
                 this->nstates,
-                this->paraX_.get_local_size(),
+                this->paraX_[is].get_local_size(),
                 nullptr,
                 false); // band(state)-first
             this->X[is]->zero_out();
@@ -499,37 +536,40 @@ template<typename T, typename TR>
 void LR::ESolver_LR<T, TR>::set_X_initial_guess()
 {
     // set the initial guess of X
-  // if (E_{lumo}-E_{homo-1} < E_{lumo+1}-E{homo}), mode = 0, else 1(smaller first)
-    bool ix_mode = false;   //default
-    if (this->eig_ks.nc > nocc + 1 && nocc >= 2 && eig_ks(0, nocc) - eig_ks(0, nocc - 2) - 1e-5 > eig_ks(0, nocc + 1) - eig_ks(0, nocc - 1)) { ix_mode = true; }
-    GlobalV::ofs_running << "setting the initial guess of X: " << std::endl;
-    if (nocc >= 2 && eig_ks.nc > nocc) { GlobalV::ofs_running << "E_{lumo}-E_{homo-1}=" << eig_ks(0, nocc) - eig_ks(0, nocc - 2) << std::endl; }
-    if (nocc >= 1 && eig_ks.nc > nocc + 1) { GlobalV::ofs_running << "E_{lumo+1}-E{homo}=" << eig_ks(0, nocc + 1) - eig_ks(0, nocc - 1) << std::endl; }
-    GlobalV::ofs_running << "mode of X-index: " << ix_mode << std::endl;
-
-    /// global index map between (i,c) and ix
-    ModuleBase::matrix ioiv2ix;
-    std::vector<std::pair<int, int>> ix2ioiv;
-    std::pair<ModuleBase::matrix, std::vector<std::pair<int, int>>> indexmap =
-        LR_Util::set_ix_map_diagonal(ix_mode, nocc, nvirt);
-
-    ioiv2ix = std::move(std::get<0>(indexmap));
-    ix2ioiv = std::move(std::get<1>(indexmap));
-
-    // use unit vectors as the initial guess
-    // for (int i = 0; i < std::min(this->nstates * PARAM.inp.pw_diag_ndim, nocc * nvirt); i++)
     for (int is = 0;is < this->nspin;++is)
     {
+        const int& no = this->nocc[is];
+        const int& nv = this->nvirt[is];
+        const int& np = this->npairs[is];
+        const Parallel_2D& px = this->paraX_[is];
+
+        // if (E_{lumo}-E_{homo-1} < E_{lumo+1}-E{homo}), mode = 0, else 1(smaller first)
+        bool ix_mode = false;   //default
+        if (this->eig_ks.nc > no + 1 && no >= 2 && eig_ks(is, no) - eig_ks(is, no - 2) - 1e-5 > eig_ks(is, no + 1) - eig_ks(is, no - 1)) { ix_mode = true; }
+        GlobalV::ofs_running << "setting the initial guess of X: " << std::endl;
+        if (no >= 2 && eig_ks.nc > no) { GlobalV::ofs_running << "E_{lumo}-E_{homo-1}=" << eig_ks(is, no) - eig_ks(is, no - 2) << std::endl; }
+        if (no >= 1 && eig_ks.nc > no + 1) { GlobalV::ofs_running << "E_{lumo+1}-E{homo}=" << eig_ks(is, no + 1) - eig_ks(is, no - 1) << std::endl; }
+        GlobalV::ofs_running << "mode of X-index: " << ix_mode << std::endl;
+
+        /// global index map between (i,c) and ix
+        ModuleBase::matrix ioiv2ix;
+        std::vector<std::pair<int, int>> ix2ioiv;
+        std::pair<ModuleBase::matrix, std::vector<std::pair<int, int>>> indexmap =
+            LR_Util::set_ix_map_diagonal(ix_mode, no, nv);
+
+        ioiv2ix = std::move(std::get<0>(indexmap));
+        ix2ioiv = std::move(std::get<1>(indexmap));
+
         for (int s = 0; s < nstates; ++s)
         {
             this->X[is]->fix_b(s);
-            int ipair = s % (npairs);
+            int ipair = s % np;
             int occ_global = std::get<0>(ix2ioiv[ipair]);   // occ
             int virt_global = std::get<1>(ix2ioiv[ipair]);   // virt
-            int ik = s / (npairs);
-            if (this->paraX_.in_this_processor(virt_global, occ_global))
-                (*X[is])(ik, this->paraX_.global2local_col(occ_global) * this->paraX_.get_row_size()
-                    + this->paraX_.global2local_row(virt_global))
+            int ik = s / np;
+            if (px.in_this_processor(virt_global, occ_global))
+                (*X[is])(ik, px.global2local_col(occ_global) * px.get_row_size()
+                    + px.global2local_row(virt_global))
                 = (static_cast<T>(1.0) / static_cast<T>(nk));
         }
         this->X[is]->fix_b(0);  //recover the pointer
@@ -566,8 +606,8 @@ void LR::ESolver_LR<T, TR>::read_ks_wfc()
     {
 #ifdef __EXX
         int ncore = 0;
-        std::vector<double> eig_ks_vec = RI_Benchmark::read_aims_ebands<double>(PARAM.globalv.global_readin_dir + "band_out", nocc, nvirt, ncore);
-        std::cout << "ncore=" << ncore << ", nocc=" << nocc << ", nvirt=" << nvirt << ", nbands=" << this->nbands << std::endl;
+        std::vector<double> eig_ks_vec = RI_Benchmark::read_aims_ebands<double>(PARAM.globalv.global_readin_dir + "band_out", nocc_in, nvirt_in, ncore);
+        std::cout << "ncore=" << ncore << ", nocc=" << nocc_in << ", nvirt=" << nvirt_in << ", nbands=" << this->nbands << std::endl;
         std::cout << "eig_ks_vec.size()=" << eig_ks_vec.size() << std::endl;
         if(eig_ks_vec.size() != this->nbands) {ModuleBase::WARNING_QUIT("ESolver_LR", "read_aims_ebands failed.");};
         for (int i = 0;i < nbands;++i) { this->pelec->ekb(0, i) = eig_ks_vec[i]; }
@@ -577,7 +617,7 @@ void LR::ESolver_LR<T, TR>::read_ks_wfc()
 #endif
     }
     else if (!ModuleIO::read_wfc_nao(PARAM.globalv.global_readin_dir, this->paraMat_, *this->psi_ks, this->pelec,
-        /*skip_bands=*/this->nocc_max - this->nocc)) {
+        /*skip_bands=*/this->nocc_max - this->nocc_in)) {
         ModuleBase::WARNING_QUIT("ESolver_LR", "read ground-state wavefunction failed.");
     }
     this->eig_ks = std::move(this->pelec->ekb);
diff --git a/source/module_lr/esolver_lrtd_lcao.h b/source/module_lr/esolver_lrtd_lcao.h
index f9db98d765..61dfe6fe9f 100644
--- a/source/module_lr/esolver_lrtd_lcao.h
+++ b/source/module_lr/esolver_lrtd_lcao.h
@@ -68,17 +68,20 @@ namespace LR
         /// @brief Excited state info. size: nstates * nks * (nocc(local) * nvirt (local))
         std::vector<std::shared_ptr<psi::Psi<T>>> X;
 
-        int nocc = 1;
+        std::vector<int> nocc = { 1, 1 };   ///< number of occupied orbitals for each spin used in the calculation
+        int nocc_in = 1;    ///< nocc read from input (adjusted by nelec): max(spin-up, spindown)
         int nocc_max = 1;   ///< nelec/2
-        int nvirt = 1;
+        std::vector<int> nvirt = { 1, 1 };   ///< number of virtual orbitals for each spin used in the calculation
+        int nvirt_in = 1;   ///< nvirt read from input (adjusted by nelec): min(spin-up, spindown)
         int nbands = 2;
         int nbasis = 2;
         /// n_occ*nvirt, the basis size of electron-hole pair representation
-        int npairs = 1;
+        std::vector<int> npairs = { 1, 1 };
         /// how many 2-particle states to be solved
         int nstates = 1;
         int nspin = 1;
         int nk = 1;
+        int nupdown = 0;
         std::string xc_kernel;
 
         Grid_Technique gt_;
@@ -90,7 +93,7 @@ namespace LR
         /// @brief variables for parallel distribution of KS orbitals
         Parallel_2D paraC_;
         /// @brief variables for parallel distribution of excited states
-        Parallel_2D paraX_;
+        std::vector<Parallel_2D> paraX_;
         /// @brief variables for parallel distribution of matrix in AO representation
         Parallel_Orbitals paraMat_;
 
@@ -111,6 +114,8 @@ namespace LR
         void parameter_check() const;
         /// @brief set nocc, nvirt, nbasis, npairs and nstates
         void set_dimension();
+        /// reset nocc, nvirt, npairs after read ground-state wavefunction when nspin=2
+        void reset_dim_spin2();
 
 #ifdef __EXX
         /// Tdata of Exx_LRI is same as T, for the reason, see operator_lr_exx.h
diff --git a/source/module_lr/hamilt_casida.cpp b/source/module_lr/hamilt_casida.cpp
index 08a00bb09e..c6c509d1b7 100644
--- a/source/module_lr/hamilt_casida.cpp
+++ b/source/module_lr/hamilt_casida.cpp
@@ -5,21 +5,24 @@ namespace LR
     std::vector<T> HamiltCasidaLR<T>::matrix()
     {
         ModuleBase::TITLE("HamiltCasidaLR", "matrix");
-        int npairs = this->nocc * this->nvirt;
+        const int no = this->nocc[0];
+        const int nv = this->nvirt[0];
+        const auto& px = this->pX[0];
+        int npairs = no * nv;
         std::vector<T> Amat_full(this->nk * npairs * this->nk * npairs, 0.0);
         for (int ik = 0;ik < this->nk;++ik)
-            for (int j = 0;j < nocc;++j)
-                for (int b = 0;b < nvirt;++b)
+            for (int j = 0;j < no;++j)
+                for (int b = 0;b < nv;++b)
                 {//calculate A^{ai} for each bj
-                    int bj = j * nvirt + b;
+                    int bj = j * nv + b;
                     int kbj = ik * npairs + bj;
-                    psi::Psi<T> X_bj(1, 1, this->nk * this->pX->get_local_size()); // k1-first, like in iterative solver
+                    psi::Psi<T> X_bj(1, 1, this->nk * px.get_local_size()); // k1-first, like in iterative solver
                     X_bj.zero_out();
-                    // X_bj(0, 0, lj * this->pX->get_row_size() + lb) = this->one();
-                    int lj = this->pX->global2local_col(j);
-                    int lb = this->pX->global2local_row(b);
-                    if (this->pX->in_this_processor(b, j)) X_bj(0, 0, ik * this->pX->get_local_size() + lj * this->pX->get_row_size() + lb) = this->one();
-                    psi::Psi<T> A_aibj(1, 1, this->nk * this->pX->get_local_size()); // k1-first
+                    // X_bj(0, 0, lj * px.get_row_size() + lb) = this->one();
+                    int lj = px.global2local_col(j);
+                    int lb = px.global2local_row(b);
+                    if (px.in_this_processor(b, j)) X_bj(0, 0, ik * px.get_local_size() + lj * px.get_row_size() + lb) = this->one();
+                    psi::Psi<T> A_aibj(1, 1, this->nk * px.get_local_size()); // k1-first
                     A_aibj.zero_out();
 
                     hamilt::Operator<T>* node(this->ops);
@@ -32,9 +35,9 @@ namespace LR
                     A_aibj.fix_kb(0, 0);
 #ifdef __MPI
                     for (int ik_ai = 0;ik_ai < this->nk;++ik_ai)
-                        LR_Util::gather_2d_to_full(*this->pX, &A_aibj.get_pointer()[ik_ai * this->pX->get_local_size()],
+                        LR_Util::gather_2d_to_full(px, &A_aibj.get_pointer()[ik_ai * px.get_local_size()],
                             Amat_full.data() + kbj * this->nk * npairs /*col, bj*/ + ik_ai * npairs/*row, ai*/,
-                            false, this->nvirt, this->nocc);
+                            false, nv, no);
 #endif
                 }
         // output Amat
diff --git a/source/module_lr/hamilt_casida.h b/source/module_lr/hamilt_casida.h
index 8330716377..63ee792b65 100644
--- a/source/module_lr/hamilt_casida.h
+++ b/source/module_lr/hamilt_casida.h
@@ -19,8 +19,8 @@ namespace LR
         HamiltCasidaLR(std::string& xc_kernel,
             const int& nspin,
             const int& naos,
-            const int& nocc,
-            const int& nvirt,
+            const std::vector<int>& nocc,
+            const std::vector<int>& nvirt,
             const UnitCell& ucell_in,
             const std::vector<double>& orb_cutoff,
             Grid_Driver& gd_in,
@@ -33,7 +33,7 @@ namespace LR
             TGint* gint_in,
             std::weak_ptr<PotHxcLR> pot_in,
             const K_Vectors& kv_in,
-            Parallel_2D* pX_in,
+            std::vector<Parallel_2D>& pX_in,
             Parallel_2D* pc_in,
             Parallel_Orbitals* pmat_in,
             const std::string& spin_type,
@@ -46,7 +46,7 @@ namespace LR
             this->DM_trans.resize(1);
             this->DM_trans[0] = LR_Util::make_unique<elecstate::DensityMatrix<T, T>>(&kv_in, pmat_in, nspin);
             // add the diag operator  (the first one)
-            this->ops = new OperatorLRDiag<T>(eig_ks.c, pX_in, nk, nocc, nvirt);
+            this->ops = new OperatorLRDiag<T>(eig_ks.c, &pX[0], nk, nocc[0], nvirt[0]);
             //add Hxc operator
 #ifdef __EXX
             using TAC = std::pair<int, std::array<int, 3>>;
@@ -77,7 +77,7 @@ namespace LR
                     }
                     if (!std::set<std::string>({ "rpa", "hf" }).count(xc_kernel)) { throw std::runtime_error("ri_hartree_benchmark is only supported for xc_kernel rpa and hf"); }
                     RI_Benchmark::OperatorRIHartree<T>* ri_hartree_op
-                        = new RI_Benchmark::OperatorRIHartree<T>(ucell_in, naos, nocc, nvirt, *psi_ks_in,
+                        = new RI_Benchmark::OperatorRIHartree<T>(ucell_in, naos, nocc[0], nvirt[0], *psi_ks_in,
                             Cs_read, Vs_read, ri_hartree_benchmark == "aims", aims_nbasis);
                     this->ops->add(ri_hartree_op);
                 }
@@ -102,8 +102,8 @@ namespace LR
                     exx_lri_in.lock()->reset_Vs(Vs_read);
                 }
                 // std::cout << "exx_alpha=" << exx_alpha << std::endl; // the default value of exx_alpha is 0.25 when dft_functional is pbe or hse
-                hamilt::Operator<T>* lr_exx = new OperatorLREXX<T>(nspin, naos, nocc, nvirt, ucell_in, psi_ks_in,
-                    this->DM_trans, exx_lri_in, kv_in, pX_in, pc_in, pmat_in,
+                hamilt::Operator<T>* lr_exx = new OperatorLREXX<T>(nspin, naos, nocc[0], nvirt[0], ucell_in, psi_ks_in,
+                    this->DM_trans, exx_lri_in, kv_in, &pX_in[0], pc_in, pmat_in,
                     xc_kernel == "hf" ? 1.0 : exx_alpha, //alpha
                     ri_hartree_benchmark != "none"/*whether to cal_dm_trans first here*/,
                     aims_nbasis);
@@ -122,10 +122,10 @@ namespace LR
         virtual std::vector<T> matrix() override;
 
     private:
-        int nocc;
-        int nvirt;
+        const std::vector<int>& nocc;
+        const std::vector<int>& nvirt;
         int nk;
-        Parallel_2D* pX = nullptr;
+        std::vector<Parallel_2D>& pX;
         T one();
         /// transition density matrix in AO representation
         /// Hxc only: size=1, calculate on the same address for each bands
diff --git a/source/module_lr/operator_casida/operator_lr_hxc.cpp b/source/module_lr/operator_casida/operator_lr_hxc.cpp
index f9bc15f2f1..79b782daca 100644
--- a/source/module_lr/operator_casida/operator_lr_hxc.cpp
+++ b/source/module_lr/operator_casida/operator_lr_hxc.cpp
@@ -14,6 +14,11 @@
 
 inline double conj(double a) { return a; }
 inline std::complex<double> conj(std::complex<double> a) { return std::conj(a); }
+template<typename T>
+inline psi::Psi<T> get_psi_spin(const psi::Psi<T>& psi_in, const int& is, const int& nk)
+{
+    return psi::Psi<T>(&psi_in(is * nk, 0, 0), psi_in, nk, psi_in.get_nbands());
+}
 
 namespace LR
 {
@@ -23,14 +28,16 @@ namespace LR
         ModuleBase::TITLE("OperatorLRHxc", "act");
         assert(nbands <= psi_in.get_nbands());
         const int& nk = this->kv.get_nks() / this->nspin;
+        const int& sl = ispin_ks[0];
+        const int& sr = ispin_ks[1];
 
         //print 
         // if (this->first_print) LR_Util::print_psi_kfirst(*psi_ks, "psi_ks");
 
         this->init_DM_trans(nbands, this->DM_trans);    // initialize transion density matrix
 
-        psi::Psi<T> psi_in_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_in, nk, this->pX->get_local_size());
-        psi::Psi<T> psi_out_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_out, nk, this->pX->get_local_size());
+        psi::Psi<T> psi_in_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_in, nk, pX[sr].get_local_size());
+        psi::Psi<T> psi_out_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_out, nk, pX[sr].get_local_size());
 
         const int& lgd = gint->gridt->lgd;
         for (int ib = 0;ib < nbands;++ib)
@@ -45,10 +52,10 @@ namespace LR
 
             // 1. transition density matrix
 #ifdef __MPI
-            std::vector<container::Tensor>  dm_trans_2d = cal_dm_trans_pblas(psi_in_bfirst, *pX, *psi_ks, *pc, naos, nocc, nvirt, *pmat);
+            std::vector<container::Tensor>  dm_trans_2d = cal_dm_trans_pblas(psi_in_bfirst, pX[sr], get_psi_spin(*psi_ks, sr, nk), *pc, naos, nocc[sr], nvirt[sr], *pmat);
             if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data<T>(), naos, *pmat);
 #else
-            std::vector<container::Tensor>  dm_trans_2d = cal_dm_trans_blas(psi_in_bfirst, *psi_ks, nocc, nvirt);
+            std::vector<container::Tensor>  dm_trans_2d = cal_dm_trans_blas(psi_in_bfirst, get_psi_spin(*psi_ks, sr, nk), nocc[sr], nvirt[sr]);
             if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data<T>(), naos);
 #endif
             // tensor to vector, then set DMK
@@ -80,9 +87,9 @@ namespace LR
 
             // 5. [AX]^{Hxc}_{ai}=\sum_{\mu,\nu}c^*_{a,\mu,}V^{Hxc}_{\mu,\nu}c_{\nu,i}
 #ifdef __MPI
-            cal_AX_pblas(v_hxc_2d, *this->pmat, *this->psi_ks, *this->pc, naos, nocc, nvirt, *this->pX, psi_out_bfirst);
+            cal_AX_pblas(v_hxc_2d, *this->pmat, get_psi_spin(*psi_ks, sl, nk), *this->pc, naos, nocc[sl], nvirt[sl], this->pX[sl], psi_out_bfirst);
 #else
-            cal_AX_blas(v_hxc_2d, *this->psi_ks, nocc, nvirt, psi_out_bfirst);
+            cal_AX_blas(v_hxc_2d, get_psi_spin(*psi_ks, sl, nk), nocc[sl], nvirt[sl], psi_out_bfirst);
 #endif
             // if (this->first_print) LR_Util::print_psi_bandfirst(psi_out_bfirst, "5.AX", ib);
         }
diff --git a/source/module_lr/operator_casida/operator_lr_hxc.h b/source/module_lr/operator_casida/operator_lr_hxc.h
index 1c5ac17591..5bc2313f98 100644
--- a/source/module_lr/operator_casida/operator_lr_hxc.h
+++ b/source/module_lr/operator_casida/operator_lr_hxc.h
@@ -16,8 +16,8 @@ namespace LR
         //when nspin=2, nks is 2 times of real number of k-points. else (nspin=1 or 4), nks is the real number of k-points
         OperatorLRHxc(const int& nspin,
             const int& naos,
-            const int& nocc,
-            const int& nvirt,
+            const std::vector<int>& nocc,
+            const std::vector<int>& nvirt,
             const psi::Psi<T, Device>* psi_ks_in,
             std::vector<std::unique_ptr<elecstate::DensityMatrix<T, T>>>& DM_trans_in,
             typename TGint<T>::type* gint_in,
@@ -26,13 +26,14 @@ namespace LR
             const std::vector<double>& orb_cutoff,
             Grid_Driver& gd_in,
             const K_Vectors& kv_in,
-            Parallel_2D* pX_in,
+            std::vector<Parallel_2D>& pX_in,
             Parallel_2D* pc_in,
-            Parallel_Orbitals* pmat_in)
+            Parallel_Orbitals* pmat_in,
+            const std::vector<int>& ispin_ks = { 0, 0 })
             : nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt),
             psi_ks(psi_ks_in), DM_trans(DM_trans_in), gint(gint_in), pot(pot_in),
             ucell(ucell_in), orb_cutoff_(orb_cutoff), gd(gd_in), kv(kv_in),
-            pX(pX_in), pc(pc_in), pmat(pmat_in)
+            pX(pX_in), pc(pc_in), pmat(pmat_in), ispin_ks(ispin_ks)
         {
             ModuleBase::TITLE("OperatorLRHxc", "OperatorLRHxc");
             this->cal_type = hamilt::calculation_type::lcao_gint;
@@ -98,8 +99,9 @@ namespace LR
         const int& nspin;
         const int nspin_solve = 1;    ///< in singlet-triplet calculation, the Casida equation is solved respectively so nspin_solve in a single problem is 1
         const int& naos;
-        const int& nocc;
-        const int& nvirt;
+        const std::vector<int>& nocc;
+        const std::vector<int>& nvirt;
+        const std::vector<int> ispin_ks = { 0,0 };  ///< the index of spin of psi_ks used in {AX, DM_trans}
         const K_Vectors& kv;
         /// ground state wavefunction
         const psi::Psi<T, Device>* psi_ks = nullptr;
@@ -112,7 +114,7 @@ namespace LR
 
         /// parallel info
         Parallel_2D* pc = nullptr;
-        Parallel_2D* pX = nullptr;
+        std::vector<Parallel_2D>& pX;
         Parallel_Orbitals* pmat = nullptr;
 
         std::weak_ptr<PotHxcLR> pot;