diff --git a/CMakeLists.txt b/CMakeLists.txt
index daa591f..9271509 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,6 +20,7 @@ option(AutoSetCXX11 "Auto set c++11 flags" ON)
 if (AutoSetCXX11)
     set_cxx11()
 endif (AutoSetCXX11)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_MATH_DISABLE_FLOAT128")
 # Print build type
 message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
 
@@ -28,9 +29,7 @@ find_package(ALPSCore REQUIRED COMPONENTS hdf5 accumulators mc params HINTS ${AL
 
 # Dependencies
 add_alpscore()
-if (NOT ALPSCore_HAS_EIGEN_VERSION)
-    add_eigen3()
-endif (NOT ALPSCore_HAS_EIGEN_VERSION)
+add_eigen3()
 add_gftools()
 option(BuildPython "Build python modules" OFF)
 if (BuildPython)
@@ -46,9 +45,6 @@ endif()
 # fftw3
 add_fftw3()
 
-# https://stackoverflow.com/questions/25365160/boostmultiprecisionfloat128-and-c11
-add_definitions("-fext-numeric-literals")
-
 ### add source/binary root of the project to included path
 include_directories(
     ${CMAKE_SOURCE_DIR}
diff --git a/README.md b/README.md
index 60e3a2f..86ff362 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ hub_df_cubicNd --help
 ```
 
 ### Dependencies:
-- c++11 - compatible c++ compiler
+- c++11 - compatible c++ compiler; To link to the current ALPSCore, set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_MATH_DISABLE_FLOAT128")
 - *cmake*
 - *eigen3* (header-only) : http://eigen.tuxfamily.org/. CMake option: `-DEIGEN3_INCLUDE_DIR=path_to_eigen3_headers`.
 - *fftw*. Cmake option : `-DFFTW_ROOT=path_to_fftw_prefix` (that is before include or lib).
diff --git a/cmake/ExtGFTools.cmake b/cmake/ExtGFTools.cmake
index 232f9c0..b1ff838 100644
--- a/cmake/ExtGFTools.cmake
+++ b/cmake/ExtGFTools.cmake
@@ -4,6 +4,7 @@ find_package(Git)
 
 ExternalProject_Add(gftools
     GIT_REPOSITORY https://github.com/aeantipov/gftools.git
+    GIT_TAG main
     TIMEOUT 10
     INSTALL_COMMAND ""
     CONFIGURE_COMMAND ""
diff --git a/include/opendf/df_hubbard.hpp b/include/opendf/df_hubbard.hpp
index dcbcd33..9e02f0d 100644
--- a/include/opendf/df_hubbard.hpp
+++ b/include/opendf/df_hubbard.hpp
@@ -1,6 +1,7 @@
 #pragma once 
 #include "opendf/df_base.hpp"
 
+
 namespace open_df { 
 
 /// Dual fermion evaluation of a spin-symmetric Hubbard model without spatial symmetry breaking
diff --git a/include/opendf/lattice_traits.hpp b/include/opendf/lattice_traits.hpp
index 6debd87..e239b31 100644
--- a/include/opendf/lattice_traits.hpp
+++ b/include/opendf/lattice_traits.hpp
@@ -86,7 +86,7 @@ struct triangular_traits : lattice_traits_base<2,triangular_traits> {
     real_type _tp = 1.0;
     triangular_traits(real_type t, real_type tp):_t(t),_tp(tp){};
 
-    real_type dispersion(real_type kx,real_type ky){return -2.*_t*(cos(kx)+cos(ky)) - 2.*_tp*cos(kx-ky);};
+    real_type dispersion(real_type kx,real_type ky){return -2.*_t*cos(kx) - 2.0*_t*cos(ky) - 2.*_tp*cos(kx-ky);};
     real_type dispersion(typename base::arg_tuple x) { return base::dispersion(x); }
     real_type dispersion(typename base::bz_point x) { return base::dispersion(x); }
     real_type disp_square_sum(){return 4.*_t*_t + 2.*_tp*_tp;}; 
diff --git a/prog/data_save.hpp b/prog/data_save.hpp
index fa78c84..26c803d 100644
--- a/prog/data_save.hpp
+++ b/prog/data_save.hpp
@@ -133,7 +133,7 @@ void save_data(SCType const& sc, typename SCType::gw_type new_delta, alps::param
 
         // add here points for fluctuation diagnostics
         fluct_pts.reserve(4);
-        // add pi/2 pi/2
+        // add pi/2 pi/2Raman demo session
         bz_point p1 = gftools::tuple_tools::repeater<kmesh::point, D>::get_array(k_pi_half);
         fluct_pts.push_back(p1); 
         // add pi, 0
diff --git a/prog/hub_df.cpp b/prog/hub_df.cpp
index d72c2ce..cb98ca1 100644
--- a/prog/hub_df.cpp
+++ b/prog/hub_df.cpp
@@ -9,6 +9,8 @@
 
 #include <chrono>
 #include <alps/params.hpp>
+//#include "alps/hdf5.hpp"
+
 
 #include <opendf/lattice_traits.hpp>
 #include <opendf/df_hubbard.hpp>
@@ -53,19 +55,23 @@ typedef df_type::gw_type gw_type;
 typedef df_type::gk_type gk_type;
 typedef df_type::disp_type disp_type;
 
+#ifdef OPENDF_ENABLE_MPI
+  alps::mpi::communicator comm;
+#endif 
 
 inline void print_section (const std::string& str)
 {
-  std::cout << std::string(str.size(),'=') << std::endl;
-  std::cout << str << std::endl;
-  std::cout << std::string(str.size(),'=') << std::endl;
+  mpi_cout << std::string(str.size(),'=') << std::endl;
+  mpi_cout << str << std::endl;
+  mpi_cout << std::string(str.size(),'=') << std::endl;
 }
 
 void run(alps::params p)
 {
-    #ifdef OPENDF_ENABLE_MPI
-    alps::mpi::communicator comm;
-    #endif 
+    #ifdef OPENDF_ENABLE_MPI     
+      mpi_cout<<"number of process "<<comm.size()<<std::endl;
+      mpi_cout<<"rank id "<<comm.rank()<<std::endl;
+    #endif
 
     print_section("DF ladder in " + std::to_string(D) + " dimensions.");
     // read input data
@@ -156,10 +162,11 @@ void run(alps::params p)
     end = steady_clock::now();
 
     #ifdef OPENDF_ENABLE_MPI
+    MPI_Barrier(MPI_COMM_WORLD);
     if (!comm.rank()) 
     #endif
         { 
-
+        //std::cout<<"comm.rank "<<comm.rank()<<std::endl;
         mpi_cout << "Calculation lasted : " 
             << duration_cast<hours>(end-start).count() << "h " 
             << duration_cast<minutes>(end-start).count()%60 << "m " 
@@ -189,6 +196,10 @@ void run(alps::params p)
             d0.savetxt("density_vertex_input_W0.dat");
             }
         }
+
+    #ifdef OPENDF_ENABLE_MPI
+    MPI_Barrier(MPI_COMM_WORLD);
+    #endif
 }
 
 
@@ -197,29 +208,34 @@ void run(alps::params p)
 // 
 
 #ifndef BUILD_PYTHON_MODULE
-alps::params cmdline_params(int argc, char* argv[]);
+alps::params cmdline_params(int argc, char** argv);
 
 int main(int argc, char *argv[])
 {
     #ifdef OPENDF_ENABLE_MPI
     MPI_Init(&argc, &argv);
     #endif
-
     try { 
         alps::params p = cmdline_params(argc, argv); 
         run(p); 
         }
     catch (std::exception &e) { std::cerr << e.what() << std::endl; exit(1); };
-    
+
+    #ifdef OPENDF_ENABLE_MPI
+    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Finalize();
+    #endif    
 }
 
 
-alps::params cmdline_params(int argc, char* argv[])
+alps::params cmdline_params(int argc, char** argv)
 {
-    alps::params p(argc, (const char**)argv);
+    alps::params p(argc, argv);
     p.description("Dual fermions for the Hubbard model in " + std::to_string(D) + " dimensions.");
-
     df_type::define_parameters(p);
+
+
+
     save_define_parameters(p);
 
     p
@@ -234,7 +250,7 @@ alps::params cmdline_params(int argc, char* argv[])
     p.define<double>("t",  1.0,   "hopping on a lattice");
     #elif LATTICE_PARAMS == 2
     p.define<double>("t",  1.0,   "nearest neighbor hopping on a lattice");
-    p.define<double>("tp", 0.0,   "next-nearest neighbor hopping on a lattice");
+    p.define<double>("tp", 0.0,   "next-nearest neighbor hopping on a square lattice, anisotropic hopping along one direction on a triangular lattice (set 1.0 if you want an isotropic triangular lattice)");
     #else
     #error Undefined lattice
     #endif
diff --git a/src/df_hubbard.cpp b/src/df_hubbard.cpp
index 857da92..6c55b93 100644
--- a/src/df_hubbard.cpp
+++ b/src/df_hubbard.cpp
@@ -1,7 +1,15 @@
+#ifdef OPENDF_ENABLE_MPI
+#include <alps/utilities/boost_mpi.hpp>
+#define mpi_df_cout if(mpi_rank==0) std::cout
+#else
+#define mpi_df_cout std::cout
+#endif
+
 #include "opendf/df_hubbard.hpp"
 #include "opendf/diagrams.hpp"
 #include "opendf/lattice_traits.hpp"
 
+
 namespace open_df { 
 
 template <typename LatticeT>
@@ -30,7 +38,16 @@ alps::params& df_hubbard<LatticeT>::define_parameters(alps::params& p)
 template <typename LatticeT>
 typename df_hubbard<LatticeT>::gw_type df_hubbard<LatticeT>::operator()(alps::params p)
 {
-    std::cout << "Starting ladder dual fermion calculations" << std::endl;
+    int mpi_rank, mpi_size;
+    mpi_rank=0; mpi_size=1;
+    #ifdef OPENDF_ENABLE_MPI
+     alps::mpi::communicator comm_df;
+     mpi_rank=comm_df.rank();
+     mpi_size=comm_df.size();     
+     if(mpi_rank==0) std::cout<<"myrank = "<< mpi_rank<<std::endl;
+    #endif 
+ 
+    if(mpi_rank==0) std::cout << "Starting ladder dual fermion calculations" << std::endl;
 
     int n_bs_iter = p["n_bs_iter"];   // number of Bethe Salpeter iterations, if iterative evaluation is requested (also used, when the ladder can't be converged)
     double bs_mix = p["bs_mix"];      // mixing between subsequent Bethe-Salpeter iterations 
@@ -53,14 +70,15 @@ typename df_hubbard<LatticeT>::gw_type df_hubbard<LatticeT>::operator()(alps::pa
     double beta = fgrid_.beta();
     double T = 1.0/beta;
     bmatsubara_grid const& bgrid = magnetic_vertex_.grid();
-    std::cout << "Vertices are defined on bosonic grid : " << bgrid << std::endl; 
+    if(mpi_rank==0) std::cout << "Vertices are defined on bosonic grid : " << bgrid << std::endl; 
     const auto unique_kpts = lattice_t::getUniqueBZPoints(kgrid_);
+    const auto all_kpts = lattice_t::getAllBZPoints(kgrid_);
 
     gk_type gd_initial(gd_);
 
     gw_type gd_sum(fgrid_);
     for (auto iw : fgrid_.points()) { gd_sum[iw] = std::abs(gd_[iw].sum())/totalkpts; }; 
-    std::cout << "Beginning with GD sum = " << std::abs(gd_sum.sum())/double(fgrid_.size()) << std::endl;
+    if(mpi_rank==0) std::cout << "Beginning with GD sum = " << std::abs(gd_sum.sum())/double(fgrid_.size()) << std::endl;
     
     // prepare caches        
     fvertex_type m_v(fgrid_, fgrid_), d_v(m_v);
@@ -83,17 +101,28 @@ typename df_hubbard<LatticeT>::gw_type df_hubbard<LatticeT>::operator()(alps::pa
     double diff_gd = 1.0, diff_gd_min = diff_gd;
     int diff_gd_min_count = 0;
 
+    int av_Windex= (2*nbosonic_-1)/mpi_size+1;
+    int start_Windex = mpi_rank*av_Windex - nbosonic_ +1;
+    int end_Windex = (mpi_rank+1)*av_Windex - nbosonic_;
+    if(end_Windex>=nbosonic_) end_Windex=nbosonic_-1;
+    //std::cout<<nbosonic_<<"  "<<mpi_size<<"   "<<av_Windex<<std::endl;
+   
+    std::vector<std::complex<double>> reduced_sigma_d(sigma_d_.size());
+    std::vector<std::complex<double>> vector_sigma_d(sigma_d_.size());
+    
+
     for (size_t nd_iter=0; nd_iter<df_sc_iter && diff_gd > df_sc_cutoff; ++nd_iter) { 
         sigma_d_ = 0.0;
-        std::cout << std::endl << "DF iteration " << nd_iter << std::endl;
-
+        if(mpi_rank==0) std::cout << std::endl << "DF iteration " << nd_iter << std::endl;
+       
         // loop through conserved bosonic frequencies
-        for (int Windex = -nbosonic_ + 1; Windex < nbosonic_; Windex++) { 
+        
+        for (int Windex = start_Windex; Windex <= end_Windex; Windex++) { 
             std::complex<double> W_val = BMatsubara(Windex, beta);
             typename bmatsubara_grid::point W = bgrid.find_nearest(W_val); 
             assert(is_float_equal(W.value(), W_val, 1e-4));
-            std::cout << "W (bosonic) = " << W << std::endl;
-            std::cout << "Calculating bubbles" << std::endl;
+            if(mpi_rank==0) std::cout << "W (bosonic) = " << W << std::endl;
+            if(mpi_rank==0) std::cout << "Calculating bubbles" << std::endl;
             gk_type dual_bubbles = diagram_traits::calc_bubbles(gd_, W); 
 
                 
@@ -115,12 +144,12 @@ typename df_hubbard<LatticeT>::gw_type df_hubbard<LatticeT>::operator()(alps::pa
                 std::array<kmesh::point, NDim> q = pts_it->first; // point
                 real_type q_weight = real_type(pts_it->second.size()); // it's weight
                 auto other_qpts = pts_it -> second; // other points, equivalent by symmetry
-                std::cout << nq++ << "/" << unique_kpts.size() << ": [" << std::flush;
+                if(mpi_rank==0) std::cout << nq++ << "/" << unique_kpts.size() << ": [" << std::flush;
                 //for (size_t i=0; i<D; ++i) std::cout << real_type(q[i]) << " " << std::flush; std::cout << "]. Weight : " << q_weight << ". " << std::endl;
 
                 // define arguments conserved on the ladder
                 typename gk_type::arg_tuple Wq_args = std::tuple_cat(std::make_tuple(W),q);
-                std::cout << tuple_tools::print_tuple(Wq_args) << "]. Weight : " << q_weight << ". " << std::flush;
+                if(mpi_rank==0) std::cout << tuple_tools::print_tuple(Wq_args) << "]. Weight : " << q_weight << ". " << std::flush;
                 // get dual bubble
                 dual_bubble.fill([&](typename fmatsubara_grid::point w){return dual_bubbles(std::tuple_cat(std::make_tuple(w), q)); });
 
@@ -129,18 +158,18 @@ typename df_hubbard<LatticeT>::gw_type df_hubbard<LatticeT>::operator()(alps::pa
                 // Calculate ladders in different channels and get a determinant of 1 - vertex * bubble. 
                 // If it's negative - one eigenvalue is negative, i.e. the ladder can't be evaluated.
                 // magnetic channel
-                std::cout << "\tMagnetic " << std::flush;
+                if(mpi_rank==0) std::cout << "\tMagnetic " << std::flush;
                 forward_bs magnetic_bs(dual_bubble_matrix, magnetic_v_matrix, 0);
                 full_m = magnetic_bs.solve(eval_bs_sc, n_bs_iter, bs_mix);
                 double m_det = magnetic_bs.determinant().real();
-                std::cout << "det = " << m_det;
+                if(mpi_rank==0) std::cout << "det = " << m_det;
                 min_det = std::min(min_det, m_det); 
                 // density channel
-                std::cout << "\tDensity " << std::flush;
+                if(mpi_rank==0) std::cout << "\tDensity " << std::flush;
                 forward_bs density_bs(dual_bubble_matrix, density_v_matrix, 0);
                 full_d = density_bs.solve(eval_bs_sc, n_bs_iter, bs_mix);
                 double d_det = density_bs.determinant().real();
-                std::cout << "det = " << d_det;
+                if(mpi_rank==0) std::cout<< "det = " << d_det;
                 min_det = std::min(min_det, d_det); 
 
                 // second order correction
@@ -157,13 +186,13 @@ typename df_hubbard<LatticeT>::gw_type df_hubbard<LatticeT>::operator()(alps::pa
                         full_vertex.get(std::tuple_cat(std::make_tuple(iw1),q_pt)) = 0.5*(3.0*(magnetic_val)+density_val);
                         };
                     };
-                std::cout << std::endl;
+                if(mpi_rank==0) std::cout << std::endl;
                 } // end bz loop
 
             if (store_full_diag_vertex) { 
                 (*full_diag_vertex_ptr_)[W] = full_vertex.data();
                 }
-            std::cout << "Updating sigma" << std::endl;
+            if(mpi_rank==0) std::cout << "Updating sigma" << std::endl;
             for (auto iw1 : fgrid_.points()) {
                 v4r = run_fft(full_vertex[iw1], FFTW_FORWARD)/knorm;
                 gdr = run_fft(gd_shift[iw1], FFTW_BACKWARD);
@@ -171,20 +200,45 @@ typename df_hubbard<LatticeT>::gw_type df_hubbard<LatticeT>::operator()(alps::pa
                 sigma_d_[iw1]+= (1.0*T)*run_fft(v4r*gdr, FFTW_FORWARD); 
                 };
             //std::cout << "After W = " << W << " sigma diff = " << sigma_d_.diff(sigma_d_ * 0) << std::endl;
-
             } // end bgrid loop
 
-        std::cout << "Total sigma diff = " << sigma_d_.diff(sigma_d_*0) << std::endl;
+            #ifdef OPENDF_ENABLE_MPI
+            int jj=0;
+            for(auto iw1 : fgrid_.points()){
+              for (auto pts_it = all_kpts.begin(); pts_it != all_kpts.end(); pts_it++) { 
+                std::array<kmesh::point, NDim> q = *pts_it;    
+                auto wk = std::tuple_cat(std::make_tuple(iw1),q); 
+                vector_sigma_d[jj]=sigma_d_(wk);
+                reduced_sigma_d[jj]=0.0;
+                jj++;
+             }
+            }
+
+            int size_sigma=sigma_d_.size();                     
+            MPI_Allreduce(vector_sigma_d.data(),reduced_sigma_d.data(),size_sigma,MPI_DOUBLE_COMPLEX,MPI_SUM,MPI_COMM_WORLD); 
+
+            jj=0;
+            for(auto iw1 : fgrid_.points()){
+              for (auto pts_it = all_kpts.begin(); pts_it != all_kpts.end(); pts_it++) { 
+                std::array<kmesh::point, NDim> q = *pts_it;    
+                auto wk = std::tuple_cat(std::make_tuple(iw1),q); 
+                sigma_d_(wk)=reduced_sigma_d[jj];
+                jj++;
+             }
+            }
+           #endif
+ 
+        if(mpi_rank==0) std::cout << "Total sigma diff = " << sigma_d_.diff(sigma_d_*0) << std::endl;
 
         // check convergence
         auto gd_new = df_sc_mix/(1.0/gd0_ - sigma_d_) + gd_*(1.0-df_sc_mix); // Dyson eq;
         diff_gd = gd_new.diff(gd_);
         if (diff_gd<diff_gd_min-df_sc_cutoff/10.) { diff_gd_min = diff_gd; diff_gd_min_count = 0; }
         else diff_gd_min_count++;
-        std::cout << "Dual gd diff = " << diff_gd << std::endl;
+        if(mpi_rank==0) std::cout << "Dual gd diff = " << diff_gd << std::endl;
 
         if (diff_gd_min_count > 12 && std::abs(df_sc_mix-0.05)>1e-3 && update_df_sc_mixing) {
-            std::cerr << "\n\tCaught loop cycle. Reducing DF mixing to " << df_sc_mix/2 << " .\n" << std::endl;
+            if(mpi_rank==0) std::cerr << "\n\tCaught loop cycle. Reducing DF mixing to " << df_sc_mix/2 << " .\n" << std::endl;
             df_sc_mix=std::max(df_sc_mix/1.5, 0.05);
             gd_new = gd_initial;
             sigma_d_ = 0.0;
@@ -192,18 +246,21 @@ typename df_hubbard<LatticeT>::gw_type df_hubbard<LatticeT>::operator()(alps::pa
             diff_gd_min_count = 0;
             }
 
-        diffDF_stream.open(p["diff_stream"].as<std::string>(),std::ios::app);
-        diffDF_stream << diff_gd << "  " << df_sc_mix << " " << min_det << std::endl;
-        diffDF_stream.close();
+        if(mpi_rank==0) diffDF_stream.open(p["diff_stream"].as<std::string>(),std::ios::app);
+        if(mpi_rank==0) diffDF_stream << diff_gd << "  " << df_sc_mix << " " << min_det << std::endl;
+        if(mpi_rank==0) diffDF_stream.close();
 
         gd_=gd_new;
         gd_.set_tail(gd0_.tail()); // assume DMFT asymptotics are good 
         sigma_d_ = 0.0;
 
         for (auto iw : fgrid_.points()) { gd_sum[iw] = std::abs(gd_[iw].sum())/knorm; }; 
-        std::cout << "GD sum = " << std::abs(gd_sum.sum())/double(fgrid_.size()) << std::endl; 
+        if(mpi_rank==0) std::cout << "GD sum = " << std::abs(gd_sum.sum())/double(fgrid_.size()) << std::endl; 
+        #ifdef OPENDF_ENABLE_MPI
+        MPI_Barrier(MPI_COMM_WORLD);
+        #endif
         }
-    std::cout << "Finished DF iterations" << std::endl;
+    if(mpi_rank==0) std::cout << "Finished DF iterations" << std::endl;
         
     sigma_d_ = 1.0/gd0_ - 1.0/gd_; 
 
@@ -225,6 +282,49 @@ typename df_hubbard<LatticeT>::gw_type df_hubbard<LatticeT>::operator()(alps::pa
     //DEBUG("GD0 = " << GD0);
     //DEBUG("GD  = " << GD);
     //DEBUG("SigmaD = " << SigmaD);
+    if (store_full_diag_vertex) { 
+      #ifdef OPENDF_ENABLE_MPI
+       int size_full_vertex=bgrid.size()*sigma_d_.size();
+       //std::array<std::complex<double>,size_full_vertex> vector_full_vertex;
+       //std::array<std::complex<double>,size_full_vertex> reduced_full_vertex;
+       std::vector<std::complex<double>> vector_full_vertex(sigma_d_.size());
+       std::vector<std::complex<double>> reduced_full_vertex(sigma_d_.size());
+       //std::complex<double> vector_full_vertex[size_full_vertex];
+       //std::complex<double> reduced_full_vertex[size_full_vertex];
+       std::complex<double> czero(0.0,0.0);
+       std::fill(reduced_full_vertex.data(),&reduced_full_vertex.back(),czero);
+       int jj=0;
+       for(auto iw1 : bgrid.points()){
+        for(auto iw2 : fgrid_.points()){
+         for (auto pts_it = all_kpts.begin(); pts_it != all_kpts.end(); pts_it++) { 
+            std::array<kmesh::point, NDim> q = *pts_it;    
+            auto wk = std::tuple_cat(std::make_tuple(iw1),std::make_tuple(iw2),q);
+            vector_full_vertex[jj]=(*full_diag_vertex_ptr_)(wk);
+            jj++;
+         }
+        }
+       }
+
+       MPI_Allreduce(vector_full_vertex.data(),reduced_full_vertex.data(),size_full_vertex,MPI_DOUBLE_COMPLEX,MPI_SUM,MPI_COMM_WORLD);
+
+       jj=0;
+       for(auto iw1 : bgrid.points()){
+        for(auto iw2 : fgrid_.points()){
+         for (auto pts_it = all_kpts.begin(); pts_it != all_kpts.end(); pts_it++) { 
+            std::array<kmesh::point, NDim> q = *pts_it;    
+            auto wk = std::tuple_cat(std::make_tuple(iw1),std::make_tuple(iw2),q);
+            (*full_diag_vertex_ptr_)(wk)=reduced_full_vertex[jj];
+            jj++;
+         }
+        }
+       }        
+
+     MPI_Barrier(MPI_COMM_WORLD);
+    #endif
+    }
+
+
+
     return delta_out;
 
 }