diff --git a/include/quda.h b/include/quda.h index e059442a5d..fcfd942df7 100644 --- a/include/quda.h +++ b/include/quda.h @@ -869,6 +869,8 @@ extern "C" { double alpha3; /**< The coefficient used in HYP smearing step 1*/ unsigned int meas_interval; /**< Perform the requested measurements on the gauge field at this interval */ QudaGaugeSmearType smear_type; /**< The smearing type to perform */ + unsigned int adj_n_save; /**< How many intermediate gauge fields to save at each large nblock to perform adj flow*/ + unsigned int hier_threshold; /**< Minimum *hierarchical* threshold for adj gradient flow*/ QudaBoolean restart; /**< Used to restart the smearing from existing gaugeSmeared */ double t0; /**< Starting flow time for Wilson flow */ int dir_ignore; /**< The direction to be ignored by the smearing algorithm @@ -1701,6 +1703,26 @@ extern "C" { */ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam *obs_param); + + /** + * Performs Adjoint Gradient Flow (gauge + fermion) the "safe" way on gaugePrecise and stores it in gaugeSmeared + * @param[out] h_out Output fermion field + * @param[in] h_in Input fermion field + * @param[in] smear_param Parameter struct that defines the computation parameters + * @param[in,out] obs_param Parameter struct that defines which + * observables we are making and the resulting observables. + */ + void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); + + /** + * Performs Adjoint Gradient Flow (gauge + fermion) the Hierarchical way on gaugePrecise and stores it in gaugeSmeared + * @param[out] h_out Output fermion field + * @param[in] h_in Input fermion field + * @param[in] smear_param Parameter struct that defines the computation parameters + * @param[in,out] obs_param Parameter struct that defines which + * observables we are making and the resulting observables. + */ + void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); /** * @brief Calculates a variety of gauge-field observables. If a diff --git a/lib/check_params.h b/lib/check_params.h index 02ce0e4621..18c296ce64 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -1159,6 +1159,18 @@ void printQudaGaugeSmearParam(QudaGaugeSmearParam *param) #if defined CHECK_PARAM if (param->struct_size != (size_t)INVALID_INT && param->struct_size != sizeof(*param)) errorQuda("Unexpected QudaGaugeSmearParam struct size %lu, expected %lu", param->struct_size, sizeof(*param)); + + if (param->n_steps <= param->adj_n_save ) { + + logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) >= n_steps (%d); adj_n_save manually altered: \n",param->n_steps,param->adj_n_save); + if (param->n_steps == 1) + param->adj_n_save = param->n_steps; + else + param->adj_n_save = param->n_steps - 1; + logQuda(QUDA_SUMMARIZE,"adj_n_save (%d) ; n_steps (%d) \n\n",param->n_steps,param->adj_n_save); + + } + #else P(struct_size, (size_t)INVALID_INT); #endif @@ -1172,6 +1184,8 @@ void printQudaGaugeSmearParam(QudaGaugeSmearParam *param) P(rho, 0.0); P(epsilon, 0.0); P(restart, QUDA_BOOLEAN_FALSE); + P(adj_n_save,5); + P(hier_threshold,6); P(t0, 0.0); P(alpha1, 0.0); P(alpha2, 0.0); @@ -1184,6 +1198,8 @@ void printQudaGaugeSmearParam(QudaGaugeSmearParam *param) P(rho, INVALID_DOUBLE); P(epsilon, INVALID_DOUBLE); P(restart, QUDA_BOOLEAN_INVALID); + P(adj_n_save,(unsigned int)INVALID_INT); + P(hier_threshold,(unsigned int)INVALID_INT); P(t0, INVALID_DOUBLE); P(alpha1, INVALID_DOUBLE); P(alpha2, INVALID_DOUBLE); diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 139ffcfc60..9f26d0a65d 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -196,6 +197,11 @@ static TimeProfile profileWFlow("wFlowQuda"); //!< Profiler for gFlowQuda static TimeProfile profileGFlow("gFlowQuda"); +//!< Profiler for gFlowQuda +static TimeProfile profileAdjGFlowSafe("AdjgFlowSafeQuda"); + +static TimeProfile profileAdjGFlowHier("AdjgFlowHierQuda"); + //!< Profiler for projectSU3Quda static TimeProfile profileProject("projectSU3Quda"); @@ -5389,6 +5395,458 @@ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaG } /* end of performGFlowQuda */ + +// perform adjoint (backwards) gradient flow on gauge and spinor field following the algorithm in arXiv:1302.5246 (Appendix E) +// the gauge flow steps are identical to Wilson Flow algorithm in arXiv:1006.4518 (Vt <-> W3) +void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param) +{ + + auto profile = pushProfile(profileAdjGFlowSafe); + pushOutputPrefix("performAdjGFlowQudaSafe: "); + checkGaugeSmearParam(smear_param); + + pushVerbosity(inv_param->verbosity); + if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); + + if (smear_param->restart) { + if (gaugeSmeared == nullptr) errorQuda("gaugeSmeared must be loaded"); + } else { + if (gaugePrecise == nullptr) errorQuda("Gauge field must be loaded"); + freeUniqueGaugeQuda(QUDA_SMEARED_LINKS); + gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileAdjGFlowSafe); + } + + GaugeFieldParam gParamDummy(*gaugeSmeared); + GaugeField gaugeW0(gParamDummy); + GaugeField gaugeW1(gParamDummy); + GaugeField gaugeW2(gParamDummy); + GaugeField gaugeVT(gParamDummy); + + GaugeFieldParam gParam(*gaugePrecise); + gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct + GaugeField gaugeTemp(gParam); + + const GaugeField gin = *gaugeSmeared; + GaugeField &g_W0 = *gaugeSmeared; + GaugeField &g_W1 = gaugeW1; + GaugeField &g_W2 = gaugeW2; + GaugeField &g_VT = gaugeVT; + + //necessary? + if (gParamDummy.order <= 4) gParamDummy.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + + // helper gauge field for Laplace operator + GaugeField precise; + GaugeFieldParam gParam_helper(*gaugePrecise); + gParam_helper.create = QUDA_NULL_FIELD_CREATE; + precise = GaugeField(gParam_helper); + + // spinor fields + ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); + ColorSpinorField fin_h(cpuParam); + + ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); + ColorSpinorField fin(deviceParam); + fin = fin_h; + + deviceParam.create = QUDA_NULL_FIELD_CREATE; + ColorSpinorField fout(deviceParam); + + int parity = 0; + + // initialize a and b for Laplace operator + double a = 1.; + double b = -8.; + + int comm_dim[4] = {}; + // Will add fermion measruement utilities later + // int measurement_n = 0; // The nth measurement to take + // only switch on comms needed for directions with a derivative + for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } + + // auxilliary fermion fields [0], [1], [2] and [3] + ColorSpinorField f_temp0(deviceParam); + ColorSpinorField f_temp1(deviceParam); + ColorSpinorField f_temp2(deviceParam); + ColorSpinorField f_temp3(deviceParam); + ColorSpinorField f_temp4(deviceParam); + + // set [3] = input spinor + f_temp3 = fin; + + + for (unsigned int j = 0; j < smear_param->n_steps ; j++) + { + for (unsigned int i = 0; i < smear_param->n_steps - j; i++) { + + if (i == 0) g_W0 = gin; + else std::swap(g_W0,g_VT); + + GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); + GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); + GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); + + } + + // init auxilliary fields [0], [1] and [2] as [3] + f_temp0 = f_temp3; + f_temp1 = f_temp3; + f_temp2 = f_temp3; + + copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, comm_dim, profileAdjGFlowSafe); + + blas::ax(smear_param->epsilon * 3. / 4., f_temp4); + + + f_temp2 = f_temp4; + + copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, comm_dim, profileAdjGFlowSafe); + + + blas::axpy(smear_param->epsilon * 8. / 9., f_temp4, f_temp3); + + f_temp1 = f_temp3; + f_temp4 = f_temp1; + + blas::axpy(-8. / 9.,f_temp2, f_temp4); + + copyExtendedGauge(precise, g_W0, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, comm_dim, profileAdjGFlowSafe); + + blas::ax(smear_param->epsilon * 1. / 4., f_temp0); + blas::axpy(1.,f_temp2, f_temp0); + blas::axpy(1.,f_temp1, f_temp0); + + fout = f_temp0; + //redefining f_temp0 to restart loop + f_temp3 = f_temp0; + + } + cpuParam.v = h_out; + cpuParam.location = inv_param->output_location; + ColorSpinorField fout_h(cpuParam); + fout_h = fout; + + popOutputPrefix(); +} + +void adjSafeEvolve(std::vector> sf_list,std::vector> gf_list, QudaGaugeSmearParam *smear_param, unsigned int ns_safe, TimeProfile &profile, std::vector> meas_cinf) +{ + const GaugeField gin = gf_list[0].get(); + GaugeField &g_W0 = gf_list[0].get(); + GaugeField &g_W1 = gf_list[1].get(); + GaugeField &g_W2 = gf_list[2].get(); + GaugeField &g_VT = gf_list[3].get(); + GaugeField &gaugeTemp = gf_list[4].get(); + GaugeField &precise = gf_list[5].get(); + + ColorSpinorField &f_temp0 = sf_list[0].get(); + ColorSpinorField &f_temp1 = sf_list[1].get(); + ColorSpinorField &f_temp2 = sf_list[2].get(); + ColorSpinorField &f_temp3 = sf_list[3].get(); + ColorSpinorField &f_temp4 = sf_list[4].get(); + + int &i_glob = meas_cinf[0].get(); + int &measurement_n = meas_cinf[1].get(); + measurement_n = 0; + + int parity = 0; + + // initialize a and b for Laplace operator + double a = 1.; + double b = -8.; + + int comm_dim[4] = {}; + // only switch on comms needed for directions with a derivative + for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } + + // f_temp3 = fin; + + for (unsigned int j = 0; j < ns_safe ; j++) + { + for (unsigned int i = 0; i < ns_safe - j; i++) { + + if (i == 0) g_W0 = gin; + else std::swap(g_W0,g_VT); + + GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); + GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); + GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); + + } + // init auxilliary fields [0], [1] and [2] as [3] + f_temp0 = f_temp3; + f_temp1 = f_temp3; + f_temp2 = f_temp3; + + // [4] = Lap2 [0] + copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, comm_dim, profile); + + // [4] -> 3/4 eps [4] + blas::ax(smear_param->epsilon * 3. / 4., f_temp4); + + // [2] = [4] + f_temp2 = f_temp4; + + // [4] = Lap1 [2] + copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, comm_dim, profile); + + // [3] -> [3] + 8/9 eps [4] + blas::axpy(smear_param->epsilon * 8. / 9., f_temp4, f_temp3); + + // [1], [4] <- [3] + f_temp1 = f_temp3; + f_temp4 = f_temp1; + + // [4] <- [4] - 8/9 [2] + blas::axpy(-8. / 9.,f_temp2, f_temp4); + + // [0] <- Lap0 [4] + copyExtendedGauge(precise, g_W0, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, comm_dim, profile); + + // [0] <- 1/4 eps [0]; [0] <- [2] + [0]; [0] <- [1] + [0] + blas::ax(smear_param->epsilon * 1. / 4., f_temp0); + blas::axpy(1.,f_temp2, f_temp0); + blas::axpy(1.,f_temp1, f_temp0); + + // fout = f_temp0; + //redefining f_temp0 to restart loop + f_temp3 = f_temp0; + + i_glob++; + } + +} + +/* total_dist == n_steps, n_b is dividing factor of each block, n_Save is the size of the list, "front" denotes whether split hierarchy goes to existing or new subhierarchy */ +std::vector get_hier_list(int total_dist, int n_b, int n_save, bool front = true){ + + std::vector hier_list; + int counter = 0; + + int val = total_dist; + for (int i_s = 0; i_s < n_save; i_s++) { + val = (val <= 1) ? 1 : val / n_b; + hier_list.push_back(val); + counter += val; + } + + if (front) hier_list.at(0) += total_dist - counter; + else hier_list.back() += total_dist - counter; + + return hier_list; + +} + +int modify_hier_list(std::vector &hier_list, int n_b, int n_save, int threshold) { + + int result = -1; + int current_size = hier_list.size(); + std::vector temp_list; + if (current_size > n_save) errorQuda("something isnt right\n"); + + int diff = n_save - current_size; + + + for (int i=current_size - 1; i >= 0; --i){ + + if (hier_list[i] > threshold){ + + temp_list = get_hier_list(hier_list[i], n_b, diff+1,false); + // for (int ii = 0; ii< temp_list.size();ii++) printf("tempf %d \n",temp_list[ii]); + hier_list.erase(hier_list.begin()+i); + hier_list.insert(hier_list.begin()+i, temp_list.begin(),temp_list.end()); + result = i; + break; + } + } + + return result; + +} + +void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param){ + + auto profile = pushProfile(profileAdjGFlowHier); + pushOutputPrefix("performAdjGFlowQudaHier: "); + checkGaugeSmearParam(smear_param); + + // pushVerbosity(inv_param->verbosity); + if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); + + if (smear_param->restart) { + if (gaugeSmeared == nullptr) errorQuda("gaugeSmeared must be loaded"); + } else { + if (gaugePrecise == nullptr) errorQuda("Gauge field must be loaded"); + freeUniqueGaugeQuda(QUDA_SMEARED_LINKS); + gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileAdjGFlowHier); + } + + GaugeFieldParam gParamDummy(*gaugeSmeared); + GaugeField gaugeW0(gParamDummy); + GaugeField gaugeW1(gParamDummy); + GaugeField gaugeW2(gParamDummy); + GaugeField gaugeVT(gParamDummy); + GaugeField gauge_out(gParamDummy); + + + + GaugeFieldParam gParam(*gaugePrecise); + gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct + GaugeField gaugeTemp(gParam); + + auto n = smear_param->adj_n_save; + + std::vector gauge_stages(n,gParamDummy); + gauge_stages[0] = *gaugeSmeared; + //Can also do below + //creates copies std::vector gauge_stages(n,*gaugeSmeared); + + GaugeField &gin = *gaugeSmeared; + GaugeField &gout = gauge_out; + + // helper gauge field for Laplace operator + GaugeField precise; + GaugeFieldParam gParam_helper(*gaugePrecise); + gParam_helper.create = QUDA_NULL_FIELD_CREATE; + precise = GaugeField(gParam_helper); + + // spinor fields + ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); + ColorSpinorField fin_h(cpuParam); + + ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); + ColorSpinorField fin(deviceParam); + fin = fin_h; + + deviceParam.create = QUDA_NULL_FIELD_CREATE; + ColorSpinorField fout(deviceParam); + + ColorSpinorField f_temp0(deviceParam); + ColorSpinorField f_temp1(deviceParam); + ColorSpinorField f_temp2(deviceParam); + ColorSpinorField f_temp3(deviceParam); + ColorSpinorField f_temp4(deviceParam); + + //IMPORTANT initializing step: set [3] = input spinor + f_temp3 = fin; + + int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); + logQuda(QUDA_SUMMARIZE,"Hierarchical block n_b: %d\n\n",n_b); + int ret_idx = 0; + int threshold = smear_param->hier_threshold; + std::vector hier_list; + //The first stage is saved at the very beginning, so its presence is implicit + hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save); + logQuda(QUDA_SUMMARIZE,"hier list size (number of gauge fields to save) is %d\n",(int) hier_list.size()); + if (threshold < hier_list.back()) {threshold = hier_list.back(); logQuda(QUDA_SUMMARIZE, "threshold changed to %d",threshold);} + else logQuda(QUDA_SUMMARIZE, "threshold is %d\n",threshold); + + if (hier_list.empty()) errorQuda("hier_list is not populated\n"); + if (hier_list.size() != gauge_stages.size()) errorQuda("hier_list is not same size as gauge_stages\n"); + + for (unsigned int i = 0; i < hier_list.size() - 1; i++) { + + + if (i == 0){ + logQuda(QUDA_VERBOSE,"we first set gin to the first index of the gauge_steps vector\n"); + gauge_stages[0] = gin; + } + if (i > 0) std::swap(gout,gin); + + for (unsigned int j = 0; j < (unsigned int) hier_list[i]; j++){ + if (j > 0) std::swap(gout,gin); + + WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); + + } + gauge_stages[i + 1] = gout; + } + + std::vector> sf_list; + sf_list = {f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; + std::vector> gf_list; + gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; + + //first one is global counter, second is meas counter + int i_glob = 0, measurement_n = 0 ; + std::vector> meas_cinf{i_glob, measurement_n}; + + int hier_loop_counter = 0; + while (ret_idx != -1){ + logQuda(QUDA_DEBUG_VERBOSE,"Hier loop count %d has begun \n",hier_loop_counter); + logQuda(QUDA_DEBUG_VERBOSE,"Starting a hierarchical loop log: \n"); + + adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier,meas_cinf); + + logQuda(QUDA_DEBUG_VERBOSE,"Previous hier list elements: \n"); + for (int j = 0; j < (int) hier_list.size(); j++ ){ + logQuda(QUDA_DEBUG_VERBOSE,"%d \n", (int) hier_list[j]); + } + logQuda(QUDA_DEBUG_VERBOSE,"\n"); + + hier_list.pop_back(); + gauge_stages.pop_back(); + ret_idx = modify_hier_list(hier_list, n_b, smear_param->adj_n_save, threshold); + if (ret_idx == -1) { + logQuda(QUDA_VERBOSE," now in final serial stage of hierarchial evolution \n"); + for (int i = gauge_stages.size() - 1; i >= 0; --i) { + //first load correct gauge field (for beginning of the loop, it is the final gauge list element) + + gf_list.at(0) = std::ref(gauge_stages[i]); + + adjSafeEvolve(sf_list,gf_list,smear_param,hier_list[i],profileAdjGFlowHier,meas_cinf); + + logQuda(QUDA_DEBUG_VERBOSE," block number %d successfully deployed \n",i); + } + logQuda(QUDA_VERBOSE,"Hierarchial evolution completed \n"); + break; + } + + GaugeField g_2(gParamDummy); + GaugeField g_1 = gauge_stages[ret_idx]; + + logQuda(QUDA_DEBUG_VERBOSE,"Modified hier list elements: \n"); + for (int j = 0; j < (int) hier_list.size(); j++ ){ + logQuda(QUDA_DEBUG_VERBOSE,"%d \n",(int) hier_list[j]); + } + logQuda(QUDA_DEBUG_VERBOSE,"\n"); + + for (unsigned int j = 0; j < (unsigned int) hier_list[ret_idx]; j++){ + if (j > 0) std::swap(g_2,g_1); + WFlowStep(g_2, gaugeTemp, g_1, smear_param->epsilon, smear_param->smear_type); + } + // break; + gauge_stages.insert(gauge_stages.begin() + ret_idx + 1, g_2); + logQuda(QUDA_DEBUG_VERBOSE,"recycled gauge field placed *before* index %d\n\n",ret_idx + 1); + gf_list.at(0) = std::ref(gauge_stages.back()); + hier_loop_counter += 1; + + } + + cpuParam.v = h_out; + cpuParam.location = inv_param->output_location; + ColorSpinorField fout_h(cpuParam); + fout_h = sf_list[0].get(); + logQuda(QUDA_DEBUG_VERBOSE,"Spinor written to cpu \n"); + popOutputPrefix(); + +} + + +/* save list of gauge vectors */ + int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double relax_boost, const double tolerance, const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index da3e7a97bf..f2a0f8e1c5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -182,6 +182,11 @@ if(QUDA_QIO) target_link_libraries(io_test ${TEST_LIBS}) quda_checkbuildtest(io_test QUDA_BUILD_ALL_TESTS) install(TARGETS io_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) + + add_executable(vanilla_io vanilla_io.cpp) + target_link_libraries(vanilla_io ${TEST_LIBS}) + quda_checkbuildtest(vanilla_io QUDA_BUILD_ALL_TESTS) + install(TARGETS vanilla_io ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() add_executable(tune_test tune_test.cpp) @@ -199,6 +204,11 @@ target_link_libraries(su3_test ${TEST_LIBS}) quda_checkbuildtest(su3_test QUDA_BUILD_ALL_TESTS) install(TARGETS su3_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) +add_executable(su3_fermion_test su3_fermion_test.cpp) +target_link_libraries(su3_fermion_test ${TEST_LIBS}) +quda_checkbuildtest(su3_fermion_test QUDA_BUILD_ALL_TESTS) +install(TARGETS su3_fermion_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) + add_executable(pack_test pack_test.cpp) target_link_libraries(pack_test ${TEST_LIBS}) quda_checkbuildtest(pack_test QUDA_BUILD_ALL_TESTS) diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp new file mode 100644 index 0000000000..3de53e4fc7 --- /dev/null +++ b/tests/su3_fermion_test.cpp @@ -0,0 +1,338 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +// In a typical application, quda.h is the only QUDA header required. +#include + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +// Smearing variables +double gauge_smear_rho = 0.1; +double gauge_smear_epsilon = 0.1; +double gauge_smear_alpha = 0.6; +double gauge_smear_alpha1 = 0.75; +double gauge_smear_alpha2 = 0.6; +double gauge_smear_alpha3 = 0.3; +int gauge_smear_steps = 50; +int gauge_n_save = 3; +int hier_threshold = 6; +QudaGaugeSmearType gauge_smear_type = QUDA_GAUGE_SMEAR_STOUT; +int gauge_smear_dir_ignore = -1; +int measurement_interval = 5; +bool su_project = true; + +void display_test_info() +{ + printfQuda("running the following test:\n"); + + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension\n"); + printfQuda("%s %s %s %s %d/%d/%d %d\n", get_prec_str(prec), + get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, + tdim); + + // Specific test + printfQuda("\n%s smearing\n", get_gauge_smear_str(gauge_smear_type)); + switch (gauge_smear_type) { + case QUDA_GAUGE_SMEAR_APE: printfQuda(" - alpha %f\n", gauge_smear_alpha); break; + case QUDA_GAUGE_SMEAR_STOUT: printfQuda(" - rho %f\n", gauge_smear_rho); break; + case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: + printfQuda(" - rho %f\n", gauge_smear_rho); + printfQuda(" - epsilon %f\n", gauge_smear_epsilon); + break; + case QUDA_GAUGE_SMEAR_HYP: + printfQuda(" - alpha1 %f\n", gauge_smear_alpha1); + printfQuda(" - alpha2 %f\n", gauge_smear_alpha2); + printfQuda(" - alpha3 %f\n", gauge_smear_alpha3); + break; + case QUDA_GAUGE_SMEAR_WILSON_FLOW: + case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: printfQuda(" - epsilon %f\n", gauge_smear_epsilon); break; + default: errorQuda("Undefined test type %d given", test_type); + } + printfQuda(" - smearing steps %d\n", gauge_smear_steps); + printfQuda(" - smearing ignore direction %d\n", gauge_smear_dir_ignore); + printfQuda(" - Measurement interval %d\n", measurement_interval); + + printfQuda("Grid partition info: X Y Z T\n"); + printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), + dimPartitioned(3)); + return; +} + +void add_su3_option_group(std::shared_ptr quda_app) +{ + CLI::TransformPairs gauge_smear_type_map {{"ape", QUDA_GAUGE_SMEAR_APE}, + {"stout", QUDA_GAUGE_SMEAR_STOUT}, + {"ovrimp-stout", QUDA_GAUGE_SMEAR_OVRIMP_STOUT}, + {"hyp", QUDA_GAUGE_SMEAR_HYP}, + {"wilson", QUDA_GAUGE_SMEAR_WILSON_FLOW}, + {"symanzik", QUDA_GAUGE_SMEAR_SYMANZIK_FLOW}}; + + // Option group for SU(3) related options + auto opgroup = quda_app->add_option_group("SU(3)", "Options controlling SU(3) tests"); + + opgroup + ->add_option( + "--su3-smear-type", + gauge_smear_type, "The type of action to use in the smearing. Options: APE, Stout, Over Improved Stout, HYP, Wilson Flow, Symanzik Flow (default stout)") + ->transform(CLI::QUDACheckedTransformer(gauge_smear_type_map)); + ; + opgroup->add_option("--su3-smear-alpha", gauge_smear_alpha, "alpha coefficient for APE smearing (default 0.6)"); + + opgroup->add_option("--su3-smear-rho", gauge_smear_rho, + "rho coefficient for Stout and Over-Improved Stout smearing (default 0.1)"); + + opgroup->add_option("--su3-smear-epsilon", gauge_smear_epsilon, + "epsilon coefficient for Over-Improved Stout smearing or Wilson flow (default 0.1)"); + + opgroup->add_option("--su3-smear-alpha1", gauge_smear_alpha1, "alpha1 coefficient for HYP smearing (default 0.75)"); + opgroup->add_option("--su3-smear-alpha2", gauge_smear_alpha2, "alpha2 coefficient for HYP smearing (default 0.6)"); + opgroup->add_option("--su3-smear-alpha3", gauge_smear_alpha3, "alpha3 coefficient for HYP smearing (default 0.3)"); + + opgroup->add_option( + "--su3-smear-dir-ignore", gauge_smear_dir_ignore, + "Direction to be ignored by the smearing, negative value means decided by --su3-smear-type (default -1)"); + + opgroup->add_option("--su3-smear-steps", gauge_smear_steps, "The number of smearing steps to perform (default 50)"); + + opgroup->add_option("--su3-adj-gauge-nsave", gauge_n_save, "The number of gauge steps to save for hierarchical adj grad flow"); + + opgroup->add_option("--su3-hier-threshold", hier_threshold, "Minimum threshold for hierarchical adj grad flow"); + + opgroup->add_option("--su3-measurement-interval", measurement_interval, + "Measure the field energy and/or topological charge every Nth step (default 5) "); + + opgroup->add_option("--su3-project", su_project, + "Project smeared gauge onto su3 manifold at measurement interval (default true)"); +} + +int main(int argc, char **argv) +{ + + auto app = make_app(); + add_su3_option_group(app); + + try { + app->parse(argc, argv); + } catch (const CLI::ParseError &e) { + return app->exit(e); + } + + // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) + initComms(argc, argv, gridsize_from_cmdline); + + QudaGaugeParam gauge_param = newQudaGaugeParam(); + if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + + setWilsonGaugeParam(gauge_param); + gauge_param.t_boundary = QUDA_PERIODIC_T; + setDims(gauge_param.X); + + // All user inputs are now defined + display_test_info(); + + void *gauge[4], *new_gauge[4]; + + for (int dir = 0; dir < 4; dir++) { + gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); + new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); + } + + initQuda(device_ordinal); + + setVerbosity(verbosity); + + // call srand() with a rank-dependent seed + initRand(); + + constructHostGaugeField(gauge, gauge_param, argc, argv); + // Load the gauge field to the device + loadGaugeQuda((void *)gauge, &gauge_param); + saveGaugeQuda(new_gauge, &gauge_param); + // start the timer + quda::host_timer_t host_timer, host_safe_timer, host_hier_timer, host_fwd_timer; + + // The commented out section is all geared towards gauge observables, so unlikely to be needed for now + // // Prepare various perf info + // long long flops_plaquette = 6ll * 597 * V; + // long long flops_ploop = 198ll * V + 6 * V / gauge_param.X[3]; + + // // Prepare a gauge observable struct + // QudaGaugeObservableParam param = newQudaGaugeObservableParam(); + + // The user may specify which measurements they wish to perform/omit + // using the QudaGaugeObservableParam struct, and whether or not to + // perform suN projection at each measurement step. We recommend that + // users perform suN projection. + // A unique observable param struct is constructed for each measurement. + + // Gauge Smearing Routines + //--------------------------------------------------------------------------- + // Stout smearing should be equivalent to APE smearing + // on D dimensional lattices for rho = alpha/2*(D-1). + // Typical values for + // APE: alpha=0.6 + // Stout: rho=0.1 + // Over Improved Stout: rho=0.08, epsilon=-0.25 + // + // Typically, the user will use smearing for Q charge data only, so + // we hardcode to compute Q only and not the plaquette. Users may + // of course set these as they wish. SU(N) projection su_project=true is recommended. + QudaGaugeObservableParam *obs_param = new QudaGaugeObservableParam[gauge_smear_steps / measurement_interval + 1]; + for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { + obs_param[i] = newQudaGaugeObservableParam(); + obs_param[i].compute_plaquette = QUDA_BOOLEAN_FALSE; + obs_param[i].compute_qcharge = QUDA_BOOLEAN_TRUE; + obs_param[i].su_project = su_project ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + } + + // We here set all the problem parameters for all possible smearing types. + QudaGaugeSmearParam smear_param = newQudaGaugeSmearParam(); + smear_param.smear_type = gauge_smear_type; + smear_param.n_steps = gauge_smear_steps; + smear_param.adj_n_save = gauge_n_save; + smear_param.hier_threshold = hier_threshold; + smear_param.meas_interval = measurement_interval; + smear_param.alpha = gauge_smear_alpha; + smear_param.rho = gauge_smear_rho; + smear_param.epsilon = gauge_smear_epsilon; + smear_param.alpha1 = gauge_smear_alpha1; + smear_param.alpha2 = gauge_smear_alpha2; + smear_param.alpha3 = gauge_smear_alpha3; + smear_param.dir_ignore = gauge_smear_dir_ignore; + + + quda::ColorSpinorField check,check_safe,check_hier,check_fwd; + QudaInvertParam invParam = newQudaInvertParam(); + invParam.cpu_prec = QUDA_DOUBLE_PRECISION; + invParam.cuda_prec = QUDA_DOUBLE_PRECISION; + invParam.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; + invParam.dirac_order = QUDA_DIRAC_ORDER; + + quda::ColorSpinorParam cs_param; + + constructWilsonTestSpinorParam(&cs_param, &invParam, &gauge_param); + check = quda::ColorSpinorField(cs_param); + //Add noise to spinor + quda::RNG rng(check, 1234); + spinorNoise(check, rng, QUDA_NOISE_GAUSS); + +// Example of how to construct a spinor that is the complex conjugate of check. +// quda::ColorSpinorField check_norm(cs_param); +// #pragma omp parallel for +// for (int i = 0; i < V * 24; i++) { + +// if (i % 2 == 0) +// check_norm.data()[i] = check.data()[i]; +// else +// check_norm.data()[i] = -1.*check.data()[i]; +// } + + check_safe = quda::ColorSpinorField(cs_param); + check_hier = quda::ColorSpinorField(cs_param); + check_fwd = quda::ColorSpinorField(cs_param); + + printf("Inspecting the very first element of the random fermion we will use:\n"); + check.PrintVector(0,0,0); + printf("Inspecting the very first element of the 3 un-evolved fermions (should be zero):\n"); + printf("Hierarchical method:\n"); + check_hier.PrintVector(0,0,0); + printf("Safe method:\n"); + check_safe.PrintVector(0,0,0); + printf("Forward method:\n"); + check_fwd.PrintVector(0,0,0); + + host_timer.start(); // start the timer + switch (smear_param.smear_type) { + case QUDA_GAUGE_SMEAR_APE: + case QUDA_GAUGE_SMEAR_STOUT: + case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: + case QUDA_GAUGE_SMEAR_HYP: { + performGaugeSmearQuda(&smear_param, obs_param); + break; + } + + // Here we use a typical use case which is different from simple smearing in that + // the user will want to compute the plaquette values to compute the gauge energy. + case QUDA_GAUGE_SMEAR_WILSON_FLOW: + case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: { + for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { + obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; + } + + // Perform two adjoint flow algorithms, these methods dont alter the final value for the gauge so we excecute them first + host_hier_timer.start(); + performAdjGFlowHier(check_hier.data(),check.data(), &invParam, &smear_param); + host_hier_timer.stop(); + host_safe_timer.start(); + performAdjGFlowSafe(check_safe.data(),check.data() , &invParam, &smear_param); + host_safe_timer.stop(); + // Perform forward flow algorithm + host_fwd_timer.start(); + performGFlowQuda(check_fwd.data(),check.data(), &invParam, &smear_param, obs_param); + host_fwd_timer.stop(); + + printfQuda("Time elapsed for adjoint hierarchical fermion/gauge smearing = %g secs\n", host_hier_timer.last()); + printfQuda("Time elapsed for adjoint safe fermion/gauge smearing = %g secs\n", host_safe_timer.last()); + printfQuda("Time elapsed for forward fermion/gauge smearing = %g secs\n", host_fwd_timer.last()); + + break; + } + default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); + } + + host_timer.stop(); // stop the timer + + printfQuda("Total time for collective fermion/gauge smearing = %g secs\n", host_timer.last()); + printf("Now, inspecting the very first element of the 3 evolved fermions:\n"); + printf("Hierarchical method:\n"); + check_hier.PrintVector(0,0,0); + printf("Safe method:\n"); + check_safe.PrintVector(0,0,0); + printf("Forward method:\n"); + check_fwd.PrintVector(0,0,0); + + double method_adj_diff = 0.; + /* To access the ith complex entry in a raw vector, do, for example: check.data*>()[i]*/ + for (int i = 0; i < V * 24; i++) { + method_adj_diff += pow(fabs(check_safe.data()[i] - check_hier.data()[i]), 2); + } + double method_adj_check = sqrt(method_adj_diff)/(V*24.); + printf("Mean of mag errors between Safe and Hierarchical Adj methods (should be zero up to machine precision) = %1.5e \n", method_adj_check); + + std::complextrace_fwd,trace_adj; + trace_fwd = twoColorSpinorContract(check.data*>(), check_fwd.data*>()); + trace_adj = twoColorSpinorContract(check.data*>(), check_safe.data*>()); + + auto trace_diff_err = 2.*std::fabs(trace_fwd - std::conj(trace_adj))/std::fabs(trace_fwd + std::conj(trace_adj)); + + printf("The two numbers below should be complex conjugates of one another\n"); + printf(" is %1.5e, %1.5e \n",trace_adj.real(), trace_adj.imag()); + printf(" is %1.5e, %1.5e \n",trace_fwd.real(), trace_fwd.imag()); + printf("Fractional error of ( - .conj()) = %1.5e \n", trace_diff_err); + + if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); + + for (int dir = 0; dir < 4; dir++) { + host_free(gauge[dir]); + host_free(new_gauge[dir]); + } + + freeGaugeQuda(); + endQuda(); + + finalizeComms(); + return 0; +} diff --git a/tests/utils/host_blas.cpp b/tests/utils/host_blas.cpp index e918f77589..5a63209c94 100644 --- a/tests/utils/host_blas.cpp +++ b/tests/utils/host_blas.cpp @@ -128,3 +128,4 @@ void cpu_xpy(QudaPrecision prec, const void *x, void *y, int size) for (int i = 0; i < size; i++) { dst[i] += src[i]; } } } + diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index 207d072d44..bc5ca05682 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -1240,6 +1240,47 @@ void check_gauge(void **oldG, void **newG, double epsilon, QudaPrecision precisi checkGauge((float **)oldG, (float **)newG, epsilon); } +std::complex twoColorSpinorContract(std::complex *spinor1, std::complex *spinor2) +{ + int col_inc = 3; + + std::vector col_st{0, 1, 2}; + std::vector row_st{0, 3, 6}; + + std::vector> test_contract(9 * V); + complex trace = {0. , 0.}; + double trace_re,trace_im; + for (int i = 0; i < V; i++) { + + for (int ii = 0; ii < 9; ii++){ + int which_col_idx = (ii % 3), which_row_idx = (ii - (ii % 3))/ 3; + + std::complex dot = {0.,0.}; + + for(int i_s = 0; i_s < 4; i_s++){ + + int s_row_idx = i * 12 + col_st[which_row_idx]+ i_s*col_inc; + int s_col_idx = i * 12 + col_st[which_col_idx]+ i_s*col_inc; + + auto m1 = std::conj(spinor1[s_row_idx]); + auto m2 = spinor2[s_col_idx]; + + dot += m1*m2; + + } + test_contract[i * 9 + ii] = dot; + } + trace += (test_contract[i * 9] + test_contract[i * 9 + 4] + test_contract[i * 9 + 8]); + } + trace_re = trace.real(); + trace_im = trace.imag(); + quda::comm_allreduce_sum(trace_re); + quda::comm_allreduce_sum(trace_im); + + std::complex trace_fin = {trace_re,trace_im}; + return trace_fin; +} + void createSiteLinkCPU(void *const *link, QudaPrecision precision, int phase) { if (precision == QUDA_DOUBLE_PRECISION) { diff --git a/tests/utils/host_utils.h b/tests/utils/host_utils.h index 473a527ac5..2eaa790b45 100644 --- a/tests/utils/host_utils.h +++ b/tests/utils/host_utils.h @@ -130,6 +130,7 @@ void constructPointSpinorSource(void *v, QudaPrecision precision, const int *con void constructWallSpinorSource(void *v, int nSpin, int nColor, QudaPrecision precision, const int dil); void constructRandomSpinorSource(void *v, int nSpin, int nColor, QudaPrecision precision, QudaSolutionType sol_type, const int *const x, int nDim, quda::RNG &rng); +std::complex twoColorSpinorContract(std::complex *spinor1, std::complex *spinor2); //------------------------------------------------------ // Helper functions