From 151e48f4cc555409fbf67ae85662dbcc9beffd34 Mon Sep 17 00:00:00 2001 From: rokarur Date: Fri, 25 Oct 2024 13:32:17 -0700 Subject: [PATCH 01/53] add fermion test skeleton --- tests/CMakeLists.txt | 5 + tests/su3_ferm_test.cpp | 322 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 327 insertions(+) create mode 100644 tests/su3_ferm_test.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b295e00d9c..89d4332dc7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -199,6 +199,11 @@ target_link_libraries(su3_test ${TEST_LIBS}) quda_checkbuildtest(su3_test QUDA_BUILD_ALL_TESTS) install(TARGETS su3_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) +add_executable(su3_ferm_test su3_ferm_test.cpp) +target_link_libraries(su3_ferm_test ${TEST_LIBS}) +quda_checkbuildtest(su3_ferm_test QUDA_BUILD_ALL_TESTS) +install(TARGETS su3_ferm_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) + add_executable(pack_test pack_test.cpp) target_link_libraries(pack_test ${TEST_LIBS}) quda_checkbuildtest(pack_test QUDA_BUILD_ALL_TESTS) diff --git a/tests/su3_ferm_test.cpp b/tests/su3_ferm_test.cpp new file mode 100644 index 0000000000..2fb44ffe5b --- /dev/null +++ b/tests/su3_ferm_test.cpp @@ -0,0 +1,322 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +// In a typical application, quda.h is the only QUDA header required. +#include + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +// Smearing variables +double gauge_smear_rho = 0.1; +double gauge_smear_epsilon = 0.1; +double gauge_smear_alpha = 0.6; +double gauge_smear_alpha1 = 0.75; +double gauge_smear_alpha2 = 0.6; +double gauge_smear_alpha3 = 0.3; +int gauge_smear_steps = 50; +QudaGaugeSmearType gauge_smear_type = QUDA_GAUGE_SMEAR_STOUT; +int gauge_smear_dir_ignore = -1; +int measurement_interval = 5; +bool su_project = true; + +void display_test_info() +{ + printfQuda("running the following test:\n"); + + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension\n"); + printfQuda("%s %s %s %s %d/%d/%d %d\n", get_prec_str(prec), + get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, + tdim); + + // Specific test + printfQuda("\n%s smearing\n", get_gauge_smear_str(gauge_smear_type)); + switch (gauge_smear_type) { + case QUDA_GAUGE_SMEAR_APE: printfQuda(" - alpha %f\n", gauge_smear_alpha); break; + case QUDA_GAUGE_SMEAR_STOUT: printfQuda(" - rho %f\n", gauge_smear_rho); break; + case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: + printfQuda(" - rho %f\n", gauge_smear_rho); + printfQuda(" - epsilon %f\n", gauge_smear_epsilon); + break; + case QUDA_GAUGE_SMEAR_HYP: + printfQuda(" - alpha1 %f\n", gauge_smear_alpha1); + printfQuda(" - alpha2 %f\n", gauge_smear_alpha2); + printfQuda(" - alpha3 %f\n", gauge_smear_alpha3); + break; + case QUDA_GAUGE_SMEAR_WILSON_FLOW: + case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: printfQuda(" - epsilon %f\n", gauge_smear_epsilon); break; + default: errorQuda("Undefined test type %d given", test_type); + } + printfQuda(" - smearing steps %d\n", gauge_smear_steps); + printfQuda(" - smearing ignore direction %d\n", gauge_smear_dir_ignore); + printfQuda(" - Measurement interval %d\n", measurement_interval); + + printfQuda("Grid partition info: X Y Z T\n"); + printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), + dimPartitioned(3)); + return; +} + +void add_su3_option_group(std::shared_ptr quda_app) +{ + CLI::TransformPairs gauge_smear_type_map {{"ape", QUDA_GAUGE_SMEAR_APE}, + {"stout", QUDA_GAUGE_SMEAR_STOUT}, + {"ovrimp-stout", QUDA_GAUGE_SMEAR_OVRIMP_STOUT}, + {"hyp", QUDA_GAUGE_SMEAR_HYP}, + {"wilson", QUDA_GAUGE_SMEAR_WILSON_FLOW}, + {"symanzik", QUDA_GAUGE_SMEAR_SYMANZIK_FLOW}}; + + // Option group for SU(3) related options + auto opgroup = quda_app->add_option_group("SU(3)", "Options controlling SU(3) tests"); + + opgroup + ->add_option( + "--su3-smear-type", + gauge_smear_type, "The type of action to use in the smearing. Options: APE, Stout, Over Improved Stout, HYP, Wilson Flow, Symanzik Flow (default stout)") + ->transform(CLI::QUDACheckedTransformer(gauge_smear_type_map)); + ; + opgroup->add_option("--su3-smear-alpha", gauge_smear_alpha, "alpha coefficient for APE smearing (default 0.6)"); + + opgroup->add_option("--su3-smear-rho", gauge_smear_rho, + "rho coefficient for Stout and Over-Improved Stout smearing (default 0.1)"); + + opgroup->add_option("--su3-smear-epsilon", gauge_smear_epsilon, + "epsilon coefficient for Over-Improved Stout smearing or Wilson flow (default 0.1)"); + + opgroup->add_option("--su3-smear-alpha1", gauge_smear_alpha1, "alpha1 coefficient for HYP smearing (default 0.75)"); + opgroup->add_option("--su3-smear-alpha2", gauge_smear_alpha2, "alpha2 coefficient for HYP smearing (default 0.6)"); + opgroup->add_option("--su3-smear-alpha3", gauge_smear_alpha3, "alpha3 coefficient for HYP smearing (default 0.3)"); + + opgroup->add_option( + "--su3-smear-dir-ignore", gauge_smear_dir_ignore, + "Direction to be ignored by the smearing, negative value means decided by --su3-smear-type (default -1)"); + + opgroup->add_option("--su3-smear-steps", gauge_smear_steps, "The number of smearing steps to perform (default 50)"); + + opgroup->add_option("--su3-measurement-interval", measurement_interval, + "Measure the field energy and/or topological charge every Nth step (default 5) "); + + opgroup->add_option("--su3-project", su_project, + "Project smeared gauge onto su3 manifold at measurement interval (default true)"); +} + +int main(int argc, char **argv) +{ + + auto app = make_app(); + add_su3_option_group(app); + + try { + app->parse(argc, argv); + } catch (const CLI::ParseError &e) { + return app->exit(e); + } + + // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) + initComms(argc, argv, gridsize_from_cmdline); + + QudaGaugeParam gauge_param = newQudaGaugeParam(); + if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + + setWilsonGaugeParam(gauge_param); + gauge_param.t_boundary = QUDA_PERIODIC_T; + setDims(gauge_param.X); + + // All user inputs are now defined + display_test_info(); + + void *gauge[4], *new_gauge[4]; + + for (int dir = 0; dir < 4; dir++) { + gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); + new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); + } + + initQuda(device_ordinal); + + setVerbosity(verbosity); + + // call srand() with a rank-dependent seed + initRand(); + + constructHostGaugeField(gauge, gauge_param, argc, argv); + // Load the gauge field to the device + loadGaugeQuda((void *)gauge, &gauge_param); + saveGaugeQuda(new_gauge, &gauge_param); + + // Prepare various perf info + long long flops_plaquette = 6ll * 597 * V; + long long flops_ploop = 198ll * V + 6 * V / gauge_param.X[3]; + + // Prepare a gauge observable struct + QudaGaugeObservableParam param = newQudaGaugeObservableParam(); + + // start the timer + quda::host_timer_t host_timer; + +// // We call gaugeObservablesQuda multiple times to time each bit individually + +// // Compute the plaquette +// param.compute_plaquette = QUDA_BOOLEAN_TRUE; + +// // Tuning call +// gaugeObservablesQuda(¶m); + +// host_timer.start(); +// for (int i = 0; i < niter; i++) gaugeObservablesQuda(¶m); +// host_timer.stop(); +// double secs_plaquette = host_timer.last() / niter; +// double perf_plaquette = flops_plaquette / (secs_plaquette * 1024 * 1024 * 1024); +// printfQuda( +// "Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e), done in %g seconds, %g GFLOPS\n", +// param.plaquette[0], param.plaquette[1], param.plaquette[2], secs_plaquette, perf_plaquette); +// param.compute_plaquette = QUDA_BOOLEAN_FALSE; + +// // Compute the temporal Polyakov loop +// param.compute_polyakov_loop = QUDA_BOOLEAN_TRUE; + +// // Tuning call +// gaugeObservablesQuda(¶m); + +// host_timer.start(); +// for (int i = 0; i < niter; i++) gaugeObservablesQuda(¶m); +// host_timer.stop(); +// double secs_ploop = host_timer.last() / niter; +// double perf_ploop = flops_ploop / (secs_ploop * 1024 * 1024 * 1024); +// printfQuda("Computed Polyakov loop gauge precise is %.16e +/- I %.16e , done in %g seconds, %g GFLOPS\n", +// param.ploop[0], param.ploop[1], secs_ploop, perf_ploop); +// param.compute_polyakov_loop = QUDA_BOOLEAN_FALSE; + +// // Topological charge and gauge energy +// double q_charge_check = 0.0; +// // Size of floating point data +// size_t data_size = prec == QUDA_DOUBLE_PRECISION ? sizeof(double) : sizeof(float); +// size_t array_size = V * data_size; +// void *qDensity = pinned_malloc(array_size); + +// // start the timer +// host_timer.start(); + +// param.compute_qcharge = QUDA_BOOLEAN_TRUE; +// param.compute_qcharge_density = QUDA_BOOLEAN_TRUE; +// param.qcharge_density = qDensity; + +// gaugeObservablesQuda(¶m); + +// // stop the timer +// host_timer.stop(); +// printfQuda("Computed Etot, Es, Et, Q is\n%.16e %.16e, %.16e %.16e\nDone in %g secs\n", param.energy[0], +// param.energy[1], param.energy[2], param.qcharge, host_timer.last()); + +// // Ensure host array sums to return value +// if (prec == QUDA_DOUBLE_PRECISION) { +// for (int i = 0; i < V; i++) q_charge_check += ((double *)qDensity)[i]; +// } else { +// for (int i = 0; i < V; i++) q_charge_check += ((float *)qDensity)[i]; +// } + +// // release memory +// host_free(qDensity); + +// // Q charge Reduction and normalisation +// quda::comm_allreduce_sum(q_charge_check); + +// printfQuda("GPU value %e and host density sum %e. Q charge deviation: %e\n", param.qcharge, q_charge_check, +// param.qcharge - q_charge_check); + + // The user may specify which measurements they wish to perform/omit + // using the QudaGaugeObservableParam struct, and whether or not to + // perform suN projection at each measurement step. We recommend that + // users perform suN projection. + // A unique observable param struct is constructed for each measurement. + + // Gauge Smearing Routines + //--------------------------------------------------------------------------- + // Stout smearing should be equivalent to APE smearing + // on D dimensional lattices for rho = alpha/2*(D-1). + // Typical values for + // APE: alpha=0.6 + // Stout: rho=0.1 + // Over Improved Stout: rho=0.08, epsilon=-0.25 + // + // Typically, the user will use smearing for Q charge data only, so + // we hardcode to compute Q only and not the plaquette. Users may + // of course set these as they wish. SU(N) projection su_project=true is recommended. + QudaGaugeObservableParam *obs_param = new QudaGaugeObservableParam[gauge_smear_steps / measurement_interval + 1]; + for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { + obs_param[i] = newQudaGaugeObservableParam(); + obs_param[i].compute_plaquette = QUDA_BOOLEAN_FALSE; + obs_param[i].compute_qcharge = QUDA_BOOLEAN_TRUE; + obs_param[i].su_project = su_project ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + } + + // We here set all the problem parameters for all possible smearing types. + QudaGaugeSmearParam smear_param = newQudaGaugeSmearParam(); + smear_param.smear_type = gauge_smear_type; + smear_param.n_steps = gauge_smear_steps; + smear_param.meas_interval = measurement_interval; + smear_param.alpha = gauge_smear_alpha; + smear_param.rho = gauge_smear_rho; + smear_param.epsilon = gauge_smear_epsilon; + smear_param.alpha1 = gauge_smear_alpha1; + smear_param.alpha2 = gauge_smear_alpha2; + smear_param.alpha3 = gauge_smear_alpha3; + smear_param.dir_ignore = gauge_smear_dir_ignore; + + + quda::ColorSpinorParam cs_param, cs_param_out; + QudaInvertParam inv_param = newQudaInvertParam(); + constructWilsonTestSpinorParam(&cs_param, &inv_param, &gauge_param); + + + host_timer.start(); // start the timer + switch (smear_param.smear_type) { + case QUDA_GAUGE_SMEAR_APE: + case QUDA_GAUGE_SMEAR_STOUT: + case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: + case QUDA_GAUGE_SMEAR_HYP: { + performGaugeSmearQuda(&smear_param, obs_param); + break; + } + + // Here we use a typical use case which is different from simple smearing in that + // the user will want to compute the plaquette values to compute the gauge energy. + case QUDA_GAUGE_SMEAR_WILSON_FLOW: + case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: { + for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { + obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; + } + performWFlowQuda(&smear_param, obs_param); + break; + } + default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); + } + + host_timer.stop(); // stop the timer + printfQuda("Total time for gauge smearing = %g secs\n", host_timer.last()); + + if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); + + for (int dir = 0; dir < 4; dir++) { + host_free(gauge[dir]); + host_free(new_gauge[dir]); + } + + freeGaugeQuda(); + endQuda(); + + finalizeComms(); + return 0; +} From 372df866ea39985fe4b19e35e75e65786ac1985b Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 27 Oct 2024 03:06:36 -0700 Subject: [PATCH 02/53] basic wilson fermion flow test --- lib/interface_quda.cpp | 2 +- tests/su3_ferm_test.cpp | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index e5b678acfd..5e0ff0628f 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5221,7 +5221,7 @@ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaG pushOutputPrefix("performGFlowQuda: "); checkGaugeSmearParam(smear_param); - pushVerbosity(inv_param->verbosity); + // pushVerbosity(inv_param->verbosity); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); if (smear_param->restart) { diff --git a/tests/su3_ferm_test.cpp b/tests/su3_ferm_test.cpp index 2fb44ffe5b..085220ae21 100644 --- a/tests/su3_ferm_test.cpp +++ b/tests/su3_ferm_test.cpp @@ -274,13 +274,40 @@ int main(int argc, char **argv) smear_param.alpha2 = gauge_smear_alpha2; smear_param.alpha3 = gauge_smear_alpha3; smear_param.dir_ignore = gauge_smear_dir_ignore; - + quda::ColorSpinorField check,check_out; + QudaInvertParam invParam = newQudaInvertParam(); + invParam.cpu_prec = QUDA_DOUBLE_PRECISION; + invParam.cuda_prec = QUDA_DOUBLE_PRECISION; + invParam.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; + invParam.dirac_order = QUDA_DIRAC_ORDER; + + constexpr int nSpin = 4; + constexpr int nColor = 3; quda::ColorSpinorParam cs_param, cs_param_out; - QudaInvertParam inv_param = newQudaInvertParam(); - constructWilsonTestSpinorParam(&cs_param, &inv_param, &gauge_param); + cs_param.nColor = nColor; + cs_param.nSpin = nSpin; + cs_param.x = {xdim, ydim, zdim, tdim}; + cs_param.siteSubset = QUDA_FULL_SITE_SUBSET; + cs_param.setPrecision(invParam.cpu_prec); + cs_param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER; + cs_param.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; + cs_param.gammaBasis = invParam.gamma_basis; + cs_param.pc_type = QUDA_4D_PC; + cs_param.location = QUDA_CPU_FIELD_LOCATION; + cs_param.create = QUDA_NULL_FIELD_CREATE; + + cs_param_out = cs_param; + + constructWilsonTestSpinorParam(&cs_param, &invParam, &gauge_param); + check = quda::ColorSpinorField(cs_param); + constructWilsonTestSpinorParam(&cs_param_out, &invParam, &gauge_param); + check_out = quda::ColorSpinorField(cs_param_out); + // constructWilsonTestSpinorParam(&cs_param, &inv_param, &gauge_param); + // quda::ColorSpinorField rngDummy(cs_param), rngDummy1(cs_param_out); + host_timer.start(); // start the timer switch (smear_param.smear_type) { case QUDA_GAUGE_SMEAR_APE: @@ -298,7 +325,7 @@ int main(int argc, char **argv) for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; } - performWFlowQuda(&smear_param, obs_param); + performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); break; } default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); From 462e3fdb4414d91773916ef75bc5a9181849d6d6 Mon Sep 17 00:00:00 2001 From: rokarur Date: Thu, 31 Oct 2024 16:42:29 -0700 Subject: [PATCH 03/53] first successful compile of adjFlowSafe --- include/quda.h | 12 +++++++ lib/interface_quda.cpp | 57 +++++++++++++++++++++++++++++++ tests/CMakeLists.txt | 5 +++ tests/su3_ferm_test.cpp | 74 ++--------------------------------------- 4 files changed, 76 insertions(+), 72 deletions(-) diff --git a/include/quda.h b/include/quda.h index 155791b60a..8a7ebfb41f 100644 --- a/include/quda.h +++ b/include/quda.h @@ -1694,6 +1694,18 @@ extern "C" { */ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam *obs_param); + + /** + * Performs Adjoint Gradient Flow (gauge + fermion) the "safe" way on gaugePrecise and stores it in gaugeSmeared + * @param[out] h_out Output fermion field + * @param[in] h_in Input fermion field + * @param[in] smear_param Parameter struct that defines the computation parameters + * @param[in,out] obs_param Parameter struct that defines which + * observables we are making and the resulting observables. + */ + void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, int nsteps); + + /** * @brief Calculates a variety of gauge-field observables. If a diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 5e0ff0628f..12c1095d35 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5370,6 +5370,63 @@ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaG popOutputPrefix(); } /* end of performGFlowQuda */ + + + +void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, int nsteps) +{ + +//MKAE g_in :void *g_in how? +GaugeFieldParam gParamDummy(*gaugeSmeared); +GaugeField gaugeAux(gParamDummy); +GaugeField* gout_steps= new GaugeField[nsteps*3]; + +GaugeFieldParam gParam(*gaugePrecise); +gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct +GaugeField gaugeTemp(gParam); + +GaugeField &g_in = *gaugeSmeared; + +if (gParamDummy.order <= 4) gParamDummy.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + +auto smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; +// GaugeField &g_in = *gaugeSmeared; +// GaugeField *in = GaugeField::Create(gParamDummy); + +// // Set the specific input parameters and create the cpu gauge field +// GaugeFieldParam gauge_param(&gParamDummy, g_in); + +// if (gauge_param.order <= 4) gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; +// GaugeField *work_gauge_in = GaugeField::Create(gauge_param); + + +// = gaugeAux; + + for (int i = 0; i < nsteps; i++){ + for (int ss = 0; ss < 3; ss++){ + GaugeField *in = GaugeField::Create(gParamDummy); + gout_steps[i*3 + ss] = *in; + } + } + + for (unsigned int i = 0; i < nsteps; i++) { + + if (i == 0) + GFlowStep(gout_steps[i*3 + 0], gaugeTemp, g_in, 0.01, smear_type, WFLOW_STEP_W1); + else + GFlowStep(gout_steps[i*3 + 0], gaugeTemp, gout_steps[(i-1)*3 + 2], 0.01, smear_type, WFLOW_STEP_W1); + + GFlowStep(gout_steps[i*3 + 1], gaugeTemp, gout_steps[i*3 + 0], 0.01, smear_type, WFLOW_STEP_W2); + GFlowStep(gout_steps[i*3 + 2], gaugeTemp, gout_steps[i*3 + 1], 0.01, smear_type, WFLOW_STEP_VT); + + + } + + +} + + +/* save list of gauge vectors */ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double relax_boost, const double tolerance, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 89d4332dc7..e93386a782 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -182,6 +182,11 @@ if(QUDA_QIO) target_link_libraries(io_test ${TEST_LIBS}) quda_checkbuildtest(io_test QUDA_BUILD_ALL_TESTS) install(TARGETS io_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) + + add_executable(vanilla_io vanilla_io.cpp) + target_link_libraries(vanilla_io ${TEST_LIBS}) + quda_checkbuildtest(vanilla_io QUDA_BUILD_ALL_TESTS) + install(TARGETS vanilla_io ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() add_executable(tune_test tune_test.cpp) diff --git a/tests/su3_ferm_test.cpp b/tests/su3_ferm_test.cpp index 085220ae21..2f95fe4c77 100644 --- a/tests/su3_ferm_test.cpp +++ b/tests/su3_ferm_test.cpp @@ -166,76 +166,6 @@ int main(int argc, char **argv) // start the timer quda::host_timer_t host_timer; -// // We call gaugeObservablesQuda multiple times to time each bit individually - -// // Compute the plaquette -// param.compute_plaquette = QUDA_BOOLEAN_TRUE; - -// // Tuning call -// gaugeObservablesQuda(¶m); - -// host_timer.start(); -// for (int i = 0; i < niter; i++) gaugeObservablesQuda(¶m); -// host_timer.stop(); -// double secs_plaquette = host_timer.last() / niter; -// double perf_plaquette = flops_plaquette / (secs_plaquette * 1024 * 1024 * 1024); -// printfQuda( -// "Computed plaquette gauge precise is %.16e (spatial = %.16e, temporal = %.16e), done in %g seconds, %g GFLOPS\n", -// param.plaquette[0], param.plaquette[1], param.plaquette[2], secs_plaquette, perf_plaquette); -// param.compute_plaquette = QUDA_BOOLEAN_FALSE; - -// // Compute the temporal Polyakov loop -// param.compute_polyakov_loop = QUDA_BOOLEAN_TRUE; - -// // Tuning call -// gaugeObservablesQuda(¶m); - -// host_timer.start(); -// for (int i = 0; i < niter; i++) gaugeObservablesQuda(¶m); -// host_timer.stop(); -// double secs_ploop = host_timer.last() / niter; -// double perf_ploop = flops_ploop / (secs_ploop * 1024 * 1024 * 1024); -// printfQuda("Computed Polyakov loop gauge precise is %.16e +/- I %.16e , done in %g seconds, %g GFLOPS\n", -// param.ploop[0], param.ploop[1], secs_ploop, perf_ploop); -// param.compute_polyakov_loop = QUDA_BOOLEAN_FALSE; - -// // Topological charge and gauge energy -// double q_charge_check = 0.0; -// // Size of floating point data -// size_t data_size = prec == QUDA_DOUBLE_PRECISION ? sizeof(double) : sizeof(float); -// size_t array_size = V * data_size; -// void *qDensity = pinned_malloc(array_size); - -// // start the timer -// host_timer.start(); - -// param.compute_qcharge = QUDA_BOOLEAN_TRUE; -// param.compute_qcharge_density = QUDA_BOOLEAN_TRUE; -// param.qcharge_density = qDensity; - -// gaugeObservablesQuda(¶m); - -// // stop the timer -// host_timer.stop(); -// printfQuda("Computed Etot, Es, Et, Q is\n%.16e %.16e, %.16e %.16e\nDone in %g secs\n", param.energy[0], -// param.energy[1], param.energy[2], param.qcharge, host_timer.last()); - -// // Ensure host array sums to return value -// if (prec == QUDA_DOUBLE_PRECISION) { -// for (int i = 0; i < V; i++) q_charge_check += ((double *)qDensity)[i]; -// } else { -// for (int i = 0; i < V; i++) q_charge_check += ((float *)qDensity)[i]; -// } - -// // release memory -// host_free(qDensity); - -// // Q charge Reduction and normalisation -// quda::comm_allreduce_sum(q_charge_check); - -// printfQuda("GPU value %e and host density sum %e. Q charge deviation: %e\n", param.qcharge, q_charge_check, -// param.qcharge - q_charge_check); - // The user may specify which measurements they wish to perform/omit // using the QudaGaugeObservableParam struct, and whether or not to // perform suN projection at each measurement step. We recommend that @@ -301,8 +231,8 @@ int main(int argc, char **argv) constructWilsonTestSpinorParam(&cs_param, &invParam, &gauge_param); check = quda::ColorSpinorField(cs_param); - constructWilsonTestSpinorParam(&cs_param_out, &invParam, &gauge_param); - check_out = quda::ColorSpinorField(cs_param_out); + // constructWilsonTestSpinorParam(&cs_param_out, &invParam, &gauge_param); + check_out = quda::ColorSpinorField(cs_param); // constructWilsonTestSpinorParam(&cs_param, &inv_param, &gauge_param); From eddf4317ebd710d4dfe1c5c27f34ec27219f0189 Mon Sep 17 00:00:00 2001 From: rokarur Date: Fri, 1 Nov 2024 23:35:39 -0700 Subject: [PATCH 04/53] Successful compile of adjFlowSafe --- lib/interface_quda.cpp | 107 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 102 insertions(+), 5 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 12c1095d35..d5423366b2 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5377,9 +5377,21 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, in { //MKAE g_in :void *g_in how? + + + + GaugeFieldParam gParamDummy(*gaugeSmeared); GaugeField gaugeAux(gParamDummy); GaugeField* gout_steps= new GaugeField[nsteps*3]; + +//TODO +// Set the specific input parameters and create the cpu gauge field +// GaugeFieldParam gauge_param(*param, g_in); + +// if (gauge_param.order <= 4) gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; +// GaugeField *in = GaugeField::Create(gauge_param); + GaugeFieldParam gParam(*gaugePrecise); gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct @@ -5390,8 +5402,6 @@ GaugeField &g_in = *gaugeSmeared; if (gParamDummy.order <= 4) gParamDummy.ghostExchange = QUDA_GHOST_EXCHANGE_NO; auto smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; -// GaugeField &g_in = *gaugeSmeared; -// GaugeField *in = GaugeField::Create(gParamDummy); // // Set the specific input parameters and create the cpu gauge field // GaugeFieldParam gauge_param(&gParamDummy, g_in); @@ -5410,7 +5420,6 @@ auto smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; } for (unsigned int i = 0; i < nsteps; i++) { - if (i == 0) GFlowStep(gout_steps[i*3 + 0], gaugeTemp, g_in, 0.01, smear_type, WFLOW_STEP_W1); else @@ -5418,10 +5427,98 @@ auto smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; GFlowStep(gout_steps[i*3 + 1], gaugeTemp, gout_steps[i*3 + 0], 0.01, smear_type, WFLOW_STEP_W2); GFlowStep(gout_steps[i*3 + 2], gaugeTemp, gout_steps[i*3 + 1], 0.01, smear_type, WFLOW_STEP_VT); - - } + + + // helper gauge field for Laplace operator + GaugeField precise; + GaugeFieldParam gParam_helper(*gaugePrecise); + gParam_helper.create = QUDA_NULL_FIELD_CREATE; + precise = GaugeField(gParam_helper); + + // spinor fields + ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); + ColorSpinorField fin_h(cpuParam); + + ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); + ColorSpinorField fin(deviceParam); + fin = fin_h; + + deviceParam.create = QUDA_NULL_FIELD_CREATE; + ColorSpinorField fout(deviceParam); + + int parity = 0; + + // initialize a and b for Laplace operator + double a = 1.; + double b = -8.; + + int comm_dim[4] = {}; + + // only switch on comms needed for directions with a derivative + for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } + + // auxilliary fermion fields [0], [1], [2] and [3] + ColorSpinorField f_temp0(deviceParam); + ColorSpinorField f_temp1(deviceParam); + ColorSpinorField f_temp2(deviceParam); + ColorSpinorField f_temp3(deviceParam); + ColorSpinorField f_temp4(deviceParam); + + // set [3] = input spinor + f_temp3 = fin; + + int measurement_n = 0; // The nth measurement to take + + for (unsigned int i = nsteps - 1; i >= 0; i--) { + //TODO: REPLACE + // if (i > 0) std::swap(gin, gout); // output from prior step becomes input for next step + + // init auxilliary fields [0], [1] and [2] as [3] + f_temp0 = f_temp3; + f_temp1 = f_temp3; + f_temp2 = f_temp3; + + int index_g = i * 3; + + copyExtendedGauge(precise, gout_steps[index_g + 2], QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profileGFlow); + + // f_temp0 = 3./4.*f_temp4; + blas::ax(3. / 4., f_temp4); + + f_temp2 = f_temp4; + + copyExtendedGauge(precise, gout_steps[index_g + 1], QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profileGFlow); + + + blas::axpy(8. / 9., f_temp4, f_temp3); + + f_temp1 = f_temp3; + f_temp4 = f_temp1; + + blas::axpy(-8. / 9.,f_temp2, f_temp4); + + copyExtendedGauge(precise, gout_steps[index_g + 0], QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, false, comm_dim, profileGFlow); + + blas::axpy(1.,f_temp2, f_temp0); + blas::axpy(1.,f_temp1, f_temp0); + + fout = f_temp0; + } + + cpuParam.v = h_out; + cpuParam.location = inv_param->output_location; + ColorSpinorField fout_h(cpuParam); + fout_h = fout; + popOutputPrefix(); + } From 637fd727fd4f63126a50ea5960a173804ccb185d Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 3 Nov 2024 00:37:31 -0700 Subject: [PATCH 05/53] first working Adj Safe GFlow --- include/quda.h | 2 +- lib/interface_quda.cpp | 261 +++++++++++++++++++++++++++++----------- tests/su3_ferm_test.cpp | 9 +- 3 files changed, 200 insertions(+), 72 deletions(-) diff --git a/include/quda.h b/include/quda.h index 8a7ebfb41f..45a4598496 100644 --- a/include/quda.h +++ b/include/quda.h @@ -1703,7 +1703,7 @@ extern "C" { * @param[in,out] obs_param Parameter struct that defines which * observables we are making and the resulting observables. */ - void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, int nsteps); + void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, int nsteps); diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index d5423366b2..69141d78b7 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -196,6 +196,9 @@ static TimeProfile profileWFlow("wFlowQuda"); //!< Profiler for gFlowQuda static TimeProfile profileGFlow("gFlowQuda"); +//!< Profiler for gFlowQuda +static TimeProfile profileAdjGFlowSafe("AdjgFlowSafeQuda"); + //!< Profiler for projectSU3Quda static TimeProfile profileProject("projectSU3Quda"); @@ -5371,65 +5374,46 @@ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaG } /* end of performGFlowQuda */ - - -void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, int nsteps) +void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, int nsteps) { - -//MKAE g_in :void *g_in how? - - - -GaugeFieldParam gParamDummy(*gaugeSmeared); -GaugeField gaugeAux(gParamDummy); -GaugeField* gout_steps= new GaugeField[nsteps*3]; - -//TODO -// Set the specific input parameters and create the cpu gauge field -// GaugeFieldParam gauge_param(*param, g_in); + auto profile = pushProfile(profileAdjGFlowSafe); + pushOutputPrefix("performAdjGFlowQudaSafe: "); + checkGaugeSmearParam(smear_param); -// if (gauge_param.order <= 4) gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; -// GaugeField *in = GaugeField::Create(gauge_param); + // pushVerbosity(inv_param->verbosity); + if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); - -GaugeFieldParam gParam(*gaugePrecise); -gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct -GaugeField gaugeTemp(gParam); + if (smear_param->restart) { + if (gaugeSmeared == nullptr) errorQuda("gaugeSmeared must be loaded"); + } else { + if (gaugePrecise == nullptr) errorQuda("Gauge field must be loaded"); + freeUniqueGaugeQuda(QUDA_SMEARED_LINKS); + gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileAdjGFlowSafe); + } -GaugeField &g_in = *gaugeSmeared; - -if (gParamDummy.order <= 4) gParamDummy.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - -auto smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; - -// // Set the specific input parameters and create the cpu gauge field -// GaugeFieldParam gauge_param(&gParamDummy, g_in); + GaugeFieldParam gParamDummy(*gaugeSmeared); + GaugeField gaugeW0(gParamDummy); + GaugeField gaugeW1(gParamDummy); + GaugeField gaugeW2(gParamDummy); + GaugeField gaugeVT(gParamDummy); + GaugeField* gout_steps= new GaugeField[nsteps*3]; -// if (gauge_param.order <= 4) gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; -// GaugeField *work_gauge_in = GaugeField::Create(gauge_param); + GaugeFieldParam gParam(*gaugePrecise); + gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct + GaugeField gaugeTemp(gParam); + // GaugeField &g_in = *gaugeSmeared; + GaugeField &g_W0 = *gaugeSmeared; + GaugeField &g_W1 = gaugeW1; + GaugeField &g_W2 = gaugeW2; + GaugeField &g_VT = gaugeVT; -// = gaugeAux; + if (gParamDummy.order <= 4) gParamDummy.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - for (int i = 0; i < nsteps; i++){ - for (int ss = 0; ss < 3; ss++){ - GaugeField *in = GaugeField::Create(gParamDummy); - gout_steps[i*3 + ss] = *in; - } - } - - for (unsigned int i = 0; i < nsteps; i++) { - if (i == 0) - GFlowStep(gout_steps[i*3 + 0], gaugeTemp, g_in, 0.01, smear_type, WFLOW_STEP_W1); - else - GFlowStep(gout_steps[i*3 + 0], gaugeTemp, gout_steps[(i-1)*3 + 2], 0.01, smear_type, WFLOW_STEP_W1); - - GFlowStep(gout_steps[i*3 + 1], gaugeTemp, gout_steps[i*3 + 0], 0.01, smear_type, WFLOW_STEP_W2); - GFlowStep(gout_steps[i*3 + 2], gaugeTemp, gout_steps[i*3 + 1], 0.01, smear_type, WFLOW_STEP_VT); - } - + auto smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; + // helper gauge field for Laplace operator GaugeField precise; GaugeFieldParam gParam_helper(*gaugePrecise); @@ -5467,60 +5451,203 @@ auto smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; // set [3] = input spinor f_temp3 = fin; - - int measurement_n = 0; // The nth measurement to take - for (unsigned int i = nsteps - 1; i >= 0; i--) { - //TODO: REPLACE - // if (i > 0) std::swap(gin, gout); // output from prior step becomes input for next step + + printf("Stage 1 passed \n"); + for (unsigned int j = 0; j < smear_param->n_steps ; j++) + { + for (unsigned int i = 0; i < smear_param->n_steps - j; i++) { + + if (i > 0) std::swap(g_W0,g_VT); + + GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); + GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); + GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); + } // init auxilliary fields [0], [1] and [2] as [3] f_temp0 = f_temp3; f_temp1 = f_temp3; f_temp2 = f_temp3; - int index_g = i * 3; - - copyExtendedGauge(precise, gout_steps[index_g + 2], QUDA_CUDA_FIELD_LOCATION); + copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); - ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profileGFlow); + ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profileAdjGFlowSafe); // f_temp0 = 3./4.*f_temp4; - blas::ax(3. / 4., f_temp4); + blas::ax(smear_param->epsilon * 3. / 4., f_temp4); f_temp2 = f_temp4; - copyExtendedGauge(precise, gout_steps[index_g + 1], QUDA_CUDA_FIELD_LOCATION); + copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); - ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profileGFlow); + ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profileAdjGFlowSafe); - blas::axpy(8. / 9., f_temp4, f_temp3); + blas::axpy(smear_param->epsilon * 8. / 9., f_temp4, f_temp3); f_temp1 = f_temp3; f_temp4 = f_temp1; blas::axpy(-8. / 9.,f_temp2, f_temp4); - copyExtendedGauge(precise, gout_steps[index_g + 0], QUDA_CUDA_FIELD_LOCATION); + copyExtendedGauge(precise, g_W0, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); - ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, false, comm_dim, profileGFlow); - + ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, false, comm_dim, profileAdjGFlowSafe); + + blas::ax(smear_param->epsilon * 1. / 4., f_temp0); blas::axpy(1.,f_temp2, f_temp0); blas::axpy(1.,f_temp1, f_temp0); fout = f_temp0; + //redefining f_temp0 to restart loop + f_temp3 = f_temp0; } - cpuParam.v = h_out; cpuParam.location = inv_param->output_location; ColorSpinorField fout_h(cpuParam); fout_h = fout; popOutputPrefix(); +} + +// void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, int nsteps) +// { + +// printf("Stage 0 passed \n"); + +// GaugeFieldParam gParamDummy(*gaugeSmeared); +// GaugeField gaugeAux(gParamDummy); +// GaugeField* gout_steps= new GaugeField[nsteps*3]; + +// GaugeFieldParam gParam(*gaugePrecise); +// gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct +// GaugeField gaugeTemp(gParam); + +// GaugeField &g_in = *gaugeSmeared; + +// if (gParamDummy.order <= 4) gParamDummy.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + +// auto smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; + +// // // Set the specific input parameters and create the cpu gauge field +// // GaugeFieldParam gauge_param(&gParamDummy, g_in); + +// // if (gauge_param.order <= 4) gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; +// // GaugeField *work_gauge_in = GaugeField::Create(gauge_param); + + +// // = gaugeAux; +// printf("Stage 1 passed \n"); +// for (int i = 0; i < nsteps; i++){ +// for (int ss = 0; ss < 3; ss++){ +// GaugeField *in = GaugeField::Create(gParamDummy); +// gout_steps[i*3 + ss] = *in; +// } +// } +// printf("Stage 2 passed \n"); +// for (unsigned int i = 0; i < nsteps; i++) { +// if (i == 0) +// GFlowStep(gout_steps[i*3 + 0], gaugeTemp, g_in, 0.01, smear_type, WFLOW_STEP_W1); +// else +// GFlowStep(gout_steps[i*3 + 0], gaugeTemp, gout_steps[(i-1)*3 + 2], 0.01, smear_type, WFLOW_STEP_W1); + +// GFlowStep(gout_steps[i*3 + 1], gaugeTemp, gout_steps[i*3 + 0], 0.01, smear_type, WFLOW_STEP_W2); +// GFlowStep(gout_steps[i*3 + 2], gaugeTemp, gout_steps[i*3 + 1], 0.01, smear_type, WFLOW_STEP_VT); +// } + +// printf("Stage 3 passed \n"); +// // helper gauge field for Laplace operator +// GaugeField precise; +// GaugeFieldParam gParam_helper(*gaugePrecise); +// gParam_helper.create = QUDA_NULL_FIELD_CREATE; +// precise = GaugeField(gParam_helper); + +// // spinor fields +// ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); +// ColorSpinorField fin_h(cpuParam); + +// ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); +// ColorSpinorField fin(deviceParam); +// fin = fin_h; + +// deviceParam.create = QUDA_NULL_FIELD_CREATE; +// ColorSpinorField fout(deviceParam); + +// int parity = 0; + +// // initialize a and b for Laplace operator +// double a = 1.; +// double b = -8.; + +// int comm_dim[4] = {}; + +// // only switch on comms needed for directions with a derivative +// for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } + +// // auxilliary fermion fields [0], [1], [2] and [3] +// ColorSpinorField f_temp0(deviceParam); +// ColorSpinorField f_temp1(deviceParam); +// ColorSpinorField f_temp2(deviceParam); +// ColorSpinorField f_temp3(deviceParam); +// ColorSpinorField f_temp4(deviceParam); + +// // set [3] = input spinor +// f_temp3 = fin; + +// int measurement_n = 0; // The nth measurement to take + +// for (unsigned int i = nsteps - 1; i >= 0; i--) { +// //TODO: REPLACE +// // if (i > 0) std::swap(gin, gout); // output from prior step becomes input for next step + +// // init auxilliary fields [0], [1] and [2] as [3] +// f_temp0 = f_temp3; +// f_temp1 = f_temp3; +// f_temp2 = f_temp3; + +// int index_g = i * 3; + +// copyExtendedGauge(precise, gout_steps[index_g + 2], QUDA_CUDA_FIELD_LOCATION); +// precise.exchangeGhost(); +// ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profileGFlow); + +// // f_temp0 = 3./4.*f_temp4; +// blas::ax(3. / 4., f_temp4); + +// f_temp2 = f_temp4; + +// copyExtendedGauge(precise, gout_steps[index_g + 1], QUDA_CUDA_FIELD_LOCATION); +// precise.exchangeGhost(); +// ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profileGFlow); + + +// blas::axpy(8. / 9., f_temp4, f_temp3); + +// f_temp1 = f_temp3; +// f_temp4 = f_temp1; + +// blas::axpy(-8. / 9.,f_temp2, f_temp4); + +// copyExtendedGauge(precise, gout_steps[index_g + 0], QUDA_CUDA_FIELD_LOCATION); +// precise.exchangeGhost(); +// ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, false, comm_dim, profileGFlow); + +// blas::axpy(1.,f_temp2, f_temp0); +// blas::axpy(1.,f_temp1, f_temp0); + +// fout = f_temp0; +// } + +// cpuParam.v = h_out; +// cpuParam.location = inv_param->output_location; +// ColorSpinorField fout_h(cpuParam); +// fout_h = fout; + +// popOutputPrefix(); -} +// } /* save list of gauge vectors */ diff --git a/tests/su3_ferm_test.cpp b/tests/su3_ferm_test.cpp index 2f95fe4c77..cf31d61735 100644 --- a/tests/su3_ferm_test.cpp +++ b/tests/su3_ferm_test.cpp @@ -25,7 +25,7 @@ double gauge_smear_alpha = 0.6; double gauge_smear_alpha1 = 0.75; double gauge_smear_alpha2 = 0.6; double gauge_smear_alpha3 = 0.3; -int gauge_smear_steps = 50; +int gauge_smear_steps = 5; QudaGaugeSmearType gauge_smear_type = QUDA_GAUGE_SMEAR_STOUT; int gauge_smear_dir_ignore = -1; int measurement_interval = 5; @@ -237,7 +237,7 @@ int main(int argc, char **argv) // quda::ColorSpinorField rngDummy(cs_param), rngDummy1(cs_param_out); - + printf("Stage -1 passed\n"); host_timer.start(); // start the timer switch (smear_param.smear_type) { case QUDA_GAUGE_SMEAR_APE: @@ -247,7 +247,7 @@ int main(int argc, char **argv) performGaugeSmearQuda(&smear_param, obs_param); break; } - + // Here we use a typical use case which is different from simple smearing in that // the user will want to compute the plaquette values to compute the gauge energy. case QUDA_GAUGE_SMEAR_WILSON_FLOW: @@ -255,7 +255,8 @@ int main(int argc, char **argv) for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; } - performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); + // performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); + performAdjGFlowSafe(check.data(),check_out.data(), &invParam, &smear_param, 3); break; } default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); From 1e18c3b9060951d3bb0292269941983856910e0f Mon Sep 17 00:00:00 2001 From: rokarur Date: Thu, 14 Nov 2024 18:47:19 -0800 Subject: [PATCH 06/53] added adjoint flow first commit --- include/quda.h | 12 +- lib/interface_quda.cpp | 313 ++++++++++++++++++++++++++-------------- tests/CMakeLists.txt | 5 + tests/su3_adj_test.cpp | 284 ++++++++++++++++++++++++++++++++++++ tests/su3_ferm_test.cpp | 2 + 5 files changed, 503 insertions(+), 113 deletions(-) create mode 100644 tests/su3_adj_test.cpp diff --git a/include/quda.h b/include/quda.h index 45a4598496..760f6a98a8 100644 --- a/include/quda.h +++ b/include/quda.h @@ -862,6 +862,8 @@ extern "C" { double alpha3; /**< The coefficient used in HYP smearing step 1*/ unsigned int meas_interval; /**< Perform the requested measurements on the gauge field at this interval */ QudaGaugeSmearType smear_type; /**< The smearing type to perform */ + unsigned int adj_n_save; /**< How many intermediate gauge fields to save at each large nblock to perform adj flow*/ + unsigned int adj_n_hier_save; /**< How many *hierarchical* intermediate gauge fields to save to perform adj flow*/ QudaBoolean restart; /**< Used to restart the smearing from existing gaugeSmeared */ double t0; /**< Starting flow time for Wilson flow */ int dir_ignore; /**< The direction to be ignored by the smearing algorithm @@ -1705,7 +1707,15 @@ extern "C" { */ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, int nsteps); - + /** + * Performs Adjoint Gradient Flow (gauge + fermion) the "NB" way on gaugePrecise and stores it in gaugeSmeared + * @param[out] h_out Output fermion field + * @param[in] h_in Input fermion field + * @param[in] smear_param Parameter struct that defines the computation parameters + * @param[in,out] obs_param Parameter struct that defines which + * observables we are making and the resulting observables. + */ + void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); /** * @brief Calculates a variety of gauge-field observables. If a diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 69141d78b7..b898f6e774 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -199,6 +200,8 @@ static TimeProfile profileGFlow("gFlowQuda"); //!< Profiler for gFlowQuda static TimeProfile profileAdjGFlowSafe("AdjgFlowSafeQuda"); +static TimeProfile profileAdjGFlowNB("AdjgFlowNBQuda"); + //!< Profiler for projectSU3Quda static TimeProfile profileProject("projectSU3Quda"); @@ -5391,24 +5394,23 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu freeUniqueGaugeQuda(QUDA_SMEARED_LINKS); gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileAdjGFlowSafe); } - + GaugeFieldParam gParamDummy(*gaugeSmeared); GaugeField gaugeW0(gParamDummy); GaugeField gaugeW1(gParamDummy); GaugeField gaugeW2(gParamDummy); GaugeField gaugeVT(gParamDummy); - GaugeField* gout_steps= new GaugeField[nsteps*3]; GaugeFieldParam gParam(*gaugePrecise); gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct GaugeField gaugeTemp(gParam); - // GaugeField &g_in = *gaugeSmeared; GaugeField &g_W0 = *gaugeSmeared; GaugeField &g_W1 = gaugeW1; GaugeField &g_W2 = gaugeW2; GaugeField &g_VT = gaugeVT; + //necessary? if (gParamDummy.order <= 4) gParamDummy.ghostExchange = QUDA_GHOST_EXCHANGE_NO; auto smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; @@ -5511,145 +5513,232 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu popOutputPrefix(); } -// void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, int nsteps) -// { +void adjSafeEvolve(std::vector> sf_list,std::vector> gf_list, QudaGaugeSmearParam *smear_param, unsigned int ns_safe, TimeProfile &profile) +{ + + GaugeField &g_W0 = gf_list[0].get(); + GaugeField &g_W1 = gf_list[1].get(); + GaugeField &g_W2 = gf_list[2].get(); + GaugeField &g_VT = gf_list[3].get(); + GaugeField &gaugeTemp = gf_list[4].get(); + GaugeField &precise = gf_list[5].get(); + + ColorSpinorField &fin = sf_list[0].get(); + ColorSpinorField &fout = sf_list[1].get(); + ColorSpinorField &f_temp0 = sf_list[2].get(); + ColorSpinorField &f_temp1 = sf_list[3].get(); + ColorSpinorField &f_temp2 = sf_list[4].get(); + ColorSpinorField &f_temp3 = sf_list[5].get(); + ColorSpinorField &f_temp4 = sf_list[6].get(); + + int parity = 0; -// printf("Stage 0 passed \n"); - -// GaugeFieldParam gParamDummy(*gaugeSmeared); -// GaugeField gaugeAux(gParamDummy); -// GaugeField* gout_steps= new GaugeField[nsteps*3]; + // initialize a and b for Laplace operator + double a = 1.; + double b = -8.; + + int comm_dim[4] = {}; + // only switch on comms needed for directions with a derivative + for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } + + f_temp3 = fin; + f_temp0 = f_temp3; -// GaugeFieldParam gParam(*gaugePrecise); -// gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct -// GaugeField gaugeTemp(gParam); + for (unsigned int j = 0; j < ns_safe ; j++) + { + for (unsigned int i = 0; i < ns_safe - j; i++) { + + if (i > 0) std::swap(g_W0,g_VT); + + GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); + GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); + GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); -// GaugeField &g_in = *gaugeSmeared; + } + // init auxilliary fields [0], [1] and [2] as [3] + f_temp0 = f_temp3; + f_temp1 = f_temp3; + f_temp2 = f_temp3; + + copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profileAdjGFlowNB); + + // f_temp0 = 3./4.*f_temp4; + blas::ax(smear_param->epsilon * 3. / 4., f_temp4); -// if (gParamDummy.order <= 4) gParamDummy.ghostExchange = QUDA_GHOST_EXCHANGE_NO; + f_temp2 = f_temp4; + + copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profile); -// auto smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; + + blas::axpy(smear_param->epsilon * 8. / 9., f_temp4, f_temp3); -// // // Set the specific input parameters and create the cpu gauge field -// // GaugeFieldParam gauge_param(&gParamDummy, g_in); - -// // if (gauge_param.order <= 4) gauge_param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; -// // GaugeField *work_gauge_in = GaugeField::Create(gauge_param); - - -// // = gaugeAux; -// printf("Stage 1 passed \n"); -// for (int i = 0; i < nsteps; i++){ -// for (int ss = 0; ss < 3; ss++){ -// GaugeField *in = GaugeField::Create(gParamDummy); -// gout_steps[i*3 + ss] = *in; -// } -// } -// printf("Stage 2 passed \n"); -// for (unsigned int i = 0; i < nsteps; i++) { -// if (i == 0) -// GFlowStep(gout_steps[i*3 + 0], gaugeTemp, g_in, 0.01, smear_type, WFLOW_STEP_W1); -// else -// GFlowStep(gout_steps[i*3 + 0], gaugeTemp, gout_steps[(i-1)*3 + 2], 0.01, smear_type, WFLOW_STEP_W1); - -// GFlowStep(gout_steps[i*3 + 1], gaugeTemp, gout_steps[i*3 + 0], 0.01, smear_type, WFLOW_STEP_W2); -// GFlowStep(gout_steps[i*3 + 2], gaugeTemp, gout_steps[i*3 + 1], 0.01, smear_type, WFLOW_STEP_VT); -// } + f_temp1 = f_temp3; + f_temp4 = f_temp1; + + blas::axpy(-8. / 9.,f_temp2, f_temp4); + + copyExtendedGauge(precise, g_W0, QUDA_CUDA_FIELD_LOCATION); + precise.exchangeGhost(); + ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, false, comm_dim, profile); -// printf("Stage 3 passed \n"); -// // helper gauge field for Laplace operator -// GaugeField precise; -// GaugeFieldParam gParam_helper(*gaugePrecise); -// gParam_helper.create = QUDA_NULL_FIELD_CREATE; -// precise = GaugeField(gParam_helper); + blas::ax(smear_param->epsilon * 1. / 4., f_temp0); + blas::axpy(1.,f_temp2, f_temp0); + blas::axpy(1.,f_temp1, f_temp0); + + fout = f_temp0; + //redefining f_temp0 to restart loop + f_temp3 = f_temp0; + } -// // spinor fields -// ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); -// ColorSpinorField fin_h(cpuParam); +} + + + +void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param){ -// ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); -// ColorSpinorField fin(deviceParam); -// fin = fin_h; + auto profile = pushProfile(profileAdjGFlowNB); + pushOutputPrefix("performAdjGFlowQudaNB: "); + checkGaugeSmearParam(smear_param); -// deviceParam.create = QUDA_NULL_FIELD_CREATE; -// ColorSpinorField fout(deviceParam); + // pushVerbosity(inv_param->verbosity); + if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); -// int parity = 0; + if (smear_param->restart) { + if (gaugeSmeared == nullptr) errorQuda("gaugeSmeared must be loaded"); + } else { + if (gaugePrecise == nullptr) errorQuda("Gauge field must be loaded"); + freeUniqueGaugeQuda(QUDA_SMEARED_LINKS); + gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileAdjGFlowNB); + } + + GaugeFieldParam gParamDummy(*gaugeSmeared); + GaugeField gaugeW0(gParamDummy); + GaugeField gaugeW1(gParamDummy); + GaugeField gaugeW2(gParamDummy); + GaugeField gaugeVT(gParamDummy); + GaugeField gauge_out(gParamDummy); -// // initialize a and b for Laplace operator -// double a = 1.; -// double b = -8.; + GaugeFieldParam gParam(*gaugePrecise); + gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct + GaugeField gaugeTemp(gParam); -// int comm_dim[4] = {}; + auto n = smear_param->adj_n_save; + + std::vector gauge_stages(n,gParamDummy); + //Can also do below + //creates copies std::vector gauge_stages(n,*gaugeSmeared); -// // only switch on comms needed for directions with a derivative -// for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } + GaugeField &gin = *gaugeSmeared; + GaugeField &gout = gauge_out; + + // helper gauge field for Laplace operator + GaugeField precise; + GaugeFieldParam gParam_helper(*gaugePrecise); + gParam_helper.create = QUDA_NULL_FIELD_CREATE; + precise = GaugeField(gParam_helper); -// // auxilliary fermion fields [0], [1], [2] and [3] -// ColorSpinorField f_temp0(deviceParam); -// ColorSpinorField f_temp1(deviceParam); -// ColorSpinorField f_temp2(deviceParam); -// ColorSpinorField f_temp3(deviceParam); -// ColorSpinorField f_temp4(deviceParam); + // spinor fields + ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); + ColorSpinorField fin_h(cpuParam); -// // set [3] = input spinor -// f_temp3 = fin; + ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); + ColorSpinorField fin(deviceParam); + fin = fin_h; -// int measurement_n = 0; // The nth measurement to take + deviceParam.create = QUDA_NULL_FIELD_CREATE; + ColorSpinorField fout(deviceParam); -// for (unsigned int i = nsteps - 1; i >= 0; i--) { -// //TODO: REPLACE -// // if (i > 0) std::swap(gin, gout); // output from prior step becomes input for next step - -// // init auxilliary fields [0], [1] and [2] as [3] -// f_temp0 = f_temp3; -// f_temp1 = f_temp3; -// f_temp2 = f_temp3; - -// int index_g = i * 3; - -// copyExtendedGauge(precise, gout_steps[index_g + 2], QUDA_CUDA_FIELD_LOCATION); -// precise.exchangeGhost(); -// ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profileGFlow); - -// // f_temp0 = 3./4.*f_temp4; -// blas::ax(3. / 4., f_temp4); + ColorSpinorField f_temp0(deviceParam); + ColorSpinorField f_temp1(deviceParam); + ColorSpinorField f_temp2(deviceParam); + ColorSpinorField f_temp3(deviceParam); + ColorSpinorField f_temp4(deviceParam); -// f_temp2 = f_temp4; - -// copyExtendedGauge(precise, gout_steps[index_g + 1], QUDA_CUDA_FIELD_LOCATION); -// precise.exchangeGhost(); -// ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profileGFlow); + + unsigned int block_length = smear_param->n_steps / smear_param->adj_n_save; + int block_counter = 0; + std::vector dist_save(smear_param->adj_n_save); + std::fill(dist_save.begin(), dist_save.end(), block_length); + dist_save.at(dist_save.size() - 1) = smear_param->n_steps - (smear_param->adj_n_save - 1) * block_length; - -// blas::axpy(8. / 9., f_temp4, f_temp3); + for (unsigned int i = 0; i < smear_param->adj_n_save; i++) printf("evolve distance of %d added \n",dist_save[i]); -// f_temp1 = f_temp3; -// f_temp4 = f_temp1; + for (unsigned int i = 0; i < smear_param->adj_n_save; i++) { + + gauge_stages[i] = gout; + for (unsigned int j = 0; j < block_length; j++){ + if (i > 0) std::swap(gout,gin); + WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); + } -// blas::axpy(-8. / 9.,f_temp2, f_temp4); +// if (i == smear_param->adj_n_save - 1 ) { +// dist_save.push_back(smear_param->n_steps - block_counter); +// } +// else { + +// dist_save.push_back(block_length); +// } +// block_counter += block_length; + + // printf("evolve distance of %d added \n",dist_save.back()); -// copyExtendedGauge(precise, gout_steps[index_g + 0], QUDA_CUDA_FIELD_LOCATION); -// precise.exchangeGhost(); -// ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, false, comm_dim, profileGFlow); + } + std::vector> sf_list; + sf_list = {fin, fout, f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; + std::vector> gf_list; + gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; + for (int i = gauge_stages.size() - 1; i >= 0; --i) { + //first load correct gauge field (for beginning of the loop, it is the final gauge list element) + if (i < gauge_stages.size() - 1) gf_list.at(0) = std::ref(gauge_stages[i]); + + adjSafeEvolve(sf_list,gf_list,smear_param,dist_save[i],profileAdjGFlowNB); -// blas::axpy(1.,f_temp2, f_temp0); -// blas::axpy(1.,f_temp1, f_temp0); + logQuda(QUDA_SUMMARIZE," block number %d successfully deployed \n",i); + std::swap(sf_list[0],sf_list[1]); + } -// fout = f_temp0; -// } - -// cpuParam.v = h_out; -// cpuParam.location = inv_param->output_location; -// ColorSpinorField fout_h(cpuParam); -// fout_h = fout; + + int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); + n_b = 3; + int l_b_outer = smear_param->n_steps / n_b; + std::vector outer_dist(n_b); + std::fill(outer_dist.begin(), outer_dist.end(), l_b_outer); + outer_dist.at(outer_dist.size() - 1) = smear_param->n_steps - (n_b - 1) * l_b_outer; + + int end_of_block = smear_param->n_steps; + std::vector hier_list; + printf("what is l_b_outer: %d \n",l_b_outer); + + bool start(true); + + for (int j = 0; j < outer_dist.size(); j++){ + printf("starting outderdist element %d \n",hier_list.back()); + for (int i = outer_dist[j]; i > 1; i = i/n_b) { -// popOutputPrefix(); + // if (start) {hier_list.push_back(l_b_outer - (i / n_b)); start = false;printf("first item of list: %d \n",hier_list.back());} + hier_list.push_back(i / n_b); + printf("number %d added to hier list\n",hier_list.back()); + } + } + + + cpuParam.v = h_out; + cpuParam.location = inv_param->output_location; + ColorSpinorField fout_h(cpuParam); + fout_h = sf_list[1].get(); + + popOutputPrefix(); + +} -// } + /* save list of gauge vectors */ int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e93386a782..74d0be3a24 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -209,6 +209,11 @@ target_link_libraries(su3_ferm_test ${TEST_LIBS}) quda_checkbuildtest(su3_ferm_test QUDA_BUILD_ALL_TESTS) install(TARGETS su3_ferm_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) +add_executable(su3_adj_test su3_adj_test.cpp) +target_link_libraries(su3_adj_test ${TEST_LIBS}) +quda_checkbuildtest(su3_adj_test QUDA_BUILD_ALL_TESTS) +install(TARGETS su3_adj_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) + add_executable(pack_test pack_test.cpp) target_link_libraries(pack_test ${TEST_LIBS}) quda_checkbuildtest(pack_test QUDA_BUILD_ALL_TESTS) diff --git a/tests/su3_adj_test.cpp b/tests/su3_adj_test.cpp new file mode 100644 index 0000000000..18bf8fc6b0 --- /dev/null +++ b/tests/su3_adj_test.cpp @@ -0,0 +1,284 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +// In a typical application, quda.h is the only QUDA header required. +#include + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +// Smearing variables +double gauge_smear_rho = 0.1; +double gauge_smear_epsilon = 0.1; +double gauge_smear_alpha = 0.6; +double gauge_smear_alpha1 = 0.75; +double gauge_smear_alpha2 = 0.6; +double gauge_smear_alpha3 = 0.3; +int gauge_smear_steps = 50; +int gauge_n_save = 6; +int gauge_n_hier_save = 3; +QudaGaugeSmearType gauge_smear_type = QUDA_GAUGE_SMEAR_STOUT; +int gauge_smear_dir_ignore = -1; +int measurement_interval = 5; +bool su_project = true; + +void display_test_info() +{ + printfQuda("running the following test:\n"); + + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension\n"); + printfQuda("%s %s %s %s %d/%d/%d %d\n", get_prec_str(prec), + get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, + tdim); + + // Specific test + printfQuda("\n%s smearing\n", get_gauge_smear_str(gauge_smear_type)); + switch (gauge_smear_type) { + case QUDA_GAUGE_SMEAR_APE: printfQuda(" - alpha %f\n", gauge_smear_alpha); break; + case QUDA_GAUGE_SMEAR_STOUT: printfQuda(" - rho %f\n", gauge_smear_rho); break; + case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: + printfQuda(" - rho %f\n", gauge_smear_rho); + printfQuda(" - epsilon %f\n", gauge_smear_epsilon); + break; + case QUDA_GAUGE_SMEAR_HYP: + printfQuda(" - alpha1 %f\n", gauge_smear_alpha1); + printfQuda(" - alpha2 %f\n", gauge_smear_alpha2); + printfQuda(" - alpha3 %f\n", gauge_smear_alpha3); + break; + case QUDA_GAUGE_SMEAR_WILSON_FLOW: + case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: printfQuda(" - epsilon %f\n", gauge_smear_epsilon); break; + default: errorQuda("Undefined test type %d given", test_type); + } + printfQuda(" - smearing steps %d\n", gauge_smear_steps); + printfQuda(" - smearing ignore direction %d\n", gauge_smear_dir_ignore); + printfQuda(" - Measurement interval %d\n", measurement_interval); + + printfQuda("Grid partition info: X Y Z T\n"); + printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), + dimPartitioned(3)); + return; +} + +void add_su3_option_group(std::shared_ptr quda_app) +{ + CLI::TransformPairs gauge_smear_type_map {{"ape", QUDA_GAUGE_SMEAR_APE}, + {"stout", QUDA_GAUGE_SMEAR_STOUT}, + {"ovrimp-stout", QUDA_GAUGE_SMEAR_OVRIMP_STOUT}, + {"hyp", QUDA_GAUGE_SMEAR_HYP}, + {"wilson", QUDA_GAUGE_SMEAR_WILSON_FLOW}, + {"symanzik", QUDA_GAUGE_SMEAR_SYMANZIK_FLOW}}; + + // Option group for SU(3) related options + auto opgroup = quda_app->add_option_group("SU(3)", "Options controlling SU(3) tests"); + + opgroup + ->add_option( + "--su3-smear-type", + gauge_smear_type, "The type of action to use in the smearing. Options: APE, Stout, Over Improved Stout, HYP, Wilson Flow, Symanzik Flow (default stout)") + ->transform(CLI::QUDACheckedTransformer(gauge_smear_type_map)); + ; + opgroup->add_option("--su3-smear-alpha", gauge_smear_alpha, "alpha coefficient for APE smearing (default 0.6)"); + + opgroup->add_option("--su3-smear-rho", gauge_smear_rho, + "rho coefficient for Stout and Over-Improved Stout smearing (default 0.1)"); + + opgroup->add_option("--su3-smear-epsilon", gauge_smear_epsilon, + "epsilon coefficient for Over-Improved Stout smearing or Wilson flow (default 0.1)"); + + opgroup->add_option("--su3-smear-alpha1", gauge_smear_alpha1, "alpha1 coefficient for HYP smearing (default 0.75)"); + opgroup->add_option("--su3-smear-alpha2", gauge_smear_alpha2, "alpha2 coefficient for HYP smearing (default 0.6)"); + opgroup->add_option("--su3-smear-alpha3", gauge_smear_alpha3, "alpha3 coefficient for HYP smearing (default 0.3)"); + + opgroup->add_option( + "--su3-smear-dir-ignore", gauge_smear_dir_ignore, + "Direction to be ignored by the smearing, negative value means decided by --su3-smear-type (default -1)"); + + opgroup->add_option("--su3-smear-steps", gauge_smear_steps, "The number of smearing steps to perform (default 50)"); + + opgroup->add_option("--su3-measurement-interval", measurement_interval, + "Measure the field energy and/or topological charge every Nth step (default 5) "); + + opgroup->add_option("--su3-project", su_project, + "Project smeared gauge onto su3 manifold at measurement interval (default true)"); +} + +int main(int argc, char **argv) +{ + + auto app = make_app(); + add_su3_option_group(app); + + try { + app->parse(argc, argv); + } catch (const CLI::ParseError &e) { + return app->exit(e); + } + + // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) + initComms(argc, argv, gridsize_from_cmdline); + + QudaGaugeParam gauge_param = newQudaGaugeParam(); + if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + + setWilsonGaugeParam(gauge_param); + gauge_param.t_boundary = QUDA_PERIODIC_T; + setDims(gauge_param.X); + + // All user inputs are now defined + display_test_info(); + + void *gauge[4], *new_gauge[4]; + + for (int dir = 0; dir < 4; dir++) { + gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); + new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); + } + + initQuda(device_ordinal); + + setVerbosity(verbosity); + + // call srand() with a rank-dependent seed + initRand(); + + constructHostGaugeField(gauge, gauge_param, argc, argv); + // Load the gauge field to the device + loadGaugeQuda((void *)gauge, &gauge_param); + saveGaugeQuda(new_gauge, &gauge_param); + + // Prepare various perf info + long long flops_plaquette = 6ll * 597 * V; + long long flops_ploop = 198ll * V + 6 * V / gauge_param.X[3]; + + // Prepare a gauge observable struct + QudaGaugeObservableParam param = newQudaGaugeObservableParam(); + + // start the timer + quda::host_timer_t host_timer; + + // The user may specify which measurements they wish to perform/omit + // using the QudaGaugeObservableParam struct, and whether or not to + // perform suN projection at each measurement step. We recommend that + // users perform suN projection. + // A unique observable param struct is constructed for each measurement. + + // Gauge Smearing Routines + //--------------------------------------------------------------------------- + // Stout smearing should be equivalent to APE smearing + // on D dimensional lattices for rho = alpha/2*(D-1). + // Typical values for + // APE: alpha=0.6 + // Stout: rho=0.1 + // Over Improved Stout: rho=0.08, epsilon=-0.25 + // + // Typically, the user will use smearing for Q charge data only, so + // we hardcode to compute Q only and not the plaquette. Users may + // of course set these as they wish. SU(N) projection su_project=true is recommended. + QudaGaugeObservableParam *obs_param = new QudaGaugeObservableParam[gauge_smear_steps / measurement_interval + 1]; + for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { + obs_param[i] = newQudaGaugeObservableParam(); + obs_param[i].compute_plaquette = QUDA_BOOLEAN_FALSE; + obs_param[i].compute_qcharge = QUDA_BOOLEAN_TRUE; + obs_param[i].su_project = su_project ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + } + + // We here set all the problem parameters for all possible smearing types. + QudaGaugeSmearParam smear_param = newQudaGaugeSmearParam(); + smear_param.smear_type = gauge_smear_type; + smear_param.n_steps = gauge_smear_steps; + smear_param.adj_n_save = gauge_n_save; + smear_param.adj_n_hier_save = gauge_n_hier_save; + smear_param.meas_interval = measurement_interval; + smear_param.alpha = gauge_smear_alpha; + smear_param.rho = gauge_smear_rho; + smear_param.epsilon = gauge_smear_epsilon; + smear_param.alpha1 = gauge_smear_alpha1; + smear_param.alpha2 = gauge_smear_alpha2; + smear_param.alpha3 = gauge_smear_alpha3; + smear_param.dir_ignore = gauge_smear_dir_ignore; + + quda::ColorSpinorField check,check_out; + QudaInvertParam invParam = newQudaInvertParam(); + invParam.cpu_prec = QUDA_DOUBLE_PRECISION; + invParam.cuda_prec = QUDA_DOUBLE_PRECISION; + invParam.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; + invParam.dirac_order = QUDA_DIRAC_ORDER; + + constexpr int nSpin = 4; + constexpr int nColor = 3; + quda::ColorSpinorParam cs_param, cs_param_out; + cs_param.nColor = nColor; + cs_param.nSpin = nSpin; + cs_param.x = {xdim, ydim, zdim, tdim}; + cs_param.siteSubset = QUDA_FULL_SITE_SUBSET; + cs_param.setPrecision(invParam.cpu_prec); + cs_param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER; + cs_param.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; + cs_param.gammaBasis = invParam.gamma_basis; + cs_param.pc_type = QUDA_4D_PC; + cs_param.location = QUDA_CPU_FIELD_LOCATION; + cs_param.create = QUDA_NULL_FIELD_CREATE; + + cs_param_out = cs_param; + + constructWilsonTestSpinorParam(&cs_param, &invParam, &gauge_param); + check = quda::ColorSpinorField(cs_param); + // constructWilsonTestSpinorParam(&cs_param_out, &invParam, &gauge_param); + check_out = quda::ColorSpinorField(cs_param); + // constructWilsonTestSpinorParam(&cs_param, &inv_param, &gauge_param); + + + // quda::ColorSpinorField rngDummy(cs_param), rngDummy1(cs_param_out); + printf("Stage -1 passed\n"); + host_timer.start(); // start the timer + switch (smear_param.smear_type) { + case QUDA_GAUGE_SMEAR_APE: + case QUDA_GAUGE_SMEAR_STOUT: + case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: + case QUDA_GAUGE_SMEAR_HYP: { + performGaugeSmearQuda(&smear_param, obs_param); + break; + } + + // Here we use a typical use case which is different from simple smearing in that + // the user will want to compute the plaquette values to compute the gauge energy. + case QUDA_GAUGE_SMEAR_WILSON_FLOW: + case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: { + for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { + obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; + } + // performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); + performAdjGFlowNB(check.data(),check_out.data(), &invParam, &smear_param); + break; + } + default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); + } + + host_timer.stop(); // stop the timer + printfQuda("Total time for gauge smearing = %g secs\n", host_timer.last()); + + if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); + + for (int dir = 0; dir < 4; dir++) { + host_free(gauge[dir]); + host_free(new_gauge[dir]); + } + + freeGaugeQuda(); + endQuda(); + + finalizeComms(); + return 0; +} diff --git a/tests/su3_ferm_test.cpp b/tests/su3_ferm_test.cpp index cf31d61735..ded426dce7 100644 --- a/tests/su3_ferm_test.cpp +++ b/tests/su3_ferm_test.cpp @@ -26,6 +26,7 @@ double gauge_smear_alpha1 = 0.75; double gauge_smear_alpha2 = 0.6; double gauge_smear_alpha3 = 0.3; int gauge_smear_steps = 5; +int gauge_n_save = 6; QudaGaugeSmearType gauge_smear_type = QUDA_GAUGE_SMEAR_STOUT; int gauge_smear_dir_ignore = -1; int measurement_interval = 5; @@ -196,6 +197,7 @@ int main(int argc, char **argv) QudaGaugeSmearParam smear_param = newQudaGaugeSmearParam(); smear_param.smear_type = gauge_smear_type; smear_param.n_steps = gauge_smear_steps; + smear_param.adj_n_save = gauge_n_save; smear_param.meas_interval = measurement_interval; smear_param.alpha = gauge_smear_alpha; smear_param.rho = gauge_smear_rho; From e88dc987847dd599afa01d93ef3ea35b05d7983d Mon Sep 17 00:00:00 2001 From: rokarur Date: Fri, 15 Nov 2024 02:00:33 -0800 Subject: [PATCH 07/53] added hiearchical counting structure --- lib/interface_quda.cpp | 97 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 79 insertions(+), 18 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index b898f6e774..9c1fc9338b 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5596,7 +5596,52 @@ void adjSafeEvolve(std::vector> sf_list } +/* total_dist == n_steps, n_b is dividing factor of each block, n_Save is the size of the list*/ +std::vector get_hier_list(int total_dist, int n_b, int n_save, bool front = true){ + std::vector hier_list; + int counter = 0; + + int val = total_dist; + for (int i_s = 0; i_s < n_save; i_s++) { + val = (val <= 1) ? 1 : val / n_b; + hier_list.push_back(val); + counter += val; + } + + if (front) hier_list.at(0) += total_dist - counter; + else hier_list.back() += total_dist - counter; + + return hier_list; + +} + +int modify_hier_list(std::vector &hier_list, int n_b, int n_save, int threshold) { + + int result = 0; + int current_size = hier_list.size(); + std::vector temp_list; + if (current_size > n_save) errorQuda("something isnt right\n"); + + int diff = n_save - current_size; + + + for (int i=current_size - 1; i >= 0; --i){ + + if (hier_list[i] > threshold){ + + temp_list = get_hier_list(hier_list[i], n_b, diff+1,false); + // for (int ii = 0; ii< temp_list.size();ii++) printf("tempf %d \n",temp_list[ii]); + hier_list.erase(hier_list.begin()+i); + hier_list.insert(hier_list.begin()+i, temp_list.begin(),temp_list.end()); + result = 1; + break; + } + } + + return result; + +} void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param){ @@ -5703,27 +5748,43 @@ void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, Quda int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); - n_b = 3; - int l_b_outer = smear_param->n_steps / n_b; - std::vector outer_dist(n_b); - std::fill(outer_dist.begin(), outer_dist.end(), l_b_outer); - outer_dist.at(outer_dist.size() - 1) = smear_param->n_steps - (n_b - 1) * l_b_outer; - - int end_of_block = smear_param->n_steps; + int ret_stat; + // n_b = 3; std::vector hier_list; - printf("what is l_b_outer: %d \n",l_b_outer); + hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save - 1); + for (int i=0; i < hier_list.size(); i++) printf("content # %d of hier list = %d\n",i,hier_list[i]); + hier_list.pop_back(); + ret_stat = modify_hier_list(hier_list, n_b, smear_param->adj_n_save - 1, 6); + for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest hier list = %d\n",i,hier_list[i]); + hier_list.pop_back(); + ret_stat = modify_hier_list(hier_list, n_b, smear_param->adj_n_save - 1, 6); + for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest hier list = %d\n",i,hier_list[i]); +// int l_b_outer = smear_param->n_steps / n_b; +// std::vector outer_dist(n_b); +// std::fill(outer_dist.begin(), outer_dist.end(), l_b_outer); +// outer_dist.at(outer_dist.size() - 1) = smear_param->n_steps - (n_b - 1) * l_b_outer; + +// std::vector> hier_list(n_b); +// printf("n_b outer: %d, and l_b_outer: %d \n",n_b,l_b_outer); + - bool start(true); - for (int j = 0; j < outer_dist.size(); j++){ - printf("starting outderdist element %d \n",hier_list.back()); - for (int i = outer_dist[j]; i > 1; i = i/n_b) { - - // if (start) {hier_list.push_back(l_b_outer - (i / n_b)); start = false;printf("first item of list: %d \n",hier_list.back());} - hier_list.push_back(i / n_b); - printf("number %d added to hier list\n",hier_list.back()); - } - } +// for (int j = 0; j < outer_dist.size(); j++){ +// printf("starting outderdist element of size %d \n",outer_dist[j]); +// int counter = 0; +// for (int i = outer_dist[j]; i > 1; i = i/n_b) { + +// // if (start) {hier_list.push_back(l_b_outer - (i / n_b)); start = false;printf("first item of list: %d \n",hier_list.back());} +// if (i / n_b == 0) break; +// hier_list[j].push_back(i / n_b); +// printf("number %d added to hier list\n",hier_list[j].back()); +// counter += i / n_b; +// } +// // hier_list[j].insert(hier_list[j].begin(),outer_dist[j] - counter); +// hier_list[j].at(0) += outer_dist[j] - counter; +// printf("beginning element modified to be %d \n",hier_list[j].front()); +// } + From 91d1391ba54e910d8408ed3173f1fd947e897d40 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 17 Nov 2024 02:40:08 -0800 Subject: [PATCH 08/53] first working implementation of hierarchial adjoint gradient flow --- include/quda.h | 10 ++ lib/interface_quda.cpp | 205 ++++++++++++++++++++++++++++++++--------- tests/su3_adj_test.cpp | 2 +- 3 files changed, 171 insertions(+), 46 deletions(-) diff --git a/include/quda.h b/include/quda.h index 760f6a98a8..192c59ea15 100644 --- a/include/quda.h +++ b/include/quda.h @@ -1716,6 +1716,16 @@ extern "C" { * observables we are making and the resulting observables. */ void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); + + /** + * Performs Adjoint Gradient Flow (gauge + fermion) the Hierarchical way on gaugePrecise and stores it in gaugeSmeared + * @param[out] h_out Output fermion field + * @param[in] h_in Input fermion field + * @param[in] smear_param Parameter struct that defines the computation parameters + * @param[in,out] obs_param Parameter struct that defines which + * observables we are making and the resulting observables. + */ + void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); /** * @brief Calculates a variety of gauge-field observables. If a diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 9c1fc9338b..cc87471070 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -202,6 +202,8 @@ static TimeProfile profileAdjGFlowSafe("AdjgFlowSafeQuda"); static TimeProfile profileAdjGFlowNB("AdjgFlowNBQuda"); +static TimeProfile profileAdjGFlowHier("AdjgFlowHierQuda"); + //!< Profiler for projectSU3Quda static TimeProfile profileProject("projectSU3Quda"); @@ -5562,7 +5564,7 @@ void adjSafeEvolve(std::vector> sf_list copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); - ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profileAdjGFlowNB); + ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profile); // f_temp0 = 3./4.*f_temp4; blas::ax(smear_param->epsilon * 3. / 4., f_temp4); @@ -5618,7 +5620,7 @@ std::vector get_hier_list(int total_dist, int n_b, int n_save, bool front = int modify_hier_list(std::vector &hier_list, int n_b, int n_save, int threshold) { - int result = 0; + int result = -1; int current_size = hier_list.size(); std::vector temp_list; if (current_size > n_save) errorQuda("something isnt right\n"); @@ -5627,14 +5629,14 @@ int modify_hier_list(std::vector &hier_list, int n_b, int n_save, int thres for (int i=current_size - 1; i >= 0; --i){ - + if (hier_list[i] > threshold){ temp_list = get_hier_list(hier_list[i], n_b, diff+1,false); // for (int ii = 0; ii< temp_list.size();ii++) printf("tempf %d \n",temp_list[ii]); hier_list.erase(hier_list.begin()+i); hier_list.insert(hier_list.begin()+i, temp_list.begin(),temp_list.end()); - result = 1; + result = i; break; } } @@ -5720,22 +5722,13 @@ void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, Quda WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); } -// if (i == smear_param->adj_n_save - 1 ) { -// dist_save.push_back(smear_param->n_steps - block_counter); -// } -// else { - -// dist_save.push_back(block_length); -// } -// block_counter += block_length; - - // printf("evolve distance of %d added \n",dist_save.back()); - } std::vector> sf_list; sf_list = {fin, fout, f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; std::vector> gf_list; gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; + + for (int i = gauge_stages.size() - 1; i >= 0; --i) { //first load correct gauge field (for beginning of the loop, it is the final gauge list element) if (i < gauge_stages.size() - 1) gf_list.at(0) = std::ref(gauge_stages[i]); @@ -5748,56 +5741,178 @@ void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, Quda int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); + int element_back; int ret_stat; + std::vector gauge_list{1,2,3,4,5}; // n_b = 3; std::vector hier_list; hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save - 1); for (int i=0; i < hier_list.size(); i++) printf("content # %d of hier list = %d\n",i,hier_list[i]); - hier_list.pop_back(); + hier_list.pop_back(); ret_stat = modify_hier_list(hier_list, n_b, smear_param->adj_n_save - 1, 6); + element_back = gauge_list.back(); + gauge_list.pop_back(); + gauge_list.insert(gauge_list.begin()+ret_stat+1,element_back); for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest hier list = %d\n",i,hier_list[i]); - hier_list.pop_back(); - ret_stat = modify_hier_list(hier_list, n_b, smear_param->adj_n_save - 1, 6); - for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest hier list = %d\n",i,hier_list[i]); -// int l_b_outer = smear_param->n_steps / n_b; -// std::vector outer_dist(n_b); -// std::fill(outer_dist.begin(), outer_dist.end(), l_b_outer); -// outer_dist.at(outer_dist.size() - 1) = smear_param->n_steps - (n_b - 1) * l_b_outer; + for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest gauge list = %d\n",i,gauge_list[i]); -// std::vector> hier_list(n_b); -// printf("n_b outer: %d, and l_b_outer: %d \n",n_b,l_b_outer); - - + cpuParam.v = h_out; + cpuParam.location = inv_param->output_location; + ColorSpinorField fout_h(cpuParam); + fout_h = sf_list[1].get(); + + popOutputPrefix(); -// for (int j = 0; j < outer_dist.size(); j++){ -// printf("starting outderdist element of size %d \n",outer_dist[j]); -// int counter = 0; -// for (int i = outer_dist[j]; i > 1; i = i/n_b) { - -// // if (start) {hier_list.push_back(l_b_outer - (i / n_b)); start = false;printf("first item of list: %d \n",hier_list.back());} -// if (i / n_b == 0) break; -// hier_list[j].push_back(i / n_b); -// printf("number %d added to hier list\n",hier_list[j].back()); -// counter += i / n_b; -// } -// // hier_list[j].insert(hier_list[j].begin(),outer_dist[j] - counter); -// hier_list[j].at(0) += outer_dist[j] - counter; -// printf("beginning element modified to be %d \n",hier_list[j].front()); -// } +} + +void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param){ + + auto profile = pushProfile(profileAdjGFlowHier); + pushOutputPrefix("performAdjGFlowQudaHier: "); + checkGaugeSmearParam(smear_param); + + // pushVerbosity(inv_param->verbosity); + if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); + + if (smear_param->restart) { + if (gaugeSmeared == nullptr) errorQuda("gaugeSmeared must be loaded"); + } else { + if (gaugePrecise == nullptr) errorQuda("Gauge field must be loaded"); + freeUniqueGaugeQuda(QUDA_SMEARED_LINKS); + gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileAdjGFlowHier); + } + + GaugeFieldParam gParamDummy(*gaugeSmeared); + GaugeField gaugeW0(gParamDummy); + GaugeField gaugeW1(gParamDummy); + GaugeField gaugeW2(gParamDummy); + GaugeField gaugeVT(gParamDummy); + GaugeField gauge_out(gParamDummy); + + GaugeFieldParam gParam(*gaugePrecise); + gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct + GaugeField gaugeTemp(gParam); + + auto n = smear_param->adj_n_save; + + std::vector gauge_stages(n,gParamDummy); + gauge_stages[0] = *gaugeSmeared; + //Can also do below + //creates copies std::vector gauge_stages(n,*gaugeSmeared); + + GaugeField &gin = *gaugeSmeared; + GaugeField &gout = gauge_out; + // helper gauge field for Laplace operator + GaugeField precise; + GaugeFieldParam gParam_helper(*gaugePrecise); + gParam_helper.create = QUDA_NULL_FIELD_CREATE; + precise = GaugeField(gParam_helper); + + // spinor fields + ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); + ColorSpinorField fin_h(cpuParam); + + ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); + ColorSpinorField fin(deviceParam); + fin = fin_h; + + deviceParam.create = QUDA_NULL_FIELD_CREATE; + ColorSpinorField fout(deviceParam); + + ColorSpinorField f_temp0(deviceParam); + ColorSpinorField f_temp1(deviceParam); + ColorSpinorField f_temp2(deviceParam); + ColorSpinorField f_temp3(deviceParam); + ColorSpinorField f_temp4(deviceParam); + + int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); + int ret_idx = 0; + int threshold = 6; + std::vector hier_list; + //The first stage is saved at the very beginning, so its presence is implicit + hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save); + + if (threshold < hier_list.back()) threshold = hier_list.back(); + if (hier_list.empty()) errorQuda("hier_list is not populated\n"); + if (hier_list.size() != gauge_stages.size()) errorQuda("hier_list is not same size as gauge_stages \n"); + + for (unsigned int i = 0; i < hier_list.size(); i++) { + + for (unsigned int j = 0; j < hier_list[i]; j++){ + if (i > 0) std::swap(gout,gin); + WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); + } + if (i == 0) + gauge_stages[0] = gin; + else + gauge_stages[i] = gout; + } + std::vector> sf_list; + sf_list = {fin, fout, f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; + std::vector> gf_list; + gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; + + + while (ret_idx != -1){ + + adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); + std::swap(sf_list[0],sf_list[1]); + // break; + hier_list.pop_back(); + gauge_stages.pop_back(); + ret_idx = modify_hier_list(hier_list, n_b, smear_param->adj_n_save, threshold); + if (ret_idx == -1) { + + for (int i = gauge_stages.size() - 1; i >= 0; --i) { + //first load correct gauge field (for beginning of the loop, it is the final gauge list element) + gf_list.at(0) = std::ref(gauge_stages[i]); + + adjSafeEvolve(sf_list,gf_list,smear_param,hier_list[i],profileAdjGFlowNB); + logQuda(QUDA_SUMMARIZE," block number %d successfully deployed \n",i); + // At final step, we do not conduct swap + if (i != 0) std::swap(sf_list[0],sf_list[1]); + } + + break; + } + + GaugeField g_2(gParamDummy); + GaugeField g_1 = gauge_stages[ret_idx]; + + for (unsigned int j = 0; j < hier_list[ret_idx]; j++){ + if (j > 0) std::swap(g_2,g_1); + WFlowStep(g_2, gaugeTemp, g_1, smear_param->epsilon, smear_param->smear_type); + } + // break; + gauge_stages.insert(gauge_stages.begin() + ret_idx + 1, g_2); + gf_list.at(0) = std::ref(gauge_stages.back()); + + printf("hier list size : %d \n",hier_list.size()); + + + } + +// adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); +// std::swap(sf_list[0],sf_list[1]); +// hier_list.pop_back(); +// // gauge_stages.pop_back(); +// // ret_idx = modify_hier_list(hier_list, n_b, smear_param->adj_n_save, threshold); +// adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); + + + cpuParam.v = h_out; cpuParam.location = inv_param->output_location; ColorSpinorField fout_h(cpuParam); fout_h = sf_list[1].get(); popOutputPrefix(); - -} - +} /* save list of gauge vectors */ diff --git a/tests/su3_adj_test.cpp b/tests/su3_adj_test.cpp index 18bf8fc6b0..fb5162d0ba 100644 --- a/tests/su3_adj_test.cpp +++ b/tests/su3_adj_test.cpp @@ -260,7 +260,7 @@ int main(int argc, char **argv) obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; } // performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); - performAdjGFlowNB(check.data(),check_out.data(), &invParam, &smear_param); + performAdjGFlowHier(check.data(),check_out.data(), &invParam, &smear_param); break; } default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); From 368ec5b8056e077fd6a9daed7b7c3907b32146cd Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 17 Nov 2024 18:49:52 -0800 Subject: [PATCH 09/53] more changes with meaningful log messages --- lib/interface_quda.cpp | 54 +++++++++++++++++++++++++++++++----------- tests/su3_adj_test.cpp | 2 +- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index cc87471070..d07aed5958 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5748,6 +5748,28 @@ void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, Quda std::vector hier_list; hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save - 1); for (int i=0; i < hier_list.size(); i++) printf("content # %d of hier list = %d\n",i,hier_list[i]); + + + hier_list.pop_back(); + ret_stat = modify_hier_list(hier_list, n_b, smear_param->adj_n_save - 1, 6); + element_back = gauge_list.back(); + gauge_list.pop_back(); + gauge_list.insert(gauge_list.begin()+ret_stat+1,element_back); + for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest hier list = %d\n",i,hier_list[i]); + for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest gauge list = %d\n",i,gauge_list[i]); + + + + hier_list.pop_back(); + ret_stat = modify_hier_list(hier_list, n_b, smear_param->adj_n_save - 1, 6); + element_back = gauge_list.back(); + gauge_list.pop_back(); + gauge_list.insert(gauge_list.begin()+ret_stat+1,element_back); + for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest hier list = %d\n",i,hier_list[i]); + for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest gauge list = %d\n",i,gauge_list[i]); + + + hier_list.pop_back(); ret_stat = modify_hier_list(hier_list, n_b, smear_param->adj_n_save - 1, 6); element_back = gauge_list.back(); @@ -5827,6 +5849,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu ColorSpinorField f_temp4(deviceParam); int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); + logQuda(QUDA_VERBOSE,"Hierarchical block n_b: %d\n\n",n_b); int ret_idx = 0; int threshold = 6; std::vector hier_list; @@ -5857,22 +5880,27 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu while (ret_idx != -1){ + logQuda(QUDA_VERBOSE,"Starting a hierarchical loop log\n"); adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); std::swap(sf_list[0],sf_list[1]); - // break; + + for (int j = 0; j < hier_list.size(); j++ ){ + logQuda(QUDA_VERBOSE,"previous hier list element %d : %ld \n",j,hier_list[j]); + } + logQuda(QUDA_VERBOSE,"\n"); hier_list.pop_back(); gauge_stages.pop_back(); ret_idx = modify_hier_list(hier_list, n_b, smear_param->adj_n_save, threshold); if (ret_idx == -1) { - + logQuda(QUDA_DEBUG_VERBOSE," now in final serial stage of hierarchial evolution \n"); for (int i = gauge_stages.size() - 1; i >= 0; --i) { //first load correct gauge field (for beginning of the loop, it is the final gauge list element) gf_list.at(0) = std::ref(gauge_stages[i]); - adjSafeEvolve(sf_list,gf_list,smear_param,hier_list[i],profileAdjGFlowNB); + adjSafeEvolve(sf_list,gf_list,smear_param,hier_list[i],profileAdjGFlowHier); - logQuda(QUDA_SUMMARIZE," block number %d successfully deployed \n",i); + logQuda(QUDA_DEBUG_VERBOSE," block number %d successfully deployed \n",i); // At final step, we do not conduct swap if (i != 0) std::swap(sf_list[0],sf_list[1]); } @@ -5883,27 +5911,25 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu GaugeField g_2(gParamDummy); GaugeField g_1 = gauge_stages[ret_idx]; + for (int j = 0; j < hier_list.size(); j++ ){ + logQuda(QUDA_VERBOSE,"modified hier list element %d : %ld \n",j,hier_list[j]); + } + + logQuda(QUDA_VERBOSE,"ret idx : %d \n",ret_idx); + for (unsigned int j = 0; j < hier_list[ret_idx]; j++){ if (j > 0) std::swap(g_2,g_1); WFlowStep(g_2, gaugeTemp, g_1, smear_param->epsilon, smear_param->smear_type); } // break; gauge_stages.insert(gauge_stages.begin() + ret_idx + 1, g_2); + logQuda(QUDA_VERBOSE,"recycled gauge field placed *before* index %d\n\n\n",ret_idx + 1); gf_list.at(0) = std::ref(gauge_stages.back()); - printf("hier list size : %d \n",hier_list.size()); - + } -// adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); -// std::swap(sf_list[0],sf_list[1]); -// hier_list.pop_back(); -// // gauge_stages.pop_back(); -// // ret_idx = modify_hier_list(hier_list, n_b, smear_param->adj_n_save, threshold); -// adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); - - cpuParam.v = h_out; cpuParam.location = inv_param->output_location; diff --git a/tests/su3_adj_test.cpp b/tests/su3_adj_test.cpp index fb5162d0ba..b1d76f4b96 100644 --- a/tests/su3_adj_test.cpp +++ b/tests/su3_adj_test.cpp @@ -26,7 +26,7 @@ double gauge_smear_alpha1 = 0.75; double gauge_smear_alpha2 = 0.6; double gauge_smear_alpha3 = 0.3; int gauge_smear_steps = 50; -int gauge_n_save = 6; +int gauge_n_save = 3; int gauge_n_hier_save = 3; QudaGaugeSmearType gauge_smear_type = QUDA_GAUGE_SMEAR_STOUT; int gauge_smear_dir_ignore = -1; From 42b8a5ab4e61f0fbd1947472f5294be82c1306b1 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 17 Nov 2024 22:42:48 -0800 Subject: [PATCH 10/53] verbosity switches in interface --- lib/interface_quda.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index d07aed5958..32d873f13d 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5598,7 +5598,7 @@ void adjSafeEvolve(std::vector> sf_list } -/* total_dist == n_steps, n_b is dividing factor of each block, n_Save is the size of the list*/ +/* total_dist == n_steps, n_b is dividing factor of each block, n_Save is the size of the list, "front" denotes whether split hierarchy goes to existing or new subhierarchy */ std::vector get_hier_list(int total_dist, int n_b, int n_save, bool front = true){ std::vector hier_list; @@ -5893,18 +5893,18 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu gauge_stages.pop_back(); ret_idx = modify_hier_list(hier_list, n_b, smear_param->adj_n_save, threshold); if (ret_idx == -1) { - logQuda(QUDA_DEBUG_VERBOSE," now in final serial stage of hierarchial evolution \n"); + logQuda(QUDA_VERBOSE," now in final serial stage of hierarchial evolution \n"); for (int i = gauge_stages.size() - 1; i >= 0; --i) { //first load correct gauge field (for beginning of the loop, it is the final gauge list element) gf_list.at(0) = std::ref(gauge_stages[i]); adjSafeEvolve(sf_list,gf_list,smear_param,hier_list[i],profileAdjGFlowHier); - logQuda(QUDA_DEBUG_VERBOSE," block number %d successfully deployed \n",i); + logQuda(QUDA_VERBOSE," block number %d successfully deployed \n",i); // At final step, we do not conduct swap if (i != 0) std::swap(sf_list[0],sf_list[1]); } - + logQuda(QUDA_VERBOSE," hierarchial evolution completed \n"); break; } From d123539f8fd449db83f23b428f654f132af02d9b Mon Sep 17 00:00:00 2001 From: Balint Joo Date: Wed, 16 Oct 2024 14:27:21 +0000 Subject: [PATCH 11/53] Mods to allow copy in/out of full QDP-JIT spinors --- include/color_spinor_field_order.h | 8 ++++---- include/kernels/copy_color_spinor.cuh | 19 +++++++++++++++++-- lib/copy_color_spinor.cuh | 5 ----- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/color_spinor_field_order.h b/include/color_spinor_field_order.h index 25f5234390..b4be52fd8a 100644 --- a/include/color_spinor_field_order.h +++ b/include/color_spinor_field_order.h @@ -1799,8 +1799,8 @@ namespace quda { for (int s = 0; s < Ns; s++) { for (int c = 0; c < Nc; c++) { - v[s * Nc + c] = complex(field[(((0 * Nc + c) * Ns + s) * 2 + (1 - parity)) * volumeCB + x], - field[(((1 * Nc + c) * Ns + s) * 2 + (1 - parity)) * volumeCB + x]); + v[s * Nc + c] = complex(field[(((0 * Nc + c) * Ns + s) * 2 + parity) * volumeCB + x], + field[(((1 * Nc + c) * Ns + s) * 2 + parity) * volumeCB + x]); } } } @@ -1809,8 +1809,8 @@ namespace quda { for (int s = 0; s < Ns; s++) { for (int c = 0; c < Nc; c++) { - field[(((0 * Nc + c) * Ns + s) * 2 + (1 - parity)) * volumeCB + x] = v[s * Nc + c].real(); - field[(((1 * Nc + c) * Ns + s) * 2 + (1 - parity)) * volumeCB + x] = v[s * Nc + c].imag(); + field[(((0 * Nc + c) * Ns + s) * 2 + parity) * volumeCB + x] = v[s * Nc + c].real(); + field[(((1 * Nc + c) * Ns + s) * 2 + parity) * volumeCB + x] = v[s * Nc + c].imag(); } } } diff --git a/include/kernels/copy_color_spinor.cuh b/include/kernels/copy_color_spinor.cuh index 39bc226f3f..b57533d54a 100644 --- a/include/kernels/copy_color_spinor.cuh +++ b/include/kernels/copy_color_spinor.cuh @@ -16,6 +16,21 @@ namespace quda using namespace colorspinor; + /** Helper function for parity computation */ + inline int computeParity(const ColorSpinorField &f) + { + + // Account for odd-even vs. even-odd site orders + int ret_val = f.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER ? 1 : 0; + + // Account for potential parity flip to access single parity subset QDP-JIT fields + if (f.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER && f.SiteSubset() == QUDA_PARITY_SITE_SUBSET) { + ret_val = 1 - ret_val; + } + + return ret_val; + } + template class Basis_> struct CopyColorSpinorArg : kernel_param<> { @@ -32,8 +47,8 @@ namespace quda kernel_param(dim3(in.VolumeCB(), in.SiteSubset(), 1)), out(out, 1, Out_), in(in, 1, const_cast(In_)), - outParity(out.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER ? 1 : 0), - inParity(in.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER ? 1 : 0) + outParity(computeParity(out)), + inParity(computeParity(in)) { } }; diff --git a/lib/copy_color_spinor.cuh b/lib/copy_color_spinor.cuh index f9c40ba9fc..02e6357a35 100644 --- a/lib/copy_color_spinor.cuh +++ b/lib/copy_color_spinor.cuh @@ -181,11 +181,6 @@ namespace quda errorQuda("Copying to full fields with lexicographical ordering is not currently supported"); } - if (dst.SiteSubset() == QUDA_FULL_SITE_SUBSET - && (src.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER || dst.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER)) { - errorQuda("QDPJIT field ordering not supported for full site fields"); - } - genericCopyColorSpinor(param); } From 95b12e38ba4c16ee2fbc1d1f9b887759b489f970 Mon Sep 17 00:00:00 2001 From: Balint Joo Date: Mon, 28 Oct 2024 16:15:15 +0000 Subject: [PATCH 12/53] QDPJIT parity only flopped on ODD parity --- include/kernels/copy_color_spinor.cuh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/kernels/copy_color_spinor.cuh b/include/kernels/copy_color_spinor.cuh index b57533d54a..5ed05c68ba 100644 --- a/include/kernels/copy_color_spinor.cuh +++ b/include/kernels/copy_color_spinor.cuh @@ -24,7 +24,9 @@ namespace quda int ret_val = f.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER ? 1 : 0; // Account for potential parity flip to access single parity subset QDP-JIT fields - if (f.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER && f.SiteSubset() == QUDA_PARITY_SITE_SUBSET) { + // The Flip is only needed fir offsetting into Odd Parity Fields + if (f.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER && f.SiteSubset() == QUDA_PARITY_SITE_SUBSET + && f.SuggestedParity() == QUDA_ODD_PARITY) { ret_val = 1 - ret_val; } From 6f3409e9ff58d91f1aa3fba363c578cf2bd37c0d Mon Sep 17 00:00:00 2001 From: Balint Joo Date: Mon, 28 Oct 2024 16:49:24 +0000 Subject: [PATCH 13/53] Prettied doxygen for compute parity --- include/kernels/copy_color_spinor.cuh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/kernels/copy_color_spinor.cuh b/include/kernels/copy_color_spinor.cuh index 5ed05c68ba..3751a42411 100644 --- a/include/kernels/copy_color_spinor.cuh +++ b/include/kernels/copy_color_spinor.cuh @@ -16,7 +16,14 @@ namespace quda using namespace colorspinor; - /** Helper function for parity computation */ + /** + * @brief A helper function to figure out what parity to use for input and output. + * @details Pick parity from input field site order. Addditionally QDPJIT fields + * may need a relative parity flip compared to what is expected when dealing with + * only the odd parity since the pointer is always to the top of the full spinort. + * @param f[in] Reference to the field for parity computation + * @return the computed parity + */ inline int computeParity(const ColorSpinorField &f) { From b339c8f82ba887a8dfee7302b2c293b3cabb12a9 Mon Sep 17 00:00:00 2001 From: Balint Joo Date: Mon, 28 Oct 2024 17:37:49 +0000 Subject: [PATCH 14/53] Fixed another doxygen booboo --- include/kernels/copy_color_spinor.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/kernels/copy_color_spinor.cuh b/include/kernels/copy_color_spinor.cuh index 3751a42411..505163cc93 100644 --- a/include/kernels/copy_color_spinor.cuh +++ b/include/kernels/copy_color_spinor.cuh @@ -21,7 +21,7 @@ namespace quda * @details Pick parity from input field site order. Addditionally QDPJIT fields * may need a relative parity flip compared to what is expected when dealing with * only the odd parity since the pointer is always to the top of the full spinort. - * @param f[in] Reference to the field for parity computation + * @param[in] f Reference to the field for parity computation * @return the computed parity */ inline int computeParity(const ColorSpinorField &f) From eb3bc5a463e0ad94963a125ff79eb0b90bc27ad5 Mon Sep 17 00:00:00 2001 From: SaltyChiang Date: Fri, 1 Nov 2024 15:07:38 +0800 Subject: [PATCH 15/53] Use the first `r_coarse` to verify the MG setup. --- lib/multigrid.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/multigrid.cpp b/lib/multigrid.cpp index 5241fdeabc..f355203c6a 100644 --- a/lib/multigrid.cpp +++ b/lib/multigrid.cpp @@ -966,7 +966,7 @@ namespace quda } transfer->R(x_coarse[0], tmp2); - static_cast(diracCoarseResidual)->M(r_coarse, tmp_coarse); + static_cast(diracCoarseResidual)->M(r_coarse[0], tmp_coarse); #if 0 // enable to print out emulated and actual coarse-grid operator vectors for debugging setOutputPrefix(""); From b2cea403f93a06d3d68767ec0723ac2475380a4d Mon Sep 17 00:00:00 2001 From: SaltyChiang Date: Fri, 1 Nov 2024 15:12:40 +0800 Subject: [PATCH 16/53] Enable `computeHISQForceQuda` for other gauge field orders than the MILC order. --- lib/interface_quda.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 32d873f13d..e4b8a9db3a 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -4264,7 +4264,6 @@ void computeHISQForceQuda(void* const milc_momentum, using namespace quda; using namespace quda::fermion_force; - if (gParam->gauge_order != QUDA_MILC_GAUGE_ORDER) errorQuda("Unsupported input field order %d", gParam->gauge_order); { // default settings for the unitarization @@ -4407,7 +4406,6 @@ void computeHISQForceQuda(void* const milc_momentum, GaugeFieldParam param(*gParam); param.location = QUDA_CPU_FIELD_LOCATION; param.create = QUDA_REFERENCE_FIELD_CREATE; - param.order = QUDA_MILC_GAUGE_ORDER; param.link_type = QUDA_ASQTAD_MOM_LINKS; param.reconstruct = QUDA_RECONSTRUCT_10; param.ghostExchange = QUDA_GHOST_EXCHANGE_NO; @@ -4429,7 +4427,6 @@ void computeHISQForceQuda(void* const milc_momentum, GaugeFieldParam wParam(gParam_field); wParam.location = QUDA_CPU_FIELD_LOCATION; wParam.create = QUDA_REFERENCE_FIELD_CREATE; - wParam.order = QUDA_MILC_GAUGE_ORDER; wParam.link_type = QUDA_GENERAL_LINKS; wParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO; wParam.gauge = (void *)w_link; From 1628ac7327306d77e6ff87781525fdbe0ab2ba0a Mon Sep 17 00:00:00 2001 From: SaltyChiang Date: Fri, 1 Nov 2024 15:14:02 +0800 Subject: [PATCH 17/53] Fix typo. --- lib/interface_quda.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index e4b8a9db3a..b3096370c6 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -6159,7 +6159,7 @@ void gaugeObservablesQuda(QudaGaugeObservableParam *param) auto profile = pushProfile(profileGaugeObs); checkGaugeObservableParam(param); - if (!gaugePrecise) errorQuda("Cannot compute Polyakov loop as there is no resident gauge field"); + if (!gaugePrecise) errorQuda("Cannot compute gauge observables as there is no resident gauge field"); GaugeField *gauge = nullptr; if (!gaugeSmeared) { From 3dd7fade0cc403ad0caa32d13c5d546c86d15886 Mon Sep 17 00:00:00 2001 From: SaltyChiang Date: Fri, 1 Nov 2024 15:21:46 +0800 Subject: [PATCH 18/53] Add hash for the Eigen tarball. --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 803f5dba41..88c83c5b8f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -421,6 +421,7 @@ if(QUDA_DOWNLOAD_EIGEN) NAME Eigen VERSION ${QUDA_EIGEN_VERSION} URL https://gitlab.com/libeigen/eigen/-/archive/${QUDA_EIGEN_VERSION}/eigen-${QUDA_EIGEN_VERSION}.tar.bz2 + URL_HASH SHA256=B4C198460EBA6F28D34894E3A5710998818515104D6E74E5CC331CE31E46E626 DOWNLOAD_ONLY YES SYSTEM YES) target_include_directories(Eigen SYSTEM INTERFACE ${Eigen_SOURCE_DIR}) From a56cb4d9cff4e339d57806547e69267d157b4bcc Mon Sep 17 00:00:00 2001 From: James Osborn Date: Sat, 16 Nov 2024 20:43:44 -0600 Subject: [PATCH 19/53] add printing gauge field for debugging initialize dslash_type_precondition fix CMake to use QUDA_DIRAC_LAPLACE instead of old QUDA_LAPLACE --- include/gauge_field.h | 27 +++++++++++-- include/gauge_field_order.h | 23 +++++++----- lib/check_params.h | 1 + lib/color_spinor_util.in.cu | 4 +- lib/gauge_field.cpp | 9 ++++- lib/gauge_norm.in.cu | 75 ++++++++++++++++++++++++++++++++++++- tests/CMakeLists.txt | 6 +-- 7 files changed, 123 insertions(+), 22 deletions(-) diff --git a/include/gauge_field.h b/include/gauge_field.h index a62b6a684f..776681e44b 100644 --- a/include/gauge_field.h +++ b/include/gauge_field.h @@ -250,12 +250,12 @@ namespace quda { */ void setTuningString(); + public: /** @brief Initialize the padded region to 0 */ void zeroPad(); - public: /** @brief Default constructor */ @@ -455,7 +455,7 @@ namespace quda { std::enable_if_t && !std::is_pointer_v::type>, T> data() const { if (is_pointer_array(order)) errorQuda("Non dim-array ordered field requested but order is %d", order); - return reinterpret_cast(gauge.data()); + return static_cast(gauge.data()); } /** @@ -473,7 +473,7 @@ namespace quda { "data() requires a pointer cast type"); if (d >= (unsigned)geometry) errorQuda("Invalid array index %d for geometry %d field", d, geometry); if (!is_pointer_array(order)) errorQuda("Dim-array ordered field requested but order is %d", order); - return reinterpret_cast(gauge_array[d].data()); + return static_cast(gauge_array[d].data()); } void *raw_pointer() const @@ -500,7 +500,7 @@ namespace quda { { if (!is_pointer_array(order)) errorQuda("Dim-array ordered field requested but order is %d", order); array u = {}; - for (auto d = 0; d < geometry; d++) u[d] = static_cast(gauge_array[d]); + for (auto d = 0; d < geometry; d++) u[d] = static_cast(gauge_array[d].data()); return u; } @@ -651,9 +651,28 @@ namespace quda { } } + /** + * @brief Print the site data + * @param[in] parity Parity index + * @param[in] dim The dimension in which we are printing + * @param[in] x_cb Checkerboard space-time index + * @param[in] rank The rank we are requesting from (default is rank = 0) + */ + void PrintMatrix(int dim, int parity, unsigned int x_cb, int rank = 0) const; + friend struct GaugeFieldParam; }; + /** + @brief Print the value of the field at the requested coordinates + @param[in] a The field we are printing from + @param[in] dim The dimension in which we are printing + @param[in] parity Parity index + @param[in] x_cb Checkerboard space-time index + @param[in] rank The rank we are requesting from (default is rank = 0) + */ + void genericPrintMatrix(const GaugeField &a, int dim, int parity, unsigned int x_cb, int rank = 0); + /** @brief This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm. diff --git a/include/gauge_field_order.h b/include/gauge_field_order.h index 38079c3cbb..e3d777565e 100644 --- a/include/gauge_field_order.h +++ b/include/gauge_field_order.h @@ -153,8 +153,11 @@ namespace quda { template __host__ __device__ inline constexpr bool fixed_point() { return false; } template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } - template<> __host__ __device__ inline constexpr bool fixed_point() { return true; } - template<> __host__ __device__ inline constexpr bool fixed_point() { return true; } + template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } + template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } + template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } + template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } + template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } template __host__ __device__ inline constexpr bool match() { return false; } template<> __host__ __device__ inline constexpr bool match() { return true; } @@ -377,7 +380,7 @@ namespace quda { { for (int d = 0; d < U.Geometry(); d++) u[d] = gauge_ ? static_cast **>(gauge_)[d] : U.data *>(d); - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -459,7 +462,7 @@ namespace quda { ghostOffset[d + 4] = U.Nface() * U.SurfaceCB(d) * U.Ncolor() * U.Ncolor(); } - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -494,7 +497,7 @@ namespace quda { scale(static_cast(1.0)), scale_inv(static_cast(1.0)) { - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -583,7 +586,7 @@ namespace quda { ghostOffset[d + 4] = U.Nface() * U.SurfaceCB(d) * U.Ncolor() * U.Ncolor(); } - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -636,7 +639,7 @@ namespace quda { scale(static_cast(1.0)), scale_inv(static_cast(1.0)) { - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -715,7 +718,7 @@ namespace quda { nullptr; ghostVolumeCB[d + 4] = U.Nface() * U.SurfaceCB(d); } - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -745,8 +748,8 @@ namespace quda { @tparam nSpinCoarse Number of "spin degrees of freedom" (for coarse-link fields only) @tparam order Storage order of the field @tparam native_ghost Whether to use native ghosts (inlined into + the padded area for internal-order fields or use a separate array if false) @tparam storeFloat_ Underlying storage type for the field - the padded area for internal-order fields or use a separate array if false) */ template @@ -1478,7 +1481,7 @@ namespace quda { z *= scale; #pragma unroll for (int i = 0; i < 9; i++) out[i] = cmul(z, out[i]); - } else { // stagic phase + } else { // static phase #pragma unroll for (int i = 0; i < 9; i++) { out[i] *= phase; } } diff --git a/lib/check_params.h b/lib/check_params.h index f227ee4716..639db3eb3d 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -578,6 +578,7 @@ void printQudaInvertParam(QudaInvertParam *param) { // domain decomposition parameters //P(inv_type_sloppy, QUDA_INVALID_INVERTER); // disable since invalid means no preconditioner #if defined INIT_PARAM + P(dslash_type_precondition, QUDA_INVALID_DSLASH); P(inv_type_precondition, QUDA_INVALID_INVERTER); P(preconditioner, 0); P(tol_precondition, INVALID_DOUBLE); diff --git a/lib/color_spinor_util.in.cu b/lib/color_spinor_util.in.cu index ef370eda18..324184153f 100644 --- a/lib/color_spinor_util.in.cu +++ b/lib/color_spinor_util.in.cu @@ -329,7 +329,7 @@ namespace quda { printf("rank = %d x = %u, s = %d, { ", comm_rank(), x_cb, s); for (int c = 0; c < o.Ncolor(); c++) { auto value = complex(o(parity, x_cb, s, c)); - printf("(%f,%f) ", value.real(), value.imag()); + printf("(%g,%g) ", value.real(), value.imag()); } printf("}\n"); } @@ -340,7 +340,7 @@ namespace quda { { if (a.isNative()) { constexpr auto order = colorspinor::getNative(nSpin); - print_vector(FieldOrderCB(a), parity, x_cb); + print_vector(FieldOrderCB(a), parity, x_cb); } else if (a.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) { constexpr auto order = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; print_vector(FieldOrderCB(a), parity, x_cb); diff --git a/lib/gauge_field.cpp b/lib/gauge_field.cpp index 20cf7621ac..40a1351656 100644 --- a/lib/gauge_field.cpp +++ b/lib/gauge_field.cpp @@ -1355,7 +1355,7 @@ namespace quda { qudaMemcpy(buffer, data(), Bytes(), qudaMemcpyDeviceToHost); } else { if (is_pointer_array(order)) { - char *dst_buffer = reinterpret_cast(buffer); + char *dst_buffer = static_cast(buffer); for (int d = 0; d < site_dim; d++) { std::memcpy(&dst_buffer[d * bytes / site_dim], gauge_array[d].data(), bytes / site_dim); } @@ -1375,7 +1375,7 @@ namespace quda { qudaMemcpy(data(), buffer, Bytes(), qudaMemcpyHostToDevice); } else { if (is_pointer_array(order)) { - const char *dst_buffer = reinterpret_cast(buffer); + const char *dst_buffer = static_cast(buffer); for (int d = 0; d < site_dim; d++) { std::memcpy(gauge_array[d].data(), &dst_buffer[d * bytes / site_dim], bytes / site_dim); } @@ -1389,4 +1389,9 @@ namespace quda { } } + void GaugeField::PrintMatrix(int dim, int parity, unsigned int x_cb, int rank) const + { + genericPrintMatrix(*this, dim, parity, x_cb, rank); + } + } // namespace quda diff --git a/lib/gauge_norm.in.cu b/lib/gauge_norm.in.cu index 0da4412d17..1b1f3c6882 100644 --- a/lib/gauge_norm.in.cu +++ b/lib/gauge_norm.in.cu @@ -1,5 +1,6 @@ #include #include +#include namespace quda { @@ -61,7 +62,7 @@ namespace quda { norm_ = norm(u, d, type); // factor of two to account for spin with MG fields } else { if constexpr (sizeof...(N) > 0) { - norm_ = norm(u, d, type, IntList()); + norm_ = norm(u, d, type, IntList()); } else { errorQuda("Nc = %d has not been instantiated", u.Ncolor()); } @@ -108,4 +109,76 @@ namespace quda { return nrm; } + template void print_matrix(const Order &o, int d, int parity, unsigned int x_cb) + { + for (int r = 0; r < o.Ncolor(); r++) { + printf("rank %d parity %d x %u row %d", comm_rank(), parity, x_cb, r); + for (int c = 0; c < o.Ncolor(); c++) { + auto value = complex(o(d, parity, x_cb, r, c)); + printf(" (%g,%g)", value.real(), value.imag()); + } + printf("\n"); + } + } + + template + void genericPrintMatrix(const GaugeField &a, int d, int parity, unsigned int x_cb) + { + switch (a.FieldOrder()) { + case QUDA_FLOAT2_GAUGE_ORDER: + print_matrix(FieldOrder(a), d, parity, x_cb); + break; + case QUDA_QDP_GAUGE_ORDER: + print_matrix(FieldOrder(a), d, parity, x_cb); + break; + case QUDA_MILC_GAUGE_ORDER: + print_matrix(FieldOrder(a), d, parity, x_cb); + break; + default: errorQuda("Unsupported field order %d", a.FieldOrder()); + } + } + + template + void genericPrintMatrix(const GaugeField &a, int d, int parity, unsigned int x_cb, IntList) + { + if (a.Ncolor() == nColor) { + genericPrintMatrix(a, d, parity, x_cb); + } else { + if constexpr (sizeof...(N) > 0) { + genericPrintMatrix(a, d, parity, x_cb, IntList()); + } else { + errorQuda("Not supported Ncolor = %d", a.Ncolor()); + } + } + } + + template void genericPrintMatrix(const GaugeField &a, int d, int parity, unsigned int x_cb) + { + genericPrintMatrix(a, d, parity, x_cb, IntList<@QUDA_MULTIGRID_NC_NVEC_LIST @>()); + } + + void genericPrintMatrix(const GaugeField &a, int d, int parity, unsigned int x_cb, int rank) + { + if (rank != comm_rank()) return; + + GaugeFieldParam param(a); + param.field = const_cast(&a); + param.location = QUDA_CPU_FIELD_LOCATION; + param.create = QUDA_COPY_FIELD_CREATE; + // if field is a pinned device field then we need to clone it on the host + bool host_clone + = (a.Location() == QUDA_CUDA_FIELD_LOCATION && a.MemType() == QUDA_MEMORY_DEVICE && !use_managed_memory()) ? true : + false; + std::unique_ptr clone_a = !host_clone ? nullptr : std::make_unique(param); + const GaugeField &a_ = !host_clone ? a : *clone_a.get(); + + switch (a.Precision()) { + case QUDA_DOUBLE_PRECISION: genericPrintMatrix(a_, d, parity, x_cb); break; + case QUDA_SINGLE_PRECISION: genericPrintMatrix(a_, d, parity, x_cb); break; + case QUDA_HALF_PRECISION: genericPrintMatrix(a_, d, parity, x_cb); break; + case QUDA_QUARTER_PRECISION: genericPrintMatrix(a_, d, parity, x_cb); break; + default: errorQuda("Precision %d not implemented", a.Precision()); + } + } + } // namespace quda diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 74d0be3a24..b72bc37426 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -978,7 +978,7 @@ endif() set_tests_properties(dslash_${DIRAC_NAME}_build_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol2}) endif() - if(QUDA_LAPLACE) + if(QUDA_DIRAC_LAPLACE) set(DIRAC_NAME laplace) add_test(NAME dslash_${DIRAC_NAME}_mat_policy${pol2} COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} @@ -1236,7 +1236,7 @@ foreach(prec IN LISTS TEST_PRECS) endif() endif() - if (QUDA_LAPLACE) + if (QUDA_DIRAC_LAPLACE) add_test(NAME invert_test_laplace_${prec} COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} --dslash-type laplace --ngcrkrylov 8 --compute-fat-long true @@ -1484,7 +1484,7 @@ foreach(prec IN LISTS TEST_PRECS) --gtest_output=xml:staggered_eigensolve_test_staggered_${prec}.xml) endif() - if (QUDA_LAPLACE) + if (QUDA_DIRAC_LAPLACE) add_test(NAME eigensolve_test_laplace_${prec} COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} --dslash-type laplace --compute-fat-long true From c2307c636793e0e38b8480a44621971afda8d3ce Mon Sep 17 00:00:00 2001 From: James Osborn Date: Sat, 16 Nov 2024 21:38:39 -0600 Subject: [PATCH 20/53] fix issue caused by formatting --- lib/gauge_norm.in.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gauge_norm.in.cu b/lib/gauge_norm.in.cu index 1b1f3c6882..08cb5bf2ca 100644 --- a/lib/gauge_norm.in.cu +++ b/lib/gauge_norm.in.cu @@ -154,7 +154,7 @@ namespace quda { template void genericPrintMatrix(const GaugeField &a, int d, int parity, unsigned int x_cb) { - genericPrintMatrix(a, d, parity, x_cb, IntList<@QUDA_MULTIGRID_NC_NVEC_LIST @>()); + genericPrintMatrix(a, d, parity, x_cb, IntList<@QUDA_MULTIGRID_NC_NVEC_LIST@>()); } void genericPrintMatrix(const GaugeField &a, int d, int parity, unsigned int x_cb, int rank) From fae8a660b17bfa37214331e8c36f3f07f90daaff Mon Sep 17 00:00:00 2001 From: maddyscientist Date: Sun, 17 Nov 2024 13:12:13 -0800 Subject: [PATCH 21/53] Fix bug with fine-grained accessor with staggered half precision fields. Revert prior change to color_spinor_util.in.cu. --- include/color_spinor_field_order.h | 9 ++++++--- lib/color_spinor_util.in.cu | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/color_spinor_field_order.h b/include/color_spinor_field_order.h index b4be52fd8a..95feb9c2d5 100644 --- a/include/color_spinor_field_order.h +++ b/include/color_spinor_field_order.h @@ -969,16 +969,19 @@ namespace quda norm_t *norm = nullptr; int norm_offset = 0; if constexpr (fixed) { - if constexpr (block_float) { + if constexpr (fixed && block_float && nColor == 3 && nSpin == 1 && nVec == 1) { + norm = v.norm; + norm_offset = parity * v.norm_offset + 4 * x_cb + 3; + } else if constexpr (block_float) { norm = v.norm; - norm_offset = v.norm_offset; + norm_offset = parity * v.norm_offset + x_cb; } else { scale = v.scale; scale_inv = v.scale_inv; } } return fieldorder_wrapper( - v.v, accessor.index(parity, x_cb, s, c, n, volumeCB), scale, scale_inv, norm, parity * norm_offset + x_cb); + v.v, accessor.index(parity, x_cb, s, c, n, volumeCB), scale, scale_inv, norm, norm_offset); } /** Returns the number of field colors */ diff --git a/lib/color_spinor_util.in.cu b/lib/color_spinor_util.in.cu index 324184153f..a268a4b16a 100644 --- a/lib/color_spinor_util.in.cu +++ b/lib/color_spinor_util.in.cu @@ -340,7 +340,7 @@ namespace quda { { if (a.isNative()) { constexpr auto order = colorspinor::getNative(nSpin); - print_vector(FieldOrderCB(a), parity, x_cb); + print_vector(FieldOrderCB(a), parity, x_cb); } else if (a.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) { constexpr auto order = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; print_vector(FieldOrderCB(a), parity, x_cb); From ef21a752436104a19d203ad4ef4c492288adc968 Mon Sep 17 00:00:00 2001 From: rokarur Date: Mon, 18 Nov 2024 08:34:12 -0800 Subject: [PATCH 22/53] add config options for adj_test --- include/quda.h | 1 + lib/interface_quda.cpp | 2 +- tests/su3_adj_test.cpp | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/quda.h b/include/quda.h index 192c59ea15..22e245f5ab 100644 --- a/include/quda.h +++ b/include/quda.h @@ -864,6 +864,7 @@ extern "C" { QudaGaugeSmearType smear_type; /**< The smearing type to perform */ unsigned int adj_n_save; /**< How many intermediate gauge fields to save at each large nblock to perform adj flow*/ unsigned int adj_n_hier_save; /**< How many *hierarchical* intermediate gauge fields to save to perform adj flow*/ + unsigned int hier_threshold; /**< Minimum *hierarchical* threshold for adj gradient flow*/ QudaBoolean restart; /**< Used to restart the smearing from existing gaugeSmeared */ double t0; /**< Starting flow time for Wilson flow */ int dir_ignore; /**< The direction to be ignored by the smearing algorithm diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index b3096370c6..d5d45687bb 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5848,7 +5848,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); logQuda(QUDA_VERBOSE,"Hierarchical block n_b: %d\n\n",n_b); int ret_idx = 0; - int threshold = 6; + int threshold = smear_param->hier_threshold; std::vector hier_list; //The first stage is saved at the very beginning, so its presence is implicit hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save); diff --git a/tests/su3_adj_test.cpp b/tests/su3_adj_test.cpp index b1d76f4b96..473ed8c9b0 100644 --- a/tests/su3_adj_test.cpp +++ b/tests/su3_adj_test.cpp @@ -28,6 +28,7 @@ double gauge_smear_alpha3 = 0.3; int gauge_smear_steps = 50; int gauge_n_save = 3; int gauge_n_hier_save = 3; +int hier_threshold = 6; QudaGaugeSmearType gauge_smear_type = QUDA_GAUGE_SMEAR_STOUT; int gauge_smear_dir_ignore = -1; int measurement_interval = 5; @@ -105,6 +106,10 @@ void add_su3_option_group(std::shared_ptr quda_app) "Direction to be ignored by the smearing, negative value means decided by --su3-smear-type (default -1)"); opgroup->add_option("--su3-smear-steps", gauge_smear_steps, "The number of smearing steps to perform (default 50)"); + + opgroup->add_option("--su3-adj-gauge-nsave", gauge_n_save, "The number of gauge steps to save for hierarchical adj grad flow"); + + opgroup->add_option("--su3-hier_threshold", hier_threshold, "Minimum threshold for hierarchical adj grad flow"); opgroup->add_option("--su3-measurement-interval", measurement_interval, "Measure the field energy and/or topological charge every Nth step (default 5) "); @@ -200,6 +205,7 @@ int main(int argc, char **argv) smear_param.n_steps = gauge_smear_steps; smear_param.adj_n_save = gauge_n_save; smear_param.adj_n_hier_save = gauge_n_hier_save; + smear_param.hier_threshold = hier_threshold; smear_param.meas_interval = measurement_interval; smear_param.alpha = gauge_smear_alpha; smear_param.rho = gauge_smear_rho; From 8ff70c4ba7e1214c51453ffa971b3fb377f05490 Mon Sep 17 00:00:00 2001 From: rokarur Date: Tue, 19 Nov 2024 19:57:59 -0800 Subject: [PATCH 23/53] ONE n_safe step of Safe agrees with Hier numerically --- include/quda.h | 1 - lib/check_params.h | 4 + lib/interface_quda.cpp | 178 ++++++++++++++++++++++++++++------------ tests/su3_adj_test.cpp | 30 +++++-- tests/su3_ferm_test.cpp | 2 +- 5 files changed, 152 insertions(+), 63 deletions(-) diff --git a/include/quda.h b/include/quda.h index 22e245f5ab..b14ac15fe8 100644 --- a/include/quda.h +++ b/include/quda.h @@ -863,7 +863,6 @@ extern "C" { unsigned int meas_interval; /**< Perform the requested measurements on the gauge field at this interval */ QudaGaugeSmearType smear_type; /**< The smearing type to perform */ unsigned int adj_n_save; /**< How many intermediate gauge fields to save at each large nblock to perform adj flow*/ - unsigned int adj_n_hier_save; /**< How many *hierarchical* intermediate gauge fields to save to perform adj flow*/ unsigned int hier_threshold; /**< Minimum *hierarchical* threshold for adj gradient flow*/ QudaBoolean restart; /**< Used to restart the smearing from existing gaugeSmeared */ double t0; /**< Starting flow time for Wilson flow */ diff --git a/lib/check_params.h b/lib/check_params.h index 639db3eb3d..ee400acb31 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -1175,6 +1175,8 @@ void printQudaGaugeSmearParam(QudaGaugeSmearParam *param) P(rho, 0.0); P(epsilon, 0.0); P(restart, QUDA_BOOLEAN_FALSE); + P(adj_n_save,5); + P(hier_threshold,6); P(t0, 0.0); P(alpha1, 0.0); P(alpha2, 0.0); @@ -1187,6 +1189,8 @@ void printQudaGaugeSmearParam(QudaGaugeSmearParam *param) P(rho, INVALID_DOUBLE); P(epsilon, INVALID_DOUBLE); P(restart, QUDA_BOOLEAN_INVALID); + P(adj_n_save,(unsigned int)INVALID_INT); + P(hier_threshold,(unsigned int)INVALID_INT); P(t0, INVALID_DOUBLE); P(alpha1, INVALID_DOUBLE); P(alpha2, INVALID_DOUBLE); diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index d5d45687bb..8fd12967d2 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5375,8 +5375,11 @@ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaG popOutputPrefix(); } /* end of performGFlowQuda */ - -void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, int nsteps) + + +// perform adjoint (backwards) gradient flow on gauge and spinor field following the algorithm in arXiv:1302.5246 (Appendix D) +// the gauge flow steps are identical to Wilson Flow algorithm in arXiv:1006.4518 (Vt <-> W3) +void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param,int nsteps) { auto profile = pushProfile(profileAdjGFlowSafe); @@ -5409,6 +5412,8 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu GaugeField &g_W2 = gaugeW2; GaugeField &g_VT = gaugeVT; + // printf("Now calling gauge\n"); + // g_W0.PrintMatrix(0, 0, 0, 0); //necessary? if (gParamDummy.order <= 4) gParamDummy.ghostExchange = QUDA_GHOST_EXCHANGE_NO; @@ -5462,6 +5467,10 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu if (i > 0) std::swap(g_W0,g_VT); GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); + // printf("\n\nprint what is the safe gaugeW0 now\n"); + // g_W0.PrintMatrix(0,0,0,0); + // printf("\n\nprint what is the safe gaugeW1 now\n"); + // g_W1.PrintMatrix(0,0,0,0); GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); @@ -5471,15 +5480,51 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu f_temp1 = f_temp3; f_temp2 = f_temp3; +// printf("\n\nstarting safe study p0\n"); +// fout.PrintVector(0,0,0); +// printf("\n"); +// f_temp0.PrintVector(0,0,0); +// printf("\n"); +// f_temp1.PrintVector(0,0,0); +// printf("\n"); +// f_temp2.PrintVector(0,0,0); +// printf("\n"); +// f_temp3.PrintVector(0,0,0); +// printf("\n"); +// f_temp4.PrintVector(0,0,0); + +// printf("\n\nprint what is the safe gaugeW@ now\n"); +// g_W2.PrintMatrix(0,0,0,0); + copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profileAdjGFlowSafe); - + // printf("\n\nprint what is the safe gauge now\n"); + // precise.PrintMatrix(0,0,0,0); + // printf("\n\nprint safe ft4 after laplacian\n"); + // f_temp4.PrintVector(0,0,0); // f_temp0 = 3./4.*f_temp4; blas::ax(smear_param->epsilon * 3. / 4., f_temp4); + + + f_temp2 = f_temp4; + + // printf("\n\nstarting safe study p1\n"); + // fout.PrintVector(0,0,0); + // printf("\n"); + // f_temp0.PrintVector(0,0,0); + // printf("\n"); + // f_temp1.PrintVector(0,0,0); + // printf("\n"); + // f_temp2.PrintVector(0,0,0); + // printf("\n"); + // f_temp3.PrintVector(0,0,0); + // printf("\n"); + // f_temp4.PrintVector(0,0,0); + copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profileAdjGFlowSafe); @@ -5503,6 +5548,8 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu fout = f_temp0; //redefining f_temp0 to restart loop f_temp3 = f_temp0; + + } cpuParam.v = h_out; cpuParam.location = inv_param->output_location; @@ -5514,7 +5561,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu void adjSafeEvolve(std::vector> sf_list,std::vector> gf_list, QudaGaugeSmearParam *smear_param, unsigned int ns_safe, TimeProfile &profile) { - + printf("nsafe = %d \n",ns_safe); GaugeField &g_W0 = gf_list[0].get(); GaugeField &g_W1 = gf_list[1].get(); GaugeField &g_W2 = gf_list[2].get(); @@ -5541,7 +5588,6 @@ void adjSafeEvolve(std::vector> sf_list for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } f_temp3 = fin; - f_temp0 = f_temp3; for (unsigned int j = 0; j < ns_safe ; j++) { @@ -5550,6 +5596,10 @@ void adjSafeEvolve(std::vector> sf_list if (i > 0) std::swap(g_W0,g_VT); GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); + // printf("\n\nprint what is the hier gaugeW0 now\n"); + // g_W0.PrintMatrix(0,0,0,0); + // printf("\n\nprint what is the hier gaugeW1 now\n"); + // g_W1.PrintMatrix(0,0,0,0); GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); @@ -5559,15 +5609,48 @@ void adjSafeEvolve(std::vector> sf_list f_temp1 = f_temp3; f_temp2 = f_temp3; + // printf("\n\nstarting hier study p0\n"); + // fout.PrintVector(0,0,0); + // printf("\n"); + // f_temp0.PrintVector(0,0,0); + // printf("\n"); + // f_temp1.PrintVector(0,0,0); + // printf("\n"); + // f_temp2.PrintVector(0,0,0); + // printf("\n"); + // f_temp3.PrintVector(0,0,0); + // printf("\n"); + // f_temp4.PrintVector(0,0,0); + // // if (ns_safe == 43) {if (j==41) f_temp0.PrintVector(0,0,0);} + // printf("\n\nprint what is the hier gaugeW@ now\n"); + // g_W2.PrintMatrix(0,0,0,0); + copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profile); + // printf("\n\nprint what is the hier gauge now\n"); + // precise.PrintMatrix(0,0,0,0); + // printf("\n\nprint hier ft4 after laplacian\n"); + // f_temp4.PrintVector(0,0,0); // f_temp0 = 3./4.*f_temp4; blas::ax(smear_param->epsilon * 3. / 4., f_temp4); f_temp2 = f_temp4; + // printf("\n\nstarting hier study p1\n"); + // fout.PrintVector(0,0,0); + // printf("\n"); + // f_temp0.PrintVector(0,0,0); + // printf("\n"); + // f_temp1.PrintVector(0,0,0); + // printf("\n"); + // f_temp2.PrintVector(0,0,0); + // printf("\n"); + // f_temp3.PrintVector(0,0,0); + // printf("\n"); + // f_temp4.PrintVector(0,0,0); + copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profile); @@ -5591,6 +5674,7 @@ void adjSafeEvolve(std::vector> sf_list fout = f_temp0; //redefining f_temp0 to restart loop f_temp3 = f_temp0; + } } @@ -5703,13 +5787,14 @@ void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, Quda ColorSpinorField f_temp4(deviceParam); + unsigned int block_length = smear_param->n_steps / smear_param->adj_n_save; int block_counter = 0; std::vector dist_save(smear_param->adj_n_save); std::fill(dist_save.begin(), dist_save.end(), block_length); dist_save.at(dist_save.size() - 1) = smear_param->n_steps - (smear_param->adj_n_save - 1) * block_length; - for (unsigned int i = 0; i < smear_param->adj_n_save; i++) printf("evolve distance of %d added \n",dist_save[i]); + for (unsigned int i = 0; i < smear_param->adj_n_save; i++) logQuda(QUDA_VERBOSE,"evolve distance of %d added \n",dist_save[i]); for (unsigned int i = 0; i < smear_param->adj_n_save; i++) { @@ -5732,48 +5817,9 @@ void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, Quda adjSafeEvolve(sf_list,gf_list,smear_param,dist_save[i],profileAdjGFlowNB); - logQuda(QUDA_SUMMARIZE," block number %d successfully deployed \n",i); + logQuda(QUDA_VERBOSE," block number %d successfully deployed \n",i); std::swap(sf_list[0],sf_list[1]); } - - - int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); - int element_back; - int ret_stat; - std::vector gauge_list{1,2,3,4,5}; - // n_b = 3; - std::vector hier_list; - hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save - 1); - for (int i=0; i < hier_list.size(); i++) printf("content # %d of hier list = %d\n",i,hier_list[i]); - - - hier_list.pop_back(); - ret_stat = modify_hier_list(hier_list, n_b, smear_param->adj_n_save - 1, 6); - element_back = gauge_list.back(); - gauge_list.pop_back(); - gauge_list.insert(gauge_list.begin()+ret_stat+1,element_back); - for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest hier list = %d\n",i,hier_list[i]); - for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest gauge list = %d\n",i,gauge_list[i]); - - - - hier_list.pop_back(); - ret_stat = modify_hier_list(hier_list, n_b, smear_param->adj_n_save - 1, 6); - element_back = gauge_list.back(); - gauge_list.pop_back(); - gauge_list.insert(gauge_list.begin()+ret_stat+1,element_back); - for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest hier list = %d\n",i,hier_list[i]); - for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest gauge list = %d\n",i,gauge_list[i]); - - - - hier_list.pop_back(); - ret_stat = modify_hier_list(hier_list, n_b, smear_param->adj_n_save - 1, 6); - element_back = gauge_list.back(); - gauge_list.pop_back(); - gauge_list.insert(gauge_list.begin()+ret_stat+1,element_back); - for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest hier list = %d\n",i,hier_list[i]); - for (int i=0; i < hier_list.size(); i++) printf("content # %d of newest gauge list = %d\n",i,gauge_list[i]); cpuParam.v = h_out; cpuParam.location = inv_param->output_location; @@ -5789,7 +5835,8 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu auto profile = pushProfile(profileAdjGFlowHier); pushOutputPrefix("performAdjGFlowQudaHier: "); checkGaugeSmearParam(smear_param); - + + // pushVerbosity(inv_param->verbosity); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); @@ -5807,6 +5854,8 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu GaugeField gaugeW2(gParamDummy); GaugeField gaugeVT(gParamDummy); GaugeField gauge_out(gParamDummy); + + GaugeFieldParam gParam(*gaugePrecise); gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct @@ -5845,6 +5894,9 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu ColorSpinorField f_temp3(deviceParam); ColorSpinorField f_temp4(deviceParam); + // set [3] = input spinor + f_temp3 = fin; + int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); logQuda(QUDA_VERBOSE,"Hierarchical block n_b: %d\n\n",n_b); int ret_idx = 0; @@ -5857,16 +5909,34 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu if (hier_list.empty()) errorQuda("hier_list is not populated\n"); if (hier_list.size() != gauge_stages.size()) errorQuda("hier_list is not same size as gauge_stages \n"); + + // printf("Now calling gauge\n"); + // gin.PrintMatrix(0, 0, 0, 0); + // fin.PrintVector(0,0,0); for (unsigned int i = 0; i < hier_list.size(); i++) { - + // printf("Gin before any evolve also\n\n"); + // gin.PrintMatrix(0,0,0,0); + + /*we first set gin to the first step*/ + if (i == 0){ + gauge_stages[0] = gin; + // printf("printing hier at the very beginning\n\n"); + // gauge_stages[0].PrintMatrix(0,0,0,0); + // printf("Now gin also\n\n"); + // gin.PrintMatrix(0,0,0,0); + } + for (unsigned int j = 0; j < hier_list[i]; j++){ if (i > 0) std::swap(gout,gin); + // printf("Gin immediately before evolve number below %d\n\n", j); + // gin.PrintMatrix(0,0,0,0); WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); + // printf("Gin after evolve number below %d\n\n", j); + // gin.PrintMatrix(0,0,0,0); } - if (i == 0) - gauge_stages[0] = gin; - else + + if (i > 0) gauge_stages[i] = gout; } @@ -5883,7 +5953,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu std::swap(sf_list[0],sf_list[1]); for (int j = 0; j < hier_list.size(); j++ ){ - logQuda(QUDA_VERBOSE,"previous hier list element %d : %ld \n",j,hier_list[j]); + logQuda(QUDA_VERBOSE,"previous hier list element %d : %d \n",j,hier_list[j]); } logQuda(QUDA_VERBOSE,"\n"); hier_list.pop_back(); @@ -5909,7 +5979,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu GaugeField g_1 = gauge_stages[ret_idx]; for (int j = 0; j < hier_list.size(); j++ ){ - logQuda(QUDA_VERBOSE,"modified hier list element %d : %ld \n",j,hier_list[j]); + logQuda(QUDA_VERBOSE,"modified hier list element %d : %d \n",j,hier_list[j]); } logQuda(QUDA_VERBOSE,"ret idx : %d \n",ret_idx); @@ -5931,7 +6001,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu cpuParam.v = h_out; cpuParam.location = inv_param->output_location; ColorSpinorField fout_h(cpuParam); - fout_h = sf_list[1].get(); + fout_h = sf_list[0].get(); popOutputPrefix(); diff --git a/tests/su3_adj_test.cpp b/tests/su3_adj_test.cpp index 473ed8c9b0..90b9d67121 100644 --- a/tests/su3_adj_test.cpp +++ b/tests/su3_adj_test.cpp @@ -27,7 +27,6 @@ double gauge_smear_alpha2 = 0.6; double gauge_smear_alpha3 = 0.3; int gauge_smear_steps = 50; int gauge_n_save = 3; -int gauge_n_hier_save = 3; int hier_threshold = 6; QudaGaugeSmearType gauge_smear_type = QUDA_GAUGE_SMEAR_STOUT; int gauge_smear_dir_ignore = -1; @@ -109,7 +108,7 @@ void add_su3_option_group(std::shared_ptr quda_app) opgroup->add_option("--su3-adj-gauge-nsave", gauge_n_save, "The number of gauge steps to save for hierarchical adj grad flow"); - opgroup->add_option("--su3-hier_threshold", hier_threshold, "Minimum threshold for hierarchical adj grad flow"); + opgroup->add_option("--su3-hier-threshold", hier_threshold, "Minimum threshold for hierarchical adj grad flow"); opgroup->add_option("--su3-measurement-interval", measurement_interval, "Measure the field energy and/or topological charge every Nth step (default 5) "); @@ -204,7 +203,6 @@ int main(int argc, char **argv) smear_param.smear_type = gauge_smear_type; smear_param.n_steps = gauge_smear_steps; smear_param.adj_n_save = gauge_n_save; - smear_param.adj_n_hier_save = gauge_n_hier_save; smear_param.hier_threshold = hier_threshold; smear_param.meas_interval = measurement_interval; smear_param.alpha = gauge_smear_alpha; @@ -215,7 +213,7 @@ int main(int argc, char **argv) smear_param.alpha3 = gauge_smear_alpha3; smear_param.dir_ignore = gauge_smear_dir_ignore; - quda::ColorSpinorField check,check_out; + quda::ColorSpinorField check,check_out,check_out1; QudaInvertParam invParam = newQudaInvertParam(); invParam.cpu_prec = QUDA_DOUBLE_PRECISION; invParam.cuda_prec = QUDA_DOUBLE_PRECISION; @@ -241,11 +239,17 @@ int main(int argc, char **argv) constructWilsonTestSpinorParam(&cs_param, &invParam, &gauge_param); check = quda::ColorSpinorField(cs_param); + //Add noise to spinor + quda::RNG rng(check, 1234); + spinorNoise(check, rng, QUDA_NOISE_GAUSS); + // constructWilsonTestSpinorParam(&cs_param_out, &invParam, &gauge_param); check_out = quda::ColorSpinorField(cs_param); + check_out1 = quda::ColorSpinorField(cs_param); // constructWilsonTestSpinorParam(&cs_param, &inv_param, &gauge_param); - - + check.PrintVector(0,0,0); + check_out.PrintVector(0,0,0); + check_out1.PrintVector(0,0,0); // quda::ColorSpinorField rngDummy(cs_param), rngDummy1(cs_param_out); printf("Stage -1 passed\n"); host_timer.start(); // start the timer @@ -266,13 +270,25 @@ int main(int argc, char **argv) obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; } // performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); - performAdjGFlowHier(check.data(),check_out.data(), &invParam, &smear_param); + performAdjGFlowHier(check_out1.data(),check.data(), &invParam, &smear_param); + performAdjGFlowSafe(check_out.data(),check.data(), &invParam, &smear_param, 50); + + break; } default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); } host_timer.stop(); // stop the timer + + printf("Original spinor\n:"); + check.PrintVector(0,0,0); + printf("Hierarchical method\n:"); + check_out1.PrintVector(0,0,0); + + printf("Safe method\n:"); + check_out.PrintVector(0,0,0); + printfQuda("Total time for gauge smearing = %g secs\n", host_timer.last()); if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); diff --git a/tests/su3_ferm_test.cpp b/tests/su3_ferm_test.cpp index ded426dce7..dc0f02306b 100644 --- a/tests/su3_ferm_test.cpp +++ b/tests/su3_ferm_test.cpp @@ -258,7 +258,7 @@ int main(int argc, char **argv) obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; } // performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); - performAdjGFlowSafe(check.data(),check_out.data(), &invParam, &smear_param, 3); + performAdjGFlowSafe(check.data(),check_out.data(), &invParam, &smear_param,3); break; } default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); From 34a890477e15701877762bbdfd574745eacf1916 Mon Sep 17 00:00:00 2001 From: rokarur Date: Tue, 19 Nov 2024 20:22:03 -0800 Subject: [PATCH 24/53] get the fin, fout out of the refernee wrapper list --- lib/interface_quda.cpp | 128 ++++++----------------------------------- 1 file changed, 18 insertions(+), 110 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 8fd12967d2..f9fa401b0c 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5412,8 +5412,6 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu GaugeField &g_W2 = gaugeW2; GaugeField &g_VT = gaugeVT; - // printf("Now calling gauge\n"); - // g_W0.PrintMatrix(0, 0, 0, 0); //necessary? if (gParamDummy.order <= 4) gParamDummy.ghostExchange = QUDA_GHOST_EXCHANGE_NO; @@ -5467,10 +5465,6 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu if (i > 0) std::swap(g_W0,g_VT); GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); - // printf("\n\nprint what is the safe gaugeW0 now\n"); - // g_W0.PrintMatrix(0,0,0,0); - // printf("\n\nprint what is the safe gaugeW1 now\n"); - // g_W1.PrintMatrix(0,0,0,0); GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); @@ -5480,30 +5474,10 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu f_temp1 = f_temp3; f_temp2 = f_temp3; -// printf("\n\nstarting safe study p0\n"); -// fout.PrintVector(0,0,0); -// printf("\n"); -// f_temp0.PrintVector(0,0,0); -// printf("\n"); -// f_temp1.PrintVector(0,0,0); -// printf("\n"); -// f_temp2.PrintVector(0,0,0); -// printf("\n"); -// f_temp3.PrintVector(0,0,0); -// printf("\n"); -// f_temp4.PrintVector(0,0,0); - -// printf("\n\nprint what is the safe gaugeW@ now\n"); -// g_W2.PrintMatrix(0,0,0,0); - copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profileAdjGFlowSafe); - // printf("\n\nprint what is the safe gauge now\n"); - // precise.PrintMatrix(0,0,0,0); - // printf("\n\nprint safe ft4 after laplacian\n"); - // f_temp4.PrintVector(0,0,0); - // f_temp0 = 3./4.*f_temp4; + blas::ax(smear_param->epsilon * 3. / 4., f_temp4); @@ -5511,20 +5485,6 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu f_temp2 = f_temp4; - - // printf("\n\nstarting safe study p1\n"); - // fout.PrintVector(0,0,0); - // printf("\n"); - // f_temp0.PrintVector(0,0,0); - // printf("\n"); - // f_temp1.PrintVector(0,0,0); - // printf("\n"); - // f_temp2.PrintVector(0,0,0); - // printf("\n"); - // f_temp3.PrintVector(0,0,0); - // printf("\n"); - // f_temp4.PrintVector(0,0,0); - copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profileAdjGFlowSafe); @@ -5568,14 +5528,12 @@ void adjSafeEvolve(std::vector> sf_list GaugeField &g_VT = gf_list[3].get(); GaugeField &gaugeTemp = gf_list[4].get(); GaugeField &precise = gf_list[5].get(); - - ColorSpinorField &fin = sf_list[0].get(); - ColorSpinorField &fout = sf_list[1].get(); - ColorSpinorField &f_temp0 = sf_list[2].get(); - ColorSpinorField &f_temp1 = sf_list[3].get(); - ColorSpinorField &f_temp2 = sf_list[4].get(); - ColorSpinorField &f_temp3 = sf_list[5].get(); - ColorSpinorField &f_temp4 = sf_list[6].get(); + + ColorSpinorField &f_temp0 = sf_list[0].get(); + ColorSpinorField &f_temp1 = sf_list[1].get(); + ColorSpinorField &f_temp2 = sf_list[2].get(); + ColorSpinorField &f_temp3 = sf_list[3].get(); + ColorSpinorField &f_temp4 = sf_list[4].get(); int parity = 0; @@ -5587,7 +5545,7 @@ void adjSafeEvolve(std::vector> sf_list // only switch on comms needed for directions with a derivative for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } - f_temp3 = fin; + // f_temp3 = fin; for (unsigned int j = 0; j < ns_safe ; j++) { @@ -5596,10 +5554,6 @@ void adjSafeEvolve(std::vector> sf_list if (i > 0) std::swap(g_W0,g_VT); GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); - // printf("\n\nprint what is the hier gaugeW0 now\n"); - // g_W0.PrintMatrix(0,0,0,0); - // printf("\n\nprint what is the hier gaugeW1 now\n"); - // g_W1.PrintMatrix(0,0,0,0); GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); @@ -5608,49 +5562,16 @@ void adjSafeEvolve(std::vector> sf_list f_temp0 = f_temp3; f_temp1 = f_temp3; f_temp2 = f_temp3; - - // printf("\n\nstarting hier study p0\n"); - // fout.PrintVector(0,0,0); - // printf("\n"); - // f_temp0.PrintVector(0,0,0); - // printf("\n"); - // f_temp1.PrintVector(0,0,0); - // printf("\n"); - // f_temp2.PrintVector(0,0,0); - // printf("\n"); - // f_temp3.PrintVector(0,0,0); - // printf("\n"); - // f_temp4.PrintVector(0,0,0); - // // if (ns_safe == 43) {if (j==41) f_temp0.PrintVector(0,0,0);} - // printf("\n\nprint what is the hier gaugeW@ now\n"); - // g_W2.PrintMatrix(0,0,0,0); + copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profile); - // printf("\n\nprint what is the hier gauge now\n"); - // precise.PrintMatrix(0,0,0,0); - // printf("\n\nprint hier ft4 after laplacian\n"); - // f_temp4.PrintVector(0,0,0); - - // f_temp0 = 3./4.*f_temp4; + blas::ax(smear_param->epsilon * 3. / 4., f_temp4); f_temp2 = f_temp4; - // printf("\n\nstarting hier study p1\n"); - // fout.PrintVector(0,0,0); - // printf("\n"); - // f_temp0.PrintVector(0,0,0); - // printf("\n"); - // f_temp1.PrintVector(0,0,0); - // printf("\n"); - // f_temp2.PrintVector(0,0,0); - // printf("\n"); - // f_temp3.PrintVector(0,0,0); - // printf("\n"); - // f_temp4.PrintVector(0,0,0); - copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profile); @@ -5671,7 +5592,7 @@ void adjSafeEvolve(std::vector> sf_list blas::axpy(1.,f_temp2, f_temp0); blas::axpy(1.,f_temp1, f_temp0); - fout = f_temp0; + // fout = f_temp0; //redefining f_temp0 to restart loop f_temp3 = f_temp0; @@ -5806,7 +5727,7 @@ void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, Quda } std::vector> sf_list; - sf_list = {fin, fout, f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; + sf_list = {f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; std::vector> gf_list; gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; @@ -5818,7 +5739,6 @@ void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, Quda adjSafeEvolve(sf_list,gf_list,smear_param,dist_save[i],profileAdjGFlowNB); logQuda(QUDA_VERBOSE," block number %d successfully deployed \n",i); - std::swap(sf_list[0],sf_list[1]); } cpuParam.v = h_out; @@ -5894,7 +5814,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu ColorSpinorField f_temp3(deviceParam); ColorSpinorField f_temp4(deviceParam); - // set [3] = input spinor + // initializing step: set [3] = input spinor f_temp3 = fin; int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); @@ -5909,31 +5829,19 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu if (hier_list.empty()) errorQuda("hier_list is not populated\n"); if (hier_list.size() != gauge_stages.size()) errorQuda("hier_list is not same size as gauge_stages \n"); - - // printf("Now calling gauge\n"); - // gin.PrintMatrix(0, 0, 0, 0); - // fin.PrintVector(0,0,0); for (unsigned int i = 0; i < hier_list.size(); i++) { - // printf("Gin before any evolve also\n\n"); - // gin.PrintMatrix(0,0,0,0); /*we first set gin to the first step*/ if (i == 0){ gauge_stages[0] = gin; - // printf("printing hier at the very beginning\n\n"); - // gauge_stages[0].PrintMatrix(0,0,0,0); - // printf("Now gin also\n\n"); - // gin.PrintMatrix(0,0,0,0); } for (unsigned int j = 0; j < hier_list[i]; j++){ if (i > 0) std::swap(gout,gin); - // printf("Gin immediately before evolve number below %d\n\n", j); - // gin.PrintMatrix(0,0,0,0); + WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); - // printf("Gin after evolve number below %d\n\n", j); - // gin.PrintMatrix(0,0,0,0); + } if (i > 0) @@ -5941,7 +5849,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu } std::vector> sf_list; - sf_list = {fin, fout, f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; + sf_list = {f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; std::vector> gf_list; gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; @@ -5950,7 +5858,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu logQuda(QUDA_VERBOSE,"Starting a hierarchical loop log\n"); adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); - std::swap(sf_list[0],sf_list[1]); + // std::swap(sf_list[0],sf_list[1]); for (int j = 0; j < hier_list.size(); j++ ){ logQuda(QUDA_VERBOSE,"previous hier list element %d : %d \n",j,hier_list[j]); @@ -5969,7 +5877,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu logQuda(QUDA_VERBOSE," block number %d successfully deployed \n",i); // At final step, we do not conduct swap - if (i != 0) std::swap(sf_list[0],sf_list[1]); + // if (i != 0) std::swap(sf_list[0],sf_list[1]); } logQuda(QUDA_VERBOSE," hierarchial evolution completed \n"); break; From 01d44e40457ac9d04a0418f408ce1b599d8df739 Mon Sep 17 00:00:00 2001 From: rokarur Date: Tue, 19 Nov 2024 22:29:30 -0800 Subject: [PATCH 25/53] numerical agreement 2 steps, 2 nsave --- lib/interface_quda.cpp | 51 ++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index f9fa401b0c..ea94d478b3 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5407,6 +5407,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct GaugeField gaugeTemp(gParam); + const GaugeField gin = *gaugeSmeared; GaugeField &g_W0 = *gaugeSmeared; GaugeField &g_W1 = gaugeW1; GaugeField &g_W2 = gaugeW2; @@ -5462,13 +5463,17 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu { for (unsigned int i = 0; i < smear_param->n_steps - j; i++) { - if (i > 0) std::swap(g_W0,g_VT); + if (i == 0) g_W0 = gin; + else std::swap(g_W0,g_VT); + printf("our safe gauge field is \n"); + g_W0.PrintMatrix(0,0,0,0); GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); } + // init auxilliary fields [0], [1] and [2] as [3] f_temp0 = f_temp3; f_temp1 = f_temp3; @@ -5508,7 +5513,9 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu fout = f_temp0; //redefining f_temp0 to restart loop f_temp3 = f_temp0; - + printf("after first step safe, out in \n"); + f_temp0.PrintVector(0,0,0); + printf("\n\n"); } cpuParam.v = h_out; @@ -5516,12 +5523,15 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu ColorSpinorField fout_h(cpuParam); fout_h = fout; + printf("after final step safe, out in \n"); + fout.PrintVector(0,0,0); popOutputPrefix(); } void adjSafeEvolve(std::vector> sf_list,std::vector> gf_list, QudaGaugeSmearParam *smear_param, unsigned int ns_safe, TimeProfile &profile) { printf("nsafe = %d \n",ns_safe); + const GaugeField gin = gf_list[0].get(); GaugeField &g_W0 = gf_list[0].get(); GaugeField &g_W1 = gf_list[1].get(); GaugeField &g_W2 = gf_list[2].get(); @@ -5551,7 +5561,8 @@ void adjSafeEvolve(std::vector> sf_list { for (unsigned int i = 0; i < ns_safe - j; i++) { - if (i > 0) std::swap(g_W0,g_VT); + if (i == 0) g_W0 = gin; + else std::swap(g_W0,g_VT); GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); @@ -5814,7 +5825,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu ColorSpinorField f_temp3(deviceParam); ColorSpinorField f_temp4(deviceParam); - // initializing step: set [3] = input spinor + //IMPORTANT initializing step: set [3] = input spinor f_temp3 = fin; int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); @@ -5824,16 +5835,17 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu std::vector hier_list; //The first stage is saved at the very beginning, so its presence is implicit hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save); - + logQuda(QUDA_VERBOSE,"heir list size is %d\n",hier_list.size()); if (threshold < hier_list.back()) threshold = hier_list.back(); if (hier_list.empty()) errorQuda("hier_list is not populated\n"); if (hier_list.size() != gauge_stages.size()) errorQuda("hier_list is not same size as gauge_stages \n"); - for (unsigned int i = 0; i < hier_list.size(); i++) { + for (unsigned int i = 0; i < hier_list.size() - 1; i++) { + - /*we first set gin to the first step*/ if (i == 0){ + logQuda(QUDA_VERBOSE,"we first set gin to the first index of the gauge_Steps vector\n"); gauge_stages[0] = gin; } @@ -5844,8 +5856,8 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu } - if (i > 0) - gauge_stages[i] = gout; + // if (i > 0) + gauge_stages[i + 1] = gout; } std::vector> sf_list; @@ -5858,8 +5870,12 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu logQuda(QUDA_VERBOSE,"Starting a hierarchical loop log\n"); adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); - // std::swap(sf_list[0],sf_list[1]); + + printf("after first step hier, out3 \n"); + sf_list[3].get().PrintVector(0,0,0); + printf("first hier gauge \n"); + gauge_stages.back().PrintMatrix(0,0,0,0); for (int j = 0; j < hier_list.size(); j++ ){ logQuda(QUDA_VERBOSE,"previous hier list element %d : %d \n",j,hier_list[j]); } @@ -5871,14 +5887,18 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu logQuda(QUDA_VERBOSE," now in final serial stage of hierarchial evolution \n"); for (int i = gauge_stages.size() - 1; i >= 0; --i) { //first load correct gauge field (for beginning of the loop, it is the final gauge list element) + printf("after beginning of second step hier, out3 \n"); + sf_list[3].get().PrintVector(0,0,0); + printf("new hier gauge \n"); + gauge_stages[i].PrintMatrix(0,0,0,0); gf_list.at(0) = std::ref(gauge_stages[i]); adjSafeEvolve(sf_list,gf_list,smear_param,hier_list[i],profileAdjGFlowHier); logQuda(QUDA_VERBOSE," block number %d successfully deployed \n",i); - // At final step, we do not conduct swap - // if (i != 0) std::swap(sf_list[0],sf_list[1]); } + printf("after first step hier, out in \n"); + sf_list[0].get().PrintVector(0,0,0); logQuda(QUDA_VERBOSE," hierarchial evolution completed \n"); break; } @@ -5904,12 +5924,15 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu } - + fout = sf_list[0].get(); cpuParam.v = h_out; cpuParam.location = inv_param->output_location; ColorSpinorField fout_h(cpuParam); - fout_h = sf_list[0].get(); + fout_h = fout; + + printf("after final step hier, out in \n"); + fout.PrintVector(0,0,0); popOutputPrefix(); From e9856486733e2c0815e0592d1d17746f21542881 Mon Sep 17 00:00:00 2001 From: rokarur Date: Wed, 20 Nov 2024 00:20:22 -0800 Subject: [PATCH 26/53] numerical agreement 3 steps, 2 nsave --- lib/interface_quda.cpp | 169 +++++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 84 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index ea94d478b3..33929fdea6 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5523,8 +5523,6 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu ColorSpinorField fout_h(cpuParam); fout_h = fout; - printf("after final step safe, out in \n"); - fout.PrintVector(0,0,0); popOutputPrefix(); } @@ -5660,104 +5658,104 @@ int modify_hier_list(std::vector &hier_list, int n_b, int n_save, int thres void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param){ - auto profile = pushProfile(profileAdjGFlowNB); - pushOutputPrefix("performAdjGFlowQudaNB: "); - checkGaugeSmearParam(smear_param); +// auto profile = pushProfile(profileAdjGFlowNB); +// pushOutputPrefix("performAdjGFlowQudaNB: "); +// checkGaugeSmearParam(smear_param); - // pushVerbosity(inv_param->verbosity); - if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); +// // pushVerbosity(inv_param->verbosity); +// if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); - if (smear_param->restart) { - if (gaugeSmeared == nullptr) errorQuda("gaugeSmeared must be loaded"); - } else { - if (gaugePrecise == nullptr) errorQuda("Gauge field must be loaded"); - freeUniqueGaugeQuda(QUDA_SMEARED_LINKS); - gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileAdjGFlowNB); - } +// if (smear_param->restart) { +// if (gaugeSmeared == nullptr) errorQuda("gaugeSmeared must be loaded"); +// } else { +// if (gaugePrecise == nullptr) errorQuda("Gauge field must be loaded"); +// freeUniqueGaugeQuda(QUDA_SMEARED_LINKS); +// gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileAdjGFlowNB); +// } - GaugeFieldParam gParamDummy(*gaugeSmeared); - GaugeField gaugeW0(gParamDummy); - GaugeField gaugeW1(gParamDummy); - GaugeField gaugeW2(gParamDummy); - GaugeField gaugeVT(gParamDummy); - GaugeField gauge_out(gParamDummy); - - GaugeFieldParam gParam(*gaugePrecise); - gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct - GaugeField gaugeTemp(gParam); +// GaugeFieldParam gParamDummy(*gaugeSmeared); +// GaugeField gaugeW0(gParamDummy); +// GaugeField gaugeW1(gParamDummy); +// GaugeField gaugeW2(gParamDummy); +// GaugeField gaugeVT(gParamDummy); +// GaugeField gauge_out(gParamDummy); + +// GaugeFieldParam gParam(*gaugePrecise); +// gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct +// GaugeField gaugeTemp(gParam); - auto n = smear_param->adj_n_save; +// auto n = smear_param->adj_n_save; - std::vector gauge_stages(n,gParamDummy); - //Can also do below - //creates copies std::vector gauge_stages(n,*gaugeSmeared); +// std::vector gauge_stages(n,gParamDummy); +// //Can also do below +// //creates copies std::vector gauge_stages(n,*gaugeSmeared); - GaugeField &gin = *gaugeSmeared; - GaugeField &gout = gauge_out; +// GaugeField &gin = *gaugeSmeared; +// GaugeField &gout = gauge_out; - // helper gauge field for Laplace operator - GaugeField precise; - GaugeFieldParam gParam_helper(*gaugePrecise); - gParam_helper.create = QUDA_NULL_FIELD_CREATE; - precise = GaugeField(gParam_helper); - - // spinor fields - ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); - ColorSpinorField fin_h(cpuParam); - - ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); - ColorSpinorField fin(deviceParam); - fin = fin_h; - - deviceParam.create = QUDA_NULL_FIELD_CREATE; - ColorSpinorField fout(deviceParam); +// // helper gauge field for Laplace operator +// GaugeField precise; +// GaugeFieldParam gParam_helper(*gaugePrecise); +// gParam_helper.create = QUDA_NULL_FIELD_CREATE; +// precise = GaugeField(gParam_helper); + +// // spinor fields +// ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); +// ColorSpinorField fin_h(cpuParam); + +// ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); +// ColorSpinorField fin(deviceParam); +// fin = fin_h; + +// deviceParam.create = QUDA_NULL_FIELD_CREATE; +// ColorSpinorField fout(deviceParam); - ColorSpinorField f_temp0(deviceParam); - ColorSpinorField f_temp1(deviceParam); - ColorSpinorField f_temp2(deviceParam); - ColorSpinorField f_temp3(deviceParam); - ColorSpinorField f_temp4(deviceParam); +// ColorSpinorField f_temp0(deviceParam); +// ColorSpinorField f_temp1(deviceParam); +// ColorSpinorField f_temp2(deviceParam); +// ColorSpinorField f_temp3(deviceParam); +// ColorSpinorField f_temp4(deviceParam); - unsigned int block_length = smear_param->n_steps / smear_param->adj_n_save; - int block_counter = 0; - std::vector dist_save(smear_param->adj_n_save); - std::fill(dist_save.begin(), dist_save.end(), block_length); - dist_save.at(dist_save.size() - 1) = smear_param->n_steps - (smear_param->adj_n_save - 1) * block_length; +// unsigned int block_length = smear_param->n_steps / smear_param->adj_n_save; +// int block_counter = 0; +// std::vector dist_save(smear_param->adj_n_save); +// std::fill(dist_save.begin(), dist_save.end(), block_length); +// dist_save.at(dist_save.size() - 1) = smear_param->n_steps - (smear_param->adj_n_save - 1) * block_length; - for (unsigned int i = 0; i < smear_param->adj_n_save; i++) logQuda(QUDA_VERBOSE,"evolve distance of %d added \n",dist_save[i]); +// for (unsigned int i = 0; i < smear_param->adj_n_save; i++) logQuda(QUDA_VERBOSE,"evolve distance of %d added \n",dist_save[i]); - for (unsigned int i = 0; i < smear_param->adj_n_save; i++) { +// for (unsigned int i = 0; i < smear_param->adj_n_save; i++) { - gauge_stages[i] = gout; - for (unsigned int j = 0; j < block_length; j++){ - if (i > 0) std::swap(gout,gin); - WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); - } +// gauge_stages[i] = gout; +// for (unsigned int j = 0; j < block_length; j++){ +// if (i > 0) std::swap(gout,gin); +// WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); +// } - } - std::vector> sf_list; - sf_list = {f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; - std::vector> gf_list; - gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; +// } +// std::vector> sf_list; +// sf_list = {f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; +// std::vector> gf_list; +// gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; - for (int i = gauge_stages.size() - 1; i >= 0; --i) { - //first load correct gauge field (for beginning of the loop, it is the final gauge list element) - if (i < gauge_stages.size() - 1) gf_list.at(0) = std::ref(gauge_stages[i]); +// for (int i = gauge_stages.size() - 1; i >= 0; --i) { +// //first load correct gauge field (for beginning of the loop, it is the final gauge list element) +// if (i < gauge_stages.size() - 1) gf_list.at(0) = std::ref(gauge_stages[i]); - adjSafeEvolve(sf_list,gf_list,smear_param,dist_save[i],profileAdjGFlowNB); +// adjSafeEvolve(sf_list,gf_list,smear_param,dist_save[i],profileAdjGFlowNB); - logQuda(QUDA_VERBOSE," block number %d successfully deployed \n",i); - } +// logQuda(QUDA_VERBOSE," block number %d successfully deployed \n",i); +// } - cpuParam.v = h_out; - cpuParam.location = inv_param->output_location; - ColorSpinorField fout_h(cpuParam); - fout_h = sf_list[1].get(); +// cpuParam.v = h_out; +// cpuParam.location = inv_param->output_location; +// ColorSpinorField fout_h(cpuParam); +// fout_h = sf_list[1].get(); - popOutputPrefix(); +// popOutputPrefix(); } @@ -5850,9 +5848,11 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu } for (unsigned int j = 0; j < hier_list[i]; j++){ - if (i > 0) std::swap(gout,gin); + if (j > 0) std::swap(gout,gin); WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); + printf("hier gauge # %d:\n",j+1); + gout.PrintMatrix(0,0,0,0); } @@ -5872,10 +5872,10 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); - printf("after first step hier, out3 \n"); - sf_list[3].get().PrintVector(0,0,0); + printf("first hier gauge \n"); gauge_stages.back().PrintMatrix(0,0,0,0); + for (int j = 0; j < hier_list.size(); j++ ){ logQuda(QUDA_VERBOSE,"previous hier list element %d : %d \n",j,hier_list[j]); } @@ -5887,10 +5887,11 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu logQuda(QUDA_VERBOSE," now in final serial stage of hierarchial evolution \n"); for (int i = gauge_stages.size() - 1; i >= 0; --i) { //first load correct gauge field (for beginning of the loop, it is the final gauge list element) - printf("after beginning of second step hier, out3 \n"); - sf_list[3].get().PrintVector(0,0,0); + printf("new hier gauge \n"); gauge_stages[i].PrintMatrix(0,0,0,0); + + gf_list.at(0) = std::ref(gauge_stages[i]); adjSafeEvolve(sf_list,gf_list,smear_param,hier_list[i],profileAdjGFlowHier); From edf5967dd36b8f64041dd35b773d6f2880c44e4d Mon Sep 17 00:00:00 2001 From: rokarur Date: Wed, 20 Nov 2024 01:06:59 -0800 Subject: [PATCH 27/53] apparently working for n nsave steps --- lib/interface_quda.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 33929fdea6..b94830e4f0 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5846,6 +5846,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu logQuda(QUDA_VERBOSE,"we first set gin to the first index of the gauge_Steps vector\n"); gauge_stages[0] = gin; } + if (i > 0) std::swap(gout,gin); for (unsigned int j = 0; j < hier_list[i]; j++){ if (j > 0) std::swap(gout,gin); From 0fc7deb08e04ac24b21b1bee483678e05114e1b6 Mon Sep 17 00:00:00 2001 From: rokarur Date: Wed, 20 Nov 2024 01:37:56 -0800 Subject: [PATCH 28/53] clean up interface --- lib/interface_quda.cpp | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index b94830e4f0..53412ebb9c 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5465,8 +5465,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu if (i == 0) g_W0 = gin; else std::swap(g_W0,g_VT); - printf("our safe gauge field is \n"); - g_W0.PrintMatrix(0,0,0,0); + GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); @@ -5513,9 +5512,6 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu fout = f_temp0; //redefining f_temp0 to restart loop f_temp3 = f_temp0; - printf("after first step safe, out in \n"); - f_temp0.PrintVector(0,0,0); - printf("\n\n"); } cpuParam.v = h_out; @@ -5852,8 +5848,6 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu if (j > 0) std::swap(gout,gin); WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); - printf("hier gauge # %d:\n",j+1); - gout.PrintMatrix(0,0,0,0); } @@ -5872,11 +5866,6 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); - - - printf("first hier gauge \n"); - gauge_stages.back().PrintMatrix(0,0,0,0); - for (int j = 0; j < hier_list.size(); j++ ){ logQuda(QUDA_VERBOSE,"previous hier list element %d : %d \n",j,hier_list[j]); } @@ -5888,10 +5877,6 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu logQuda(QUDA_VERBOSE," now in final serial stage of hierarchial evolution \n"); for (int i = gauge_stages.size() - 1; i >= 0; --i) { //first load correct gauge field (for beginning of the loop, it is the final gauge list element) - - printf("new hier gauge \n"); - gauge_stages[i].PrintMatrix(0,0,0,0); - gf_list.at(0) = std::ref(gauge_stages[i]); @@ -5899,8 +5884,6 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu logQuda(QUDA_VERBOSE," block number %d successfully deployed \n",i); } - printf("after first step hier, out in \n"); - sf_list[0].get().PrintVector(0,0,0); logQuda(QUDA_VERBOSE," hierarchial evolution completed \n"); break; } @@ -5926,15 +5909,11 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu } - fout = sf_list[0].get(); cpuParam.v = h_out; cpuParam.location = inv_param->output_location; ColorSpinorField fout_h(cpuParam); - fout_h = fout; - - printf("after final step hier, out in \n"); - fout.PrintVector(0,0,0); + fout_h = sf_list[0].get(); popOutputPrefix(); From 3bfa0421a16757310df4aaf6343f45efbbe79433 Mon Sep 17 00:00:00 2001 From: rokarur Date: Wed, 20 Nov 2024 03:01:20 -0800 Subject: [PATCH 29/53] cleaning log messages plus NB section removed (useless and irritating) --- lib/interface_quda.cpp | 135 ++++++----------------------------------- 1 file changed, 18 insertions(+), 117 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 53412ebb9c..450b229ca0 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5524,7 +5524,6 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu void adjSafeEvolve(std::vector> sf_list,std::vector> gf_list, QudaGaugeSmearParam *smear_param, unsigned int ns_safe, TimeProfile &profile) { - printf("nsafe = %d \n",ns_safe); const GaugeField gin = gf_list[0].get(); GaugeField &g_W0 = gf_list[0].get(); GaugeField &g_W1 = gf_list[1].get(); @@ -5653,105 +5652,6 @@ int modify_hier_list(std::vector &hier_list, int n_b, int n_save, int thres } void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param){ - -// auto profile = pushProfile(profileAdjGFlowNB); -// pushOutputPrefix("performAdjGFlowQudaNB: "); -// checkGaugeSmearParam(smear_param); - -// // pushVerbosity(inv_param->verbosity); -// if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); - -// if (smear_param->restart) { -// if (gaugeSmeared == nullptr) errorQuda("gaugeSmeared must be loaded"); -// } else { -// if (gaugePrecise == nullptr) errorQuda("Gauge field must be loaded"); -// freeUniqueGaugeQuda(QUDA_SMEARED_LINKS); -// gaugeSmeared = createExtendedGauge(*gaugePrecise, R, profileAdjGFlowNB); -// } - -// GaugeFieldParam gParamDummy(*gaugeSmeared); -// GaugeField gaugeW0(gParamDummy); -// GaugeField gaugeW1(gParamDummy); -// GaugeField gaugeW2(gParamDummy); -// GaugeField gaugeVT(gParamDummy); -// GaugeField gauge_out(gParamDummy); - -// GaugeFieldParam gParam(*gaugePrecise); -// gParam.reconstruct = QUDA_RECONSTRUCT_NO; // temporary field is not on manifold so cannot use reconstruct -// GaugeField gaugeTemp(gParam); - -// auto n = smear_param->adj_n_save; - -// std::vector gauge_stages(n,gParamDummy); -// //Can also do below -// //creates copies std::vector gauge_stages(n,*gaugeSmeared); - -// GaugeField &gin = *gaugeSmeared; -// GaugeField &gout = gauge_out; - -// // helper gauge field for Laplace operator -// GaugeField precise; -// GaugeFieldParam gParam_helper(*gaugePrecise); -// gParam_helper.create = QUDA_NULL_FIELD_CREATE; -// precise = GaugeField(gParam_helper); - -// // spinor fields -// ColorSpinorParam cpuParam(h_in, *inv_param, gaugePrecise->X(), false, inv_param->input_location); -// ColorSpinorField fin_h(cpuParam); - -// ColorSpinorParam deviceParam(cpuParam, *inv_param, QUDA_CUDA_FIELD_LOCATION); -// ColorSpinorField fin(deviceParam); -// fin = fin_h; - -// deviceParam.create = QUDA_NULL_FIELD_CREATE; -// ColorSpinorField fout(deviceParam); - -// ColorSpinorField f_temp0(deviceParam); -// ColorSpinorField f_temp1(deviceParam); -// ColorSpinorField f_temp2(deviceParam); -// ColorSpinorField f_temp3(deviceParam); -// ColorSpinorField f_temp4(deviceParam); - - - -// unsigned int block_length = smear_param->n_steps / smear_param->adj_n_save; -// int block_counter = 0; -// std::vector dist_save(smear_param->adj_n_save); -// std::fill(dist_save.begin(), dist_save.end(), block_length); -// dist_save.at(dist_save.size() - 1) = smear_param->n_steps - (smear_param->adj_n_save - 1) * block_length; - -// for (unsigned int i = 0; i < smear_param->adj_n_save; i++) logQuda(QUDA_VERBOSE,"evolve distance of %d added \n",dist_save[i]); - -// for (unsigned int i = 0; i < smear_param->adj_n_save; i++) { - -// gauge_stages[i] = gout; -// for (unsigned int j = 0; j < block_length; j++){ -// if (i > 0) std::swap(gout,gin); -// WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); -// } - -// } -// std::vector> sf_list; -// sf_list = {f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; -// std::vector> gf_list; -// gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; - - -// for (int i = gauge_stages.size() - 1; i >= 0; --i) { -// //first load correct gauge field (for beginning of the loop, it is the final gauge list element) -// if (i < gauge_stages.size() - 1) gf_list.at(0) = std::ref(gauge_stages[i]); - -// adjSafeEvolve(sf_list,gf_list,smear_param,dist_save[i],profileAdjGFlowNB); - -// logQuda(QUDA_VERBOSE," block number %d successfully deployed \n",i); -// } - -// cpuParam.v = h_out; -// cpuParam.location = inv_param->output_location; -// ColorSpinorField fout_h(cpuParam); -// fout_h = sf_list[1].get(); - -// popOutputPrefix(); } @@ -5823,14 +5723,15 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu f_temp3 = fin; int n_b = ceil(pow(1. * smear_param->n_steps, 1. / (smear_param->adj_n_save + 1) )); - logQuda(QUDA_VERBOSE,"Hierarchical block n_b: %d\n\n",n_b); + logQuda(QUDA_SUMMARIZE,"Hierarchical block n_b: %d\n\n",n_b); int ret_idx = 0; int threshold = smear_param->hier_threshold; std::vector hier_list; //The first stage is saved at the very beginning, so its presence is implicit hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save); - logQuda(QUDA_VERBOSE,"heir list size is %d\n",hier_list.size()); - if (threshold < hier_list.back()) threshold = hier_list.back(); + logQuda(QUDA_SUMMARIZE,"hier list size (number of gauge fields to save) is %d\n",hier_list.size()); + if (threshold < hier_list.back()) {threshold = hier_list.back(); logQuda(QUDA_SUMMARIZE, "threshold changed to %d",threshold);} + else logQuda(QUDA_SUMMARIZE, "threshold is %d",threshold); if (hier_list.empty()) errorQuda("hier_list is not populated\n"); if (hier_list.size() != gauge_stages.size()) errorQuda("hier_list is not same size as gauge_stages \n"); @@ -5839,7 +5740,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu if (i == 0){ - logQuda(QUDA_VERBOSE,"we first set gin to the first index of the gauge_Steps vector\n"); + logQuda(QUDA_VERBOSE,"we first set gin to the first index of the gauge_steps vector\n"); gauge_stages[0] = gin; } if (i > 0) std::swap(gout,gin); @@ -5850,8 +5751,6 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); } - - // if (i > 0) gauge_stages[i + 1] = gout; } @@ -5860,16 +5759,19 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu std::vector> gf_list; gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; - + int hier_loop_counter = 0; while (ret_idx != -1){ - logQuda(QUDA_VERBOSE,"Starting a hierarchical loop log\n"); + logQuda(QUDA_VERBOSE,"Hier loop count %d has begun \n",hier_loop_counter); + logQuda(QUDA_DEBUG_VERBOSE,"Starting a hierarchical loop log: \n"); adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); + logQuda(QUDA_DEBUG_VERBOSE,"Previous hier list elements: \n"); for (int j = 0; j < hier_list.size(); j++ ){ - logQuda(QUDA_VERBOSE,"previous hier list element %d : %d \n",j,hier_list[j]); + logQuda(QUDA_DEBUG_VERBOSE,"%d \n",hier_list[j]); } - logQuda(QUDA_VERBOSE,"\n"); + logQuda(QUDA_DEBUG_VERBOSE,"\n"); + hier_list.pop_back(); gauge_stages.pop_back(); ret_idx = modify_hier_list(hier_list, n_b, smear_param->adj_n_save, threshold); @@ -5884,18 +5786,18 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu logQuda(QUDA_VERBOSE," block number %d successfully deployed \n",i); } - logQuda(QUDA_VERBOSE," hierarchial evolution completed \n"); + logQuda(QUDA_DEBUG_VERBOSE," hierarchial evolution completed \n"); break; } GaugeField g_2(gParamDummy); GaugeField g_1 = gauge_stages[ret_idx]; + logQuda(QUDA_DEBUG_VERBOSE,"Modified hier list elements: \n"); for (int j = 0; j < hier_list.size(); j++ ){ - logQuda(QUDA_VERBOSE,"modified hier list element %d : %d \n",j,hier_list[j]); + logQuda(QUDA_DEBUG_VERBOSE,"%d \n",hier_list[j]); } - - logQuda(QUDA_VERBOSE,"ret idx : %d \n",ret_idx); + logQuda(QUDA_DEBUG_VERBOSE,"\n"); for (unsigned int j = 0; j < hier_list[ret_idx]; j++){ if (j > 0) std::swap(g_2,g_1); @@ -5903,11 +5805,10 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu } // break; gauge_stages.insert(gauge_stages.begin() + ret_idx + 1, g_2); - logQuda(QUDA_VERBOSE,"recycled gauge field placed *before* index %d\n\n\n",ret_idx + 1); + logQuda(QUDA_DEBUG_VERBOSE,"recycled gauge field placed *before* index %d\n\n",ret_idx + 1); gf_list.at(0) = std::ref(gauge_stages.back()); + hier_loop_counter += 1; - - } cpuParam.v = h_out; From a6977ed19a03f54b3f006a35641e52737b8d25c6 Mon Sep 17 00:00:00 2001 From: rokarur Date: Wed, 20 Nov 2024 04:13:54 -0800 Subject: [PATCH 30/53] removed all NB stuff from header files...etc and changed config on Safe --- include/quda.h | 12 +----------- lib/interface_quda.cpp | 6 +----- tests/su3_adj_test.cpp | 2 +- tests/su3_ferm_test.cpp | 2 +- 4 files changed, 4 insertions(+), 18 deletions(-) diff --git a/include/quda.h b/include/quda.h index b14ac15fe8..705e0d9e26 100644 --- a/include/quda.h +++ b/include/quda.h @@ -1705,17 +1705,7 @@ extern "C" { * @param[in,out] obs_param Parameter struct that defines which * observables we are making and the resulting observables. */ - void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, int nsteps); - - /** - * Performs Adjoint Gradient Flow (gauge + fermion) the "NB" way on gaugePrecise and stores it in gaugeSmeared - * @param[out] h_out Output fermion field - * @param[in] h_in Input fermion field - * @param[in] smear_param Parameter struct that defines the computation parameters - * @param[in,out] obs_param Parameter struct that defines which - * observables we are making and the resulting observables. - */ - void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); + void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); /** * Performs Adjoint Gradient Flow (gauge + fermion) the Hierarchical way on gaugePrecise and stores it in gaugeSmeared diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 450b229ca0..8a131742a0 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5379,7 +5379,7 @@ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaG // perform adjoint (backwards) gradient flow on gauge and spinor field following the algorithm in arXiv:1302.5246 (Appendix D) // the gauge flow steps are identical to Wilson Flow algorithm in arXiv:1006.4518 (Vt <-> W3) -void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param,int nsteps) +void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param) { auto profile = pushProfile(profileAdjGFlowSafe); @@ -5650,10 +5650,6 @@ int modify_hier_list(std::vector &hier_list, int n_b, int n_save, int thres return result; } - -void performAdjGFlowNB(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param){ - -} void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param){ diff --git a/tests/su3_adj_test.cpp b/tests/su3_adj_test.cpp index 90b9d67121..d7f31f50d9 100644 --- a/tests/su3_adj_test.cpp +++ b/tests/su3_adj_test.cpp @@ -271,7 +271,7 @@ int main(int argc, char **argv) } // performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); performAdjGFlowHier(check_out1.data(),check.data(), &invParam, &smear_param); - performAdjGFlowSafe(check_out.data(),check.data(), &invParam, &smear_param, 50); + performAdjGFlowSafe(check_out.data(),check.data(), &invParam, &smear_param); break; diff --git a/tests/su3_ferm_test.cpp b/tests/su3_ferm_test.cpp index dc0f02306b..d97de03ae7 100644 --- a/tests/su3_ferm_test.cpp +++ b/tests/su3_ferm_test.cpp @@ -258,7 +258,7 @@ int main(int argc, char **argv) obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; } // performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); - performAdjGFlowSafe(check.data(),check_out.data(), &invParam, &smear_param,3); + performAdjGFlowSafe(check.data(),check_out.data(), &invParam, &smear_param); break; } default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); From 5d455affbfb3739ccb4eb9dcc8daab62fbe0317e Mon Sep 17 00:00:00 2001 From: rokarur Date: Wed, 20 Nov 2024 17:04:20 -0800 Subject: [PATCH 31/53] merged two fermion tests --- lib/interface_quda.cpp | 3 - tests/CMakeLists.txt | 13 +- tests/su3_adj_test.cpp | 306 ---------------------------------------- tests/su3_ferm_test.cpp | 282 ------------------------------------ 4 files changed, 4 insertions(+), 600 deletions(-) delete mode 100644 tests/su3_adj_test.cpp delete mode 100644 tests/su3_ferm_test.cpp diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 8a131742a0..df561a1db8 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5415,9 +5415,6 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu //necessary? if (gParamDummy.order <= 4) gParamDummy.ghostExchange = QUDA_GHOST_EXCHANGE_NO; - - auto smear_type = QUDA_GAUGE_SMEAR_WILSON_FLOW; - // helper gauge field for Laplace operator GaugeField precise; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b72bc37426..de9d7166a2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -204,15 +204,10 @@ target_link_libraries(su3_test ${TEST_LIBS}) quda_checkbuildtest(su3_test QUDA_BUILD_ALL_TESTS) install(TARGETS su3_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) -add_executable(su3_ferm_test su3_ferm_test.cpp) -target_link_libraries(su3_ferm_test ${TEST_LIBS}) -quda_checkbuildtest(su3_ferm_test QUDA_BUILD_ALL_TESTS) -install(TARGETS su3_ferm_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) - -add_executable(su3_adj_test su3_adj_test.cpp) -target_link_libraries(su3_adj_test ${TEST_LIBS}) -quda_checkbuildtest(su3_adj_test QUDA_BUILD_ALL_TESTS) -install(TARGETS su3_adj_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) +add_executable(su3_fermion_test su3_fermion_test.cpp) +target_link_libraries(su3_fermion_test ${TEST_LIBS}) +quda_checkbuildtest(su3_fermion_test QUDA_BUILD_ALL_TESTS) +install(TARGETS su3_fermion_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) add_executable(pack_test pack_test.cpp) target_link_libraries(pack_test ${TEST_LIBS}) diff --git a/tests/su3_adj_test.cpp b/tests/su3_adj_test.cpp deleted file mode 100644 index d7f31f50d9..0000000000 --- a/tests/su3_adj_test.cpp +++ /dev/null @@ -1,306 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -// In a typical application, quda.h is the only QUDA header required. -#include - -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - -// Smearing variables -double gauge_smear_rho = 0.1; -double gauge_smear_epsilon = 0.1; -double gauge_smear_alpha = 0.6; -double gauge_smear_alpha1 = 0.75; -double gauge_smear_alpha2 = 0.6; -double gauge_smear_alpha3 = 0.3; -int gauge_smear_steps = 50; -int gauge_n_save = 3; -int hier_threshold = 6; -QudaGaugeSmearType gauge_smear_type = QUDA_GAUGE_SMEAR_STOUT; -int gauge_smear_dir_ignore = -1; -int measurement_interval = 5; -bool su_project = true; - -void display_test_info() -{ - printfQuda("running the following test:\n"); - - printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension\n"); - printfQuda("%s %s %s %s %d/%d/%d %d\n", get_prec_str(prec), - get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, - tdim); - - // Specific test - printfQuda("\n%s smearing\n", get_gauge_smear_str(gauge_smear_type)); - switch (gauge_smear_type) { - case QUDA_GAUGE_SMEAR_APE: printfQuda(" - alpha %f\n", gauge_smear_alpha); break; - case QUDA_GAUGE_SMEAR_STOUT: printfQuda(" - rho %f\n", gauge_smear_rho); break; - case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: - printfQuda(" - rho %f\n", gauge_smear_rho); - printfQuda(" - epsilon %f\n", gauge_smear_epsilon); - break; - case QUDA_GAUGE_SMEAR_HYP: - printfQuda(" - alpha1 %f\n", gauge_smear_alpha1); - printfQuda(" - alpha2 %f\n", gauge_smear_alpha2); - printfQuda(" - alpha3 %f\n", gauge_smear_alpha3); - break; - case QUDA_GAUGE_SMEAR_WILSON_FLOW: - case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: printfQuda(" - epsilon %f\n", gauge_smear_epsilon); break; - default: errorQuda("Undefined test type %d given", test_type); - } - printfQuda(" - smearing steps %d\n", gauge_smear_steps); - printfQuda(" - smearing ignore direction %d\n", gauge_smear_dir_ignore); - printfQuda(" - Measurement interval %d\n", measurement_interval); - - printfQuda("Grid partition info: X Y Z T\n"); - printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), - dimPartitioned(3)); - return; -} - -void add_su3_option_group(std::shared_ptr quda_app) -{ - CLI::TransformPairs gauge_smear_type_map {{"ape", QUDA_GAUGE_SMEAR_APE}, - {"stout", QUDA_GAUGE_SMEAR_STOUT}, - {"ovrimp-stout", QUDA_GAUGE_SMEAR_OVRIMP_STOUT}, - {"hyp", QUDA_GAUGE_SMEAR_HYP}, - {"wilson", QUDA_GAUGE_SMEAR_WILSON_FLOW}, - {"symanzik", QUDA_GAUGE_SMEAR_SYMANZIK_FLOW}}; - - // Option group for SU(3) related options - auto opgroup = quda_app->add_option_group("SU(3)", "Options controlling SU(3) tests"); - - opgroup - ->add_option( - "--su3-smear-type", - gauge_smear_type, "The type of action to use in the smearing. Options: APE, Stout, Over Improved Stout, HYP, Wilson Flow, Symanzik Flow (default stout)") - ->transform(CLI::QUDACheckedTransformer(gauge_smear_type_map)); - ; - opgroup->add_option("--su3-smear-alpha", gauge_smear_alpha, "alpha coefficient for APE smearing (default 0.6)"); - - opgroup->add_option("--su3-smear-rho", gauge_smear_rho, - "rho coefficient for Stout and Over-Improved Stout smearing (default 0.1)"); - - opgroup->add_option("--su3-smear-epsilon", gauge_smear_epsilon, - "epsilon coefficient for Over-Improved Stout smearing or Wilson flow (default 0.1)"); - - opgroup->add_option("--su3-smear-alpha1", gauge_smear_alpha1, "alpha1 coefficient for HYP smearing (default 0.75)"); - opgroup->add_option("--su3-smear-alpha2", gauge_smear_alpha2, "alpha2 coefficient for HYP smearing (default 0.6)"); - opgroup->add_option("--su3-smear-alpha3", gauge_smear_alpha3, "alpha3 coefficient for HYP smearing (default 0.3)"); - - opgroup->add_option( - "--su3-smear-dir-ignore", gauge_smear_dir_ignore, - "Direction to be ignored by the smearing, negative value means decided by --su3-smear-type (default -1)"); - - opgroup->add_option("--su3-smear-steps", gauge_smear_steps, "The number of smearing steps to perform (default 50)"); - - opgroup->add_option("--su3-adj-gauge-nsave", gauge_n_save, "The number of gauge steps to save for hierarchical adj grad flow"); - - opgroup->add_option("--su3-hier-threshold", hier_threshold, "Minimum threshold for hierarchical adj grad flow"); - - opgroup->add_option("--su3-measurement-interval", measurement_interval, - "Measure the field energy and/or topological charge every Nth step (default 5) "); - - opgroup->add_option("--su3-project", su_project, - "Project smeared gauge onto su3 manifold at measurement interval (default true)"); -} - -int main(int argc, char **argv) -{ - - auto app = make_app(); - add_su3_option_group(app); - - try { - app->parse(argc, argv); - } catch (const CLI::ParseError &e) { - return app->exit(e); - } - - // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) - initComms(argc, argv, gridsize_from_cmdline); - - QudaGaugeParam gauge_param = newQudaGaugeParam(); - if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; - if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; - - setWilsonGaugeParam(gauge_param); - gauge_param.t_boundary = QUDA_PERIODIC_T; - setDims(gauge_param.X); - - // All user inputs are now defined - display_test_info(); - - void *gauge[4], *new_gauge[4]; - - for (int dir = 0; dir < 4; dir++) { - gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); - new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); - } - - initQuda(device_ordinal); - - setVerbosity(verbosity); - - // call srand() with a rank-dependent seed - initRand(); - - constructHostGaugeField(gauge, gauge_param, argc, argv); - // Load the gauge field to the device - loadGaugeQuda((void *)gauge, &gauge_param); - saveGaugeQuda(new_gauge, &gauge_param); - - // Prepare various perf info - long long flops_plaquette = 6ll * 597 * V; - long long flops_ploop = 198ll * V + 6 * V / gauge_param.X[3]; - - // Prepare a gauge observable struct - QudaGaugeObservableParam param = newQudaGaugeObservableParam(); - - // start the timer - quda::host_timer_t host_timer; - - // The user may specify which measurements they wish to perform/omit - // using the QudaGaugeObservableParam struct, and whether or not to - // perform suN projection at each measurement step. We recommend that - // users perform suN projection. - // A unique observable param struct is constructed for each measurement. - - // Gauge Smearing Routines - //--------------------------------------------------------------------------- - // Stout smearing should be equivalent to APE smearing - // on D dimensional lattices for rho = alpha/2*(D-1). - // Typical values for - // APE: alpha=0.6 - // Stout: rho=0.1 - // Over Improved Stout: rho=0.08, epsilon=-0.25 - // - // Typically, the user will use smearing for Q charge data only, so - // we hardcode to compute Q only and not the plaquette. Users may - // of course set these as they wish. SU(N) projection su_project=true is recommended. - QudaGaugeObservableParam *obs_param = new QudaGaugeObservableParam[gauge_smear_steps / measurement_interval + 1]; - for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { - obs_param[i] = newQudaGaugeObservableParam(); - obs_param[i].compute_plaquette = QUDA_BOOLEAN_FALSE; - obs_param[i].compute_qcharge = QUDA_BOOLEAN_TRUE; - obs_param[i].su_project = su_project ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - } - - // We here set all the problem parameters for all possible smearing types. - QudaGaugeSmearParam smear_param = newQudaGaugeSmearParam(); - smear_param.smear_type = gauge_smear_type; - smear_param.n_steps = gauge_smear_steps; - smear_param.adj_n_save = gauge_n_save; - smear_param.hier_threshold = hier_threshold; - smear_param.meas_interval = measurement_interval; - smear_param.alpha = gauge_smear_alpha; - smear_param.rho = gauge_smear_rho; - smear_param.epsilon = gauge_smear_epsilon; - smear_param.alpha1 = gauge_smear_alpha1; - smear_param.alpha2 = gauge_smear_alpha2; - smear_param.alpha3 = gauge_smear_alpha3; - smear_param.dir_ignore = gauge_smear_dir_ignore; - - quda::ColorSpinorField check,check_out,check_out1; - QudaInvertParam invParam = newQudaInvertParam(); - invParam.cpu_prec = QUDA_DOUBLE_PRECISION; - invParam.cuda_prec = QUDA_DOUBLE_PRECISION; - invParam.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; - invParam.dirac_order = QUDA_DIRAC_ORDER; - - constexpr int nSpin = 4; - constexpr int nColor = 3; - quda::ColorSpinorParam cs_param, cs_param_out; - cs_param.nColor = nColor; - cs_param.nSpin = nSpin; - cs_param.x = {xdim, ydim, zdim, tdim}; - cs_param.siteSubset = QUDA_FULL_SITE_SUBSET; - cs_param.setPrecision(invParam.cpu_prec); - cs_param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER; - cs_param.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; - cs_param.gammaBasis = invParam.gamma_basis; - cs_param.pc_type = QUDA_4D_PC; - cs_param.location = QUDA_CPU_FIELD_LOCATION; - cs_param.create = QUDA_NULL_FIELD_CREATE; - - cs_param_out = cs_param; - - constructWilsonTestSpinorParam(&cs_param, &invParam, &gauge_param); - check = quda::ColorSpinorField(cs_param); - //Add noise to spinor - quda::RNG rng(check, 1234); - spinorNoise(check, rng, QUDA_NOISE_GAUSS); - - // constructWilsonTestSpinorParam(&cs_param_out, &invParam, &gauge_param); - check_out = quda::ColorSpinorField(cs_param); - check_out1 = quda::ColorSpinorField(cs_param); - // constructWilsonTestSpinorParam(&cs_param, &inv_param, &gauge_param); - check.PrintVector(0,0,0); - check_out.PrintVector(0,0,0); - check_out1.PrintVector(0,0,0); - // quda::ColorSpinorField rngDummy(cs_param), rngDummy1(cs_param_out); - printf("Stage -1 passed\n"); - host_timer.start(); // start the timer - switch (smear_param.smear_type) { - case QUDA_GAUGE_SMEAR_APE: - case QUDA_GAUGE_SMEAR_STOUT: - case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: - case QUDA_GAUGE_SMEAR_HYP: { - performGaugeSmearQuda(&smear_param, obs_param); - break; - } - - // Here we use a typical use case which is different from simple smearing in that - // the user will want to compute the plaquette values to compute the gauge energy. - case QUDA_GAUGE_SMEAR_WILSON_FLOW: - case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: { - for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { - obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; - } - // performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); - performAdjGFlowHier(check_out1.data(),check.data(), &invParam, &smear_param); - performAdjGFlowSafe(check_out.data(),check.data(), &invParam, &smear_param); - - - break; - } - default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); - } - - host_timer.stop(); // stop the timer - - printf("Original spinor\n:"); - check.PrintVector(0,0,0); - printf("Hierarchical method\n:"); - check_out1.PrintVector(0,0,0); - - printf("Safe method\n:"); - check_out.PrintVector(0,0,0); - - printfQuda("Total time for gauge smearing = %g secs\n", host_timer.last()); - - if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); - - for (int dir = 0; dir < 4; dir++) { - host_free(gauge[dir]); - host_free(new_gauge[dir]); - } - - freeGaugeQuda(); - endQuda(); - - finalizeComms(); - return 0; -} diff --git a/tests/su3_ferm_test.cpp b/tests/su3_ferm_test.cpp deleted file mode 100644 index d97de03ae7..0000000000 --- a/tests/su3_ferm_test.cpp +++ /dev/null @@ -1,282 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -// In a typical application, quda.h is the only QUDA header required. -#include - -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - -// Smearing variables -double gauge_smear_rho = 0.1; -double gauge_smear_epsilon = 0.1; -double gauge_smear_alpha = 0.6; -double gauge_smear_alpha1 = 0.75; -double gauge_smear_alpha2 = 0.6; -double gauge_smear_alpha3 = 0.3; -int gauge_smear_steps = 5; -int gauge_n_save = 6; -QudaGaugeSmearType gauge_smear_type = QUDA_GAUGE_SMEAR_STOUT; -int gauge_smear_dir_ignore = -1; -int measurement_interval = 5; -bool su_project = true; - -void display_test_info() -{ - printfQuda("running the following test:\n"); - - printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension\n"); - printfQuda("%s %s %s %s %d/%d/%d %d\n", get_prec_str(prec), - get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, - tdim); - - // Specific test - printfQuda("\n%s smearing\n", get_gauge_smear_str(gauge_smear_type)); - switch (gauge_smear_type) { - case QUDA_GAUGE_SMEAR_APE: printfQuda(" - alpha %f\n", gauge_smear_alpha); break; - case QUDA_GAUGE_SMEAR_STOUT: printfQuda(" - rho %f\n", gauge_smear_rho); break; - case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: - printfQuda(" - rho %f\n", gauge_smear_rho); - printfQuda(" - epsilon %f\n", gauge_smear_epsilon); - break; - case QUDA_GAUGE_SMEAR_HYP: - printfQuda(" - alpha1 %f\n", gauge_smear_alpha1); - printfQuda(" - alpha2 %f\n", gauge_smear_alpha2); - printfQuda(" - alpha3 %f\n", gauge_smear_alpha3); - break; - case QUDA_GAUGE_SMEAR_WILSON_FLOW: - case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: printfQuda(" - epsilon %f\n", gauge_smear_epsilon); break; - default: errorQuda("Undefined test type %d given", test_type); - } - printfQuda(" - smearing steps %d\n", gauge_smear_steps); - printfQuda(" - smearing ignore direction %d\n", gauge_smear_dir_ignore); - printfQuda(" - Measurement interval %d\n", measurement_interval); - - printfQuda("Grid partition info: X Y Z T\n"); - printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), - dimPartitioned(3)); - return; -} - -void add_su3_option_group(std::shared_ptr quda_app) -{ - CLI::TransformPairs gauge_smear_type_map {{"ape", QUDA_GAUGE_SMEAR_APE}, - {"stout", QUDA_GAUGE_SMEAR_STOUT}, - {"ovrimp-stout", QUDA_GAUGE_SMEAR_OVRIMP_STOUT}, - {"hyp", QUDA_GAUGE_SMEAR_HYP}, - {"wilson", QUDA_GAUGE_SMEAR_WILSON_FLOW}, - {"symanzik", QUDA_GAUGE_SMEAR_SYMANZIK_FLOW}}; - - // Option group for SU(3) related options - auto opgroup = quda_app->add_option_group("SU(3)", "Options controlling SU(3) tests"); - - opgroup - ->add_option( - "--su3-smear-type", - gauge_smear_type, "The type of action to use in the smearing. Options: APE, Stout, Over Improved Stout, HYP, Wilson Flow, Symanzik Flow (default stout)") - ->transform(CLI::QUDACheckedTransformer(gauge_smear_type_map)); - ; - opgroup->add_option("--su3-smear-alpha", gauge_smear_alpha, "alpha coefficient for APE smearing (default 0.6)"); - - opgroup->add_option("--su3-smear-rho", gauge_smear_rho, - "rho coefficient for Stout and Over-Improved Stout smearing (default 0.1)"); - - opgroup->add_option("--su3-smear-epsilon", gauge_smear_epsilon, - "epsilon coefficient for Over-Improved Stout smearing or Wilson flow (default 0.1)"); - - opgroup->add_option("--su3-smear-alpha1", gauge_smear_alpha1, "alpha1 coefficient for HYP smearing (default 0.75)"); - opgroup->add_option("--su3-smear-alpha2", gauge_smear_alpha2, "alpha2 coefficient for HYP smearing (default 0.6)"); - opgroup->add_option("--su3-smear-alpha3", gauge_smear_alpha3, "alpha3 coefficient for HYP smearing (default 0.3)"); - - opgroup->add_option( - "--su3-smear-dir-ignore", gauge_smear_dir_ignore, - "Direction to be ignored by the smearing, negative value means decided by --su3-smear-type (default -1)"); - - opgroup->add_option("--su3-smear-steps", gauge_smear_steps, "The number of smearing steps to perform (default 50)"); - - opgroup->add_option("--su3-measurement-interval", measurement_interval, - "Measure the field energy and/or topological charge every Nth step (default 5) "); - - opgroup->add_option("--su3-project", su_project, - "Project smeared gauge onto su3 manifold at measurement interval (default true)"); -} - -int main(int argc, char **argv) -{ - - auto app = make_app(); - add_su3_option_group(app); - - try { - app->parse(argc, argv); - } catch (const CLI::ParseError &e) { - return app->exit(e); - } - - // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) - initComms(argc, argv, gridsize_from_cmdline); - - QudaGaugeParam gauge_param = newQudaGaugeParam(); - if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; - if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; - - setWilsonGaugeParam(gauge_param); - gauge_param.t_boundary = QUDA_PERIODIC_T; - setDims(gauge_param.X); - - // All user inputs are now defined - display_test_info(); - - void *gauge[4], *new_gauge[4]; - - for (int dir = 0; dir < 4; dir++) { - gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); - new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); - } - - initQuda(device_ordinal); - - setVerbosity(verbosity); - - // call srand() with a rank-dependent seed - initRand(); - - constructHostGaugeField(gauge, gauge_param, argc, argv); - // Load the gauge field to the device - loadGaugeQuda((void *)gauge, &gauge_param); - saveGaugeQuda(new_gauge, &gauge_param); - - // Prepare various perf info - long long flops_plaquette = 6ll * 597 * V; - long long flops_ploop = 198ll * V + 6 * V / gauge_param.X[3]; - - // Prepare a gauge observable struct - QudaGaugeObservableParam param = newQudaGaugeObservableParam(); - - // start the timer - quda::host_timer_t host_timer; - - // The user may specify which measurements they wish to perform/omit - // using the QudaGaugeObservableParam struct, and whether or not to - // perform suN projection at each measurement step. We recommend that - // users perform suN projection. - // A unique observable param struct is constructed for each measurement. - - // Gauge Smearing Routines - //--------------------------------------------------------------------------- - // Stout smearing should be equivalent to APE smearing - // on D dimensional lattices for rho = alpha/2*(D-1). - // Typical values for - // APE: alpha=0.6 - // Stout: rho=0.1 - // Over Improved Stout: rho=0.08, epsilon=-0.25 - // - // Typically, the user will use smearing for Q charge data only, so - // we hardcode to compute Q only and not the plaquette. Users may - // of course set these as they wish. SU(N) projection su_project=true is recommended. - QudaGaugeObservableParam *obs_param = new QudaGaugeObservableParam[gauge_smear_steps / measurement_interval + 1]; - for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { - obs_param[i] = newQudaGaugeObservableParam(); - obs_param[i].compute_plaquette = QUDA_BOOLEAN_FALSE; - obs_param[i].compute_qcharge = QUDA_BOOLEAN_TRUE; - obs_param[i].su_project = su_project ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - } - - // We here set all the problem parameters for all possible smearing types. - QudaGaugeSmearParam smear_param = newQudaGaugeSmearParam(); - smear_param.smear_type = gauge_smear_type; - smear_param.n_steps = gauge_smear_steps; - smear_param.adj_n_save = gauge_n_save; - smear_param.meas_interval = measurement_interval; - smear_param.alpha = gauge_smear_alpha; - smear_param.rho = gauge_smear_rho; - smear_param.epsilon = gauge_smear_epsilon; - smear_param.alpha1 = gauge_smear_alpha1; - smear_param.alpha2 = gauge_smear_alpha2; - smear_param.alpha3 = gauge_smear_alpha3; - smear_param.dir_ignore = gauge_smear_dir_ignore; - - quda::ColorSpinorField check,check_out; - QudaInvertParam invParam = newQudaInvertParam(); - invParam.cpu_prec = QUDA_DOUBLE_PRECISION; - invParam.cuda_prec = QUDA_DOUBLE_PRECISION; - invParam.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; - invParam.dirac_order = QUDA_DIRAC_ORDER; - - constexpr int nSpin = 4; - constexpr int nColor = 3; - quda::ColorSpinorParam cs_param, cs_param_out; - cs_param.nColor = nColor; - cs_param.nSpin = nSpin; - cs_param.x = {xdim, ydim, zdim, tdim}; - cs_param.siteSubset = QUDA_FULL_SITE_SUBSET; - cs_param.setPrecision(invParam.cpu_prec); - cs_param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER; - cs_param.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; - cs_param.gammaBasis = invParam.gamma_basis; - cs_param.pc_type = QUDA_4D_PC; - cs_param.location = QUDA_CPU_FIELD_LOCATION; - cs_param.create = QUDA_NULL_FIELD_CREATE; - - cs_param_out = cs_param; - - constructWilsonTestSpinorParam(&cs_param, &invParam, &gauge_param); - check = quda::ColorSpinorField(cs_param); - // constructWilsonTestSpinorParam(&cs_param_out, &invParam, &gauge_param); - check_out = quda::ColorSpinorField(cs_param); - // constructWilsonTestSpinorParam(&cs_param, &inv_param, &gauge_param); - - - // quda::ColorSpinorField rngDummy(cs_param), rngDummy1(cs_param_out); - printf("Stage -1 passed\n"); - host_timer.start(); // start the timer - switch (smear_param.smear_type) { - case QUDA_GAUGE_SMEAR_APE: - case QUDA_GAUGE_SMEAR_STOUT: - case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: - case QUDA_GAUGE_SMEAR_HYP: { - performGaugeSmearQuda(&smear_param, obs_param); - break; - } - - // Here we use a typical use case which is different from simple smearing in that - // the user will want to compute the plaquette values to compute the gauge energy. - case QUDA_GAUGE_SMEAR_WILSON_FLOW: - case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: { - for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { - obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; - } - // performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); - performAdjGFlowSafe(check.data(),check_out.data(), &invParam, &smear_param); - break; - } - default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); - } - - host_timer.stop(); // stop the timer - printfQuda("Total time for gauge smearing = %g secs\n", host_timer.last()); - - if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); - - for (int dir = 0; dir < 4; dir++) { - host_free(gauge[dir]); - host_free(new_gauge[dir]); - } - - freeGaugeQuda(); - endQuda(); - - finalizeComms(); - return 0; -} From 26615f4aafb447355f002234d530f7c8f6fa3d43 Mon Sep 17 00:00:00 2001 From: rokarur Date: Wed, 20 Nov 2024 17:05:12 -0800 Subject: [PATCH 32/53] add fermion test --- tests/su3_fermion_test.cpp | 306 +++++++++++++++++++++++++++++++++++++ 1 file changed, 306 insertions(+) create mode 100644 tests/su3_fermion_test.cpp diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp new file mode 100644 index 0000000000..3e5bb90488 --- /dev/null +++ b/tests/su3_fermion_test.cpp @@ -0,0 +1,306 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +// In a typical application, quda.h is the only QUDA header required. +#include + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +// Smearing variables +double gauge_smear_rho = 0.1; +double gauge_smear_epsilon = 0.1; +double gauge_smear_alpha = 0.6; +double gauge_smear_alpha1 = 0.75; +double gauge_smear_alpha2 = 0.6; +double gauge_smear_alpha3 = 0.3; +int gauge_smear_steps = 50; +int gauge_n_save = 3; +int hier_threshold = 6; +QudaGaugeSmearType gauge_smear_type = QUDA_GAUGE_SMEAR_STOUT; +int gauge_smear_dir_ignore = -1; +int measurement_interval = 5; +bool su_project = true; + +void display_test_info() +{ + printfQuda("running the following test:\n"); + + printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension\n"); + printfQuda("%s %s %s %s %d/%d/%d %d\n", get_prec_str(prec), + get_prec_str(prec_sloppy), get_recon_str(link_recon), get_recon_str(link_recon_sloppy), xdim, ydim, zdim, + tdim); + + // Specific test + printfQuda("\n%s smearing\n", get_gauge_smear_str(gauge_smear_type)); + switch (gauge_smear_type) { + case QUDA_GAUGE_SMEAR_APE: printfQuda(" - alpha %f\n", gauge_smear_alpha); break; + case QUDA_GAUGE_SMEAR_STOUT: printfQuda(" - rho %f\n", gauge_smear_rho); break; + case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: + printfQuda(" - rho %f\n", gauge_smear_rho); + printfQuda(" - epsilon %f\n", gauge_smear_epsilon); + break; + case QUDA_GAUGE_SMEAR_HYP: + printfQuda(" - alpha1 %f\n", gauge_smear_alpha1); + printfQuda(" - alpha2 %f\n", gauge_smear_alpha2); + printfQuda(" - alpha3 %f\n", gauge_smear_alpha3); + break; + case QUDA_GAUGE_SMEAR_WILSON_FLOW: + case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: printfQuda(" - epsilon %f\n", gauge_smear_epsilon); break; + default: errorQuda("Undefined test type %d given", test_type); + } + printfQuda(" - smearing steps %d\n", gauge_smear_steps); + printfQuda(" - smearing ignore direction %d\n", gauge_smear_dir_ignore); + printfQuda(" - Measurement interval %d\n", measurement_interval); + + printfQuda("Grid partition info: X Y Z T\n"); + printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2), + dimPartitioned(3)); + return; +} + +void add_su3_option_group(std::shared_ptr quda_app) +{ + CLI::TransformPairs gauge_smear_type_map {{"ape", QUDA_GAUGE_SMEAR_APE}, + {"stout", QUDA_GAUGE_SMEAR_STOUT}, + {"ovrimp-stout", QUDA_GAUGE_SMEAR_OVRIMP_STOUT}, + {"hyp", QUDA_GAUGE_SMEAR_HYP}, + {"wilson", QUDA_GAUGE_SMEAR_WILSON_FLOW}, + {"symanzik", QUDA_GAUGE_SMEAR_SYMANZIK_FLOW}}; + + // Option group for SU(3) related options + auto opgroup = quda_app->add_option_group("SU(3)", "Options controlling SU(3) tests"); + + opgroup + ->add_option( + "--su3-smear-type", + gauge_smear_type, "The type of action to use in the smearing. Options: APE, Stout, Over Improved Stout, HYP, Wilson Flow, Symanzik Flow (default stout)") + ->transform(CLI::QUDACheckedTransformer(gauge_smear_type_map)); + ; + opgroup->add_option("--su3-smear-alpha", gauge_smear_alpha, "alpha coefficient for APE smearing (default 0.6)"); + + opgroup->add_option("--su3-smear-rho", gauge_smear_rho, + "rho coefficient for Stout and Over-Improved Stout smearing (default 0.1)"); + + opgroup->add_option("--su3-smear-epsilon", gauge_smear_epsilon, + "epsilon coefficient for Over-Improved Stout smearing or Wilson flow (default 0.1)"); + + opgroup->add_option("--su3-smear-alpha1", gauge_smear_alpha1, "alpha1 coefficient for HYP smearing (default 0.75)"); + opgroup->add_option("--su3-smear-alpha2", gauge_smear_alpha2, "alpha2 coefficient for HYP smearing (default 0.6)"); + opgroup->add_option("--su3-smear-alpha3", gauge_smear_alpha3, "alpha3 coefficient for HYP smearing (default 0.3)"); + + opgroup->add_option( + "--su3-smear-dir-ignore", gauge_smear_dir_ignore, + "Direction to be ignored by the smearing, negative value means decided by --su3-smear-type (default -1)"); + + opgroup->add_option("--su3-smear-steps", gauge_smear_steps, "The number of smearing steps to perform (default 50)"); + + opgroup->add_option("--su3-adj-gauge-nsave", gauge_n_save, "The number of gauge steps to save for hierarchical adj grad flow"); + + opgroup->add_option("--su3-hier-threshold", hier_threshold, "Minimum threshold for hierarchical adj grad flow"); + + opgroup->add_option("--su3-measurement-interval", measurement_interval, + "Measure the field energy and/or topological charge every Nth step (default 5) "); + + opgroup->add_option("--su3-project", su_project, + "Project smeared gauge onto su3 manifold at measurement interval (default true)"); +} + +int main(int argc, char **argv) +{ + + auto app = make_app(); + add_su3_option_group(app); + + try { + app->parse(argc, argv); + } catch (const CLI::ParseError &e) { + return app->exit(e); + } + + // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp) + initComms(argc, argv, gridsize_from_cmdline); + + QudaGaugeParam gauge_param = newQudaGaugeParam(); + if (prec_sloppy == QUDA_INVALID_PRECISION) prec_sloppy = prec; + if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID) link_recon_sloppy = link_recon; + + setWilsonGaugeParam(gauge_param); + gauge_param.t_boundary = QUDA_PERIODIC_T; + setDims(gauge_param.X); + + // All user inputs are now defined + display_test_info(); + + void *gauge[4], *new_gauge[4]; + + for (int dir = 0; dir < 4; dir++) { + gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); + new_gauge[dir] = safe_malloc(V * gauge_site_size * host_gauge_data_type_size); + } + + initQuda(device_ordinal); + + setVerbosity(verbosity); + + // call srand() with a rank-dependent seed + initRand(); + + constructHostGaugeField(gauge, gauge_param, argc, argv); + // Load the gauge field to the device + loadGaugeQuda((void *)gauge, &gauge_param); + saveGaugeQuda(new_gauge, &gauge_param); + + // Prepare various perf info + long long flops_plaquette = 6ll * 597 * V; + long long flops_ploop = 198ll * V + 6 * V / gauge_param.X[3]; + + // Prepare a gauge observable struct + QudaGaugeObservableParam param = newQudaGaugeObservableParam(); + + // start the timer + quda::host_timer_t host_timer; + + // The user may specify which measurements they wish to perform/omit + // using the QudaGaugeObservableParam struct, and whether or not to + // perform suN projection at each measurement step. We recommend that + // users perform suN projection. + // A unique observable param struct is constructed for each measurement. + + // Gauge Smearing Routines + //--------------------------------------------------------------------------- + // Stout smearing should be equivalent to APE smearing + // on D dimensional lattices for rho = alpha/2*(D-1). + // Typical values for + // APE: alpha=0.6 + // Stout: rho=0.1 + // Over Improved Stout: rho=0.08, epsilon=-0.25 + // + // Typically, the user will use smearing for Q charge data only, so + // we hardcode to compute Q only and not the plaquette. Users may + // of course set these as they wish. SU(N) projection su_project=true is recommended. + QudaGaugeObservableParam *obs_param = new QudaGaugeObservableParam[gauge_smear_steps / measurement_interval + 1]; + for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { + obs_param[i] = newQudaGaugeObservableParam(); + obs_param[i].compute_plaquette = QUDA_BOOLEAN_FALSE; + obs_param[i].compute_qcharge = QUDA_BOOLEAN_TRUE; + obs_param[i].su_project = su_project ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + } + + // We here set all the problem parameters for all possible smearing types. + QudaGaugeSmearParam smear_param = newQudaGaugeSmearParam(); + smear_param.smear_type = gauge_smear_type; + smear_param.n_steps = gauge_smear_steps; + smear_param.adj_n_save = gauge_n_save; + smear_param.hier_threshold = hier_threshold; + smear_param.meas_interval = measurement_interval; + smear_param.alpha = gauge_smear_alpha; + smear_param.rho = gauge_smear_rho; + smear_param.epsilon = gauge_smear_epsilon; + smear_param.alpha1 = gauge_smear_alpha1; + smear_param.alpha2 = gauge_smear_alpha2; + smear_param.alpha3 = gauge_smear_alpha3; + smear_param.dir_ignore = gauge_smear_dir_ignore; + + quda::ColorSpinorField check,check_out,check_out1; + QudaInvertParam invParam = newQudaInvertParam(); + invParam.cpu_prec = QUDA_DOUBLE_PRECISION; + invParam.cuda_prec = QUDA_DOUBLE_PRECISION; + invParam.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; + invParam.dirac_order = QUDA_DIRAC_ORDER; + + constexpr int nSpin = 4; + constexpr int nColor = 3; + quda::ColorSpinorParam cs_param, cs_param_out; + cs_param.nColor = nColor; + cs_param.nSpin = nSpin; + cs_param.x = {xdim, ydim, zdim, tdim}; + cs_param.siteSubset = QUDA_FULL_SITE_SUBSET; + cs_param.setPrecision(invParam.cpu_prec); + cs_param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER; + cs_param.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; + cs_param.gammaBasis = invParam.gamma_basis; + cs_param.pc_type = QUDA_4D_PC; + cs_param.location = QUDA_CPU_FIELD_LOCATION; + cs_param.create = QUDA_NULL_FIELD_CREATE; + + cs_param_out = cs_param; + + constructWilsonTestSpinorParam(&cs_param, &invParam, &gauge_param); + check = quda::ColorSpinorField(cs_param); + //Add noise to spinor + quda::RNG rng(check, 1234); + spinorNoise(check, rng, QUDA_NOISE_GAUSS); + + // constructWilsonTestSpinorParam(&cs_param_out, &invParam, &gauge_param); + check_out = quda::ColorSpinorField(cs_param); + check_out1 = quda::ColorSpinorField(cs_param); + // constructWilsonTestSpinorParam(&cs_param, &inv_param, &gauge_param); + check.PrintVector(0,0,0); + check_out.PrintVector(0,0,0); + check_out1.PrintVector(0,0,0); + // quda::ColorSpinorField rngDummy(cs_param), rngDummy1(cs_param_out); + printf("Stage -1 passed\n"); + host_timer.start(); // start the timer + switch (smear_param.smear_type) { + case QUDA_GAUGE_SMEAR_APE: + case QUDA_GAUGE_SMEAR_STOUT: + case QUDA_GAUGE_SMEAR_OVRIMP_STOUT: + case QUDA_GAUGE_SMEAR_HYP: { + performGaugeSmearQuda(&smear_param, obs_param); + break; + } + + // Here we use a typical use case which is different from simple smearing in that + // the user will want to compute the plaquette values to compute the gauge energy. + case QUDA_GAUGE_SMEAR_WILSON_FLOW: + case QUDA_GAUGE_SMEAR_SYMANZIK_FLOW: { + for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { + obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; + } + // performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); + performAdjGFlowHier(check_out1.data(),check.data(), &invParam, &smear_param); + performAdjGFlowSafe(check_out.data(),check.data(), &invParam, &smear_param); + + + break; + } + default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); + } + + host_timer.stop(); // stop the timer + + printf("Original spinor\n:"); + check.PrintVector(0,0,0); + printf("Hierarchical method\n:"); + check_out1.PrintVector(0,1,0); + + printf("Safe method\n:"); + check_out.PrintVector(0,1,0); + + printfQuda("Total time for gauge smearing = %g secs\n", host_timer.last()); + + if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); + + for (int dir = 0; dir < 4; dir++) { + host_free(gauge[dir]); + host_free(new_gauge[dir]); + } + + freeGaugeQuda(); + endQuda(); + + finalizeComms(); + return 0; +} From 7483d2766fd33b2fb7649eb89748588452e61395 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 1 Dec 2024 01:11:17 -0800 Subject: [PATCH 33/53] added first pass correctness check and cleaning things up --- include/quda.h | 6 ++-- lib/interface_quda.cpp | 53 ++++++++++++++++++++++++------------ tests/su3_fermion_test.cpp | 56 ++++++++++++++++++++++++++++++-------- tests/utils/host_utils.cpp | 2 ++ 4 files changed, 86 insertions(+), 31 deletions(-) diff --git a/include/quda.h b/include/quda.h index 705e0d9e26..2dcd27b70f 100644 --- a/include/quda.h +++ b/include/quda.h @@ -1705,7 +1705,8 @@ extern "C" { * @param[in,out] obs_param Parameter struct that defines which * observables we are making and the resulting observables. */ - void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); + void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, + QudaGaugeObservableParam *obs_param); /** * Performs Adjoint Gradient Flow (gauge + fermion) the Hierarchical way on gaugePrecise and stores it in gaugeSmeared @@ -1715,7 +1716,8 @@ extern "C" { * @param[in,out] obs_param Parameter struct that defines which * observables we are making and the resulting observables. */ - void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); + void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, + QudaGaugeObservableParam *obs_param); /** * @brief Calculates a variety of gauge-field observables. If a diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index df561a1db8..1cc4ddb356 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -200,8 +200,6 @@ static TimeProfile profileGFlow("gFlowQuda"); //!< Profiler for gFlowQuda static TimeProfile profileAdjGFlowSafe("AdjgFlowSafeQuda"); -static TimeProfile profileAdjGFlowNB("AdjgFlowNBQuda"); - static TimeProfile profileAdjGFlowHier("AdjgFlowHierQuda"); //!< Profiler for projectSU3Quda @@ -5379,12 +5377,21 @@ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaG // perform adjoint (backwards) gradient flow on gauge and spinor field following the algorithm in arXiv:1302.5246 (Appendix D) // the gauge flow steps are identical to Wilson Flow algorithm in arXiv:1006.4518 (Vt <-> W3) -void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param) +void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, + QudaGaugeObservableParam *obs_param) { auto profile = pushProfile(profileAdjGFlowSafe); pushOutputPrefix("performAdjGFlowQudaSafe: "); checkGaugeSmearParam(smear_param); + + if (smear_param->n_steps < smear_param->adj_n_save ) { + + logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) > n_steps (%d); adj_n_save manually altered to equal n_steps: \n",smear_param->n_steps,smear_param->adj_n_save); + smear_param->adj_n_save = smear_param->n_steps; + logQuda(QUDA_SUMMARIZE,"adj_n_save (%d) ; n_steps (%d) \n\n",smear_param->n_steps,smear_param->adj_n_save); + + } // pushVerbosity(inv_param->verbosity); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); @@ -5463,7 +5470,6 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu if (i == 0) g_W0 = gin; else std::swap(g_W0,g_VT); - GFlowStep(g_W1, gaugeTemp, g_W0, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W1); GFlowStep(g_W2, gaugeTemp, g_W1, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_W2); GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); @@ -5480,9 +5486,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profileAdjGFlowSafe); blas::ax(smear_param->epsilon * 3. / 4., f_temp4); - - - + f_temp2 = f_temp4; @@ -5518,7 +5522,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu popOutputPrefix(); } - + void adjSafeEvolve(std::vector> sf_list,std::vector> gf_list, QudaGaugeSmearParam *smear_param, unsigned int ns_safe, TimeProfile &profile) { const GaugeField gin = gf_list[0].get(); @@ -5564,31 +5568,38 @@ void adjSafeEvolve(std::vector> sf_list f_temp1 = f_temp3; f_temp2 = f_temp3; - + // [4] = Lap2 [0] copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profile); + // [4] -> 3/4 eps [4] blas::ax(smear_param->epsilon * 3. / 4., f_temp4); + // [2] = [4] f_temp2 = f_temp4; - + + // [4] = Lap1 [2] copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profile); - + // [3] -> [3] + 8/9 eps [4] blas::axpy(smear_param->epsilon * 8. / 9., f_temp4, f_temp3); + // [1], [4] <- [3] f_temp1 = f_temp3; f_temp4 = f_temp1; - + + // [4] <- [4] - 8/9 [2] blas::axpy(-8. / 9.,f_temp2, f_temp4); - + + // [0] <- Lap0 [4] copyExtendedGauge(precise, g_W0, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, false, comm_dim, profile); + // [0] <- 1/4 eps [0]; [0] <- [2] + [0]; [0] <- [1] + [0] blas::ax(smear_param->epsilon * 1. / 4., f_temp0); blas::axpy(1.,f_temp2, f_temp0); blas::axpy(1.,f_temp1, f_temp0); @@ -5648,13 +5659,21 @@ int modify_hier_list(std::vector &hier_list, int n_b, int n_save, int thres } -void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param){ +void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, + QudaGaugeObservableParam *obs_param){ auto profile = pushProfile(profileAdjGFlowHier); pushOutputPrefix("performAdjGFlowQudaHier: "); checkGaugeSmearParam(smear_param); - + if (smear_param->n_steps < smear_param->adj_n_save ) { + + logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) > n_steps (%d); adj_n_save manually altered to equal n_steps: \n",smear_param->n_steps,smear_param->adj_n_save); + smear_param->adj_n_save = smear_param->n_steps; + logQuda(QUDA_SUMMARIZE,"adj_n_save (%d) ; n_steps (%d) \n\n",smear_param->n_steps,smear_param->adj_n_save); + + } + // pushVerbosity(inv_param->verbosity); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); @@ -5724,10 +5743,10 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save); logQuda(QUDA_SUMMARIZE,"hier list size (number of gauge fields to save) is %d\n",hier_list.size()); if (threshold < hier_list.back()) {threshold = hier_list.back(); logQuda(QUDA_SUMMARIZE, "threshold changed to %d",threshold);} - else logQuda(QUDA_SUMMARIZE, "threshold is %d",threshold); + else logQuda(QUDA_SUMMARIZE, "threshold is %d\n",threshold); if (hier_list.empty()) errorQuda("hier_list is not populated\n"); - if (hier_list.size() != gauge_stages.size()) errorQuda("hier_list is not same size as gauge_stages \n"); + if (hier_list.size() != gauge_stages.size()) errorQuda("hier_list is not same size as gauge_stages\n"); for (unsigned int i = 0; i < hier_list.size() - 1; i++) { diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index 3e5bb90488..249eb5cd44 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -213,6 +213,7 @@ int main(int argc, char **argv) smear_param.alpha3 = gauge_smear_alpha3; smear_param.dir_ignore = gauge_smear_dir_ignore; + quda::ColorSpinorField check,check_out,check_out1; QudaInvertParam invParam = newQudaInvertParam(); invParam.cpu_prec = QUDA_DOUBLE_PRECISION; @@ -242,6 +243,19 @@ int main(int argc, char **argv) //Add noise to spinor quda::RNG rng(check, 1234); spinorNoise(check, rng, QUDA_NOISE_GAUSS); + + quda::ColorSpinorField check_norm(cs_param); + quda::ColorSpinorField check_norm_out(cs_param); + + #pragma omp parallel for + for (int i = 0; i < V * 24; i++) { + + if (i % 2 == 0) + check_norm.data()[i] = check.data()[i]; + else + check_norm.data()[i] = 1.*check.data()[i]; + } + // constructWilsonTestSpinorParam(&cs_param_out, &invParam, &gauge_param); check_out = quda::ColorSpinorField(cs_param); @@ -249,7 +263,8 @@ int main(int argc, char **argv) // constructWilsonTestSpinorParam(&cs_param, &inv_param, &gauge_param); check.PrintVector(0,0,0); check_out.PrintVector(0,0,0); - check_out1.PrintVector(0,0,0); + check_norm.PrintVector(0,0,0); + check_out1.PrintVector(0,0,0); // quda::ColorSpinorField rngDummy(cs_param), rngDummy1(cs_param_out); printf("Stage -1 passed\n"); host_timer.start(); // start the timer @@ -269,25 +284,42 @@ int main(int argc, char **argv) for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; } - // performGFlowQuda(check.data(),check_out.data(), &invParam, &smear_param, obs_param); - performAdjGFlowHier(check_out1.data(),check.data(), &invParam, &smear_param); - performAdjGFlowSafe(check_out.data(),check.data(), &invParam, &smear_param); - + // performGFlowQuda(check_norm_out.data(),check_norm.data(), &invParam, &smear_param, obs_param); + + performAdjGFlowHier(check_out1.data(),check.data(), &invParam, &smear_param, obs_param); + performAdjGFlowSafe(check_out.data(),check.data() , &invParam, &smear_param, obs_param); + performGFlowQuda(check_norm_out.data(),check_norm.data(), &invParam, &smear_param, obs_param); break; } default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); } host_timer.stop(); // stop the timer + //Change this to a tolerance check + printf("First, inspecting the very first element of the 3 evolved fermions:\n"); + printf("Hierarchical method:\n"); + check_out1.PrintVector(0,0,0); + printf("Safe method:\n"); + check_out.PrintVector(0,0,0); + printf("Norm out method:\n"); + check_norm_out.PrintVector(0,0,0); + + double method_adj_diff = 0., adj_fwd_diff = 0.; + + for (int i = 0; i < V * 24; i++) { + + method_adj_diff += pow(check_out.data()[i] - check_out1.data()[i], 2); + adj_fwd_diff += pow(check_out.data()[i] - check_norm_out.data()[i], 2)/(V*V*24.*24.); + + } + + double method_adj_check = sqrt(method_adj_diff), adj_fwd_check = sqrt(adj_fwd_diff); + double oom_error = pow(smear_param.n_steps,2) * pow(smear_param.epsilon,3); - printf("Original spinor\n:"); - check.PrintVector(0,0,0); - printf("Hierarchical method\n:"); - check_out1.PrintVector(0,1,0); - - printf("Safe method\n:"); - check_out.PrintVector(0,1,0); + printf("sum of mag errors between Safe and Hierarchical Adj methods (should be zero) = %1.5e \n", method_adj_check); + + printf("mean of mag errors between Adj and Fwd method (should be of *order* %1.5e) = %1.5e \n", oom_error, adj_fwd_check); printfQuda("Total time for gauge smearing = %g secs\n", host_timer.last()); diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index 207d072d44..28ebec9f36 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -1240,6 +1240,8 @@ void check_gauge(void **oldG, void **newG, double epsilon, QudaPrecision precisi checkGauge((float **)oldG, (float **)newG, epsilon); } + + void createSiteLinkCPU(void *const *link, QudaPrecision precision, int phase) { if (precision == QUDA_DOUBLE_PRECISION) { From b9287f33528604fed2b2c600ebeafa1fa5af11a7 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 1 Dec 2024 02:03:40 -0800 Subject: [PATCH 34/53] cleaned up tests --- lib/interface_quda.cpp | 9 +++-- tests/su3_fermion_test.cpp | 72 ++++++++++++++++++++------------------ 2 files changed, 41 insertions(+), 40 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 1cc4ddb356..3e824e8446 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5461,8 +5461,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu // set [3] = input spinor f_temp3 = fin; - - printf("Stage 1 passed \n"); + for (unsigned int j = 0; j < smear_param->n_steps ; j++) { for (unsigned int i = 0; i < smear_param->n_steps - j; i++) { @@ -5773,7 +5772,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu int hier_loop_counter = 0; while (ret_idx != -1){ - logQuda(QUDA_VERBOSE,"Hier loop count %d has begun \n",hier_loop_counter); + logQuda(QUDA_DEBUG_VERBOSE,"Hier loop count %d has begun \n",hier_loop_counter); logQuda(QUDA_DEBUG_VERBOSE,"Starting a hierarchical loop log: \n"); adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); @@ -5796,9 +5795,9 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu adjSafeEvolve(sf_list,gf_list,smear_param,hier_list[i],profileAdjGFlowHier); - logQuda(QUDA_VERBOSE," block number %d successfully deployed \n",i); + logQuda(QUDA_DEBUG_VERBOSE," block number %d successfully deployed \n",i); } - logQuda(QUDA_DEBUG_VERBOSE," hierarchial evolution completed \n"); + logQuda(QUDA_DEBUG_VERBOSE,"Hierarchial evolution completed \n"); break; } diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index 249eb5cd44..50cc811db6 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -214,7 +214,7 @@ int main(int argc, char **argv) smear_param.dir_ignore = gauge_smear_dir_ignore; - quda::ColorSpinorField check,check_out,check_out1; + quda::ColorSpinorField check,check_safe,check_hier,check_fwd; QudaInvertParam invParam = newQudaInvertParam(); invParam.cpu_prec = QUDA_DOUBLE_PRECISION; invParam.cuda_prec = QUDA_DOUBLE_PRECISION; @@ -243,30 +243,32 @@ int main(int argc, char **argv) //Add noise to spinor quda::RNG rng(check, 1234); spinorNoise(check, rng, QUDA_NOISE_GAUSS); - - quda::ColorSpinorField check_norm(cs_param); - quda::ColorSpinorField check_norm_out(cs_param); - - #pragma omp parallel for - for (int i = 0; i < V * 24; i++) { + +// Example of how to construct a spinor that is the complex conjugate of check. +// quda::ColorSpinorField check_norm(cs_param); +// #pragma omp parallel for +// for (int i = 0; i < V * 24; i++) { - if (i % 2 == 0) - check_norm.data()[i] = check.data()[i]; - else - check_norm.data()[i] = 1.*check.data()[i]; - } +// if (i % 2 == 0) +// check_norm.data()[i] = check.data()[i]; +// else +// check_norm.data()[i] = -1.*check.data()[i]; +// } - - // constructWilsonTestSpinorParam(&cs_param_out, &invParam, &gauge_param); - check_out = quda::ColorSpinorField(cs_param); - check_out1 = quda::ColorSpinorField(cs_param); - // constructWilsonTestSpinorParam(&cs_param, &inv_param, &gauge_param); + check_safe = quda::ColorSpinorField(cs_param); + check_hier = quda::ColorSpinorField(cs_param); + check_fwd = quda::ColorSpinorField(cs_param); + + printf("Inspecting the very first element of the random fermion we will use:\n"); check.PrintVector(0,0,0); - check_out.PrintVector(0,0,0); - check_norm.PrintVector(0,0,0); - check_out1.PrintVector(0,0,0); - // quda::ColorSpinorField rngDummy(cs_param), rngDummy1(cs_param_out); - printf("Stage -1 passed\n"); + printf("Inspecting the very first element of the 3 un-evolved fermions (should be zero):\n"); + printf("Hierarchical method:\n"); + check_hier.PrintVector(0,0,0); + printf("Safe method:\n"); + check_safe.PrintVector(0,0,0); + printf("Forward method:\n"); + check_fwd.PrintVector(0,0,0); + host_timer.start(); // start the timer switch (smear_param.smear_type) { case QUDA_GAUGE_SMEAR_APE: @@ -284,12 +286,12 @@ int main(int argc, char **argv) for (int i = 0; i < gauge_smear_steps / measurement_interval + 1; i++) { obs_param[i].compute_plaquette = QUDA_BOOLEAN_TRUE; } - - // performGFlowQuda(check_norm_out.data(),check_norm.data(), &invParam, &smear_param, obs_param); - - performAdjGFlowHier(check_out1.data(),check.data(), &invParam, &smear_param, obs_param); - performAdjGFlowSafe(check_out.data(),check.data() , &invParam, &smear_param, obs_param); - performGFlowQuda(check_norm_out.data(),check_norm.data(), &invParam, &smear_param, obs_param); + + // Perform two adjoint flow algorithms, these methods dont alter the final value for the gauge so we excecute them first + performAdjGFlowHier(check_hier.data(),check.data(), &invParam, &smear_param, obs_param); + performAdjGFlowSafe(check_safe.data(),check.data() , &invParam, &smear_param, obs_param); + // Perform forward flow algorithm + performGFlowQuda(check_fwd.data(),check.data(), &invParam, &smear_param, obs_param); break; } default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); @@ -299,22 +301,22 @@ int main(int argc, char **argv) //Change this to a tolerance check printf("First, inspecting the very first element of the 3 evolved fermions:\n"); printf("Hierarchical method:\n"); - check_out1.PrintVector(0,0,0); + check_hier.PrintVector(0,0,0); printf("Safe method:\n"); - check_out.PrintVector(0,0,0); - printf("Norm out method:\n"); - check_norm_out.PrintVector(0,0,0); + check_safe.PrintVector(0,0,0); + printf("Forward method:\n"); + check_fwd.PrintVector(0,0,0); double method_adj_diff = 0., adj_fwd_diff = 0.; for (int i = 0; i < V * 24; i++) { - method_adj_diff += pow(check_out.data()[i] - check_out1.data()[i], 2); - adj_fwd_diff += pow(check_out.data()[i] - check_norm_out.data()[i], 2)/(V*V*24.*24.); + method_adj_diff += pow(check_safe.data()[i] - check_hier.data()[i], 2); + adj_fwd_diff += pow(check_safe.data()[i] - check_fwd.data()[i], 2); } - double method_adj_check = sqrt(method_adj_diff), adj_fwd_check = sqrt(adj_fwd_diff); + double method_adj_check = sqrt(method_adj_diff), adj_fwd_check = sqrt(adj_fwd_diff)/(V*24.); double oom_error = pow(smear_param.n_steps,2) * pow(smear_param.epsilon,3); printf("sum of mag errors between Safe and Hierarchical Adj methods (should be zero) = %1.5e \n", method_adj_check); From b981d9f3ebb2a430ba3408e9a238600da338a556 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 1 Dec 2024 02:44:15 -0800 Subject: [PATCH 35/53] begun adding obs params for adj --- lib/interface_quda.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 3e824e8446..a6bf222c8c 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5522,7 +5522,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu popOutputPrefix(); } -void adjSafeEvolve(std::vector> sf_list,std::vector> gf_list, QudaGaugeSmearParam *smear_param, unsigned int ns_safe, TimeProfile &profile) +void adjSafeEvolve(std::vector> sf_list,std::vector> gf_list, QudaGaugeSmearParam *smear_param, unsigned int ns_safe, TimeProfile &profile, std::vector> meas_cinf) { const GaugeField gin = gf_list[0].get(); GaugeField &g_W0 = gf_list[0].get(); @@ -5536,7 +5536,10 @@ void adjSafeEvolve(std::vector> sf_list ColorSpinorField &f_temp1 = sf_list[1].get(); ColorSpinorField &f_temp2 = sf_list[2].get(); ColorSpinorField &f_temp3 = sf_list[3].get(); - ColorSpinorField &f_temp4 = sf_list[4].get(); + ColorSpinorField &f_temp4 = sf_list[4].get(); + + int &meas_i = meas_cinf[0].get(); + int &measurement_n = meas_cinf[1].get(); int parity = 0; @@ -5606,7 +5609,8 @@ void adjSafeEvolve(std::vector> sf_list // fout = f_temp0; //redefining f_temp0 to restart loop f_temp3 = f_temp0; - + + meas_i++; } } @@ -5769,13 +5773,17 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu sf_list = {f_temp0, f_temp1, f_temp2, f_temp3, f_temp4}; std::vector> gf_list; gf_list = {gauge_stages.back(), gaugeW1, gaugeW2, gaugeVT, gaugeTemp, precise}; + + //first one is global counter, second is meas counter + int i_glob = 0, measurement_n = 0 ; + std::vector> meas_cinf{i_glob, measurement_n}; int hier_loop_counter = 0; while (ret_idx != -1){ logQuda(QUDA_DEBUG_VERBOSE,"Hier loop count %d has begun \n",hier_loop_counter); logQuda(QUDA_DEBUG_VERBOSE,"Starting a hierarchical loop log: \n"); - adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier); + adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier,meas_cinf); logQuda(QUDA_DEBUG_VERBOSE,"Previous hier list elements: \n"); for (int j = 0; j < hier_list.size(); j++ ){ @@ -5793,7 +5801,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu gf_list.at(0) = std::ref(gauge_stages[i]); - adjSafeEvolve(sf_list,gf_list,smear_param,hier_list[i],profileAdjGFlowHier); + adjSafeEvolve(sf_list,gf_list,smear_param,hier_list[i],profileAdjGFlowHier,meas_cinf); logQuda(QUDA_DEBUG_VERBOSE," block number %d successfully deployed \n",i); } From ccbc7501a556975d5326ba2e13ef85228fcba8b5 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 1 Dec 2024 14:10:20 -0800 Subject: [PATCH 36/53] verified global indexing in hier works --- lib/interface_quda.cpp | 13 ++++++++++--- tests/su3_fermion_test.cpp | 4 +++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index a6bf222c8c..dfe10195ee 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5447,7 +5447,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu double b = -8.; int comm_dim[4] = {}; - + int measurement_n = 0; // The nth measurement to take // only switch on comms needed for directions with a derivative for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } @@ -5512,6 +5512,13 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu fout = f_temp0; //redefining f_temp0 to restart loop f_temp3 = f_temp0; + + if ((j + 1) % smear_param->meas_interval == 0) { + measurement_n++; // increment measurements. + gaugeObservables(g_VT, obs_param[measurement_n]); + logQuda(QUDA_SUMMARIZE, "%le %.16e %+.16e\n", (smear_param->t0 + smear_param->epsilon * (j + 1)), + obs_param[measurement_n].plaquette[0], blas::norm2(fout)); + } } cpuParam.v = h_out; @@ -5538,7 +5545,7 @@ void adjSafeEvolve(std::vector> sf_list ColorSpinorField &f_temp3 = sf_list[3].get(); ColorSpinorField &f_temp4 = sf_list[4].get(); - int &meas_i = meas_cinf[0].get(); + int &i_glob = meas_cinf[0].get(); int &measurement_n = meas_cinf[1].get(); int parity = 0; @@ -5610,7 +5617,7 @@ void adjSafeEvolve(std::vector> sf_list //redefining f_temp0 to restart loop f_temp3 = f_temp0; - meas_i++; + i_glob++; } } diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index 50cc811db6..cd217ce3f9 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -197,6 +197,8 @@ int main(int argc, char **argv) obs_param[i].compute_qcharge = QUDA_BOOLEAN_TRUE; obs_param[i].su_project = su_project ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; } + + QudaGaugeObservableParam *obs_adj_safe(obs_param), *obs_adj_hier(obs_param); // We here set all the problem parameters for all possible smearing types. QudaGaugeSmearParam smear_param = newQudaGaugeSmearParam(); @@ -289,7 +291,7 @@ int main(int argc, char **argv) // Perform two adjoint flow algorithms, these methods dont alter the final value for the gauge so we excecute them first performAdjGFlowHier(check_hier.data(),check.data(), &invParam, &smear_param, obs_param); - performAdjGFlowSafe(check_safe.data(),check.data() , &invParam, &smear_param, obs_param); + performAdjGFlowSafe(check_safe.data(),check.data() , &invParam, &smear_param, obs_adj_safe); // Perform forward flow algorithm performGFlowQuda(check_fwd.data(),check.data(), &invParam, &smear_param, obs_param); break; From a2a31457146a8f02896714b344d7c0a19822a073 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 1 Dec 2024 15:10:32 -0800 Subject: [PATCH 37/53] removed gauge obs params from adjoint --- include/quda.h | 6 ++---- lib/interface_quda.cpp | 15 +++------------ tests/su3_fermion_test.cpp | 4 ++-- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/include/quda.h b/include/quda.h index 2dcd27b70f..705e0d9e26 100644 --- a/include/quda.h +++ b/include/quda.h @@ -1705,8 +1705,7 @@ extern "C" { * @param[in,out] obs_param Parameter struct that defines which * observables we are making and the resulting observables. */ - void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, - QudaGaugeObservableParam *obs_param); + void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); /** * Performs Adjoint Gradient Flow (gauge + fermion) the Hierarchical way on gaugePrecise and stores it in gaugeSmeared @@ -1716,8 +1715,7 @@ extern "C" { * @param[in,out] obs_param Parameter struct that defines which * observables we are making and the resulting observables. */ - void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, - QudaGaugeObservableParam *obs_param); + void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param); /** * @brief Calculates a variety of gauge-field observables. If a diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index dfe10195ee..1e33a09aad 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5375,10 +5375,9 @@ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaG } /* end of performGFlowQuda */ -// perform adjoint (backwards) gradient flow on gauge and spinor field following the algorithm in arXiv:1302.5246 (Appendix D) +// perform adjoint (backwards) gradient flow on gauge and spinor field following the algorithm in arXiv:1302.5246 (Appendix E) // the gauge flow steps are identical to Wilson Flow algorithm in arXiv:1006.4518 (Vt <-> W3) -void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, - QudaGaugeObservableParam *obs_param) +void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param) { auto profile = pushProfile(profileAdjGFlowSafe); @@ -5512,13 +5511,6 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu fout = f_temp0; //redefining f_temp0 to restart loop f_temp3 = f_temp0; - - if ((j + 1) % smear_param->meas_interval == 0) { - measurement_n++; // increment measurements. - gaugeObservables(g_VT, obs_param[measurement_n]); - logQuda(QUDA_SUMMARIZE, "%le %.16e %+.16e\n", (smear_param->t0 + smear_param->epsilon * (j + 1)), - obs_param[measurement_n].plaquette[0], blas::norm2(fout)); - } } cpuParam.v = h_out; @@ -5669,8 +5661,7 @@ int modify_hier_list(std::vector &hier_list, int n_b, int n_save, int thres } -void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, - QudaGaugeObservableParam *obs_param){ +void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param){ auto profile = pushProfile(profileAdjGFlowHier); pushOutputPrefix("performAdjGFlowQudaHier: "); diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index cd217ce3f9..5232d5a39f 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -290,8 +290,8 @@ int main(int argc, char **argv) } // Perform two adjoint flow algorithms, these methods dont alter the final value for the gauge so we excecute them first - performAdjGFlowHier(check_hier.data(),check.data(), &invParam, &smear_param, obs_param); - performAdjGFlowSafe(check_safe.data(),check.data() , &invParam, &smear_param, obs_adj_safe); + performAdjGFlowHier(check_hier.data(),check.data(), &invParam, &smear_param); + performAdjGFlowSafe(check_safe.data(),check.data() , &invParam, &smear_param); // Perform forward flow algorithm performGFlowQuda(check_fwd.data(),check.data(), &invParam, &smear_param, obs_param); break; From 8a683e0cd84e25714e6fff0e2bf7c2f0ed48563c Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 1 Dec 2024 15:27:33 -0800 Subject: [PATCH 38/53] just identical verification btw fwd and adj --- lib/interface_quda.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 1e33a09aad..9e1551a077 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5294,6 +5294,8 @@ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaG if (i > 0) std::swap(gin, gout); // output from prior step becomes input for next step + printf("just fwd first element of gauge\n"); + gin.PrintMatrix(0,0,0,0); // init auxilliary fields [0], [1] and [2] as [3] f_temp0 = f_temp3; f_temp1 = f_temp3; @@ -5473,7 +5475,8 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); } - + printf("just adjoint first element of gauge\n"); + g_W0.PrintMatrix(0,0,0,0); // init auxilliary fields [0], [1] and [2] as [3] f_temp0 = f_temp3; f_temp1 = f_temp3; From 883d8f351b93a1521fcfc697f1b8749966bb7700 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 1 Dec 2024 15:29:03 -0800 Subject: [PATCH 39/53] removed gauge verification --- lib/interface_quda.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 9e1551a077..e348d89cdc 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5294,8 +5294,6 @@ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaG if (i > 0) std::swap(gin, gout); // output from prior step becomes input for next step - printf("just fwd first element of gauge\n"); - gin.PrintMatrix(0,0,0,0); // init auxilliary fields [0], [1] and [2] as [3] f_temp0 = f_temp3; f_temp1 = f_temp3; @@ -5475,8 +5473,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); } - printf("just adjoint first element of gauge\n"); - g_W0.PrintMatrix(0,0,0,0); + // init auxilliary fields [0], [1] and [2] as [3] f_temp0 = f_temp3; f_temp1 = f_temp3; From a08c2fa5edd26eccfea2a359174b63925424f53f Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 1 Dec 2024 17:38:32 -0800 Subject: [PATCH 40/53] added separate timings for different methods --- lib/interface_quda.cpp | 2 +- tests/su3_fermion_test.cpp | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index e348d89cdc..38e80334b6 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5224,7 +5224,7 @@ void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaG pushOutputPrefix("performGFlowQuda: "); checkGaugeSmearParam(smear_param); - // pushVerbosity(inv_param->verbosity); + pushVerbosity(inv_param->verbosity); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); if (smear_param->restart) { diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index 5232d5a39f..5bac1a0397 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -170,7 +170,7 @@ int main(int argc, char **argv) QudaGaugeObservableParam param = newQudaGaugeObservableParam(); // start the timer - quda::host_timer_t host_timer; + quda::host_timer_t host_timer, host_safe_timer, host_hier_timer, host_fwd_timer; // The user may specify which measurements they wish to perform/omit // using the QudaGaugeObservableParam struct, and whether or not to @@ -290,10 +290,16 @@ int main(int argc, char **argv) } // Perform two adjoint flow algorithms, these methods dont alter the final value for the gauge so we excecute them first + host_hier_timer.start(); performAdjGFlowHier(check_hier.data(),check.data(), &invParam, &smear_param); + host_hier_timer.stop(); + host_safe_timer.start(); performAdjGFlowSafe(check_safe.data(),check.data() , &invParam, &smear_param); + host_safe_timer.stop(); // Perform forward flow algorithm + host_fwd_timer.start(); performGFlowQuda(check_fwd.data(),check.data(), &invParam, &smear_param, obs_param); + host_fwd_timer.stop(); break; } default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); @@ -324,8 +330,11 @@ int main(int argc, char **argv) printf("sum of mag errors between Safe and Hierarchical Adj methods (should be zero) = %1.5e \n", method_adj_check); printf("mean of mag errors between Adj and Fwd method (should be of *order* %1.5e) = %1.5e \n", oom_error, adj_fwd_check); - - printfQuda("Total time for gauge smearing = %g secs\n", host_timer.last()); + + printfQuda("Time elapsed for adjoint hierarchical fermion/gauge smearing = %g secs\n", host_hier_timer.last()); + printfQuda("Time elapsed for adjoint safe fermion/gauge smearing = %g secs\n", host_safe_timer.last()); + printfQuda("Time elapsed for forward fermion/gauge smearing = %g secs\n", host_fwd_timer.last()); + printfQuda("Total time for collective fermion/gauge smearing = %g secs\n", host_timer.last()); if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); From c214eaefec2922472772c7dd8df803224a51fba4 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 1 Dec 2024 17:44:25 -0800 Subject: [PATCH 41/53] rearranging --- tests/su3_fermion_test.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index 5bac1a0397..b2910fbe9e 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -300,14 +300,20 @@ int main(int argc, char **argv) host_fwd_timer.start(); performGFlowQuda(check_fwd.data(),check.data(), &invParam, &smear_param, obs_param); host_fwd_timer.stop(); + + printfQuda("Time elapsed for adjoint hierarchical fermion/gauge smearing = %g secs\n", host_hier_timer.last()); + printfQuda("Time elapsed for adjoint safe fermion/gauge smearing = %g secs\n", host_safe_timer.last()); + printfQuda("Time elapsed for forward fermion/gauge smearing = %g secs\n", host_fwd_timer.last()); + break; } default: errorQuda("Undefined gauge smear type %d given", smear_param.smear_type); } host_timer.stop(); // stop the timer - //Change this to a tolerance check - printf("First, inspecting the very first element of the 3 evolved fermions:\n"); + + printfQuda("Total time for collective fermion/gauge smearing = %g secs\n", host_timer.last()); + printf("Now, inspecting the very first element of the 3 evolved fermions:\n"); printf("Hierarchical method:\n"); check_hier.PrintVector(0,0,0); printf("Safe method:\n"); @@ -327,14 +333,8 @@ int main(int argc, char **argv) double method_adj_check = sqrt(method_adj_diff), adj_fwd_check = sqrt(adj_fwd_diff)/(V*24.); double oom_error = pow(smear_param.n_steps,2) * pow(smear_param.epsilon,3); - printf("sum of mag errors between Safe and Hierarchical Adj methods (should be zero) = %1.5e \n", method_adj_check); - - printf("mean of mag errors between Adj and Fwd method (should be of *order* %1.5e) = %1.5e \n", oom_error, adj_fwd_check); - - printfQuda("Time elapsed for adjoint hierarchical fermion/gauge smearing = %g secs\n", host_hier_timer.last()); - printfQuda("Time elapsed for adjoint safe fermion/gauge smearing = %g secs\n", host_safe_timer.last()); - printfQuda("Time elapsed for forward fermion/gauge smearing = %g secs\n", host_fwd_timer.last()); - printfQuda("Total time for collective fermion/gauge smearing = %g secs\n", host_timer.last()); + printf("Sum of mag errors between Safe and Hierarchical Adj methods (should be zero) = %1.5e \n", method_adj_check); + printf("Mean of mag errors between Adj and Fwd method (should be of *order* %1.5e) = %1.5e \n", oom_error, adj_fwd_check); if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); From a1ecba36d7cac838fdd7d8f6114b0cccd72bfee1 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 1 Dec 2024 20:50:30 -0800 Subject: [PATCH 42/53] tidy --- lib/interface_quda.cpp | 6 +++--- tests/su3_fermion_test.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 38e80334b6..3cbd633207 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5392,7 +5392,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu } - // pushVerbosity(inv_param->verbosity); + pushVerbosity(inv_param->verbosity); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); if (smear_param->restart) { @@ -5803,7 +5803,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu logQuda(QUDA_DEBUG_VERBOSE," block number %d successfully deployed \n",i); } - logQuda(QUDA_DEBUG_VERBOSE,"Hierarchial evolution completed \n"); + logQuda(QUDA_VERBOSE,"Hierarchial evolution completed \n"); break; } @@ -5832,7 +5832,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu cpuParam.location = inv_param->output_location; ColorSpinorField fout_h(cpuParam); fout_h = sf_list[0].get(); - + logQuda(QUDA_DEBUG_VERBOSE,"Spinor written to cpu \n"); popOutputPrefix(); } diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index b2910fbe9e..a2977fdc19 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -242,7 +242,7 @@ int main(int argc, char **argv) constructWilsonTestSpinorParam(&cs_param, &invParam, &gauge_param); check = quda::ColorSpinorField(cs_param); - //Add noise to spinor + //Add noise to spinor quda::RNG rng(check, 1234); spinorNoise(check, rng, QUDA_NOISE_GAUSS); @@ -330,10 +330,10 @@ int main(int argc, char **argv) } - double method_adj_check = sqrt(method_adj_diff), adj_fwd_check = sqrt(adj_fwd_diff)/(V*24.); + double method_adj_check = sqrt(method_adj_diff)/(V*24.), adj_fwd_check = sqrt(adj_fwd_diff)/(V*24.); double oom_error = pow(smear_param.n_steps,2) * pow(smear_param.epsilon,3); - printf("Sum of mag errors between Safe and Hierarchical Adj methods (should be zero) = %1.5e \n", method_adj_check); + printf("Mean of mag errors between Safe and Hierarchical Adj methods (should be zero up to machine precision) = %1.5e \n", method_adj_check); printf("Mean of mag errors between Adj and Fwd method (should be of *order* %1.5e) = %1.5e \n", oom_error, adj_fwd_check); if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); From 2bcfe523f7d2e59509aad54b943d13b3c0d9cbf9 Mon Sep 17 00:00:00 2001 From: rokarur Date: Tue, 3 Dec 2024 20:08:54 -0800 Subject: [PATCH 43/53] correction to deviation oom pred --- tests/su3_fermion_test.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index a2977fdc19..13d8d8dd99 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -325,12 +325,23 @@ int main(int argc, char **argv) for (int i = 0; i < V * 24; i++) { - method_adj_diff += pow(check_safe.data()[i] - check_hier.data()[i], 2); - adj_fwd_diff += pow(check_safe.data()[i] - check_fwd.data()[i], 2); + method_adj_diff += pow(fabs(check_safe.data()[i] - check_hier.data()[i]), 2); + // adj_fwd_diff += pow(fabs(check_safe.data()[i] - check_fwd.data()[i]), 2); + } + + for (int i = 0; i < V * 12; i++) { + + int re_idx = 2*i; + int im_idx = 2*i + 1; + + double re_diff2 = pow(check_safe.data()[re_idx] - check_fwd.data()[re_idx], 2); + double im_diff2 = pow(check_safe.data()[im_idx] - check_fwd.data()[im_idx], 2); + + adj_fwd_diff += sqrt(re_diff2 + im_diff2); } - double method_adj_check = sqrt(method_adj_diff)/(V*24.), adj_fwd_check = sqrt(adj_fwd_diff)/(V*24.); + double method_adj_check = sqrt(method_adj_diff)/(V*24.), adj_fwd_check = adj_fwd_diff/(V*12.); double oom_error = pow(smear_param.n_steps,2) * pow(smear_param.epsilon,3); printf("Mean of mag errors between Safe and Hierarchical Adj methods (should be zero up to machine precision) = %1.5e \n", method_adj_check); From 81b55a99375b0221e8a4e8d001015fcf80904db0 Mon Sep 17 00:00:00 2001 From: rokarur Date: Wed, 4 Dec 2024 15:15:26 -0800 Subject: [PATCH 44/53] revert back --- tests/su3_fermion_test.cpp | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index 13d8d8dd99..af5ca672cf 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -326,22 +326,10 @@ int main(int argc, char **argv) for (int i = 0; i < V * 24; i++) { method_adj_diff += pow(fabs(check_safe.data()[i] - check_hier.data()[i]), 2); - // adj_fwd_diff += pow(fabs(check_safe.data()[i] - check_fwd.data()[i]), 2); - } - - for (int i = 0; i < V * 12; i++) { - - int re_idx = 2*i; - int im_idx = 2*i + 1; - - double re_diff2 = pow(check_safe.data()[re_idx] - check_fwd.data()[re_idx], 2); - double im_diff2 = pow(check_safe.data()[im_idx] - check_fwd.data()[im_idx], 2); - - adj_fwd_diff += sqrt(re_diff2 + im_diff2); - + adj_fwd_diff += pow((check_safe.data()[i] - check_fwd.data()[i]), 2); } - double method_adj_check = sqrt(method_adj_diff)/(V*24.), adj_fwd_check = adj_fwd_diff/(V*12.); + double method_adj_check = sqrt(method_adj_diff)/(V*24.), adj_fwd_check = sqrt(adj_fwd_diff)/(V*24.); double oom_error = pow(smear_param.n_steps,2) * pow(smear_param.epsilon,3); printf("Mean of mag errors between Safe and Hierarchical Adj methods (should be zero up to machine precision) = %1.5e \n", method_adj_check); From c12e7743787f65d7ccabeba406c934590ebdf15c Mon Sep 17 00:00:00 2001 From: rokarur Date: Thu, 12 Dec 2024 14:03:48 -0800 Subject: [PATCH 45/53] with matmul in test --- lib/interface_quda.cpp | 2 +- tests/su3_fermion_test.cpp | 60 ++++++++++++++++++++++++++++++++++++-- tests/utils/host_blas.cpp | 1 + tests/utils/host_utils.cpp | 39 +++++++++++++++++++++++++ tests/utils/host_utils.h | 1 + 5 files changed, 100 insertions(+), 3 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 3cbd633207..4a86ebec42 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5473,7 +5473,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu GFlowStep(g_VT, gaugeTemp, g_W2, smear_param->epsilon, smear_param->smear_type, WFLOW_STEP_VT); } - + // init auxilliary fields [0], [1] and [2] as [3] f_temp0 = f_temp3; f_temp1 = f_temp3; diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index af5ca672cf..4f2667f8c3 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -322,12 +322,68 @@ int main(int argc, char **argv) check_fwd.PrintVector(0,0,0); double method_adj_diff = 0., adj_fwd_diff = 0.; - + /* To access the ith complex entry in a raw vector, do, for example: check.data*>()[i]*/ for (int i = 0; i < V * 24; i++) { - method_adj_diff += pow(fabs(check_safe.data()[i] - check_hier.data()[i]), 2); adj_fwd_diff += pow((check_safe.data()[i] - check_fwd.data()[i]), 2); } + + int row_inc = 1; + int col_inc = 3; + + std::vector col_st{0, 1, 2}; + std::vector row_st{0, 3, 6}; + + std::complex test_contract[9 * V]; + + std::complex trace = {0.,0.}; + + for (int i = 0; i < V; i++) { + + for (int ii = 0; ii < 9; ii++){ + int col_start = i * 9 + (ii % 3); + int row_start = i * 9 + ii - ((ii % 3)); + + int which_col_idx = (ii % 3), which_row_idx = (ii - (ii % 3))/ 3; + + std::complex dot = {0.,0.}; + + for(int i_s = 0; i_s < 4; i_s++){ + + int s_row_idx = i * 12 + col_st[which_row_idx]+ i_s*col_inc; + int s_col_idx = i * 12 + col_st[which_col_idx]+ i_s*col_inc; + + auto m1 = std::conj(check.data*>()[s_row_idx]); + auto m2 = check_fwd.data*>()[s_col_idx]; + + // if (i == 0 && ii == 0){ + // printf("m1 is %1.5e, %1.5e \n",m1.real(), m1.imag()); + // printf("m2 is %1.5e, %1.5e \n",m2.real(), m2.imag()); + // printf("dot is %1.5e, %1.5e \n",(m1*m2).real(), (m1*m2).imag()); + // } + + + dot += m1*m2; + + } + test_contract[i * 9 + ii] = dot; + // if (i == 0){ + // printf("chcksafe dot is %1.5e, %1.5e \n",test_contract[i * 9 + ii].real(), test_contract[i * 9 + ii].imag()); + // } + } + + trace += (test_contract[i * 9] + test_contract[i * 9 + 4] + test_contract[i * 9 + 8]); + } + + printf("trace is %1.5e, %1.5e \n",trace.real(), trace.imag()); + + std::complextrace_fwd,trace_adj; + trace_fwd = twoColorSpinorContract(check.data*>(), check_fwd.data*>()); + trace_adj = twoColorSpinorContract(check.data*>(), check_safe.data*>()); + // test_contract2.data()[i] = check.data()[i]; + + printf("trace adj is %1.5e, %1.5e \n",trace_adj.real(), trace_adj.imag()); + printf("trace fwd is %1.5e, %1.5e \n",trace_fwd.real(), trace_fwd.imag()); double method_adj_check = sqrt(method_adj_diff)/(V*24.), adj_fwd_check = sqrt(adj_fwd_diff)/(V*24.); double oom_error = pow(smear_param.n_steps,2) * pow(smear_param.epsilon,3); diff --git a/tests/utils/host_blas.cpp b/tests/utils/host_blas.cpp index c9d8c63a59..b6101022c2 100644 --- a/tests/utils/host_blas.cpp +++ b/tests/utils/host_blas.cpp @@ -128,3 +128,4 @@ void cpu_xpy(QudaPrecision prec, const void *x, void *y, int size) for (int i = 0; i < size; i++) { dst[i] += src[i]; } } } + diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index 28ebec9f36..ef4b24528e 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -1240,7 +1240,46 @@ void check_gauge(void **oldG, void **newG, double epsilon, QudaPrecision precisi checkGauge((float **)oldG, (float **)newG, epsilon); } +std::complex twoColorSpinorContract(std::complex *spinor1, std::complex *spinor2) +{ + + int row_inc = 1; + int col_inc = 3; + std::vector col_st{0, 1, 2}; + std::vector row_st{0, 3, 6}; + + complex test_contract[9 * V]; + complex trace = {0. , 0.}; + for (int i = 0; i < V; i++) { + + for (int ii = 0; ii < 9; ii++){ + int col_start = i * 9 + (ii % 3); + int row_start = i * 9 + ii - ((ii % 3)); + + int which_col_idx = (ii % 3), which_row_idx = (ii - (ii % 3))/ 3; + + std::complex dot = {0.,0.}; + + for(int i_s = 0; i_s < 4; i_s++){ + + int s_row_idx = i * 12 + col_st[which_row_idx]+ i_s*col_inc; + int s_col_idx = i * 12 + col_st[which_col_idx]+ i_s*col_inc; + + auto m1 = std::conj(spinor1[s_row_idx]); + auto m2 = spinor2[s_col_idx]; + + dot += m1*m2; + + } + test_contract[i * 9 + ii] = dot; + } + + trace += (test_contract[i * 9] + test_contract[i * 9 + 4] + test_contract[i * 9 + 8]); + } + + return trace; +} void createSiteLinkCPU(void *const *link, QudaPrecision precision, int phase) { diff --git a/tests/utils/host_utils.h b/tests/utils/host_utils.h index c63d124dc8..78079eaa77 100644 --- a/tests/utils/host_utils.h +++ b/tests/utils/host_utils.h @@ -130,6 +130,7 @@ void constructPointSpinorSource(void *v, QudaPrecision precision, const int *con void constructWallSpinorSource(void *v, int nSpin, int nColor, QudaPrecision precision, const int dil); void constructRandomSpinorSource(void *v, int nSpin, int nColor, QudaPrecision precision, QudaSolutionType sol_type, const int *const x, int nDim, quda::RNG &rng); +std::complex twoColorSpinorContract(std::complex *spinor1, std::complex *spinor2); //------------------------------------------------------ // Helper functions From 7c34c5a56753395ccba05d75288a83a4d5740957 Mon Sep 17 00:00:00 2001 From: rokarur Date: Thu, 12 Dec 2024 15:08:40 -0800 Subject: [PATCH 46/53] rearranging smear warnings --- lib/check_params.h | 12 +++++++ lib/interface_quda.cpp | 26 ++++++++------ tests/su3_fermion_test.cpp | 73 ++++++-------------------------------- 3 files changed, 39 insertions(+), 72 deletions(-) diff --git a/lib/check_params.h b/lib/check_params.h index ee400acb31..4f1abc59a0 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -1162,6 +1162,18 @@ void printQudaGaugeSmearParam(QudaGaugeSmearParam *param) #if defined CHECK_PARAM if (param->struct_size != (size_t)INVALID_INT && param->struct_size != sizeof(*param)) errorQuda("Unexpected QudaGaugeSmearParam struct size %lu, expected %lu", param->struct_size, sizeof(*param)); + + if (param->n_steps <= param->adj_n_save ) { + + logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) > n_steps (%d); adj_n_save manually altered: \n",param->n_steps,param->adj_n_save); + if (param->n_steps == 1) + param->adj_n_save = param->n_steps; + else + param->adj_n_save = param->n_steps - 1; + logQuda(QUDA_SUMMARIZE,"adj_n_save (%d) ; n_steps (%d) \n\n",param->n_steps,param->adj_n_save); + + } + #else P(struct_size, (size_t)INVALID_INT); #endif diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index 4a86ebec42..f2adf51264 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5384,13 +5384,16 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu pushOutputPrefix("performAdjGFlowQudaSafe: "); checkGaugeSmearParam(smear_param); - if (smear_param->n_steps < smear_param->adj_n_save ) { + // if (smear_param->n_steps <= smear_param->adj_n_save ) { - logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) > n_steps (%d); adj_n_save manually altered to equal n_steps: \n",smear_param->n_steps,smear_param->adj_n_save); - smear_param->adj_n_save = smear_param->n_steps; - logQuda(QUDA_SUMMARIZE,"adj_n_save (%d) ; n_steps (%d) \n\n",smear_param->n_steps,smear_param->adj_n_save); + // logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) > n_steps (%d); adj_n_save manually altered: \n",smear_param->n_steps,smear_param->adj_n_save); + // if (smear_param->n_steps == 1) + // smear_param->adj_n_save = smear_param->n_steps; + // else + // smear_param->adj_n_save = smear_param->n_steps - 1; + // logQuda(QUDA_SUMMARIZE,"adj_n_save (%d) ; n_steps (%d) \n\n",smear_param->n_steps,smear_param->adj_n_save); - } + // } pushVerbosity(inv_param->verbosity); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); @@ -5667,13 +5670,16 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu pushOutputPrefix("performAdjGFlowQudaHier: "); checkGaugeSmearParam(smear_param); - if (smear_param->n_steps < smear_param->adj_n_save ) { + // if (smear_param->n_steps <= smear_param->adj_n_save ) { - logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) > n_steps (%d); adj_n_save manually altered to equal n_steps: \n",smear_param->n_steps,smear_param->adj_n_save); - smear_param->adj_n_save = smear_param->n_steps; - logQuda(QUDA_SUMMARIZE,"adj_n_save (%d) ; n_steps (%d) \n\n",smear_param->n_steps,smear_param->adj_n_save); + // logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) > n_steps (%d); adj_n_save manually altered: \n",smear_param->n_steps,smear_param->adj_n_save); + // if (smear_param->n_steps == 1) + // smear_param->adj_n_save = smear_param->n_steps; + // else + // smear_param->adj_n_save = smear_param->n_steps - 1; + // logQuda(QUDA_SUMMARIZE,"adj_n_save (%d) ; n_steps (%d) \n\n",smear_param->n_steps,smear_param->adj_n_save); - } + // } // pushVerbosity(inv_param->verbosity); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index 4f2667f8c3..f51f3669f7 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -321,75 +321,24 @@ int main(int argc, char **argv) printf("Forward method:\n"); check_fwd.PrintVector(0,0,0); - double method_adj_diff = 0., adj_fwd_diff = 0.; + double method_adj_diff = 0.; /* To access the ith complex entry in a raw vector, do, for example: check.data*>()[i]*/ for (int i = 0; i < V * 24; i++) { method_adj_diff += pow(fabs(check_safe.data()[i] - check_hier.data()[i]), 2); - adj_fwd_diff += pow((check_safe.data()[i] - check_fwd.data()[i]), 2); } - - int row_inc = 1; - int col_inc = 3; - - std::vector col_st{0, 1, 2}; - std::vector row_st{0, 3, 6}; + double method_adj_check = sqrt(method_adj_diff)/(V*24.); + printf("Mean of mag errors between Safe and Hierarchical Adj methods (should be zero up to machine precision) = %1.5e \n", method_adj_check); - std::complex test_contract[9 * V]; + std::complextrace_fwd,trace_adj; + trace_fwd = twoColorSpinorContract(check.data*>(), check_fwd.data*>()); + trace_adj = twoColorSpinorContract(check.data*>(), check_safe.data*>()); - std::complex trace = {0.,0.}; - - for (int i = 0; i < V; i++) { - - for (int ii = 0; ii < 9; ii++){ - int col_start = i * 9 + (ii % 3); - int row_start = i * 9 + ii - ((ii % 3)); - - int which_col_idx = (ii % 3), which_row_idx = (ii - (ii % 3))/ 3; - - std::complex dot = {0.,0.}; - - for(int i_s = 0; i_s < 4; i_s++){ - - int s_row_idx = i * 12 + col_st[which_row_idx]+ i_s*col_inc; - int s_col_idx = i * 12 + col_st[which_col_idx]+ i_s*col_inc; - - auto m1 = std::conj(check.data*>()[s_row_idx]); - auto m2 = check_fwd.data*>()[s_col_idx]; - - // if (i == 0 && ii == 0){ - // printf("m1 is %1.5e, %1.5e \n",m1.real(), m1.imag()); - // printf("m2 is %1.5e, %1.5e \n",m2.real(), m2.imag()); - // printf("dot is %1.5e, %1.5e \n",(m1*m2).real(), (m1*m2).imag()); - // } - - - dot += m1*m2; - - } - test_contract[i * 9 + ii] = dot; - // if (i == 0){ - // printf("chcksafe dot is %1.5e, %1.5e \n",test_contract[i * 9 + ii].real(), test_contract[i * 9 + ii].imag()); - // } - } - - trace += (test_contract[i * 9] + test_contract[i * 9 + 4] + test_contract[i * 9 + 8]); - } - - printf("trace is %1.5e, %1.5e \n",trace.real(), trace.imag()); + auto trace_diff_err = 2.*std::fabs(trace_fwd - std::conj(trace_adj))/std::fabs(trace_fwd + std::conj(trace_adj)); - std::complextrace_fwd,trace_adj; - trace_fwd = twoColorSpinorContract(check.data*>(), check_fwd.data*>()); - trace_adj = twoColorSpinorContract(check.data*>(), check_safe.data*>()); - // test_contract2.data()[i] = check.data()[i]; - - printf("trace adj is %1.5e, %1.5e \n",trace_adj.real(), trace_adj.imag()); - printf("trace fwd is %1.5e, %1.5e \n",trace_fwd.real(), trace_fwd.imag()); - - double method_adj_check = sqrt(method_adj_diff)/(V*24.), adj_fwd_check = sqrt(adj_fwd_diff)/(V*24.); - double oom_error = pow(smear_param.n_steps,2) * pow(smear_param.epsilon,3); - - printf("Mean of mag errors between Safe and Hierarchical Adj methods (should be zero up to machine precision) = %1.5e \n", method_adj_check); - printf("Mean of mag errors between Adj and Fwd method (should be of *order* %1.5e) = %1.5e \n", oom_error, adj_fwd_check); + printf("The two numbers below should be complex conjugates of one another\n"); + printf(" is %1.5e, %1.5e \n",trace_adj.real(), trace_adj.imag()); + printf(" is %1.5e, %1.5e \n",trace_fwd.real(), trace_fwd.imag()); + printf("Fractional error of ( - .conj()) = %1.5e \n", trace_diff_err); if (verify_results) check_gauge(gauge, new_gauge, 1e-3, gauge_param.cpu_prec); From 6ae63740fb9129b729598c797f63f8ad1cb6dc20 Mon Sep 17 00:00:00 2001 From: rokarur Date: Thu, 12 Dec 2024 15:15:37 -0800 Subject: [PATCH 47/53] cleanup --- lib/check_params.h | 2 +- lib/interface_quda.cpp | 22 ---------------------- 2 files changed, 1 insertion(+), 23 deletions(-) diff --git a/lib/check_params.h b/lib/check_params.h index 4f1abc59a0..4ccd8f7cc6 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -1165,7 +1165,7 @@ void printQudaGaugeSmearParam(QudaGaugeSmearParam *param) if (param->n_steps <= param->adj_n_save ) { - logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) > n_steps (%d); adj_n_save manually altered: \n",param->n_steps,param->adj_n_save); + logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) >= n_steps (%d); adj_n_save manually altered: \n",param->n_steps,param->adj_n_save); if (param->n_steps == 1) param->adj_n_save = param->n_steps; else diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index f2adf51264..d8fc280e36 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5383,17 +5383,6 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu auto profile = pushProfile(profileAdjGFlowSafe); pushOutputPrefix("performAdjGFlowQudaSafe: "); checkGaugeSmearParam(smear_param); - - // if (smear_param->n_steps <= smear_param->adj_n_save ) { - - // logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) > n_steps (%d); adj_n_save manually altered: \n",smear_param->n_steps,smear_param->adj_n_save); - // if (smear_param->n_steps == 1) - // smear_param->adj_n_save = smear_param->n_steps; - // else - // smear_param->adj_n_save = smear_param->n_steps - 1; - // logQuda(QUDA_SUMMARIZE,"adj_n_save (%d) ; n_steps (%d) \n\n",smear_param->n_steps,smear_param->adj_n_save); - - // } pushVerbosity(inv_param->verbosity); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); @@ -5670,17 +5659,6 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu pushOutputPrefix("performAdjGFlowQudaHier: "); checkGaugeSmearParam(smear_param); - // if (smear_param->n_steps <= smear_param->adj_n_save ) { - - // logQuda(QUDA_SUMMARIZE,"Not good practice to adj_n_save (%d) > n_steps (%d); adj_n_save manually altered: \n",smear_param->n_steps,smear_param->adj_n_save); - // if (smear_param->n_steps == 1) - // smear_param->adj_n_save = smear_param->n_steps; - // else - // smear_param->adj_n_save = smear_param->n_steps - 1; - // logQuda(QUDA_SUMMARIZE,"adj_n_save (%d) ; n_steps (%d) \n\n",smear_param->n_steps,smear_param->adj_n_save); - - // } - // pushVerbosity(inv_param->verbosity); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printQudaInvertParam(inv_param); From b7abaf2d2ff438129205d30758c059fc8f9116b9 Mon Sep 17 00:00:00 2001 From: rokarur Date: Fri, 13 Dec 2024 15:28:38 -0800 Subject: [PATCH 48/53] extend to multirank --- lib/interface_quda.cpp | 4 ++-- tests/utils/host_utils.cpp | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index d8fc280e36..bfa8fae5e4 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5726,7 +5726,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu std::vector hier_list; //The first stage is saved at the very beginning, so its presence is implicit hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save); - logQuda(QUDA_SUMMARIZE,"hier list size (number of gauge fields to save) is %d\n",hier_list.size()); + logQuda(QUDA_SUMMARIZE,"hier list size (number of gauge fields to save) is %lu\n",hier_list.size()); if (threshold < hier_list.back()) {threshold = hier_list.back(); logQuda(QUDA_SUMMARIZE, "threshold changed to %d",threshold);} else logQuda(QUDA_SUMMARIZE, "threshold is %d\n",threshold); @@ -5769,7 +5769,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu logQuda(QUDA_DEBUG_VERBOSE,"Previous hier list elements: \n"); for (int j = 0; j < hier_list.size(); j++ ){ - logQuda(QUDA_DEBUG_VERBOSE,"%d \n",hier_list[j]); + logQuda(QUDA_DEBUG_VERBOSE,"%lu \n",hier_list[j]); } logQuda(QUDA_DEBUG_VERBOSE,"\n"); diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index ef4b24528e..519d8d9888 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -1242,8 +1242,6 @@ void check_gauge(void **oldG, void **newG, double epsilon, QudaPrecision precisi std::complex twoColorSpinorContract(std::complex *spinor1, std::complex *spinor2) { - - int row_inc = 1; int col_inc = 3; std::vector col_st{0, 1, 2}; @@ -1251,12 +1249,10 @@ std::complex twoColorSpinorContract(std::complex *spinor1, std:: complex test_contract[9 * V]; complex trace = {0. , 0.}; + double trace_re,trace_im; for (int i = 0; i < V; i++) { for (int ii = 0; ii < 9; ii++){ - int col_start = i * 9 + (ii % 3); - int row_start = i * 9 + ii - ((ii % 3)); - int which_col_idx = (ii % 3), which_row_idx = (ii - (ii % 3))/ 3; std::complex dot = {0.,0.}; @@ -1274,11 +1270,15 @@ std::complex twoColorSpinorContract(std::complex *spinor1, std:: } test_contract[i * 9 + ii] = dot; } - trace += (test_contract[i * 9] + test_contract[i * 9 + 4] + test_contract[i * 9 + 8]); } - - return trace; + trace_re = trace.real(); + trace_im = trace.imag(); + quda::comm_allreduce_sum(trace_re); + quda::comm_allreduce_sum(trace_im); + + std::complex trace_fin = {trace_re,trace_im}; + return trace_fin; } void createSiteLinkCPU(void *const *link, QudaPrecision precision, int phase) From 091ff768f9bdf55d75eded241eb60c7ef1c849aa Mon Sep 17 00:00:00 2001 From: rokarur Date: Fri, 13 Dec 2024 16:51:58 -0800 Subject: [PATCH 49/53] fix some warnings --- lib/interface_quda.cpp | 18 ++++++++++-------- tests/su3_fermion_test.cpp | 20 +++++++++----------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index bfa8fae5e4..93cc334e0f 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5438,7 +5438,8 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu double b = -8.; int comm_dim[4] = {}; - int measurement_n = 0; // The nth measurement to take + // Will add fermion measruement utilities later + // int measurement_n = 0; // The nth measurement to take // only switch on comms needed for directions with a derivative for (int i = 0; i < 4; i++) { comm_dim[i] = comm_dim_partitioned(i); } @@ -5531,6 +5532,7 @@ void adjSafeEvolve(std::vector> sf_list int &i_glob = meas_cinf[0].get(); int &measurement_n = meas_cinf[1].get(); + measurement_n = 0; int parity = 0; @@ -5726,7 +5728,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu std::vector hier_list; //The first stage is saved at the very beginning, so its presence is implicit hier_list = get_hier_list(smear_param->n_steps, n_b,smear_param->adj_n_save); - logQuda(QUDA_SUMMARIZE,"hier list size (number of gauge fields to save) is %lu\n",hier_list.size()); + logQuda(QUDA_SUMMARIZE,"hier list size (number of gauge fields to save) is %d\n",(int) hier_list.size()); if (threshold < hier_list.back()) {threshold = hier_list.back(); logQuda(QUDA_SUMMARIZE, "threshold changed to %d",threshold);} else logQuda(QUDA_SUMMARIZE, "threshold is %d\n",threshold); @@ -5742,7 +5744,7 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu } if (i > 0) std::swap(gout,gin); - for (unsigned int j = 0; j < hier_list[i]; j++){ + for (unsigned int j = 0; j < (unsigned int) hier_list[i]; j++){ if (j > 0) std::swap(gout,gin); WFlowStep(gout, gaugeTemp, gin, smear_param->epsilon, smear_param->smear_type); @@ -5768,8 +5770,8 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu adjSafeEvolve(sf_list,gf_list,smear_param,hier_list.back(),profileAdjGFlowHier,meas_cinf); logQuda(QUDA_DEBUG_VERBOSE,"Previous hier list elements: \n"); - for (int j = 0; j < hier_list.size(); j++ ){ - logQuda(QUDA_DEBUG_VERBOSE,"%lu \n",hier_list[j]); + for (int j = 0; j < (int) hier_list.size(); j++ ){ + logQuda(QUDA_DEBUG_VERBOSE,"%d \n", (int) hier_list[j]); } logQuda(QUDA_DEBUG_VERBOSE,"\n"); @@ -5795,12 +5797,12 @@ void performAdjGFlowHier(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu GaugeField g_1 = gauge_stages[ret_idx]; logQuda(QUDA_DEBUG_VERBOSE,"Modified hier list elements: \n"); - for (int j = 0; j < hier_list.size(); j++ ){ - logQuda(QUDA_DEBUG_VERBOSE,"%d \n",hier_list[j]); + for (int j = 0; j < (int) hier_list.size(); j++ ){ + logQuda(QUDA_DEBUG_VERBOSE,"%d \n",(int) hier_list[j]); } logQuda(QUDA_DEBUG_VERBOSE,"\n"); - for (unsigned int j = 0; j < hier_list[ret_idx]; j++){ + for (unsigned int j = 0; j < (unsigned int) hier_list[ret_idx]; j++){ if (j > 0) std::swap(g_2,g_1); WFlowStep(g_2, gaugeTemp, g_1, smear_param->epsilon, smear_param->smear_type); } diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index f51f3669f7..f4cc04dc7a 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -161,16 +161,16 @@ int main(int argc, char **argv) // Load the gauge field to the device loadGaugeQuda((void *)gauge, &gauge_param); saveGaugeQuda(new_gauge, &gauge_param); - - // Prepare various perf info - long long flops_plaquette = 6ll * 597 * V; - long long flops_ploop = 198ll * V + 6 * V / gauge_param.X[3]; - - // Prepare a gauge observable struct - QudaGaugeObservableParam param = newQudaGaugeObservableParam(); - // start the timer quda::host_timer_t host_timer, host_safe_timer, host_hier_timer, host_fwd_timer; + + // The commented out section is all geared towards gauge observables, so unlikely to be needed for now + // // Prepare various perf info + // long long flops_plaquette = 6ll * 597 * V; + // long long flops_ploop = 198ll * V + 6 * V / gauge_param.X[3]; + + // // Prepare a gauge observable struct + // QudaGaugeObservableParam param = newQudaGaugeObservableParam(); // The user may specify which measurements they wish to perform/omit // using the QudaGaugeObservableParam struct, and whether or not to @@ -197,8 +197,6 @@ int main(int argc, char **argv) obs_param[i].compute_qcharge = QUDA_BOOLEAN_TRUE; obs_param[i].su_project = su_project ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; } - - QudaGaugeObservableParam *obs_adj_safe(obs_param), *obs_adj_hier(obs_param); // We here set all the problem parameters for all possible smearing types. QudaGaugeSmearParam smear_param = newQudaGaugeSmearParam(); @@ -256,7 +254,7 @@ int main(int argc, char **argv) // else // check_norm.data()[i] = -1.*check.data()[i]; // } - + check_safe = quda::ColorSpinorField(cs_param); check_hier = quda::ColorSpinorField(cs_param); check_fwd = quda::ColorSpinorField(cs_param); From 5dea08eee830e36d03421f61ed3d473f853049ad Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 15 Dec 2024 12:39:57 -0800 Subject: [PATCH 50/53] modify laplacians in adjoint --- lib/interface_quda.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp index d48cfc84a2..8203e6b5c4 100644 --- a/lib/interface_quda.cpp +++ b/lib/interface_quda.cpp @@ -5482,7 +5482,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); - ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profileAdjGFlowSafe); + ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, comm_dim, profileAdjGFlowSafe); blas::ax(smear_param->epsilon * 3. / 4., f_temp4); @@ -5491,7 +5491,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); - ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profileAdjGFlowSafe); + ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, comm_dim, profileAdjGFlowSafe); blas::axpy(smear_param->epsilon * 8. / 9., f_temp4, f_temp3); @@ -5503,7 +5503,7 @@ void performAdjGFlowSafe(void *h_out, void *h_in, QudaInvertParam *inv_param, Qu copyExtendedGauge(precise, g_W0, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); - ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, false, comm_dim, profileAdjGFlowSafe); + ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, comm_dim, profileAdjGFlowSafe); blas::ax(smear_param->epsilon * 1. / 4., f_temp0); blas::axpy(1.,f_temp2, f_temp0); @@ -5574,7 +5574,7 @@ void adjSafeEvolve(std::vector> sf_list // [4] = Lap2 [0] copyExtendedGauge(precise, g_W2, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); - ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, false, comm_dim, profile); + ApplyLaplace(f_temp4, f_temp0, precise, 4, a, b, f_temp0, parity, comm_dim, profile); // [4] -> 3/4 eps [4] blas::ax(smear_param->epsilon * 3. / 4., f_temp4); @@ -5585,7 +5585,7 @@ void adjSafeEvolve(std::vector> sf_list // [4] = Lap1 [2] copyExtendedGauge(precise, g_W1, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); - ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, false, comm_dim, profile); + ApplyLaplace(f_temp4, f_temp2, precise, 4, a, b, f_temp2, parity, comm_dim, profile); // [3] -> [3] + 8/9 eps [4] blas::axpy(smear_param->epsilon * 8. / 9., f_temp4, f_temp3); @@ -5600,7 +5600,7 @@ void adjSafeEvolve(std::vector> sf_list // [0] <- Lap0 [4] copyExtendedGauge(precise, g_W0, QUDA_CUDA_FIELD_LOCATION); precise.exchangeGhost(); - ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, false, comm_dim, profile); + ApplyLaplace(f_temp0, f_temp4, precise, 4, a, b, f_temp4, parity, comm_dim, profile); // [0] <- 1/4 eps [0]; [0] <- [2] + [0]; [0] <- [1] + [0] blas::ax(smear_param->epsilon * 1. / 4., f_temp0); From 0683ad62ff9155aa6940f45a0f06e2f00ecca0e5 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sun, 15 Dec 2024 16:33:38 -0800 Subject: [PATCH 51/53] trace computation clang compatibility --- tests/utils/host_utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index 519d8d9888..bc5ca05682 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -1247,7 +1247,7 @@ std::complex twoColorSpinorContract(std::complex *spinor1, std:: std::vector col_st{0, 1, 2}; std::vector row_st{0, 3, 6}; - complex test_contract[9 * V]; + std::vector> test_contract(9 * V); complex trace = {0. , 0.}; double trace_re,trace_im; for (int i = 0; i < V; i++) { From 7d79cf2e136d809b552f5b8d099d6e1e1e3b5a49 Mon Sep 17 00:00:00 2001 From: rokarur Date: Sat, 28 Dec 2024 12:20:38 -0800 Subject: [PATCH 52/53] remove superflouous --- tests/su3_fermion_test.cpp | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/tests/su3_fermion_test.cpp b/tests/su3_fermion_test.cpp index f4cc04dc7a..3de53e4fc7 100644 --- a/tests/su3_fermion_test.cpp +++ b/tests/su3_fermion_test.cpp @@ -221,23 +221,8 @@ int main(int argc, char **argv) invParam.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; invParam.dirac_order = QUDA_DIRAC_ORDER; - constexpr int nSpin = 4; - constexpr int nColor = 3; - quda::ColorSpinorParam cs_param, cs_param_out; - cs_param.nColor = nColor; - cs_param.nSpin = nSpin; - cs_param.x = {xdim, ydim, zdim, tdim}; - cs_param.siteSubset = QUDA_FULL_SITE_SUBSET; - cs_param.setPrecision(invParam.cpu_prec); - cs_param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER; - cs_param.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; - cs_param.gammaBasis = invParam.gamma_basis; - cs_param.pc_type = QUDA_4D_PC; - cs_param.location = QUDA_CPU_FIELD_LOCATION; - cs_param.create = QUDA_NULL_FIELD_CREATE; - - cs_param_out = cs_param; - + quda::ColorSpinorParam cs_param; + constructWilsonTestSpinorParam(&cs_param, &invParam, &gauge_param); check = quda::ColorSpinorField(cs_param); //Add noise to spinor From 95b1a9df69206e4b4afb53c0946c59c16e599982 Mon Sep 17 00:00:00 2001 From: rkarur <79956637+rkarur@users.noreply.github.com> Date: Mon, 27 Jan 2025 23:59:14 -0800 Subject: [PATCH 53/53] Update CMakeLists.txt to remove vanilla_io --- tests/CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f2a0f8e1c5..27500dbde8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -182,11 +182,6 @@ if(QUDA_QIO) target_link_libraries(io_test ${TEST_LIBS}) quda_checkbuildtest(io_test QUDA_BUILD_ALL_TESTS) install(TARGETS io_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) - - add_executable(vanilla_io vanilla_io.cpp) - target_link_libraries(vanilla_io ${TEST_LIBS}) - quda_checkbuildtest(vanilla_io QUDA_BUILD_ALL_TESTS) - install(TARGETS vanilla_io ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() add_executable(tune_test tune_test.cpp)