Skip to content

Commit

Permalink
Merge branch 'ver5.0' of github.com:3dem/relion into ver5.0
Browse files Browse the repository at this point in the history
  • Loading branch information
biochem-fan committed Dec 21, 2023
2 parents eda1732 + 8514526 commit e648daa
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 12 deletions.
18 changes: 18 additions & 0 deletions src/acc/sycl/sycl_kernels/diff2_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,17 +146,27 @@ void sycl_kernel_diff2_coarse(
{
// translatePixel(x, y, z, tx, ty, tz, s_real[pix+init_pixel%block_sz], s_imag[pix+init_pixel%block_sz], real, imag);
XFLOAT val {x*tx + y*ty + z*tz};
#ifdef ACC_DOUBLE_PRECISION
XFLOAT s {sycl::sin(val)};
XFLOAT c {sycl::cos(val)};
#else
XFLOAT s {sycl::native::sin(val)};
XFLOAT c {sycl::native::cos(val)};
#endif
real = c * s_real[pix + init_pixel%block_sz] - s * s_imag[pix + init_pixel%block_sz];
imag = c * s_imag[pix + init_pixel%block_sz] + s * s_real[pix + init_pixel%block_sz];
}
else
{
// translatePixel(x, y, tx, ty, s_real[pix+init_pixel%block_sz], s_imag[pix+init_pixel%block_sz], real, imag);
XFLOAT val {x*tx + y*ty};
#ifdef ACC_DOUBLE_PRECISION
XFLOAT s {sycl::sin(val)};
XFLOAT c {sycl::cos(val)};
#else
XFLOAT s {sycl::native::sin(val)};
XFLOAT c {sycl::native::cos(val)};
#endif
real = c*s_real[pix + init_pixel%block_sz] - s*s_imag[pix + init_pixel%block_sz];
imag = c*s_imag[pix + init_pixel%block_sz] + s*s_real[pix + init_pixel%block_sz];
}
Expand Down Expand Up @@ -396,7 +406,11 @@ void sycl_kernel_diff2_CC_coarse(
}

if (tid == 0)
#ifdef ACC_DOUBLE_PRECISION
g_diff2[iorient*trans_num + itrans] += -s_weight[0] / sycl::sqrt(s_norm[0]);
#else
g_diff2[iorient*trans_num + itrans] += -s_weight[0] / sycl::native::sqrt(s_norm[0]);
#endif
}

template<bool REF3D, bool DATA3D, int block_sz>
Expand Down Expand Up @@ -517,7 +531,11 @@ void sycl_kernel_diff2_CC_fine(
}

if (tid < trans_num)
#ifdef ACC_DOUBLE_PRECISION
g_diff2s[d_job_idx[bid] + tid] += -s[tid * block_sz] / sycl::sqrt(s_cc[tid * block_sz]);
#else
g_diff2s[d_job_idx[bid] + tid] += -s[tid * block_sz] / sycl::native::sqrt(s_cc[tid * block_sz]);
#endif
}
}

Expand Down
47 changes: 47 additions & 0 deletions src/acc/sycl/sycl_kernels/helper_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,11 @@ static void sycl_kernel_exponentiate(
assert(count <= std::numeric_limits<int>::max());
sycl::queue *Q = dynamic_cast<devSYCL*>(devAcc)->getQueue();
#ifdef USE_MORE_ONEDPL
#ifdef ACC_DOUBLE_PRECISION
dpl::transform(dynamic_cast<devSYCL*>(devAcc)->getDevicePolicy(), g_array, g_array+count, g_array, [=](T v) { v+=add; v = (v<KERNEL_EXP_VALUE) ? static_cast<T>(0) : sycl::exp(v); return v;});
#else
dpl::transform(dynamic_cast<devSYCL*>(devAcc)->getDevicePolicy(), g_array, g_array+count, g_array, [=](T v) { v+=add; v = (v<KERNEL_EXP_VALUE) ? static_cast<T>(0) : sycl::native::exp(v); return v;});
#endif
#else
Q->submit([&](sycl::handler &cgh)
{
Expand All @@ -359,7 +363,11 @@ static void sycl_kernel_exponentiate(
if (a < KERNEL_EXP_VALUE)
g_array[idx] = static_cast<T>(0);
else
#ifdef ACC_DOUBLE_PRECISION
g_array[idx] = sycl::exp(a);
#else
g_array[idx] = sycl::native::exp(a);
#endif
}
});
}).wait_and_throw();
Expand Down Expand Up @@ -852,7 +860,11 @@ inline void powerClass(int gridSize,

// Compute the sine and cosine of `val` and hand them back together.
//
// @param val  Angle argument forwarded unchanged to the SYCL math functions.
// @return     A pair whose `first` is the sine and whose `second` is the cosine.
//
// When ACC_DOUBLE_PRECISION is defined the standard sycl::sin / sycl::cos are
// called; otherwise the sycl::native:: counterparts are called instead.
inline std::pair<XFLOAT, XFLOAT> sycl_sincos(XFLOAT val)
{
#ifdef ACC_DOUBLE_PRECISION
	const XFLOAT sinVal = sycl::sin(val);
	const XFLOAT cosVal = sycl::cos(val);
#else
	const XFLOAT sinVal = sycl::native::sin(val);
	const XFLOAT cosVal = sycl::native::cos(val);
#endif
	return std::pair<XFLOAT, XFLOAT>{sinVal, cosVal};
}

inline void translatePixel(
Expand All @@ -866,8 +878,13 @@ inline void translatePixel(
XFLOAT &tImag)
{
XFLOAT v = x * tx + y * ty;
#ifdef ACC_DOUBLE_PRECISION
XFLOAT s = sycl::sin(v);
XFLOAT c = sycl::cos(v);
#else
XFLOAT s = sycl::native::sin(v);
XFLOAT c = sycl::native::cos(v);
#endif

tReal = c * real - s * imag;
tImag = c * imag + s * real;
Expand All @@ -886,8 +903,13 @@ inline void translatePixel(
XFLOAT &tImag)
{
XFLOAT v = x * tx + y * ty + z * tz;
#ifdef ACC_DOUBLE_PRECISION
XFLOAT s = sycl::sin(v);
XFLOAT c = sycl::cos(v);
#else
XFLOAT s = sycl::native::sin(v);
XFLOAT c = sycl::native::cos(v);
#endif

tReal = c * real - s * imag;
tImag = c * imag + s * real;
Expand Down Expand Up @@ -916,15 +938,25 @@ inline void computeSincosLookupTable2D(unsigned long trans_num,
for(int x=0; x<xSize; x++) {
unsigned long index = i * xSize + x;
XFLOAT v = x * tx;
#ifdef ACC_DOUBLE_PRECISION
sin_x[index] = sycl::sin(v);
cos_x[index] = sycl::cos(v);
#else
sin_x[index] = sycl::native::sin(v);
cos_x[index] = sycl::native::cos(v);
#endif
}

for(int y=0; y<ySize; y++) {
unsigned long index = i * ySize + y;
XFLOAT v = y * ty;
#ifdef ACC_DOUBLE_PRECISION
sin_y[index] = sycl::sin(v);
cos_y[index] = sycl::cos(v);
#else
sin_y[index] = sycl::native::sin(v);
cos_y[index] = sycl::native::cos(v);
#endif
}
}
}
Expand All @@ -951,22 +983,37 @@ inline void computeSincosLookupTable3D(unsigned long trans_num,
for(int x=0; x<xSize; x++) {
unsigned long index = i * xSize + x;
XFLOAT v = x * tx;
#ifdef ACC_DOUBLE_PRECISION
sin_x[index] = sycl::sin(v);
cos_x[index] = sycl::cos(v);
#else
sin_x[index] = sycl::native::sin(v);
cos_x[index] = sycl::native::cos(v);
#endif
}

for(int y=0; y<ySize; y++) {
unsigned long index = i * ySize + y;
XFLOAT v = y * ty;
#ifdef ACC_DOUBLE_PRECISION
sin_y[index] = sycl::sin(v);
cos_y[index] = sycl::cos(v);
#else
sin_y[index] = sycl::native::sin(v);
cos_y[index] = sycl::native::cos(v);
#endif
}

for(int z=0; z<zSize; z++) {
unsigned long index = i * zSize + z;
XFLOAT v = z * tz;
#ifdef ACC_DOUBLE_PRECISION
sin_z[index] = sycl::sin(v);
cos_z[index] = sycl::cos(v);
#else
sin_z[index] = sycl::native::sin(v);
cos_z[index] = sycl::native::cos(v);
#endif
}
}
}
Expand Down
27 changes: 15 additions & 12 deletions src/ml_optimiser_mpi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -745,25 +745,28 @@ will still yield good performance and possibly a more stable execution. \n" << s

initialiseWorkLoad();

// Only the first follower calculates the sigma2_noise spectra and sets initial guesses for Iref
if (node->rank == 1)
{
MlOptimiser::initialiseSigma2Noise();
MlOptimiser::initialiseReferences();
}
// Only the first follower calculates the sigma2_noise spectra (and if fn_ref == None, later sets initial guesses for Iref)
if (node->rank == 1) MlOptimiser::initialiseSigma2Noise();

//Now the first follower broadcasts resulting Iref and sigma2_noise to everyone else
MlOptimiser::initialiseReferences();

// Now the first follower broadcasts resulting sigma2_noise to everyone else
for (int i = 0; i < mymodel.sigma2_noise.size(); i++)
{
node->relion_MPI_Bcast(MULTIDIM_ARRAY(mymodel.sigma2_noise[i]),
MULTIDIM_SIZE(mymodel.sigma2_noise[i]), MY_MPI_DOUBLE, 1, MPI_COMM_WORLD);
}
for (int i = 0; i < mymodel.Iref.size(); i++)
{
node->relion_MPI_Bcast(MULTIDIM_ARRAY(mymodel.Iref[i]),
MULTIDIM_SIZE(mymodel.Iref[i]), MY_MPI_DOUBLE, 1, MPI_COMM_WORLD);
}

// Also broadcast Iref if that was set in initialiseSigma2Noise
if (fn_ref == "None")
{
for (int i = 0; i < mymodel.Iref.size(); i++)
{
node->relion_MPI_Bcast(MULTIDIM_ARRAY(mymodel.Iref[i]),
MULTIDIM_SIZE(mymodel.Iref[i]), MY_MPI_DOUBLE, 1, MPI_COMM_WORLD);
}
}

// Initialise the data_versus_prior ratio to get the initial current_size right
if (iter == 0 && !do_initialise_bodies && !node->isLeader())
mymodel.initialiseDataVersusPrior(fix_tau); // fix_tau was set in initialiseGeneral
Expand Down

0 comments on commit e648daa

Please sign in to comment.