diff --git a/.azure-pipelines/xfel/download-source.yml b/.azure-pipelines/xfel/download-source.yml index b59af39774..7c2b32d412 100644 --- a/.azure-pipelines/xfel/download-source.yml +++ b/.azure-pipelines/xfel/download-source.yml @@ -61,7 +61,9 @@ jobs: # preserve permissions and delete extra files - script: | cd $(Pipeline.Workspace) + mv modules/kokkos/.git modules/kokkos/.git.tmp rm -fr modules/*/.git/* + mv modules/kokkos/.git.tmp modules/kokkos/.git rm -fr modules/*/.svn/* rm -fr modules/*.tar tar -cf modules.tar modules diff --git a/.azure-pipelines/xfel/unix-conda-build.yml b/.azure-pipelines/xfel/unix-conda-build.yml index 3766d91335..bece4d695b 100644 --- a/.azure-pipelines/xfel/unix-conda-build.yml +++ b/.azure-pipelines/xfel/unix-conda-build.yml @@ -95,6 +95,7 @@ steps: libtbx.configure lunus make fi + rm -fr modules/kokkos/.git/* displayName: Configure and Build # test diff --git a/kokkostbx/Legacy/kokkos_matrix.h b/kokkostbx/Legacy/kokkos_matrix.h index 140d69e01d..4a7f07077a 100644 --- a/kokkostbx/Legacy/kokkos_matrix.h +++ b/kokkostbx/Legacy/kokkos_matrix.h @@ -156,7 +156,7 @@ namespace kokkostbx { } KOKKOS_INLINE_FUNCTION NumType length() const { - return ::Kokkos::Experimental::sqrt(length_sqr()); + return ::Kokkos::sqrt(length_sqr()); } KOKKOS_INLINE_FUNCTION NumType dot(const matrix& v) const { diff --git a/kokkostbx/Legacy/kokkos_matrix3.h b/kokkostbx/Legacy/kokkos_matrix3.h index 96105beb3a..561f6b36b8 100644 --- a/kokkostbx/Legacy/kokkos_matrix3.h +++ b/kokkostbx/Legacy/kokkos_matrix3.h @@ -147,7 +147,7 @@ namespace kokkostbx { } KOKKOS_INLINE_FUNCTION NumType length() const { - return ::Kokkos::Experimental::sqrt(length_sqr()); + return ::Kokkos::sqrt(length_sqr()); } KOKKOS_INLINE_FUNCTION NumType dot(const matrix3& v) const { diff --git a/kokkostbx/Legacy/kokkos_vector.h b/kokkostbx/Legacy/kokkos_vector.h index 76beaaa245..f34de4586e 100644 --- a/kokkostbx/Legacy/kokkos_vector.h +++ b/kokkostbx/Legacy/kokkos_vector.h @@ -215,7 +215,7 @@ namespace kokkostbx { } KOKKOS_INLINE_FUNCTION NumType length() const { - return ::Kokkos::Experimental::sqrt(length_sqr()); + return ::Kokkos::sqrt(length_sqr()); } KOKKOS_INLINE_FUNCTION NumType dot(const vector3& v) const { @@ -248,8 +248,8 @@ namespace kokkostbx { // rotate a point about a unit vector3 axis KOKKOS_INLINE_FUNCTION vector3 rotate_around_axis(const vector3& axis, NumType angle) const { - NumType sinphi = ::Kokkos::Experimental::sin(angle); - NumType cosphi = ::Kokkos::Experimental::cos(angle); + NumType sinphi = ::Kokkos::sin(angle); + NumType cosphi = ::Kokkos::cos(angle); NumType dot_factor = axis.dot(*this) * (1.0-cosphi); vector3 vector_rot = axis.cross(*this) * sinphi; diff --git a/kokkostbx/Legacy/kokkos_vector3.h b/kokkostbx/Legacy/kokkos_vector3.h index 802a3c8e26..39a316a9dc 100644 --- a/kokkostbx/Legacy/kokkos_vector3.h +++ b/kokkostbx/Legacy/kokkos_vector3.h @@ -144,7 +144,7 @@ namespace kokkostbx { } KOKKOS_INLINE_FUNCTION NumType length() const { - return ::Kokkos::Experimental::sqrt(length_sqr()); + return ::Kokkos::sqrt(length_sqr()); } KOKKOS_INLINE_FUNCTION NumType dot(const vector3& v) const { @@ -177,8 +177,8 @@ namespace kokkostbx { // rotate a point about a unit vector3 axis KOKKOS_INLINE_FUNCTION vector3 rotate_around_axis(const vector3& axis, NumType angle) const { - NumType sinphi = ::Kokkos::Experimental::sin(angle); - NumType cosphi = ::Kokkos::Experimental::cos(angle); + NumType sinphi = ::Kokkos::sin(angle); + NumType cosphi = ::Kokkos::cos(angle); NumType dot_factor = axis.dot(*this) * 
(1.0-cosphi); vector3 vector_rot = axis.cross(*this) * sinphi; diff --git a/kokkostbx/SConscript b/kokkostbx/SConscript index 61212f0261..19e522c6f8 100644 --- a/kokkostbx/SConscript +++ b/kokkostbx/SConscript @@ -22,7 +22,7 @@ if env_etc.enable_kokkos: if os.getenv('KOKKOS_ARCH') is None: os.environ['KOKKOS_ARCH'] = "HSW" if use_cuda and os.getenv('KOKKOS_CUDA_OPTIONS') is None: - os.environ['KOKKOS_CUDA_OPTIONS'] = "enable_lambda,force_uvm" + os.environ['KOKKOS_CUDA_OPTIONS'] = "enable_lambda" os.environ['CXXFLAGS'] = '-O3 -fPIC -DCUDAREAL=double' library_flags = "-Llib" @@ -37,10 +37,6 @@ if env_etc.enable_kokkos: linked_libraries += " -lcudart -lcuda" os.environ['LDLIBS'] = linked_libraries - cxx_standard = '14' - if use_sycl: - cxx_standard = '17' - original_cxx = None kokkos_lib = 'libkokkos.a' kokkos_cxxflags = None @@ -119,7 +115,7 @@ if env_etc.enable_kokkos: returncode = subprocess.call([ 'cmake', os.environ['KOKKOS_PATH'], - '-DCMAKE_CXX_STANDARD={}'.format(cxx_standard), + '-DCMAKE_CXX_STANDARD={}'.format('17'), '-DCMAKE_INSTALL_PREFIX={}'.format(libtbx.env.under_build('.')), '-DCMAKE_INSTALL_LIBDIR=lib', '-DBUILD_SHARED_LIBS={}'.format(OnOff[True]), @@ -138,9 +134,9 @@ if env_etc.enable_kokkos: '-DKokkos_ENABLE_SERIAL=ON', '-DKokkos_ENABLE_OPENMP={}'.format(OnOff[use_openmp]), '-DKokkos_ENABLE_CUDA={}'.format(OnOff[use_cuda]), - '-DKokkos_ENABLE_CUDA_UVM={}'.format(OnOff[use_cuda]), '-DKokkos_ENABLE_HIP={}'.format(OnOff[use_hip]), - '-DKokkos_ENABLE_SYCL={}'.format(OnOff[use_sycl]) + '-DKokkos_ENABLE_SYCL={}'.format(OnOff[use_sycl]), + '-DKokkos_ENABLE_IMPL_MDSPAN=ON' ], cwd=kokkos_build_dir) diff --git a/kokkostbx/kokkos_types.h b/kokkostbx/kokkos_types.h index 619fa20568..3ea314437c 100644 --- a/kokkostbx/kokkos_types.h +++ b/kokkostbx/kokkos_types.h @@ -3,10 +3,10 @@ #include #ifdef KOKKOS_ENABLE_CUDA - #define MemSpace Kokkos::CudaUVMSpace + #define MemSpace Kokkos::CudaSpace #endif #ifdef KOKKOS_ENABLE_HIP - #define MemSpace Kokkos::Experimental::HIPSpace + #define MemSpace Kokkos::HIPSpace #endif #ifdef KOKKOS_ENABLE_OPENMPTARGET #define MemSpace Kokkos::OpenMPTargetSpace @@ -19,8 +19,11 @@ using ExecSpace = MemSpace::execution_space; using range_policy = Kokkos::RangePolicy; -template -using view_1d_t = Kokkos::View; +template using view_1d_t = Kokkos::View; +template using view_4d_t = Kokkos::View; +template using view_5d_t = Kokkos::View; +template using view_6d_t = Kokkos::View; +template using view_6d6_t = Kokkos::View; using vector_bool_t = view_1d_t; using vector_double_t = view_1d_t; diff --git a/kokkostbx/kokkos_utils.cpp b/kokkostbx/kokkos_utils.cpp index ee46cc35d5..02bb35994b 100644 --- a/kokkostbx/kokkos_utils.cpp +++ b/kokkostbx/kokkos_utils.cpp @@ -18,4 +18,20 @@ void transfer_double2kokkos(vector_cudareal_t& dst, const double* src, const siz } } +void transfer_vector2kokkos(view_1d_t& dst, const std::vector& src) { + if (true) { + // printf("== Transfer %s from %p\n", dst.label().c_str(), (void*) dst.data()); + // printf(" - size src|dst: %d|%d\n", src.size(), dst.span() ); + } + if (dst.span() < src.size()) { + resize(dst, src.size()); + // printf(" - size changed, new size: %d\n", dst.span() ); + } + auto host_view = Kokkos::create_mirror_view(dst); + for (int i = 0; i < src.size(); ++i) { + host_view(i) = src[i]; + } + Kokkos::deep_copy(dst, host_view); +} + } // namespace kokkostbx diff --git a/kokkostbx/kokkos_utils.h b/kokkostbx/kokkos_utils.h index e82a776ebc..4b0105ab0b 100644 --- a/kokkostbx/kokkos_utils.h +++ b/kokkostbx/kokkos_utils.h @@ -78,6 
+78,8 @@ void transfer_kokkos2shared(af::shared& dst, const view_1d_t& src) { transfer_kokkos2X(dst, src); } +void transfer_vector2kokkos(view_1d_t& dst, const std::vector& src); + template void transfer_vector2kokkos(view_1d_t& dst, const std::vector& src) { if (true) { @@ -88,11 +90,9 @@ void transfer_vector2kokkos(view_1d_t& dst, const std::vector& src) { resize(dst, src.size()); // printf(" - size changed, new size: %d\n", dst.span() ); } - auto host_view = Kokkos::create_mirror_view(dst); - for (int i = 0; i < src.size(); ++i) { - host_view(i) = src[i]; - } - Kokkos::deep_copy(dst, host_view); + auto host_view = Kokkos::View(src.data(), src.size()); + auto dst_subview = Kokkos::subview(dst, std::pair(0, src.size())); + Kokkos::deep_copy(dst_subview, host_view); } template diff --git a/kokkostbx/kokkos_vector.h b/kokkostbx/kokkos_vector.h index e31d905bb6..5bda8e86d7 100644 --- a/kokkostbx/kokkos_vector.h +++ b/kokkostbx/kokkos_vector.h @@ -207,8 +207,9 @@ struct vector_base { } KOKKOS_FUNCTION void operator/=(const NumType& v) { + const NumType v_r = 1 / v; for (size_t i = 0; i < size; ++i) { - data[i] /= v; + data[i] *= v_r; } } @@ -261,14 +262,15 @@ struct vector_base { KOKKOS_FUNCTION NumType length() const { // return sqrt_func(length_sqr()); - return ::Kokkos::Experimental::sqrt(length_sqr()); + return ::Kokkos::sqrt(length_sqr()); } KOKKOS_FUNCTION void normalize() { NumType l = length(); if (l > 0) { + NumType l_r = 1 / l; for (size_t i = 0; i < size; ++i) { - data[i] /= l; + data[i] *= l_r; } } } @@ -277,8 +279,9 @@ struct vector_base { NumType l = length(); Derived unit_vector{}; if (l > 0) { + NumType l_r = 1 / l; for (size_t i = 0; i < size; ++i) { - unit_vector[i] = data[i] / l; + unit_vector[i] = data[i] * l_r; } } return unit_vector; diff --git a/kokkostbx/kokkos_vector3.h b/kokkostbx/kokkos_vector3.h index 98d46ee7dc..058540eb07 100644 --- a/kokkostbx/kokkos_vector3.h +++ b/kokkostbx/kokkos_vector3.h @@ -10,8 +10,8 @@ // #ifdef KOKKOS_CORE_HPP // template KOKKOS_FUNCTION T sin_func(T x) { return -// ::Kokkos::Experimental::sin(x); } template KOKKOS_FUNCTION T cos_func(T x) { -// return ::Kokkos::Experimental::cos(x); } +// ::Kokkos::sin(x); } template KOKKOS_FUNCTION T cos_func(T x) { +// return ::Kokkos::cos(x); } // #else // #include // template KOKKOS_FUNCTION T sin_func(T x) { return sin(x); } @@ -27,26 +27,26 @@ struct vector3 : public vector { using vector_base = kokkostbx::vector; vector3() = default; - KOKKOS_FUNCTION vector3(NumType val) : vector_base(val){}; - KOKKOS_FUNCTION vector3(NumType arr[]) : vector_base(arr){}; - KOKKOS_FUNCTION vector3(const vector_base& vec) : vector_base(vec){}; + KOKKOS_INLINE_FUNCTION vector3(NumType val) : vector_base(val){}; + KOKKOS_INLINE_FUNCTION vector3(NumType arr[]) : vector_base(arr){}; + KOKKOS_INLINE_FUNCTION vector3(const vector_base& vec) : vector_base(vec){}; - KOKKOS_FUNCTION vector3(NumType x, NumType y, NumType z) : vector_base() { + KOKKOS_INLINE_FUNCTION vector3(NumType x, NumType y, NumType z) : vector_base() { vector_base::data[0] = x; vector_base::data[1] = y; vector_base::data[2] = z; } // decided against using properties, as this would increase the size of the class - KOKKOS_FUNCTION NumType& x_val() { return vector_base::data[0]; } - KOKKOS_FUNCTION NumType& y_val() { return vector_base::data[1]; } - KOKKOS_FUNCTION NumType& z_val() { return vector_base::data[2]; } + KOKKOS_INLINE_FUNCTION NumType& x_val() { return vector_base::data[0]; } + KOKKOS_INLINE_FUNCTION NumType& y_val() { return 
vector_base::data[1]; } + KOKKOS_INLINE_FUNCTION NumType& z_val() { return vector_base::data[2]; } - KOKKOS_FUNCTION NumType x_val() const { return vector_base::data[0]; } - KOKKOS_FUNCTION NumType y_val() const { return vector_base::data[1]; } - KOKKOS_FUNCTION NumType z_val() const { return vector_base::data[2]; } + KOKKOS_INLINE_FUNCTION NumType x_val() const { return vector_base::data[0]; } + KOKKOS_INLINE_FUNCTION NumType y_val() const { return vector_base::data[1]; } + KOKKOS_INLINE_FUNCTION NumType z_val() const { return vector_base::data[2]; } - KOKKOS_FUNCTION vector3 cross(const vector3& v) const { + KOKKOS_INLINE_FUNCTION vector3 cross(const vector3& v) const { vector3 cross_vector{}; cross_vector.x_val() = y_val() * v.z_val() - z_val() * v.y_val(); cross_vector.y_val() = z_val() * v.x_val() - x_val() * v.z_val(); @@ -56,14 +56,14 @@ struct vector3 : public vector { } // rotate a point around a unit vector3 axis - KOKKOS_FUNCTION vector3 rotate_around_axis(const vector3& axis, NumType angle) + KOKKOS_INLINE_FUNCTION vector3 rotate_around_axis(const vector3& axis, NumType angle) const { // NumType sinphi = sin_func(angle); // NumType cosphi = cos_func(angle); - NumType sinphi = ::Kokkos::Experimental::sin(angle); - NumType cosphi = ::Kokkos::Experimental::cos(angle); + const NumType sinphi = ::Kokkos::sin(angle); + const NumType cosphi = ::Kokkos::cos(angle); - NumType dot_factor = axis.dot(*this) * (1.0 - cosphi); + const NumType dot_factor = axis.dot(*this) * (1.0 - cosphi); vector3 vector_rot = axis.cross(*this) * sinphi; vector_rot += axis * dot_factor; diff --git a/libtbx/auto_build/bootstrap.py b/libtbx/auto_build/bootstrap.py index 5fbfca691d..fc9ce3f3cc 100644 --- a/libtbx/auto_build/bootstrap.py +++ b/libtbx/auto_build/bootstrap.py @@ -1006,17 +1006,17 @@ class xia2_module(SourceModule): class kokkos_module(SourceModule): module = 'kokkos' - anonymous = ['git', '-b 3.7.01', + anonymous = ['git', '-b 4.2.00', 'git@github.com:kokkos/kokkos.git', 'https://github.com/kokkos/kokkos.git', - 'https://github.com/kokkos/kokkos/archive/refs/tags/3.7.01.zip'] + 'https://github.com/kokkos/kokkos/archive/refs/tags/4.2.00.zip'] class kokkos_kernels_module(SourceModule): module = 'kokkos-kernels' - anonymous = ['git', '-b 3.7.01', + anonymous = ['git', '-b 4.2.00', 'git@github.com:kokkos/kokkos-kernels.git', 'https://github.com/kokkos/kokkos-kernels.git', - 'https://github.com/kokkos/kokkos-kernels/archive/refs/tags/3.7.01.zip'] + 'https://github.com/kokkos/kokkos-kernels/archive/refs/tags/4.2.00.zip'] # Duke repositories class probe_module(SourceModule): diff --git a/simtbx/command_line/complete_an_F.py b/simtbx/command_line/complete_an_F.py new file mode 100644 index 0000000000..1bf34dad3e --- /dev/null +++ b/simtbx/command_line/complete_an_F.py @@ -0,0 +1,48 @@ +from __future__ import division, print_function +from argparse import ArgumentParser +parser = ArgumentParser() +parser.add_argument("mtzin", help="input mtz file", type=str) +parser.add_argument("mtzout", help="output mtz file", type=str) +args = parser.parse_args() + +# LIBTBX_SET_DISPATCHER_NAME diffBragg.completeF + +import numpy as np +from iotbx.reflection_file_reader import any_reflection_file +from dials.array_family import flex +from scipy.interpolate import interp1d +from cctbx import miller + +F = any_reflection_file(args.mtzin).as_miller_arrays()[0] +F = F.as_amplitude_array() +if not F.is_xray_amplitude_array(): + F = F.set_observation_type_xray_amplitude() + +print("Bin-ID Res-range Completeness #ASU-indices") 
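+# Show per-resolution-bin completeness of the input amplitudes, then build the
+# complete Miller set out to d_min and fill any missing indices by interpolating
+# |F| against d-spacing (clamped to the end-point amplitudes outside the observed range).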
+F.show_completeness() +d_max,d_min = F.resolution_range() +print("d_min, d_max (Angstrom): ", d_min, d_max) +mset_full = F.build_miller_set(False, d_min=d_min) +mset_full_d = {h: d for h,d in zip(mset_full.d_spacings().indices(), mset_full.d_spacings().data())} +Fmap = {h:val for h,val in zip(F.indices(), F.data())} +xvals = np.array(F.d_spacings().data()) +yvals = np.array(F.data()) +fill_vals = yvals[np.argmin(xvals)], yvals[np.argmax(xvals)] +I = interp1d(xvals, yvals, fill_value=fill_vals, bounds_error=False) +data = [] +for h in mset_full.indices(): + if h not in Fmap: + d_h = mset_full_d[h] + amp = I(d_h) + else: + amp = Fmap[h] + data.append(amp) + +complete_amps = flex.double(data) +complete_inds = mset_full.indices() +ma = miller.array(mset_full, complete_amps) +if not ma.is_xray_amplitude_array(): + ma = ma.set_observation_type_xray_amplitude() +ma = ma.as_anomalous_array() +assert ma.anomalous_flag() +ma.as_mtz_dataset(column_root_label="F").mtz_object().write(args.mtzout) diff --git a/simtbx/command_line/estimate_Ncells_Eta.py b/simtbx/command_line/estimate_Ncells_Eta.py new file mode 100644 index 0000000000..d6ba9476a4 --- /dev/null +++ b/simtbx/command_line/estimate_Ncells_Eta.py @@ -0,0 +1,111 @@ +from __future__ import division +from argparse import ArgumentParser +parser = ArgumentParser() +parser.add_argument("dirname", help="still process output folder", type=str) +parser.add_argument("--updatePhil", default=None, help="name of an exisiting stage 1 phil file to update (just the init.Ncells portion)", type=str) +parser.add_argument("--expSuffix", help="extension of refined experiments", type=str, default="_refined.expt") +parser.add_argument("--thresh", type=float, default=7, help="MAD score for outliers (default=7 standard deviation above the median)") +parser.add_argument("--useMean", action="store_true", help="set Eta and Nabc using the mean (default is median)") +parser.add_argument("--NabcMax", type=float, default=70, help="If estaimated Nabc is above this value, it will set to this value") +parser.add_argument("--NabcMin", type=float, default=5, help="If estaimated Nabc is BELOW this value, it will be set to this value") +parser.add_argument("--EtaMax", type=float, default=0.5, help="If estimated Eta is above this range, it will be set to this value") +parser.add_argument("--EtaMin", type=float, default=1e-3, help="If estimated Eta is BELOW this range, it will be set to this value") + +#parser.add_argument("--njobs", type=int, default=5, help="number of jobs (only runs on single node, no MPI)") +parser.add_argument("--plot", action="store_true", help="show a histogram at the end") +args = parser.parse_args() +# LIBTBX_SET_DISPATCHER_NAME diffBragg.estimate_Ncells_Eta +from mpi4py import MPI +COMM = MPI.COMM_WORLD +#from joblib import Parallel, delayed +import json +import numpy as np +from cctbx import uctbx +from scitbx.matrix import sqr +import os +import glob +from dxtbx.model import ExperimentList + +glob_s = os.path.join(args.dirname, "*%s" % args.expSuffix) +fnames = glob.glob(glob_s) + +#def main(jid): +all_Ns = [] +all_mos_spreads = [] +for i, f in enumerate(fnames): + if i % COMM.size != COMM.rank: + continue + print(f) + #Cs = ExperimentList.from_file(f, False).crystals() + Cs = json.load(open(f, 'r'))['crystal'] + dom_sizes = np.array([C['ML_domain_size_ang'] for C in Cs]) + mos_spreads = [2*C['ML_half_mosaicity_deg'] for C in Cs] + uc_vols = [] + for C in Cs: + a = C['real_space_a'] + b = C['real_space_b'] + c = C['real_space_c'] + uc = 
uctbx.unit_cell(orthogonalization_matrix=sqr(a + b + c).transpose()) + uc_vols.append(uc.volume()) + + Ns = dom_sizes / np.power(uc_vols, 1 / 3.) + all_Ns += list(Ns) + all_mos_spreads += list(mos_spreads) +# return all_Ns, all_mos_spreads + +all_Ns = COMM.reduce(all_Ns) +all_mos_spreads = COMM.reduce(all_mos_spreads) +#results = Parallel(n_jobs=args.njobs)(delayed(main)(j) for j in range(args.njobs)) +#all_Ns = [] +#all_mos_spreads = [] +#for N,mos in results: +# all_Ns += N +# all_mos_spreads += mos + +if COMM.rank==0: + import pandas + import pylab as plt + from simtbx.diffBragg import utils + all_Ns = np.array(all_Ns) + all_mos_spreads = np.array(all_mos_spreads) + bad_Ns = utils.is_outlier(all_Ns, args.thresh) + bad_mos_spreads = utils.is_outlier(all_mos_spreads, args.thresh) + is_bad = np.logical_or(bad_Ns, bad_mos_spreads) + print("Removing %d outlier estiamtes" % is_bad.sum()) + all_Ns = all_Ns[~is_bad] + all_mos_spreads = all_mos_spreads[~is_bad] + + df = pandas.DataFrame({"Ncells": all_Ns, "mos_spread_deg": all_mos_spreads}) + print(df.Ncells.describe()) + print(df.mos_spread_deg.describe()) + if args.useMean: + mean_N = df.Ncells.mean() + mean_mos = df.mos_spread_deg.mean() + else: + mean_N = df.Ncells.median() + mean_mos = df.mos_spread_deg.median() + print("mean Ncells=%f" % mean_N) + print("mean mos_spread=%f (deg.)" % mean_mos) + + if mean_mos > args.EtaMax or mean_mos < args.EtaMin: + temp = mean_mos + mean_mos = args.EtaMax if mean_mos > args.EtaMax else args.EtaMin + print("Estimated Eta=%f, setting it to %f" % (temp, mean_mos)) + if mean_N > args.NabcMax or mean_N < args.NabcMin: + temp = mean_N + mean_N = args.NabcMax if mean_N > args.NabcMax else args.NabcMin + print("Estimated N=%f, setting it to %f" %(temp, mean_N)) + + phil = """\ninit {{ + Nabc = [{n},{n},{n}] + eta_abc = [{m},{m},{m}] + }}\n""".format(n=round(mean_N,4), m=mean_mos) + + if args.updatePhil is not None: + with open(args.updatePhil, "r+") as o: + s = o.read() + s += phil + o.write(s) + if args.plot: + df.hist(bins=100, log=True) + plt.show() diff --git a/simtbx/command_line/hopper.py b/simtbx/command_line/hopper.py index cfd0e8ac75..32f8216d90 100644 --- a/simtbx/command_line/hopper.py +++ b/simtbx/command_line/hopper.py @@ -96,6 +96,7 @@ def run(self): assert os.path.exists(self.params.exp_ref_spec_file) input_lines = None best_models = None + pd_dir = os.path.join(self.params.outdir, "pandas") if COMM.rank == 0: input_lines = open(self.params.exp_ref_spec_file, "r").readlines() if self.params.skip is not None: @@ -113,6 +114,10 @@ def run(self): if self.params.gathers_dir is None: raise ValueError("Need to provide a file dir path in order to dump_gathers") utils.safe_makedirs(self.params.gathers_dir) + + utils.safe_makedirs(pd_dir) + + COMM.barrier() input_lines = COMM.bcast(input_lines) best_models = COMM.bcast(best_models) @@ -127,53 +132,63 @@ def run(self): exp_gatheredRef_spec = [] # optional list of expt, refls, spectra trefs = [] - for i_exp, line in enumerate(input_lines): - if i_exp == self.params.max_process: + this_rank_dfs = [] # dataframes storing the modeling results for each shot + for i_shot, line in enumerate(input_lines): + if i_shot == self.params.max_process: break - if i_exp % COMM.size != COMM.rank: + if i_shot % COMM.size != COMM.rank: continue - logging.info("COMM.rank %d on shot %d / %d" % (COMM.rank, i_exp + 1, len(input_lines))) + logging.info("COMM.rank %d on shot %d / %d" % (COMM.rank, i_shot + 1, len(input_lines))) line_fields = line.strip().split() - assert 
len(line_fields) in [2, 3] - if len(line_fields) == 2: - exp, ref = line_fields - spec = None - else: - exp, ref, spec = line_fields + num_fields = len(line_fields) + assert num_fields in [2, 3, 4] + exp, ref = line_fields[:2] + spec = None + exp_idx = 0 + if num_fields==3: + try: + exp_idx = int(line_fields[2]) + except ValueError: + spec = line_fields[2] + exp_idx = 0 + elif num_fields==4: + assert os.path.isfile(line_fields[2]) + spec = line_fields[2] + exp_idx = int(line_fields[3]) if self.params.ignore_existing: basename = os.path.splitext(os.path.basename(exp))[0] exists = False - for ii in [i_exp, 0]: - opt_exp = "%s_%s_%d.expt" % (self.params.tag, basename, ii) + for ii in [i_shot, 0]: + opt_exp = "%s_%s_%d_%d.expt" % (self.params.tag, basename, exp_idx, ii) opt_refl = opt_exp.replace(".expt", ".refl") if opt_exp in exp_names_already and opt_refl in refl_names_already: exists = True break if exists: - print("Found existing!! %d" % i_exp) + print("Found existing!! %d" % i_shot) continue best = None if best_models is not None: - best = best_models.query("exp_name=='%s'" % exp) - if len(best) == 0: - best = best_models.query("opt_exp_name=='%s'" % exp) + best = best_models.query("exp_name=='%s'" % exp).query("exp_idx==%d" % exp_idx) if len(best) != 1: raise ValueError("Should be 1 entry for exp %s in best pickle %s" % (exp, self.params.best_pickle)) self.params.simulator.spectrum.filename = spec Modeler = hopper_utils.DataModeler(self.params) Modeler.exper_name = exp + Modeler.exper_idx = exp_idx Modeler.refl_name = ref Modeler.rank = COMM.rank - Modeler.i_exp = i_exp + Modeler.i_shot = i_shot if self.params.load_data_from_refls: gathered = Modeler.GatherFromReflectionTable(exp, ref, sg_symbol=self.params.space_group) else: gathered = Modeler.GatherFromExperiment(exp, ref, remove_duplicate_hkl=self.params.remove_duplicate_hkl, - sg_symbol=self.params.space_group) + sg_symbol=self.params.space_group, + exp_idx=exp_idx) if not gathered: logging.warning("No refls in %s; CONTINUE; COMM.rank=%d" % (ref, COMM.rank)) continue @@ -213,20 +228,24 @@ def run(self): # best pickle is not supported yet for multiple crystals # also, if number of crystals is >1 , then the params.number_of_xtals flag will be overridden exp_list = ExperimentListFactory.from_json_file(exp, False) - xtals = exp_list.crystals() - if len(xtals) > 1: + xtals = exp_list.crystals() # TODO: fix as this is broken now that we allow multi image experiments + if self.params.consider_multicrystal_shots and len(xtals) > 1: assert best is None, "cannot pass best pickle if expt list has more than one crystal" assert self.params.number_of_xtals==1, "if expt list has more than one xtal, leave number_of_xtals as the default" self.params.number_of_xtals = len(xtals) MAIN_LOGGER.debug("Found %d xtals with unit cells:" %len(xtals)) for xtal in xtals: MAIN_LOGGER.debug("%.4f %.4f %.4f %.4f %.4f %.4f" % xtal.get_unit_cell().parameters()) + if self.params.record_device_timings and COMM.rank >0: + self.params.record_device_timings = False # only record for rank 0 otherwise there's too much output SIM = hopper_utils.get_simulator_for_data_modelers(Modeler) Modeler.set_parameters_for_experiment(best) - Modeler.Umatrices = [xtal.get_U() for xtal in xtals] - # TODO, move this to SimulatorFromExperiment + Modeler.Umatrices = [Modeler.E.crystal.get_U()] + + # TODO: move this to SimulatorFromExperiment + # TODO: fix multi crystal shot mode if best is not None and "other_spotscales" in list(best) and "other_Umats" in list(best): - Modeler.Umatrices[0] 
= self.E.get_U() + Modeler.Umatrices[0] = Modeler.E.get_U() assert len(xtals) == len(best.other_spotscales.values[0])+1 for i_xtal in range(1, len(xtals),1): scale_xt = best.other_spotscales.values[0][i_xtal] @@ -250,22 +269,31 @@ def run(self): nparam += SIM.Num_ASU*SIM.num_Fhkl_channels x0 = [1] * nparam tref = time.time() - MAIN_LOGGER.info("Beginning refinement of shot %d / %d" % (i_exp+1, len(input_lines))) + MAIN_LOGGER.info("Beginning refinement of shot %d / %d" % (i_shot+1, len(input_lines))) try: - x = Modeler.Minimize(x0, SIM, i_exp=i_exp) + x = Modeler.Minimize(x0, SIM, i_shot=i_shot) + for i_rep in range(self.params.filter_after_refinement.max_attempts): + final_sigz = Modeler.target.all_sigZ[-1] + niter = len(Modeler.target.all_sigZ) + too_few_iter = niter < self.params.filter_after_refinement.min_prev_niter + too_high_sigz = final_sigz > self.params.filter_after_refinement.max_prev_sigz + if too_few_iter or too_high_sigz: + Modeler.filter_pixels(self.params.filter_after_refinement.threshold) + x = Modeler.Minimize(x0, SIM, i_shot=i_shot) + except StopIteration: x = Modeler.target.x0 tref = time.time()-tref sigz = niter = None try: niter = len(Modeler.target.all_hop_id) - sigz = np.mean(Modeler.target.all_sigZ) + sigz = Modeler.target.all_sigZ[-1] except Exception: pass trefs.append(tref) print_s = "Finished refinement of shot %d / %d in %.4f sec. (rank mean t/im=%.4f sec.)" \ - % (i_exp+1, len(input_lines), tref, np.mean(trefs)) + % (i_shot+1, len(input_lines), tref, np.mean(trefs)) if sigz is not None and niter is not None: print_s += " Ran %d iterations. Final sigmaZ = %.1f," % (niter, sigz) if COMM.rank==0: @@ -275,11 +303,18 @@ def run(self): if self.params.profile: SIM.D.show_timings(COMM.rank) - Modeler.save_up(x,SIM, rank=COMM.rank, i_exp=i_exp) + dbg = self.params.debug_mode + shot_df = Modeler.save_up(x, SIM, rank=COMM.rank, i_shot=i_shot, + save_fhkl_data=dbg, save_refl=dbg, save_modeler_file=dbg, + save_sim_info=dbg, save_pandas=dbg, save_traces=dbg, save_expt=dbg) + this_rank_dfs.append(shot_df) if Modeler.params.refiner.debug_pixel_panelfastslow is not None: # TODO separate diffBragg logger utils.show_diffBragg_state(SIM.D, Modeler.params.refiner.debug_pixel_panelfastslow) + # TODO verify this works: + if SIM.D.record_timings: + SIM.D.show_timings(COMM.rank) Modeler.clean_up(SIM) del SIM.D # TODO: is this necessary ? 
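For reference, the widened exp_ref_spec input format handled in the hunk above accepts two, three, or four whitespace-separated fields per line: experiment, reflections, an optional spectrum file, and an optional experiment index. A minimal standalone sketch of that parsing rule (the helper name is illustrative and not part of the patch):

import os

def parse_exp_ref_spec_line(line):
    """Return (exp, ref, spec, exp_idx) from one exp_ref_spec line."""
    fields = line.strip().split()
    assert len(fields) in (2, 3, 4)
    exp, ref = fields[:2]
    spec, exp_idx = None, 0
    if len(fields) == 3:
        try:
            exp_idx = int(fields[2])  # third field is an experiment index if it parses as an integer
        except ValueError:
            spec = fields[2]          # otherwise it names a spectrum file
    elif len(fields) == 4:
        assert os.path.isfile(fields[2])
        spec, exp_idx = fields[2], int(fields[3])
    return exp, ref, spec, exp_idx

This mirrors the branch added to hopper.py above and matches the four-field lines ("%s %s %s %d") that simtbx.diffBragg.integrate now writes.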
@@ -294,6 +329,21 @@ def run(self): o.write("%s %s\n" % (e,r)) o.close() + if this_rank_dfs: + this_rank_dfs = pandas.concat(this_rank_dfs).reset_index(drop=True) + df_name = os.path.join(pd_dir, "hopper_results_rank%d.pkl" % COMM.rank) + this_rank_dfs.to_pickle(df_name) + + #MAIN_LOGGER.info("MPI-Gathering data frames across ranks") + #all_rank_dfs = COMM.gather(this_rank_dfs) + #if COMM.rank==0: + # all_rank_dfs = pandas.concat(all_rank_dfs) + # all_rank_dfs.reset_index(inplace=True, drop=True) + # all_df_name = os.path.join(self.params.outdir, "hopper_results.pkl") + # all_rank_dfs.to_pickle(all_df_name) + + + if __name__ == '__main__': from dials.util import show_mail_on_error @@ -311,6 +361,7 @@ def run(self): print("Install line_profiler in order to use logging: libtbx.python -m pip install line_profiler") with DeviceWrapper(script.dev) as _: + #with np.errstate(all='raise'): RUN() if lp is not None: diff --git a/simtbx/command_line/hopper_ensemble.py b/simtbx/command_line/hopper_ensemble.py index ca578a9214..49e0d2e449 100644 --- a/simtbx/command_line/hopper_ensemble.py +++ b/simtbx/command_line/hopper_ensemble.py @@ -7,7 +7,7 @@ parser.add_argument("input", type=str, help="combined pandas pickle") parser.add_argument("phil", type=str, help="user phil file used to run hopper (see simtbx/diffBragg/phil.py)") parser.add_argument("--outdir", type=str, default=None, help="output folder") -parser.add_argument("--exp", type=str, default="opt_exp_name", help="column name for input expeirments (default is opt_exp_name)") +parser.add_argument("--exp", type=str, default="exp_name", help="column name for input experiments (default is exp_name)") parser.add_argument("--refl", type=str, default="stage2_refls", help="column name for refls (default is stage2_refls)") parser.add_argument("--cmdlinePhil", nargs="+", default=None, type=str, help="command line phil params") parser.add_argument("--cell", nargs=6, type=float, default=None, help="unit cell to use when writing MTZ files. 
If not provided, average will be used") @@ -25,7 +25,7 @@ from simtbx.diffBragg.hopper_ensemble_utils import load_inputs from libtbx.mpi4py import MPI - +from simtbx.diffBragg.device import DeviceWrapper COMM= MPI.COMM_WORLD LOGGER = logging.getLogger("diffBragg.main") @@ -66,6 +66,8 @@ def write_commandline(params): if args.outdir is not None: params.outdir = args.outdir params.tag = args.saveTag + if params.record_device_timings and COMM.rank > 0: + params.record_device_timings = False # only record for rank 0 otherwise there's too much output # end of phil stuff ======== write_commandline(params) @@ -79,8 +81,8 @@ def write_commandline(params): if params.skip is not None: df = df.iloc[params.skip:] - if params.first_n is not None: - df = df.iloc[:params.first_n] + if params.max_process is not None: + df = df.iloc[:params.max_process] df.reset_index(inplace=True, drop=True) gather_dir=None @@ -91,16 +93,24 @@ def write_commandline(params): if not os.path.exists(gather_dir): os.makedirs(gather_dir) + for col in [args.exp, args.refl]: + if col not in list(df): + raise KeyError("Col %s is missing from dataframe" % col) + modelers = load_inputs(df, params, exper_key=args.exp, refls_key=args.refl, gather_dir=gather_dir) # note, we only go beyond this point if perImport flag was not passed modelers.cell_for_mtz = args.cell modelers.max_sigma = args.maxSigma modelers.outdir = args.outdir if args.outdir is not None else modelers.params.outdir modelers.save_freq = args.saveFreq + modelers.prep_for_refinement() - modelers.save_modeler_params = args.saveAll - # do all sanity checks up front before minimization - modelers.Minimize(save=True) + with DeviceWrapper(modelers.SIM.D.device_Id) as _: + modelers.alloc_max_pix_per_shot() + modelers.save_modeler_params = args.saveAll + + # do all sanity checks up front before minimization + modelers.Minimize(save=True) - LOGGER.debug("Done!") + LOGGER.debug("Done!") diff --git a/simtbx/command_line/hopper_process.py b/simtbx/command_line/hopper_process.py index f04d136b72..f170c89018 100644 --- a/simtbx/command_line/hopper_process.py +++ b/simtbx/command_line/hopper_process.py @@ -256,7 +256,7 @@ def integrate(self, experiments, indexed): ) if self.params.dispatch.coset: - from dials.algorithms.integration.sublattice_helper import integrate_coset + from xfel.util.sublattice_helper import integrate_coset integrate_coset(self, experiments, indexed) diff --git a/simtbx/command_line/integrate.py b/simtbx/command_line/integrate.py index c23e6011e9..e1d0cdd3c1 100644 --- a/simtbx/command_line/integrate.py +++ b/simtbx/command_line/integrate.py @@ -10,17 +10,29 @@ parser.add_argument("outdir", type=str, help="path to output refls") parser.add_argument("--cmdlinePhil", nargs="+", default=None, type=str, help="command line phil params") +parser.add_argument("--dialsInteg", action="store_true", help="Integrate new shoeboxes using dials and write *integrated.expt files") parser.add_argument("--numdev", type=int, default=1, help="number of GPUs (default=1)") parser.add_argument("--pklTag", type=str, help="optional suffix for globbing for pandas pickles (default .pkl)", default=".pkl") parser.add_argument("--loud", action="store_true", help="show lots of screen output") parser.add_argument("--hopInputName", default="preds_for_hopper", type=str, help="write exp_ref_spec file and best_pickle pointing to the preditction models, such that one can run predicted rois through simtbx.diffBragg.hopper (e.g. 
to fit per-roi scale factors)") parser.add_argument("--filterDupes", action="store_true", help="filter refls with same HKL") +parser.add_argument("--keepShoeboxes", action="store_true", help="Optionally keep shoeboxes present in the prediction refl tables (can lead to OOM errors)") +parser.add_argument("--scanWeakFracs", action="store_true", help="optionally stores a variety of inputs for stage2 based filtering different fractions of weak reflections") args = parser.parse_args() from mpi4py import MPI COMM = MPI.COMM_WORLD +import logging +if not args.loud: + logging.disable(logging.CRITICAL) +else: + if COMM.rank==0: + logger = logging.getLogger("diffBragg.main") + logger.setLevel(logging.DEBUG) + + def printR(*args, **kwargs): print("RANK %d" % COMM.rank, *args, **kwargs) def print0(*args, **kwargs): @@ -28,22 +40,40 @@ def print0(*args, **kwargs): print(*args, **kwargs) import numpy as np -from simtbx.diffBragg import utils +import json +from simtbx.diffBragg import hopper_utils, utils from simtbx.modeling import predictions from simtbx.diffBragg.hopper_utils import downsamp_spec_from_params import glob import pandas import os -import shutil from dials.algorithms.integration.stills_significance_filter import SignificanceFilter from dials.algorithms.indexing.stills_indexer import calc_2D_rmsd_and_displacements -import logging import sys -if not args.loud: - logging.disable(logging.CRITICAL) -else: - logging.basicConfig(level=logging.DEBUG) + +def filter_weak_reflections(refls, weak_fraction): + """ + :param pred: reflection table created by this script + :param weak_fraction: number from 0-1 (if 0, only strong spots are saved) + :return: new reflection table with weak reflections filtered according to weak_fraction + """ + new_refls = None + for idx in set(refls['id']): + pred = refls.select(refls['id']==idx) + weaks = pred.select(pred['is_weak']) + nweak = len(weaks) + weaks_sorted = np.argsort(weaks["scatter"])[::-1] + num_keep = int(nweak * weak_fraction) + weak_refl_inds_keep = set(np.array(weaks["refl_idx"])[weaks_sorted[:num_keep]]) + weak_sel = flex.bool([i in weak_refl_inds_keep for i in pred['refl_idx']]) + keeps = np.logical_or(pred['is_strong'], weak_sel) + pred = pred.select(flex.bool(keeps)) + if new_refls is None: + new_refls = deepcopy(pred) + else: + new_refls.extend(pred) + return new_refls # Note: these imports and following 3 methods will eventually be in CCTBX/simtbx/diffBragg/utils @@ -61,6 +91,7 @@ def print0(*args, **kwargs): from copy import deepcopy from collections import Counter +from simtbx.diffBragg.device import DeviceWrapper def filter_refls(R): vec3_dbl_keys = 'xyzcal.px', 'xyzcal.mm', 'xyzobs.px.value', 'xyzobs.px.value', 'rlp', 's1' @@ -338,148 +369,248 @@ def refls_from_sims(panel_imgs, detector, beam, thresh=0, filter=None, panel_ids os.makedirs(args.outdir) COMM.barrier() + #rank_outdir = os.path.join( args.outdir, "rank%d" % COMM.rank) + #if not os.path.exists(rank_outdir): + # os.makedirs(rank_outdir) + params = utils.get_extracted_params_from_phil_sources(args.predPhil, args.cmdlinePhil) - if os.path.isfile(args.inputGlob): - df_all = pandas.read_pickle(args.inputGlob) - df_all.reset_index(inplace=True, drop=True) - def df_iter(): - for i_f in range(len(df_all)): - if i_f % COMM.size != COMM.rank: - continue - df_i = df_all.iloc[i_f:i_f+1].copy().reset_index(drop=True) - yield i_f, df_i - Nf = len(df_all) - else: - if os.path.isdir(args.inputGlob): - glob_s = os.path.join(args.inputGlob, "pandas/rank*/*.pkl") - fnames = glob.glob(glob_s) + + # 
inputGlob can be a glob in strings, a single pandas file, or a hopper output folder + if os.path.isfile(args.inputGlob) or os.path.isdir(args.inputGlob): + if os.path.isfile(args.inputGlob): + fnames = [args.inputGlob] else: - fnames = glob.glob(args.inputGlob) - def df_iter(): - for i_f,f in enumerate(fnames): - if i_f % COMM.size != COMM.rank: - continue - df = pandas.read_pickle(f) - yield i_f, df - Nf = len(fnames) + dirname = args.inputGlob + fnames = glob.glob( os.path.join(dirname, "pandas/hopper_results_rank*.pkl")) + else: + fnames = glob.glob(args.inputGlob) + + if not fnames: + raise OSError("Found no filenames to load!") + Nf = 0 + shots_per_df = [] + print0("getting total number of shots") + for i_f, f in enumerate(fnames): + if i_f % COMM.size != COMM.rank: + continue + n = len(pandas.read_pickle(f)) + shots_per_df += [(f, str(x)) for x in range(n)] # note we cast to string because of mpi reduce + Nf += n + + shots_per_df = COMM.bcast(COMM.reduce( shots_per_df)) + Nf = COMM.bcast(COMM.reduce(Nf)) + print0("total num shots is %d" % Nf) + df_rows_per_rank = np.array_split(shots_per_df, COMM.size)[COMM.rank] + + print0("getting dataframe handles") + df_handles = {} + dfs = [] + if df_rows_per_rank.size: + dfs, _ = zip(*df_rows_per_rank) + for f in set(dfs): + df_handles[f] = pandas.read_pickle(f).reset_index(drop=True) + + def df_iter(): + for i_df, (df_name, row_idx) in enumerate(df_rows_per_rank): + row_idx = int(row_idx) + printR("Opening shot %d / %d" % (i_df+1, len(df_rows_per_rank))) + df = df_handles[df_name].iloc[row_idx: row_idx+1].copy() + yield i_df, df if params.predictions.verbose: params.predictions.verbose = COMM.rank==0 dev = COMM.rank % args.numdev - print0("Found %d input files" % Nf) - - all_dfs = [] - all_pred_names = [] - exp_ref_spec_lines = [] - for i_f, df in df_iter(): - printR("Shot %d / %d" % (i_f+1, Nf), flush=True) - - expt_name = df.opt_exp_name.values[0] - tag = os.path.splitext(os.path.basename(expt_name))[0] - new_expt_name = "%s/%s_%d_predicted.expt" % (args.outdir,tag, i_f) - new_expt_name = os.path.abspath(new_expt_name) - df["opt_exp_name"] = new_expt_name - - shutil.copyfile(expt_name, new_expt_name) - - data_exptList = ExperimentList.from_file(expt_name) - data_expt = data_exptList[0] - - try: - spectrum_override = None - if params.spectrum_from_imageset: - spectrum_override = downsamp_spec_from_params(params, data_expt) - pred = predictions.get_predicted_from_pandas( - df, params, strong=None, device_Id=dev, spectrum_override=spectrum_override) - if args.filterDupes: - pred = filter_refls(pred) - except ValueError: - os.remove(new_expt_name) - continue - - data = utils.image_data_from_expt(data_expt) - Rstrong = refls_from_sims(data, data_expt.detector, data_expt.beam, phil_file=args.procPhil ) - Rstrong['id'] = flex.int(len(Rstrong), 0) - num_panels = len(data_expt.detector) - if num_panels > 1: - assert params.predictions.label_weak_col == "rlp" - - Rstrong.centroid_px_to_mm(data_exptList) - Rstrong.map_centroids_to_reciprocal_space(data_exptList) - predictions.label_weak_predictions(pred, Rstrong, q_cutoff=params.predictions.qcut, col=params.predictions.label_weak_col ) - - pred['is_strong'] = flex.bool(np.logical_not(pred['is_weak'])) - strong_sel = np.logical_not(pred['is_weak']) - - pred["refl_idx"] = flex.int(np.arange(len(pred))) - weaks = pred.select(pred['is_weak']) - weaks_sorted = np.argsort(weaks["scatter"])[::-1] - nweak = len(weaks) - num_keep = int(nweak*params.predictions.weak_fraction) - weak_refl_inds_keep = 
set(np.array(weaks["refl_idx"])[weaks_sorted[:num_keep]]) - - weak_sel = flex.bool([i in weak_refl_inds_keep for i in pred['refl_idx']]) - keeps = np.logical_or( pred['is_strong'], weak_sel) - #printR("Sum keeps=%d; num_strong=%d, num_kept_weak=%d" % (sum(keeps), sum(strong_sel), sum(weak_sel))) - pred = pred.select(flex.bool(keeps)) - nstrong = np.sum(strong_sel) - printR("Will save %d refls (%d strong, %d weak)" % (len(pred), np.sum(strong_sel), np.sum(weak_sel))) - pred_file = os.path.abspath("%s/%s_%d_predicted.refl" % ( args.outdir, tag, i_f)) - pred.as_file(pred_file) - - Rindexed = Rstrong.select(Rstrong['indexed']) - if len(Rindexed)==0: - print("No strong indexed refls for shot %s" % new_expt_name) - continue + EXPT_DIRS = os.path.join(args.outdir, "expts_and_refls") + if COMM.rank==0: + utils.safe_makedirs(EXPT_DIRS) - utils.refls_to_hkl(Rindexed, data_expt.detector, data_expt.beam, data_expt.crystal, update_table=True) - try: - int_expt, int_refl = integrate(args.procPhil, data_exptList, Rindexed, pred) - int_expt_name = "%s/%s_%d_integrated.expt" % ( args.outdir,tag, i_f) - int_expt.as_file(int_expt_name) - int_refl['bbox'] = int_refl['shoebox'].bounding_boxes() - int_refl_name = int_expt_name.replace(".expt", ".refl") - int_refl.as_file(int_refl_name) - except RuntimeError as err: - print("Integration failed for %s because: '%s'" % (pred_file, str(err))) - - all_dfs.append(df) - all_pred_names.append(pred_file) - spec_name = df.spectrum_filename.values[0] - if spec_name is None: - spec_name = "" - exp_ref_spec_lines.append("%s %s %s\n" % (new_expt_name, pred_file, spec_name)) - - if all_dfs: - all_dfs = pandas.concat(all_dfs) - all_dfs["predicted_refls"] = all_pred_names - all_dfs["predictions"] = all_pred_names - else: - all_dfs = None + if args.scanWeakFracs and params.predictions.weak_fraction != 1: + print("WARNING: overriding weak_fracion because of scanWeakFracs") + params.predictions.weak_fraction=1 - all_dfs = COMM.gather(all_dfs) - exp_ref_spec_lines = COMM.reduce(exp_ref_spec_lines) - print0("\nReflections written to folder %s.\n" % args.outdir) - if COMM.rank==0: - hopper_input_name = os.path.abspath(os.path.join(args.outdir , "%s.txt" % args.hopInputName)) - o = open(hopper_input_name, "w") - for l in exp_ref_spec_lines: - o.write(l) - o.close() - all_dfs = [df for df in all_dfs if df is not None] - if not all_dfs: - raise ValueError("No dataframes to concat: prediction/integration failed for all shots..") - - all_dfs = pandas.concat([df for df in all_dfs if df is not None]) - all_dfs.reset_index(inplace=True, drop=True) - best_pkl_name = os.path.abspath(os.path.join(args.outdir , "%s.pkl" % args.hopInputName)) - all_dfs.to_pickle(best_pkl_name) - print("Wrote %s (best_pickle option for simtbx.diffBragg.hopper) and %s (exp_ref_spec option for simtbx.diffBragg.hopper). Use them to run the predictions through hopper. 
Use the centroid=cal option to specify the predictions" % (best_pkl_name, hopper_input_name)) - - with open(args.outdir +".exectution.txt", "w") as o: - o.write("integrate was run from folder: %s\n" % os.getcwd()) - o.write("The command line input was:\n") - o.write(" ".join(sys.argv)) - #TODO: write the diff phils here: + print0("Found %d input files" % Nf) + with DeviceWrapper(dev) as _: + all_dfs = [] + all_pred_names = [] + exp_ref_spec_lines = [] + all_rank_pred = None + all_rank_expt = None + + rank_shot_count = 0 + rank_pred_file = os.path.join(EXPT_DIRS, "rank%d_preds.refl" % COMM.rank) + rank_pred_file = os.path.abspath(rank_pred_file) + rank_expt_file = rank_pred_file.replace(".refl", ".expt") + for i_f, df in df_iter(): + + expt_name = df.exp_name.values[0] + expt_idx = df.exp_idx.values[0] + tag = os.path.splitext(os.path.basename(expt_name))[0] + + data_expt = hopper_utils.DataModeler.exper_json_single_file(expt_name, expt_idx) + data_exptList = ExperimentList() + data_exptList.append(data_expt) + + try: + spectrum_override = None + if params.spectrum_from_imageset: + spectrum_override = downsamp_spec_from_params(params, data_expt) + pred = predictions.get_predicted_from_pandas( + df, params, strong=None, device_Id=dev, spectrum_override=spectrum_override) + if args.filterDupes: + pred = filter_refls(pred) + except ValueError: + #os.remove(new_expt_name) + continue + + data = utils.image_data_from_expt(data_expt) + Rstrong = refls_from_sims(data, data_expt.detector, data_expt.beam, phil_file=args.procPhil ) + Rstrong['id'] = flex.int(len(Rstrong), 0) + num_panels = len(data_expt.detector) + if num_panels > 1: + assert params.predictions.label_weak_col == "rlp" + + Rstrong.centroid_px_to_mm(data_exptList) + Rstrong.map_centroids_to_reciprocal_space(data_exptList) + predictions.label_weak_predictions(pred, Rstrong, q_cutoff=params.predictions.qcut, col=params.predictions.label_weak_col ) + + pred['is_strong'] = flex.bool(np.logical_not(pred['is_weak'])) + strong_sel = np.logical_not(pred['is_weak']) + + pred["refl_idx"] = flex.int(np.arange(len(pred))) + + #weaks = pred.select(pred['is_weak']) + #weaks_sorted = np.argsort(weaks["scatter"])[::-1] + #nweak = len(weaks) + #num_keep = int(nweak*params.predictions.weak_fraction) + #weak_refl_inds_keep = set(np.array(weaks["refl_idx"])[weaks_sorted[:num_keep]]) + #weak_sel = flex.bool([i in weak_refl_inds_keep for i in pred['refl_idx']]) + #keeps = np.logical_or( pred['is_strong'], weak_sel) + #printR("Sum keeps=%d; num_strong=%d, num_kept_weak=%d" % (sum(keeps), sum(strong_sel), sum(weak_sel))) + #pred = pred.select(flex.bool(keeps)) + pred = filter_weak_reflections(pred, weak_fraction=params.predictions.weak_fraction) + + nstrong = np.sum(strong_sel) + printR("Will save %d refls (%d strong, %d weak)" % (len(pred), np.sum(pred["is_strong"]), np.sum(pred["is_weak"]))) + pred['id'] = flex.int(len(pred), rank_shot_count) + if 'shoebox' in list(pred) and not args.keepShoeboxes: + del pred['shoebox'] + if all_rank_pred is None: + all_rank_pred = deepcopy(pred) + else: + all_rank_pred.extend(pred) + + # Note, the simple append causes memory leak: + #all_rank_expt.append(data_expt) + if all_rank_expt is None: + all_rank_expt = deepcopy(data_exptList.to_dict()) + else: + Edict = data_exptList.to_dict() + for exp_key in 'beam', 'detector', 'crystal', 'imageset': + Edict['experiment'][0][exp_key] = rank_shot_count + for exp_key in 'experiment', 'beam', 'detector', 'crystal', 'imageset': + assert len( Edict[exp_key])==1 + all_rank_expt[exp_key] 
.append(Edict[exp_key][0]) + + Rindexed = Rstrong.select(Rstrong['indexed']) + if len(Rindexed)==0: + print("No strong indexed refls for shot %s" % expt_name) + continue + + utils.refls_to_hkl(Rindexed, data_expt.detector, data_expt.beam, data_expt.crystal, update_table=True) + if args.dialsInteg: + # TODO: save these files as multi-shot experiment/refls + try: + int_expt, int_refl = integrate(args.procPhil, data_exptList, Rindexed, pred) + int_expt_name = "%s/%s_%d_integrated.expt" % (rank_outdir, tag, i_f) + int_expt.as_file(int_expt_name) + int_refl['bbox'] = int_refl['shoebox'].bounding_boxes() + int_refl_name = int_expt_name.replace(".expt", ".refl") + int_refl.as_file(int_refl_name) + except RuntimeError: + print("Integration failed" ) + + df['old_exp_name'] = expt_name + df['old_exp_idx'] = expt_idx + df['exp_name'] = rank_expt_file + df['exp_idx'] = rank_shot_count + + df['predictions'] = rank_pred_file + df['predicted_refs'] = rank_pred_file + df['num_pred'] = len(pred) + + all_dfs.append(df) + rank_shot_count += 1 + + spec_name = df.spectrum_filename.values[0] + if spec_name is None: + spec_name = "" + exp_ref_spec_lines.append("%s %s %s %d\n" % (rank_expt_file, rank_pred_file, spec_name, rank_shot_count)) + + all_rank_pred.as_file(rank_pred_file) + # NOTE: all_rank_expt is a dictionary to avoid weird OOM, so we write a simple json + #all_rank_expt.as_file(rank_expt_file) + with open(rank_expt_file, "w") as file_O: + json.dump(all_rank_expt, file_O) + print0("Done with predictions, combining dataframes") + if all_dfs: + all_dfs = pandas.concat(all_dfs) + else: + all_dfs = None + + if args.scanWeakFracs and all_dfs is not None: + assert len(all_dfs.predictions.unique()) == 1 + pred_file = all_dfs.predictions.values[0] + n_total_weak = np.sum(all_rank_pred['is_weak']) + n_total = len(all_rank_pred) + weak_fracs = [.11,.22,.33,.44,.55,.66,.77,.88] + labels = [] + for i_frac, weak_frac in enumerate(weak_fracs): + filt_refls = filter_weak_reflections(all_rank_pred, weak_frac) + label="%dperc"%(weak_frac*100,) + labels.append(label) + new_pred_file = os.path.splitext(pred_file)[0]+"_%s.refl" % label + all_dfs['predictions_%s' % label] = new_pred_file + all_dfs['predicted_refs_%s' %label] = new_pred_file + num_preds = [] + for exp_id in all_dfs.exp_idx.values: + n = np.sum(filt_refls['id'] == int(exp_id)) + num_preds.append(n) + all_dfs['num_pred_%s' %label] = num_preds + filt_refls.as_file(new_pred_file) + printR("Saved %d/%d refls (%d strong, %d/%d weak) to %s" + % (len(filt_refls), n_total, np.sum(filt_refls["is_strong"]), np.sum(filt_refls["is_weak"]), n_total_weak, new_pred_file)) + # note this sanity check below requires that weaK_fracs be sorted + if sorted(weak_fracs) == weak_fracs: + # then as weak frac increases, there should be an increasing number of predictions + num_preds_per_frac = [all_dfs["num_pred_%s" % lab].sum() for lab in labels] + assert num_preds_per_frac == sorted(num_preds_per_frac) + + print0("MPI gather all_dfs") + all_dfs = COMM.gather(all_dfs) + print0("MPI reduce lines") + exp_ref_spec_lines = COMM.reduce(exp_ref_spec_lines) + if COMM.rank==0: + hopper_input_name = os.path.abspath(os.path.join(args.outdir , "%s.txt" % args.hopInputName)) + o = open(hopper_input_name, "w") + for l in exp_ref_spec_lines: + o.write(l) + o.close() + all_dfs = [df for df in all_dfs if df is not None] + if not all_dfs: + raise ValueError("No dataframes to concat: prediction/integration failed for all shots..") + + print("Concat frames") + all_dfs = pandas.concat([df for df in 
all_dfs if df is not None]) + all_dfs.reset_index(inplace=True, drop=True) + best_pkl_name = os.path.abspath(os.path.join(args.outdir , "%s.pkl" % args.hopInputName)) + all_dfs.to_pickle(best_pkl_name) + print("Wrote %s (best_pickle option for simtbx.diffBragg.hopper) and %s (exp_ref_spec option for simtbx.diffBragg.hopper). Use them to run the predictions through hopper (use phil centroid=cal) or simtbx.diffBragg.stage_two." % (best_pkl_name, hopper_input_name)) + + cmd_log_file = os.path.join(args.outdir, "cmdline_execution.txt") + with open(cmd_log_file, "w") as o: + o.write("integrate was run from folder: %s\n" % os.getcwd()) + o.write("The command line input was:\n") + o.write(" ".join(sys.argv) + "\n") + #TODO: write the diff phils here: diff --git a/simtbx/command_line/make_input_file.py b/simtbx/command_line/make_input_file.py index b63b9c4ac5..3229cd1b53 100644 --- a/simtbx/command_line/make_input_file.py +++ b/simtbx/command_line/make_input_file.py @@ -11,6 +11,7 @@ "split files will be written in same folders as their sources") parser.add_argument("--exptSuffix", type=str, default="refined.expt", help="find experiments with this suffix") parser.add_argument("--reflSuffix", type=str, default="indexed.refl", help="find reflection files with this suffix") +parser.add_argument("--write", action="store_true") args = parser.parse_args() @@ -25,10 +26,11 @@ from simtbx.diffBragg import hopper_io import hashlib + from libtbx.mpi4py import MPI COMM = MPI.COMM_WORLD -if COMM.rank==0: +if COMM.rank==0 and args.write: if args.splitDir is not None and not os.path.exists(args.splitDir): os.makedirs(args.splitDir) @@ -48,7 +50,7 @@ def get_idx_path(El): return idx, path -def split_stills_expts(expt_f, refl_f, split_dir): +def split_stills_expts(expt_f, refl_f, split_dir, write=False): El = ExperimentList.from_file(expt_f, False) R = flex.reflection_table.from_file(refl_f) expt_names = [] @@ -65,15 +67,16 @@ def split_stills_expts(expt_f, refl_f, split_dir): seen_isets[iset_id] += 1 tag = "%s-%d" % (os.path.basename(os.path.splitext(path)[0]), idx) new_expt_name = os.path.splitext(expt_f)[0] + "_%s_xtal%d.expt" % (tag, seen_isets[iset_id]) - if split_dir is not None: + if write and split_dir is not None: unique_tag = "shot_%s" % hash_name(new_expt_name) + ".expt" new_expt_name = os.path.join(split_dir, unique_tag) new_refl_name = new_expt_name.replace(".expt", ".refl") refls = R.select(R['id'] == i_expt) refls.reset_ids() - one_exp_El.as_file(new_expt_name) - refls.as_file(new_refl_name) + if write: + one_exp_El.as_file(new_expt_name) + refls.as_file(new_refl_name) expt_names.append(new_expt_name) refl_names.append(new_refl_name) orig_expt_names.append((apath(new_expt_name), (apath(expt_f), i_expt))) @@ -119,12 +122,20 @@ def split_stills_expts(expt_f, refl_f, split_dir): if COMM.rank==0: - print("Saving the input file for diffBragg") - hopper_io.save_expt_refl_file(args.filename, exp_names, ref_names, check_exists=True) - print("Saved %s" % args.filename) - - jname = args.filename + ".json" - jdat = {"expt": dict(orig_exp_names), "refl": dict(orig_ref_names)} - with open(jname, "w") as fp: - json.dump(jdat, fp, indent=1) - print("Wrote json %s, which maps the hashnames to the original expt files" % jname) + if args.write: + print("Saving the input file for diffBragg") + hopper_io.save_expt_refl_file(args.filename, exp_names, ref_names, check_exists=True) + print("Saved %s" % args.filename) + jname = args.filename + ".json" + jdat = {"expt": dict(orig_exp_names), "refl": dict(orig_ref_names)} + 
with open(jname, "w") as fp: + json.dump(jdat, fp, indent=1) + print("Wrote json %s, which maps the hashnames to the original expt files" % jname) + else: + _, exp_and_idx = zip(*orig_exp_names) + _, ref_and_idx = zip(*orig_ref_names) + exp, exp_idx = zip(*exp_and_idx) + ref, ref_idx = zip(*ref_and_idx) + assert exp_idx == ref_idx + hopper_io.save_expt_refl_file(args.filename, exp, ref, check_exists=True, + indices=exp_idx) diff --git a/simtbx/command_line/spectra.py b/simtbx/command_line/spectra.py new file mode 100644 index 0000000000..0ccfa78791 --- /dev/null +++ b/simtbx/command_line/spectra.py @@ -0,0 +1,99 @@ +from __future__ import division + +# LIBTBX_SET_DISPATCHER_NAME diffBragg.spectra + +#TODO: add text entry boxes for other spectrum filter params, position entry boxes sensibly + +from simtbx.diffBragg import hopper_utils +from pylab import * +import dxtbx +from simtbx.diffBragg.phil import philz, hopper_phil +from libtbx.phil import parse +from matplotlib.widgets import TextBox + +from argparse import ArgumentParser +parser = ArgumentParser() +parser.add_argument("image_file", type=str, help="path to a diffBragg modeler file (output from hopper, see the imgs folder in the outdir)") +parser.add_argument("--filt_freq", default=0.07, type=float) +parser.add_argument("--filt_order", default=3, type=float) +parser.add_argument("--tail", default=50, type=int) +parser.add_argument("--delta_en", default=0.5, type=float) +parser.add_argument("--skip", action="store_true") +args = parser.parse_args() + +FIG,ax0 = subplots(nrows=1,ncols=1) +FIG.set_size_inches((5,3)) + +class P: + + def __init__(self, params, imgset, ax0, FIG): + self.params = params + self.imgset = imgset + self.ax0 = ax0 + self.FIG= FIG + + def entry(self, text): + delta_en = float(text) + self.params.downsamp_spec.delta_en = delta_en + print(delta_en) + self.update_plot(self.imgset) + + def update_plot(self, i_img): + raw_spec = self.imgset.get_spectrum(i_img) + raw_en = raw_spec.get_energies_eV() + raw_wt = raw_spec.get_weights() + + spec = hopper_utils.downsamp_spec_from_params(self.params, imgset=self.imgset, i_img=i_img) + en, wt = map(np.array, zip(*spec)) + en = hopper_utils.utils.ENERGY_CONV / en + + self.ax0.clear() + self.ax0.plot( raw_en, raw_wt, lw=2, label="raw spec (%d chan)" % len(raw_en)) + self.ax0.plot( en, wt, '--', lw=1, label="filt spec (%d chan)" % len(en)) + self.FIG.suptitle("image %d" % (i_img, ), fontsize=14) + self.ax0.set_xlabel("Energy (eV)", fontsize=12) + self.ax0.tick_params(labelsize=11) + self.ax0.legend(prop={"size":11}, loc="upper right") + draw() + + +def press(event): + if event.key == 'right': + FIG.loop_counter += 1 + elif event.key=="left": + FIG.loop_counter = FIG.loop_counter -1 + FIG.loop_counter = max(FIG.loop_counter,0) + FIG.loop_counter = min(FIG.loop_counter, FIG.nspots-1) + + if event.key=="escape": + FIG.loop_counter = FIG.nspots + +FIG.loop_counter = 0 +loader = dxtbx.load(args.image_file) +imgset = loader.get_imageset(loader.get_image_file()) +FIG.nspots = len(imgset) +FIG.canvas.mpl_connect('key_press_event', press) + +phil_scope = parse(philz + hopper_phil) +params = phil_scope.fetch(sources=[phil_scope]).extract() + +params.downsamp_spec.filt_freq = args.filt_freq +params.downsamp_spec.filt_order = args.filt_order +params.downsamp_spec.tail = args.tail +params.downsamp_spec.delta_en = args.delta_en +params.downsamp_spec.skip = args.skip + +Pinst = P(params, imgset, ax0, FIG) +axbox = plt.axes([0.25, 0.75, 0.1, 0.075]) +text_box = TextBox(axbox, 'delta_en', 
initial="%.2f" % args.delta_en) +text_box.on_submit(Pinst.entry) + +while FIG.loop_counter < len(imgset): + i_img = FIG.loop_counter + + Pinst.update_plot(i_img) + + waitforbuttonpress() + i_img += 1 + +plt.close() diff --git a/simtbx/command_line/stage_two.py b/simtbx/command_line/stage_two.py index bb72c9a541..0787a9867f 100644 --- a/simtbx/command_line/stage_two.py +++ b/simtbx/command_line/stage_two.py @@ -3,13 +3,13 @@ # LIBTBX_SET_DISPATCHER_NAME simtbx.diffBragg.stage_two from libtbx.mpi4py import MPI -from simtbx.kokkos import gpu_instance -kokkos_run = gpu_instance(deviceId = 0) from simtbx.command_line.hopper import hopper_phil import time import logging from simtbx.diffBragg import mpi_logger +from simtbx.diffBragg.device import DeviceWrapper + COMM = MPI.COMM_WORLD if COMM.rank > 0: @@ -28,10 +28,13 @@ pandas_table = None .type = str .help = path to an input pandas table (usually output by simtbx.diffBragg.predictions) -prep_time = 60 +refls_key = predictions + .type = str + .help = name of the predicted refls column in the pandas table input +max_sigz = 10. .type = float - .help = Time spent optimizing order of input dataframe to better divide shots across ranks - .help = Unit is seconds, 1-2 minutes of prep might save a lot of time during refinement! + .help = Maximum allowed value of sigz in the input pandas table (dataframe) + .help = (high sigz above 10 usually indicates failed stage 1 refinement) """ philz = script_phil + philz + hopper_phil @@ -85,6 +88,7 @@ def run(self): if LineProfiler is not None and script.params.profile: lp = LineProfiler() lp.add_function(ensemble_refine_launcher.RefineLauncher.launch_refiner) + lp.add_function(ensemble_refine_launcher.RefineLauncher.load_inputs) lp.add_function(stage_two_refiner.StageTwoRefiner._compute_functional_and_gradients) lp.add_function(stage_two_refiner.StageTwoRefiner._run_diffBragg_current) lp.add_function(stage_two_refiner.StageTwoRefiner._update_Fcell) @@ -108,7 +112,9 @@ def run(self): else: mpi_logger.setup_logging_from_params(script.params) - RUN() + dev = COMM.rank % script.params.refiner.num_devices + with DeviceWrapper(dev) as _: + RUN() if lp is not None: stats = lp.get_stats() @@ -116,4 +122,4 @@ def run(self): hopper_utils.print_profile(stats, ["launch_refiner", "_compute_functional_and_gradients", "_run_diffBragg_current", "_update_Fcell", "_scale_pixel_data", "_Fcell_derivatives", "_mpi_aggregation", - "GatherFromExperiment", "_setup"]) + "GatherFromExperiment", "_setup", "load_inputs"]) diff --git a/simtbx/command_line/update_stage1_phil.py b/simtbx/command_line/update_stage1_phil.py new file mode 100644 index 0000000000..869d1fe32f --- /dev/null +++ b/simtbx/command_line/update_stage1_phil.py @@ -0,0 +1,82 @@ + +from __future__ import division +from argparse import ArgumentParser +parser = ArgumentParser() +parser.add_argument("dirname", help="stage1 output folder with pandas and diff.phil", type=str) +parser.add_argument("newPhil", default=None, help="new stage 1 phil file", type=str) +parser.add_argument("--setGlim", action="store_true", help="Use unrestrained stage1 to set bounds on G") +#parser.add_argument("--njobs", type=int, default=5, help="number of jobs (only runs on single node, no MPI)") +#parser.add_argument("--plot", action="store_true", help="show a histogram at the end") +args = parser.parse_args() + +# LIBTBX_SET_DISPATCHER_NAME diffBragg.update_stage1_phil + +import os +import pandas +import numpy as np +import glob + +glob_s = os.path.join(args.dirname, "pandas/*pkl") +fnames = 
glob.glob(glob_s) + + # read in the pandas pickles + df1 = pandas.concat( [pandas.read_pickle(f) for f in fnames]).reset_index(drop=True) + + # unit cell phil + a,b,c,al,be,ga = df1[['a', 'b', 'c', 'al', 'be', 'ga']].median() + + # Ncells abc and Nvol + na, nb, nc = np.vstack(df1.ncells).T + nvol = na*nb*nc + nvol = np.median(na*nb*nc) + na, nb, nc = map(np.median, (na, nb, nc)) + + # eta + ea, eb, ec = map( np.median, np.vstack(df1.eta_abc).T) + + # spot scale (G) + Gmed = df1.spot_scales.median() + Gmin = df1.spot_scales.min()/100 + Gmax = df1.spot_scales.max()*100 + + update_phil = """ +init {{ + G = {G} + Nabc = [{na},{nb},{nc}] + eta_abc = [{ea},{eb},{ec}] +}} +centers {{ + Nvol = {nvol} + ucell_a = {a} + ucell_b = {b} + ucell_c = {c} + ucell_alpha = {al} + ucell_beta = {be} + ucell_gamma = {ga} +}} +betas {{ + Nvol = 1e-2 + ucell_a = 1e-7 + ucell_b = 1e-7 + ucell_c = 1e-7 + ucell_alpha = 1e-7 + ucell_beta = 1e-7 + ucell_gamma = 1e-7 +}} +use_restraints = True +""".format(G=Gmed,na=na, nb=nb, nc=nc, ea=ea, eb=eb, ec=ec,a=a,b=b,c=c,al=al,be=be,ga=ga, nvol=nvol) + +Gmin_Gmax=""" +mins.G={Gmin} +maxs.G={Gmax}\n""".format(Gmin=Gmin, Gmax=Gmax) + +if args.setGlim: + update_phil += Gmin_Gmax + +diff_phil_name = os.path.join(args.dirname, "diff.phil") +assert os.path.exists(diff_phil_name) +diff_phil = open(diff_phil_name, "r").read() +with open(args.newPhil, "w") as o: + o.write(diff_phil + "\n"+ update_phil) + +print("Done. Added \n%s\n to %s and saved the result to %s" % (update_phil, diff_phil_name, args.newPhil) ) diff --git a/simtbx/diffBragg/ensemble_refine_launcher.py b/simtbx/diffBragg/ensemble_refine_launcher.py index bb43493782..e84f5c3e9b 100644 --- a/simtbx/diffBragg/ensemble_refine_launcher.py +++ b/simtbx/diffBragg/ensemble_refine_launcher.py @@ -1,5 +1,7 @@ from __future__ import absolute_import, division, print_function from simtbx.diffBragg.stage_two_utils import PAR_from_params +from collections import Counter +from itertools import chain import os import sys from libtbx.mpi4py import MPI @@ -11,11 +13,10 @@ except ImportError: print("Pandas is required. Install using 'libtbx.python -m pip install pandas'") exit() -from xfel.merging.application.utils.memory_usage import get_memory_usage from simtbx.diffBragg.refiners.stage_two_refiner import StageTwoRefiner from simtbx.diffBragg import utils from simtbx.diffBragg import hopper_utils -from dxtbx.model.experiment_list import ExperimentListFactory +from dxtbx.model.experiment_list import ExperimentList, ExperimentListFactory from simtbx.diffBragg.prep_stage2_input import prep_dataframe from cctbx import miller, crystal import logging @@ -28,11 +29,20 @@ def global_refiner_from_parameters(params): # TODO read on each rank, or read and broadcast ? 
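Before the work distribution is prepared, the input dataframe is filtered: shots whose stage 1 sigz is not below max_sigz are dropped (see the query just below). A minimal pandas sketch of that filter on a toy table (column values are made up):

    import pandas

    df = pandas.DataFrame({"exp_name": ["a.expt", "b.expt"], "sigz": [1.2, 25.0]})
    max_sigz = 10.0
    df = df.query("sigz < %f" % max_sigz).reset_index(drop=True)
    assert len(df) == 1  # the sigz=25 shot is removed before refinement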
LOGGER.info("EVENT: read input pickle") pandas_table = pandas.read_pickle(params.pandas_table) + if params.max_sigz is not None and "sigz" in list(pandas_table): + Nframe = len(pandas_table) + pandas_table = pandas_table.query("sigz < %f" % params.max_sigz) + pandas_table.reset_index(drop=True, inplace=True) + LOGGER.info("Removed %d / %d dataframes due to max_sigz=%.2f filter" + % (Nframe - len(pandas_table), Nframe, params.max_sigz)) + if params.max_process > 0: + pandas_table = pandas_table.iloc[:params.max_process] LOGGER.info("EVENT: BEGIN prep dataframe") - if params.prep_time > 0: - work_distribution = prep_dataframe(pandas_table) + if "exp_idx" not in list(pandas_table): + pandas_table["exp_idx"] = 0 + work_distribution = prep_dataframe(pandas_table, res_ranges_string=params.refiner.res_ranges) LOGGER.info("EVENT: DONE prep dataframe") - return launcher.launch_refiner(pandas_table, work_distribution=work_distribution) + return launcher.launch_refiner(pandas_table, work_distribution=work_distribution, refls_key=params.refls_key) class RefineLauncher: @@ -63,7 +73,7 @@ def check_parameter_integrity(params): raise ValueError("Cannot refine because params.refiner.max_calls is empty") if os.environ.get("DIFFBRAGG_CUDA") is not None: - params.refiner.use_cuda = True + params.refiner.use_gpu = True return params @@ -115,8 +125,8 @@ def _check_experiment_integrity(expt): if not hasattr(expt, model): raise ValueError("No %s in experiment, exiting. " % model) - def launch_refiner(self, pandas_table, miller_data=None, work_distribution=None): - self.load_inputs(pandas_table, miller_data=miller_data, work_distribution=work_distribution) + def launch_refiner(self, pandas_table, miller_data=None, work_distribution=None, refls_key="predictions"): + self.load_inputs(pandas_table, miller_data=miller_data, work_distribution=work_distribution, refls_key=refls_key) LOGGER.info("EVENT: launch refiner") self._launch() return self.RUC @@ -128,7 +138,7 @@ def load_inputs(self, pandas_table, miller_data=None, work_distribution=None, re the pandas table is expected to have been written by diffBragg.hopper or diffBragg.hopper_process . See method save_to_pandas in simtbx/command_line/hopper.py For example, if the outputdir of diffBragg.hopper was set to `all_shots`, then - there should be a golder all_shots/pandas created which contains all of the per-shot pandas + there should be a folder all_shots/pandas created which contains all of the per-shot pandas dataframes. 
They should be concatenated as follows, forming a suitable argument for this method >> import glob,pandas >> fnames = glob.glob("all_shots/pandas/rank*/*pkl") @@ -145,6 +155,8 @@ def load_inputs(self, pandas_table, miller_data=None, work_distribution=None, re """ COMM.Barrier() num_exp = len(pandas_table) + if "exp_idx" not in list(pandas_table): + pandas_table["exp_idx"] = 0 first_exper_file = pandas_table.exp_name.values[0] detector = ExperimentListFactory.from_json_file(first_exper_file, check_format=False)[0].detector if detector is None and self.params.refiner.reference_geom is None: @@ -169,51 +181,58 @@ def load_inputs(self, pandas_table, miller_data=None, work_distribution=None, re shot_idx = 0 # each rank keeps index of the shots local to it rank_panel_groups_refined = set() exper_names = pandas_table.exp_name - assert len(exper_names) == len(set(exper_names)) + exper_ids = pandas_table.exp_idx.values + shot_ids = list(zip(exper_names, exper_ids)) + assert len(shot_ids) == len(set(shot_ids)) # TODO assert all exper are single-file, probably way before this point if work_distribution is None: worklist = range(COMM.rank, len(exper_names), COMM.size) else: worklist = work_distribution[COMM.rank] LOGGER.info("EVENT: begin loading inputs") - for i_exp in worklist: - exper_name = exper_names[i_exp] + + # load the Fhkl model once here to check which hkl are missing (and filter from the refls below) + first_exper = ExperimentList.from_file(exper_names[0], check_format=False)[0] + Fhkl_model = utils.load_Fhkl_model_from_params_and_expt(self.params, first_exper) + self.symbol = Fhkl_model.space_group().info().type().lookup_symbol() + if self.params.refiner.force_symbol is not None: + self.symbol = self.params.refiner.force_symbol + LOGGER.info("Will use space group symbol %s" % self.symbol) + Fhkl_model_p1 = Fhkl_model.expand_to_p1().generate_bijvoet_mates() + Fhkl_model_p1_indices = set(Fhkl_model_p1.indices()) + + for i_work, i_df in enumerate(worklist): + exper_name = exper_names[i_df] + exper_id = int(exper_ids[i_df]) LOGGER.info("EVENT: BEGIN loading experiment list") - expt_list = ExperimentListFactory.from_json_file(exper_name, check_format=self.params.refiner.check_expt_format) + # TODO: test that the diffBragg_benchmarks is not broken + expt = hopper_utils.DataModeler.exper_json_single_file(exper_name, exper_id) + expt_list = ExperimentList() + expt_list.append(expt) LOGGER.info("EVENT: DONE loading experiment list") - if len(expt_list) != 1: - print("Input experiments need to have length 1, %s does not" % exper_name) - expt = expt_list[0] expt.detector = detector # in case of supplied ref geom self._check_experiment_integrity(expt) - exper_dataframe = pandas_table.query("exp_name=='%s'" % exper_name) + exper_dataframe = pandas_table.query("exp_name=='%s'" % exper_name).query("exp_idx==%d" % exper_id) refl_name = exper_dataframe[refls_key].values[0] refls = flex.reflection_table.from_file(refl_name) - # FIXME need to remove (0,0,0) bboxes + refls = refls.select(refls['id'] == exper_id) try: - good_sel = flex.bool([h != (0, 0, 0) for h in list(refls["miller_index"])]) - refls = refls.select(good_sel) + miller_inds = list(refls["miller_index"]) + is_not_000 = [h != (0, 0, 0) for h in miller_inds] + is_in_Fhkl_model = [h in Fhkl_model_p1_indices for h in miller_inds] + LOGGER.debug("Only refining %d/%d refls whose HKL are in structure factor model" % ( + np.sum(is_in_Fhkl_model), len(refls))) + refl_sel = flex.bool(np.logical_and(is_not_000, is_in_Fhkl_model)) + refls = 
refls.select(refl_sel) except KeyError: pass - #UcellMan = utils.manager_from_crystal(expt.crystal) opt_uc_param = exper_dataframe[["a","b","c","al","be","ga"]].values[0] UcellMan = utils.manager_from_params(opt_uc_param) - if self.symbol is None: - if self.params.refiner.force_symbol is not None: - self.symbol = self.params.refiner.force_symbol - else: - self.symbol = expt.crystal.get_space_group().type().lookup_symbol() - LOGGER.info("Set space group symbol: %s" % self.symbol) - else: - if self.params.refiner.force_symbol is None: - if expt.crystal.get_space_group().type().lookup_symbol() != self.symbol: - raise ValueError("Crystals should all have the same space group symmetry") - if shot_idx == 0: # each rank initializes a simulator only once if self.params.simulator.init_scale != 1: print("WARNING: For stage_two , it is assumed that total scale is stored in the pandas dataframe") @@ -226,31 +245,16 @@ def load_inputs(self, pandas_table, miller_data=None, work_distribution=None, re self.Fref = utils.open_mtz(self.params.refiner.stage_two.Fref_mtzname, self.params.refiner.stage_two.Fref_mtzcol) - if "miller_index" in list(refls.keys()): - is_allowed = flex.bool(len(refls), True) - allowed_hkls = set(self.SIM.crystal.miller_array.indices()) - uc = self.SIM.crystal.dxtbx_crystal.get_unit_cell().parameters() - symb = self.SIM.crystal.space_group_info.type().lookup_symbol() - sym = crystal.symmetry(uc, symb) - mset = miller.set(sym, refls['miller_index'],True) - op = mset.change_of_basis_op_to_primitive_setting() - mset_p = mset.change_basis(op) - refl_hkls_p1 = mset_p.indices() - - for i_ref in range(len(refls)): - hkl_p1 = refl_hkls_p1[i_ref] - if hkl_p1 not in allowed_hkls: - is_allowed[i_ref] = False - refls = refls.select(is_allowed) - LOGGER.info("EVENT: LOADING ROI DATA") shot_modeler = hopper_utils.DataModeler(self.params) shot_modeler.exper_name = exper_name + shot_modeler.exper_idx = exper_id shot_modeler.refl_name = refl_name shot_modeler.rank = COMM.rank if self.params.refiner.load_data_from_refl: gathered = shot_modeler.GatherFromReflectionTable(expt, refls, sg_symbol=self.symbol) else: + # Note: no need to pass exper_id here because expt and refls have already been sliced out gathered = shot_modeler.GatherFromExperiment(expt, refls, sg_symbol=self.symbol) if not gathered: raise IOError("Failed to gather data from experiment %s", exper_name) @@ -279,7 +283,6 @@ def load_inputs(self, pandas_table, miller_data=None, work_distribution=None, re assert np.allclose(new_Modeler.roi_id, all_roi_id) LOGGER.info("Gathered file approved!") - self.Hi[shot_idx] = shot_modeler.Hi self.Hi_asu[shot_idx] = shot_modeler.Hi_asu @@ -322,19 +325,21 @@ def load_inputs(self, pandas_table, miller_data=None, work_distribution=None, re shot_modeler.originZ_init = exper_dataframe.detz_shift_mm.values[0]*1e-3 else: shot_modeler.originZ_init = 0 + # TODO: is there a reason these 3 attribs are set once more after being set above? 
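The selection built above keeps only reflections whose Miller index is neither (0,0,0) nor absent from the P1-expanded structure-factor model. A library-free sketch of the same membership test, with toy indices standing in for Fhkl_model_p1_indices and the reflection table:

    model_hkls = {(1, 0, 0), (0, 1, 0), (1, 1, 1)}            # stand-in for Fhkl_model_p1_indices
    refl_hkls = [(1, 0, 0), (0, 0, 0), (2, 2, 2), (1, 1, 1)]  # stand-in for refls["miller_index"]
    keep = [h != (0, 0, 0) and h in model_hkls for h in refl_hkls]
    kept = [h for h, ok in zip(refl_hkls, keep) if ok]
    assert kept == [(1, 0, 0), (1, 1, 1)]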
shot_modeler.exper_name = exper_name + shot_modeler.exper_idx = exper_id shot_modeler.refl_name = refl_name shot_panel_groups_refined = self.determine_refined_panel_groups(shot_modeler.pids) rank_panel_groups_refined = rank_panel_groups_refined.union(set(shot_panel_groups_refined)) shot_idx += 1 - if COMM.rank == 0: - self._mem_usage() - print("Finished loading image %d / %d" % (i_exp+1, len(exper_names)), flush=True) + LOGGER.info(utils.memory_report('Process memory usage')) + LOGGER.info("Finished loading image %d / %d (%d / %d)" + % (i_df+1, len(exper_names), i_work+1, len(worklist))) #, flush=True) shot_modeler.PAR = PAR_from_params(self.params, expt, best=exper_dataframe) - self.Modelers[i_exp] = shot_modeler + self.Modelers[i_df] = shot_modeler # TODO: verify that i_df as a key is ok everywhere LOGGER.info("DONE LOADING DATA; ENTER BARRIER") COMM.Barrier() @@ -349,16 +354,18 @@ def load_inputs(self, pandas_table, miller_data=None, work_distribution=None, re for set_of_panels in all_refined_groups: panel_groups_refined = panel_groups_refined.union(set_of_panels) self.panel_groups_refined = list(COMM.bcast(panel_groups_refined)) + LOGGER.info(utils.memory_report('Mem after panel groups')) LOGGER.info("EVENT: Gathering global HKL information") try: self._gather_Hi_information() - except TypeError: + except TypeError: # TODO: should we siltently fail here ? pass LOGGER.info("EVENT: FINISHED gather global HKL information") if self.params.roi.cache_dir_only: print("Done creating cache directory and cache_dir_only=True, so goodbye.") sys.exit() + LOGGER.info(utils.memory_report('Mem after gather Hi info')) # in case of GPU LOGGER.info("BEGIN DETERMINE MAX PIX") @@ -369,15 +376,10 @@ def load_inputs(self, pandas_table, miller_data=None, work_distribution=None, re n = max(n) self.NPIX_TO_ALLOC = COMM.bcast(n) LOGGER.info("DONE DETERMINE MAX PIX") + LOGGER.info(utils.memory_report('Mem after determine max num pix')) self.DEVICE_ID = COMM.rank % self.params.refiner.num_devices - self._mem_usage() - - def _mem_usage(self): - memMB = get_memory_usage() - import socket - host = socket.gethostname() - print("Rank 0 reporting memory usage: %f GB on Rank 0 node %s" % (memMB / 1e3, host)) + LOGGER.info(utils.memory_report('Mem after load_inputs')) def determine_refined_panel_groups(self, pids): refined_groups = [] @@ -394,8 +396,8 @@ def _determine_per_rank_max_num_pix(self): x1, x2, y1, y2 = map(np.array, zip(*modeler.rois)) npix = np.sum((x2-x1)*(y2-y1)) max_npix = max(npix, max_npix) - print("Rank %d, shot %d has %d pixels" % (COMM.rank, i_shot+1, npix)) - print("Rank %d, max pix to be modeled: %d" % (COMM.rank, max_npix)) + #print("Rank %d, shot %d has %d pixels" % (COMM.rank, i_shot+1, npix)) + LOGGER.info("Rank %d, max pix to be modeled: %d" % (COMM.rank, max_npix)) return max_npix def _try_loading_spectrum_filelist(self): @@ -407,50 +409,37 @@ def _try_loading_spectrum_filelist(self): return file_list def _gather_Hi_information(self): - nshots_on_this_rank = len(self.Hi) # aggregate all miller indices - self.Hi_all_ranks, self.Hi_asu_all_ranks = [], [] - # TODO assert list types are stored in Hi and Hi_asu - for i_shot in self.Hi: #range(nshots_on_this_rank): - self.Hi_all_ranks += self.Hi[i_shot] - self.Hi_asu_all_ranks += self.Hi_asu[i_shot] - self.Hi_all_ranks = COMM.reduce(self.Hi_all_ranks) - self.Hi_all_ranks = COMM.bcast(self.Hi_all_ranks) - self.Hi_asu_all_ranks = COMM.reduce(self.Hi_asu_all_ranks) - self.Hi_asu_all_ranks = COMM.bcast(self.Hi_asu_all_ranks) + self.hiasu = HiAsu(self) - 
marr_unique_h = self._get_unique_Hi() + # TODO Restore this diagnostics step within the scope of `HiAsu` class + # Hi_asu_all_ranks used to be a list of all Hi_asu from all ranks, + # (None of ranks > 0) but was removed when moving to new `HiAsu` class. + # marr_unique_h = self._get_unique_Hi(Hi_asu_all_ranks) - # this will map the measured miller indices to their index in the LBFGS parameter array self.x - self.idx_from_asu = {h: i for i, h in enumerate(set(self.Hi_asu_all_ranks))} - # we will need the inverse map during refinement to update the miller array in diffBragg, so we cache it here - self.asu_from_idx = {i: h for i, h in enumerate(set(self.Hi_asu_all_ranks))} + # TODO: I think this code does absolutely nothing, but might be useful (it was used for B-factor modeling, and maybe more..) + # fres = marr_unique_h.d_spacings() + # self.res_from_asu = {h: res for h, res in zip(fres.indices(), fres.data())} + # TODO: End of code I think does absolutely nothing - self.num_hkl_global = len(self.idx_from_asu) + def get_first_modeller_symmetry(self): + uc = next(iter(self.Modelers.values())).ucell_man + params = uc.a, uc.b, uc.c, uc.al * 180 / np.pi, uc.be * 180 / np.pi, uc.ga * 180 / np.pi + if self.params.refiner.force_unit_cell is not None: + params = self.params.refiner.force_unit_cell + return crystal.symmetry(unit_cell=params, space_group_symbol=self.symbol) - fres = marr_unique_h.d_spacings() - self.res_from_asu = {h: res for h, res in zip(fres.indices(), fres.data())} - - def _get_unique_Hi(self): - COMM.barrier() + def _get_unique_Hi(self, Hi_asu_all_ranks): if COMM.rank == 0: - from cctbx.crystal import symmetry - from cctbx import miller from cctbx.array_family import flex as cctbx_flex - ii = list(self.Modelers.keys())[0] - uc = self.Modelers[ii].ucell_man - params = uc.a, uc.b, uc.c, uc.al * 180 / np.pi, uc.be * 180 / np.pi, uc.ga * 180 / np.pi - if self.params.refiner.force_unit_cell is not None: - params = self.params.refiner.force_unit_cell - symm = symmetry(unit_cell=params, space_group_symbol=self.symbol) - hi_asu_flex = cctbx_flex.miller_index(self.Hi_asu_all_ranks) + symm = self.get_first_modeller_symmetry() + hi_asu_flex = cctbx_flex.miller_index(Hi_asu_all_ranks) mset = miller.set(symm, hi_asu_flex, anomalous_flag=True) marr = miller.array(mset) binner = marr.setup_binner(d_max=self.params.refiner.stage_two.d_max, d_min=self.params.refiner.stage_two.d_min, n_bins=self.params.refiner.stage_two.n_bin) - from collections import Counter print("Average multiplicities:") print("<><><><><><><><><><><><>") for i_bin in range(self.params.refiner.stage_two.n_bin - 1): @@ -462,15 +451,15 @@ def _get_unique_Hi(self): print("\t %d refls with multi %d" % (sum(multi_in_bin == ii), ii)) print("Overall completeness\n<><><><><><><><>") - symm = symmetry(unit_cell=params, space_group_symbol=self.symbol) - hi_flex_unique = cctbx_flex.miller_index(list(set(self.Hi_asu_all_ranks))) + unique_Hi_asu = set(Hi_asu_all_ranks) + hi_flex_unique = cctbx_flex.miller_index(list(unique_Hi_asu)) mset = miller.set(symm, hi_flex_unique, anomalous_flag=True) self.binner = mset.setup_binner(d_min=self.params.refiner.stage_two.d_min, d_max=self.params.refiner.stage_two.d_max, n_bins=self.params.refiner.stage_two.n_bin) mset.completeness(use_binning=True).show() marr_unique_h = miller.array(mset) - print("Rank %d: total miller vars=%d" % (COMM.rank, len(set(self.Hi_asu_all_ranks)))) + print("Rank %d: total miller vars=%d" % (COMM.rank, len(unique_Hi_asu))) else: marr_unique_h = None @@ -486,6 +475,7 @@ def 
_launch(self): x_init = None nmacro = self.params.refiner.num_macro_cycles n_trials = len(self.params.refiner.max_calls) + for i_trial in range(n_trials*nmacro): self.RUC = StageTwoRefiner(self.Modelers, self.symbol, self.params) @@ -511,8 +501,7 @@ def _launch(self): self.RUC.log_fcells = True self.RUC.request_diag_once = False self.RUC.trad_conv = True - self.RUC.idx_from_asu = self.idx_from_asu - self.RUC.asu_from_idx = self.asu_from_idx + self.RUC.hiasu = self.hiasu self.RUC.S = self.SIM self.RUC.restart_file = self.params.refiner.io.restart_file @@ -527,7 +516,9 @@ def _launch(self): self.RUC.S.update_nanoBragg_instance('verbose', self.params.refiner.verbose) LOGGER.info("_launch run setup") + LOGGER.info(utils.memory_report('Mem usage before _setup')) self.RUC.run(setup_only=True) + LOGGER.info(utils.memory_report('Mem usage after _setup')) LOGGER.info("_launch done run setup") # for debug purposes: #if not self.params.refiner.quiet: @@ -552,9 +543,10 @@ def _launch(self): # more_sel_flags[i_shot] = [flag1 and flag2 for flag1,flag2 in zip(sel_flags, res_flags)] # self.RUC.selection_flags = more_sel_flags - LOGGER.info("_launcher runno setup") + LOGGER.info("_launcher running optimization") + self.RUC.run(setup=False) - LOGGER.info("_launcher done runno setup") + LOGGER.info("_launcher done running optimization") if self.RUC.hit_break_to_use_curvatures: self.RUC.fix_params_with_negative_curvature = False self.RUC.num_positive_curvatures = 0 @@ -576,8 +568,81 @@ def _launch(self): if self.params.profile: self.RUC.S.D.show_timings(self.RUC.rank) - if os.environ.get("DIFFBRAGG_USE_CUDA") is not None: + if os.environ.get("DIFFBRAGG_USE_CUDA") is not None or os.environ.get("DIFFBRAGG_USE_KOKKOS") is not None: self.RUC.S.D.gpu_free() def will_refine(self, param): return param is not None and any(param) + + +class HiAsu(object): + """ + Object which stores possible & present Miller Indices, their counts, + counters, maps between them and their integer indexes etc. + + Within `HiAsu`, the following notation is used for parameter names: + * no trailing `_`: global parameter - total or the same for all ranks. + * with trailing `_`: local parameter – value is unique for each rank. + Example: `counts_` would be specific to rank, but `counts` would be global. 
+ """ + def __init__(self, refine_launcher): + self.rl = refine_launcher + self.possible = self.get_possible() + self.possible_len = len(self.possible) + self.possible_counts = self.get_counts() + self.present_len = len(list(self.present_zip)) + self.from_idx, self.to_idx = self._get_dicts() + + def get_possible(self): + if COMM.rank == 0: + sym = self.rl.get_first_modeller_symmetry() + res_ranges_str = self.rl.params.refiner.res_ranges + if res_ranges_str: + res_ranges = utils.parse_reso_string(res_ranges_str) + d_min = min([d_min for d_min, _ in res_ranges]) + else: + expt = next(iter(self.rl.Modelers.values())).E + det, s0 = expt.detector, expt.beam.get_s0() + d_min = min([p.get_max_resolution_at_corners(s0) for p in det]) + d_min *= 0.8 # accommodate variations in uc or det across expts + mset_full = sym.build_miller_set(anomalous_flag=True, d_min=d_min) + possible = list(mset_full.indices()) + else: + possible = None + return COMM.bcast(possible, root=0) + + def get_counts(self): + hi_asu_ = chain.from_iterable(self.rl.Hi_asu.values()) + hi_asu_counter_ = Counter(hi_asu_) + hi_asu_possible_counts_ = [hi_asu_counter_[k] for k in self.possible] + hi_asu_possible_counts_ = np.array(hi_asu_possible_counts_, dtype=np.uint16) + hi_asu_possible_counts = np.zeros_like(hi_asu_possible_counts_, dtype=np.uint16) + COMM.Allreduce(hi_asu_possible_counts_, hi_asu_possible_counts, op=MPI.SUM) + return hi_asu_possible_counts + + @property + def present(self): + return (p for p, c in self.present_zip) + + @property + def present_counts(self): + return (c for p, c in self.present_zip) + + @property + def present_counter(self): + return Counter({p: c for p, c in self.present_zip}) + + @property + def present_idx_counter(self): + return Counter({self.to_idx[p]: c for p, c in self.present_zip}) + + @property + def present_zip(self): + return ((p, c) for p, c in zip(self.possible, self.possible_counts) if c) + + def _get_dicts(self): + """from_idx maps miller indices to index in LBFGS par. 
array self.x; + to_ids is an inverse map during refinement to update diffBragg m.arr""" + from_idx = {i: h for i, h in enumerate(self.present)} + to_idx = {h: i for i, h in enumerate(self.present)} + return from_idx, to_idx diff --git a/simtbx/diffBragg/hopper_ensemble_utils.py b/simtbx/diffBragg/hopper_ensemble_utils.py index 0eba59a7c3..f15f608d10 100644 --- a/simtbx/diffBragg/hopper_ensemble_utils.py +++ b/simtbx/diffBragg/hopper_ensemble_utils.py @@ -15,7 +15,6 @@ from cctbx import miller, crystal, sgtbx from dials.array_family import flex from dxtbx.model import ExperimentList -from xfel.merging.application.utils.memory_usage import get_memory_usage COMM = MPI.COMM_WORLD @@ -80,9 +79,12 @@ def __call__(self, x, *args, **kwargs): min_info = "it=%d | t/it=%.4fs | F=%10.7g | sigZ=%10.7g" \ % (self.niter,self.ave_t_per_iter, f, ave_zscore_sig) if COMM.rank==0: - print(min_info, flush=True) + #print(min_info, flush=True) + MAIN_LOGGER.info(min_info) if modelers.save_freq is not None and self.niter % modelers.save_freq == 0: modelers.save_up(self.x0, ref_iter=self.niter) + if modelers.SIM.D.record_timings: + modelers.SIM.D.show_timings() return f @@ -106,12 +108,13 @@ def target_func(x, modelers): g_fhkl = np.zeros(num_fhkl_params) zscore_sigs = [] fcell_params = x[-num_fhkl_params:] - for i_shot in modelers: + for ii, i_shot in enumerate(modelers): shot_modeler = modelers[i_shot] shot_x_slice = modelers.x_slices[i_shot] per_shot_params = x[shot_x_slice] x_for_shot = np.hstack((per_shot_params, fcell_params)) - model_bragg, Jac = hopper_utils.model(x_for_shot, shot_modeler, modelers.SIM, compute_grad=True, update_spectrum=True) + model_bragg, Jac = hopper_utils.model(x_for_shot, shot_modeler, modelers.SIM, compute_grad=True, update_spectrum=True, + update_Fhkl_scales=ii==0) model_pix = model_bragg + shot_modeler.all_background @@ -367,6 +370,8 @@ def prep_for_refinement(self): self._set_mtz_data() self.set_device_id() + + def alloc_max_pix_per_shot(self): self._mpi_set_allocation_volume() def _get_fhkl_vary_flags(self): @@ -387,10 +392,13 @@ def _get_fhkl_vary_flags(self): all_nominal_hkl = set() for mod in self.data_modelers.values(): all_nominal_hkl = all_nominal_hkl.union(mod.hi_asu_perpix) + #TODO : is this memory intensive? 
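The per-rank sets of nominal HKLs are combined with a gather, a set union on rank 0, and a broadcast (the gather is the next statement). A minimal sketch of that pattern, assuming plain mpi4py (the patch itself goes through the libtbx.mpi4py wrapper):

    from mpi4py import MPI

    COMM = MPI.COMM_WORLD
    local_hkl = {(COMM.rank, 0, 0)}    # toy per-rank set of Miller indices
    gathered = COMM.gather(local_hkl)  # list of sets on rank 0, None elsewhere
    if COMM.rank == 0:
        all_hkl = set().union(*gathered)
    else:
        all_hkl = None
    all_hkl = COMM.bcast(all_hkl)      # every rank now holds the union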
all_nominal_hkl = COMM.gather(all_nominal_hkl) if COMM.rank == 0: + # TODO: all_nominal_hkl is P1, asu_map_int is non-P1 all_nominal_hkl = set(all_nominal_hkl[0]).union(*all_nominal_hkl[1:]) - asu_inds_to_vary = [self.SIM.asu_map_int[h] for h in all_nominal_hkl] + all_nominal_hkl_sym = utils.map_hkl_list(all_nominal_hkl, True, self.SIM.crystal.symbol) + asu_inds_to_vary = [self.SIM.asu_map_int[h] for h in all_nominal_hkl_sym] else: asu_inds_to_vary = None asu_inds_to_vary = set(COMM.bcast(asu_inds_to_vary)) @@ -564,13 +572,6 @@ def save_up(self, x, ref_iter=None): mod.params.tag = temp -def mem_usage(rank): - if COMM.rank == rank: - memMB = get_memory_usage() - host = socket.gethostname() - MAIN_LOGGER.info("Rank %d reporting memory usage: %f GB on Rank 0 node %s" % (COMM.rank, memMB / 1e3, host)) - - def get_gather_name(exper_name, gather_dir): gathered_name = os.path.splitext(os.path.basename(exper_name))[0] gathered_name += "_withData.refl" @@ -579,9 +580,10 @@ def get_gather_name(exper_name, gather_dir): def load_inputs(pandas_table, params, exper_key="exp_name", refls_key='predictions', - gather_dir=None): + gather_dir=None, exper_idx_key="exp_idx"): - work_distribution = prep_dataframe(pandas_table, refls_key) + work_distribution = prep_dataframe(pandas_table, refls_key, + res_ranges_string=params.refiner.res_ranges) COMM.barrier() num_exp = len(pandas_table) first_exper_file = pandas_table[exper_key].values[0] @@ -600,7 +602,9 @@ def load_inputs(pandas_table, params, exper_key="exp_name", refls_key='predictio % (COMM.size, num_exp, num_exp)) exper_names = pandas_table[exper_key] - assert len(exper_names) == len(set(exper_names)) + exper_ids = pandas_table[exper_idx_key] + name_ids = list(zip(exper_names, exper_ids)) + assert len(name_ids) == len(set(name_ids)) worklist = work_distribution[COMM.rank] MAIN_LOGGER.info("EVENT: begin loading inputs") @@ -609,20 +613,22 @@ def load_inputs(pandas_table, params, exper_key="exp_name", refls_key='predictio Fhkl_model = Fhkl_model.expand_to_p1().generate_bijvoet_mates() Fhkl_model_indices = set(Fhkl_model.indices()) shot_modelers = hopper_ensemble_utils.DataModelers() - for ii, i_exp in enumerate(worklist): - exper_name = exper_names[i_exp] + for ii, i_df in enumerate(worklist): + exper_name = exper_names[i_df] + exper_id = int(exper_ids[i_df]) MAIN_LOGGER.info("EVENT: BEGIN loading experiment list") - expt_list = ExperimentList.from_file(exper_name, check_format=params.refiner.check_expt_format) + check_format = not params.refiner.load_data_from_refl + expt = hopper_utils.DataModeler.exper_json_single_file(exper_name, exper_id, check_format) + expt_list = ExperimentList() + expt_list.append(expt) MAIN_LOGGER.info("EVENT: DONE loading experiment list") - if len(expt_list) != 1: - MAIN_LOGGER.critical("Input experiments need to have length 1, %s does not" % exper_name) - expt = expt_list[0] expt.detector = detector # in case of supplied ref geom - exper_dataframe = pandas_table.query("%s=='%s'" % (exper_key, exper_name)) + exper_dataframe = pandas_table.query("%s=='%s'" % (exper_key, exper_name)).query("%s==%d" % (exper_idx_key, exper_id)) refl_name = exper_dataframe[refls_key].values[0] refls = flex.reflection_table.from_file(refl_name) + refls = refls.select(refls['id'] == exper_id) miller_inds = list( refls['miller_index']) is_not_000 = [h != (0, 0, 0) for h in miller_inds] @@ -632,7 +638,7 @@ def load_inputs(pandas_table, params, exper_key="exp_name", refls_key='predictio refls = refls.select(refl_sel) exp_cry_sym = 
expt.crystal.get_space_group().type().lookup_symbol() - if exp_cry_sym.replace(" ", "") != params.space_group: + if params.space_group is not None and exp_cry_sym.replace(" ", "") != params.space_group: gr = sgtbx.space_group_info(params.space_group).group() expt.crystal.set_space_group(gr) #raise ValueError("Crystals should all have the same space group symmetry") @@ -640,6 +646,7 @@ def load_inputs(pandas_table, params, exper_key="exp_name", refls_key='predictio MAIN_LOGGER.info("EVENT: LOADING ROI DATA") shot_modeler = hopper_utils.DataModeler(params) shot_modeler.exper_name = exper_name + shot_modeler.exper_idx = exper_id shot_modeler.refl_name = refl_name shot_modeler.rank = COMM.rank if params.refiner.load_data_from_refl: @@ -678,13 +685,13 @@ def load_inputs(pandas_table, params, exper_key="exp_name", refls_key='predictio continue shot_modeler.set_parameters_for_experiment(best=exper_dataframe) - shot_modeler.set_spectrum() + shot_modeler.set_spectrum(spectra_file=exper_dataframe.spectrum_filename.values[0]) MAIN_LOGGER.info("Will simulate %d energy channels" % len(shot_modeler.nanoBragg_beam_spectrum)) # verify this shot_modeler.Umatrices = [shot_modeler.E.crystal.get_U()] - mem_usage(0) + MAIN_LOGGER.info(utils.memory_report('Rank 0 reporting memory usage')) if COMM.rank==0: print("Finished loading image %d / %d" % (ii + 1, len(worklist)), flush=True) diff --git a/simtbx/diffBragg/hopper_io.py b/simtbx/diffBragg/hopper_io.py index da9316e37e..5285c68de1 100644 --- a/simtbx/diffBragg/hopper_io.py +++ b/simtbx/diffBragg/hopper_io.py @@ -9,7 +9,7 @@ import numpy as np -def save_expt_refl_file(filename, expts, refls, specs=None, check_exists=False): +def save_expt_refl_file(filename, expts, refls, specs=None, check_exists=False, indices=None): """ Save an input file for bg_and_probOri (the EMC initializer script) expt and refl names will be given absolute paths @@ -18,12 +18,15 @@ def save_expt_refl_file(filename, expts, refls, specs=None, check_exists=False): :param refls: list of reflection tables :param specs: optional list of spectrum .lam files :param check_exists: ensure files actually exist + :param indices: experiment indices if multiple images per experiment :return: """ if specs is None: specs = [None]*len(expts) + if indices is None: + indices = [None] *len(expts) with open(filename, "w") as o: - for expt, refl, spec in zip(expts, refls, specs): + for expt, refl, spec, idx in zip(expts, refls, specs, indices): expt = os.path.abspath(expt) refl = os.path.abspath(refl) if spec is not None: @@ -33,10 +36,15 @@ def save_expt_refl_file(filename, expts, refls, specs=None, check_exists=False): assert os.path.exists(refl) if spec is not None: assert os.path.exists(spec) - if spec is not None: - o.write("%s %s %s\n" % (expt, refl, spec)) + if spec is None: + spec = "" + else: + spec = " %s" %spec + if idx is None: + idx = "" else: - o.write("%s %s\n" % (expt, refl)) + idx = " %d" % idx + o.write("%s %s%s%s\n" % (expt, refl, spec, idx)) def make_rank_outdir(root, subfolder, rank=0): @@ -59,11 +67,31 @@ def diffBragg_Umat(rotX, rotY, rotZ, U): return U -def save_to_pandas(x, Mod, SIM, orig_exp_name, params, expt, rank_exp_idx, stg1_refls, stg1_img_path, - rank=0): +def save_to_pandas(x, Mod, SIM, orig_exp_name, params, expt, rank_exp_idx, stg1_refls, stg1_img_path=None, + rank=0, write_expt=True, write_pandas=True, exp_idx=0): + """ + + :param x: the optimized parameters used by hopper (output of Minimize) + :param Mod: the instance of the hopper_utils.DataModeler that was used by 
hopper + :param SIM: the instance of nanoBragg/sim_data.SimData that was used by hopper + :param orig_exp_name: the name of the experiment list that was input to hopper + :param params: the diffBragg hopper parameters + :param expt: the data modeler experiment + :param rank_exp_idx: order this shot was processed by this MPI rank #TODO rename this + :param stg1_refls: path to the refls that were input to hopper + :param stg1_img_path: leave as None, no longer used + :param rank: MPI rank + :param write_expt: whether to write the single shot experiment + :param write_pandas: whether to write the single shot dataframe + :param exp_idx: the index of the experiment within the experiment list (orig_exp_name) + :return: the single shot dataframe + """ LOGGER = logging.getLogger("refine") - rank_exper_outdir = make_rank_outdir(params.outdir, "expers",rank) - rank_pandas_outdir = make_rank_outdir(params.outdir, "pandas",rank) + opt_exp_path = None + basename = os.path.splitext(os.path.basename(orig_exp_name))[0] + if write_expt: + rank_exper_outdir = make_rank_outdir(params.outdir, "expers",rank) + opt_exp_path = os.path.join(rank_exper_outdir, "%s_%s_%d.expt" % (params.tag, basename, rank_exp_idx)) scale, rotX, rotY, rotZ, Na, Nb, Nc, Nd, Ne, Nf,\ diff_gam_a, diff_gam_b, diff_gam_c, diff_sig_a, \ @@ -109,9 +137,6 @@ def save_to_pandas(x, Mod, SIM, orig_exp_name, params, expt, rank_exp_idx, stg1_ rotY_xt = par['rotY'] rotZ_xt = par['rotZ'] U_xt = diffBragg_Umat(rotX_xt, rotY_xt, rotZ_xt, SIM.Umatrices[i_xtal]) - #cryst_temp = deepcopy(new_cryst) - #cryst_temp.set_U(U_xt) - #Amat_xt = cryst_temp.get_A() other_Umats.append(U_xt) other_spotscales.append(scale_xt) @@ -128,9 +153,6 @@ def save_to_pandas(x, Mod, SIM, orig_exp_name, params, expt, rank_exp_idx, stg1_ lam_coefs.append([val]) lam_coefs = tuple(lam_coefs) - basename = os.path.splitext(os.path.basename(orig_exp_name))[0] - opt_exp_path = os.path.join(rank_exper_outdir, "%s_%s_%d.expt" % (params.tag, basename, rank_exp_idx)) - pandas_path = os.path.join(rank_pandas_outdir, "%s_%s_%d.pkl" % (params.tag, basename, rank_exp_idx)) new_expt = Experiment() new_expt.crystal = new_cryst new_expt.detector = expt.detector @@ -140,8 +162,9 @@ def save_to_pandas(x, Mod, SIM, orig_exp_name, params, expt, rank_exp_idx, stg1_ # expt.detector = refiner.get_optimized_detector() new_exp_list = ExperimentList() new_exp_list.append(new_expt) - new_exp_list.as_file(opt_exp_path) - LOGGER.debug("saved opt_exp %s with wavelength %f" % (opt_exp_path, expt.beam.get_wavelength())) + if write_expt: + new_exp_list.as_file(opt_exp_path) + LOGGER.debug("saved opt_exp %s with wavelength %f" % (opt_exp_path, expt.beam.get_wavelength())) _,flux_vals = zip(*SIM.beam.spectrum) df = single_expt_pandas(xtal_scale=scale, Amat=Amat, @@ -166,14 +189,21 @@ def save_to_pandas(x, Mod, SIM, orig_exp_name, params, expt, rank_exp_idx, stg1_ opt_det=params.opt_det, stg1_refls=stg1_refls, stg1_img_path=stg1_img_path, ncells_init=Nabc_init, spot_scales_init=scale_init, - other_Umats = other_Umats, other_spotscales = other_spotscales) + other_Umats = other_Umats, other_spotscales = other_spotscales, + num_mosaicity_samples=params.simulator.crystal.num_mosaicity_samples) + + df['exp_idx'] = exp_idx + if hasattr(Mod, "sigz"): df['sigz'] = [Mod.sigz] if hasattr(Mod, "niter"): df['niter'] = [Mod.niter] df['phi_deg'] = SIM.D.phi_deg df['osc_deg'] = SIM.D.osc_deg - df.to_pickle(pandas_path) + if write_pandas: + rank_pandas_outdir = make_rank_outdir(params.outdir, "pandas",rank) + pandas_path = 
os.path.join(rank_pandas_outdir, "%s_%s_%d.pkl" % (params.tag, basename, rank_exp_idx)) + df.to_pickle(pandas_path) return df @@ -183,7 +213,7 @@ def single_expt_pandas(xtal_scale, Amat, ncells_abc, ncells_def, eta_abc, spec_file, spec_stride,flux, beamsize_mm, orig_exp_name, opt_exp_name, spec_from_imageset, oversample, opt_det, stg1_refls, stg1_img_path, ncells_init=None, spot_scales_init = None, - other_Umats=None, other_spotscales=None): + other_Umats=None, other_spotscales=None, num_mosaicity_samples=None): """ :param xtal_scale: @@ -212,6 +242,7 @@ def single_expt_pandas(xtal_scale, Amat, ncells_abc, ncells_def, eta_abc, :param opt_det: :param stg1_refls: :param stg1_img_path: + :num_mosaicity_samples: :return: """ if other_Umats is None: @@ -253,11 +284,16 @@ def single_expt_pandas(xtal_scale, Amat, ncells_abc, ncells_def, eta_abc, df["other_spotscales"] = [tuple(other_spotscales)] if other_Umats: df["other_Umats"] = [tuple(map(tuple, other_Umats))] + if num_mosaicity_samples is not None: + df['num_mosaicity_samples'] = num_mosaicity_samples df["total_flux"] = flux df["beamsize_mm"] = beamsize_mm df["exp_name"] = os.path.abspath(orig_exp_name) - df["opt_exp_name"] = os.path.abspath(opt_exp_name) + + if opt_exp_name is not None: + opt_exp_name = os.path.abspath(opt_exp_name) + df["opt_exp_name"] = opt_exp_name df["spectrum_from_imageset"] = spec_from_imageset df["oversample"] = oversample if opt_det is not None: diff --git a/simtbx/diffBragg/hopper_utils.py b/simtbx/diffBragg/hopper_utils.py index 1b29754898..5acd3ced80 100644 --- a/simtbx/diffBragg/hopper_utils.py +++ b/simtbx/diffBragg/hopper_utils.py @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, print_function import time import os +import json from dials.algorithms.shoebox import MaskCode from copy import deepcopy from dials.model.data import Shoebox @@ -14,7 +15,10 @@ from scipy.ndimage import binary_dilation from dxtbx.model.experiment_list import ExperimentListFactory from dxtbx.model import Spectrum -from serialtbx.detector.jungfrau import get_pedestalRMS_from_jungfrau +try: # TODO keep backwards compatibility until we close the nxmx_writer_experimental branch + from serialtbx.detector.jungfrau import get_pedestalRMS_from_jungfrau +except ModuleNotFoundError: + from xfel.util.jungfrau import get_pedestalRMS_from_jungfrau from simtbx.nanoBragg.utils import downsample_spectrum from dials.array_family import flex from simtbx.diffBragg import utils @@ -137,6 +141,8 @@ def __init__(self, params): self.all_freq = None # flag for the h,k,l frequency of the observed pixel self.best_model = None # best model value at each pixel self.best_model_includes_background = False # whether the best model includes the background scattering estimate + self.all_nominal_hkl_p1 = None # nominal p1 hkl at each pixel + self.all_nominal_hkl = None # nominal hkl at each pixel self.all_data =None # data at each pixel (photon units) self.all_sigma_rdout = None # this is either a float or an array. 
if the phil param use_perpixel_dark_rms=True, then these are different per pixel, per shot self.all_gain = None # gain value per pixel (used during diffBragg/refiners/stage_two_refiner) @@ -146,6 +152,7 @@ def __init__(self, params): self.all_fast =None # fast-scan coordinate per pixel self.all_slow =None # slow-scan coordinate per pixel self.all_pid = None # panel id per pixel + self.all_zscore = None # the estimated z-score values for each pixel, updated each iteration in the Target class self.rois=None # region of interest (per spot) self.pids=None # panel id (per spot) self.tilt_abc=None # background plane constants (per spot), a,b are fast,slow scan components, c is offset @@ -158,6 +165,7 @@ def __init__(self, params): self.exper_name = None # optional name specifying where dxtbx.model.Experiment was loaded from self.refl_name = None # optional name specifying where dials.array_family.flex.reflection_table refls were loaded from self.spec_name = None # optional name specifying spectrum file(.lam) + self.exper_idx = 0 # optional number specifying the index of the experiment in the experiment list self.rank = 0 # in case DataModelers are part of an MPI program, have a rank attribute for record keeping self.Hi = None # miller index (P1) @@ -171,6 +179,49 @@ def __init__(self, params): "Hi", "Hi_asu", "roi_id", "params", "all_pid", "all_fast", "all_slow", "best_model_includes_background", "all_q_perpix", "all_sigma_rdout"] + def filter_pixels(self, thresh): + assert self.roi_id is not None + assert self.all_trusted is not None + assert self.all_zscore is not None + + if not hasattr(self, 'roi_id_slices') or self.roi_id_slices is None: + self.set_slices('roi_id') + + ntrust = self.all_trusted.sum() + + sigz_per_shoebox = [] + for roi_id in self.roi_id_unique: + slcs = self.roi_id_slices[roi_id] + Zs = [] + for slc in slcs: + trusted = self.all_trusted[slc] + Zs += list(self.all_zscore[slc][trusted]) + if not Zs: + sigz = np.nan + else: + sigz = np.std(Zs) + sigz_per_shoebox.append(sigz) + if np.all(np.isnan(sigz_per_shoebox)): + MAIN_LOGGER.debug("All shoeboxes are nan, nothing to filter") + return + med_sigz = np.median([sigz for sigz in sigz_per_shoebox if not np.isnan(sigz)]) + sigz_per_shoebox = np.nan_to_num(sigz_per_shoebox, nan=med_sigz) + shoebox_is_bad = utils.is_outlier(sigz_per_shoebox, thresh) + nbad_pix = 0 + for i_roi, roi_id in enumerate(self.roi_id_unique): + if shoebox_is_bad[i_roi]: + for slc in self.roi_id_slices[roi_id]: + nbad_pix += slc.stop - slc.start + self.all_trusted[slc] = False + + #inds = np.arange(len(self.all_trusted)) + #Zs = self.all_zscore[self.all_trusted] + #bad = utils.is_outlier(Zs, thresh=thresh) + #inds_trusted = inds[self.all_trusted] + #self.all_trusted[inds_trusted[bad]] = False + MAIN_LOGGER.debug("Added %d pixels from %d shoeboxes to the untrusted list (%d / %d trusted pixels remain)" + % (nbad_pix, shoebox_is_bad.sum(), self.all_trusted.sum(), len(self.all_trusted))) + def set_spectrum(self, spectra_file=None, spectra_stride=None, total_flux=None): # note , the following 3 settings will only be used if spectrum_from_imageset is False and gause_spec is False @@ -211,11 +262,11 @@ def at_minimum(self, x, f, accept): #self.target.minima.append((f,self.target.x0,accept)) self.target.lowest_x = x try: - # TODO get SIM and i_exp in here so we can save_up each new global minima! + # TODO get SIM and i_shot in here so we can save_up each new global minima! 
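filter_pixels (added above) marks whole shoeboxes as untrusted when their per-shoebox sigma-Z is an outlier; utils.is_outlier does the real test in diffBragg. A rough numpy sketch of the idea using a simple median/MAD criterion (the numbers and the 0.6745 scaling are illustrative, not the actual implementation):

    import numpy as np

    sigz_per_shoebox = np.array([1.0, 1.1, 0.9, 8.0, 1.05])
    med = np.median(sigz_per_shoebox)
    mad = np.median(np.abs(sigz_per_shoebox - med)) or 1.0
    robust_z = 0.6745 * (sigz_per_shoebox - med) / mad  # modified z-score
    shoebox_is_bad = robust_z > 5.0                     # analogous to the thresh argument
    assert shoebox_is_bad.tolist() == [False, False, False, True, False]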
if f < self.target.lowest_f: self.target.lowest_f = f MAIN_LOGGER.info("New minimum found!") - self.save_up(self.target.x0, SIM, self.rank, i_exp=i_exp) + self.save_up(self.target.x0, SIM, self.rank, i_shot=i_shot) except NameError: pass @@ -275,9 +326,48 @@ def sigma_rdout(self): def clean_up(self, SIM): free_SIM_mem(SIM) - def set_experiment(self, exp, load_imageset=True): + @staticmethod + def exper_json_single_file(exp_file, i_exp=0, check_format=True): + """ + load a single experiment from an exp_file + If working with large combined experiment files, we only want to load + one image at a time on each MPI rank, otherwise at least one rank would need to + load the entire file into memory. + :param exp_file: experiment list file + :param i_exp: experiment id + :param check_format: bool, verifies the format class of the experiment, set to False if loading data from refls + :return: + """ + exper_json = json.load(open(exp_file)) + nexper = len(exper_json["experiment"]) + assert 0 <= i_exp < nexper + + this_exper = exper_json["experiment"][i_exp] + + new_json = {'__id__': "ExperimentList", "experiment": [deepcopy(this_exper)]} + + for model in ['beam', 'detector', 'crystal', 'imageset', 'profile', 'scan', 'goniometer', 'scaling_model']: + if model in this_exper: + model_index = this_exper[model] + new_json[model] = [exper_json[model][model_index]] + new_json["experiment"][0][model] = 0 + else: + new_json[model] = [] + explist = ExperimentListFactory.from_dict(new_json, check_format=check_format) + assert len(explist) == 1 + return explist[0] + + def set_experiment(self, exp, load_imageset=True, exp_idx=0): + """ + :param exp: experiment or filename + :param load_imageset: whether to load the imageset (usually True) + :param exp_idx: index corresponding to experiment in experiment list + """ if isinstance(exp, str): - self.E = ExperimentListFactory.from_json_file(exp, load_imageset)[0] + if not load_imageset: + self.E = ExperimentListFactory.from_json_file(exp, False)[exp_idx] + else: + self.E = self.exper_json_single_file(exp, exp_idx) else: self.E = exp if self.params.opt_det is not None: @@ -290,9 +380,15 @@ def set_experiment(self, exp, load_imageset=True): self.E.beam = opt_beam_E.beam MAIN_LOGGER.info("Set the optimal beam from %s" % self.params.opt_beam) - def load_refls(self, ref): + def load_refls(self, ref, exp_idx=0): + """ + :param ref: reflection table or filename + :param exp_idx: index corresponding to experiment in experiment list + """ if isinstance(ref, str): refls = flex.reflection_table.from_file(ref) + # TODO: is this the proper way to select the id ? + refls = refls.select(refls['id']==exp_idx) else: # assert is a reflection table. .. 
refls = ref @@ -311,6 +407,7 @@ def is_duplicate_hkl(self, refls): return is_duplicate def GatherFromReflectionTable(self, exp, ref, sg_symbol=None): + self.set_experiment(exp, load_imageset=False) self.refls = self.load_refls(ref) nref = len(self.refls) @@ -360,7 +457,8 @@ def GatherFromReflectionTable(self, exp, ref, sg_symbol=None): else: sb_trust = np.logical_or(mask==fg_code, mask==bg_code) - below_zero = sb_bkgrnd <= 0 + # below_zero = sb_bkgrnd <= 0 + below_zero = sb_bkgrnd < 0 if np.any(below_zero): nbelow = np.sum(below_zero) ntot = sb_bkgrnd.size @@ -390,10 +488,19 @@ def GatherFromReflectionTable(self, exp, ref, sg_symbol=None): self.data_to_one_dim(img_data, is_trusted, background) return True - def GatherFromExperiment(self, exp, ref, remove_duplicate_hkl=True, sg_symbol=None): - self.set_experiment(exp, load_imageset=True) + def GatherFromExperiment(self, exp, ref, remove_duplicate_hkl=True, sg_symbol=None, exp_idx=0): + """ + + :param exp: experiment list filename , or experiment object + :param ref: reflection table filename, or reflection table instance + :param remove_duplicate_hkl: search for miller index duplicates and remove + :param sg_symbol: space group lookup symbol P43212 + :param exp_idx: index of the experiment in the experiment list + :return: + """ + self.set_experiment(exp, load_imageset=True, exp_idx=exp_idx) - refls = self.load_refls(ref) + refls = self.load_refls(ref, exp_idx=exp_idx) if len(refls)==0: MAIN_LOGGER.warning("no refls loaded!") return False @@ -471,6 +578,9 @@ def GatherFromExperiment(self, exp, ref, remove_duplicate_hkl=True, sg_symbol=No self.tilt_abc = [abc for i_roi, abc in enumerate(self.tilt_abc) if self.selection_flags[i_roi]] self.pids = [pid for i_roi, pid in enumerate(self.pids) if self.selection_flags[i_roi]] self.tilt_cov = [cov for i_roi, cov in enumerate(self.tilt_cov) if self.selection_flags[i_roi]] + self.Hi =[hi for i_roi, hi in enumerate(self.Hi) if self.selection_flags[i_roi]] + self.Hi_asu =[hi_asu for i_roi, hi_asu in enumerate(self.Hi_asu) if self.selection_flags[i_roi]] + if not self.no_rlp_info: self.Q = [np.linalg.norm(refls[i_roi]["rlp"]) for i_roi in range(len(refls)) if self.selection_flags[i_roi]] @@ -560,8 +670,10 @@ def data_to_one_dim(self, img_data, is_trusted, background): if not self.no_rlp_info: all_q_perpix += [self.Q[i_roi]]*npix if self.Hi is not None: - self.all_nominal_hkl += [tuple(self.Hi[self.refls_idx[i_roi]])]*npix - self.hi_asu_perpix += [self.Hi_asu[self.refls_idx[i_roi]]] * npix + self.all_nominal_hkl += [tuple(self.Hi[i_roi])]*npix + self.hi_asu_perpix += [self.Hi_asu[i_roi]] * npix + #self.all_nominal_hkl += [tuple(self.Hi[i_roi])]*npix + #self.hi_asu_perpix += [self.Hi_asu[i_roi]] * npix if self.params.roi.mask_outside_trusted_range: MAIN_LOGGER.debug("Found %d pixels outside of trusted range" % numOutOfRange) @@ -640,37 +752,32 @@ def dump_gathered_to_refl(self, output_name, do_xyobs_sanity_check=False): shoeboxes.append(sb) R['shoebox'] = flex.shoebox(shoeboxes) + R['id'] = flex.int(len(R), 0) R.as_file(output_name) def set_parameters_for_experiment(self, best=None): + if self.params.symmetrize_Flatt and not self.params.fix.eta_abc: + if not self.params.simulator.crystal.has_isotropic_mosaicity: + raise NotImplementedError("if fix.eta_abc=False and symmetrize_Flatt=True, then eta must be isotropic. 
Set simulator.crystal.has_isotropic_mosaicity=True") ParameterTypes = {"ranged": RangedParameter, "positive": PositiveParameter} ParameterType = RangedParameter # most params currently only this type + if self.params.centers.Nvol is not None: + assert self.params.betas.Nvol is not None + if best is not None: # set the crystal Umat (rotational displacement) and Bmat (unit cell) # Umatrix # NOTE: just set the best Amatrix here - if self.params.apply_best_crystal_model: - xax = col((-1, 0, 0)) - yax = col((0, -1, 0)) - zax = col((0, 0, -1)) - rotX, rotY, rotZ = best[["rotX", "rotY", "rotZ"]].values[0] - RX = xax.axis_and_angle_as_r3_rotation_matrix(rotX, deg=False) - RY = yax.axis_and_angle_as_r3_rotation_matrix(rotY, deg=False) - RZ = zax.axis_and_angle_as_r3_rotation_matrix(rotZ, deg=False) - M = RX * RY * RZ - U = M * sqr(self.E.crystal.get_U()) - self.E.crystal.set_U(U) - - # Bmatrix: - ucparam = best[["a","b","c","al","be","ga"]].values[0] - ucman = utils.manager_from_params(ucparam) - self.E.crystal.set_B(ucman.B_recipspace) + #C = deepcopy(self.E.crystal) + #crystal = self.E.crystal + #self.E.crystal = crystal ## TODO , currently need this anyway ucparam = best[["a","b","c","al","be","ga"]].values[0] ucman = utils.manager_from_params(ucparam) self.E.crystal.set_B(ucman.B_recipspace) + self.E.crystal.set_A(best.Amats.values[0]) # mosaic block self.params.init.Nabc = tuple(best.ncells.values[0]) @@ -693,9 +800,6 @@ def set_parameters_for_experiment(self, best=None): maxs = self.params.maxs centers = self.params.centers betas = self.params.betas - if not self.params.use_restraints or self.params.fix.ucell: - centers.ucell = [1,1,1,1,1,1] - betas.ucell = [1,1,1,1,1,1] fix = self.params.fix types = self.params.types P = Parameters() @@ -707,7 +811,8 @@ def set_parameters_for_experiment(self, best=None): p = ParameterType(init=0, sigma=sigma.RotXYZ[ii], minval=mins.RotXYZ[ii], maxval=maxs.RotXYZ[ii], fix=fix.RotXYZ, name="RotXYZ%d_xtal%d" % (ii,i_xtal), - center=centers.RotXYZ[ii], beta=betas.RotXYZ) + center=0 if betas.RotXYZ is not None else None, + beta=betas.RotXYZ) P.add(p) p = ParameterTypes[types.G](init=init.G + init.G*0.01*i_xtal, sigma=sigma.G, @@ -746,33 +851,38 @@ def set_parameters_for_experiment(self, best=None): p = ParameterTypes[types.Nabc](init=init.Nabc[ii], sigma=sigma.Nabc[ii], minval=mins.Nabc[ii], maxval=maxs.Nabc[ii], fix=fix_Nabc[ii], name="Nabc%d" % (ii,), - center=centers.Nabc[ii], beta=betas.Nabc[ii]) + center=centers.Nabc[ii] if centers.Nabc is not None else None, + beta=betas.Nabc[ii] if betas.Nabc is not None else None) P.add(p) p = ParameterType(init=init.Ndef[ii], sigma=sigma.Ndef[ii], minval=mins.Ndef[ii], maxval=maxs.Ndef[ii], fix=fix.Ndef, name="Ndef%d" % (ii,), - center=centers.Ndef[ii], beta=betas.Ndef[ii]) + center=centers.Ndef[ii] if centers.Ndef is not None else None, + beta=betas.Ndef[ii] if betas.Ndef is not None else None) P.add(p) # diffuse gamma and sigma p = ParameterTypes[types.diffuse_gamma](init=init.diffuse_gamma[ii], sigma=sigma.diffuse_gamma[ii], minval=mins.diffuse_gamma[ii], maxval=maxs.diffuse_gamma[ii], fix=fix_difgam[ii], name="diffuse_gamma%d" % (ii,), - center=centers.diffuse_gamma[ii], beta=betas.diffuse_gamma[ii]) + center=centers.diffuse_gamma[ii] if centers.diffuse_gamma is not None else None, + beta=betas.diffuse_gamma[ii] if betas.diffuse_gamma is not None else None) P.add(p) p = ParameterTypes[types.diffuse_sigma](init=init.diffuse_sigma[ii], sigma=sigma.diffuse_sigma[ii], minval=mins.diffuse_sigma[ii], 
maxval=maxs.diffuse_sigma[ii], fix=fix_difsig[ii], name="diffuse_sigma%d" % (ii,), - center=centers.diffuse_sigma[ii], beta=betas.diffuse_sigma[ii]) + center=centers.diffuse_sigma[ii] if centers.diffuse_sigma is not None else None, + beta=betas.diffuse_sigma[ii] if betas.diffuse_sigma is not None else None) P.add(p) # mosaic spread (mosaicity) p = ParameterType(init=init.eta_abc[ii], sigma=sigma.eta_abc[ii], minval=mins.eta_abc[ii], maxval=maxs.eta_abc[ii], fix=fix_eta[ii], name="eta_abc%d" % (ii,), - center=centers.eta_abc[ii], beta=betas.eta_abc[ii]) + center=centers.eta_abc[ii] if centers.eta_abc is not None else None, + beta=betas.eta_abc[ii] if betas.eta_abc is not None else None) P.add(p) ucell_man = utils.manager_from_crystal(self.E.crystal) @@ -781,40 +891,29 @@ def set_parameters_for_experiment(self, best=None): if "Ang" in name: minval = val - ucell_vary_perc * val maxval = val + ucell_vary_perc * val - if centers.ucell is not None: - cent = centers.ucell[i_uc] - beta = betas.ucell[i_uc] + if name == 'a_Ang': + cent = centers.ucell_a + beta = betas.ucell_a + elif name== 'b_Ang': + cent = centers.ucell_b + beta = betas.ucell_b else: - if name == 'a_Ang': - cent = centers.ucell_a - beta = betas.ucell_a - elif name== 'b_Ang': - cent = centers.ucell_b - beta = betas.ucell_b - else: - cent = centers.ucell_c - beta = betas.ucell_c - assert cent is not None, "Set the center restraints properly!" - assert beta is not None + cent = centers.ucell_c + beta = betas.ucell_c else: val_in_deg = val * 180 / np.pi minval = (val_in_deg - self.params.ucell_ang_abs) * np.pi / 180. maxval = (val_in_deg + self.params.ucell_ang_abs) * np.pi / 180. - if centers.ucell is not None: - cent = centers.ucell[i_uc]*np.pi / 180. - beta = betas.ucell[i_uc] + if name=='alpha_rad': + cent = centers.ucell_alpha + beta = betas.ucell_alpha + elif name=='beta_rad': + cent = centers.ucell_beta + beta = betas.ucell_beta else: - if name=='alpha_rad': - cent = centers.ucell_alpha - beta = betas.ucell_alpha - elif name=='beta_rad': - cent = centers.ucell_beta - beta = betas.ucell_beta - else: - cent = centers.ucell_gamma - beta = betas.ucell_gamma - assert cent is not None - assert beta is not None + cent = centers.ucell_gamma + beta = betas.ucell_gamma + if cent is not None: cent = cent*np.pi / 180. 
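Because center and beta may now individually be None, the loop added a few lines below enforces that a restraint defines both of them or neither, raising a RuntimeError otherwise. A small sketch of that pairing rule with made-up parameter names and values:

    # (center, beta) pairs; the second entry violates the rule
    restraints = {"Nabc0": (100.0, 1e-3), "ucell_a": (78.9, None)}
    for name, (center, beta) in restraints.items():
        if (center is None) != (beta is None):
            # the real code raises RuntimeError at this point
            print("To use restraints, must specify both center and beta for param %s" % name)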
p = ParameterType(init=val, sigma=sigma.ucell[i_uc], @@ -860,11 +959,13 @@ def set_parameters_for_experiment(self, best=None): # two parameters for optimizing the spectrum p = RangedParameter(init=self.params.init.spec[0], sigma=self.params.sigmas.spec[0], minval=mins.spec[0], maxval=maxs.spec[0], fix=fix.spec, - name="lambda_offset", center=centers.spec[0], beta=betas.spec[0]) + name="lambda_offset", center=centers.spec[0] if centers.spec is not None else None, + beta=betas.spec[0] if betas.spec is not None else None) P.add(p) p = RangedParameter(init=self.params.init.spec[1], sigma=self.params.sigmas.spec[1], minval=mins.spec[1], maxval=maxs.spec[1], fix=fix.spec, - name="lambda_scale", center=centers.spec[1], beta=betas.spec[1]) + name="lambda_scale", center=centers.spec[1] if centers.spec is not None else None, + beta=betas.spec[1] if betas.spec is not None else None) P.add(p) # iterating over this dict is time-consuming when refinine Fhkl, so we split up the names here: @@ -872,6 +973,11 @@ def set_parameters_for_experiment(self, best=None): self.scale_roi_names = [name for name in P if name.startswith("scale_roi")] self.P = P + for name in self.P: + p = self.P[name] + if (p.beta is not None and p.center is None) or (p.center is not None and p.beta is None): + raise RuntimeError("To use restraints, must specify both center and beta for param %s" % name) + def get_data_model_pairs(self, reorder=False): if self.best_model is None: raise ValueError("cannot get the best model, there is no best_model attribute") @@ -923,7 +1029,7 @@ def get_data_model_pairs(self, reorder=False): return ret_subimgs - def Minimize(self, x0, SIM, i_exp=0): + def Minimize(self, x0, SIM, i_shot=0): self.target = target = TargetFunc(SIM=SIM, niter_per_J=self.params.niter_per_J, profile=self.params.profile) # set up the refinement flags @@ -957,7 +1063,7 @@ def Minimize(self, x0, SIM, i_exp=0): assert self.params.hopper_save_freq is None at_min = self.at_minimum - callback_kwargs = {"SIM":SIM, "i_exp": i_exp, "save_freq": self.params.hopper_save_freq} + callback_kwargs = {"SIM":SIM, "i_shot": i_shot, "save_freq": self.params.hopper_save_freq} callback = lambda x: self.callback(x, callback_kwargs) target.terminate_after_n_converged_iterations = self.params.terminate_after_n_converged_iter target.percent_change_of_converged = self.params.converged_param_percent_change @@ -1035,13 +1141,13 @@ def Minimize(self, x0, SIM, i_exp=0): def callback(self, x, kwargs): save_freq = kwargs["save_freq"] - i_exp = kwargs["i_exp"] + i_shot = kwargs["i_shot"] SIM = kwargs["SIM"] target = self.target if save_freq is not None and target.iteration % save_freq==0 and target.iteration> 0: xall = target.x0.copy() xall[target.vary] = x - self.save_up(xall, SIM, rank=self.rank, i_exp=i_exp) + self.save_up(xall, SIM, rank=self.rank, i_shot=i_shot) return rescaled_vals = np.zeros_like(xall) @@ -1094,30 +1200,34 @@ def callback(self, x, kwargs): # at this point prev_iter_vals are the converged parameters! raise StopIteration() # Refinement has reached convergence! - def save_up(self, x, SIM, rank=0, i_exp=0, + def save_up(self, x, SIM, rank=0, i_shot=0, save_fhkl_data=True, save_modeler_file=True, save_refl=True, - save_sim_info=True): + save_sim_info=True, + save_traces=True, + save_pandas=True, save_expt=True): """ - :param x: - :param SIM: - :param rank: - :param i_exp: - :param save_fhkl_data: - :param save_modeler_file: - :param save_refl: - :param save_sim_info: - :return: + :param x: l-bfgs refinement parameters (reparameterized, e.g. 
unbounded) + :param SIM: sim_data.SimData instance + :param rank: MPI rank Id + :param i_shot: shot index for this rank (assuming each rank processes more than one shot, this should increment) + :param save_fhkl_data: whether to write mtz files + :param save_modeler_file: whether to write the DataModeler .npy file (a pickle file) + :param save_refl: whether to write a reflection table for this shot with updated xyzcal.px from diffBragg models + :param save_sim_info: whether to write a text file showing the diffBragg state + :param save_traces: whether to write a text file showing the refinement target functional and sigmaZ per iter + :param save_pandas: whether to write a single-shot pandas dataframe containing optimized diffBragg params + :param save_expt: whether to save a single-shot experiment file for this shot with optimized crystal model + :return: returns the single shot pandas dataframe (whether or not it was written) """ - # TODO optionally create directories assert self.exper_name is not None assert self.refl_name is not None Modeler = self LOGGER = logging.getLogger("refine") Modeler.best_model, _ = model(x, Modeler, SIM, compute_grad=False) Modeler.best_model_includes_background = False - LOGGER.info("Optimized values for i_exp %d:" % i_exp) + LOGGER.info("Optimized values for i_shot %d:" % i_shot) basename = os.path.splitext(os.path.basename(self.exper_name))[0] @@ -1170,26 +1280,30 @@ def save_up(self, x, SIM, rank=0, i_exp=0, scale_facs.append(scale_fac) scale_vars.append(scale_var) is_nominal_hkl.append(asu in all_nominal_hkl) - scale_fname = os.path.join(fhkl_scale_dir, "%s_%s_%d_channel%d_scale.npz"\ - % (Modeler.params.tag, basename, i_exp, i_chan)) + scale_fname = os.path.join(fhkl_scale_dir, "%s_%s_%d_%d_channel%d_scale.npz"\ + % (Modeler.params.tag, basename, i_shot, self.exper_idx, i_chan)) np.savez(scale_fname, asu_hkl=asu_hkls, scale_fac=scale_facs, scale_var=scale_vars, is_nominal_hkl=is_nominal_hkl) # TODO: pretty formatting ? 
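# A minimal sketch (illustration only) of reading one of the per-channel
# *_scale.npz files written just above; the array keys follow the np.savez
# call in save_up, and the filename is a hypothetical example.
import numpy as np
d = np.load("tag_basename_0_0_channel0_scale.npz")
sel = d["is_nominal_hkl"]             # True for HKLs assigned to reflections on this shot
asu_hkl = d["asu_hkl"][sel]           # (N, 3) ASU Miller indices
scale_fac = d["scale_fac"][sel]       # optimized per-HKL scale factors
scale_var = d["scale_var"][sel]       # variance estimates for those scales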
if Modeler.target is not None: - rank_trace_outdir = hopper_io.make_rank_outdir(Modeler.params.outdir, "traces", rank) - trace_path = os.path.join(rank_trace_outdir, "%s_%s_%d_traces.txt" % (Modeler.params.tag, basename, i_exp)) # hop number, gradient descent index (resets with each new hop), target functional trace0, trace1, trace2 = Modeler.target.all_hop_id, Modeler.target.all_f, Modeler.target.all_sigZ - trace_data = np.array([trace0, trace1, trace2]).T - np.savetxt(trace_path, trace_data, fmt="%s") + + if save_traces: + rank_trace_outdir = hopper_io.make_rank_outdir(Modeler.params.outdir, "traces", rank) + trace_path = os.path.join(rank_trace_outdir, "%s_%s_%d_%d_traces.txt" + % (Modeler.params.tag, basename, i_shot, self.exper_idx)) + np.savetxt(trace_path, trace_data, fmt="%s") Modeler.niter = len(trace0) Modeler.sigz = trace2[-1] - hopper_io.save_to_pandas(x, Modeler, SIM, self.exper_name, Modeler.params, Modeler.E, i_exp, self.refl_name, None, rank) + shot_df = hopper_io.save_to_pandas(x, Modeler, SIM, self.exper_name, Modeler.params, Modeler.E, i_shot, + self.refl_name, None, rank, write_expt=save_expt, write_pandas=save_pandas, + exp_idx=self.exper_idx) if isinstance(Modeler.all_sigma_rdout, np.ndarray): data_subimg, model_subimg, trusted_subimg, bragg_subimg, sigma_rdout_subimg = Modeler.get_data_model_pairs() @@ -1208,7 +1322,8 @@ def save_up(self, x, SIM, rank=0, i_exp=0, if save_refl: rank_refls_outdir = hopper_io.make_rank_outdir(Modeler.params.outdir, "refls", rank) - new_refls_file = os.path.join(rank_refls_outdir, "%s_%s_%d.refl" % (Modeler.params.tag, basename, i_exp)) + new_refls_file = os.path.join(rank_refls_outdir, "%s_%s_%d_%d.refl" + % (Modeler.params.tag, basename, i_shot, self.exper_idx)) new_refls = deepcopy(Modeler.refls) has_xyzcal = 'xyzcal.px' in list(new_refls.keys()) if has_xyzcal: @@ -1268,19 +1383,24 @@ def save_up(self, x, SIM, rank=0, i_exp=0, if save_modeler_file: rank_imgs_outdir = hopper_io.make_rank_outdir(Modeler.params.outdir, "imgs", rank) modeler_file = os.path.join(rank_imgs_outdir, - "%s_%s_%d_modeler.npy" % (Modeler.params.tag, basename, i_exp)) + "%s_%s_%d_%d_modeler.npy" + % (Modeler.params.tag, basename, i_shot, self.exper_idx)) np.save(modeler_file, Modeler) if save_sim_info: spectrum_file = os.path.join(rank_imgs_outdir, - "%s_%s_%d_spectra.lam" % (Modeler.params.tag, basename, i_exp)) + "%s_%s_%d_%d_spectra.lam" + % (Modeler.params.tag, basename, i_shot, self.exper_idx)) rank_SIMlog_outdir = hopper_io.make_rank_outdir(Modeler.params.outdir, "simulator_state", rank) - SIMlog_path = os.path.join(rank_SIMlog_outdir, "%s_%s_%d.txt" % (Modeler.params.tag, basename, i_exp)) + SIMlog_path = os.path.join(rank_SIMlog_outdir, "%s_%s_%d_%d.txt" + % (Modeler.params.tag, basename, i_shot, self.exper_idx)) write_SIM_logs(SIM, log=SIMlog_path, lam=spectrum_file) if Modeler.params.refiner.debug_pixel_panelfastslow is not None: # TODO separate diffBragg logger utils.show_diffBragg_state(SIM.D, Modeler.params.refiner.debug_pixel_panelfastslow) + return shot_df + def convolve_model_with_psf(model_pix, J, mod, SIM, PSF=None, psf_args=None, roi_id_slices=None, roi_id_unique=None): @@ -1339,7 +1459,24 @@ def convolve_model_with_psf(model_pix, J, mod, SIM, PSF=None, psf_args=None, return model_pix, J -def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_spectrum=False): +def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_spectrum=False, + update_Fhkl_scales=True): + + if Mod.params.logging.parameters: + 
val_s = "" + for p in Mod.P.values(): + if p.name.startswith("Fhkl_"): + continue + if p.refine: + xval = x[p.xpos] + val = p.get_val(xval) + name = p.name + if name == "detz_shift": + val = val * 1e3 + name = p.name + "_mm" + val_s += "%s=%.3f, " % (name, val) + MAIN_LOGGER.debug(val_s) + pfs = Mod.pan_fast_slow @@ -1351,7 +1488,7 @@ def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_s if Mod.Fhkl_channel_ids is not None: SIM.D.update_Fhkl_channels(Mod.Fhkl_channel_ids) - if SIM.refining_Fhkl: # once per iteration + if SIM.refining_Fhkl and update_Fhkl_scales: # once per iteration nscales = SIM.Num_ASU*SIM.num_Fhkl_channels current_Fhkl_xvals = x[-nscales:] SIM.Fhkl_scales = SIM.Fhkl_scales_init * np.exp( Mod.params.sigmas.Fhkl *(current_Fhkl_xvals-1)) @@ -1436,6 +1573,7 @@ def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_s J = np.zeros((nparam-SIM.Num_ASU*SIM.num_Fhkl_channels, npix)) # gradients model_pix = None + #TODO check roiScales mode and if its broken, git rid of it! model_pix_noRoi = None # extract the scale factors per ROI, these might correspond to structure factor intensity scale factors, and quite possibly might result in overfits! @@ -1462,6 +1600,16 @@ def model(x, Mod, SIM, compute_grad=True, dont_rescale_gradient=False, update_s SIM.D.set_value(ROTY_ID, rotY) SIM.D.set_value(ROTZ_ID, rotZ) + if Mod.params.symmetrize_Flatt: + RXYZU = hopper_io.diffBragg_Umat(rotX, rotY, rotZ, SIM.D.Umatrix) + Cryst = deepcopy(SIM.crystal.dxtbx_crystal) + A = RXYZU * Mod.ucell_man.B_realspace + A_recip = A.inverse().transpose() + Cryst.set_A(A_recip) + symbol = SIM.crystal.space_group_info.type().lookup_symbol() + SIM.D.set_mosaic_blocks_sym(Cryst, symbol , Mod.params.simulator.crystal.num_mosaicity_samples, + refining_eta=not Mod.params.fix.eta_abc) + G = Mod.P["G_xtal%d" % i_xtal] scale = G.get_val(x[G.xpos]) @@ -1663,7 +1811,15 @@ def __call__(self, x, *args, **kwargs): self.all_x.append(self.x0) mod, SIM, compute_grad = args - f, g, modelpix, J, sigZ, debug_s = target_func(self.x0, update_terms, mod, SIM, compute_grad) + f, g, modelpix, J, sigZ, debug_s, zscore_perpix = target_func(self.x0, update_terms, mod, SIM, compute_grad, + return_all_zscores=True) + mod.all_zscore = zscore_perpix + + # filter during refinement? 
+ if mod.params.filter_during_refinement.enable and self.iteration > 0: + if self.iteration % mod.params.filter_during_refinement.after_n == 0: + mod.filter_pixels(thresh=mod.params.filter_during_refinement.threshold) + self.t_per_iter.append(time.time()) if len(self.t_per_iter) > 2: @@ -1683,7 +1839,7 @@ def __call__(self, x, *args, **kwargs): return f -def target_func(x, udpate_terms, mod, SIM, compute_grad=True): +def target_func(x, udpate_terms, mod, SIM, compute_grad=True, return_all_zscores=False): pfs = mod.pan_fast_slow data = mod.all_data sigma_rdout = mod.all_sigma_rdout @@ -1757,15 +1913,17 @@ def target_func(x, udpate_terms, mod, SIM, compute_grad=True): fLogLike = fLogLike[trusted].sum() # negative log Likelihood target # width of z-score should decrease as refinement proceeds - zscore_sigma = np.std( (resid / np.sqrt(V))[trusted]) + zscore_per = resid/np.sqrt(V) + zscore_sigma = np.std(zscore_per[trusted]) restraint_terms = {} if params.use_restraints: # scale factor restraint for name in mod.non_fhkl_params: p = mod.P[name] - val = p.get_restraint_val(x[p.xpos]) - restraint_terms[name] = val + if p.beta is not None: + val = p.get_restraint_val(x[p.xpos]) + restraint_terms[name] = val if params.centers.Nvol is not None: na,nb,nc = SIM.D.Ncells_abc_aniso @@ -1833,14 +1991,16 @@ def target_func(x, udpate_terms, mod, SIM, compute_grad=True): # update gradients according to restraints for name in mod.non_fhkl_params: p = mod.P[name] - g[p.xpos] += p.get_restraint_deriv(x[p.xpos]) + if p.beta is not None: + g[p.xpos] += p.get_restraint_deriv(x[p.xpos]) - if not params.fix.perRoiScale: + if not params.fix.perRoiScale: # deprecated ? for name in mod.scale_roi_names: p = mod.P[name] - g[p.xpos] += p.get_restraint_deriv(x[p.xpos]) + if p.beta is not None: + g[p.xpos] += p.get_restraint_deriv(x[p.xpos]) - if params.centers.Nvol is not None: + if params.betas.Nvol is not None: Nmat_inv = np.linalg.inv(Nmat) dVol_dN_vals = [] for i_N in range(6): @@ -1900,7 +2060,11 @@ def target_func(x, udpate_terms, mod, SIM, compute_grad=True): debug_s = "F=%10.7g sigZ=%10.7g (Fracs of F: %s), |g|=%10.7g" \ % (f, zscore_sigma, restraint_debug_s, gnorm) - return f, g, model_bragg, Jac, zscore_sigma, debug_s + + return_data = f, g, model_bragg, Jac, zscore_sigma, debug_s + if return_all_zscores: + return_data += (zscore_per,) + return return_data def refine(exp, ref, params, spec=None, gpu_device=None, return_modeler=False, best=None, free_mem=True): @@ -2117,13 +2281,30 @@ def generate_gauss_spec(central_en=9500, fwhm=10, res=1, nchan=20, total_flux=1e else: return ens, wt +def downsamp_spec_from_params(params, expt=None, imgset=None, i_img=0): + """ + + :param params: hopper phil params extracted + :param expt: a dxtbx experiment (optional) + :param imgset: an dxtbx imageset (optional) + :param i_img: index of the image in the imageset (only matters if imgset is not None) + :return: dxtbx spectrum with parameters applied + """ + if expt is not None: + dxtbx_spec = expt.imageset.get_spectrum(0) + starting_wave = expt.beam.get_wavelength() + else: + assert imgset is not None + dxtbx_spec = imgset.get_spectrum(i_img) + starting_wave = imgset.get_beam(i_img).get_wavelength() -def downsamp_spec_from_params(params, expt): - dxtbx_spec = expt.imageset.get_spectrum(0) spec_en = dxtbx_spec.get_energies_eV() spec_wt = dxtbx_spec.get_weights() if params.downsamp_spec.skip: spec_wave = utils.ENERGY_CONV / spec_en.as_numpy_array() + stride=params.simulator.spectrum.stride + spec_wave = spec_wave[::stride] + 
spec_wt = spec_wt[::stride] spectrum = list(zip(spec_wave, spec_wt)) else: spec_en = dxtbx_spec.get_energies_eV() @@ -2149,11 +2330,12 @@ def downsamp_spec_from_params(params, expt): downsamp_wave = utils.ENERGY_CONV / downsamp_en spectrum = list(zip(downsamp_wave, downsamp_wt)) # the nanoBragg beam has an xray_beams property that is used internally in diffBragg - starting_wave = expt.beam.get_wavelength() waves, specs = map(np.array, zip(*spectrum)) ave_wave = sum(waves*specs) / sum(specs) - expt.beam.set_wavelength(ave_wave) - MAIN_LOGGER.debug("Shifting wavelength from %f to %f" % (starting_wave, ave_wave)) + MAIN_LOGGER.debug("Starting wavelength=%f. Spectrum ave wavelength=%f" % (starting_wave, ave_wave)) + if expt is not None: + expt.beam.set_wavelength(ave_wave) + MAIN_LOGGER.debug("Shifting expt wavelength from %f to %f" % (starting_wave, ave_wave)) MAIN_LOGGER.debug("USING %d ENERGY CHANNELS" % len(spectrum)) return spectrum @@ -2165,6 +2347,9 @@ def downsamp_spec(SIM, params, expt, return_and_dont_set=False): spec_wt = SIM.dxtbx_spec.get_weights() if params.downsamp_spec.skip: spec_wave = utils.ENERGY_CONV / spec_en.as_numpy_array() + stride = params.simulator.spectrum.stride + spec_wave = spec_wave[::stride] + spec_wt = spec_wt[::stride] SIM.beam.spectrum = list(zip(spec_wave, spec_wt)) else: spec_en = SIM.dxtbx_spec.get_energies_eV() @@ -2195,6 +2380,7 @@ def downsamp_spec(SIM, params, expt, return_and_dont_set=False): ave_wave = sum(waves*specs) / sum(specs) expt.beam.set_wavelength(ave_wave) MAIN_LOGGER.debug("Shifting wavelength from %f to %f" % (starting_wave, ave_wave)) + MAIN_LOGGER.debug("Using %d energy channels" % len(SIM.beam.spectrum)) if return_and_dont_set: return SIM.beam.spectrum else: @@ -2234,12 +2420,12 @@ def set_gauss_spec(SIM=None, params=None, E=None): def sanity_test_input_lines(input_lines): for line in input_lines: - line_fields = line.strip().split() - if len(line_fields) not in [2, 3]: + line_items = line.strip().split() + if len(line_items) not in [2, 3, 4]: raise IOError("Input line %s is not formatted properly" % line) - for fname in line_fields: - if not os.path.exists(fname): - raise FileNotFoundError("File %s does not exist" % fname) + for item in line_items: + if os.path.isfile(item) and not os.path.exists(item): + raise FileNotFoundError("File %s does not exist" % item) def full_img_pfs(img_sh): @@ -2304,10 +2490,19 @@ def get_simulator_for_data_modelers(data_modeler): if self.params.diffuse_stencil_size > 0: SIM.D.stencil_size = self.params.diffuse_stencil_size MAIN_LOGGER.debug("Set diffuse stencil size: %d" % SIM.D.stencil_size) + if self.params.diffuse_orientation == 1: + ori = (1,0,0,0,1,0,0,0,1) + else: + a = 1/np.sqrt(2) + ori = a, a, 0.0, a, a, 0.0, 0.0, 0.0, 1.0 + SIM.D.set_rotate_principal_axes(ori) SIM.D.gamma_miller_units = self.params.gamma_miller_units SIM.isotropic_diffuse_gamma = self.params.isotropic.diffuse_gamma SIM.isotropic_diffuse_sigma = self.params.isotropic.diffuse_sigma + if self.params.record_device_timings: + SIM.D.record_timings = True + # TODO: use data_modeler.set_spectrum instead if self.params.spectrum_from_imageset: downsamp_spec(SIM, self.params, self.E) elif self.params.gen_gauss_spec: diff --git a/simtbx/diffBragg/mpi_logger.py b/simtbx/diffBragg/mpi_logger.py index 8900ea8d54..c81cffd4d9 100644 --- a/simtbx/diffBragg/mpi_logger.py +++ b/simtbx/diffBragg/mpi_logger.py @@ -20,7 +20,7 @@ LEVELS = {"low": logging.WARNING, "normal": logging.INFO, "high": logging.DEBUG} DETAILED_FORMAT = 'RANK%d:%s | ' % 
(COMM.rank, HOST) + '%(asctime)s | %(filename)s:%(funcName)s >> %(message)s' -SIMPLE_FORMAT = "RANK%04d "%(COMM.rank)+"%(message)s" +SIMPLE_FORMAT = "RANK%04d "%(COMM.rank)+"| %(asctime)s | %(message)s" from simtbx.diffBragg import utils @@ -61,14 +61,16 @@ def setup_logging_from_params(params): utils.safe_makedirs(params.outdir) COMM.barrier() main_level = LEVELS[params.logging.logfiles_level] + logfile_name = params.logging.log_hostname*(HOST+"-") + params.logging.logname main_logger = _make_logger("diffBragg.main", - os.path.join(params.outdir, HOST+"-"+params.logging.logname), + os.path.join(params.outdir, logfile_name), level=main_level, overwrite=params.logging.overwrite, formatter=logging.Formatter(DETAILED_FORMAT)) + profile_name = params.logging.log_hostname*(HOST+"-") + params.profile_name _make_logger("diffBragg.profile", - os.path.join(params.outdir, HOST+"-"+params.profile_name), + os.path.join(params.outdir, profile_name), level=logging.INFO, overwrite=params.logging.overwrite, formatter=logging.Formatter(SIMPLE_FORMAT)) diff --git a/simtbx/diffBragg/phil.py b/simtbx/diffBragg/phil.py index 806780f47c..b6558f77fb 100644 --- a/simtbx/diffBragg/phil.py +++ b/simtbx/diffBragg/phil.py @@ -6,6 +6,53 @@ #''' hopper_phil = """ + +filter_during_refinement { + enable = False + .type = bool + .help = if True, filtering will occur each N iterations, controlled by parameter after_n + after_n = 50 + .type = int + .help = refiner will pause and check for outliers every after_n iterations + threshold = 20 + .type = float + .help = outliers are detected by looking at the distribution of per shoebox sigmaZ + .help = and then using a median absolute deviation filter. Lower values of threshold will flag more pixels as outliers +} + +filter_after_refinement { + enable = False + .type = bool + .help = if True, filter, then rerun refinement if certain conditions are met (e.g. too few refinement iterations) + max_attempts = 2 + .type = int + .help = how many additional times to run hopper + min_prev_niter = 50 + .type = int + .help = only repeat if the previous refinement was fewer than this many iterations + max_prev_sigz = 10 + .type = float + .help = only repeat if the previous refinement had sigma Z more than this + threshold = 20 + .type = float + .help = outliers are detected by looking at the distribution of per shoebox sigmaZ + .help = and then using a median absolute deviation filter. Lower values of threshold will flag more pixels as outliers +} + +symmetrize_Flatt = False + .type = bool + .help = If True, add 3-fold symmetric mosaic blocks to the calculation of F_latt +record_device_timings = False + .type = bool + .help = Record the execution times of diffBragg host-dev copies and kernel executions + .help = the results will be printed to the terminal +consider_multicrystal_shots = False + .type = bool + .help = If True, and if there are multiple crystals in the experiment list, + .help = then try to model all crystals for a given shot. +debug_mode = False + .type = bool + .help = If True, many output files are written to explore the diffBragg models in great detail nominal_Fhkl_only = True .type = bool .help = if refining Fhkls, only refine the ones that are assigned to a reflection table... 
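The threshold values in the filter_during_refinement and filter_after_refinement scopes above are expressed in units of a median-absolute-deviation cut on the per-shoebox sigmaZ distribution. The actual filter_pixels implementation is not part of this hunk, so the helper below is only a sketch of that criterion (the 1.4826 factor converts MAD to a robust sigma, and the function name is hypothetical); lower thresholds flag more shoeboxes.

    import numpy as np

    def flag_outlier_shoeboxes(sigZ_per_shoebox, thresh=20):
        """Return a boolean mask of shoeboxes whose sigmaZ deviates from the
        median by more than `thresh` robust standard deviations (MAD filter)."""
        sigZ = np.asarray(sigZ_per_shoebox, dtype=float)
        med = np.median(sigZ)
        robust_sigma = 1.4826 * np.median(np.abs(sigZ - med))
        return np.abs(sigZ - med) > thresh * robust_sigma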
@@ -121,12 +168,6 @@ .help = final resolution of downsampled spectrum in eV .expert_level=0 } -apply_best_crystal_model = False - .type = bool - .help = depending on what experiments in the exper refl file, one may want - .help = to apply the optimal crystal transformations (this parameter only matters - .help = if params.best_pickle is not None) - .expert_level=10 filter_unpredicted_refls_in_output = True .type = bool .help = filter reflections in the output refl table for which there was no model bragg peak @@ -192,41 +233,41 @@ ucell_gamma = None .type = float .help = restraint variance for unit cell gamma angle - Nvol = 1e8 + Nvol = None .type = float .help = tightness of the Nabc volume contraint - detz_shift = 1e8 + detz_shift = None .type = float .help = restraint variance for detector shift target - ucell = [1e8,1e8,1e8,1e8,1e8,1e8] + ucell = None .type = floats .help = DEPRECATED: use e.g. betas.ucell_a instead .help = variances for unit cell constants in order determined by unit cell manager class (see diffBragg/refiners/crystal_systems) - RotXYZ = 1e8 + RotXYZ = None .type = float .help = restraint factor for the rotXYZ restraint - Nabc = [1e8,1e8,1e8] + Nabc = None .type = floats(size=3) .help = restraint factor for the ncells abc - Ndef = [1e8,1e8,1e8] + Ndef = None .type = floats(size=3) .help = restraint factor for the ncells def - diffuse_sigma = 1e8,1e8,1e8 + diffuse_sigma = None .type = floats(size=3) .help = restraint factor for diffuse sigma - diffuse_gamma = 1e8,1e8,1e8 + diffuse_gamma = None .type = floats(size=3) .help = restraint factor for diffuse gamma - G = 1e8 + G = None .type = float .help = restraint factor for the scale G - B = 1e8 + B = None .type = float .help = restraint factor for Bfactor - eta_abc = [1e8,1e8,1e8] + eta_abc = None .type = floats(size=3) .help = restrain factor for mosaic spread angles - spec = [1e8,1e8] + spec = None .type = floats(size=2) .help = restraint factor for spectrum coefs Fhkl = None @@ -276,38 +317,34 @@ Nvol = None .type = float .help = if provided, constrain the product Na*Nb*Nc to this value - detz_shift = 0 + detz_shift = None .type = float .help = restraint target for detector shift along z-direction - ucell = [63.66, 28.87, 35.86, 1.8425] - .type = floats - .help = DEPRECATED: use e.g. 
betas.ucell_a instead - .help = centers for unit cell constants in order determined by unit cell manager class (see diffBragg/refiners/crystal_systems) - RotXYZ = [0,0,0] + RotXYZ = None .type = floats(size=3) .help = restraint target for Umat rotations - Nabc = [100,100,100] + Nabc = None .type = floats(size=3) .help = restraint target for Nabc - Ndef = [0,0,0] + Ndef = None .type = floats(size=3) .help = restraint target for Ndef - diffuse_sigma = [1,1,1] + diffuse_sigma = None .type = floats(size=3) .help = restraint target for diffuse sigma - diffuse_gamma = [1,1,1] + diffuse_gamma = None .type = floats(size=3) .help = restraint target for diffuse gamma - G = 100 + G = None .type = float .help = restraint target for scale G - B = 0 + B = None .type = float .help = restraint target for Bfactor - eta_abc = [0,0,0] + eta_abc = None .type = floats(size=3) .help = restraint target for mosaic spread angles in degrees - spec = [0,1] + spec = None .type = floats(size=2) .help = restraint target for specturm correction (0 + 1*Lambda ) } @@ -402,9 +439,9 @@ diffuse_gamma = [1,1,1] .type = floats(size=3) .help = sensitivity for diffuse gamma - RotXYZ = [1,1,1] + RotXYZ = [1e-3,1e-3,1e-3] .type = floats(size=3) - .help = sensitivity for RotXYZ + .help = sensitivity for RotXYZ in radians G = 1 .type = float .help = sensitivity for scale factor @@ -451,7 +488,7 @@ .help = init for diffuse gamma RotXYZ = [0,0,0] .type = floats(size=3) - .help = init for RotXYZ + .help = init for RotXYZ in radians G = 1 .type = float .help = init for scale factor @@ -482,9 +519,9 @@ diffuse_gamma = [0,0,0] .type = floats(size=3) .help = min for diffuse gamma - RotXYZ = [-1,-1,-1] + RotXYZ = [-3.1415926, -3.1415926, -3.1415926] .type = floats(size=3) - .help = min for rotXYZ in degrees + .help = min for rotXYZ in radians G = 0 .type = float .help = min for scale G @@ -520,12 +557,12 @@ diffuse_sigma = [20,20,20] .type = floats(size=3) .help = max diffuse sigma - diffuse_gamma = [1000,1000,1000] + diffuse_gamma = [10000,10000,10000] .type = floats(size=3) .help = max for diffuse gamma - RotXYZ = [1,1,1] + RotXYZ = [3.1415926, 3.1415926, 3.1415926] .type = floats(size=3) - .help = max for rotXYZ in degrees + .help = max for rotXYZ in radians G = 1e12 .type = float .help = max for scale G @@ -582,7 +619,7 @@ ucell = False .type = bool .help = fix the unit cell during refinement - detz_shift = False + detz_shift = True .type = bool .help = fix the detector distance shift during refinement } @@ -611,6 +648,9 @@ .type = int .help = Increase to add accuracy to diffuse scattering models, at the expense of longer computations .help = Best to increment by values of 1 when testing +diffuse_orientation = 1 + .type = int + .help = orient the diffuse scattering features. 
0 is along (a-b, a+b, c), 1 is along (a,b,c) symmetrize_diffuse = True .type = bool .help = use the laue group rotation operators to symmetrize diffuse signals @@ -718,6 +758,9 @@ .type = str .help = if logfiles=True, then write the log to this file, stored in the folder specified by outdir .help = if None, then defaults to main_stage1.log for hopper, main_pred.log for prediction, main_stage2.log for stage_two + log_hostname = True + .type = bool + .help = prefix logfiles with host name } profile = False .type = bool diff --git a/simtbx/diffBragg/prep_stage2_input.py b/simtbx/diffBragg/prep_stage2_input.py index 205128234f..2c08657a5c 100644 --- a/simtbx/diffBragg/prep_stage2_input.py +++ b/simtbx/diffBragg/prep_stage2_input.py @@ -4,6 +4,7 @@ import numpy as np from dials.array_family import flex from libtbx.mpi4py import MPI +from simtbx.diffBragg import utils COMM = MPI.COMM_WORLD import logging @@ -27,20 +28,59 @@ def get_equal_partition(weights, partitions): load[lightest] += weights[idx] return distribution -def prep_dataframe(df, refls_key="predictions"): +def prep_dataframe(df, refls_key="predictions", res_ranges_string=None): + """ + + :param df: input pandas dataframe for stage2 + :param refls_key: column in df containing the reflection filenames + :param res_ranges_string: optional res_ranges_string phil param (params.refiner.res_ranges) + :return: + """ # TODO make sure all pred files exist + + res_ranges = None + if res_ranges_string is not None: + res_ranges = utils.parse_reso_string(res_ranges_string) + + if refls_key not in list(df): + raise KeyError("Dataframe has no key %s" % refls_key) nshots = len(df) - refls_names = df[refls_key] + df.reset_index(drop=True, inplace=True) + df['index'] = df.index.values + refl_info = df[["index", refls_key, "exp_idx"]].values + + # sort and split such that each rank will read many refls from same file + sorted_names_and_ids = sorted( + refl_info, + key=lambda x: x[1]) # sort by name + df_idx, refl_names, expt_ids = np.array_split(sorted_names_and_ids, COMM.size)[COMM.rank].T + refls_per_shot = [] if COMM.rank==0: LOGGER.info("Loading nrefls per shot") - for i_shot, name in enumerate(refls_names): - if i_shot % COMM.size != COMM.rank: - continue - R = flex.reflection_table.from_file(name) - if len(R)==0: - LOGGER.critical("Reflection %s has 0 reflections !" % (name, len(R))) - refls_per_shot.append((i_shot, len(R))) + + prev_name = "" # keep track of the most recently read refl table file + Rall = None + for (i_shot, name, expt_id) in zip(df_idx, refl_names, expt_ids): + if Rall is None or name != prev_name: + Rall = flex.reflection_table.from_file(name) + prev_name = name + + R = Rall.select(Rall['id'] == int(expt_id)) + if res_ranges is not None: + num_ref = 0 + if 'rlp' not in set(R.keys()): + raise KeyError("Cannot filter res ranges if rlp column not in refl tables") + d = 1. / np.linalg.norm(R["rlp"], axis=1) # resolution per refl + for d_fine, d_coarse in res_ranges: + d_sel = np.logical_and(d >= d_fine, d < d_coarse) + num_ref += d_sel.sum() + else: + num_ref = len(R) + + if num_ref==0: + LOGGER.critical("Reflection %s id=%d has 0 reflections !" 
% (name, expt_id, num_ref)) + refls_per_shot.append((i_shot, num_ref)) refls_per_shot = COMM.reduce(refls_per_shot, root=0) work_distribution = None diff --git a/simtbx/diffBragg/refiners/base_refiner.py b/simtbx/diffBragg/refiners/base_refiner.py index c74cd44436..f5179e95fa 100644 --- a/simtbx/diffBragg/refiners/base_refiner.py +++ b/simtbx/diffBragg/refiners/base_refiner.py @@ -210,12 +210,14 @@ def run(self, setup=True, setup_only=False): else: try: self.diag_mode = None + self.minimizer = scitbx.lbfgs.run( target_evaluator=self, core_params=self._core_param, exception_handling_params=self._handler, termination_params=self._terminator, gradient_only=self.gradient_only) + except BreakToUseCurvatures: self.hit_break_to_use_curvatures = True pass diff --git a/simtbx/diffBragg/refiners/stage_two_refiner.py b/simtbx/diffBragg/refiners/stage_two_refiner.py index b877c40f70..3e7beb41fa 100644 --- a/simtbx/diffBragg/refiners/stage_two_refiner.py +++ b/simtbx/diffBragg/refiners/stage_two_refiner.py @@ -11,6 +11,8 @@ import warnings import signal import logging +from copy import deepcopy +from simtbx.diffBragg import hopper_io LOGGER = logging.getLogger("diffBragg.main") warnings.filterwarnings("ignore") @@ -53,7 +55,6 @@ class Bcolors: from simtbx.diffBragg.refiners import BreakBecauseSignal, BreakToUseCurvatures from dials.array_family import flex from simtbx.diffBragg.refiners import BaseRefiner -from collections import Counter from cctbx import miller, sgtbx from simtbx.diffBragg.refiners.parameters import RangedParameter @@ -74,11 +75,10 @@ def __init__(self, shot_modelers, sgsymbol, params): self.save_model_freq = self.params.refiner.stage_two.save_model_freq self.use_nominal_h = self.params.refiner.stage_two.use_nominal_hkl - self.saveZ_freq = self.params.refiner.stage_two.save_Z_freq # save Z-score data every N iterations + self.saveZ_freq = self.params.refiner.stage_two.save_Z_freq # save Z-score data every N function calls self.break_signal = None # check for this signal during refinement, and break refinement if signal is received (see python signal module) TODO: make work with MPI self.save_model = False # whether to save the model - self.idx_from_asu = {} # maps global fcell index to asu hkl - self.asu_from_idx = {} # maps asu hkl to global fcell index + self.hiasu = None # stores Hi_asu, counts, maps to and from fcell indices self.rescale_params = True # whether to rescale parameters during refinement # TODO this will always be true, so remove the ability to disable self.request_diag_once = False # LBFGS refiner property self.min_multiplicity = self.params.refiner.stage_two.min_multiplicity @@ -91,6 +91,7 @@ def __init__(self, shot_modelers, sgsymbol, params): self.use_curvatures_threshold = 7 # how many positive curvature iterations required before breaking, after which simulation can be restart with use_curvatures=True self.verbose = True # whether to print during iterations self.iterations = 0 # iteration counter , used internally + self.target_eval_count = 0 # target function evaluation counter, used internally self.shot_ids = None # for global refinement , self.log2pi = np.log(np.pi*2) @@ -151,7 +152,7 @@ def n(self): @property def n_global_fcell(self): - return len(self.idx_from_asu) + return self.hiasu.present_len @property def image_shape(self): @@ -190,16 +191,20 @@ def make_output_dir(self): self.Zdir = os.path.join(self.output_dir, "Z") self.model_dir = os.path.join(self.output_dir, "model") for dirname in (self.Zdir, self.model_dir): - if self.I_AM_ROOT and not 
os.path.exists(dirname): + if self.params.debug_mode and self.I_AM_ROOT and not os.path.exists(dirname): os.makedirs(dirname) COMM.barrier() def _setup(self): # Here we go! https://youtu.be/7VvkXA6xpqI + if not self.params.debug_mode: + LOGGER.info("Disabling saveZ and save_model because debug_mode=False") + self.saveZ_freq = None + self.save_model_freq = None LOGGER.info("Setup begins!") - if self.refine_Fcell and not self.asu_from_idx: + if self.refine_Fcell and not self.hiasu.from_idx: raise ValueError("Need to supply a non empty asu from idx map") - if self.refine_Fcell and not self.idx_from_asu: # # TODO just derive from its inverse + if self.refine_Fcell and not self.hiasu.to_idx: raise ValueError("Need to supply a non empty idx from asu map") self.make_output_dir() @@ -240,18 +245,12 @@ def _setup(self): self._setup_ncells_refinement_parameters() self._track_num_times_pixel_was_modeled() - self.hkl_totals = [] - if self.refine_Fcell: - for i_shot in self.shot_ids: - for i_h, h in enumerate(self.Modelers[i_shot].Hi_asu): - self.hkl_totals.append(self.idx_from_asu[h]) - self.hkl_totals = self._MPI_reduce_broadcast(self.hkl_totals) - + self._setup_nominal_hkl_p1() self._MPI_setup_global_params() self._MPI_sync_fcell_parameters() # reduce then broadcast fcell LOGGER.info("--combining parameters across ranks") - self._MPI_sync_hkl_freq() + self._MPI_sync_hkl_freq() # FIXME does this do absolutely anything? if self.x_init is not None: LOGGER.info("Initializing with provided x_init array") @@ -270,7 +269,7 @@ def _setup(self): for sid in self.shot_ids: Modeler = self.Modelers[sid] - Modeler.all_fcell_global_idx = np.array([self.idx_from_asu[h] for h in Modeler.hi_asu_perpix]) + Modeler.all_fcell_global_idx = np.array([self.hiasu.to_idx[h] for h in Modeler.hi_asu_perpix]) Modeler.unique_i_fcell = set(Modeler.all_fcell_global_idx) Modeler.i_fcell_slices = self._get_i_fcell_slices(Modeler) self.Modelers[sid] = Modeler # TODO: VERIFY IF THIS IS NECESSARY ? 
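For readers following the refactor from the old idx_from_asu / asu_from_idx dictionaries to the single self.hiasu object: this refiner only reads four of its attributes (to_idx, from_idx, present_len and present_idx_counter). The real container is defined outside this patch, so the class below is only a minimal stand-in built from the per-shot ASU indices, assuming those four attributes are all that matter.

    from collections import Counter

    class HiAsuMaps:
        """Minimal stand-in exposing the attributes stage_two_refiner reads."""
        def __init__(self, hi_asu_per_shot):
            # hi_asu_per_shot: iterable over shots, each an iterable of ASU hkl tuples
            all_hkl = [h for shot in hi_asu_per_shot for h in shot]
            unique_hkl = sorted(set(all_hkl))
            self.from_idx = {i: h for i, h in enumerate(unique_hkl)}  # fcell index -> ASU hkl
            self.to_idx = {h: i for i, h in enumerate(unique_hkl)}    # ASU hkl -> fcell index
            self.present_len = len(unique_hkl)                        # backs n_global_fcell
            # multiplicity per fcell index, used above as self.hkl_frequency
            self.present_idx_counter = Counter(self.to_idx[h] for h in all_hkl)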
@@ -349,7 +348,7 @@ def _make_p1_equiv_mapping(self): self.num_equivs_for_i_fcell = {} self.update_indices = [] for i_fcell in range(self.n_global_fcell): - hkl_asu = self.asu_from_idx[i_fcell] + hkl_asu = self.hiasu.from_idx[i_fcell] equivs = [i.h() for i in miller.sym_equiv_indices(self.space_group, hkl_asu).indices()] self.num_equivs_for_i_fcell[i_fcell] = len(equivs) @@ -360,27 +359,45 @@ def _MPI_setup_global_params(self): if self.I_AM_ROOT: LOGGER.info("--2 Setting up global parameters") if self.output_dir is not None: - np.save(os.path.join(self.output_dir, "f_asu_map"), self.asu_from_idx) + np.save(os.path.join(self.output_dir, "f_asu_map"), self.hiasu.from_idx) self._setup_fcell_params() + def _setup_nominal_hkl_p1(self): + Omatrix = np.reshape(self.S.crystal.Omatrix.elems, [3, 3]) + for i_shot in self.Modelers: + MOD = self.Modelers[i_shot] + nom_h = MOD.all_nominal_hkl + nom_h_p1 = np.dot(nom_h, Omatrix).astype(np.int32) + nom_h_p1 = list(map(tuple, nom_h_p1)) + self.Modelers[i_shot].all_nominal_hkl_p1 = nom_h_p1 + def _setup_fcell_params(self): if self.refine_Fcell: LOGGER.info("----loading fcell data") # this is the number of observations of hkl (accessed like a dictionary via global_fcell_index) LOGGER.info("---- -- counting hkl totes") LOGGER.info("compute HKL multiplicity") - self.hkl_frequency = Counter(self.hkl_totals) + self.hkl_frequency = self.hiasu.present_idx_counter LOGGER.info("save HKL multiplicity") np.save(os.path.join(self.output_dir, "f_asu_multi"), self.hkl_frequency) LOGGER.info("Done ") LOGGER.info("local refiner symbol=%s ; nanoBragg crystal symbol: %s" % (self.symbol, self.S.crystal.symbol)) - ma = self.S.crystal.miller_array_high_symmetry.map_to_asu() + self.fcell_init_from_i_fcell = [] + ma = self.S.crystal.miller_array LOGGER.info("make an Fhkl map") ma_map = {h: d for h,d in zip(ma.indices(), ma.data())} - LOGGER.info("make fcell_init") - self.fcell_init_from_i_fcell = np.array([ma_map[self.asu_from_idx[i_fcell]] for i_fcell in range(self.n_global_fcell)]) + Omatrix = np.reshape(self.S.crystal.Omatrix.elems,[3,3]) + + # TODO: Vectorize + for i_fcell in range(self.n_global_fcell): + asu_hkl = self.hiasu.from_idx[i_fcell] # high symmetry + P1_hkl = tuple(np.dot(Omatrix, asu_hkl).astype(int)) + fcell_val = ma_map[P1_hkl] + self.fcell_init_from_i_fcell.append(fcell_val) + self.fcell_init_from_i_fcell = np.array(self.fcell_init_from_i_fcell) + self.fcell_sigmas_from_i_fcell = self.params.sigmas.Fhkl LOGGER.info("DONE make fcell_init") @@ -436,7 +453,9 @@ def _get_ncells_abc(self, i_shot): return vals def _get_eta(self, i_shot): - pass + # NOTE: refinement of eta not supported in this script + vals = [self.Modelers[i_shot].PAR.eta[i_eta].init for i_eta in range(3)] + return vals def _get_spot_scale(self, i_shot): xval = self.x[self.spot_scale_xpos[i_shot]] @@ -465,8 +484,8 @@ def _run_diffBragg_current(self): LOGGER.info("run diffBragg for shot %d" % self._i_shot) pfs = self.Modelers[self._i_shot].pan_fast_slow if self.use_nominal_h: - nom_h = self.Modelers[self._i_shot].all_nominal_hkl - self.D.add_diffBragg_spots(pfs, nom_h) + nom_h_p1 = self.Modelers[self._i_shot].all_nominal_hkl_p1 + self.D.add_diffBragg_spots(pfs, nom_h_p1) else: self.D.add_diffBragg_spots(pfs) LOGGER.info("finished diffBragg for shot %d" % self._i_shot) @@ -506,7 +525,30 @@ def _update_spectra_coefficients(self): pass def _update_eta(self): - pass + + if self.S.umat_maker is not None: + eta_vals = self._get_eta(self._i_shot) + + if not self.D.has_anisotropic_mosaic_spread: + assert 
self.S.Umats_method == 2 + assert len(set(eta_vals))==1 + eta_vals = eta_vals[0] + + LOGGER.info("eta=%f" % eta_vals) + self.S.update_umats_for_refinement(eta_vals) + + def _symmetrize_Flatt(self): + if self.params.symmetrize_Flatt: + # NOTE: RotXYZ refinement disabled for this script, so offsets always 0,0,0 + RXYZU = hopper_io.diffBragg_Umat(0,0,0,self.D.Umatrix) + Cryst = deepcopy(self.S.crystal.dxtbx_crystal) + B_realspace = self.get_refined_Bmatrix(self._i_shot, recip=False) + A = RXYZU * B_realspace + A_recip = A.inverse().transpose() + Cryst.set_A(A_recip) + symbol = self.S.crystal.space_group_info.type().lookup_symbol() + self.D.set_mosaic_blocks_sym(Cryst, symbol , self.params.simulator.crystal.num_mosaicity_samples, + refining_eta=False) # NOTE:no eta refinement in this stage 2 script (possible in ens.hopper) def _set_background_plane(self): self.tilt_plane = self.Modelers[self._i_shot].all_background[self.roi_sel] @@ -626,11 +668,11 @@ def compute_functional_and_gradients(self): t = time.time() out = self._compute_functional_and_gradients() t = time.time()-t - LOGGER.info("TOok %.4f sec to compute functional and grad" % t) + LOGGER.info("Took %.4f sec to compute functional and grad" % t) return out def _compute_functional_and_gradients(self): - LOGGER.info(Bcolors.OKBLUE+"BEGIN FUNC GRAD ; iteration %d" % self.iterations+Bcolors.ENDC) + LOGGER.info(Bcolors.OKBLUE+"BEGIN FUNC GRAD ; Eval %d" % self.target_eval_count+Bcolors.ENDC) #if self.verbose: # self._print_iteration_header() @@ -654,15 +696,15 @@ def _compute_functional_and_gradients(self): LOGGER.info("Iterate over %d shots" % len(self.shot_ids)) self._shot_Zscores = [] - save_model = self.save_model_freq is not None and self.iterations % self.save_model_freq == 0 + save_model = self.save_model_freq is not None and self.target_eval_count % self.save_model_freq == 0 if save_model: - self._save_model_dir = os.path.join(self.model_dir, "iter%d" % self.iterations) + self._save_model_dir = os.path.join(self.model_dir, "eval%d" % self.target_eval_count) - if COMM.rank == 0 and not os.path.exists(self._save_model_dir): + if self.params.debug_mode and COMM.rank == 0 and not os.path.exists(self._save_model_dir): os.makedirs(self._save_model_dir) COMM.barrier() - if self.iterations % self.params.refiner.save_gain_freq == 0: + if self.target_eval_count % self.params.refiner.save_gain_freq == 0: self._save_optimized_gain_map() self.all_sigZ = [] @@ -684,6 +726,7 @@ def _compute_functional_and_gradients(self): self._update_ncells_def() self._update_rotXYZ() self._update_eta() # mosaic spread + self._symmetrize_Flatt() self._update_dxtbx_detector() self._update_sausages() @@ -702,7 +745,7 @@ def _compute_functional_and_gradients(self): self._derivative_convenience_factors() - if self.iterations % self.saveZ_freq == 0: + if self.saveZ_freq is not None and self.target_eval_count % self.saveZ_freq == 0: MOD = self.Modelers[self._i_shot] self._spot_Zscores = [] for i_fcell in MOD.unique_i_fcell: @@ -772,7 +815,7 @@ def _compute_functional_and_gradients(self): tsave = time.time()-tsave LOGGER.info("Time to dump param and Zscore data: %.4f" % tsave) - self.iterations += 1 + self.target_eval_count += 1 self.f_vals.append(self.target_functional) if self.calc_curvatures and not self.use_curvatures: @@ -782,21 +825,24 @@ def _compute_functional_and_gradients(self): LOGGER.info("DONE WITH FUNC GRAD") return self._f, self._g + def callback_after_step(self, minimizer): + self.iterations = minimizer.iter() + def _save_model(self, model_info): 
LOGGER.info("SAVING MODEL FOR SHOT %d" % self._i_shot) df = pandas.DataFrame(model_info) df["shot_id"] = self._i_shot outdir = self._save_model_dir - outname = os.path.join(outdir, "rank%d_shot%d_ITER%d.pkl" % (COMM.rank, self._i_shot, self.iterations)) + outname = os.path.join(outdir, "rank%d_shot%d_EVAL%d_ITER%d.pkl" % (COMM.rank, self._i_shot, self.target_eval_count, self.iterations)) df.to_pickle(outname) def _save_Zscore_data(self): - if not self.iterations % self.saveZ_freq == 0: + if self.saveZ_freq is None or not self.target_eval_count % self.saveZ_freq == 0: return outdir = os.path.join(self.Zdir, "rank%d_Zscore" % self.rank) if not os.path.exists(outdir): os.makedirs(outdir) - fname = os.path.join(outdir, "sigZ_iter%d_rank%d" % (self.iterations, self.rank)) + fname = os.path.join(outdir, "sigZ_eval%d_iter%d_rank%d" % (self.target_eval_count, self.iterations, self.rank)) np.save(fname, np.array(self._shot_Zscores, object)) def _sanity_check_grad(self): @@ -827,6 +873,7 @@ def _Fcell_derivatives(self): if not self.refine_Fcell: return MOD = self.Modelers[self._i_shot] + dumps = [] for i_fcell in MOD.unique_i_fcell: multi = self.hkl_frequency[i_fcell] @@ -836,6 +883,7 @@ def _Fcell_derivatives(self): xpos = self.fcell_xstart + i_fcell Famp = self._fcell_at_i_fcell[i_fcell] sig = 1 + for slc in MOD.i_fcell_slices[i_fcell]: self.fcell_dI_dtheta = self.fcell_deriv[slc] @@ -852,7 +900,9 @@ def _Fcell_derivatives(self): trust = MOD.all_trusted[slc] # NOTE : no need to normalize Fhkl gradients by the overlap rate - they should arise from different HKLs #freq = MOD.all_freq[slc] # pixel frequency (1 is no overlaps) - self.grad[xpos] += (g_accum[trust].sum())*.5 + dump = (g_accum[trust].sum())*.5 + self.grad[xpos] += dump + dumps.append(dump) if self.calc_curvatures: raise NotImplementedError("No curvature for Fcell refinement") @@ -985,11 +1035,11 @@ def _print_iteration_header(self): if self.use_curvatures: LOGGER.info( - "%s%s%s%s\nTrial%d (%s): Compute functional and gradients Iter %d %s(Using Curvatures)%s\n%s%s%s%s" - % (Bcolors.HEADER, border,border,border, self.trial_id + 1, refine_str, self.iterations + 1, Bcolors.OKGREEN, Bcolors.HEADER, border,border,border, Bcolors.ENDC)) + "%s%s%s%s\nTrial%d (%s): Compute functional and gradients eval %d %s(Using Curvatures)%s\n%s%s%s%s" + % (Bcolors.HEADER, border,border,border, self.trial_id + 1, refine_str, self.target_eval_count + 1, Bcolors.OKGREEN, Bcolors.HEADER, border,border,border, Bcolors.ENDC)) else: - LOGGER.info("%s%s%s%s\n, Trial%d (%s): Compute functional and gradients Iter %d PosCurva %d\n%s%s%s%s" - % (Bcolors.HEADER, border, border, border, self.trial_id + 1, refine_str, self.iterations + 1, self.num_positive_curvatures, border, border,border, Bcolors.ENDC)) + LOGGER.info("%s%s%s%s\n, Trial%d (%s): Compute functional and gradients eval %d PosCurva %d\n%s%s%s%s" + % (Bcolors.HEADER, border, border, border, self.trial_id + 1, refine_str, self.target_eval_count + 1, self.num_positive_curvatures, border, border,border, Bcolors.ENDC)) def _save_optimized_gain_map(self): if not self.params.refiner.refine_gain_map: @@ -1006,7 +1056,7 @@ def _save_optimized_gain_map(self): def _MPI_save_state_of_refiner(self): if self.I_AM_ROOT and self.output_dir is not None and self.refine_Fcell: - outf = os.path.join(self.output_dir, "_fcell_trial%d_iter%d" % (self.trial_id, self.iterations)) + outf = os.path.join(self.output_dir, "_fcell_trial%d_eval%d_iter%d" % (self.trial_id, self.target_eval_count, self.iterations)) np.savez(outf, 
fvals=self._fcell_at_i_fcell) def _target_accumulate(self): @@ -1071,8 +1121,11 @@ def _evaluate_log_averageI_plus_sigma_readout(self): self.log_v = np.log(v) self.log_v[v <= 0] = 0 # but will I ever negative_model ? - def get_refined_Bmatrix(self, i_shot): - return self.Modelers[i_shot].PAR.ucell_man.B_recipspace + def get_refined_Bmatrix(self, i_shot, recip=False): + if recip: + return self.Modelers[i_shot].PAR.ucell_man.B_recipspace + else: + return self.Modelers[i_shot].PAR.ucell_man.B_realspace def curvatures(self): return self.curv diff --git a/simtbx/diffBragg/src/diffBragg.cpp b/simtbx/diffBragg/src/diffBragg.cpp index 8d2915db35..f07b7e99c2 100644 --- a/simtbx/diffBragg/src/diffBragg.cpp +++ b/simtbx/diffBragg/src/diffBragg.cpp @@ -537,6 +537,7 @@ void diffBragg::update_dxtbx_geoms( double panel_rot_angS, double panel_offsetX, double panel_offsetY, double panel_offsetZ, bool force){ + db_cu_flags.update_detector = true; int old_verbose = verbose; verbose = 0; @@ -714,12 +715,18 @@ void diffBragg::update_dxtbx_geoms( SCITBX_ASSERT(close_distance > 0); verbose = old_verbose; set_close_distances(); + db_cu_flags.update_panel_deriv_vecs=true; + db_cu_flags.update_detector=true; } void diffBragg::shift_originZ(const dxtbx::model::Detector& detector, double shiftZ){ - for (int pid=0; pid< detector.size(); pid++) - db_det.pix0_vectors[pid*3 + 2] = detector[pid].get_origin()[2]/1000.0 + shiftZ; - set_close_distances(); + if (shiftZ != prev_shiftZ){ + for (int pid=0; pid< detector.size(); pid++) + db_det.pix0_vectors[pid*3 + 2] = detector[pid].get_origin()[2]/1000.0 + shiftZ; + set_close_distances(); + db_cu_flags.update_detector=true; + prev_shiftZ = shiftZ; + } } void diffBragg::init_raw_pixels_roi(){ @@ -749,7 +756,7 @@ void diffBragg::initialize_managers(){ pan_orig = boost::dynamic_pointer_cast(panels[1+i_pan_orig]); if (pan_orig->refine_me){ pan_orig->initialize(Npix_total, compute_curvatures); - update_detector_on_device=true; + db_cu_flags.update_detector=true; } } @@ -757,7 +764,8 @@ void diffBragg::initialize_managers(){ fcell_managers[0]->initialize(Npix_total, compute_curvatures); //fcell_managers[1]->initialize(Npix_total, compute_curvatures); //fcell_managers[2]->initialize(Npix_total, compute_curvatures); - update_Fhkl_on_device = true; + db_cu_flags.update_Fhkl = true; + db_cu_flags.update_Fhkl_scales = true; } for (int i_eta=0; i_eta<3; i_eta++){ @@ -785,7 +793,7 @@ void diffBragg::initialize_managers(){ pan_rot = boost::dynamic_pointer_cast(panels[manager_idx]); if (pan_rot->refine_me){ update_panel_deriv_vecs_on_device=true; - update_detector_on_device=true; + db_cu_flags.update_detector=true; pan_rot->initialize(Npix_total, compute_curvatures); } } @@ -797,6 +805,20 @@ void diffBragg::initialize_managers(){ } +af::shared +diffBragg::get_mosaic_blocks_prime() { + af::shared result; + int num_blocks = mosaic_domains; + // TODO if refining aniso eta, then num_blocks for the primes is 3*mosaic_domains, address this case + for(mos_tic=0;mos_tic 0){ @@ -1000,12 +1022,13 @@ void diffBragg::refine(int refine_id){ boost::shared_ptr pan_orig = boost::dynamic_pointer_cast(panels[1]); pan_orig->refine_me=true; pan_orig->initialize(Npix_total, compute_curvatures); - update_detector_on_device=true; + db_cu_flags.update_detector=true; } else if(refine_id==11){ fcell_managers[0]->refine_me=true; fcell_managers[0]->initialize(Npix_total, compute_curvatures); - update_Fhkl_on_device = true; + db_cu_flags.update_Fhkl = true; + db_cu_flags.update_Fhkl_scales = true; } else if 
(refine_id==12 || refine_id==13){ @@ -1020,7 +1043,7 @@ void diffBragg::refine(int refine_id){ pan_rot->refine_me=true; rotate_fs_ss_vecs_3D(0,0,0); pan_rot->initialize(Npix_total, compute_curvatures); - update_detector_on_device = true; + db_cu_flags.update_detector = true; update_panel_deriv_vecs_on_device = true; } @@ -1028,21 +1051,21 @@ void diffBragg::refine(int refine_id){ boost::shared_ptr pan_orig = boost::dynamic_pointer_cast(panels[2]); pan_orig->refine_me=true; pan_orig->initialize(Npix_total, compute_curvatures); - update_detector_on_device = true; + db_cu_flags.update_detector = true; } else if (refine_id==16){ boost::shared_ptr pan_orig = boost::dynamic_pointer_cast(panels[3]); pan_orig->refine_me=true; pan_orig->initialize(Npix_total, compute_curvatures); - update_detector_on_device = true; + db_cu_flags.update_detector = true; } else if (refine_id==17){ boost::shared_ptr pan_rot = boost::dynamic_pointer_cast(panels[4]); pan_rot->refine_me=true; rotate_fs_ss_vecs_3D(0,0,0); pan_rot->initialize(Npix_total, compute_curvatures); - update_detector_on_device = true; + db_cu_flags.update_detector = true; update_panel_deriv_vecs_on_device = true; } else if (refine_id==18){ @@ -1050,7 +1073,7 @@ void diffBragg::refine(int refine_id){ pan_rot->refine_me=true; rotate_fs_ss_vecs_3D(0,0,0); pan_rot->initialize(Npix_total, compute_curvatures); - update_detector_on_device = true; + db_cu_flags.update_detector = true; update_panel_deriv_vecs_on_device = true; } else if (refine_id==19){ @@ -1131,6 +1154,7 @@ void diffBragg::update_Fhkl_channels(np::ndarray& channels){ if (verbose) printf("source=%d channel_id=%d\n", i, channel_id); } + db_cu_flags.update_Fhkl_channels=true; } boost::python::list diffBragg::get_Fhkl_channels(){ @@ -1198,6 +1222,7 @@ void diffBragg::quick_Fcell_update(boost::python::tuple const& value){ } //update_linear_Fhkl(); if(verbose) printf("done with quick update of Fhkl:\n"); + db_cu_flags.update_Fhkl=true; } @@ -1797,6 +1822,7 @@ void diffBragg::update_Fhkl_scale_factors(np::ndarray& scale_factors, int num_Fh bool init_scales=first_deriv_imgs.Fhkl_scale.empty(); db_flags.Fhkl_have_scale_factors = true; db_cryst.num_Fhkl_channels=num_Fhkl_channels; + db_cu_flags.update_Fhkl_scales=true; double* scale_ptr = reinterpret_cast(scale_factors.get_data()); for(int i_chan=0; i_chan< num_Fhkl_channels; i_chan++){ @@ -1843,41 +1869,55 @@ void diffBragg::add_diffBragg_spots(const af::shared& panels_fasts_slows int pan_rot_ids[3] = {0,4,5}; int pan_orig_ids[3] = {1,2,3}; - db_flags.refine_panel_rot.resize(3, false); - db_flags.refine_panel_origin.resize(3,false); - db_flags.refine_lambda.resize(2,false); - db_flags.refine_Bmat.resize(6,false); - db_flags.refine_Umat.resize(3,false); - db_flags.refine_Ncells.resize(3,false); + if (db_flags.refine_panel_rot.empty()){ // if one is empty, they're all empty + db_flags.refine_panel_rot.resize(3, false); + db_flags.refine_panel_origin.resize(3,false); + db_flags.refine_lambda.resize(2,false); + db_flags.refine_Bmat.resize(6,false); + db_flags.refine_Umat.resize(3,false); + db_flags.refine_Ncells.resize(3,false); + db_cu_flags.update_refine_flags=true; + } for(int i_pan=0;i_pan < 3; i_pan++){ int i_pan_rot = pan_rot_ids[i_pan]; int i_pan_orig = pan_orig_ids[i_pan]; - if (panels[i_pan_rot]->refine_me) - db_flags.refine_panel_rot[i_pan] = true; - if (panels[i_pan_orig]-> refine_me) - db_flags.refine_panel_origin[i_pan] = true; + if (panels[i_pan_rot]->refine_me != db_flags.refine_panel_rot[i_pan]){ + db_flags.refine_panel_rot[i_pan] = 
panels[i_pan_rot]->refine_me; + db_cu_flags.update_refine_flags=true; + } + if (panels[i_pan_orig]-> refine_me != db_flags.refine_panel_origin[i_pan_orig]){ + db_flags.refine_panel_origin[i_pan] = panels[i_pan_orig]->refine_me; + db_cu_flags.update_refine_flags=true; + } } for (int i_uc = 0; i_uc < 6; i_uc++){ - if (ucell_managers[i_uc]->refine_me) - db_flags.refine_Bmat[i_uc] = true; + if (ucell_managers[i_uc]->refine_me != db_flags.refine_Bmat[i_uc]){ + db_flags.refine_Bmat[i_uc] = ucell_managers[i_uc]->refine_me; + db_cu_flags.update_refine_flags=true; + } } for (int i_rot =0; i_rot< 3; i_rot ++){ - if (rot_managers[i_rot]->refine_me) - db_flags.refine_Umat[i_rot] = true; + if (rot_managers[i_rot]->refine_me != db_flags.refine_Umat[i_rot]){ + db_flags.refine_Umat[i_rot] = rot_managers[i_rot]->refine_me; + db_cu_flags.update_refine_flags=true; + } } for (int i_lam=0; i_lam< 2; i_lam++){ - if (lambda_managers[i_lam]->refine_me) - db_flags.refine_lambda[i_lam] = true; + if (lambda_managers[i_lam]->refine_me != db_flags.refine_lambda[i_lam]){ + db_flags.refine_lambda[i_lam] = lambda_managers[i_lam]->refine_me; + db_cu_flags.update_refine_flags=true; + } } - if (Ncells_managers[0]->refine_me){ - db_flags.refine_Ncells[0] = true; - if (! isotropic_ncells){ - db_flags.refine_Ncells[1] = true; - db_flags.refine_Ncells[2] = true; + for (int i_nc=0; i_nc<3; i_nc++){ + if (isotropic_ncells && i_nc >0) + continue; + if (Ncells_managers[i_nc]->refine_me != db_flags.refine_Ncells[i_nc]){ + db_flags.refine_Ncells[i_nc] = Ncells_managers[i_nc]->refine_me; + db_cu_flags.update_refine_flags=true; } } @@ -2026,7 +2066,7 @@ void diffBragg::add_diffBragg_spots(const af::shared& panels_fasts_slows //fudge = 1.1013986013; // from manuscript computation gettimeofday(&t1,0 ); - if ((! use_cuda && getenv("DIFFBRAGG_USE_CUDA")==NULL && getenv("DIFFBRAGG_USE_KOKKOS")==NULL ) || force_cpu){ + if ((! 
use_gpu && getenv("DIFFBRAGG_USE_CUDA")==NULL && getenv("DIFFBRAGG_USE_KOKKOS")==NULL ) || force_cpu){ diffBragg_sum_over_steps( Npix_to_model, panels_fasts_slows_vec, image, @@ -2044,16 +2084,18 @@ void diffBragg::add_diffBragg_spots(const af::shared& panels_fasts_slows db_cu_flags.device_Id = device_Id; db_cu_flags.update_step_positions = update_step_positions_on_device; db_cu_flags.update_panels_fasts_slows = update_panels_fasts_slows_on_device; - db_cu_flags.update_sources = update_sources_on_device; db_cu_flags.update_umats = update_umats_on_device; db_cu_flags.update_dB_mats = update_dB_matrices_on_device; db_cu_flags.update_rotmats = update_rotmats_on_device; - db_cu_flags.update_Fhkl = update_Fhkl_on_device; - db_cu_flags.update_detector = update_detector_on_device; - db_cu_flags.update_refine_flags = update_refine_flags_on_device; + //db_cu_flags.update_Fhkl = update_Fhkl_on_device; db_cu_flags.update_panel_deriv_vecs = update_panel_deriv_vecs_on_device; db_cu_flags.Npix_to_allocate = Npix_to_allocate; + bool use_cuda = false; +#ifdef DIFFBRAGG_HAVE_CUDA + use_cuda = use_gpu; +#endif + if (use_cuda || getenv("DIFFBRAGG_USE_CUDA")!=NULL){ #ifdef DIFFBRAGG_HAVE_CUDA diffBragg_sum_over_steps_cuda( @@ -2076,7 +2118,7 @@ void diffBragg::add_diffBragg_spots(const af::shared& panels_fasts_slows SCITBX_ASSERT(DIFFBRAGG_USE_CUDA_flag_unsupported); #endif } - else { + else if (use_gpu || getenv("DIFFBRAGG_USE_KOKKOS")!=NULL){ #ifdef DIFFBRAGG_HAVE_KOKKOS if (!diffBragg_runner) { diffBragg_runner = std::make_shared(); @@ -2097,10 +2139,10 @@ void diffBragg::add_diffBragg_spots(const af::shared& panels_fasts_slows if (verbose) printf("Ran the Kokkos kernel\n"); #else - bool DIFFBRAGG_USE_KOKKOS_flag_unsupported=false; - SCITBX_ASSERT(DIFFBRAGG_USE_KOKKOS_flag_unsupported); + bool DIFFBRAGG_USE_KOKKOS_flag_unsupported=false; + SCITBX_ASSERT(DIFFBRAGG_USE_KOKKOS_flag_unsupported); #endif - } + } last_kernel_on_GPU=true; #else bool DIFFBRAGG_USE_KOKKOS_and_DIFFBRAGG_USE_CUDA_flags_unsupported=false; @@ -2116,7 +2158,7 @@ void diffBragg::add_diffBragg_spots(const af::shared& panels_fasts_slows printf("Nsteps=%d\noversample=%d\ndet_thick_steps=%d\nsources=%d\nphisteps=%d\nmosaic_domains=%d\n", db_steps.Nsteps,oversample,detector_thicksteps,sources,phisteps,mosaic_domains); printf("DIFFBRAGG isotropic Ncells=%d\n", isotropic_ncells); - if(use_cuda || getenv("DIFFBRAGG_USE_CUDA")!= NULL) + if(use_gpu || getenv("DIFFBRAGG_USE_CUDA")!= NULL || getenv("DIFFBRAGG_USE_KOKKOS")!= NULL) printf("TIME TO RUN DIFFBRAGG -GPU- (%llu iterations): %3.10f ms \n",n_total_iter, time); else printf("TIME TO RUN DIFFBRAGG -CPU- (%llu iterations): %3.10f ms \n",n_total_iter, time); @@ -2383,10 +2425,24 @@ void diffBragg::show_timing_stats(int MPI_RANK){ //}, boost_adaptbx::python::str printf("RANK%d TIMINGS: add_diffBragg_spots pre kernel wrapper: %10.3f\n", MPI_RANK, TIMERS.add_spots_pre ); printf("RANK%d TIMINGS: add_diffBragg_spots post kernel wrapper: %10.3f\n", MPI_RANK , TIMERS.add_spots_post); printf("RANK%d TIMINGS: add_diffBragg_spots kernel wrapper: %10.3f\n", MPI_RANK, TIMERS.add_spots_kernel_wrapper ); - printf("RANK%d TIMINGS: add_diffBragg_spots CUDA alloc: %10.3f\n", MPI_RANK, TIMERS.cuda_alloc ); - printf("RANK%d TIMINGS: add_diffBragg_spots CUDA copy host to dev: %10.3f\n", MPI_RANK, TIMERS.cuda_copy_to_dev ); - printf("RANK%d TIMINGS: add_diffBragg_spots CUDA copy dev to host: %10.3f\n", MPI_RANK, TIMERS.cuda_copy_from_dev ); - printf("RANK%d TIMINGS: add_diffBragg_spots CUDA kernel: %10.3f\n", 
MPI_RANK, TIMERS.cuda_kernel ); + printf("RANK%d TIMINGS: add_diffBragg_spots device alloc: %10.3f\n", MPI_RANK, TIMERS.cuda_alloc ); + printf("RANK%d TIMINGS: add_diffBragg_spots copy host-to-dev: %10.3f\n", MPI_RANK, TIMERS.cuda_copy_to_dev ); + + printf("RANK%d TIMINGS: host-to-dev Fhkl scales: %10.3f\n", MPI_RANK, TIMERS.copy_Fhkl_scale ); + printf("RANK%d TIMINGS: host-to-dev sources: %10.3f\n", MPI_RANK, TIMERS.copy_sources ); + printf("RANK%d TIMINGS: host-to-dev umats: %10.3f\n", MPI_RANK, TIMERS.copy_umats ); + printf("RANK%d TIMINGS: host-to-dev amats: %10.3f\n", MPI_RANK, TIMERS.copy_amats ); + printf("RANK%d TIMINGS: host-to-dev bmats: %10.3f\n", MPI_RANK, TIMERS.copy_bmats ); + printf("RANK%d TIMINGS: host-to-dev rotmats: %10.3f\n", MPI_RANK, TIMERS.copy_rotmats ); + printf("RANK%d TIMINGS: host-to-dev det: %10.3f\n", MPI_RANK, TIMERS.copy_det ); + printf("RANK%d TIMINGS: host-to-dev nomhkl: %10.3f\n", MPI_RANK, TIMERS.copy_nomhkl ); + printf("RANK%d TIMINGS: host-to-dev flags: %10.3f\n", MPI_RANK, TIMERS.copy_flags ); + printf("RANK%d TIMINGS: host-to-dev fhkl: %10.3f\n", MPI_RANK, TIMERS.copy_fhkl ); + printf("RANK%d TIMINGS: host-to-dev detderiv: %10.3f\n", MPI_RANK, TIMERS.copy_detderiv ); + printf("RANK%d TIMINGS: host-to-dev pfs: %10.3f\n", MPI_RANK, TIMERS.copy_pfs ); + + printf("RANK%d TIMINGS: add_diffBragg_spots copy dev-to-host: %10.3f\n", MPI_RANK, TIMERS.cuda_copy_from_dev ); + printf("RANK%d TIMINGS: add_diffBragg_spots device kernel: %10.3f\n", MPI_RANK, TIMERS.cuda_kernel ); printf("RANK%d TIMINGS: Total kernel calls=%d\n", MPI_RANK, TIMERS.timings ); } else printf("RANK%d No timing has occured since instantiation of diffBragg\n", MPI_RANK); diff --git a/simtbx/diffBragg/src/diffBragg.h b/simtbx/diffBragg/src/diffBragg.h index 36ee6c0afb..aa4983792f 100644 --- a/simtbx/diffBragg/src/diffBragg.h +++ b/simtbx/diffBragg/src/diffBragg.h @@ -139,6 +139,8 @@ class diffBragg: public nanoBragg{ ~diffBragg(){}; + af::shared get_mosaic_blocks_prime(); + /// pixels double* floatimage_roi; af::flex_double raw_pixels_roi; @@ -164,12 +166,21 @@ class diffBragg: public nanoBragg{ #ifdef DIFFBRAGG_HAVE_KOKKOS // diffBragg_cudaPointers cuda_pointers; - void kokkos_free() { diffBragg_runner.reset(); } + inline void kokkos_free() { diffBragg_runner.reset(); } // allocate when needed to avoid problems with kokkos initialization when cuda/kokkos isn't used std::shared_ptr diffBragg_runner{}; // diffBraggKOKKOS diffBragg_runner; #endif + inline void gpu_free(){ +#ifdef DIFFBRAGG_HAVE_CUDA + cuda_free(); +#endif +#ifdef DIFFBRAGG_HAVE_KOKKOS + kokkos_free(); +#endif + } + // methods void update_xray_beams(scitbx::af::versa > const& value); void initialize_managers(); @@ -299,6 +310,7 @@ class diffBragg: public nanoBragg{ double Nd, Ne, Nf; bool refine_Ncells_def; bool no_Nabc_scale; // if true, then absorb the Nabc scale into an overall scale factor + double prev_shiftZ=0; // keep track of when detector Z was shifted (helps determine when to set the update_detector flag for GPU devices Eigen::Matrix3d NABC; bool use_lambda_coefficients; @@ -311,12 +323,11 @@ class diffBragg: public nanoBragg{ bool update_rotmats_on_device=false; bool update_umats_on_device=false; bool update_panels_fasts_slows_on_device=false; - bool update_sources_on_device=false; bool update_Fhkl_on_device=false; bool update_refine_flags_on_device=false; bool update_step_positions_on_device=false; bool update_panel_deriv_vecs_on_device=false; - bool use_cuda=false; + bool use_gpu=false; bool force_cpu=false; int 
Npix_to_allocate=-1; // got GPU allocation, -1 is auto mode diff --git a/simtbx/diffBragg/src/diffBraggCUDA.cu b/simtbx/diffBragg/src/diffBraggCUDA.cu index 6b0cddd2ff..b318a189e5 100644 --- a/simtbx/diffBragg/src/diffBraggCUDA.cu +++ b/simtbx/diffBragg/src/diffBraggCUDA.cu @@ -444,6 +444,12 @@ void diffBragg_sum_over_steps_cuda( if (db_cryst.fpfdp.size() == 0){ // note cannot use atom data if fpfdp is 0, make this cleaner num_atoms=0; } + + if (db_flags.use_diffuse) { + if (db_cryst.laue_group_num < 1 || db_cryst.laue_group_num >14 ){ + throw std::string("Laue group number not in range 1-14"); + } + } //int sm_size = number_of_sources*5*sizeof(CUDAREAL); //gpu_sum_over_steps<<>>( bool aniso_eta = db_cryst.UMATS_RXYZ.size() != db_cryst.UMATS_RXYZ_prime.size(); diff --git a/simtbx/diffBragg/src/diffBraggKOKKOS.cpp b/simtbx/diffBragg/src/diffBraggKOKKOS.cpp index f49508ed39..fb88a6f066 100644 --- a/simtbx/diffBragg/src/diffBraggKOKKOS.cpp +++ b/simtbx/diffBragg/src/diffBraggKOKKOS.cpp @@ -4,6 +4,45 @@ #include "diffBraggKOKKOS.h" #include "diffBragg_kokkos_kernel.h" +#define PRINTOUT(flag, function, ...) \ + if (flag) { \ + function(__VA_ARGS__); \ + } else { \ + function(__VA_ARGS__); \ + } \ + +uint32_t combine_refinement_flags(flags& db_flags) { + uint32_t refine_flag = 0; + refine_flag |= db_flags.refine_diffuse * REFINE_DIFFUSE; + refine_flag |= db_flags.refine_fcell * REFINE_FCELL; + refine_flag |= db_flags.refine_eta * REFINE_ETA; + refine_flag |= db_flags.refine_Umat[0] * REFINE_UMAT1; + refine_flag |= db_flags.refine_Umat[1] * REFINE_UMAT2; + refine_flag |= db_flags.refine_Umat[2] * REFINE_UMAT3; + refine_flag |= db_flags.refine_Ncells_def * REFINE_NCELLS_DEF; + refine_flag |= db_flags.refine_Ncells[0] * REFINE_NCELLS1; + refine_flag |= db_flags.refine_Ncells[1] * REFINE_NCELLS2; + refine_flag |= db_flags.refine_Ncells[2] * REFINE_NCELLS3; + refine_flag |= db_flags.refine_panel_rot[0] * REFINE_PANEL_ROT1; + refine_flag |= db_flags.refine_panel_rot[1] * REFINE_PANEL_ROT2; + refine_flag |= db_flags.refine_panel_rot[2] * REFINE_PANEL_ROT3; + refine_flag |= db_flags.refine_panel_origin[0] * REFINE_PANEL_ORIGIN1; + refine_flag |= db_flags.refine_panel_origin[1] * REFINE_PANEL_ORIGIN2; + refine_flag |= db_flags.refine_panel_origin[2] * REFINE_PANEL_ORIGIN3; + refine_flag |= db_flags.refine_lambda[0] * REFINE_LAMBDA1; + refine_flag |= db_flags.refine_lambda[1] * REFINE_LAMBDA2; + refine_flag |= db_flags.refine_Bmat[0] * REFINE_BMAT1; + refine_flag |= db_flags.refine_Bmat[1] * REFINE_BMAT2; + refine_flag |= db_flags.refine_Bmat[2] * REFINE_BMAT3; + refine_flag |= db_flags.refine_Bmat[3] * REFINE_BMAT4; + refine_flag |= db_flags.refine_Bmat[4] * REFINE_BMAT5; + refine_flag |= db_flags.refine_Bmat[5] * REFINE_BMAT6; + refine_flag |= db_flags.refine_fp_fdp * REFINE_FP_FDP; + refine_flag |= db_flags.refine_Icell * REFINE_ICELL; + + return refine_flag; +} + void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( int Npix_to_model, std::vector& panels_fasts_slows, @@ -20,7 +59,7 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( timer_variables& TIMERS) { if (db_cryst.phi0 != 0 || db_cryst.phisteps > 1) { printf( - "PHI (goniometer position) not supported in GPU code: phi0=%f phisteps=%d, phistep=%d\n", + "PHI (goniometer position) not supported in GPU code: phi0=%f phisteps=%d, phistep=%f\n", db_cryst.phi0, db_cryst.phisteps, db_cryst.phistep); exit(-1); } @@ -33,7 +72,7 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( Kokkos::Tools::popRegion(); double time; - struct timeval t1, 
t2; //, t3 ,t4; + struct timeval t1, t; // t1 times larger blocks of code, and t is used to time shorter blocks of code gettimeofday(&t1, 0); // determine if we need to allocate pixels, and how many. @@ -201,18 +240,19 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( resize(m_panels_fasts_slows, db_cu_flags.Npix_to_allocate * 3); + m_refine_flag = combine_refinement_flags(db_flags); + if (m_refine_flag) { + resize(m_manager_dI, db_cu_flags.Npix_to_allocate); + resize(m_manager_dI2, db_cu_flags.Npix_to_allocate); + } + m_npix_allocated = db_cu_flags.Npix_to_allocate; Kokkos::Tools::popRegion(); } // END of allocation bool ALLOC = !m_device_is_allocated; // shortcut variable - gettimeofday(&t2, 0); - time = (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; - if (TIMERS.recording) - TIMERS.cuda_alloc += time; - if (db_flags.verbose > 1) - printf("TIME SPENT ALLOCATING (TOTAL): %3.10f ms \n", time); + easy_time(TIMERS.cuda_alloc, t1, TIMERS.recording); //, db_flags.verbose > 1); // ALLOC = false; // BEGIN COPYING DATA @@ -221,33 +261,36 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( // END step position if (db_flags.Fhkl_gradient_mode){ - transfer(m_data_residual, d_image.residual, Npix_to_model); - transfer(m_data_variance, d_image.variance, Npix_to_model); - transfer(m_data_trusted, d_image.trusted, Npix_to_model); - transfer(m_data_freq, d_image.freq, Npix_to_model); + kokkostbx::transfer_vector2kokkos(m_data_residual, d_image.residual); + kokkostbx::transfer_vector2kokkos(m_data_variance, d_image.variance); + kokkostbx::transfer_vector2kokkos(m_data_trusted, d_image.trusted); + kokkostbx::transfer_vector2kokkos(m_data_freq, d_image.freq); } if (db_flags.Fhkl_have_scale_factors && ALLOC){ - transfer(m_FhklLinear_ASUid, db_cryst.FhklLinear_ASUid); + kokkostbx::transfer_vector2kokkos(m_FhklLinear_ASUid, db_cryst.FhklLinear_ASUid); } + Kokkos::Tools::pushRegion("BEGIN Fhkl have scale factors"); + gettimeofday(&t, 0); if (db_flags.Fhkl_have_scale_factors){ - //SCITBX_ASSERT(db_beam.number_of_sources == db_beam.Fhkl_channels.size()); - transfer(m_Fhkl_channels, db_beam.Fhkl_channels); - transfer(m_Fhkl_scale, d_image.Fhkl_scale); + if (db_cu_flags.update_Fhkl_scales || ALLOC){ + kokkostbx::transfer_vector2kokkos(m_Fhkl_scale, d_image.Fhkl_scale); + db_cu_flags.update_Fhkl_scales = false; + } + if (db_cu_flags.update_Fhkl_channels || ALLOC){ + kokkostbx::transfer_vector2kokkos(m_Fhkl_channels, db_beam.Fhkl_channels); + db_cu_flags.update_Fhkl_channels = false; + } ::Kokkos::deep_copy(m_Fhkl_scale_deriv, 0); - - // for (int i=0; i < d_image.Fhkl_scale.size(); i++){ - // cp.Fhkl_scale[i] = d_image.Fhkl_scale[i]; - // if (db_flags.Fhkl_gradient_mode){ - // cp.Fhkl_scale_deriv[i] = 0; - // } - // } } + Kokkos::Tools::popRegion(); + easy_time(TIMERS.copy_Fhkl_scale, t, TIMERS.recording); //, db_flags.verbose > 1); // BEGIN sources Kokkos::Tools::pushRegion("BEGIN sources"); - if (db_cu_flags.update_sources || ALLOC || FORCE_COPY) { + gettimeofday(&t, 0); + if (db_cu_flags.update_sources || ALLOC) { int source_count = local_beam.number_of_sources; kokkostbx::transfer_double2kokkos(m_source_X, local_beam.source_X, source_count); kokkostbx::transfer_double2kokkos(m_source_Y, local_beam.source_Y, source_count); @@ -268,12 +311,15 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( Kokkos::fence(); if (db_flags.verbose > 1) printf("H2D sources\n"); + db_cu_flags.update_sources = false; } + easy_time(TIMERS.copy_sources, t, TIMERS.recording); //, 
db_flags.verbose > 1); Kokkos::Tools::popRegion(); // END sources // UMATS + gettimeofday(&t, 0); Kokkos::Tools::pushRegion("UMATS"); if (db_cu_flags.update_umats || ALLOC || FORCE_COPY) { transfer_KOKKOS_MAT3(m_UMATS, db_cryst.UMATS); @@ -285,18 +331,24 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( printf("H2D Done copying Umats\n"); } Kokkos::Tools::popRegion(); + easy_time(TIMERS.copy_umats, t, TIMERS.recording); //, db_flags.verbose > 1); // END UMATS + gettimeofday(&t, 0); if (db_cu_flags.update_umats || ALLOC || FORCE_COPY) { auto Amat_init = db_cryst.eig_U*db_cryst.eig_B*1e10*(db_cryst.eig_O.transpose()); + auto host_AMATS = Kokkos::create_mirror_view(m_AMATS); for (int i=0; i 1) printf("H2D Done copying Amats\n"); } + easy_time(TIMERS.copy_amats, t, TIMERS.recording); // BMATS + gettimeofday(&t, 0); Kokkos::Tools::pushRegion("BMATS"); if (db_cu_flags.update_dB_mats || ALLOC || FORCE_COPY) { transfer_KOKKOS_MAT3(m_dB_Mats, db_cryst.dB_Mats); @@ -305,9 +357,11 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( printf("H2D Done copying dB_Mats\n"); } Kokkos::Tools::popRegion(); + easy_time(TIMERS.copy_bmats, t, TIMERS.recording); // END BMATS // ROT MATS + gettimeofday(&t, 0); Kokkos::Tools::pushRegion("ROT MATS"); if (db_cu_flags.update_rotmats || ALLOC || FORCE_COPY) { transfer_KOKKOS_MAT3(m_RotMats, db_cryst.RotMats); @@ -317,11 +371,13 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( printf("H2D Done copying rotmats\n"); } Kokkos::Tools::popRegion(); + easy_time(TIMERS.copy_rotmats, t, TIMERS.recording); // END ROT MATS // DETECTOR VECTORS + gettimeofday(&t, 0); Kokkos::Tools::pushRegion("DETECTOR VECTORS"); - if (db_cu_flags.update_detector || ALLOC || FORCE_COPY) { + if (db_cu_flags.update_detector || ALLOC) { kokkostbx::transfer_vector2kokkos(m_fdet_vectors, local_det.fdet_vectors); kokkostbx::transfer_vector2kokkos(m_sdet_vectors, local_det.sdet_vectors); kokkostbx::transfer_vector2kokkos(m_odet_vectors, local_det.odet_vectors); @@ -329,25 +385,30 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( kokkostbx::transfer_vector2kokkos(m_close_distances, local_det.close_distances); if (db_flags.verbose > 1) printf("H2D Done copying detector vectors\n"); + db_cu_flags.update_detector = false; } Kokkos::Tools::popRegion(); + easy_time(TIMERS.copy_det, t, TIMERS.recording); // END DETECTOR VECTORS + gettimeofday(&t, 0); if (ALLOC || FORCE_COPY) { - transfer(m_nominal_hkl, db_cryst.nominal_hkl); - transfer(m_atom_data, db_cryst.atom_data); + kokkostbx::transfer_vector2kokkos(m_nominal_hkl, db_cryst.nominal_hkl); + kokkostbx::transfer_vector2kokkos(m_atom_data, db_cryst.atom_data); if (db_flags.verbose > 1) printf("H2D Done copying atom data\n"); - transfer(m_fpfdp, db_cryst.fpfdp); - transfer(m_fpfdp_derivs, db_cryst.fpfdp_derivs); + kokkostbx::transfer_vector2kokkos(m_fpfdp, db_cryst.fpfdp); + kokkostbx::transfer_vector2kokkos(m_fpfdp_derivs, db_cryst.fpfdp_derivs); if (db_flags.verbose > 1) printf("H2D Done copying fprime and fdblprime\n"); } + easy_time(TIMERS.copy_nomhkl, t, TIMERS.recording); - // BEGIN REFINEMENT FLAGS - Kokkos::Tools::pushRegion("BEGIN REFINMENT FLAGS"); - if (db_cu_flags.update_refine_flags || ALLOC || FORCE_COPY) { + // BEGIN UPDATE REFINEMENT + gettimeofday(&t, 0); + Kokkos::Tools::pushRegion("BEGIN UPDATE REFINMENT"); + if (db_cu_flags.update_refine_flags || ALLOC) { kokkostbx::transfer_vector2kokkos(m_refine_Umat, db_flags.refine_Umat); kokkostbx::transfer_vector2kokkos(m_refine_Ncells, db_flags.refine_Ncells); 
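The easy_time(...) calls introduced throughout this file replace the repeated gettimeofday/subtract/accumulate blocks that the patch removes. The helper itself is not defined in this hunk; a minimal sketch consistent with the removed timing code (only the name and call signature are taken from the call sites, the body is an assumption) is:

    #include <sys/time.h>

    // Accumulate the elapsed milliseconds since `start` into `timer`
    // when timing is being recorded; mirrors the removed manual blocks.
    static void easy_time(double& timer, const struct timeval& start, bool recording) {
        struct timeval now;
        gettimeofday(&now, 0);
        double ms = (1000000.0 * (now.tv_sec - start.tv_sec) + now.tv_usec - start.tv_usec) / 1000.0;
        if (recording)
            timer += ms;
    }

Callers restart their own clock with gettimeofday(&t, 0) before each timed block, so the helper only needs to read the current time and accumulate.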
kokkostbx::transfer_vector2kokkos(m_refine_panel_origin, db_flags.refine_panel_origin); @@ -356,35 +417,44 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( kokkostbx::transfer_vector2kokkos(m_refine_Bmat, db_flags.refine_Bmat); if (db_flags.verbose > 1) printf("H2D Done copying refinement flags\n"); + db_cu_flags.update_refine_flags=false; } Kokkos::Tools::popRegion(); - // END REFINEMENT FLAGS + easy_time(TIMERS.copy_flags, t, TIMERS.recording); + // END UPDATE REFINEMENT // BEGIN Fhkl + gettimeofday(&t, 0); Kokkos::Tools::pushRegion("Begin Fhkl"); - if (db_cu_flags.update_Fhkl || ALLOC || FORCE_COPY) { - transfer(m_Fhkl, db_cryst.FhklLinear); + if (db_cu_flags.update_Fhkl || ALLOC) { + kokkostbx::transfer_vector2kokkos(m_Fhkl, db_cryst.FhklLinear); if (db_flags.complex_miller) { - transfer(m_Fhkl2, db_cryst.Fhkl2Linear); + kokkostbx::transfer_vector2kokkos(m_Fhkl2, db_cryst.Fhkl2Linear); } if (db_flags.verbose > 1) printf("H2D Done copying step Fhkl\n"); + db_cu_flags.update_Fhkl = false; } Kokkos::Tools::popRegion(); + easy_time(TIMERS.copy_fhkl, t, TIMERS.recording); // END Fhkl // BEGIN panel derivative vecs + gettimeofday(&t, 0); Kokkos::Tools::pushRegion("BEGIN panel derivative vecs"); - if (db_cu_flags.update_panel_deriv_vecs || ALLOC || FORCE_COPY) { + if (db_cu_flags.update_panel_deriv_vecs || ALLOC) { kokkostbx::transfer_vector2kokkos(m_dF_vecs, local_det.dF_vecs); kokkostbx::transfer_vector2kokkos(m_dS_vecs, local_det.dS_vecs); if (db_flags.verbose > 1) printf("H2D Done copying step panel derivative vectors\n"); + db_cu_flags.update_panel_deriv_vecs=false; } Kokkos::Tools::popRegion(); + easy_time(TIMERS.copy_detderiv, t, TIMERS.recording); // END panel derivative vecs // BEGIN panels fasts slows + gettimeofday(&t, 0); Kokkos::Tools::pushRegion("BEGIN panels fasts slows"); if (db_cu_flags.update_panels_fasts_slows || ALLOC || FORCE_COPY) { kokkostbx::transfer_vector2kokkos(m_panels_fasts_slows, panels_fasts_slows); @@ -392,14 +462,10 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( printf("H2D Done copying panels_fasts_slows\n"); } Kokkos::Tools::popRegion(); + easy_time(TIMERS.copy_pfs, t, TIMERS.recording); // END panels fasts slows - gettimeofday(&t2, 0); - time = (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; - if (TIMERS.recording) - TIMERS.cuda_copy_to_dev += time; - if (db_flags.verbose > 1) - printf("TIME SPENT COPYING DATA HOST->DEV: %3.10f ms \n", time); + easy_time(TIMERS.cuda_copy_to_dev, t1, TIMERS.recording); m_device_is_allocated = true; ::Kokkos::fence("after copy to device"); @@ -412,61 +478,105 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( if (db_cryst.fpfdp.size() == 0) { num_atoms = 0; } - // int sm_size = number_of_sources*5*sizeof(CUDAREAL); - // gpu_sum_over_steps<<>>( + bool aniso_eta = db_cryst.UMATS_RXYZ.size() != db_cryst.UMATS_RXYZ_prime.size(); bool use_nominal_hkl = !db_cryst.nominal_hkl.empty(); - kokkos_sum_over_steps( - Npix_to_model, m_panels_fasts_slows, m_floatimage, m_wavelenimage, m_d_Umat_images, - m_d2_Umat_images, m_d_Bmat_images, m_d2_Bmat_images, m_d_Ncells_images, m_d2_Ncells_images, - m_d_fcell_images, m_d2_fcell_images, m_d_eta_images, m_d2_eta_images, m_d_lambda_images, - m_d2_lambda_images, m_d_panel_rot_images, m_d2_panel_rot_images, m_d_panel_orig_images, - m_d2_panel_orig_images, m_d_fp_fdp_images, db_steps.Nsteps, db_flags.printout_fpixel, - db_flags.printout_spixel, db_flags.printout, db_cryst.default_F, local_det.oversample, - db_flags.oversample_omega, 
local_det.subpixel_size, local_det.pixel_size, - local_det.detector_thickstep, local_det.detector_thick, m_close_distances, - local_det.detector_attnlen, local_det.detector_thicksteps, local_beam.number_of_sources, - db_cryst.phisteps, db_cryst.UMATS.size(), db_flags.use_lambda_coefficients, - local_beam.lambda0, local_beam.lambda1, to_mat3(db_cryst.eig_U), to_mat3(db_cryst.eig_O), - to_mat3(db_cryst.eig_B), to_mat3(db_cryst.RXYZ), m_dF_vecs, m_dS_vecs, m_UMATS_RXYZ, m_UMATS_RXYZ_prime, - m_UMATS_RXYZ_dbl_prime, m_RotMats, m_dRotMats, m_d2RotMats, m_UMATS, m_dB_Mats, m_dB2_Mats, - m_AMATS, m_source_X, m_source_Y, m_source_Z, m_source_lambda, m_source_I, - local_beam.kahn_factor, db_cryst.Na, db_cryst.Nb, db_cryst.Nc, db_cryst.Nd, - db_cryst.Ne, db_cryst.Nf, db_cryst.phi0, db_cryst.phistep, - to_vec3(db_cryst.spindle_vec), local_beam.polarization_axis, db_cryst.h_range, - db_cryst.k_range, db_cryst.l_range, db_cryst.h_max, db_cryst.h_min, - db_cryst.k_max, db_cryst.k_min, db_cryst.l_max, db_cryst.l_min, - db_cryst.dmin, db_cryst.fudge, db_flags.complex_miller, db_flags.verbose, - db_flags.only_save_omega_kahn, db_flags.isotropic_ncells, db_flags.compute_curvatures, - m_Fhkl, m_Fhkl2, m_refine_Bmat, m_refine_Ncells, db_flags.refine_Ncells_def, - m_refine_panel_origin, m_refine_panel_rot, db_flags.refine_fcell, m_refine_lambda, - db_flags.refine_eta, m_refine_Umat, m_fdet_vectors, m_sdet_vectors, m_odet_vectors, - m_pix0_vectors, db_flags.nopolar, db_flags.point_pixel, local_beam.fluence, - db_cryst.r_e_sqr, db_cryst.spot_scale, Npanels, aniso_eta, db_flags.no_Nabc_scale, - m_fpfdp, m_fpfdp_derivs, m_atom_data, num_atoms, db_flags.refine_fp_fdp, m_nominal_hkl, - use_nominal_hkl, to_mat3(db_cryst.anisoU), to_mat3(db_cryst.anisoG), to_mat3(db_cryst.rotate_principal_axes), - db_flags.use_diffuse, m_d_diffuse_gamma_images, m_d_diffuse_sigma_images, db_flags.refine_diffuse, - db_flags.gamma_miller_units, db_flags.refine_Icell, db_flags.wavelength_img, - db_cryst.laue_group_num, db_cryst.stencil_size, db_flags.Fhkl_gradient_mode, - db_flags.Fhkl_errors_mode, db_flags.using_trusted_mask, db_beam.Fhkl_channels.empty(), - db_flags.Fhkl_have_scale_factors, db_cryst.Num_ASU, - m_data_residual, m_data_variance, - m_data_freq, m_data_trusted, - m_FhklLinear_ASUid, - m_Fhkl_channels, - m_Fhkl_scale, m_Fhkl_scale_deriv - ); + if ((db_flags.printout==false) && + (db_flags.complex_miller==false) && + (db_flags.compute_curvatures==false) && + (m_refine_flag==REFINE_FCELL) && + (db_flags.use_diffuse==false) && + (db_flags.wavelength_img==false) && + (db_flags.Fhkl_gradient_mode==false) && + (db_flags.Fhkl_errors_mode==false) && + (db_flags.using_trusted_mask==false) && + (db_beam.Fhkl_channels.empty()==true) && + (db_flags.Fhkl_have_scale_factors)==false) { + kokkos_sum_over_steps( + Npix_to_model, m_panels_fasts_slows, m_floatimage, m_wavelenimage, m_d_Umat_images, + m_d2_Umat_images, m_d_Bmat_images, m_d2_Bmat_images, m_d_Ncells_images, m_d2_Ncells_images, + m_d_fcell_images, m_d2_fcell_images, m_d_eta_images, m_d2_eta_images, m_d_lambda_images, + m_d2_lambda_images, m_d_panel_rot_images, m_d2_panel_rot_images, m_d_panel_orig_images, + m_d2_panel_orig_images, m_d_fp_fdp_images, m_manager_dI, m_manager_dI2, db_steps.Nsteps, + db_flags.printout_fpixel, db_flags.printout_spixel, /*db_flags.printout,*/ db_cryst.default_F, + local_det.oversample, db_flags.oversample_omega, local_det.subpixel_size, local_det.pixel_size, + local_det.detector_thickstep, local_det.detector_thick, m_close_distances, + 
local_det.detector_attnlen, local_det.detector_thicksteps, local_beam.number_of_sources, + db_cryst.phisteps, (int) db_cryst.UMATS.size(), db_flags.use_lambda_coefficients, + local_beam.lambda0, local_beam.lambda1, to_mat3(db_cryst.eig_U), to_mat3(db_cryst.eig_O), + to_mat3(db_cryst.eig_B), to_mat3(db_cryst.RXYZ), m_dF_vecs, m_dS_vecs, m_UMATS_RXYZ, m_UMATS_RXYZ_prime, + m_UMATS_RXYZ_dbl_prime, m_RotMats, m_dRotMats, m_d2RotMats, m_UMATS, m_dB_Mats, m_dB2_Mats, + m_AMATS, m_source_X, m_source_Y, m_source_Z, m_source_lambda, m_source_I, + local_beam.kahn_factor, db_cryst.Na, db_cryst.Nb, db_cryst.Nc, db_cryst.Nd, + db_cryst.Ne, db_cryst.Nf, db_cryst.phi0, db_cryst.phistep, + to_vec3(db_cryst.spindle_vec), local_beam.polarization_axis, db_cryst.h_range, + db_cryst.k_range, db_cryst.l_range, db_cryst.h_max, db_cryst.h_min, + db_cryst.k_max, db_cryst.k_min, db_cryst.l_max, db_cryst.l_min, + db_cryst.dmin, db_cryst.fudge, /*db_flags.complex_miller,*/ db_flags.verbose, + db_flags.only_save_omega_kahn, db_flags.isotropic_ncells, /*db_flags.compute_curvatures,*/ + m_Fhkl, m_Fhkl2, /*m_refine_flag,*/ + m_fdet_vectors, m_sdet_vectors, m_odet_vectors, + m_pix0_vectors, db_flags.nopolar, db_flags.point_pixel, local_beam.fluence, + db_cryst.r_e_sqr, db_cryst.spot_scale, Npanels, aniso_eta, db_flags.no_Nabc_scale, + m_fpfdp, m_fpfdp_derivs, m_atom_data, num_atoms, m_nominal_hkl, + use_nominal_hkl, to_mat3(db_cryst.anisoU), to_mat3(db_cryst.anisoG), to_mat3(db_cryst.rotate_principal_axes), + /*db_flags.use_diffuse,*/ m_d_diffuse_gamma_images, m_d_diffuse_sigma_images, + db_flags.gamma_miller_units, /*db_flags.wavelength_img,*/ + db_cryst.laue_group_num, db_cryst.stencil_size, /*db_flags.Fhkl_gradient_mode,*/ + /*db_flags.Fhkl_errors_mode,*/ /*db_flags.using_trusted_mask,*/ /*db_beam.Fhkl_channels.empty(),*/ + /*db_flags.Fhkl_have_scale_factors,*/ db_cryst.Num_ASU, + m_data_residual, m_data_variance, + m_data_freq, m_data_trusted, + m_FhklLinear_ASUid, + m_Fhkl_channels, + m_Fhkl_scale, m_Fhkl_scale_deriv); + } else { + kokkos_sum_over_steps( + Npix_to_model, m_panels_fasts_slows, m_floatimage, m_wavelenimage, m_d_Umat_images, + m_d2_Umat_images, m_d_Bmat_images, m_d2_Bmat_images, m_d_Ncells_images, m_d2_Ncells_images, + m_d_fcell_images, m_d2_fcell_images, m_d_eta_images, m_d2_eta_images, m_d_lambda_images, + m_d2_lambda_images, m_d_panel_rot_images, m_d2_panel_rot_images, m_d_panel_orig_images, + m_d2_panel_orig_images, m_d_fp_fdp_images, m_manager_dI, m_manager_dI2, db_steps.Nsteps, + db_flags.printout_fpixel, db_flags.printout_spixel, db_flags.printout, db_cryst.default_F, + local_det.oversample, db_flags.oversample_omega, local_det.subpixel_size, local_det.pixel_size, + local_det.detector_thickstep, local_det.detector_thick, m_close_distances, + local_det.detector_attnlen, local_det.detector_thicksteps, local_beam.number_of_sources, + db_cryst.phisteps, db_cryst.UMATS.size(), db_flags.use_lambda_coefficients, + local_beam.lambda0, local_beam.lambda1, to_mat3(db_cryst.eig_U), to_mat3(db_cryst.eig_O), + to_mat3(db_cryst.eig_B), to_mat3(db_cryst.RXYZ), m_dF_vecs, m_dS_vecs, m_UMATS_RXYZ, m_UMATS_RXYZ_prime, + m_UMATS_RXYZ_dbl_prime, m_RotMats, m_dRotMats, m_d2RotMats, m_UMATS, m_dB_Mats, m_dB2_Mats, + m_AMATS, m_source_X, m_source_Y, m_source_Z, m_source_lambda, m_source_I, + local_beam.kahn_factor, db_cryst.Na, db_cryst.Nb, db_cryst.Nc, db_cryst.Nd, + db_cryst.Ne, db_cryst.Nf, db_cryst.phi0, db_cryst.phistep, + to_vec3(db_cryst.spindle_vec), local_beam.polarization_axis, db_cryst.h_range, + 
db_cryst.k_range, db_cryst.l_range, db_cryst.h_max, db_cryst.h_min, + db_cryst.k_max, db_cryst.k_min, db_cryst.l_max, db_cryst.l_min, + db_cryst.dmin, db_cryst.fudge, db_flags.complex_miller, db_flags.verbose, + db_flags.only_save_omega_kahn, db_flags.isotropic_ncells, db_flags.compute_curvatures, + m_Fhkl, m_Fhkl2, m_refine_flag, + m_fdet_vectors, m_sdet_vectors, m_odet_vectors, + m_pix0_vectors, db_flags.nopolar, db_flags.point_pixel, local_beam.fluence, + db_cryst.r_e_sqr, db_cryst.spot_scale, Npanels, aniso_eta, db_flags.no_Nabc_scale, + m_fpfdp, m_fpfdp_derivs, m_atom_data, num_atoms, m_nominal_hkl, + use_nominal_hkl, to_mat3(db_cryst.anisoU), to_mat3(db_cryst.anisoG), to_mat3(db_cryst.rotate_principal_axes), + db_flags.use_diffuse, m_d_diffuse_gamma_images, m_d_diffuse_sigma_images, + db_flags.gamma_miller_units, db_flags.wavelength_img, + db_cryst.laue_group_num, db_cryst.stencil_size, db_flags.Fhkl_gradient_mode, + db_flags.Fhkl_errors_mode, db_flags.using_trusted_mask, db_beam.Fhkl_channels.empty(), + db_flags.Fhkl_have_scale_factors, db_cryst.Num_ASU, + m_data_residual, m_data_variance, + m_data_freq, m_data_trusted, + m_FhklLinear_ASUid, + m_Fhkl_channels, + m_Fhkl_scale, m_Fhkl_scale_deriv + ); + } ::Kokkos::fence("after kernel call"); if (db_flags.verbose > 1) printf("KERNEL_COMPLETE gpu_sum_over_steps\n"); - gettimeofday(&t2, 0); - time = (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; - if (TIMERS.recording) - TIMERS.cuda_kernel += time; - if (db_flags.verbose > 1) - printf("TIME SPENT(KERNEL): %3.10f ms \n", time); + easy_time(TIMERS.cuda_kernel, t1, TIMERS.recording); gettimeofday(&t1, 0); // COPY BACK FROM DEVICE @@ -482,12 +592,10 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( } if (db_flags.Fhkl_gradient_mode){ if (db_flags.Fhkl_errors_mode){ - for (int i=0; i < d_image.Fhkl_hessian.size(); i++) - d_image.Fhkl_hessian[i]= m_Fhkl_scale_deriv(i); + kokkostbx::transfer_kokkos2vector(d_image.Fhkl_hessian, m_Fhkl_scale_deriv); } else{ - for (int i=0; i < d_image.Fhkl_scale_deriv.size(); i++) - d_image.Fhkl_scale_deriv[i]= m_Fhkl_scale_deriv(i); + kokkostbx::transfer_kokkos2vector(d_image.Fhkl_scale_deriv, m_Fhkl_scale_deriv); } } if (std::count(db_flags.refine_Umat.begin(), db_flags.refine_Umat.end(), true) > 0) { @@ -526,12 +634,7 @@ void diffBraggKOKKOS::diffBragg_sum_over_steps_kokkos( } Kokkos::Tools::popRegion(); - gettimeofday(&t2, 0); - time = (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000.0; - if (TIMERS.recording) - TIMERS.cuda_copy_from_dev += time; - if (db_flags.verbose > 1) - printf("TIME SPENT COPYING BACK : %3.10f ms \n", time); + easy_time(TIMERS.cuda_copy_from_dev, t1, TIMERS.recording); ::Kokkos::fence("After copy to host"); Kokkos::Tools::popRegion(); diff --git a/simtbx/diffBragg/src/diffBraggKOKKOS.h b/simtbx/diffBragg/src/diffBraggKOKKOS.h index cc074b938a..3aa07ae27b 100644 --- a/simtbx/diffBragg/src/diffBraggKOKKOS.h +++ b/simtbx/diffBragg/src/diffBraggKOKKOS.h @@ -9,9 +9,11 @@ #include "kokkostbx/kokkos_utils.h" #include "simtbx/diffBragg/src/util.h" #include "simtbx/diffBragg/src/util_kokkos.h" +#include "simtbx/diffBragg/src/diffBragg_refine_flag.h" using vector_vec3_t = view_1d_t; using vector_mat3_t = view_1d_t; +using vector_manager_t = view_1d_t; #define INTEGER_VIEW(varname) vector_int_t varname = vector_int_t(#varname, 0) #define CUDAREAL_VIEW(varname) vector_cudareal_t varname = vector_cudareal_t(#varname, 0) @@ -101,6 +103,7 @@ class diffBraggKOKKOS { MATRIX3_VIEW(m_sausages_U); 
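The single uint32_t refinement mask added to this class just below (m_refine_flag) caches the value built by combine_refinement_flags() defined earlier in diffBraggKOKKOS.cpp, replacing the separate refine_* boolean arguments the kernel used to take. Assuming the REFINE_* constants declared in diffBragg_refine_flag.h are distinct single-bit values (their definitions are not shown in this patch), the mask is built and queried roughly as in this sketch:

    #include <cstdint>

    // Hypothetical bit values; the real ones live in diffBragg_refine_flag.h.
    constexpr uint32_t REFINE_FCELL   = 1u << 0;
    constexpr uint32_t REFINE_DIFFUSE = 1u << 1;
    constexpr uint32_t REFINE_UMAT1   = 1u << 2;

    uint32_t build_mask(bool refine_fcell, bool refine_diffuse, bool refine_umat1) {
        uint32_t mask = 0;
        mask |= refine_fcell   * REFINE_FCELL;    // bool promotes to 0 or 1
        mask |= refine_diffuse * REFINE_DIFFUSE;
        mask |= refine_umat1   * REFINE_UMAT1;
        return mask;
    }

    // Inside the kernel a single bit test replaces a bool-array lookup, e.g.
    //   if (refine_flag & REFINE_DIFFUSE) { /* accumulate diffuse gradients */ }
    // and refine_flag == REFINE_FCELL identifies the common "F_cell only"
    // refinement case that is routed to the streamlined kernel call above.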
CUDAREAL_VIEW(m_sausages_scale); + uint32_t m_refine_flag = 0; vector_bool_t m_refine_Bmat = vector_bool_t("m_refine_Bmat", 6); vector_bool_t m_refine_Umat = vector_bool_t("m_refine_Umat", 3); vector_bool_t m_refine_Ncells = vector_bool_t("m_refine_Ncells", 3); @@ -108,6 +111,9 @@ class diffBraggKOKKOS { vector_bool_t m_refine_panel_rot = vector_bool_t("m_refine_panel_rot", 3); vector_bool_t m_refine_lambda = vector_bool_t("m_refine_lambda", 2); + vector_manager_t m_manager_dI = vector_manager_t("m_manager_dI", 0); + vector_manager_t m_manager_dI2 = vector_manager_t("m_manager_dI2", 0); + bool m_Fhkl_gradient_mode; bool m_using_trusted_mask; bool m_Fhkl_channels_empty; @@ -119,12 +125,13 @@ class diffBraggKOKKOS { INTEGER_VIEW(m_data_freq); // length is number of modeled pixels vector_bool_t m_data_trusted = vector_bool_t("m_data_trusted", 0); // length is number of modeled pixels INTEGER_VIEW(m_FhklLinear_ASUid); // length is number of ASU in FhklLinear - CUDAREAL_VIEW(m_Fhkl_channels); + INTEGER_VIEW(m_Fhkl_channels); // Fhkl_scale is dynamically copied each iteration // Fhkl_scale_deriv is set to 0 each iteration CUDAREAL_VIEW(m_Fhkl_scale); // length is (number of ASUin FhklLinear) *times* (number of Fhkl channels) CUDAREAL_VIEW(m_Fhkl_scale_deriv); // length is (number of ASUin FhklLinear) *times* (number of Fhkl channels) + public: void diffBragg_sum_over_steps_kokkos( int Npix_to_model, diff --git a/simtbx/diffBragg/src/diffBragg_cpu_kernel.cpp b/simtbx/diffBragg/src/diffBragg_cpu_kernel.cpp index decf9f9145..22dd70275a 100644 --- a/simtbx/diffBragg/src/diffBragg_cpu_kernel.cpp +++ b/simtbx/diffBragg/src/diffBragg_cpu_kernel.cpp @@ -311,6 +311,10 @@ void diffBragg_sum_over_steps( anisoG_local = db_cryst.anisoG; anisoU_local = db_cryst.anisoU; + if (laue_group_num < 1 || laue_group_num >14 ){ + throw std::string("Laue group number not in range 1-14"); + } + num_laue_mats = gen_laue_mats(laue_group_num, laue_mats, db_cryst.rotate_principal_axes); for (int i_gam=0; i_gam<3; i_gam++){ dG_dgam[i_gam] << 0,0,0,0,0,0,0,0,0; @@ -913,6 +917,7 @@ void diffBragg_sum_over_steps( int nom_l=l0; //int f_cell_idx = 1; if (use_nominal_hkl){ + nom_h = db_cryst.nominal_hkl[i_pix*3]; nom_k = db_cryst.nominal_hkl[i_pix*3+1]; nom_l = db_cryst.nominal_hkl[i_pix*3+2]; diff --git a/simtbx/diffBragg/src/diffBragg_ext.cpp b/simtbx/diffBragg/src/diffBragg_ext.cpp index 7d2ba180f1..997a6bea6d 100644 --- a/simtbx/diffBragg/src/diffBragg_ext.cpp +++ b/simtbx/diffBragg/src/diffBragg_ext.cpp @@ -23,6 +23,43 @@ namespace boost_python { namespace { return indices; } + static void set_beams(simtbx::nanoBragg::diffBragg& diffBragg, scitbx::af::versa > const& value) { + if(diffBragg.verbose>3) printf(" about to initialize sources\n"); + diffBragg.pythony_beams = value; + if(diffBragg.verbose>3) printf(" done\n"); + diffBragg.db_cu_flags.update_sources=true; + /* re-initialize source table from pythony array */ + diffBragg.init_sources(); + } + // TODO: point to the get_beams defined in simtbx/nanoBragg/nanoBragg_ext.cpp if possible.. 
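set_beams above flips db_cu_flags.update_sources before re-running init_sources(), and the Kokkos copy path earlier in this patch re-transfers the source table only when that flag (or the first allocation) is set, clearing it afterwards. The detector, Fhkl and refine-flag buffers follow the same lazy host-to-device pattern; a stripped-down sketch of the idea, with illustrative names rather than the real classes:

    #include <vector>

    // Lazy host-to-device sync: setters mark a buffer stale, the pre-kernel
    // sync copies it once and clears the flag until the next host-side change.
    struct DeviceCache {
        std::vector<double> host_source_X;  // host-side copy, edited from Python
        bool update_sources = false;        // set by setters such as set_beams
        bool allocated = false;             // first kernel launch copies everything

        void set_sources(const std::vector<double>& x) {
            host_source_X = x;
            update_sources = true;          // device copy is now stale
        }

        void sync_before_kernel() {
            if (update_sources || !allocated) {
                // e.g. kokkostbx::transfer_vector2kokkos(m_source_X, host_source_X);
                update_sources = false;     // copied; skip until next change
            }
            allocated = true;
        }
    };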
+ ///* table of sources, as dxtbx "beam"s */ + static scitbx::af::versa > get_beams(simtbx::nanoBragg::diffBragg& diffBragg) { + int i; + /* allocate new flex array */ +// scitbx::af::versa > diffBragg_pythony_beams; + diffBragg.pythony_beams = scitbx::af::versa >(); + /* make sure it is big enough to hold all sources */ + diffBragg.pythony_beams.resize(diffBragg.sources); + + /* polarization normal seems to be B vector */ + scitbx::vec3 Evector = scitbx::vec3(diffBragg.polar_vector[1],diffBragg.polar_vector[2],diffBragg.polar_vector[3]); + scitbx::vec3 Pvector = scitbx::vec3(diffBragg.beam_vector[1],diffBragg.beam_vector[2],diffBragg.beam_vector[3]); + scitbx::vec3 Bvector = Pvector.cross(Evector).normalize(); + + /* copy internal storage into the flex array */ + for(i=0;i(diffBragg.source_X[i],diffBragg.source_Y[i],diffBragg.source_Z[i])); + diffBragg.pythony_beams[i].set_wavelength(diffBragg.source_lambda[i]*1e10); + diffBragg.pythony_beams[i].set_flux(diffBragg.source_I[i]); + // how is this a fraction when it can be negative? (Kahn et al. 1982) + diffBragg.pythony_beams[i].set_polarization_fraction(diffBragg.polarization); + diffBragg.pythony_beams[i].set_polarization_normal(Bvector); + } + /* pass this back to python */ + return diffBragg.pythony_beams; + } + + void set_dspace_bins(simtbx::nanoBragg::diffBragg& diffBragg, boost::python::list bins){ diffBragg.db_cryst.dspace_bins.clear(); for (int i=0; i< boost::python::len(bins); i++ ){ @@ -155,7 +192,7 @@ namespace boost_python { namespace { values = boost::python::make_tuple(0,0); return values; } - //TODO override the set_sources function (or xray_beams) property in nanoBragg in order + //TODO override the set_sources function (or xray_beams) property in nanoBragg // to set the fpfdp accordingly (if Fhkl2 is set) static void set_atom_data(simtbx::nanoBragg::diffBragg & diffBragg, boost::python::tuple const& atom_XYZBO){ @@ -418,6 +455,7 @@ namespace boost_python { namespace { static void set_Fhkl_tuple(simtbx::nanoBragg::diffBragg& diffBragg, boost::python::tuple const& value) { //TODO nanoBragg set as well ? 
+ diffBragg.db_cu_flags.update_Fhkl=true; diffBragg.pythony_indices = extract(value[0]); diffBragg.pythony_amplitudes = extract >(value[1]); diffBragg.init_Fhkl(); @@ -432,7 +470,7 @@ namespace boost_python { namespace { diffBragg.linearize_Fhkl(true); } - static boost::python::tuple get_Fhkl_tuple(simtbx::nanoBragg::diffBragg diffBragg) { + static boost::python::tuple get_Fhkl_tuple(nanoBragg::diffBragg diffBragg) { int h,k,l; double temp; int hkls = diffBragg.h_range*diffBragg.k_range*diffBragg.l_range; @@ -471,9 +509,8 @@ namespace boost_python { namespace { } void initialize_kokkos(int dev){ - Kokkos::InitArguments kokkos_init; - kokkos_init.device_id = dev; - Kokkos::initialize(kokkos_init); + Kokkos::initialize(Kokkos::InitializationSettings() + .set_device_id(dev)); } #endif @@ -628,12 +665,14 @@ namespace boost_python { namespace { .def("show_heavy_atom_data", &simtbx::nanoBragg::diffBragg::show_heavy_atom_data) + .def("gpu_free",&simtbx::nanoBragg::diffBragg::gpu_free) + #ifdef DIFFBRAGG_HAVE_CUDA .def("gpu_free",&simtbx::nanoBragg::diffBragg::cuda_free) #endif #ifdef DIFFBRAGG_HAVE_KOKKOS - .def("kokkos_gpu_free",&simtbx::nanoBragg::diffBragg::kokkos_free) + .def("kokkos_free",&simtbx::nanoBragg::diffBragg::kokkos_free) #endif .def("set_mosaic_blocks_prime", @@ -789,9 +828,9 @@ namespace boost_python { namespace { "coefficients for source_lambda refinement: `lambda = coef0 + coef1*source` where `source` is the source index") // CUDA PROPERTIES - .add_property("use_cuda", - make_getter(&simtbx::nanoBragg::diffBragg::use_cuda,rbv()), - make_setter(&simtbx::nanoBragg::diffBragg::use_cuda,dcp()), + .add_property("use_gpu", + make_getter(&simtbx::nanoBragg::diffBragg::use_gpu,rbv()), + make_setter(&simtbx::nanoBragg::diffBragg::use_gpu,dcp()), "use GPU acceleration") .add_property("record_time", @@ -886,6 +925,15 @@ namespace boost_python { namespace { &simtbx::nanoBragg::diffBragg::set_Friedel_mate_inds, "Two arguments; each lists of the same length, pointing to the positive and negative mates in a Friedel pair, respectively") + .def("get_mosaic_blocks_prime", + &simtbx::nanoBragg::diffBragg::get_mosaic_blocks_prime, + "return the deriv of the matrices U that define the mosaic block distribution w.r.t eta") + + .add_property("xray_beams", + make_function(&get_beams,rbv()), + make_function(&set_beams,dcp()), + "list of dxtbx::Beam objects corresponding to each zero-divergence and monochromatic x-ray point source in the numerical simulation ") + ; // end of diffBragg extention } // end of diffBragg_init_module diff --git a/simtbx/diffBragg/src/diffBragg_gpu_kernel.cu b/simtbx/diffBragg/src/diffBragg_gpu_kernel.cu index 9c382a5c60..6bf8b801e4 100644 --- a/simtbx/diffBragg/src/diffBragg_gpu_kernel.cu +++ b/simtbx/diffBragg/src/diffBragg_gpu_kernel.cu @@ -465,7 +465,7 @@ void gpu_sum_over_steps( CUDAREAL _F_cell = s_default_F; CUDAREAL _F_cell2 = 0; - int i_hklasu=0; + int i_hklasu=0; if ( (_h0<=s_h_max) && (_h0>=s_h_min) && (_k0<=s_k_max) && (_k0>=s_k_min) && (_l0<=s_l_max) && (_l0>=s_l_min) ) { int Fhkl_linear_index = (_h0-s_h_min) * s_k_range * s_l_range + (_k0-s_k_min) * s_l_range + (_l0-s_l_min); diff --git a/simtbx/diffBragg/src/diffBragg_kokkos_kernel.cpp b/simtbx/diffBragg/src/diffBragg_kokkos_kernel.cpp index 69750ad9be..5b1acf243f 100644 --- a/simtbx/diffBragg/src/diffBragg_kokkos_kernel.cpp +++ b/simtbx/diffBragg/src/diffBragg_kokkos_kernel.cpp @@ -2,11 +2,6 @@ #include "diffBraggKOKKOS.h" #include -// using ::Kokkos::Experimental::exp; -// using ::Kokkos::Experimental::sin; 
-// using ::Kokkos::Experimental::cos; -// using ::Kokkos::Experimental::sqrt; - void kokkos_sum_over_steps( int Npix_to_model, vector_uint_t panels_fasts_slows, @@ -29,6 +24,8 @@ void kokkos_sum_over_steps( vector_cudareal_t d_panel_orig_images, vector_cudareal_t d2_panel_orig_images, vector_cudareal_t d_fp_fdp_images, + vector_manager_t manager_dI, + vector_manager_t manager_dI2, const int Nsteps, int printout_fpixel, int printout_spixel, @@ -99,15 +96,16 @@ void kokkos_sum_over_steps( bool compute_curvatures, const vector_cudareal_t FhklLinear, const vector_cudareal_t Fhkl2Linear, - vector_bool_t refine_Bmat, - vector_bool_t refine_Ncells, - bool refine_Ncells_def, - vector_bool_t refine_panel_origin, - vector_bool_t refine_panel_rot, - bool refine_fcell, - vector_bool_t refine_lambda, - bool refine_eta, - vector_bool_t refine_Umat, + const uint32_t refine_flag, + // vector_bool_t refine_Bmat, + // vector_bool_t refine_Ncells, + // bool refine_Ncells_def, + // vector_bool_t refine_panel_origin, + // vector_bool_t refine_panel_rot, + // bool refine_fcell, + // vector_bool_t refine_lambda, + // bool refine_eta, + // vector_bool_t refine_Umat, const vector_cudareal_t fdet_vectors, const vector_cudareal_t sdet_vectors, const vector_cudareal_t odet_vectors, @@ -124,7 +122,7 @@ void kokkos_sum_over_steps( const vector_cudareal_t fpfdp_derivs, const vector_cudareal_t atom_data, int num_atoms, - bool refine_fp_fdp, + // bool refine_fp_fdp, const vector_int_t nominal_hkl, bool use_nominal_hkl, KOKKOS_MAT3 anisoU, @@ -133,9 +131,9 @@ void kokkos_sum_over_steps( bool use_diffuse, vector_cudareal_t d_diffuse_gamma_images, vector_cudareal_t d_diffuse_sigma_images, - bool refine_diffuse, + // bool refine_diffuse, bool gamma_miller_units, - bool refine_Icell, + // bool refine_Icell, bool save_wavelenimage, int laue_group_num, int stencil_size, @@ -150,91 +148,10 @@ void kokkos_sum_over_steps( const vector_int_t data_freq, const vector_bool_t data_trusted, const vector_int_t FhklLinear_ASUid, - const vector_cudareal_t Fhkl_channels, + const vector_int_t Fhkl_channels, const vector_cudareal_t Fhkl_scale, vector_cudareal_t Fhkl_scale_deriv) { // BEGIN GPU kernel - // int tid = blockIdx.x * blockDim.x + threadIdx.x; - // int thread_stride = blockDim.x * gridDim.x; - // __shared__ bool s_Fhkl_channels_empty; - // __shared__ bool s_Fhkl_have_scale_factors; - // __shared__ bool s_Fhkl_gradient_mode; - // __shared__ bool s_Fhkl_errors_mode; - // __shared__ int s_Num_ASU; - // __shared__ bool s_refine_Icell; - // __shared__ bool s_use_diffuse; - // __shared__ bool s_use_nominal_hkl; - // __shared__ bool s_refine_fp_fdp; - // __shared__ bool s_complex_miller; - // __shared__ int s_num_atoms; - // __shared__ bool s_aniso_eta; - // __shared__ bool s_no_Nabc_scale; - // __shared__ bool s_compute_curvatures; - // __shared__ KOKKOS_MAT3 s_Ot; - // __shared__ bool s_refine_diffuse; - // __shared__ bool s_gamma_miller_units; - // __shared__ KOKKOS_MAT3 _NABC; - // __shared__ KOKKOS_MAT3 s_dN; - // __shared__ CUDAREAL C; - // __shared__ CUDAREAL two_C; - // __shared__ KOKKOS_MAT3 Bmat_realspace; - // __shared__ KOKKOS_MAT3 Amat_init; - //__shared__ CUDAREAL s_Na; - //__shared__ CUDAREAL s_Nb; - //__shared__ CUDAREAL s_Nc; - // __shared__ CUDAREAL s_NaNbNc_squared; // unused? 
- // __shared__ int s_h_max, s_k_max, s_l_max, s_h_min, s_k_min, s_l_min, s_k_range, s_l_range; - // __shared__ int s_sources, s_mosaic_domains; - // __shared__ CUDAREAL s_detector_attnlen, s_lambda0, s_lambda1; - // __shared__ bool s_printout; - // __shared__ KOKKOS_VEC3 s_polarization_axis; - - // __shared__ bool s_refine_Umat[3]; - // __shared__ bool s_refine_panel_origin[3]; - // __shared__ bool s_refine_panel_rot[3]; - // __shared__ bool s_refine_Ncells[3]; - // __shared__ bool s_refine_eta; - // __shared__ bool s_refine_Ncells_def; - // __shared__ bool s_refine_fcell; - // __shared__ bool s_refine_Bmat[6]; - // __shared__ bool s_refine_lambda[2]; - // __shared__ double s_NABC_det, s_NABC_det_sq; - // extern __shared__ CUDAREAL det_vecs[]; - //__shared__ int det_stride; - - // TODO can we get speed gains by dividing up the following definitions over more threads ? - // if (threadIdx.x == 0) { - // for (int i = 0; i < 3; i++) { - // s_refine_Ncells[i] = refine_Ncells[i]; - // s_refine_Umat[i] = refine_Umat[i]; - // s_refine_panel_origin[i] = refine_panel_origin[i]; - // s_refine_panel_rot[i] = refine_panel_rot[i]; - // } - // s_Fhkl_channels_empty = Fhkl_channels_empty; - // s_Fhkl_have_scale_factors = Fhkl_have_scale_factors; - // s_Fhkl_gradient_mode = Fhkl_gradient_mode; - // s_Fhkl_errors_mode = Fhkl_errors_mode; - // s_Num_ASU = Num_ASU; - // s_refine_Icell = refine_Icell; - // s_use_nominal_hkl = use_nominal_hkl; - // s_aniso_eta = aniso_eta; - // s_no_Nabc_scale = no_Nabc_scale; - // s_complex_miller = complex_miller; - // s_refine_lambda[0] = refine_lambda[0]; - // s_refine_lambda[1] = refine_lambda[1]; - // for (int i = 0; i < 6; i++) { - // s_refine_Bmat[i] = refine_Bmat[i]; - // } - // s_use_diffuse = use_diffuse; - // s_num_atoms = num_atoms; - // s_refine_fcell = refine_fcell; - // s_refine_eta = refine_eta; - // s_refine_Ncells_def = refine_Ncells_def; - // s_compute_curvatures = compute_curvatures; - // s_refine_fp_fdp = refine_fp_fdp; - // s_refine_diffuse = refine_diffuse; - // s_gamma_miller_units = gamma_miller_units; - const KOKKOS_MAT3 Bmat_realspace = eig_B * 1e10; const KOKKOS_MAT3 eig_Otranspose = eig_O.transpose(); const KOKKOS_MAT3 Amat_init = eig_U * Bmat_realspace * eig_Otranspose; @@ -245,1027 +162,2325 @@ void kokkos_sum_over_steps( const CUDAREAL C = 2 / 0.63 * fudge; const CUDAREAL two_C = 2 * C; KOKKOS_MAT3 anisoG_local; + CUDAREAL anisoG_determ = 0; KOKKOS_MAT3 anisoU_local; - KOKKOS_MAT3 laue_mats[24]; - KOKKOS_MAT3 dG_dgam[3]; + vector_mat3_t laue_mats = vector_mat3_t("laue_mats", 24); + vector_vec3_t dG_dgam = vector_vec3_t("dG_dgam", 3); + vector_cudareal_t dG_trace = vector_cudareal_t("dG_trace", 3); int num_laue_mats = 0; int dhh = 0, dkk = 0, dll = 0; - KOKKOS_VEC3 Hmin, Hmax, dHH, Hrange; - // s_Na = Na; - // s_Nb = Nb; - // s_Nc = Nc; - // s_NaNbNc_squared = (Na * Nb * Nc); - // s_NaNbNc_squared *= s_NaNbNc_squared; - // s_h_max = h_max; - // s_k_max = k_max; - // s_l_max = l_max; - // s_h_min = h_min; - // s_k_min = k_min; - // s_l_min = l_min; - // s_h_range = h_range; - // s_k_range = k_range; - // s_l_range = l_range; - - // s_oversample = oversample; - // s_detector_thicksteps = detector_thicksteps; - // s_sources = sources; - // s_mosaic_domains = mosaic_domains; - // s_detector_thickstep = detector_thickstep; - // s_detector_attnlen = detector_attnlen; - // s_subpixel_size = subpixel_size; - // s_pixel_size = pixel_size; - // s_detector_thick = _detector_thick; - // s_lambda0 = lambda0; - // s_lambda1 = lambda1; - // s_oversample_omega 
= _oversample_omega; - // s_printout = printout; - // s_printout_fpixel = printout_fpixel; - // s_printout_spixel = printout_spixel; - // s_default_F = default_F; - // s_verbose = verbose; - // s_polarization_axis = polarization_axis; - // s_kahn_factor = kahn_factor; - // s_nopolar = nopolar; - // sX0 = source_X[0]; - // sY0 = source_Y[0]; - // sZ0 = source_Z[0]; - // s_Nsteps = Nsteps; - - // } + + Kokkos::View UMATS_prime("UMATS_prime", mosaic_domains); + Kokkos::View UMATS_dbl_prime("UMATS_dbl_prime", mosaic_domains); + Kokkos::View BMATS_prime("BMATS_prime", mosaic_domains); + Kokkos::View BMATS_dbl_prime("BMATS_dbl_prime", mosaic_domains); + + Kokkos::parallel_for("prepare_UMATS", mosaic_domains, KOKKOS_LAMBDA(const int& _mos_tic) { + const KOKKOS_MAT3 UBOt = Amat_init; + UMATS_prime(_mos_tic, 0) = _NABC * (UMATS(_mos_tic) * dRotMats(0) * RotMats(1) * RotMats(2) * UBOt).transpose(); + UMATS_prime(_mos_tic, 1) = _NABC * (UMATS(_mos_tic) * RotMats(0) * dRotMats(1) * RotMats(2) * UBOt).transpose(); + UMATS_prime(_mos_tic, 2) = _NABC * (UMATS(_mos_tic) * RotMats(0) * RotMats(1) * dRotMats(2) * UBOt).transpose(); + + UMATS_dbl_prime(_mos_tic, 0) = _NABC * (UMATS(_mos_tic) * d2RotMats(0) * RotMats(1) * RotMats(2) * UBOt).transpose(); + UMATS_dbl_prime(_mos_tic, 1) = _NABC * (UMATS(_mos_tic) * RotMats(0) * d2RotMats(1) * RotMats(2) * UBOt).transpose(); + UMATS_dbl_prime(_mos_tic, 2) = _NABC * (UMATS(_mos_tic) * RotMats(0) * RotMats(1) * d2RotMats(2) * UBOt).transpose(); + + for (int i_uc=0; i_uc<6; i_uc++) { + BMATS_prime(_mos_tic, i_uc) = _NABC * (UMATS_RXYZ(_mos_tic) * eig_U * dB_mats(i_uc) * eig_O.transpose()).transpose(); + BMATS_dbl_prime(_mos_tic, i_uc) = _NABC * (UMATS_RXYZ(_mos_tic) * eig_U * dB2_mats(i_uc) * eig_O.transpose()).transpose(); + } + }); if (use_diffuse){ anisoG_local = anisoG; anisoU_local = anisoU; - num_laue_mats = gen_laue_mats(laue_group_num, laue_mats, rotate_principal_axes); - - for (int i_gam=0; i_gam<3; i_gam++){ - dG_dgam[i_gam] << 0,0,0,0,0,0,0,0,0; - dG_dgam[i_gam](i_gam, i_gam) = 1; + if (laue_group_num < 1 || laue_group_num >14 ){ + throw std::string("Laue group number not in range 1-14"); } + if (gamma_miller_units){ anisoG_local = anisoG_local * Bmat_realspace; + } + Kokkos::parallel_reduce("prepare diffuse mats", 1, KOKKOS_LAMBDA (const int& i, int& num_laue_mats_temp){ + num_laue_mats_temp = gen_laue_mats(laue_group_num, laue_mats, rotate_principal_axes); + // KOKKOS_MAT3 rotate_principal_axes; + // rotate_principal_axes << 0.70710678, -0.70710678, 0., 0.70710678, 0.70710678, 0., 0., 0., 1.; + + for ( int iL = 0; iL < num_laue_mats_temp; iL++ ){ + laue_mats(iL) = Ainv * laue_mats(iL); + } + // printf("Bmat ="); + // for (int i=0; i<9; ++i) { + // printf(" %g", Bmat_realspace[i]); + // } + // printf("\n"); + const KOKKOS_MAT3 Ginv = anisoG_local.inverse(); + // printf("Ginv ="); + // for (int i=0; i<9; ++i) { + // printf(" %g", Ginv[i]); + // } + // printf("\n"); + const KOKKOS_MAT3 dG = Bmat_realspace * Ginv; + // printf("dG ="); + // for (int i=0; i<9; ++i) { + // printf(" %g", dG[i]); + // } + // printf("\n"); for (int i_gam=0; i_gam<3; i_gam++){ - dG_dgam[i_gam] = dG_dgam[i_gam] * Bmat_realspace; + if (gamma_miller_units) { + dG_dgam(i_gam) = KOKKOS_VEC3(Bmat_realspace(i_gam, 0), Bmat_realspace(i_gam, 1), Bmat_realspace(i_gam, 2)); + } else { + dG_dgam(i_gam)[i_gam] = 1; + } + KOKKOS_MAT3 temp_dgam; + temp_dgam(i_gam, 0) = dG_dgam(i_gam)[0]; + temp_dgam(i_gam, 1) = dG_dgam(i_gam)[1]; + temp_dgam(i_gam, 2) = dG_dgam(i_gam)[2]; + dG_trace(i_gam) = 
(Ginv*temp_dgam).trace(); + // printf("TRACE %g\n", dG_trace(i_gam)); + // printf("dgam ="); + // for (int i=0; i<9; ++i) { + // printf(" %g", temp_dgam[i]); + // } + // printf("\n"); + + // dG(i_gam, i_gam); } - } + }, num_laue_mats); + anisoG_determ = anisoG_local.determinant(); dhh = dkk = dll = stencil_size; // Limits of stencil for diffuse calc } - Hmin << h_min, k_min, l_min; - Hmax << h_max, k_max, l_max; - dHH << dhh, dkk, dll; - Hrange << h_range, k_range, l_range; + const KOKKOS_VEC3 dHH (dhh, dkk, dll); const CUDAREAL overall_scale = r_e_sqr * spot_scale * fluence / Nsteps; + const CUDAREAL detector_attnlen_r = (detector_attnlen>0) ? 1 / detector_attnlen : 0; + Kokkos::parallel_for( "sum_over_steps", Npix_to_model, KOKKOS_LAMBDA(const int& pixIdx) { - // __syncthreads(); - // for (int pixIdx=tid; pixIdx < Npix_to_model; pixIdx+= thread_stride){ - if (using_trusted_mask) { - if (!data_trusted(pixIdx)) - return; - } - int _pid = panels_fasts_slows(pixIdx * 3); - int _fpixel = panels_fasts_slows(pixIdx * 3 + 1); - int _spixel = panels_fasts_slows(pixIdx * 3 + 2); - - CUDAREAL Fhkl_deriv_coef=0; - CUDAREAL Fhkl_hessian_coef=0; - if (Fhkl_gradient_mode) { - CUDAREAL u = data_residual(pixIdx); - CUDAREAL one_by_v = 1/data_variance(pixIdx); - CUDAREAL Gterm = 1 - 2*u - u*u*one_by_v; - Fhkl_deriv_coef = 0.5 * Gterm*one_by_v / data_freq(pixIdx); - if (Fhkl_errors_mode) { - Fhkl_hessian_coef = -0.5*one_by_v*(one_by_v*Gterm - 2 - 2*u*one_by_v -u*u*one_by_v*one_by_v)/data_freq(pixIdx); - } + if (using_trusted_mask) { + if (!data_trusted(pixIdx)) + return; + } + const int _pid = panels_fasts_slows(pixIdx * 3); + const int _fpixel = panels_fasts_slows(pixIdx * 3 + 1); + const int _spixel = panels_fasts_slows(pixIdx * 3 + 2); + + CUDAREAL Fhkl_deriv_coef=0; + CUDAREAL Fhkl_hessian_coef=0; + if (Fhkl_gradient_mode) { + CUDAREAL u = data_residual(pixIdx); + CUDAREAL one_by_v = 1/data_variance(pixIdx); + CUDAREAL Gterm = 1 - 2*u - u*u*one_by_v; + Fhkl_deriv_coef = 0.5 * Gterm*one_by_v / data_freq(pixIdx); + if (Fhkl_errors_mode) { + Fhkl_hessian_coef = -0.5*one_by_v*(one_by_v*Gterm - 2 - 2*u*one_by_v -u*u*one_by_v*one_by_v)/data_freq(pixIdx); } + } - // int fcell_idx=1; - int nom_h = 0, nom_k = 0, nom_l = 0; - if (use_nominal_hkl) { - nom_h = nominal_hkl(pixIdx * 3); - nom_k = nominal_hkl(pixIdx * 3 + 1); - nom_l = nominal_hkl(pixIdx * 3 + 2); - } - CUDAREAL close_distance = close_distances(_pid); - - // reset photon count for this pixel - double _I = 0; - double Ilambda = 0; - - // reset derivative photon counts for the various parameters - double rot_manager_dI[3] = {0, 0, 0}; - double rot_manager_dI2[3] = {0, 0, 0}; - double ucell_manager_dI[6] = {0, 0, 0, 0, 0, 0}; - double ucell_manager_dI2[6] = {0, 0, 0, 0, 0, 0}; - double Ncells_manager_dI[6] = {0, 0, 0, 0, 0, 0}; - double Ncells_manager_dI2[6] = {0, 0, 0, 0, 0, 0}; - double pan_orig_manager_dI[3] = {0, 0, 0}; - double pan_orig_manager_dI2[3] = {0, 0, 0}; - double pan_rot_manager_dI[3] = {0, 0, 0}; - double pan_rot_manager_dI2[3] = {0, 0, 0}; - double fcell_manager_dI = 0; - double fcell_manager_dI2 = 0; - double eta_manager_dI[3] = {0, 0, 0}; - double eta_manager_dI2[3] = {0, 0, 0}; - double lambda_manager_dI[2] = {0, 0}; - double lambda_manager_dI2[2] = {0, 0}; - double fp_fdp_manager_dI[2] = {0, 0}; - double dI_diffuse[6] = {0, 0, 0, 0, 0, 0}; - - for (int _subS = 0; _subS < oversample; ++_subS) { - for (int _subF = 0; _subF < oversample; ++_subF) { - // absolute mm position on detector (relative to its origin) - CUDAREAL _Fdet = - 
subpixel_size * (_fpixel * oversample + _subF) + subpixel_size / 2.0; - CUDAREAL _Sdet = - subpixel_size * (_spixel * oversample + _subS) + subpixel_size / 2.0; - - // assume "distance" is to the front of the detector sensor layer - int pid_x = _pid * 3; - int pid_y = _pid * 3 + 1; - int pid_z = _pid * 3 + 2; - - CUDAREAL fx = fdet_vectors(pid_x); - CUDAREAL fy = fdet_vectors(pid_y); - CUDAREAL fz = fdet_vectors(pid_z); - CUDAREAL sx = sdet_vectors(pid_x); - CUDAREAL sy = sdet_vectors(pid_y); - CUDAREAL sz = sdet_vectors(pid_z); - CUDAREAL ox = odet_vectors(pid_x); - CUDAREAL oy = odet_vectors(pid_y); - CUDAREAL oz = odet_vectors(pid_z); - CUDAREAL px = pix0_vectors(pid_x); - CUDAREAL py = pix0_vectors(pid_y); - CUDAREAL pz = pix0_vectors(pid_z); - - KOKKOS_VEC3 _o_vec(ox, oy, oz); - - for (int _thick_tic = 0; _thick_tic < detector_thicksteps; ++_thick_tic) { - CUDAREAL _Odet = _thick_tic * detector_thickstep; - - CUDAREAL pixposX = _Fdet * fx + _Sdet * sx + _Odet * ox + px; - CUDAREAL pixposY = _Fdet * fy + _Sdet * sy + _Odet * oy + py; - CUDAREAL pixposZ = _Fdet * fz + _Sdet * sz + _Odet * oz + pz; - KOKKOS_VEC3 _pixel_pos(pixposX, pixposY, pixposZ); - - CUDAREAL _airpath = _pixel_pos.length(); - KOKKOS_VEC3 _diffracted = _pixel_pos.get_unit_vector(); - - // solid angle subtended by a pixel: (pix/airpath)^2*cos(2theta) - CUDAREAL _omega_pixel = pixel_size * pixel_size / _airpath / _airpath * - close_distance / _airpath; - - // option to turn off obliquity effect, inverse-square-law only - if (point_pixel) - _omega_pixel = 1.0 / _airpath / _airpath; - - // now calculate detector thickness effects - CUDAREAL _capture_fraction = 1; - - if (detector_thick > 0.0 && detector_attnlen > 0.0) { - // inverse of effective thickness increase - CUDAREAL _parallax = _diffracted.dot(_o_vec); - _capture_fraction = ::Kokkos::Experimental::exp( - -_thick_tic * detector_thickstep / - detector_attnlen / _parallax) - - ::Kokkos::Experimental::exp( - -(_thick_tic + 1) * detector_thickstep / - detector_attnlen / _parallax); + // int fcell_idx=1; + int nom_h = 0, nom_k = 0, nom_l = 0; + if (use_nominal_hkl) { + nom_h = nominal_hkl(pixIdx * 3); + nom_k = nominal_hkl(pixIdx * 3 + 1); + nom_l = nominal_hkl(pixIdx * 3 + 2); + } + CUDAREAL close_distance = close_distances(_pid); + + // reset photon count for this pixel + double _I = 0; + double Ilambda = 0; + + kokkos_manager dI, dI2; + dI.reset(); + dI2.reset(); + + for (int _subS = 0; _subS < oversample; ++_subS) { + for (int _subF = 0; _subF < oversample; ++_subF) { + // absolute mm position on detector (relative to its origin) + CUDAREAL _Fdet = + subpixel_size * (_fpixel * oversample + _subF) + subpixel_size / 2.0; + CUDAREAL _Sdet = + subpixel_size * (_spixel * oversample + _subS) + subpixel_size / 2.0; + + // assume "distance" is to the front of the detector sensor layer + int pid_x = _pid * 3; + int pid_y = _pid * 3 + 1; + int pid_z = _pid * 3 + 2; + + + CUDAREAL fx = fdet_vectors(pid_x); + CUDAREAL fy = fdet_vectors(pid_y); + CUDAREAL fz = fdet_vectors(pid_z); + CUDAREAL sx = sdet_vectors(pid_x); + CUDAREAL sy = sdet_vectors(pid_y); + CUDAREAL sz = sdet_vectors(pid_z); + CUDAREAL ox = odet_vectors(pid_x); + CUDAREAL oy = odet_vectors(pid_y); + CUDAREAL oz = odet_vectors(pid_z); + CUDAREAL px = pix0_vectors(pid_x); + CUDAREAL py = pix0_vectors(pid_y); + CUDAREAL pz = pix0_vectors(pid_z); + KOKKOS_VEC3 _o_vec(ox, oy, oz); + + for (int _thick_tic = 0; _thick_tic < detector_thicksteps; ++_thick_tic) { + + CUDAREAL _Odet = _thick_tic * detector_thickstep; + + 
CUDAREAL pixposX = _Fdet * fx + _Sdet * sx + _Odet * ox + px; + CUDAREAL pixposY = _Fdet * fy + _Sdet * sy + _Odet * oy + py; + CUDAREAL pixposZ = _Fdet * fz + _Sdet * sz + _Odet * oz + pz; + KOKKOS_VEC3 _pixel_pos(pixposX, pixposY, pixposZ); + + CUDAREAL _airpath_r = 1 / _pixel_pos.length(); + KOKKOS_VEC3 _diffracted = _pixel_pos.get_unit_vector(); + + const CUDAREAL close_distance = close_distances(_pid); + + // solid angle subtended by a pixel: (pix/airpath)^2*cos(2theta) + CUDAREAL _omega_pixel = pixel_size * pixel_size * _airpath_r * _airpath_r * + close_distance * _airpath_r; + + // option to turn off obliquity effect, inverse-square-law only + if (point_pixel) + _omega_pixel = _airpath_r * _airpath_r; + + // now calculate detector thickness effects + CUDAREAL _capture_fraction = 1; + + CUDAREAL previous_layer = 1.0; + if (detector_thick > 0.0 && detector_attnlen_r > 0.0) { + // inverse of effective thickness increase + KOKKOS_VEC3 _o_vec(ox, oy, oz); + CUDAREAL _parallax = _diffracted.dot(_o_vec); + CUDAREAL current_layer = ::Kokkos::exp( + -(_thick_tic + 1) * detector_thickstep * + detector_attnlen_r / _parallax); + _capture_fraction = previous_layer - current_layer; + previous_layer = current_layer; + } + + for (int _source = 0; _source < sources; ++_source) { + + KOKKOS_VEC3 _incident( + -source_X(_source), -source_Y(_source), -source_Z(_source)); + CUDAREAL _lambda = source_lambda(_source); + CUDAREAL sI = source_I(_source); + CUDAREAL lambda_ang = _lambda * 1e10; + if (use_lambda_coefficients) { + lambda_ang = lambda0 + lambda1 * lambda_ang; + _lambda = lambda_ang * 1e-10; } - CUDAREAL cap_frac_times_omega = _capture_fraction * _omega_pixel; - - for (int _source = 0; _source < sources; ++_source) { - KOKKOS_VEC3 _incident( - -source_X(_source), -source_Y(_source), -source_Z(_source)); - CUDAREAL _lambda = source_lambda(_source); - CUDAREAL sI = source_I(_source); - CUDAREAL lambda_ang = _lambda * 1e10; - if (use_lambda_coefficients) { - lambda_ang = lambda0 + lambda1 * lambda_ang; - _lambda = lambda_ang * 1e-10; - } - // polarization - CUDAREAL polar_for_Fhkl_grad=1; - if (!nopolar && Fhkl_gradient_mode){ - //polar_for_Fhkl_grad = diffBragg_gpu_kernel_polarization(_incident, _diffracted, - // s_polarization_axis, s_kahn_factor); - // component of diffracted unit vector along incident beam unit vector - CUDAREAL cos2theta = _incident.dot(_diffracted); - CUDAREAL cos2theta_sqr = cos2theta*cos2theta; - CUDAREAL sin2theta_sqr = 1-cos2theta_sqr; - - CUDAREAL _psi=0; - if(kahn_factor != 0.0){ - // cross product to get "vertical" axis that is orthogonal to the cannonical "polarization" - KOKKOS_VEC3 B_in = polarization_axis.cross(_incident); - // cross product with incident beam to get E-vector direction - KOKKOS_VEC3 E_in = _incident.cross(B_in); - // get components of diffracted ray projected onto the E-B plane - CUDAREAL _kEi = _diffracted.dot(E_in); - CUDAREAL _kBi = _diffracted.dot(B_in); - // compute the angle of the diffracted ray projected onto the incident E-B plane - _psi = -atan2(_kBi,_kEi); + // polarization + CUDAREAL polar_for_Fhkl_grad=1; + if (!nopolar && Fhkl_gradient_mode){ + + // component of diffracted unit vector along incident beam unit vector + CUDAREAL cos2theta = _incident.dot(_diffracted); + CUDAREAL cos2theta_sqr = cos2theta*cos2theta; + CUDAREAL sin2theta_sqr = 1-cos2theta_sqr; + + CUDAREAL cos2psi=0; + if(kahn_factor != 0.0){ + // cross product to get "vertical" axis that is orthogonal to the cannonical "polarization" + KOKKOS_VEC3 B_in = 
polarization_axis.cross(_incident); + // cross product with incident beam to get E-vector direction + KOKKOS_VEC3 E_in = _incident.cross(B_in); + // get components of diffracted ray projected onto the E-B plane + CUDAREAL _kEi = _diffracted.dot(E_in); + CUDAREAL _kBi = _diffracted.dot(B_in); + // compute the angle of the diffracted ray projected onto the incident E-B plane + // calculate cos(2 * atan2(_kBi, _kEi)) + if (_kEi!=0) { + CUDAREAL ratio = _kBi / _kEi; + cos2psi = (1 - ratio*ratio) / (1 + ratio*ratio); + } else { + cos2psi = -1; } - // correction for polarized incident beam - polar_for_Fhkl_grad = 0.5*(1.0 + cos2theta_sqr - kahn_factor*cos(2*_psi)*sin2theta_sqr); + } + // correction for polarized incident beam + polar_for_Fhkl_grad = 0.5*(1.0 + cos2theta_sqr - kahn_factor*cos2psi*sin2theta_sqr); + } + KOKKOS_VEC3 _scattering = (_diffracted - _incident) / _lambda; + + KOKKOS_VEC3 q_vec = _scattering * 1e-10; + + // TODO rename + CUDAREAL texture_scale = _capture_fraction * _omega_pixel * sI; + + for (int _mos_tic = 0; _mos_tic < mosaic_domains; ++_mos_tic) { + const KOKKOS_MAT3 UBO = Amatrices(_mos_tic); + + KOKKOS_VEC3 H_vec = UBO * q_vec; + CUDAREAL _h = H_vec[0]; + CUDAREAL _k = H_vec[1]; + CUDAREAL _l = H_vec[2]; + + int _h0 = ceil(_h - 0.5); + int _k0 = ceil(_k - 0.5); + int _l0 = ceil(_l - 0.5); + + KOKKOS_VEC3 H0(_h0, _k0, _l0); + + KOKKOS_VEC3 delta_H = H_vec - H0; + KOKKOS_VEC3 V = _NABC * delta_H; + CUDAREAL _hrad_sqr = V.length_sqr(); + CUDAREAL exparg = _hrad_sqr * C / 2; + CUDAREAL I0 = 0; + + if (exparg < 35) + if (no_Nabc_scale) + I0 = ::Kokkos::exp(-2 * exparg); + else + I0 = (NABC_det_sq) * + ::Kokkos::exp(-2 * exparg); + + // are we doing diffuse scattering + CUDAREAL step_diffuse_param[6] = {0, 0, 0, 0, 0, 0}; + if (use_diffuse) { + calc_diffuse_at_hkl(H_vec,H0,dHH,h_min,k_min,l_min,h_max,k_max,l_max,h_range,k_range,l_range,Ainv,FhklLinear,num_laue_mats,laue_mats,anisoG_local,dG_trace,anisoG_determ,anisoU_local,dG_dgam,(refine_flag & REFINE_DIFFUSE)>0,&I0,step_diffuse_param); + } // end s_use_diffuse outer + + CUDAREAL _F_cell = default_F; + CUDAREAL _F_cell2 = 0; + int i_hklasu=0; + + if ((_h0 <= h_max) && (_h0 >= h_min) && + (_k0 <= k_max) && (_k0 >= k_min) && + (_l0 <= l_max) && (_l0 >= l_min)) { + int Fhkl_linear_index = (_h0 - h_min) * k_range * l_range + + (_k0 - k_min) * l_range + (_l0 - l_min); + //_F_cell = __ldg(&FhklLinear[Fhkl_linear_index]); + _F_cell = FhklLinear(Fhkl_linear_index); + // if (complex_miller) _F_cell2 = + // __ldg(&Fhkl2Linear[Fhkl_linear_index]); + if (complex_miller) + _F_cell2 = Fhkl2Linear(Fhkl_linear_index); + if (Fhkl_have_scale_factors) + i_hklasu = FhklLinear_ASUid(Fhkl_linear_index); } - KOKKOS_VEC3 _scattering = (_diffracted - _incident) / _lambda; - - KOKKOS_VEC3 q_vec(_scattering[0], _scattering[1], _scattering[2]); - q_vec *= 1e-10; - - // TODO rename - CUDAREAL texture_scale = 1; - texture_scale *= cap_frac_times_omega; - texture_scale *= sI; + CUDAREAL c_deriv_Fcell = 0; + CUDAREAL d_deriv_Fcell = 0; + if (complex_miller) { + CUDAREAL c_deriv_Fcell_real = 0; + CUDAREAL c_deriv_Fcell_imag = 0; + CUDAREAL d_deriv_Fcell_real = 0; + CUDAREAL d_deriv_Fcell_imag = 0; + if (num_atoms > 0) { + CUDAREAL S_2 = (q_vec[0] * q_vec[0] + + q_vec[1] * q_vec[1] + + q_vec[2] * q_vec[2]); + + // fp is always followed by the fdp value + CUDAREAL val_fp = fpfdp(2 * _source); + CUDAREAL val_fdp = fpfdp(2 * _source + 1); + + CUDAREAL c_deriv_prime = 0; + CUDAREAL c_deriv_dblprime = 0; + CUDAREAL d_deriv_prime = 0; + CUDAREAL d_deriv_dblprime 
= 0; + if (refine_flag & REFINE_FP_FDP) { + // currently only supports two parameter model + int d_idx = 2 * _source; + c_deriv_prime = fpfdp_derivs(d_idx); + c_deriv_dblprime = fpfdp_derivs(d_idx + 1); + d_deriv_prime = fpfdp_derivs(d_idx + 2 * sources); + d_deriv_dblprime = + fpfdp_derivs(d_idx + 1 + 2 * sources); + } - for (int _mos_tic = 0; _mos_tic < mosaic_domains; ++_mos_tic) { - int amat_idx = _mos_tic; - KOKKOS_MAT3 UBO = Amatrices(amat_idx); + for (int i_atom = 0; i_atom < num_atoms; i_atom++) { + // fractional atomic coordinates + CUDAREAL atom_x = atom_data(i_atom * 5); + CUDAREAL atom_y = atom_data(i_atom * 5 + 1); + CUDAREAL atom_z = atom_data(i_atom * 5 + 2); + CUDAREAL B = atom_data(i_atom * 5 + 3); // B factor + B = ::Kokkos::exp( + -B * S_2 / 4.0); // TODO: speed me up? + CUDAREAL occ = atom_data(i_atom * 5 + 4); // occupancy + CUDAREAL r_dot_h = + _h0 * atom_x + _k0 * atom_y + _l0 * atom_z; + CUDAREAL phase = 2 * M_PI * r_dot_h; + CUDAREAL s_rdoth = ::Kokkos::sin(phase); + CUDAREAL c_rdoth = ::Kokkos::cos(phase); + CUDAREAL Bocc = B * occ; + CUDAREAL BC = B * c_rdoth; + CUDAREAL BS = B * s_rdoth; + CUDAREAL real_part = BC * val_fp - BS * val_fdp; + CUDAREAL imag_part = BS * val_fp + BC * val_fdp; + _F_cell += real_part; + _F_cell2 += imag_part; + if (refine_flag & REFINE_FP_FDP) { + c_deriv_Fcell_real += + BC * c_deriv_prime - BS * c_deriv_dblprime; + c_deriv_Fcell_imag += + BS * c_deriv_prime + BC * c_deriv_dblprime; + + d_deriv_Fcell_real += + BC * d_deriv_prime - BS * d_deriv_dblprime; + d_deriv_Fcell_imag += + BS * d_deriv_prime + BC * d_deriv_dblprime; + } + } + } + CUDAREAL Freal = _F_cell; + CUDAREAL Fimag = _F_cell2; + _F_cell = + ::Kokkos::sqrt(Freal * Freal + Fimag * Fimag); + if (refine_flag & REFINE_FP_FDP) { + c_deriv_Fcell = + Freal * c_deriv_Fcell_real + Fimag * c_deriv_Fcell_imag; + d_deriv_Fcell = + Freal * d_deriv_Fcell_real + Fimag * d_deriv_Fcell_imag; + } + } + if (!oversample_omega && ! Fhkl_gradient_mode) + _omega_pixel = 1; + + CUDAREAL _I_cell = _F_cell; + if (!(refine_flag & REFINE_ICELL)) + _I_cell *= _F_cell; + CUDAREAL hkl=1; + int Fhkl_channel=0; + if (! Fhkl_channels_empty) + Fhkl_channel = Fhkl_channels(_source); + if (Fhkl_have_scale_factors) + hkl = Fhkl_scale(i_hklasu + Fhkl_channel*Num_ASU); + if (Fhkl_gradient_mode){ + CUDAREAL Fhkl_deriv_scale = overall_scale*polar_for_Fhkl_grad; + CUDAREAL I_noFcell=texture_scale*I0; + CUDAREAL dfhkl = I_noFcell*_I_cell * Fhkl_deriv_scale; + CUDAREAL grad_incr = dfhkl*Fhkl_deriv_coef; + int fhkl_grad_idx=i_hklasu + Fhkl_channel*Num_ASU; + + if (Fhkl_errors_mode){ + // here we hi-kack the Fhkl_scale_deriv array, if computing errors, in order to store the hessian terms + // if we are getting the hessian terms, we no longer need the gradients (e.g. 
by this point we are done refininig) + CUDAREAL hessian_incr = Fhkl_hessian_coef*dfhkl*dfhkl; + ::Kokkos::atomic_add(&Fhkl_scale_deriv(fhkl_grad_idx), hessian_incr); + } + else{ + ::Kokkos::atomic_add(&Fhkl_scale_deriv(fhkl_grad_idx), grad_incr); + } + continue; + } - KOKKOS_VEC3 H_vec = UBO * q_vec; - CUDAREAL _h = H_vec[0]; - CUDAREAL _k = H_vec[1]; - CUDAREAL _l = H_vec[2]; + CUDAREAL _I_total = hkl*_I_cell *I0; + CUDAREAL Iincrement = _I_total * texture_scale; + _I += Iincrement; + if (save_wavelenimage) + Ilambda += Iincrement * lambda_ang; + + if (refine_flag & REFINE_DIFFUSE) { + CUDAREAL step_scale = texture_scale * _F_cell * _F_cell; + for (int i_diff = 0; i_diff < 6; i_diff++) { + dI.diffuse[i_diff] += + step_scale * step_diffuse_param[i_diff]; + } + } - int _h0 = ceil(_h - 0.5); - int _k0 = ceil(_k - 0.5); - int _l0 = ceil(_l - 0.5); + //************************************************* + // START REFINEMENT - KOKKOS_VEC3 H0(_h0, _k0, _l0); + if (refine_flag & REFINE_FP_FDP) { + CUDAREAL I_noFcell = texture_scale * I0; + dI.fp_fdp[0] += 2 * I_noFcell * (c_deriv_Fcell); + dI.fp_fdp[1] += 2 * I_noFcell * (d_deriv_Fcell); + } - KOKKOS_VEC3 delta_H = H_vec - H0; - KOKKOS_VEC3 V = _NABC * delta_H; - CUDAREAL _hrad_sqr = V.dot(V); - CUDAREAL exparg = _hrad_sqr * C / 2; - CUDAREAL I0 = 0; + if (verbose > 3) + printf( + "hkl= %f %f %f hkl1= %d %d %d Fcell=%f\n", _h, _k, _l, + _h0, _k0, _l0, _F_cell); - if (exparg < 35) - if (no_Nabc_scale) - I0 = ::Kokkos::Experimental::exp(-2 * exparg); - else - I0 = (NABC_det_sq) * - ::Kokkos::Experimental::exp(-2 * exparg); - - // are we doing diffuse scattering - CUDAREAL step_diffuse_param[6] = {0, 0, 0, 0, 0, 0}; - if (use_diffuse) { - calc_diffuse_at_hkl(H_vec,H0,dHH,Hmin,Hmax,Hrange,Ainv,FhklLinear,num_laue_mats,laue_mats,anisoG_local,anisoU_local,dG_dgam,refine_diffuse,&I0,step_diffuse_param); - } // end s_use_diffuse outer - - CUDAREAL _F_cell = default_F; - CUDAREAL _F_cell2 = 0; - int i_hklasu=0; - - if ((_h0 <= h_max) && (_h0 >= h_min) && (_k0 <= k_max) && - (_k0 >= k_min) && (_l0 <= l_max) && (_l0 >= l_min)) { - int Fhkl_linear_index = (_h0 - h_min) * k_range * l_range + - (_k0 - k_min) * l_range + (_l0 - l_min); - //_F_cell = __ldg(&FhklLinear[Fhkl_linear_index]); - _F_cell = FhklLinear(Fhkl_linear_index); - // if (complex_miller) _F_cell2 = - // __ldg(&Fhkl2Linear[Fhkl_linear_index]); - if (complex_miller) - _F_cell2 = Fhkl2Linear(Fhkl_linear_index); - if (Fhkl_have_scale_factors) - i_hklasu = FhklLinear_ASUid(Fhkl_linear_index); + KOKKOS_MAT3 UBOt; + if (refine_flag & (REFINE_UMAT | REFINE_ETA)) { + UBOt = Amat_init; + } + if (refine_flag & REFINE_UMAT1) { + const KOKKOS_VEC3 dV = UMATS_prime(_mos_tic, 0) * q_vec; + const CUDAREAL V_dot_dV = V.dot(dV); + const CUDAREAL value = -two_C * V_dot_dV * Iincrement; + CUDAREAL value2 = 0; + if (compute_curvatures) { + const CUDAREAL dV_dot_dV = dV.length_sqr(); + const CUDAREAL dV2_dot_V = V.dot(UMATS_dbl_prime(_mos_tic, 0)*q_vec); + value2 = two_C * (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - dV_dot_dV) * Iincrement; } - - CUDAREAL c_deriv_Fcell = 0; - CUDAREAL d_deriv_Fcell = 0; - if (complex_miller) { - CUDAREAL c_deriv_Fcell_real = 0; - CUDAREAL c_deriv_Fcell_imag = 0; - CUDAREAL d_deriv_Fcell_real = 0; - CUDAREAL d_deriv_Fcell_imag = 0; - if (num_atoms > 0) { - CUDAREAL S_2 = 1.e-20 * (_scattering[0] * _scattering[0] + - _scattering[1] * _scattering[1] + - _scattering[2] * _scattering[2]); - - // fp is always followed by the fdp value - CUDAREAL val_fp = fpfdp(2 * _source); - CUDAREAL 
val_fdp = fpfdp(2 * _source + 1); - - CUDAREAL c_deriv_prime = 0; - CUDAREAL c_deriv_dblprime = 0; - CUDAREAL d_deriv_prime = 0; - CUDAREAL d_deriv_dblprime = 0; - if (refine_fp_fdp) { - // currently only supports two parameter model - int d_idx = 2 * _source; - c_deriv_prime = fpfdp_derivs(d_idx); - c_deriv_dblprime = fpfdp_derivs(d_idx + 1); - d_deriv_prime = fpfdp_derivs(d_idx + 2 * sources); - d_deriv_dblprime = - fpfdp_derivs(d_idx + 1 + 2 * sources); - } - - for (int i_atom = 0; i_atom < num_atoms; i_atom++) { - // fractional atomic coordinates - CUDAREAL atom_x = atom_data(i_atom * 5); - CUDAREAL atom_y = atom_data(i_atom * 5 + 1); - CUDAREAL atom_z = atom_data(i_atom * 5 + 2); - CUDAREAL B = atom_data(i_atom * 5 + 3); // B factor - B = ::Kokkos::Experimental::exp( - -B * S_2 / 4.0); // TODO: speed me up? - CUDAREAL occ = atom_data(i_atom * 5 + 4); // occupancy - CUDAREAL r_dot_h = - _h0 * atom_x + _k0 * atom_y + _l0 * atom_z; - CUDAREAL phase = 2 * M_PI * r_dot_h; - CUDAREAL s_rdoth = ::Kokkos::Experimental::sin(phase); - CUDAREAL c_rdoth = ::Kokkos::Experimental::cos(phase); - CUDAREAL Bocc = B * occ; - CUDAREAL BC = B * c_rdoth; - CUDAREAL BS = B * s_rdoth; - CUDAREAL real_part = BC * val_fp - BS * val_fdp; - CUDAREAL imag_part = BS * val_fp + BC * val_fdp; - _F_cell += real_part; - _F_cell2 += imag_part; - if (refine_fp_fdp) { - c_deriv_Fcell_real += - BC * c_deriv_prime - BS * c_deriv_dblprime; - c_deriv_Fcell_imag += - BS * c_deriv_prime + BC * c_deriv_dblprime; - - d_deriv_Fcell_real += - BC * d_deriv_prime - BS * d_deriv_dblprime; - d_deriv_Fcell_imag += - BS * d_deriv_prime + BC * d_deriv_dblprime; - } - } - } - CUDAREAL Freal = _F_cell; - CUDAREAL Fimag = _F_cell2; - _F_cell = - ::Kokkos::Experimental::sqrt(Freal * Freal + Fimag * Fimag); - if (refine_fp_fdp) { - c_deriv_Fcell = - Freal * c_deriv_Fcell_real + Fimag * c_deriv_Fcell_imag; - d_deriv_Fcell = - Freal * d_deriv_Fcell_real + Fimag * d_deriv_Fcell_imag; + dI.rot[0] += value; + dI2.rot[0] += value2; + } + if (refine_flag & REFINE_UMAT2) { + KOKKOS_VEC3 dV = UMATS_prime(_mos_tic, 1) * q_vec; + CUDAREAL V_dot_dV = V.dot(dV); + CUDAREAL value = -two_C * V_dot_dV * Iincrement; + + CUDAREAL value2 = 0; + if (compute_curvatures) { + const CUDAREAL dV_dot_dV = dV.length_sqr(); + CUDAREAL dV2_dot_V = V.dot(UMATS_dbl_prime(_mos_tic, 1)*q_vec); + value2 = two_C * (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - dV_dot_dV) * Iincrement; + } + dI.rot[1] += value; + dI2.rot[1] += value2; + } + if (refine_flag & REFINE_UMAT3) { + KOKKOS_VEC3 dV = UMATS_prime(_mos_tic, 2) * q_vec; + CUDAREAL V_dot_dV = V.dot(dV); + CUDAREAL value = -two_C * V_dot_dV * Iincrement; + + CUDAREAL value2 = 0; + if (compute_curvatures) { + const CUDAREAL dV_dot_dV = dV.length_sqr(); + CUDAREAL dV2_dot_V = V.dot(UMATS_dbl_prime(_mos_tic, 2)*q_vec); + value2 = two_C * (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - dV_dot_dV) * Iincrement; + } + dI.rot[2] += value; + dI2.rot[2] += value2; + } + // Checkpoint for unit cell derivatives + for (int i_uc = 0; i_uc < 6; i_uc++) { + if (refine_flag & (REFINE_BMAT1 << i_uc)) { + KOKKOS_VEC3 dV = BMATS_prime(_mos_tic, i_uc) * q_vec; + CUDAREAL V_dot_dV = V.dot(dV); + CUDAREAL value = -two_C * V_dot_dV * Iincrement; + CUDAREAL value2 = 0; + if (compute_curvatures) { + const CUDAREAL dV_dot_dV = dV.length_sqr(); + CUDAREAL dV2_dot_V = V.dot(BMATS_dbl_prime(_mos_tic, i_uc)*q_vec); + value2 = two_C * (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - dV_dot_dV) * Iincrement; } + dI.ucell[i_uc] += value; + dI2.ucell[i_uc] += value2; } - 
if (!oversample_omega && ! Fhkl_gradient_mode) - _omega_pixel = 1; - - CUDAREAL _I_cell = _F_cell; - if (!refine_Icell) - _I_cell *= _F_cell; - CUDAREAL hkl=1; - int Fhkl_channel=0; - if (! Fhkl_channels_empty) - Fhkl_channel = Fhkl_channels(_source); - if (Fhkl_have_scale_factors) - hkl = Fhkl_scale(i_hklasu + Fhkl_channel*Num_ASU); - if (Fhkl_gradient_mode){ - CUDAREAL Fhkl_deriv_scale = overall_scale*polar_for_Fhkl_grad; - CUDAREAL I_noFcell=texture_scale*I0; - CUDAREAL dfhkl = I_noFcell*_I_cell * Fhkl_deriv_scale; - CUDAREAL grad_incr = dfhkl*Fhkl_deriv_coef; - int fhkl_grad_idx=i_hklasu + Fhkl_channel*Num_ASU; - - if (Fhkl_errors_mode){ - // here we hi-kack the Fhkl_scale_deriv array, if computing errors, in order to store the hessian terms - // if we are getting the hessian terms, we no longer need the gradients (e.g. by this point we are done refininig) - CUDAREAL hessian_incr = Fhkl_hessian_coef*dfhkl*dfhkl; - ::Kokkos::atomic_add(&Fhkl_scale_deriv(fhkl_grad_idx), hessian_incr); + } // end ucell deriv + + // Checkpoint for Ncells manager + if (refine_flag & REFINE_NCELLS1) { + int num_ncell_deriv = 1; + if (!isotropic_ncells) + num_ncell_deriv = 3; + for (int i_nc = 0; i_nc < num_ncell_deriv; i_nc++) { + KOKKOS_MAT3 dN; + dN(i_nc, i_nc) = 1; + if (num_ncell_deriv == 1) { + dN(0, 0) = 1; + dN(1, 1) = 1; + dN(2, 2) = 1; } - else{ - ::Kokkos::atomic_add(&Fhkl_scale_deriv(fhkl_grad_idx), grad_incr); + CUDAREAL N_i = _NABC(i_nc, i_nc); + KOKKOS_VEC3 dV_dN = dN.dot(delta_H); + // TODO speedops: precompute these, store shared var + // _NABC.inverse + CUDAREAL determ_deriv = (_NABC.inverse().dot(dN)).trace(); + CUDAREAL deriv_coef = determ_deriv - C * (dV_dN.dot(V)); + CUDAREAL value = 2 * Iincrement * deriv_coef; + CUDAREAL value2 = 0; + if (compute_curvatures) { + value2 = (-1 / N_i / N_i - C * (dV_dN.dot(dV_dN))) * 2 * + Iincrement; + value2 += deriv_coef * 2 * value; } - continue; + dI.Ncells[i_nc] += value; + dI2.Ncells[i_nc] += value2; } - - CUDAREAL _I_total = hkl*_I_cell *I0; - CUDAREAL Iincrement = _I_total * texture_scale; - _I += Iincrement; - if (save_wavelenimage) - Ilambda += Iincrement * lambda_ang; - - if (refine_diffuse) { - CUDAREAL step_scale = texture_scale * _F_cell * _F_cell; - for (int i_diff = 0; i_diff < 6; i_diff++) { - dI_diffuse[i_diff] += - step_scale * step_diffuse_param[i_diff]; + } // end Ncells manager deriv + + if (refine_flag & REFINE_NCELLS_DEF) { + for (int i_nc = 3; i_nc < 6; i_nc++) { + KOKKOS_MAT3 dN; + if (i_nc == 3) + dN = KOKKOS_MAT3{0, 1, 0, 1, 0, 0, 0, 0, 0}; + else if (i_nc == 4) + dN = KOKKOS_MAT3{0, 0, 0, 0, 0, 1, 0, 1, 0}; + else + dN = KOKKOS_MAT3{0, 0, 1, 0, 0, 0, 1, 0, 0}; + KOKKOS_VEC3 dV_dN = dN.dot(delta_H); + // TODO speedops: precompute these + CUDAREAL determ_deriv = (_NABC.inverse().dot(dN)).trace(); + CUDAREAL deriv_coef = determ_deriv - C * (dV_dN.dot(V)); + CUDAREAL value = 2 * Iincrement * deriv_coef; + dI.Ncells[i_nc] += value; + CUDAREAL value2 = 0; + if (compute_curvatures) { + value2 = deriv_coef * value; + value2 += -2 * C * Iincrement * (dV_dN.dot(dV_dN)); + dI2.Ncells[i_nc] += value2; } } + } - if (refine_fp_fdp) { - CUDAREAL I_noFcell = texture_scale * I0; - fp_fdp_manager_dI[0] += 2 * I_noFcell * (c_deriv_Fcell); - fp_fdp_manager_dI[1] += 2 * I_noFcell * (d_deriv_Fcell); - } + // Checkpoint for Origin manager + for (int i_pan_orig = 0; i_pan_orig < 3; i_pan_orig++) { + if (refine_flag & (REFINE_PANEL_ORIGIN1 << i_pan_orig)) { + CUDAREAL per_k = _airpath_r; + CUDAREAL per_k3 = pow(per_k, 3.); + CUDAREAL per_k5 = 
pow(per_k, 5.); + + KOKKOS_MAT3 M = -two_C * (_NABC.dot(UBO)) / lambda_ang; + KOKKOS_VEC3 dk; + if (i_pan_orig == 0) + dk = KOKKOS_VEC3{0, 0, 1}; + else if (i_pan_orig == 1) + dk = KOKKOS_VEC3{1, 0, 0}; + else + dk = KOKKOS_VEC3{0, 1, 0}; + + CUDAREAL G = dk.dot(_pixel_pos); + CUDAREAL pix2 = subpixel_size * subpixel_size; + KOKKOS_VEC3 dk_hat = -per_k3 * G * _pixel_pos + per_k * dk; + CUDAREAL coef = (M.dot(dk_hat)).dot(V); + CUDAREAL coef2 = + -3 * pix2 * per_k5 * G * (_o_vec.dot(_pixel_pos)); + coef2 += pix2 * per_k3 * (_o_vec.dot(dk)); + CUDAREAL value = + coef * Iincrement + coef2 * Iincrement / _omega_pixel; + + dI.pan_orig[i_pan_orig] += value; + dI2.pan_orig[i_pan_orig] += 0; + + } // end origin manager deriv + } - if (verbose > 3) - printf( - "hkl= %f %f %f hkl1= %d %d %d Fcell=%f\n", _h, _k, _l, - _h0, _k0, _l0, _F_cell); + for (int i_pan_rot = 0; i_pan_rot < 3; i_pan_rot++) { + if (refine_flag & (REFINE_PANEL_ROT1 << i_pan_rot)) { + CUDAREAL per_k = _airpath_r; + CUDAREAL per_k3 = pow(per_k, 3.); + CUDAREAL per_k5 = pow(per_k, 5.); + KOKKOS_MAT3 M = -two_C * (_NABC.dot(UBO)) / lambda_ang; + KOKKOS_VEC3 dk = _Fdet * (dF_vecs(_pid * 3 + i_pan_rot)) + + _Sdet * (dS_vecs(_pid * 3 + i_pan_rot)); + CUDAREAL G = dk.dot(_pixel_pos); + CUDAREAL pix2 = subpixel_size * subpixel_size; + KOKKOS_VEC3 dk_hat = -per_k3 * G * _pixel_pos + per_k * dk; + CUDAREAL coef = (M.dot(dk_hat)).dot(V); + CUDAREAL coef2 = + -3 * pix2 * per_k5 * G * (_o_vec.dot(_pixel_pos)); + coef2 += pix2 * per_k3 * (_o_vec.dot(dk)); + CUDAREAL value = + coef * Iincrement + coef2 * Iincrement / _omega_pixel; + + dI.pan_rot[i_pan_rot] += value; + dI2.pan_rot[i_pan_rot] += 0; + } + } - KOKKOS_MAT3 UBOt; - if (refine_Umat(0) || refine_Umat(1) || refine_Umat(2) || - refine_eta) { - UBOt = Amat_init; + // checkpoint for Fcell manager + if (refine_flag & REFINE_FCELL) { + CUDAREAL value; + if (refine_flag & REFINE_ICELL) + value = I0 * texture_scale; + else + value = 2 * I0 * _F_cell * + texture_scale; // Iincrement/_F_cell ; + CUDAREAL value2 = 0; + if (compute_curvatures) { + // NOTE if _Fcell >0 + value2 = 2 * I0 * texture_scale; } - if (refine_Umat(0)) { - KOKKOS_MAT3 RyRzUBOt = RotMats(1) * RotMats(2) * UBOt; - KOKKOS_VEC3 delta_H_prime = - (UMATS(_mos_tic) * dRotMats(0) * RyRzUBOt) - .transpose() - .dot(q_vec); - CUDAREAL V_dot_dV = V.dot(_NABC.dot(delta_H_prime)); - CUDAREAL value = -two_C * V_dot_dV * Iincrement; - CUDAREAL value2 = 0; - if (compute_curvatures) { - KOKKOS_VEC3 delta_H_dbl_prime = - (UMATS(_mos_tic).dot(d2RotMats(0).dot(RyRzUBOt))) - .transpose() - .dot(q_vec); - CUDAREAL dV_dot_dV = (_NABC.dot(delta_H_prime)) - .dot(_NABC.dot(delta_H_prime)); - CUDAREAL dV2_dot_V = - (_NABC.dot(delta_H)).dot(_NABC.dot(delta_H_dbl_prime)); - value2 = - two_C * - (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - dV_dot_dV) * - Iincrement; + // if (fcell_idx >=0 && fcell_idx <=2){ + if (use_nominal_hkl) { + if (_h0 == nom_h && _k0 == nom_k && _l0 == nom_l) { + dI.fcell += value; + dI2.fcell += value2; } - rot_manager_dI[0] += value; - rot_manager_dI2[0] += value2; + } else { + dI.fcell += value; + dI2.fcell += value2; } - if (refine_Umat(1)) { - KOKKOS_MAT3 UmosRx = UMATS(_mos_tic).dot(RotMats(0)); - KOKKOS_MAT3 RzUBOt = RotMats(2).dot(UBOt); - KOKKOS_VEC3 delta_H_prime = (UmosRx.dot(dRotMats(1).dot(RzUBOt))) - .transpose() - .dot(q_vec); - CUDAREAL V_dot_dV = V.dot(_NABC.dot(delta_H_prime)); - CUDAREAL value = -two_C * V_dot_dV * Iincrement; - - CUDAREAL value2 = 0; + } // end of fcell man deriv + + // checkpoint for eta manager 
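+                        // eta (mosaic spread) derivatives: a single isotropic term unless aniso_eta is set, in which case one term per rotation axis via UMATS_RXYZ_prime / UMATS_RXYZ_dbl_prime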
+ if (refine_flag & REFINE_ETA) { + for (int i_eta = 0; i_eta < 3; i_eta++) { + if (i_eta > 0 && !aniso_eta) + continue; + int mtic2 = _mos_tic + i_eta * mosaic_domains; + KOKKOS_VEC3 DeltaH_deriv = (UMATS_RXYZ_prime(mtic2).dot(UBOt)) + .transpose() + .dot(q_vec); + // vector V is _Nabc*Delta_H + KOKKOS_VEC3 dV = _NABC.dot(DeltaH_deriv); + CUDAREAL V_dot_dV = V.dot(dV); + CUDAREAL Iprime = -two_C * (V_dot_dV)*Iincrement; + dI.eta[i_eta] += Iprime; + CUDAREAL Idbl_prime = 0; if (compute_curvatures) { - KOKKOS_VEC3 delta_H_dbl_prime = - (UmosRx.dot(d2RotMats(1).dot(RzUBOt))) + KOKKOS_VEC3 DeltaH_second_deriv = + (UMATS_RXYZ_dbl_prime(mtic2).dot(UBOt)) .transpose() .dot(q_vec); - CUDAREAL dV_dot_dV = (_NABC.dot(delta_H_prime)) - .dot(_NABC.dot(delta_H_prime)); - CUDAREAL dV2_dot_V = - (_NABC.dot(delta_H)).dot(_NABC.dot(delta_H_dbl_prime)); - value2 = - two_C * - (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - dV_dot_dV) * - Iincrement; + KOKKOS_VEC3 dV2 = _NABC.dot(DeltaH_second_deriv); + Idbl_prime = + -two_C * (dV.dot(dV) + V.dot(dV2)) * Iincrement; + Idbl_prime += -two_C * (V_dot_dV)*Iprime; } - rot_manager_dI[1] += value; - rot_manager_dI2[1] += value2; + dI2.eta[i_eta] += Idbl_prime; } - if (refine_Umat(2)) { - KOKKOS_MAT3 UmosRxRy = UMATS(_mos_tic).dot(RotMats(0).dot(RotMats(1))); - KOKKOS_VEC3 delta_H_prime = (UmosRxRy.dot(dRotMats(2).dot(UBOt))) - .transpose() - .dot(q_vec); - CUDAREAL V_dot_dV = V.dot(_NABC.dot(delta_H_prime)); - CUDAREAL value = -two_C * V_dot_dV * Iincrement; - + } // end of eta man deriv + + // checkpoint for lambda manager + for (int i_lam = 0; i_lam < 2; i_lam++) { + if (refine_flag & (REFINE_LAMBDA << i_lam)) { + CUDAREAL NH_dot_V = (_NABC.dot(H_vec)).dot(V); + CUDAREAL dg_dlambda; + if (i_lam == 0) + dg_dlambda = 1; + else // i_lam==1 + dg_dlambda = lambda_ang; + CUDAREAL coef = + NH_dot_V * two_C * (dg_dlambda) / lambda_ang; + CUDAREAL value = coef * Iincrement; CUDAREAL value2 = 0; - if (compute_curvatures) { - KOKKOS_VEC3 delta_H_dbl_prime = - (UmosRxRy.dot(d2RotMats(2).dot(UBOt))) - .transpose() - .dot(q_vec); - CUDAREAL dV_dot_dV = (_NABC.dot(delta_H_prime)) - .dot(_NABC.dot(delta_H_prime)); - CUDAREAL dV2_dot_V = - (_NABC.dot(delta_H)).dot(_NABC.dot(delta_H_dbl_prime)); - value2 = - two_C * - (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - dV_dot_dV) * - Iincrement; - } - rot_manager_dI[2] += value; - rot_manager_dI2[2] += value2; + dI.lambda[i_lam] += value; + dI2.lambda[i_lam] += value2; } - // Checkpoint for unit cell derivatives - // KOKKOS_MAT3 Ot = eig_O.transpose(); - KOKKOS_MAT3 UmosRxRyRzU; - KOKKOS_VEC3 delta_H_prime; - for (int i_uc = 0; i_uc < 6; i_uc++) { - if (refine_Bmat(i_uc)) { - UmosRxRyRzU = UMATS_RXYZ(_mos_tic).dot(eig_U); - delta_H_prime = - (UmosRxRyRzU.dot(dB_mats(i_uc).dot(eig_Otranspose))) - .transpose() - .dot(q_vec); - CUDAREAL V_dot_dV = V.dot(_NABC.dot(delta_H_prime)); - CUDAREAL value = -two_C * V_dot_dV * Iincrement; - CUDAREAL value2 = 0; - if (compute_curvatures) { - KOKKOS_VEC3 delta_H_dbl_prime = - (UmosRxRyRzU.dot( - dB2_mats(i_uc).dot(eig_Otranspose))) - .transpose() - .dot(q_vec); - CUDAREAL dV_dot_dV = (_NABC.dot(delta_H_prime)) - .dot(_NABC.dot(delta_H_prime)); - CUDAREAL dV2_dot_V = - (_NABC.dot(delta_H)) - .dot(_NABC.dot(delta_H_dbl_prime)); - value2 = two_C * - (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - - dV_dot_dV) * - Iincrement; - } - ucell_manager_dI[i_uc] += value; - ucell_manager_dI2[i_uc] += value2; + } + // end of lambda deriv + if (printout) { + if (_subS == 0 && _subF == 0 && _thick_tic == 0 && + _source == 0 && 
_mos_tic == 0) { + if ((_fpixel == printout_fpixel && + _spixel == printout_spixel) || + printout_fpixel < 0) { + printf("%4d %4d : lambda = %g\n", _fpixel, _spixel, _lambda); + printf( + "at %g %g %g\n", _pixel_pos[0], _pixel_pos[1], + _pixel_pos[2]); + printf("Fdet= %g; Sdet= %g ; Odet= %g\n", _Fdet, _Sdet, _Odet); + printf( + "PIX0: %f %f %f\n", pix0_vectors(pid_x), + pix0_vectors(pid_y), pix0_vectors(pid_z)); + printf( + "F: %f %f %f\n", fdet_vectors(pid_x), + fdet_vectors(pid_y), fdet_vectors(pid_z)); + printf( + "S: %f %f %f\n", sdet_vectors(pid_x), + sdet_vectors(pid_y), sdet_vectors(pid_z)); + printf( + "O: %f %f %f\n", odet_vectors(pid_x), + odet_vectors(pid_y), odet_vectors(pid_z)); + printf("pid_x=%d, pid_y=%d; pid_z=%d\n", pid_x, pid_y, pid_z); + printf( + "QVECTOR: %f %f %f\n", q_vec[0], q_vec[1], q_vec[2]); + printf("omega %15.10g\n", _omega_pixel); + printf( + "Incident: %g %g %g\n", + _incident[0], _incident[1], _incident[2]); + + KOKKOS_MAT3 UU = UMATS_RXYZ(_mos_tic); + printf( + "UMAT_RXYZ :\n%f %f %f\n%f %f %f\n%f %f %f\n", + UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), + UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); + UU = Bmat_realspace; + printf( + "Bmat_realspace :\n%f %f %f\n%f %f %f\n%f %f %f\n", + UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), + UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); + UU = UBO; + printf( + "UBO :\n%f %f %f\n%f %f %f\n%f %f %f\n", + UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), + UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); + + UU = UBOt; + printf( + "UBOt :\n%f %f %f\n%f %f %f\n%f %f %f\n", + UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), + UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); + + // UU = UmosRxRyRzU; + // printf( + // "UmosRxRyRzU :\n%f %f %f\n%f %f %f\n%f %f %f\n", + // UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), + // UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); + // KOKKOS_VEC3 AA = delta_H_prime; + // printf( + // "delta_H_prime :\n%f %f %f\n", AA[0], AA[1], + // AA[2]); + printf("Iincrement: %f\n", Iincrement); + printf( + "hkl= %f %f %f hkl0= %d %d %d\n", _h, _k, _l, _h0, + _k0, _l0); + printf( + " F_cell=%g F_cell2=%g I_latt=%g I = %g\n", + _F_cell, _F_cell2, I0, _I); + printf("I/steps %15.10g\n", _I / Nsteps); + // printf("Ilatt diffuse %15.10g\n", I_latt_diffuse); + printf("default_F= %f\n", default_F); + if (complex_miller) + printf("COMPLEX MILLER!\n"); + if (no_Nabc_scale) + printf("No Nabc scale!\n"); } - } // end ucell deriv - - // Checkpoint for Ncells manager - if (refine_Ncells(0)) { - int num_ncell_deriv = 1; - if (!isotropic_ncells) - num_ncell_deriv = 3; - for (int i_nc = 0; i_nc < num_ncell_deriv; i_nc++) { - KOKKOS_MAT3 dN; - dN(i_nc, i_nc) = 1; - if (num_ncell_deriv == 1) { - dN(0, 0) = 1; - dN(1, 1) = 1; - dN(2, 2) = 1; - } - CUDAREAL N_i = _NABC(i_nc, i_nc); - KOKKOS_VEC3 dV_dN = dN.dot(delta_H); - // TODO speedops: precompute these, store shared var - // _NABC.inverse - CUDAREAL determ_deriv = (_NABC.inverse().dot(dN)).trace(); - CUDAREAL deriv_coef = determ_deriv - C * (dV_dN.dot(V)); - CUDAREAL value = 2 * Iincrement * deriv_coef; - CUDAREAL value2 = 0; - if (compute_curvatures) { - dN(i_nc, i_nc) = 0; // TODO check maths - value2 = (-1 / N_i / N_i - C * (dV_dN.dot(dV_dN))) * 2 * - Iincrement; - value2 += deriv_coef * 2 * value; - } - Ncells_manager_dI[i_nc] += value; - Ncells_manager_dI2[i_nc] += value2; + } + } // end of printout if + + } // end of mos_tic loop + } // end of source loop + } // end of thick step loop + } // end of fpos loop + } // end of spos loop + floatimage(pixIdx) = _I; + if 
(save_wavelenimage) + wavelenimage(pixIdx) = Ilambda / _I; + + if (refine_flag) { + manager_dI(pixIdx) = dI; + manager_dI2(pixIdx) = dI2; + } + }); // end pixIdx loop + + if (Fhkl_gradient_mode) + return; + + Kokkos::parallel_for( + "deriv_image_increment", Npix_to_model, KOKKOS_LAMBDA(const int& pixIdx) { + + int _pid = panels_fasts_slows(pixIdx * 3); + int _fpixel = panels_fasts_slows(pixIdx * 3 + 1); + int _spixel = panels_fasts_slows(pixIdx * 3 + 2); + + CUDAREAL _Fdet_ave = pixel_size * _fpixel + pixel_size / 2.0; + CUDAREAL _Sdet_ave = pixel_size * _spixel + pixel_size / 2.0; + CUDAREAL _Odet_ave = 0; // Odet; + // TODO maybe make this more general for thick detectors? + + KOKKOS_VEC3 _pixel_pos_ave(0, 0, 0); + int pid_x = _pid * 3; + int pid_y = _pid * 3 + 1; + int pid_z = _pid * 3 + 2; + + CUDAREAL fx = fdet_vectors(pid_x); + CUDAREAL fy = fdet_vectors(pid_y); + CUDAREAL fz = fdet_vectors(pid_z); + + CUDAREAL sx = sdet_vectors(pid_x); + CUDAREAL sy = sdet_vectors(pid_y); + CUDAREAL sz = sdet_vectors(pid_z); + + CUDAREAL ox = odet_vectors(pid_x); + CUDAREAL oy = odet_vectors(pid_y); + CUDAREAL oz = odet_vectors(pid_z); + + CUDAREAL px = pix0_vectors(pid_x); + CUDAREAL py = pix0_vectors(pid_y); + CUDAREAL pz = pix0_vectors(pid_z); + + _pixel_pos_ave[0] = _Fdet_ave * fx + _Sdet_ave * sx + _Odet_ave * ox + px; + _pixel_pos_ave[1] = _Fdet_ave * fy + _Sdet_ave * sy + _Odet_ave * oy + py; + _pixel_pos_ave[2] = _Fdet_ave * fz + _Sdet_ave * sz + _Odet_ave * oz + pz; + + CUDAREAL close_distance = close_distances(_pid); + + CUDAREAL _airpath_ave_r = 1 / _pixel_pos_ave.length(); + KOKKOS_VEC3 _diffracted_ave = _pixel_pos_ave.get_unit_vector(); + CUDAREAL _omega_pixel_ave = pixel_size * pixel_size * _airpath_ave_r * _airpath_ave_r * + close_distance * _airpath_ave_r; + + CUDAREAL _polar = 1; + if (!nopolar) { + KOKKOS_VEC3 _incident(-source_X(0), -source_Y(0), -source_Z(0)); + _incident.normalize(); + // component of diffracted unit vector along _incident beam unit vector + CUDAREAL cos2theta = _incident.dot(_diffracted_ave); + CUDAREAL cos2theta_sqr = cos2theta * cos2theta; + CUDAREAL sin2theta_sqr = 1 - cos2theta_sqr; + + CUDAREAL cos2psi = 0; + if (kahn_factor != 0.0) { + // cross product to get "vertical" axis that is orthogonal to the cannonical + // "polarization" + KOKKOS_VEC3 B_in = polarization_axis.cross(_incident); + // cross product with _incident beam to get E-vector direction + KOKKOS_VEC3 E_in = _incident.cross(B_in); + // get components of diffracted ray projected onto the E-B plane + CUDAREAL _kEi = _diffracted_ave.dot(E_in); + CUDAREAL _kBi = _diffracted_ave.dot(B_in); + // compute the angle of the diffracted ray projected onto the incident E-B plane + // calculate cos(2 * atan2(_kBi, _kEi)) + if (_kEi!=0) { + CUDAREAL ratio = _kBi / _kEi; + cos2psi = (1 - ratio*ratio) / (1 + ratio*ratio); + } else { + cos2psi = -1; + } + } + // correction for polarized _incident beam + _polar = 0.5 * (1.0 + cos2theta_sqr - kahn_factor * cos2psi * sin2theta_sqr); + } + + CUDAREAL _om = 1; + if (!oversample_omega) + _om = _omega_pixel_ave; + // final scale term to being everything to photon number units + CUDAREAL _scale_term = _polar * _om * overall_scale; + floatimage(pixIdx) *= _scale_term; + + auto& dI = manager_dI(pixIdx); + auto& dI2 = manager_dI2(pixIdx); + + // udpate the rotation derivative images* + for (int i_rot = 0; i_rot < 3; i_rot++) { + if (refine_flag & (REFINE_UMAT1 << i_rot)) { + CUDAREAL value = _scale_term * dI.rot[i_rot]; + CUDAREAL value2 = _scale_term * dI2.rot[i_rot]; + 
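+            // derivative images are laid out parameter-major: the entry for parameter i and pixel pixIdx sits at index i * Npix_to_model + pixIdx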
int idx = i_rot * Npix_to_model + pixIdx; + d_Umat_images(idx) = value; + d2_Umat_images(idx) = value2; + } + } // end rot deriv image increment + + // update the ucell derivative images + for (int i_uc = 0; i_uc < 6; i_uc++) { + if (refine_flag & (REFINE_BMAT1 << i_uc)) { + CUDAREAL value = _scale_term * dI.ucell[i_uc]; + CUDAREAL value2 = _scale_term * dI2.ucell[i_uc]; + int idx = i_uc * Npix_to_model + pixIdx; + d_Bmat_images(idx) = value; + d2_Bmat_images(idx) = value2; + } + } // end ucell deriv image increment + + // update the Ncells derivative image + if (refine_flag & REFINE_NCELLS1) { + CUDAREAL value = _scale_term * dI.Ncells[0]; + CUDAREAL value2 = _scale_term * dI2.Ncells[0]; + int idx = pixIdx; + d_Ncells_images(idx) = value; + d2_Ncells_images(idx) = value2; + + if (!isotropic_ncells) { + value = _scale_term * dI.Ncells[1]; + value2 = _scale_term * dI2.Ncells[1]; + idx = Npix_to_model + pixIdx; + d_Ncells_images(idx) = value; + d2_Ncells_images(idx) = value2; + + value = _scale_term * dI.Ncells[2]; + value2 = _scale_term * dI2.Ncells[2]; + idx = Npix_to_model * 2 + pixIdx; + d_Ncells_images(idx) = value; + d2_Ncells_images(idx) = value2; + } + } // end Ncells deriv image increment + if (refine_flag & REFINE_NCELLS_DEF) { + for (int i_nc = 3; i_nc < 6; i_nc++) { + CUDAREAL value = _scale_term * dI.Ncells[i_nc]; + CUDAREAL value2 = _scale_term * dI2.Ncells[i_nc]; + int idx = i_nc * Npix_to_model + pixIdx; + d_Ncells_images(idx) = value; + d2_Ncells_images(idx) = value2; + } + } + + // update Fcell derivative image + if (refine_flag & REFINE_FCELL) { + CUDAREAL value = _scale_term * dI.fcell; + CUDAREAL value2 = _scale_term * dI2.fcell; + d_fcell_images(pixIdx) = value; + d2_fcell_images(pixIdx) = value2; + } // end Fcell deriv image increment + + if (refine_flag & REFINE_FP_FDP) { + // c derivative + CUDAREAL value = _scale_term * dI.fp_fdp[0]; + d_fp_fdp_images(pixIdx) = value; + // d derivative + value = _scale_term * dI.fp_fdp[1]; + d_fp_fdp_images(Npix_to_model + pixIdx) = value; + } + if (refine_flag & REFINE_DIFFUSE) { + for (int i_gam = 0; i_gam < 3; i_gam++) { + CUDAREAL val = dI.diffuse[i_gam] * _scale_term; + int img_idx = Npix_to_model * i_gam + pixIdx; + d_diffuse_gamma_images(img_idx) = val; + } + for (int i_sig = 0; i_sig < 3; i_sig++) { + CUDAREAL val = dI.diffuse[i_sig + 3] * _scale_term; + int img_idx = Npix_to_model * i_sig + pixIdx; + d_diffuse_sigma_images(img_idx) = val; + } + } + + // update eta derivative image + if (refine_flag & REFINE_ETA) { + for (int i_eta = 0; i_eta < 3; i_eta++) { + if (i_eta > 0 && !aniso_eta) + continue; + int idx = pixIdx + Npix_to_model * i_eta; + CUDAREAL value = _scale_term * dI.eta[i_eta]; + CUDAREAL value2 = _scale_term * dI2.eta[i_eta]; + d_eta_images(idx) = value; + d2_eta_images(idx) = value2; + } + } // end eta deriv image increment + + // update the lambda derivative images + for (int i_lam = 0; i_lam < 2; i_lam++) { + if (refine_flag & (REFINE_LAMBDA1 << i_lam)) { + CUDAREAL value = _scale_term * dI.lambda[i_lam]; + CUDAREAL value2 = _scale_term * dI2.lambda[i_lam]; + int idx = i_lam * Npix_to_model + pixIdx; + d_lambda_images(idx) = value; + // d2_lambda_images(idx) = value2; + } + } // end lambda deriv image increment + + for (int i_pan_rot = 0; i_pan_rot < 3; i_pan_rot++) { + if (refine_flag & (REFINE_PANEL_ROT1 << i_pan_rot)) { + CUDAREAL value = _scale_term * dI.pan_rot[i_pan_rot]; + CUDAREAL value2 = _scale_term * dI2.pan_rot[i_pan_rot]; + int idx = i_pan_rot * Npix_to_model + pixIdx; + d_panel_rot_images(idx) 
= value; + // d2_panel_rot_images(idx) = value2; + } + } // end panel rot deriv image increment + + for (int i_pan_orig = 0; i_pan_orig < 3; i_pan_orig++) { + if (refine_flag & (REFINE_PANEL_ORIGIN1 << i_pan_orig)) { + CUDAREAL value = _scale_term * dI.pan_orig[i_pan_orig]; + CUDAREAL value2 = _scale_term * dI2.pan_orig[i_pan_orig]; + int idx = i_pan_orig * Npix_to_model + pixIdx; + d_panel_orig_images(idx) = value; + // d2_panel_orig_images(idx) = value2; + } + } // end panel orig deriv image increment + }); // end pixIdx loop + +} // END of GPU kernel + +////////////////////////////////////////////////////////////////////////////////// + +template < + bool printout, + bool complex_miller, + bool compute_curvatures, + uint32_t refine_flag, + bool use_diffuse, + bool save_wavelenimage, + bool Fhkl_gradient_mode, + bool Fhkl_errors_mode, + bool using_trusted_mask, + bool Fhkl_channels_empty, + bool Fhkl_have_scale_factors> +void kokkos_sum_over_steps( + int Npix_to_model, + vector_uint_t panels_fasts_slows, + vector_cudareal_t floatimage, + vector_cudareal_t wavelenimage, + vector_cudareal_t d_Umat_images, + vector_cudareal_t d2_Umat_images, + vector_cudareal_t d_Bmat_images, + vector_cudareal_t d2_Bmat_images, + vector_cudareal_t d_Ncells_images, + vector_cudareal_t d2_Ncells_images, + vector_cudareal_t d_fcell_images, + vector_cudareal_t d2_fcell_images, + vector_cudareal_t d_eta_images, + vector_cudareal_t d2_eta_images, + vector_cudareal_t d_lambda_images, + vector_cudareal_t d2_lambda_images, + vector_cudareal_t d_panel_rot_images, + vector_cudareal_t d2_panel_rot_images, + vector_cudareal_t d_panel_orig_images, + vector_cudareal_t d2_panel_orig_images, + vector_cudareal_t d_fp_fdp_images, + vector_manager_t manager_dI, + vector_manager_t manager_dI2, + const int Nsteps, + int printout_fpixel, + int printout_spixel, + /*bool printout,*/ + CUDAREAL default_F, + int oversample, + bool oversample_omega, + CUDAREAL subpixel_size, + CUDAREAL pixel_size, + CUDAREAL detector_thickstep, + CUDAREAL detector_thick, + const vector_cudareal_t close_distances, + CUDAREAL detector_attnlen, + int detector_thicksteps, + int sources, + int phisteps, + int mosaic_domains, + bool use_lambda_coefficients, + CUDAREAL lambda0, + CUDAREAL lambda1, + KOKKOS_MAT3 eig_U, + KOKKOS_MAT3 eig_O, + KOKKOS_MAT3 eig_B, + KOKKOS_MAT3 RXYZ, + vector_vec3_t dF_vecs, + vector_vec3_t dS_vecs, + const vector_mat3_t UMATS_RXYZ, + vector_mat3_t UMATS_RXYZ_prime, + vector_mat3_t UMATS_RXYZ_dbl_prime, + vector_mat3_t RotMats, + vector_mat3_t dRotMats, + vector_mat3_t d2RotMats, + vector_mat3_t UMATS, + vector_mat3_t dB_mats, + vector_mat3_t dB2_mats, + vector_mat3_t Amatrices, + const vector_cudareal_t source_X, + const vector_cudareal_t source_Y, + const vector_cudareal_t source_Z, + const vector_cudareal_t source_lambda, + const vector_cudareal_t source_I, + CUDAREAL kahn_factor, + CUDAREAL Na, + CUDAREAL Nb, + CUDAREAL Nc, + CUDAREAL Nd, + CUDAREAL Ne, + CUDAREAL Nf, + CUDAREAL phi0, + CUDAREAL phistep, + KOKKOS_VEC3 spindle_vec, + KOKKOS_VEC3 polarization_axis, + int h_range, + int k_range, + int l_range, + int h_max, + int h_min, + int k_max, + int k_min, + int l_max, + int l_min, + CUDAREAL dmin, + CUDAREAL fudge, + /*bool complex_miller,*/ + int verbose, + bool only_save_omega_kahn, + bool isotropic_ncells, + /*bool compute_curvatures,*/ + const vector_cudareal_t FhklLinear, + const vector_cudareal_t Fhkl2Linear, + /*const uint32_t refine_flag,*/ + // vector_bool_t refine_Bmat, + // vector_bool_t refine_Ncells, + // bool 
refine_Ncells_def, + // vector_bool_t refine_panel_origin, + // vector_bool_t refine_panel_rot, + // bool refine_fcell, + // vector_bool_t refine_lambda, + // bool refine_eta, + // vector_bool_t refine_Umat, + const vector_cudareal_t fdet_vectors, + const vector_cudareal_t sdet_vectors, + const vector_cudareal_t odet_vectors, + const vector_cudareal_t pix0_vectors, + bool nopolar, + bool point_pixel, + CUDAREAL fluence, + CUDAREAL r_e_sqr, + CUDAREAL spot_scale, + int Npanels, + bool aniso_eta, + bool no_Nabc_scale, + const vector_cudareal_t fpfdp, + const vector_cudareal_t fpfdp_derivs, + const vector_cudareal_t atom_data, + int num_atoms, + // bool refine_fp_fdp, + const vector_int_t nominal_hkl, + bool use_nominal_hkl, + KOKKOS_MAT3 anisoU, + KOKKOS_MAT3 anisoG, + KOKKOS_MAT3 rotate_principal_axes, + /*bool use_diffuse,*/ + vector_cudareal_t d_diffuse_gamma_images, + vector_cudareal_t d_diffuse_sigma_images, + // bool refine_diffuse, + bool gamma_miller_units, + // bool refine_Icell, + /*bool save_wavelenimage,*/ + int laue_group_num, + int stencil_size, + /*bool Fhkl_gradient_mode,*/ + /*bool Fhkl_errors_mode,*/ + /*bool using_trusted_mask,*/ + /*bool Fhkl_channels_empty,*/ + /*bool Fhkl_have_scale_factors,*/ + int Num_ASU, + const vector_cudareal_t data_residual, + const vector_cudareal_t data_variance, + const vector_int_t data_freq, + const vector_bool_t data_trusted, + const vector_int_t FhklLinear_ASUid, + const vector_int_t Fhkl_channels, + const vector_cudareal_t Fhkl_scale, + vector_cudareal_t Fhkl_scale_deriv) { // BEGIN GPU kernel + + const KOKKOS_MAT3 Bmat_realspace = eig_B * 1e10; + const KOKKOS_MAT3 eig_Otranspose = eig_O.transpose(); + const KOKKOS_MAT3 Amat_init = eig_U * Bmat_realspace * eig_Otranspose; + const KOKKOS_MAT3 Ainv = eig_U*(Bmat_realspace.transpose().inverse())* (eig_O.inverse()); + const KOKKOS_MAT3 _NABC {Na, Nd, Nf, Nd, Nb, Ne, Nf, Ne, Nc}; + const double NABC_det = _NABC.determinant(); // TODO is this slow ? 
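+    // NABC_det_sq scales the Gaussian lattice factor I0 = NABC_det_sq * exp(-C * |NABC*deltaH|^2) used below (prefactor dropped when no_Nabc_scale); C = 2/0.63 * fudge sets the profile width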
+ const double NABC_det_sq = NABC_det * NABC_det; + const CUDAREAL C = 2 / 0.63 * fudge; + const CUDAREAL two_C = 2 * C; + KOKKOS_MAT3 anisoG_local; + CUDAREAL anisoG_determ = 0; + KOKKOS_MAT3 anisoU_local; + vector_mat3_t laue_mats = vector_mat3_t("laue_mats", 24); + vector_vec3_t dG_dgam = vector_vec3_t("dG_dgam", 3); + vector_cudareal_t dG_trace = vector_cudareal_t("dG_trace", 3); + int num_laue_mats = 0; + int dhh = 0, dkk = 0, dll = 0; + + Kokkos::View UMATS_prime("UMATS_prime", mosaic_domains); + Kokkos::View UMATS_dbl_prime("UMATS_dbl_prime", mosaic_domains); + Kokkos::View BMATS_prime("BMATS_prime", mosaic_domains); + Kokkos::View BMATS_dbl_prime("BMATS_dbl_prime", mosaic_domains); + + Kokkos::parallel_for("prepare_UMATS", mosaic_domains, KOKKOS_LAMBDA(const int& _mos_tic) { + const KOKKOS_MAT3 UBOt = Amat_init; + UMATS_prime(_mos_tic, 0) = _NABC * (UMATS(_mos_tic) * dRotMats(0) * RotMats(1) * RotMats(2) * UBOt).transpose(); + UMATS_prime(_mos_tic, 1) = _NABC * (UMATS(_mos_tic) * RotMats(0) * dRotMats(1) * RotMats(2) * UBOt).transpose(); + UMATS_prime(_mos_tic, 2) = _NABC * (UMATS(_mos_tic) * RotMats(0) * RotMats(1) * dRotMats(2) * UBOt).transpose(); + + UMATS_dbl_prime(_mos_tic, 0) = _NABC * (UMATS(_mos_tic) * d2RotMats(0) * RotMats(1) * RotMats(2) * UBOt).transpose(); + UMATS_dbl_prime(_mos_tic, 1) = _NABC * (UMATS(_mos_tic) * RotMats(0) * d2RotMats(1) * RotMats(2) * UBOt).transpose(); + UMATS_dbl_prime(_mos_tic, 2) = _NABC * (UMATS(_mos_tic) * RotMats(0) * RotMats(1) * d2RotMats(2) * UBOt).transpose(); + + for (int i_uc=0; i_uc<6; i_uc++) { + BMATS_prime(_mos_tic, i_uc) = _NABC * (UMATS_RXYZ(_mos_tic) * eig_U * dB_mats(i_uc) * eig_O.transpose()).transpose(); + BMATS_dbl_prime(_mos_tic, i_uc) = _NABC * (UMATS_RXYZ(_mos_tic) * eig_U * dB2_mats(i_uc) * eig_O.transpose()).transpose(); + } + }); + + if (use_diffuse){ + anisoG_local = anisoG; + anisoU_local = anisoU; + + if (laue_group_num < 1 || laue_group_num >14 ){ + throw std::string("Laue group number not in range 1-14"); + } + + if (gamma_miller_units){ + anisoG_local = anisoG_local * Bmat_realspace; + } + Kokkos::parallel_reduce("prepare diffuse mats", 1, KOKKOS_LAMBDA (const int& i, int& num_laue_mats_temp){ + num_laue_mats_temp = gen_laue_mats(laue_group_num, laue_mats, rotate_principal_axes); + // KOKKOS_MAT3 rotate_principal_axes; + // rotate_principal_axes << 0.70710678, -0.70710678, 0., 0.70710678, 0.70710678, 0., 0., 0., 1.; + + for ( int iL = 0; iL < num_laue_mats_temp; iL++ ){ + laue_mats(iL) = Ainv * laue_mats(iL) * rotate_principal_axes; + } + // printf("Bmat ="); + // for (int i=0; i<9; ++i) { + // printf(" %g", Bmat_realspace[i]); + // } + // printf("\n"); + const KOKKOS_MAT3 Ginv = anisoG_local.inverse(); + // printf("Ginv ="); + // for (int i=0; i<9; ++i) { + // printf(" %g", Ginv[i]); + // } + // printf("\n"); + const KOKKOS_MAT3 dG = Bmat_realspace * Ginv; + // printf("dG ="); + // for (int i=0; i<9; ++i) { + // printf(" %g", dG[i]); + // } + // printf("\n"); + for (int i_gam=0; i_gam<3; i_gam++){ + if (gamma_miller_units) { + dG_dgam(i_gam) = KOKKOS_VEC3(Bmat_realspace(i_gam, 0), Bmat_realspace(i_gam, 1), Bmat_realspace(i_gam, 2)); + } else { + dG_dgam(i_gam)[i_gam] = 1; + } + KOKKOS_MAT3 temp_dgam; + temp_dgam(i_gam, 0) = dG_dgam(i_gam)[0]; + temp_dgam(i_gam, 1) = dG_dgam(i_gam)[1]; + temp_dgam(i_gam, 2) = dG_dgam(i_gam)[2]; + dG_trace(i_gam) = (Ginv*temp_dgam).trace(); + // printf("TRACE %g\n", dG_trace(i_gam)); + // printf("dgam ="); + // for (int i=0; i<9; ++i) { + // printf(" %g", temp_dgam[i]); + // } + 
// printf("\n"); + + // dG(i_gam, i_gam); + } + }, num_laue_mats); + anisoG_determ = anisoG_local.determinant(); + dhh = dkk = dll = stencil_size; // Limits of stencil for diffuse calc + } + const KOKKOS_VEC3 dHH (dhh, dkk, dll); + + const CUDAREAL overall_scale = r_e_sqr * spot_scale * fluence / Nsteps; + + const CUDAREAL detector_attnlen_r = (detector_attnlen>0) ? 1 / detector_attnlen : 0; + + Kokkos::parallel_for( + "sum_over_steps", Npix_to_model, KOKKOS_LAMBDA(const int& pixIdx) { + + if (using_trusted_mask) { + if (!data_trusted(pixIdx)) + return; + } + const int _pid = panels_fasts_slows(pixIdx * 3); + const int _fpixel = panels_fasts_slows(pixIdx * 3 + 1); + const int _spixel = panels_fasts_slows(pixIdx * 3 + 2); + + CUDAREAL Fhkl_deriv_coef=0; + CUDAREAL Fhkl_hessian_coef=0; + if (Fhkl_gradient_mode) { + CUDAREAL u = data_residual(pixIdx); + CUDAREAL one_by_v = 1/data_variance(pixIdx); + CUDAREAL Gterm = 1 - 2*u - u*u*one_by_v; + Fhkl_deriv_coef = 0.5 * Gterm*one_by_v / data_freq(pixIdx); + if (Fhkl_errors_mode) { + Fhkl_hessian_coef = -0.5*one_by_v*(one_by_v*Gterm - 2 - 2*u*one_by_v -u*u*one_by_v*one_by_v)/data_freq(pixIdx); + } + } + + // int fcell_idx=1; + int nom_h = 0, nom_k = 0, nom_l = 0; + if (use_nominal_hkl) { + nom_h = nominal_hkl(pixIdx * 3); + nom_k = nominal_hkl(pixIdx * 3 + 1); + nom_l = nominal_hkl(pixIdx * 3 + 2); + } + CUDAREAL close_distance = close_distances(_pid); + + // reset photon count for this pixel + double _I = 0; + double Ilambda = 0; + + kokkos_manager dI, dI2; + dI.reset(); + dI2.reset(); + + for (int _subS = 0; _subS < oversample; ++_subS) { + for (int _subF = 0; _subF < oversample; ++_subF) { + // absolute mm position on detector (relative to its origin) + CUDAREAL _Fdet = + subpixel_size * (_fpixel * oversample + _subF) + subpixel_size / 2.0; + CUDAREAL _Sdet = + subpixel_size * (_spixel * oversample + _subS) + subpixel_size / 2.0; + + // assume "distance" is to the front of the detector sensor layer + int pid_x = _pid * 3; + int pid_y = _pid * 3 + 1; + int pid_z = _pid * 3 + 2; + + + CUDAREAL fx = fdet_vectors(pid_x); + CUDAREAL fy = fdet_vectors(pid_y); + CUDAREAL fz = fdet_vectors(pid_z); + CUDAREAL sx = sdet_vectors(pid_x); + CUDAREAL sy = sdet_vectors(pid_y); + CUDAREAL sz = sdet_vectors(pid_z); + CUDAREAL ox = odet_vectors(pid_x); + CUDAREAL oy = odet_vectors(pid_y); + CUDAREAL oz = odet_vectors(pid_z); + CUDAREAL px = pix0_vectors(pid_x); + CUDAREAL py = pix0_vectors(pid_y); + CUDAREAL pz = pix0_vectors(pid_z); + KOKKOS_VEC3 _o_vec(ox, oy, oz); + + for (int _thick_tic = 0; _thick_tic < detector_thicksteps; ++_thick_tic) { + + CUDAREAL _Odet = _thick_tic * detector_thickstep; + + CUDAREAL pixposX = _Fdet * fx + _Sdet * sx + _Odet * ox + px; + CUDAREAL pixposY = _Fdet * fy + _Sdet * sy + _Odet * oy + py; + CUDAREAL pixposZ = _Fdet * fz + _Sdet * sz + _Odet * oz + pz; + KOKKOS_VEC3 _pixel_pos(pixposX, pixposY, pixposZ); + + CUDAREAL _airpath_r = 1 / _pixel_pos.length(); + KOKKOS_VEC3 _diffracted = _pixel_pos.get_unit_vector(); + + const CUDAREAL close_distance = close_distances(_pid); + + // solid angle subtended by a pixel: (pix/airpath)^2*cos(2theta) + CUDAREAL _omega_pixel = pixel_size * pixel_size * _airpath_r * _airpath_r * + close_distance * _airpath_r; + + // option to turn off obliquity effect, inverse-square-law only + if (point_pixel) + _omega_pixel = _airpath_r * _airpath_r; + + // now calculate detector thickness effects + CUDAREAL _capture_fraction = 1; + + CUDAREAL previous_layer = 1.0; + if (detector_thick > 0.0 && 
detector_attnlen_r > 0.0) { + // inverse of effective thickness increase + KOKKOS_VEC3 _o_vec(ox, oy, oz); + CUDAREAL _parallax = _diffracted.dot(_o_vec); + CUDAREAL current_layer = ::Kokkos::exp( + -(_thick_tic + 1) * detector_thickstep * + detector_attnlen_r / _parallax); + _capture_fraction = previous_layer - current_layer; + previous_layer = current_layer; + } + + for (int _source = 0; _source < sources; ++_source) { + + KOKKOS_VEC3 _incident( + -source_X(_source), -source_Y(_source), -source_Z(_source)); + CUDAREAL _lambda = source_lambda(_source); + CUDAREAL sI = source_I(_source); + CUDAREAL lambda_ang = _lambda * 1e10; + if (use_lambda_coefficients) { + lambda_ang = lambda0 + lambda1 * lambda_ang; + _lambda = lambda_ang * 1e-10; + } + + // polarization + CUDAREAL polar_for_Fhkl_grad=1; + if (!nopolar && Fhkl_gradient_mode){ + + // component of diffracted unit vector along incident beam unit vector + CUDAREAL cos2theta = _incident.dot(_diffracted); + CUDAREAL cos2theta_sqr = cos2theta*cos2theta; + CUDAREAL sin2theta_sqr = 1-cos2theta_sqr; + + CUDAREAL cos2psi=1; + if(kahn_factor != 0.0){ + // cross product to get "vertical" axis that is orthogonal to the cannonical "polarization" + KOKKOS_VEC3 B_in = polarization_axis.cross(_incident); + // cross product with incident beam to get E-vector direction + KOKKOS_VEC3 E_in = _incident.cross(B_in); + // get components of diffracted ray projected onto the E-B plane + CUDAREAL _kEi = _diffracted.dot(E_in); + CUDAREAL _kBi = _diffracted.dot(B_in); + // compute the angle of the diffracted ray projected onto the incident E-B plane + // calculate cos(2 * atan2(_kBi, _kEi)) + if (_kEi!=0) { + CUDAREAL ratio = _kBi / _kEi; + cos2psi = (1 - ratio*ratio) / (1 + ratio*ratio); + } else { + cos2psi = -1; + } + } + // correction for polarized incident beam + polar_for_Fhkl_grad = 0.5*(1.0 + cos2theta_sqr - kahn_factor*cos2psi*sin2theta_sqr); + } + KOKKOS_VEC3 _scattering = (_diffracted - _incident) / _lambda; + + KOKKOS_VEC3 q_vec = _scattering * 1e-10; + + // TODO rename + CUDAREAL texture_scale = _capture_fraction * _omega_pixel * sI; + + for (int _mos_tic = 0; _mos_tic < mosaic_domains; ++_mos_tic) { + const KOKKOS_MAT3 UBO = Amatrices(_mos_tic); + + KOKKOS_VEC3 H_vec = UBO * q_vec; + CUDAREAL _h = H_vec[0]; + CUDAREAL _k = H_vec[1]; + CUDAREAL _l = H_vec[2]; + + int _h0 = ceil(_h - 0.5); + int _k0 = ceil(_k - 0.5); + int _l0 = ceil(_l - 0.5); + + KOKKOS_VEC3 H0(_h0, _k0, _l0); + + KOKKOS_VEC3 delta_H = H_vec - H0; + KOKKOS_VEC3 V = _NABC * delta_H; + CUDAREAL _hrad_sqr = V.length_sqr(); + CUDAREAL exparg = _hrad_sqr * C / 2; + CUDAREAL I0 = 0; + + if (exparg < 35) + if (no_Nabc_scale) + I0 = ::Kokkos::exp(-2 * exparg); + else + I0 = (NABC_det_sq) * + ::Kokkos::exp(-2 * exparg); + + // are we doing diffuse scattering + CUDAREAL step_diffuse_param[6] = {0, 0, 0, 0, 0, 0}; + if (use_diffuse) { + calc_diffuse_at_hkl(H_vec,H0,dHH,h_min,k_min,l_min,h_max,k_max,l_max,h_range,k_range,l_range,Ainv,FhklLinear,num_laue_mats,laue_mats,anisoG_local,dG_trace,anisoG_determ,anisoU_local,dG_dgam,(refine_flag & REFINE_DIFFUSE)>0,&I0,step_diffuse_param); + } // end s_use_diffuse outer + + CUDAREAL _F_cell = default_F; + CUDAREAL _F_cell2 = 0; + int i_hklasu=0; + + if ((_h0 <= h_max) && (_h0 >= h_min) && + (_k0 <= k_max) && (_k0 >= k_min) && + (_l0 <= l_max) && (_l0 >= l_min)) { + int Fhkl_linear_index = (_h0 - h_min) * k_range * l_range + + (_k0 - k_min) * l_range + (_l0 - l_min); + //_F_cell = __ldg(&FhklLinear[Fhkl_linear_index]); + _F_cell = 
FhklLinear(Fhkl_linear_index); + // if (complex_miller) _F_cell2 = + // __ldg(&Fhkl2Linear[Fhkl_linear_index]); + if (complex_miller) + _F_cell2 = Fhkl2Linear(Fhkl_linear_index); + if (Fhkl_have_scale_factors) + i_hklasu = FhklLinear_ASUid(Fhkl_linear_index); + } + + CUDAREAL c_deriv_Fcell = 0; + CUDAREAL d_deriv_Fcell = 0; + if (complex_miller) { + CUDAREAL c_deriv_Fcell_real = 0; + CUDAREAL c_deriv_Fcell_imag = 0; + CUDAREAL d_deriv_Fcell_real = 0; + CUDAREAL d_deriv_Fcell_imag = 0; + if (num_atoms > 0) { + CUDAREAL S_2 = (q_vec[0] * q_vec[0] + + q_vec[1] * q_vec[1] + + q_vec[2] * q_vec[2]); + + // fp is always followed by the fdp value + CUDAREAL val_fp = fpfdp(2 * _source); + CUDAREAL val_fdp = fpfdp(2 * _source + 1); + + CUDAREAL c_deriv_prime = 0; + CUDAREAL c_deriv_dblprime = 0; + CUDAREAL d_deriv_prime = 0; + CUDAREAL d_deriv_dblprime = 0; + if (refine_flag & REFINE_FP_FDP) { + // currently only supports two parameter model + int d_idx = 2 * _source; + c_deriv_prime = fpfdp_derivs(d_idx); + c_deriv_dblprime = fpfdp_derivs(d_idx + 1); + d_deriv_prime = fpfdp_derivs(d_idx + 2 * sources); + d_deriv_dblprime = + fpfdp_derivs(d_idx + 1 + 2 * sources); } - } // end Ncells manager deriv - - if (refine_Ncells_def) { - for (int i_nc = 3; i_nc < 6; i_nc++) { - KOKKOS_MAT3 dN; - if (i_nc == 3) - dN = KOKKOS_MAT3{0, 1, 0, 1, 0, 0, 0, 0, 0}; - else if (i_nc == 4) - dN = KOKKOS_MAT3{0, 0, 0, 0, 0, 1, 0, 1, 0}; - else - dN = KOKKOS_MAT3{0, 0, 1, 0, 0, 0, 1, 0, 0}; - KOKKOS_VEC3 dV_dN = dN.dot(delta_H); - // TODO speedops: precompute these - CUDAREAL determ_deriv = (_NABC.inverse().dot(dN)).trace(); - CUDAREAL deriv_coef = determ_deriv - C * (dV_dN.dot(V)); - CUDAREAL value = 2 * Iincrement * deriv_coef; - Ncells_manager_dI[i_nc] += value; - CUDAREAL value2 = 0; - if (compute_curvatures) { - value2 = deriv_coef * value; - value2 += -2 * C * Iincrement * (dV_dN.dot(dV_dN)); - Ncells_manager_dI2[i_nc] += value2; + + for (int i_atom = 0; i_atom < num_atoms; i_atom++) { + // fractional atomic coordinates + CUDAREAL atom_x = atom_data(i_atom * 5); + CUDAREAL atom_y = atom_data(i_atom * 5 + 1); + CUDAREAL atom_z = atom_data(i_atom * 5 + 2); + CUDAREAL B = atom_data(i_atom * 5 + 3); // B factor + B = ::Kokkos::exp( + -B * S_2 / 4.0); // TODO: speed me up? + CUDAREAL occ = atom_data(i_atom * 5 + 4); // occupancy + CUDAREAL r_dot_h = + _h0 * atom_x + _k0 * atom_y + _l0 * atom_z; + CUDAREAL phase = 2 * M_PI * r_dot_h; + CUDAREAL s_rdoth = ::Kokkos::sin(phase); + CUDAREAL c_rdoth = ::Kokkos::cos(phase); + CUDAREAL Bocc = B * occ; + CUDAREAL BC = B * c_rdoth; + CUDAREAL BS = B * s_rdoth; + CUDAREAL real_part = BC * val_fp - BS * val_fdp; + CUDAREAL imag_part = BS * val_fp + BC * val_fdp; + _F_cell += real_part; + _F_cell2 += imag_part; + if (refine_flag & REFINE_FP_FDP) { + c_deriv_Fcell_real += + BC * c_deriv_prime - BS * c_deriv_dblprime; + c_deriv_Fcell_imag += + BS * c_deriv_prime + BC * c_deriv_dblprime; + + d_deriv_Fcell_real += + BC * d_deriv_prime - BS * d_deriv_dblprime; + d_deriv_Fcell_imag += + BS * d_deriv_prime + BC * d_deriv_dblprime; } } } + CUDAREAL Freal = _F_cell; + CUDAREAL Fimag = _F_cell2; + _F_cell = + ::Kokkos::sqrt(Freal * Freal + Fimag * Fimag); + if (refine_flag & REFINE_FP_FDP) { + c_deriv_Fcell = + Freal * c_deriv_Fcell_real + Fimag * c_deriv_Fcell_imag; + d_deriv_Fcell = + Freal * d_deriv_Fcell_real + Fimag * d_deriv_Fcell_imag; + } + } + if (!oversample_omega && ! 
Fhkl_gradient_mode) + _omega_pixel = 1; + + CUDAREAL _I_cell = _F_cell; + if (!(refine_flag & REFINE_ICELL)) + _I_cell *= _F_cell; + CUDAREAL hkl=1; + int Fhkl_channel=0; + if (! Fhkl_channels_empty) + Fhkl_channel = Fhkl_channels(_source); + if (Fhkl_have_scale_factors) + hkl = Fhkl_scale(i_hklasu + Fhkl_channel*Num_ASU); + if (Fhkl_gradient_mode){ + CUDAREAL Fhkl_deriv_scale = overall_scale*polar_for_Fhkl_grad; + CUDAREAL I_noFcell=texture_scale*I0; + CUDAREAL dfhkl = I_noFcell*_I_cell * Fhkl_deriv_scale; + CUDAREAL grad_incr = dfhkl*Fhkl_deriv_coef; + int fhkl_grad_idx=i_hklasu + Fhkl_channel*Num_ASU; + + if (Fhkl_errors_mode){ + // here we hi-kack the Fhkl_scale_deriv array, if computing errors, in order to store the hessian terms + // if we are getting the hessian terms, we no longer need the gradients (e.g. by this point we are done refininig) + CUDAREAL hessian_incr = Fhkl_hessian_coef*dfhkl*dfhkl; + ::Kokkos::atomic_add(&Fhkl_scale_deriv(fhkl_grad_idx), hessian_incr); + } + else{ + ::Kokkos::atomic_add(&Fhkl_scale_deriv(fhkl_grad_idx), grad_incr); + } + continue; + } - // Checkpoint for Origin manager - for (int i_pan_orig = 0; i_pan_orig < 3; i_pan_orig++) { - if (refine_panel_origin(i_pan_orig)) { - CUDAREAL per_k = 1 / _airpath; - CUDAREAL per_k3 = pow(per_k, 3.); - CUDAREAL per_k5 = pow(per_k, 5.); - CUDAREAL lambda_ang = _lambda * 1e10; - - KOKKOS_MAT3 M = -two_C * (_NABC.dot(UBO)) / lambda_ang; - KOKKOS_VEC3 dk; - if (i_pan_orig == 0) - dk = KOKKOS_VEC3{0, 0, 1}; - else if (i_pan_orig == 1) - dk = KOKKOS_VEC3{1, 0, 0}; - else - dk = KOKKOS_VEC3{0, 1, 0}; - - CUDAREAL G = dk.dot(_pixel_pos); - CUDAREAL pix2 = subpixel_size * subpixel_size; - KOKKOS_VEC3 dk_hat = -per_k3 * G * _pixel_pos + per_k * dk; - CUDAREAL coef = (M.dot(dk_hat)).dot(V); - CUDAREAL coef2 = - -3 * pix2 * per_k5 * G * (_o_vec.dot(_pixel_pos)); - coef2 += pix2 * per_k3 * (_o_vec.dot(dk)); - CUDAREAL value = - coef * Iincrement + coef2 * Iincrement / _omega_pixel; - - pan_orig_manager_dI[i_pan_orig] += value; - pan_orig_manager_dI2[i_pan_orig] += 0; - - } // end origin manager deriv + CUDAREAL _I_total = hkl*_I_cell *I0; + CUDAREAL Iincrement = _I_total * texture_scale; + _I += Iincrement; + if (save_wavelenimage) + Ilambda += Iincrement * lambda_ang; + + if (refine_flag & REFINE_DIFFUSE) { + CUDAREAL step_scale = texture_scale * _F_cell * _F_cell; + for (int i_diff = 0; i_diff < 6; i_diff++) { + dI.diffuse[i_diff] += + step_scale * step_diffuse_param[i_diff]; } + } + + //************************************************* + // START REFINEMENT - for (int i_pan_rot = 0; i_pan_rot < 3; i_pan_rot++) { - if (refine_panel_rot(i_pan_rot)) { - CUDAREAL per_k = 1 / _airpath; - CUDAREAL per_k3 = pow(per_k, 3.); - CUDAREAL per_k5 = pow(per_k, 5.); - CUDAREAL lambda_ang = _lambda * 1e10; - KOKKOS_MAT3 M = -two_C * (_NABC.dot(UBO)) / lambda_ang; - KOKKOS_VEC3 dk = _Fdet * (dF_vecs(_pid * 3 + i_pan_rot)) + - _Sdet * (dS_vecs(_pid * 3 + i_pan_rot)); - CUDAREAL G = dk.dot(_pixel_pos); - CUDAREAL pix2 = subpixel_size * subpixel_size; - KOKKOS_VEC3 dk_hat = -per_k3 * G * _pixel_pos + per_k * dk; - CUDAREAL coef = (M.dot(dk_hat)).dot(V); - CUDAREAL coef2 = - -3 * pix2 * per_k5 * G * (_o_vec.dot(_pixel_pos)); - coef2 += pix2 * per_k3 * (_o_vec.dot(dk)); - CUDAREAL value = - coef * Iincrement + coef2 * Iincrement / _omega_pixel; - - pan_rot_manager_dI[i_pan_rot] += value; - pan_rot_manager_dI2[i_pan_rot] += 0; + if (refine_flag & REFINE_FP_FDP) { + CUDAREAL I_noFcell = texture_scale * I0; + dI.fp_fdp[0] += 2 * I_noFcell * 
(c_deriv_Fcell); + dI.fp_fdp[1] += 2 * I_noFcell * (d_deriv_Fcell); + } + + if (verbose > 3) + printf( + "hkl= %f %f %f hkl1= %d %d %d Fcell=%f\n", _h, _k, _l, + _h0, _k0, _l0, _F_cell); + + KOKKOS_MAT3 UBOt; + if (refine_flag & (REFINE_UMAT | REFINE_ETA)) { + UBOt = Amat_init; + } + if (refine_flag & REFINE_UMAT1) { + const KOKKOS_VEC3 dV = UMATS_prime(_mos_tic, 0) * q_vec; + const CUDAREAL V_dot_dV = V.dot(dV); + const CUDAREAL value = -two_C * V_dot_dV * Iincrement; + CUDAREAL value2 = 0; + if (compute_curvatures) { + const CUDAREAL dV_dot_dV = dV.length_sqr(); + const CUDAREAL dV2_dot_V = V.dot(UMATS_dbl_prime(_mos_tic, 0)*q_vec); + value2 = two_C * (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - dV_dot_dV) * Iincrement; + } + dI.rot[0] += value; + dI2.rot[0] += value2; + } + if (refine_flag & REFINE_UMAT2) { + KOKKOS_VEC3 dV = UMATS_prime(_mos_tic, 1) * q_vec; + CUDAREAL V_dot_dV = V.dot(dV); + CUDAREAL value = -two_C * V_dot_dV * Iincrement; + + CUDAREAL value2 = 0; + if (compute_curvatures) { + const CUDAREAL dV_dot_dV = dV.length_sqr(); + CUDAREAL dV2_dot_V = V.dot(UMATS_dbl_prime(_mos_tic, 1)*q_vec); + value2 = two_C * (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - dV_dot_dV) * Iincrement; + } + dI.rot[1] += value; + dI2.rot[1] += value2; + } + if (refine_flag & REFINE_UMAT3) { + KOKKOS_VEC3 dV = UMATS_prime(_mos_tic, 2) * q_vec; + CUDAREAL V_dot_dV = V.dot(dV); + CUDAREAL value = -two_C * V_dot_dV * Iincrement; + + CUDAREAL value2 = 0; + if (compute_curvatures) { + const CUDAREAL dV_dot_dV = dV.length_sqr(); + CUDAREAL dV2_dot_V = V.dot(UMATS_dbl_prime(_mos_tic, 2)*q_vec); + value2 = two_C * (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - dV_dot_dV) * Iincrement; + } + dI.rot[2] += value; + dI2.rot[2] += value2; + } + // Checkpoint for unit cell derivatives + for (int i_uc = 0; i_uc < 6; i_uc++) { + if (refine_flag & (REFINE_BMAT1 << i_uc)) { + KOKKOS_VEC3 dV = BMATS_prime(_mos_tic, i_uc) * q_vec; + CUDAREAL V_dot_dV = V.dot(dV); + CUDAREAL value = -two_C * V_dot_dV * Iincrement; + CUDAREAL value2 = 0; + if (compute_curvatures) { + const CUDAREAL dV_dot_dV = dV.length_sqr(); + CUDAREAL dV2_dot_V = V.dot(BMATS_dbl_prime(_mos_tic, i_uc)*q_vec); + value2 = two_C * (two_C * V_dot_dV * V_dot_dV - dV2_dot_V - dV_dot_dV) * Iincrement; } + dI.ucell[i_uc] += value; + dI2.ucell[i_uc] += value2; } - - // checkpoint for Fcell manager - if (refine_fcell) { - CUDAREAL value; - if (refine_Icell) - value = I0 * texture_scale; - else - value = 2 * I0 * _F_cell * - texture_scale; // Iincrement/_F_cell ; + } // end ucell deriv + + // Checkpoint for Ncells manager + if (refine_flag & REFINE_NCELLS1) { + int num_ncell_deriv = 1; + if (!isotropic_ncells) + num_ncell_deriv = 3; + for (int i_nc = 0; i_nc < num_ncell_deriv; i_nc++) { + KOKKOS_MAT3 dN; + dN(i_nc, i_nc) = 1; + if (num_ncell_deriv == 1) { + dN(0, 0) = 1; + dN(1, 1) = 1; + dN(2, 2) = 1; + } + CUDAREAL N_i = _NABC(i_nc, i_nc); + KOKKOS_VEC3 dV_dN = dN.dot(delta_H); + // TODO speedops: precompute these, store shared var + // _NABC.inverse + CUDAREAL determ_deriv = (_NABC.inverse().dot(dN)).trace(); + CUDAREAL deriv_coef = determ_deriv - C * (dV_dN.dot(V)); + CUDAREAL value = 2 * Iincrement * deriv_coef; CUDAREAL value2 = 0; if (compute_curvatures) { - // NOTE if _Fcell >0 - value2 = 2 * I0 * texture_scale; + value2 = (-1 / N_i / N_i - C * (dV_dN.dot(dV_dN))) * 2 * + Iincrement; + value2 += deriv_coef * 2 * value; } - // if (fcell_idx >=0 && fcell_idx <=2){ - if (use_nominal_hkl) { - if (_h0 == nom_h && _k0 == nom_k && _l0 == nom_l) { - fcell_manager_dI += 
value; - fcell_manager_dI2 += value2; - } - } else { - fcell_manager_dI += value; - fcell_manager_dI2 += value2; + dI.Ncells[i_nc] += value; + dI2.Ncells[i_nc] += value2; + } + } // end Ncells manager deriv + + if (refine_flag & REFINE_NCELLS_DEF) { + for (int i_nc = 3; i_nc < 6; i_nc++) { + KOKKOS_MAT3 dN; + if (i_nc == 3) + dN = KOKKOS_MAT3{0, 1, 0, 1, 0, 0, 0, 0, 0}; + else if (i_nc == 4) + dN = KOKKOS_MAT3{0, 0, 0, 0, 0, 1, 0, 1, 0}; + else + dN = KOKKOS_MAT3{0, 0, 1, 0, 0, 0, 1, 0, 0}; + KOKKOS_VEC3 dV_dN = dN.dot(delta_H); + // TODO speedops: precompute these + CUDAREAL determ_deriv = (_NABC.inverse().dot(dN)).trace(); + CUDAREAL deriv_coef = determ_deriv - C * (dV_dN.dot(V)); + CUDAREAL value = 2 * Iincrement * deriv_coef; + dI.Ncells[i_nc] += value; + CUDAREAL value2 = 0; + if (compute_curvatures) { + value2 = deriv_coef * value; + value2 += -2 * C * Iincrement * (dV_dN.dot(dV_dN)); + dI2.Ncells[i_nc] += value2; } - } // end of fcell man deriv - - // checkpoint for eta manager - if (refine_eta) { - for (int i_eta = 0; i_eta < 3; i_eta++) { - if (i_eta > 0 && !aniso_eta) - continue; - int mtic2 = _mos_tic + i_eta * mosaic_domains; - KOKKOS_VEC3 DeltaH_deriv = (UMATS_RXYZ_prime(mtic2).dot(UBOt)) - .transpose() - .dot(q_vec); - // vector V is _Nabc*Delta_H - KOKKOS_VEC3 dV = _NABC.dot(DeltaH_deriv); - CUDAREAL V_dot_dV = V.dot(dV); - CUDAREAL Iprime = -two_C * (V_dot_dV)*Iincrement; - eta_manager_dI[i_eta] += Iprime; - CUDAREAL Idbl_prime = 0; - if (compute_curvatures) { - KOKKOS_VEC3 DeltaH_second_deriv = - (UMATS_RXYZ_dbl_prime(mtic2).dot(UBOt)) - .transpose() - .dot(q_vec); - KOKKOS_VEC3 dV2 = _NABC.dot(DeltaH_second_deriv); - Idbl_prime = - -two_C * (dV.dot(dV) + V.dot(dV2)) * Iincrement; - Idbl_prime += -two_C * (V_dot_dV)*Iprime; - } - eta_manager_dI2[i_eta] += Idbl_prime; + } + } + + // Checkpoint for Origin manager + for (int i_pan_orig = 0; i_pan_orig < 3; i_pan_orig++) { + if (refine_flag & (REFINE_PANEL_ORIGIN1 << i_pan_orig)) { + CUDAREAL per_k = _airpath_r; + CUDAREAL per_k3 = pow(per_k, 3.); + CUDAREAL per_k5 = pow(per_k, 5.); + + KOKKOS_MAT3 M = -two_C * (_NABC.dot(UBO)) / lambda_ang; + KOKKOS_VEC3 dk; + if (i_pan_orig == 0) + dk = KOKKOS_VEC3{0, 0, 1}; + else if (i_pan_orig == 1) + dk = KOKKOS_VEC3{1, 0, 0}; + else + dk = KOKKOS_VEC3{0, 1, 0}; + + CUDAREAL G = dk.dot(_pixel_pos); + CUDAREAL pix2 = subpixel_size * subpixel_size; + KOKKOS_VEC3 dk_hat = -per_k3 * G * _pixel_pos + per_k * dk; + CUDAREAL coef = (M.dot(dk_hat)).dot(V); + CUDAREAL coef2 = + -3 * pix2 * per_k5 * G * (_o_vec.dot(_pixel_pos)); + coef2 += pix2 * per_k3 * (_o_vec.dot(dk)); + CUDAREAL value = + coef * Iincrement + coef2 * Iincrement / _omega_pixel; + + dI.pan_orig[i_pan_orig] += value; + dI2.pan_orig[i_pan_orig] += 0; + + } // end origin manager deriv + } + + for (int i_pan_rot = 0; i_pan_rot < 3; i_pan_rot++) { + if (refine_flag & (REFINE_PANEL_ROT1 << i_pan_rot)) { + CUDAREAL per_k = _airpath_r; + CUDAREAL per_k3 = pow(per_k, 3.); + CUDAREAL per_k5 = pow(per_k, 5.); + KOKKOS_MAT3 M = -two_C * (_NABC.dot(UBO)) / lambda_ang; + KOKKOS_VEC3 dk = _Fdet * (dF_vecs(_pid * 3 + i_pan_rot)) + + _Sdet * (dS_vecs(_pid * 3 + i_pan_rot)); + CUDAREAL G = dk.dot(_pixel_pos); + CUDAREAL pix2 = subpixel_size * subpixel_size; + KOKKOS_VEC3 dk_hat = -per_k3 * G * _pixel_pos + per_k * dk; + CUDAREAL coef = (M.dot(dk_hat)).dot(V); + CUDAREAL coef2 = + -3 * pix2 * per_k5 * G * (_o_vec.dot(_pixel_pos)); + coef2 += pix2 * per_k3 * (_o_vec.dot(dk)); + CUDAREAL value = + coef * Iincrement + coef2 * Iincrement / 
_omega_pixel; + + dI.pan_rot[i_pan_rot] += value; + dI2.pan_rot[i_pan_rot] += 0; + } + } + + // checkpoint for Fcell manager + if (refine_flag & REFINE_FCELL) { + CUDAREAL value; + if (refine_flag & REFINE_ICELL) + value = I0 * texture_scale; + else + value = 2 * I0 * _F_cell * + texture_scale; // Iincrement/_F_cell ; + CUDAREAL value2 = 0; + if (compute_curvatures) { + // NOTE if _Fcell >0 + value2 = 2 * I0 * texture_scale; + } + // if (fcell_idx >=0 && fcell_idx <=2){ + if (use_nominal_hkl) { + if (_h0 == nom_h && _k0 == nom_k && _l0 == nom_l) { + dI.fcell += value; + dI2.fcell += value2; } - } // end of eta man deriv - - // checkpoint for lambda manager - for (int i_lam = 0; i_lam < 2; i_lam++) { - if (refine_lambda(i_lam)) { - CUDAREAL lambda_ang = _lambda * 1e10; - CUDAREAL NH_dot_V = (_NABC.dot(H_vec)).dot(V); - CUDAREAL dg_dlambda; - if (i_lam == 0) - dg_dlambda = 1; - else // i_lam==1 - dg_dlambda = lambda_ang; - CUDAREAL coef = - NH_dot_V * two_C * (dg_dlambda) / lambda_ang; - CUDAREAL value = coef * Iincrement; - CUDAREAL value2 = 0; - lambda_manager_dI[i_lam] += value; - lambda_manager_dI2[i_lam] += value2; + } else { + dI.fcell += value; + dI2.fcell += value2; + } + } // end of fcell man deriv + + // checkpoint for eta manager + if (refine_flag & REFINE_ETA) { + for (int i_eta = 0; i_eta < 3; i_eta++) { + if (i_eta > 0 && !aniso_eta) + continue; + int mtic2 = _mos_tic + i_eta * mosaic_domains; + KOKKOS_VEC3 DeltaH_deriv = (UMATS_RXYZ_prime(mtic2).dot(UBOt)) + .transpose() + .dot(q_vec); + // vector V is _Nabc*Delta_H + KOKKOS_VEC3 dV = _NABC.dot(DeltaH_deriv); + CUDAREAL V_dot_dV = V.dot(dV); + CUDAREAL Iprime = -two_C * (V_dot_dV)*Iincrement; + dI.eta[i_eta] += Iprime; + CUDAREAL Idbl_prime = 0; + if (compute_curvatures) { + KOKKOS_VEC3 DeltaH_second_deriv = + (UMATS_RXYZ_dbl_prime(mtic2).dot(UBOt)) + .transpose() + .dot(q_vec); + KOKKOS_VEC3 dV2 = _NABC.dot(DeltaH_second_deriv); + Idbl_prime = + -two_C * (dV.dot(dV) + V.dot(dV2)) * Iincrement; + Idbl_prime += -two_C * (V_dot_dV)*Iprime; } + dI2.eta[i_eta] += Idbl_prime; } - // end of lambda deriv - if (printout) { - if (_subS == 0 && _subF == 0 && _thick_tic == 0 && - _source == 0 && _mos_tic == 0) { - if ((_fpixel == printout_fpixel && - _spixel == printout_spixel) || - printout_fpixel < 0) { - printf( - "%4d %4d : lambda = %g\n", _fpixel, _spixel, - _lambda); - printf( - "at %g %g %g\n", _pixel_pos[0], _pixel_pos[1], - _pixel_pos[2]); - printf( - "Fdet= %g; Sdet= %g ; Odet= %g\n", _Fdet, _Sdet, - _Odet); - printf( - "PIX0: %f %f %f\n", pix0_vectors(pid_x), - pix0_vectors(pid_y), pix0_vectors(pid_z)); - printf( - "F: %f %f %f\n", fdet_vectors(pid_x), - fdet_vectors(pid_y), fdet_vectors(pid_z)); - printf( - "S: %f %f %f\n", sdet_vectors(pid_x), - sdet_vectors(pid_y), sdet_vectors(pid_z)); - printf( - "O: %f %f %f\n", odet_vectors(pid_x), - odet_vectors(pid_y), odet_vectors(pid_z)); - printf( - "pid_x=%d, pid_y=%d; pid_z=%d\n", pid_x, pid_y, - pid_z); - - printf( - "QVECTOR: %f %f %f\n", q_vec[0], q_vec[1], - q_vec[2]); - KOKKOS_MAT3 UU = UMATS_RXYZ(_mos_tic); - printf( - "UMAT_RXYZ :\n%f %f %f\n%f %f %f\n%f %f %f\n", - UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), - UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); - UU = Bmat_realspace; - printf( - "Bmat_realspace :\n%f %f %f\n%f %f %f\n%f %f %f\n", - UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), - UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); - UU = UBO; - printf( - "UBO :\n%f %f %f\n%f %f %f\n%f %f %f\n", - UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), - UU(1, 2), UU(2, 0), 
UU(2, 1), UU(2, 2)); - - UU = UBOt; - printf( - "UBOt :\n%f %f %f\n%f %f %f\n%f %f %f\n", - UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), - UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); - - UU = UmosRxRyRzU; - printf( - "UmosRxRyRzU :\n%f %f %f\n%f %f %f\n%f %f %f\n", - UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), - UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); - KOKKOS_VEC3 AA = delta_H_prime; - printf( - "delta_H_prime :\n%f %f %f\n", AA[0], AA[1], - AA[2]); - printf("Iincrement: %f\n", Iincrement); - printf( - "hkl= %f %f %f hkl0= %d %d %d\n", _h, _k, _l, _h0, - _k0, _l0); - printf( - " F_cell=%g F_cell2=%g I_latt=%g I = %g\n", - _F_cell, _F_cell2, I0, _I); - printf("I/steps %15.10g\n", _I / Nsteps); - // printf("Ilatt diffuse %15.10g\n", I_latt_diffuse); - printf("omega %15.10g\n", _omega_pixel); - printf("default_F= %f\n", default_F); - printf( - "Incident[0]=%g, Incident[1]=%g, Incident[2]=%g\n", - _incident[0], _incident[1], _incident[2]); - if (complex_miller) - printf("COMPLEX MILLER!\n"); - if (no_Nabc_scale) - printf("No Nabc scale!\n"); - } + } // end of eta man deriv + + // checkpoint for lambda manager + for (int i_lam = 0; i_lam < 2; i_lam++) { + if (refine_flag & (REFINE_LAMBDA << i_lam)) { + CUDAREAL NH_dot_V = (_NABC.dot(H_vec)).dot(V); + CUDAREAL dg_dlambda; + if (i_lam == 0) + dg_dlambda = 1; + else // i_lam==1 + dg_dlambda = lambda_ang; + CUDAREAL coef = + NH_dot_V * two_C * (dg_dlambda) / lambda_ang; + CUDAREAL value = coef * Iincrement; + CUDAREAL value2 = 0; + dI.lambda[i_lam] += value; + dI2.lambda[i_lam] += value2; + } + } + // end of lambda deriv + if (printout) { + if (_subS == 0 && _subF == 0 && _thick_tic == 0 && + _source == 0 && _mos_tic == 0) { + if ((_fpixel == printout_fpixel && + _spixel == printout_spixel) || + printout_fpixel < 0) { + printf("%4d %4d : lambda = %g\n", _fpixel, _spixel, _lambda); + printf( + "at %g %g %g\n", _pixel_pos[0], _pixel_pos[1], + _pixel_pos[2]); + printf("Fdet= %g; Sdet= %g ; Odet= %g\n", _Fdet, _Sdet, _Odet); + printf( + "PIX0: %f %f %f\n", pix0_vectors(pid_x), + pix0_vectors(pid_y), pix0_vectors(pid_z)); + printf( + "F: %f %f %f\n", fdet_vectors(pid_x), + fdet_vectors(pid_y), fdet_vectors(pid_z)); + printf( + "S: %f %f %f\n", sdet_vectors(pid_x), + sdet_vectors(pid_y), sdet_vectors(pid_z)); + printf( + "O: %f %f %f\n", odet_vectors(pid_x), + odet_vectors(pid_y), odet_vectors(pid_z)); + printf("pid_x=%d, pid_y=%d; pid_z=%d\n", pid_x, pid_y, pid_z); + printf( + "QVECTOR: %f %f %f\n", q_vec[0], q_vec[1], q_vec[2]); + printf("omega %15.10g\n", _omega_pixel); + printf( + "Incident: %g %g %g\n", + _incident[0], _incident[1], _incident[2]); + + KOKKOS_MAT3 UU = UMATS_RXYZ(_mos_tic); + printf( + "UMAT_RXYZ :\n%f %f %f\n%f %f %f\n%f %f %f\n", + UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), + UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); + UU = Bmat_realspace; + printf( + "Bmat_realspace :\n%f %f %f\n%f %f %f\n%f %f %f\n", + UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), + UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); + UU = UBO; + printf( + "UBO :\n%f %f %f\n%f %f %f\n%f %f %f\n", + UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), + UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); + + UU = UBOt; + printf( + "UBOt :\n%f %f %f\n%f %f %f\n%f %f %f\n", + UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), + UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); + + // UU = UmosRxRyRzU; + // printf( + // "UmosRxRyRzU :\n%f %f %f\n%f %f %f\n%f %f %f\n", + // UU(0, 0), UU(0, 1), UU(0, 2), UU(1, 0), UU(1, 1), + // UU(1, 2), UU(2, 0), UU(2, 1), UU(2, 2)); + // 
KOKKOS_VEC3 AA = delta_H_prime; + // printf( + // "delta_H_prime :\n%f %f %f\n", AA[0], AA[1], + // AA[2]); + printf("Iincrement: %f\n", Iincrement); + printf( + "hkl= %f %f %f hkl0= %d %d %d\n", _h, _k, _l, _h0, + _k0, _l0); + printf( + " F_cell=%g F_cell2=%g I_latt=%g I = %g\n", + _F_cell, _F_cell2, I0, _I); + printf("I/steps %15.10g\n", _I / Nsteps); + // printf("Ilatt diffuse %15.10g\n", I_latt_diffuse); + printf("default_F= %f\n", default_F); + if (complex_miller) + printf("COMPLEX MILLER!\n"); + if (no_Nabc_scale) + printf("No Nabc scale!\n"); } } + } // end of printout if + + } // end of mos_tic loop + } // end of source loop + } // end of thick step loop + } // end of fpos loop + } // end of spos loop + floatimage(pixIdx) = _I; + if (save_wavelenimage) + wavelenimage(pixIdx) = Ilambda / _I; + + if (refine_flag) { + manager_dI(pixIdx) = dI; + manager_dI2(pixIdx) = dI2; + } + }); // end pixIdx loop - } // end of mos_tic loop - } // end of source loop - } // end of thick step loop - } // end of fpos loop - } // end of spos loop - if (Fhkl_gradient_mode) - return; + if (Fhkl_gradient_mode) + return; - CUDAREAL _Fdet_ave = pixel_size * _fpixel + pixel_size / 2.0; - CUDAREAL _Sdet_ave = pixel_size * _spixel + pixel_size / 2.0; - CUDAREAL _Odet_ave = 0; // Odet; - // TODO maybe make this more general for thick detectors? - - KOKKOS_VEC3 _pixel_pos_ave(0, 0, 0); - int pid_x = _pid * 3; - int pid_y = _pid * 3 + 1; - int pid_z = _pid * 3 + 2; - - CUDAREAL fx = fdet_vectors(pid_x); - CUDAREAL fy = fdet_vectors(pid_y); - CUDAREAL fz = fdet_vectors(pid_z); - - CUDAREAL sx = sdet_vectors(pid_x); - CUDAREAL sy = sdet_vectors(pid_y); - CUDAREAL sz = sdet_vectors(pid_z); - - CUDAREAL ox = odet_vectors(pid_x); - CUDAREAL oy = odet_vectors(pid_y); - CUDAREAL oz = odet_vectors(pid_z); - - CUDAREAL px = pix0_vectors(pid_x); - CUDAREAL py = pix0_vectors(pid_y); - CUDAREAL pz = pix0_vectors(pid_z); - - _pixel_pos_ave[0] = _Fdet_ave * fx + _Sdet_ave * sx + _Odet_ave * ox + px; - _pixel_pos_ave[1] = _Fdet_ave * fy + _Sdet_ave * sy + _Odet_ave * oy + py; - _pixel_pos_ave[2] = _Fdet_ave * fz + _Sdet_ave * sz + _Odet_ave * oz + pz; - - CUDAREAL _airpath_ave = _pixel_pos_ave.length(); - KOKKOS_VEC3 _diffracted_ave = _pixel_pos_ave.get_unit_vector(); - CUDAREAL _omega_pixel_ave = pixel_size * pixel_size / _airpath_ave / _airpath_ave * - close_distance / _airpath_ave; - - CUDAREAL _polar = 1; - if (!nopolar) { - KOKKOS_VEC3 _incident(-source_X(0), -source_Y(0), -source_Z(0)); - _incident.normalize(); - // component of diffracted unit vector along _incident beam unit vector - CUDAREAL cos2theta = _incident.dot(_diffracted_ave); - CUDAREAL cos2theta_sqr = cos2theta * cos2theta; - CUDAREAL sin2theta_sqr = 1 - cos2theta_sqr; - - CUDAREAL _psi = 0; - if (kahn_factor != 0.0) { - // cross product to get "vertical" axis that is orthogonal to the cannonical - // "polarization" - KOKKOS_VEC3 B_in = polarization_axis.cross(_incident); - // cross product with _incident beam to get E-vector direction - KOKKOS_VEC3 E_in = _incident.cross(B_in); - // get components of diffracted ray projected onto the E-B plane - CUDAREAL _kEi = _diffracted_ave.dot(E_in); - CUDAREAL _kBi = _diffracted_ave.dot(B_in); - // compute the angle of the diffracted ray projected onto the incident E-B plane - _psi = -atan2(_kBi, _kEi); + Kokkos::parallel_for( + "deriv_image_increment", Npix_to_model, KOKKOS_LAMBDA(const int& pixIdx) { + + int _pid = panels_fasts_slows(pixIdx * 3); + int _fpixel = panels_fasts_slows(pixIdx * 3 + 1); + int _spixel = 
panels_fasts_slows(pixIdx * 3 + 2); + + CUDAREAL _Fdet_ave = pixel_size * _fpixel + pixel_size / 2.0; + CUDAREAL _Sdet_ave = pixel_size * _spixel + pixel_size / 2.0; + CUDAREAL _Odet_ave = 0; // Odet; + // TODO maybe make this more general for thick detectors? + + KOKKOS_VEC3 _pixel_pos_ave(0, 0, 0); + int pid_x = _pid * 3; + int pid_y = _pid * 3 + 1; + int pid_z = _pid * 3 + 2; + + CUDAREAL fx = fdet_vectors(pid_x); + CUDAREAL fy = fdet_vectors(pid_y); + CUDAREAL fz = fdet_vectors(pid_z); + + CUDAREAL sx = sdet_vectors(pid_x); + CUDAREAL sy = sdet_vectors(pid_y); + CUDAREAL sz = sdet_vectors(pid_z); + + CUDAREAL ox = odet_vectors(pid_x); + CUDAREAL oy = odet_vectors(pid_y); + CUDAREAL oz = odet_vectors(pid_z); + + CUDAREAL px = pix0_vectors(pid_x); + CUDAREAL py = pix0_vectors(pid_y); + CUDAREAL pz = pix0_vectors(pid_z); + + _pixel_pos_ave[0] = _Fdet_ave * fx + _Sdet_ave * sx + _Odet_ave * ox + px; + _pixel_pos_ave[1] = _Fdet_ave * fy + _Sdet_ave * sy + _Odet_ave * oy + py; + _pixel_pos_ave[2] = _Fdet_ave * fz + _Sdet_ave * sz + _Odet_ave * oz + pz; + + CUDAREAL close_distance = close_distances(_pid); + + CUDAREAL _airpath_ave_r = 1 / _pixel_pos_ave.length(); + KOKKOS_VEC3 _diffracted_ave = _pixel_pos_ave.get_unit_vector(); + CUDAREAL _omega_pixel_ave = pixel_size * pixel_size * _airpath_ave_r * _airpath_ave_r * + close_distance * _airpath_ave_r; + + CUDAREAL _polar = 1; + if (!nopolar) { + KOKKOS_VEC3 _incident(-source_X(0), -source_Y(0), -source_Z(0)); + _incident.normalize(); + // component of diffracted unit vector along _incident beam unit vector + CUDAREAL cos2theta = _incident.dot(_diffracted_ave); + CUDAREAL cos2theta_sqr = cos2theta * cos2theta; + CUDAREAL sin2theta_sqr = 1 - cos2theta_sqr; + + CUDAREAL cos2psi = 0; + if (kahn_factor != 0.0) { + // cross product to get "vertical" axis that is orthogonal to the cannonical + // "polarization" + KOKKOS_VEC3 B_in = polarization_axis.cross(_incident); + // cross product with _incident beam to get E-vector direction + KOKKOS_VEC3 E_in = _incident.cross(B_in); + // get components of diffracted ray projected onto the E-B plane + CUDAREAL _kEi = _diffracted_ave.dot(E_in); + CUDAREAL _kBi = _diffracted_ave.dot(B_in); + // compute the angle of the diffracted ray projected onto the incident E-B plane + // calculate cos(2 * atan2(_kBi, _kEi)) + if (_kEi!=0) { + CUDAREAL ratio = _kBi / _kEi; + cos2psi = (1 - ratio*ratio) / (1 + ratio*ratio); + } else { + cos2psi = -1; } - // correction for polarized _incident beam - _polar = - 0.5 * (1.0 + cos2theta_sqr - - kahn_factor * ::Kokkos::Experimental::cos(2 * _psi) * sin2theta_sqr); } + // correction for polarized _incident beam + _polar = 0.5 * (1.0 + cos2theta_sqr - kahn_factor * cos2psi * sin2theta_sqr); + } - CUDAREAL _om = 1; - if (!oversample_omega) - _om = _omega_pixel_ave; - // final scale term to being everything to photon number units - CUDAREAL _scale_term = _polar * _om * overall_scale; - floatimage(pixIdx) = _scale_term * _I; - if (save_wavelenimage) - wavelenimage(pixIdx) = Ilambda / _I; - - // udpate the rotation derivative images* - for (int i_rot = 0; i_rot < 3; i_rot++) { - if (refine_Umat(i_rot)) { - CUDAREAL value = _scale_term * rot_manager_dI[i_rot]; - CUDAREAL value2 = _scale_term * rot_manager_dI2[i_rot]; - int idx = i_rot * Npix_to_model + pixIdx; - d_Umat_images(idx) = value; - d2_Umat_images(idx) = value2; - } - } // end rot deriv image increment - - // update the ucell derivative images - for (int i_uc = 0; i_uc < 6; i_uc++) { - if (refine_Bmat(i_uc)) { - CUDAREAL value = 
_scale_term * ucell_manager_dI[i_uc]; - CUDAREAL value2 = _scale_term * ucell_manager_dI2[i_uc]; - int idx = i_uc * Npix_to_model + pixIdx; - d_Bmat_images(idx) = value; - d2_Bmat_images(idx) = value2; - } - } // end ucell deriv image increment - - // update the Ncells derivative image - if (refine_Ncells(0)) { - CUDAREAL value = _scale_term * Ncells_manager_dI[0]; - CUDAREAL value2 = _scale_term * Ncells_manager_dI2[0]; - int idx = pixIdx; + CUDAREAL _om = 1; + if (!oversample_omega) + _om = _omega_pixel_ave; + // final scale term to being everything to photon number units + CUDAREAL _scale_term = _polar * _om * overall_scale; + floatimage(pixIdx) *= _scale_term; + + auto& dI = manager_dI(pixIdx); + auto& dI2 = manager_dI2(pixIdx); + + // udpate the rotation derivative images* + for (int i_rot = 0; i_rot < 3; i_rot++) { + if (refine_flag & (REFINE_UMAT1 << i_rot)) { + CUDAREAL value = _scale_term * dI.rot[i_rot]; + CUDAREAL value2 = _scale_term * dI2.rot[i_rot]; + int idx = i_rot * Npix_to_model + pixIdx; + d_Umat_images(idx) = value; + d2_Umat_images(idx) = value2; + } + } // end rot deriv image increment + + // update the ucell derivative images + for (int i_uc = 0; i_uc < 6; i_uc++) { + if (refine_flag & (REFINE_BMAT1 << i_uc)) { + CUDAREAL value = _scale_term * dI.ucell[i_uc]; + CUDAREAL value2 = _scale_term * dI2.ucell[i_uc]; + int idx = i_uc * Npix_to_model + pixIdx; + d_Bmat_images(idx) = value; + d2_Bmat_images(idx) = value2; + } + } // end ucell deriv image increment + + // update the Ncells derivative image + if (refine_flag & REFINE_NCELLS1) { + CUDAREAL value = _scale_term * dI.Ncells[0]; + CUDAREAL value2 = _scale_term * dI2.Ncells[0]; + int idx = pixIdx; + d_Ncells_images(idx) = value; + d2_Ncells_images(idx) = value2; + + if (!isotropic_ncells) { + value = _scale_term * dI.Ncells[1]; + value2 = _scale_term * dI2.Ncells[1]; + idx = Npix_to_model + pixIdx; d_Ncells_images(idx) = value; d2_Ncells_images(idx) = value2; - if (!isotropic_ncells) { - value = _scale_term * Ncells_manager_dI[1]; - value2 = _scale_term * Ncells_manager_dI2[1]; - idx = Npix_to_model + pixIdx; - d_Ncells_images(idx) = value; - d2_Ncells_images(idx) = value2; - - value = _scale_term * Ncells_manager_dI[2]; - value2 = _scale_term * Ncells_manager_dI2[2]; - idx = Npix_to_model * 2 + pixIdx; - d_Ncells_images(idx) = value; - d2_Ncells_images(idx) = value2; - } - } // end Ncells deriv image increment - if (refine_Ncells_def) { - for (int i_nc = 3; i_nc < 6; i_nc++) { - CUDAREAL value = _scale_term * Ncells_manager_dI[i_nc]; - CUDAREAL value2 = _scale_term * Ncells_manager_dI2[i_nc]; - int idx = i_nc * Npix_to_model + pixIdx; - d_Ncells_images(idx) = value; - d2_Ncells_images(idx) = value2; - } + value = _scale_term * dI.Ncells[2]; + value2 = _scale_term * dI2.Ncells[2]; + idx = Npix_to_model * 2 + pixIdx; + d_Ncells_images(idx) = value; + d2_Ncells_images(idx) = value2; + } + } // end Ncells deriv image increment + if (refine_flag & REFINE_NCELLS_DEF) { + for (int i_nc = 3; i_nc < 6; i_nc++) { + CUDAREAL value = _scale_term * dI.Ncells[i_nc]; + CUDAREAL value2 = _scale_term * dI2.Ncells[i_nc]; + int idx = i_nc * Npix_to_model + pixIdx; + d_Ncells_images(idx) = value; + d2_Ncells_images(idx) = value2; } + } - // update Fcell derivative image - if (refine_fcell) { - CUDAREAL value = _scale_term * fcell_manager_dI; - CUDAREAL value2 = _scale_term * fcell_manager_dI2; - d_fcell_images(pixIdx) = value; - d2_fcell_images(pixIdx) = value2; - } // end Fcell deriv image increment - - if (refine_fp_fdp) { - // c 
derivative - CUDAREAL value = _scale_term * fp_fdp_manager_dI[0]; - d_fp_fdp_images(pixIdx) = value; - // d derivative - value = _scale_term * fp_fdp_manager_dI[1]; - d_fp_fdp_images(Npix_to_model + pixIdx) = value; + // update Fcell derivative image + if (refine_flag & REFINE_FCELL) { + CUDAREAL value = _scale_term * dI.fcell; + CUDAREAL value2 = _scale_term * dI2.fcell; + d_fcell_images(pixIdx) = value; + d2_fcell_images(pixIdx) = value2; + } // end Fcell deriv image increment + + if (refine_flag & REFINE_FP_FDP) { + // c derivative + CUDAREAL value = _scale_term * dI.fp_fdp[0]; + d_fp_fdp_images(pixIdx) = value; + // d derivative + value = _scale_term * dI.fp_fdp[1]; + d_fp_fdp_images(Npix_to_model + pixIdx) = value; + } + if (refine_flag & REFINE_DIFFUSE) { + for (int i_gam = 0; i_gam < 3; i_gam++) { + CUDAREAL val = dI.diffuse[i_gam] * _scale_term; + int img_idx = Npix_to_model * i_gam + pixIdx; + d_diffuse_gamma_images(img_idx) = val; } - if (refine_diffuse) { - for (int i_gam = 0; i_gam < 3; i_gam++) { - CUDAREAL val = dI_diffuse[i_gam] * _scale_term; - int img_idx = Npix_to_model * i_gam + pixIdx; - d_diffuse_gamma_images(img_idx) = val; - } - for (int i_sig = 0; i_sig < 3; i_sig++) { - CUDAREAL val = dI_diffuse[i_sig + 3] * _scale_term; - int img_idx = Npix_to_model * i_sig + pixIdx; - d_diffuse_sigma_images(img_idx) = val; - } + for (int i_sig = 0; i_sig < 3; i_sig++) { + CUDAREAL val = dI.diffuse[i_sig + 3] * _scale_term; + int img_idx = Npix_to_model * i_sig + pixIdx; + d_diffuse_sigma_images(img_idx) = val; } + } - // update eta derivative image - if (refine_eta) { - for (int i_eta = 0; i_eta < 3; i_eta++) { - if (i_eta > 0 && !aniso_eta) - continue; - int idx = pixIdx + Npix_to_model * i_eta; - CUDAREAL value = _scale_term * eta_manager_dI[i_eta]; - CUDAREAL value2 = _scale_term * eta_manager_dI2[i_eta]; - d_eta_images(idx) = value; - d2_eta_images(idx) = value2; - } - } // end eta deriv image increment - - // update the lambda derivative images - for (int i_lam = 0; i_lam < 2; i_lam++) { - if (refine_lambda(i_lam)) { - CUDAREAL value = _scale_term * lambda_manager_dI[i_lam]; - CUDAREAL value2 = _scale_term * lambda_manager_dI2[i_lam]; - int idx = i_lam * Npix_to_model + pixIdx; - d_lambda_images(idx) = value; - // d2_lambda_images(idx) = value2; - } - } // end lambda deriv image increment - - for (int i_pan_rot = 0; i_pan_rot < 3; i_pan_rot++) { - if (refine_panel_rot(i_pan_rot)) { - CUDAREAL value = _scale_term * pan_rot_manager_dI[i_pan_rot]; - CUDAREAL value2 = _scale_term * pan_rot_manager_dI2[i_pan_rot]; - int idx = i_pan_rot * Npix_to_model + pixIdx; - d_panel_rot_images(idx) = value; - // d2_panel_rot_images(idx) = value2; - } - } // end panel rot deriv image increment - - for (int i_pan_orig = 0; i_pan_orig < 3; i_pan_orig++) { - if (refine_panel_origin(i_pan_orig)) { - CUDAREAL value = _scale_term * pan_orig_manager_dI[i_pan_orig]; - CUDAREAL value2 = _scale_term * pan_orig_manager_dI2[i_pan_orig]; - int idx = i_pan_orig * Npix_to_model + pixIdx; - d_panel_orig_images(idx) = value; - // d2_panel_orig_images(idx) = value2; - } - } // end panel orig deriv image increment - }); // end pixIdx loop + // update eta derivative image + if (refine_flag & REFINE_ETA) { + for (int i_eta = 0; i_eta < 3; i_eta++) { + if (i_eta > 0 && !aniso_eta) + continue; + int idx = pixIdx + Npix_to_model * i_eta; + CUDAREAL value = _scale_term * dI.eta[i_eta]; + CUDAREAL value2 = _scale_term * dI2.eta[i_eta]; + d_eta_images(idx) = value; + d2_eta_images(idx) = value2; + } + } // end eta 
deriv image increment + + // update the lambda derivative images + for (int i_lam = 0; i_lam < 2; i_lam++) { + if (refine_flag & (REFINE_LAMBDA1 << i_lam)) { + CUDAREAL value = _scale_term * dI.lambda[i_lam]; + CUDAREAL value2 = _scale_term * dI2.lambda[i_lam]; + int idx = i_lam * Npix_to_model + pixIdx; + d_lambda_images(idx) = value; + // d2_lambda_images(idx) = value2; + } + } // end lambda deriv image increment + + for (int i_pan_rot = 0; i_pan_rot < 3; i_pan_rot++) { + if (refine_flag & (REFINE_PANEL_ROT1 << i_pan_rot)) { + CUDAREAL value = _scale_term * dI.pan_rot[i_pan_rot]; + CUDAREAL value2 = _scale_term * dI2.pan_rot[i_pan_rot]; + int idx = i_pan_rot * Npix_to_model + pixIdx; + d_panel_rot_images(idx) = value; + // d2_panel_rot_images(idx) = value2; + } + } // end panel rot deriv image increment + + for (int i_pan_orig = 0; i_pan_orig < 3; i_pan_orig++) { + if (refine_flag & (REFINE_PANEL_ORIGIN1 << i_pan_orig)) { + CUDAREAL value = _scale_term * dI.pan_orig[i_pan_orig]; + CUDAREAL value2 = _scale_term * dI2.pan_orig[i_pan_orig]; + int idx = i_pan_orig * Npix_to_model + pixIdx; + d_panel_orig_images(idx) = value; + // d2_panel_orig_images(idx) = value2; + } + } // end panel orig deriv image increment + }); // end pixIdx loop } // END of GPU kernel + +template void +kokkos_sum_over_steps< + false, // printout, + false, // complex_miller, + false, // compute_curvatures, + REFINE_FCELL, // refine_flag, + false, // use_diffuse, + false, // save_wavelenimage + false, // Fhkl_gradient_mode, + false, // Fhkl_errors_mode, + false, // using_trusted_mask, + true, // Fhkl_channels_empty, + false> // Fhkl_have_scale_factors + ( + int Npix_to_model, + vector_uint_t panels_fasts_slows, + vector_cudareal_t floatimage, + vector_cudareal_t wavelenimage, + vector_cudareal_t d_Umat_images, + vector_cudareal_t d2_Umat_images, + vector_cudareal_t d_Bmat_images, + vector_cudareal_t d2_Bmat_images, + vector_cudareal_t d_Ncells_images, + vector_cudareal_t d2_Ncells_images, + vector_cudareal_t d_fcell_images, + vector_cudareal_t d2_fcell_images, + vector_cudareal_t d_eta_images, + vector_cudareal_t d2_eta_images, + vector_cudareal_t d_lambda_images, + vector_cudareal_t d2_lambda_images, + vector_cudareal_t d_panel_rot_images, + vector_cudareal_t d2_panel_rot_images, + vector_cudareal_t d_panel_orig_images, + vector_cudareal_t d2_panel_orig_images, + vector_cudareal_t d_fp_fdp_images, + vector_manager_t manager_dI, + vector_manager_t manager_dI2, + const int Nsteps, + int printout_fpixel, + int printout_spixel, + /*bool printout,*/ + CUDAREAL default_F, + int oversample, + bool oversample_omega, + CUDAREAL subpixel_size, + CUDAREAL pixel_size, + CUDAREAL detector_thickstep, + CUDAREAL detector_thick, + const vector_cudareal_t close_distances, + CUDAREAL detector_attnlen, + int detector_thicksteps, + int sources, + int phisteps, + int mosaic_domains, + bool use_lambda_coefficients, + CUDAREAL lambda0, + CUDAREAL lambda1, + KOKKOS_MAT3 eig_U, + KOKKOS_MAT3 eig_O, + KOKKOS_MAT3 eig_B, + KOKKOS_MAT3 RXYZ, + vector_vec3_t dF_vecs, + vector_vec3_t dS_vecs, + const vector_mat3_t UMATS_RXYZ, + vector_mat3_t UMATS_RXYZ_prime, + vector_mat3_t UMATS_RXYZ_dbl_prime, + vector_mat3_t RotMats, + vector_mat3_t dRotMats, + vector_mat3_t d2RotMats, + vector_mat3_t UMATS, + vector_mat3_t dB_mats, + vector_mat3_t dB2_mats, + vector_mat3_t Amatrices, + const vector_cudareal_t source_X, + const vector_cudareal_t source_Y, + const vector_cudareal_t source_Z, + const vector_cudareal_t source_lambda, + const vector_cudareal_t 
source_I, + CUDAREAL kahn_factor, + CUDAREAL Na, + CUDAREAL Nb, + CUDAREAL Nc, + CUDAREAL Nd, + CUDAREAL Ne, + CUDAREAL Nf, + CUDAREAL phi0, + CUDAREAL phistep, + KOKKOS_VEC3 spindle_vec, + KOKKOS_VEC3 polarization_axis, + int h_range, + int k_range, + int l_range, + int h_max, + int h_min, + int k_max, + int k_min, + int l_max, + int l_min, + CUDAREAL dmin, + CUDAREAL fudge, + /*bool complex_miller,*/ + int verbose, + bool only_save_omega_kahn, + bool isotropic_ncells, + /*bool compute_curvatures,*/ + const vector_cudareal_t FhklLinear, + const vector_cudareal_t Fhkl2Linear, + /*const uint32_t refine_flag,*/ + // vector_bool_t refine_Bmat, + // vector_bool_t refine_Ncells, + // bool refine_Ncells_def, + // vector_bool_t refine_panel_origin, + // vector_bool_t refine_panel_rot, + // bool refine_fcell, + // vector_bool_t refine_lambda, + // bool refine_eta, + // vector_bool_t refine_Umat, + const vector_cudareal_t fdet_vectors, + const vector_cudareal_t sdet_vectors, + const vector_cudareal_t odet_vectors, + const vector_cudareal_t pix0_vectors, + bool nopolar, + bool point_pixel, + CUDAREAL fluence, + CUDAREAL r_e_sqr, + CUDAREAL spot_scale, + int Npanels, + bool aniso_eta, + bool no_Nabc_scale, + const vector_cudareal_t fpfdp, + const vector_cudareal_t fpfdp_derivs, + const vector_cudareal_t atom_data, + int num_atoms, + // bool refine_fp_fdp, + const vector_int_t nominal_hkl, + bool use_nominal_hkl, + KOKKOS_MAT3 anisoU, + KOKKOS_MAT3 anisoG, + KOKKOS_MAT3 rotate_principal_axes, + /*bool use_diffuse,*/ + vector_cudareal_t d_diffuse_gamma_images, + vector_cudareal_t d_diffuse_sigma_images, + // bool refine_diffuse, + bool gamma_miller_units, + // bool refine_Icell, + /*bool save_wavelenimage,*/ + int laue_group_num, + int stencil_size, + /*bool Fhkl_gradient_mode,*/ + /*bool Fhkl_errors_mode,*/ + /*bool using_trusted_mask,*/ + /*bool Fhkl_channels_empty,*/ + /*bool Fhkl_have_scale_factors,*/ + int Num_ASU, + const vector_cudareal_t data_residual, + const vector_cudareal_t data_variance, + const vector_int_t data_freq, + const vector_bool_t data_trusted, + const vector_int_t FhklLinear_ASUid, + const vector_int_t Fhkl_channels, + const vector_cudareal_t Fhkl_scale, + vector_cudareal_t Fhkl_scale_deriv); diff --git a/simtbx/diffBragg/src/diffBragg_kokkos_kernel.h b/simtbx/diffBragg/src/diffBragg_kokkos_kernel.h index 3c4a117693..3586183087 100644 --- a/simtbx/diffBragg/src/diffBragg_kokkos_kernel.h +++ b/simtbx/diffBragg/src/diffBragg_kokkos_kernel.h @@ -26,6 +26,8 @@ void kokkos_sum_over_steps( vector_cudareal_t d_panel_orig_images, vector_cudareal_t d2_panel_orig_images, vector_cudareal_t d_fp_fdp_images, + vector_manager_t manager_dI, + vector_manager_t manager_dI2, const int Nsteps, int printout_fpixel, int printout_spixel, @@ -96,15 +98,16 @@ void kokkos_sum_over_steps( bool compute_curvatures, const vector_cudareal_t FhklLinear, const vector_cudareal_t Fhkl2Linear, - vector_bool_t refine_Bmat, - vector_bool_t refine_Ncells, - bool refine_Ncells_def, - vector_bool_t refine_panel_origin, - vector_bool_t refine_panel_rot, - bool refine_fcell, - vector_bool_t refine_lambda, - bool refine_eta, - vector_bool_t refine_Umat, + const uint32_t refine_flag, + // vector_bool_t refine_Bmat, + // vector_bool_t refine_Ncells, + // bool refine_Ncells_def, + // vector_bool_t refine_panel_origin, + // vector_bool_t refine_panel_rot, + // bool refine_fcell, + // vector_bool_t refine_lambda, + // bool refine_eta, + // vector_bool_t refine_Umat, const vector_cudareal_t fdet_vectors, const 
vector_cudareal_t sdet_vectors, const vector_cudareal_t odet_vectors, @@ -121,7 +124,7 @@ void kokkos_sum_over_steps( const vector_cudareal_t fpfdp_derivs, const vector_cudareal_t atom_data, int num_atoms, - bool refine_fp_fdp, + // bool refine_fp_fdp, const vector_int_t nominal_hkl, bool use_nominal_hkl, KOKKOS_MAT3 anisoU, @@ -130,9 +133,9 @@ void kokkos_sum_over_steps( bool use_diffuse, vector_cudareal_t d_diffuse_gamma_images, vector_cudareal_t d_diffuse_sigma_images, - bool refine_diffuse, + // bool refine_diffuse, bool gamma_miller_units, - bool refine_Icell, + // bool refine_Icell, bool save_wavelenimage, int laue_group_num, int stencil_size, bool Fhkl_gradient_mode, @@ -146,10 +149,170 @@ void kokkos_sum_over_steps( const vector_int_t data_freq, const vector_bool_t data_trusted, const vector_int_t FhklLinear_ASUid, - const vector_cudareal_t Fhkl_channels, + const vector_int_t Fhkl_channels, const vector_cudareal_t Fhkl_scale, vector_cudareal_t Fhkl_scale_deriv - ); - +); +template < + bool printout, + bool complex_miller, + bool compute_curvatures, + uint32_t refine_fcell, + bool use_diffuse, + bool save_wavelenimage, + bool Fhkl_gradient_mode, + bool Fhkl_errors_mode, + bool using_trusted_mask, + bool Fhkl_channels_empty, + bool Fhkl_have_scale_factors> +void kokkos_sum_over_steps( + int Npix_to_model, + vector_uint_t panels_fasts_slows, + vector_cudareal_t floatimage, + vector_cudareal_t wavelenimage, + vector_cudareal_t d_Umat_images, + vector_cudareal_t d2_Umat_images, + vector_cudareal_t d_Bmat_images, + vector_cudareal_t d2_Bmat_images, + vector_cudareal_t d_Ncells_images, + vector_cudareal_t d2_Ncells_images, + vector_cudareal_t d_fcell_images, + vector_cudareal_t d2_fcell_images, + vector_cudareal_t d_eta_images, + vector_cudareal_t d2_eta_images, + vector_cudareal_t d_lambda_images, + vector_cudareal_t d2_lambda_images, + vector_cudareal_t d_panel_rot_images, + vector_cudareal_t d2_panel_rot_images, + vector_cudareal_t d_panel_orig_images, + vector_cudareal_t d2_panel_orig_images, + vector_cudareal_t d_fp_fdp_images, + vector_manager_t manager_dI, + vector_manager_t manager_dI2, + const int Nsteps, + int printout_fpixel, + int printout_spixel, + /*bool printout,*/ + CUDAREAL default_F, + int oversample, + bool oversample_omega, + CUDAREAL subpixel_size, + CUDAREAL pixel_size, + CUDAREAL detector_thickstep, + CUDAREAL detector_thick, + const vector_cudareal_t close_distances, + CUDAREAL detector_attnlen, + int detector_thicksteps, + int sources, + int phisteps, + int mosaic_domains, + bool use_lambda_coefficients, + CUDAREAL lambda0, + CUDAREAL lambda1, + KOKKOS_MAT3 eig_U, + KOKKOS_MAT3 eig_O, + KOKKOS_MAT3 eig_B, + KOKKOS_MAT3 RXYZ, + vector_vec3_t dF_vecs, + vector_vec3_t dS_vecs, + const vector_mat3_t UMATS_RXYZ, + vector_mat3_t UMATS_RXYZ_prime, + vector_mat3_t UMATS_RXYZ_dbl_prime, + vector_mat3_t RotMats, + vector_mat3_t dRotMats, + vector_mat3_t d2RotMats, + vector_mat3_t UMATS, + vector_mat3_t dB_mats, + vector_mat3_t dB2_mats, + vector_mat3_t Amatrices, + const vector_cudareal_t source_X, + const vector_cudareal_t source_Y, + const vector_cudareal_t source_Z, + const vector_cudareal_t source_lambda, + const vector_cudareal_t source_I, + CUDAREAL kahn_factor, + CUDAREAL Na, + CUDAREAL Nb, + CUDAREAL Nc, + CUDAREAL Nd, + CUDAREAL Ne, + CUDAREAL Nf, + CUDAREAL phi0, + CUDAREAL phistep, + KOKKOS_VEC3 spindle_vec, + KOKKOS_VEC3 polarization_axis, + int h_range, + int k_range, + int l_range, + int h_max, + int h_min, + int k_max, + int k_min, + int l_max, + int l_min, + 
CUDAREAL dmin, + CUDAREAL fudge, + /*bool complex_miller,*/ + int verbose, + bool only_save_omega_kahn, + bool isotropic_ncells, + /*bool compute_curvatures,*/ + const vector_cudareal_t FhklLinear, + const vector_cudareal_t Fhkl2Linear, + /*const uint32_t refine_flag,*/ + // vector_bool_t refine_Bmat, + // vector_bool_t refine_Ncells, + // bool refine_Ncells_def, + // vector_bool_t refine_panel_origin, + // vector_bool_t refine_panel_rot, + // bool refine_fcell, + // vector_bool_t refine_lambda, + // bool refine_eta, + // vector_bool_t refine_Umat, + const vector_cudareal_t fdet_vectors, + const vector_cudareal_t sdet_vectors, + const vector_cudareal_t odet_vectors, + const vector_cudareal_t pix0_vectors, + bool nopolar, + bool point_pixel, + CUDAREAL fluence, + CUDAREAL r_e_sqr, + CUDAREAL spot_scale, + int Npanels, + bool aniso_eta, + bool no_Nabc_scale, + const vector_cudareal_t fpfdp, + const vector_cudareal_t fpfdp_derivs, + const vector_cudareal_t atom_data, + int num_atoms, + // bool refine_fp_fdp, + const vector_int_t nominal_hkl, + bool use_nominal_hkl, + KOKKOS_MAT3 anisoU, + KOKKOS_MAT3 anisoG, + KOKKOS_MAT3 rotate_principal_axes, + /*bool use_diffuse,*/ + vector_cudareal_t d_diffuse_gamma_images, + vector_cudareal_t d_diffuse_sigma_images, + // bool refine_diffuse, + bool gamma_miller_units, + // bool refine_Icell, + /*bool save_wavelenimage,*/ + int laue_group_num, int stencil_size, + /*bool Fhkl_gradient_mode,*/ + /*bool Fhkl_errors_mode,*/ + /*bool using_trusted_mask,*/ + /*bool Fhkl_channels_empty,*/ + /*bool Fhkl_have_scale_factors,*/ + int Num_ASU, + const vector_cudareal_t data_residual, + const vector_cudareal_t data_variance, + const vector_int_t data_freq, + const vector_bool_t data_trusted, + const vector_int_t FhklLinear_ASUid, + const vector_int_t Fhkl_channels, + const vector_cudareal_t Fhkl_scale, + vector_cudareal_t Fhkl_scale_deriv +); #endif diff --git a/simtbx/diffBragg/src/diffBragg_refine_flag.h b/simtbx/diffBragg/src/diffBragg_refine_flag.h new file mode 100644 index 0000000000..64b250ebf5 --- /dev/null +++ b/simtbx/diffBragg/src/diffBragg_refine_flag.h @@ -0,0 +1,67 @@ +#ifndef SIMTBX_DIFFBRAGG_REFINE_FLAG +#define SIMTBX_DIFFBRAGG_REFINE_FLAG +/* +enum refine_id { + ROTX_ID = (1u << 0), + ROTY_ID = (1u << 1), + ROTZ_ID = (1u << 2), + UCELL_A_ID = (1u << 3), + UCELL_B_ID = (1u << 4), + UCELL_C_ID = (1u << 5), + UCELL_ALPHA_ID = (1u << 6), + UCELL_BETA_ID = (1u << 7), + UCELL_GAMMA_ID = (1u << 8), + NCELLS_ID = (1u << 9), + PANELZ_ID = (1u << 10), + FCELL_ID = (1u << 11), + LAMBDA_OFFSET_ID = (1u << 12), + LAMBDA_SCALE_ID = (1u << 13), + PANEL_ROTO_ID = (1u << 14), + PANELX_ID = (1u << 15), + PANELY_ID = (1u << 16), + PANEL_ROTF_ID = (1u << 17), + PANEL_ROTS_ID = (1u << 18), + ETA_ID = (1u << 19), + NCELLS_OFFDIAG_ID = (1u << 21), + F_PRIME_F_DPRIME_ID = (1u << 22), + DIFFUSE_ID = (1u << 23), +}; +*/ + +enum refine_flag : uint32_t { + REFINE_DIFFUSE = (1u << 0), + REFINE_FP_FDP = (1u << 1), + REFINE_UMAT1 = (1u << 2), + REFINE_UMAT2 = (1u << 3), + REFINE_UMAT3 = (1u << 4), + REFINE_BMAT1 = (1u << 5), + REFINE_BMAT2 = (1u << 6), + REFINE_BMAT3 = (1u << 7), + REFINE_BMAT4 = (1u << 8), + REFINE_BMAT5 = (1u << 9), + REFINE_BMAT6 = (1u << 10), + REFINE_NCELLS1 = (1u << 11), + REFINE_NCELLS2 = (1u << 12), + REFINE_NCELLS3 = (1u << 13), + REFINE_NCELLS_DEF = (1u << 14), + REFINE_PANEL_ORIGIN1 = (1u << 15), + REFINE_PANEL_ORIGIN2 = (1u << 16), + REFINE_PANEL_ORIGIN3 = (1u << 17), + REFINE_PANEL_ROT1 = (1u << 18), + REFINE_PANEL_ROT2 = (1u << 19), + 
REFINE_PANEL_ROT3 = (1u << 20), + REFINE_FCELL = (1u << 21), + REFINE_ETA = (1u << 22), + REFINE_LAMBDA1 = (1u << 23), + REFINE_LAMBDA2 = (1u << 24), + REFINE_ICELL = (1u << 25) +}; + +constexpr unsigned int REFINE_BMAT = REFINE_BMAT1 | REFINE_BMAT2 | REFINE_BMAT3 | REFINE_BMAT4 | REFINE_BMAT5 | REFINE_BMAT6; +constexpr unsigned int REFINE_UMAT = REFINE_UMAT1 | REFINE_UMAT2 | REFINE_UMAT3; +constexpr unsigned int REFINE_NCELLS = REFINE_NCELLS1 | REFINE_NCELLS2 | REFINE_NCELLS3; +constexpr unsigned int REFINE_PANEL_ORIGIN = REFINE_PANEL_ORIGIN1 | REFINE_PANEL_ORIGIN2 | REFINE_PANEL_ORIGIN3; +constexpr unsigned int REFINE_PANEL_ROT = REFINE_PANEL_ROT1 | REFINE_PANEL_ROT2 | REFINE_PANEL_ROT3; +constexpr unsigned int REFINE_LAMBDA = REFINE_LAMBDA1 | REFINE_LAMBDA2; + +#endif diff --git a/simtbx/diffBragg/src/diffuse_util.h b/simtbx/diffBragg/src/diffuse_util.h index 881a4902a9..d556f885d9 100644 --- a/simtbx/diffBragg/src/diffuse_util.h +++ b/simtbx/diffBragg/src/diffuse_util.h @@ -3,21 +3,22 @@ #include -#define CUDA_COMPILE (defined(DIFFBRAGG_HAVE_CUDA) && defined(__CUDACC__)) +#if defined(DIFFBRAGG_HAVE_CUDA) && defined(__CUDACC__) +#define CUDA_COMPILE +#endif #define REAL double -#if CUDA_COMPILE +#ifdef CUDA_COMPILE __device__ __host__ #endif -#if CUDA_COMPILE || not defined(DIFFBRAGG_HAVE_CUDA) + +#if defined(CUDA_COMPILE) || not defined(DIFFBRAGG_HAVE_CUDA) int gen_laue_mats(int laue_group_num, MAT3 *lmats, MAT3 rpa) { - if (laue_group_num < 1 || laue_group_num >14 ){ - printf("Laue group number not in range 1-14; exiting\n"); - exit(1); - } + assert(laue_group_num>0); + assert(laue_group_num<15); - int num_mats; + int num_mats = 0; const double one_over_root2 = 1./sqrt(2.); @@ -561,10 +562,10 @@ int gen_laue_mats(int laue_group_num, MAT3 *lmats, MAT3 rpa) { int gen_laue_mats(int laue_group_num, MAT3 *lmats, MAT3 rpa); #endif -#if CUDA_COMPILE +#ifdef CUDA_COMPILE __device__ __host__ #endif -#if CUDA_COMPILE || not defined(DIFFBRAGG_HAVE_CUDA) +#if defined(CUDA_COMPILE) || not defined(DIFFBRAGG_HAVE_CUDA) void calc_diffuse_at_hkl(VEC3 H_vec, VEC3 H0, VEC3 dHH, VEC3 Hmin, VEC3 Hmax, VEC3 Hrange, MAT3 Ainv, const REAL *FhklLinear, int num_laue_mats, MAT3 *laue_mats, MAT3 anisoG_local, MAT3 anisoU_local, MAT3 *dG_dgam, bool refine_diffuse, REAL *I0, REAL *step_diffuse_param){ REAL four_mpi_sq = 4.*M_PI*M_PI; // loop over laue matrices diff --git a/simtbx/diffBragg/src/diffuse_util_kokkos.h b/simtbx/diffBragg/src/diffuse_util_kokkos.h index 33edd23b94..397db20f73 100644 --- a/simtbx/diffBragg/src/diffuse_util_kokkos.h +++ b/simtbx/diffBragg/src/diffuse_util_kokkos.h @@ -3,14 +3,12 @@ #include -KOKKOS_FUNCTION -int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { +KOKKOS_INLINE_FUNCTION +int gen_laue_mats(int laue_group_num, vector_mat3_t lmats, KOKKOS_MAT3 rpa) { - if (laue_group_num < 1 || laue_group_num >14 ){ - printf("Laue group number not in range 1-14; exiting\n"); - exit(1); - } - int num_mats; + assert(laue_group_num>0); + assert(laue_group_num<15); + int num_mats = 0; const double one_over_root2 = 1./sqrt(2.); @@ -18,7 +16,7 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P -1 // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; @@ -28,12 +26,12 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P 1 1 2/m // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // -x,-y,z - lmats[1] << -1, 0, 0, + lmats(1) << -1, 0, 0, 0,-1, 0, 0, 0, 1; @@ -43,12 +41,12 @@ 
int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P 1 2/m 1 // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // -x,y,-z - lmats[1] << -1, 0, 0, + lmats(1) << -1, 0, 0, 0, 1, 0, 0, 0,-1; @@ -58,12 +56,12 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P 2/m 1 1 // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // x,-y,-z - lmats[1] << 1, 0, 0, + lmats(1) << 1, 0, 0, 0,-1, 0, 0, 0,-1; @@ -73,22 +71,22 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P m m m // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // x,-y,-z - lmats[1] << 1, 0, 0, + lmats(1) << 1, 0, 0, 0,-1, 0, 0, 0,-1; // -x,y,-z - lmats[2] << -1, 0, 0, + lmats(2) << -1, 0, 0, 0, 1, 0, 0, 0,-1; // -x,-y,z - lmats[3] << -1, 0, 0, + lmats(3) << -1, 0, 0, 0,-1, 0, 0, 0, 1; @@ -98,22 +96,22 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P 4/m // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // -y,x,z - lmats[1] << 0,-1, 0, + lmats(1) << 0,-1, 0, 1, 0, 0, 0, 0, 1; // y,-x,z - lmats[2] << 0, 1, 0, + lmats(2) << 0, 1, 0, -1, 0, 0, 0, 0, 1; // -x,-y,z - lmats[3] << -1, 0, 0, + lmats(3) << -1, 0, 0, 0,-1, 0, 0, 0, 1; @@ -123,42 +121,42 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P 4/m m m // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // -y,x,z - lmats[1] << 0,-1, 0, + lmats(1) << 0,-1, 0, 1, 0, 0, 0, 0, 1; // y,-x,z - lmats[2] << 0, 1, 0, + lmats(2) << 0, 1, 0, -1, 0, 0, 0, 0, 1; // x,-y,-z - lmats[3] << 1, 0, 0, + lmats(3) << 1, 0, 0, 0,-1, 0, 0, 0,-1; // -x,y,-z - lmats[4] << -1, 0, 0, + lmats(4) << -1, 0, 0, 0, 1, 0, 0, 0,-1; // -x,-y,z - lmats[5] << -1, 0, 0, + lmats(5) << -1, 0, 0, 0,-1, 0, 0, 0, 1; // y,x,-z - lmats[6] << 0, 1, 0, + lmats(6) << 0, 1, 0, 1, 0, 0, 0, 0,-1; // -y,-x,-z - lmats[7] << 0,-1, 0, + lmats(7) << 0,-1, 0, -1, 0, 0, 0, 0,-1; @@ -168,17 +166,17 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P -3 // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // -y,x-y,z - lmats[1] << 0,-1, 0, + lmats(1) << 0,-1, 0, one_over_root2,-one_over_root2, 0, 0, 0, 1; // -x+y,-x,z - lmats[2] << -one_over_root2, one_over_root2, 0, + lmats(2) << -one_over_root2, one_over_root2, 0, -1, 0, 0, 0, 0, 1; @@ -188,32 +186,32 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P -3 m 1 // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // -y,x-y,z - lmats[1] << 0,-1, 0, + lmats(1) << 0,-1, 0, one_over_root2,-one_over_root2, 0, 0, 0, 1; // -x+y,-x,z - lmats[2] << -one_over_root2, one_over_root2, 0, + lmats(2) << -one_over_root2, one_over_root2, 0, -1, 0, 0, 0, 0, 1; // x-y,-y,-z - lmats[3] << one_over_root2,-one_over_root2, 0, + lmats(3) << one_over_root2,-one_over_root2, 0, 0,-1, 0, 0, 0,-1; // -x,-x+y,-z - lmats[4] << -1, 0, 0, + lmats(4) << -1, 0, 0, -one_over_root2, one_over_root2, 0, 0, 0,-1; // y,x,-z - lmats[5] << 0, 1, 0, + lmats(5) << 0, 1, 0, 1, 0, 0, 0, 0,-1; @@ -223,32 +221,32 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P -3 1 m // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // -y,x-y,z - lmats[1] << 0,-1, 0, + lmats(1) << 0,-1, 0, one_over_root2,-one_over_root2, 0, 0, 0, 1; // -x+y,-x,z - lmats[2] << -one_over_root2, one_over_root2, 0, + lmats(2) << -one_over_root2, 
one_over_root2, 0, -1, 0, 0, 0, 0, 1; // -y,-x,-z - lmats[3] << 0,-1, 0, + lmats(3) << 0,-1, 0, -1, 0, 0, 0, 0,-1; // -x+y,y,-z - lmats[4] << -one_over_root2, one_over_root2, 0, + lmats(4) << -one_over_root2, one_over_root2, 0, 0, 1, 0, 0, 0,-1; // x,x-y,-z - lmats[5] << 1, 0, 0, + lmats(5) << 1, 0, 0, one_over_root2,-one_over_root2, 0, 0, 0,-1; @@ -258,32 +256,32 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P 6/m // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // x-y,x,z - lmats[1] << one_over_root2,-one_over_root2, 0, + lmats(1) << one_over_root2,-one_over_root2, 0, 1, 0, 0, 0, 0, 1; // y,-x+y,z - lmats[2] << 0, 1, 0, + lmats(2) << 0, 1, 0, -one_over_root2, one_over_root2, 0, 0, 0, 1; // -y,x-y,z - lmats[3] << 0,-1, 0, + lmats(3) << 0,-1, 0, one_over_root2,-one_over_root2, 0, 0, 0, 1; // -x+y,-x,z - lmats[4] << -one_over_root2, one_over_root2, 0, + lmats(4) << -one_over_root2, one_over_root2, 0, -1, 0, 0, 0, 0, 1; // -x,-y,z - lmats[5] << -1, 0, 0, + lmats(5) << -1, 0, 0, 0,-1, 0, 0, 0, 1; @@ -293,62 +291,62 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P 6/m m m // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // x-y,x,z - lmats[1] << one_over_root2,-one_over_root2, 0, + lmats(1) << one_over_root2,-one_over_root2, 0, 1, 0, 0, 0, 0, 1; // y,-x+y,z - lmats[2] << 0, 1, 0, + lmats(2) << 0, 1, 0, -one_over_root2, one_over_root2, 0, 0, 0, 1; // -y,x-y,z - lmats[3] << 0,-1, 0, + lmats(3) << 0,-1, 0, one_over_root2,-one_over_root2, 0, 0, 0, 1; // -x+y,-x,z - lmats[4] << -one_over_root2, one_over_root2, 0, + lmats(4) << -one_over_root2, one_over_root2, 0, -1, 0, 0, 0, 0, 1; // x-y,-y,-z - lmats[5] << one_over_root2,-one_over_root2, 0, + lmats(5) << one_over_root2,-one_over_root2, 0, 0,-1, 0, 0, 0,-1; // -x,-x+y,-z - lmats[6] << -1, 0, 0, + lmats(6) << -1, 0, 0, -one_over_root2, one_over_root2, 0, 0, 0,-1; // -x,-y,z - lmats[7] << -1, 0, 0, + lmats(7) << -1, 0, 0, 0,-1, 0, 0, 0, 1; // y,x,-z - lmats[8] << 0, 1, 0, + lmats(8) << 0, 1, 0, 1, 0, 0, 0, 0,-1; // -y,-x,-z - lmats[9] << 0,-1, 0, + lmats(9) << 0,-1, 0, -1, 0, 0, 0, 0,-1; // -x+y,y,-z - lmats[10] << -one_over_root2, one_over_root2, 0, + lmats(10) << -one_over_root2, one_over_root2, 0, 0, 1, 0, 0, 0,-1; // x,x-y,-z - lmats[11] << 1, 0, 0, + lmats(11) << 1, 0, 0, one_over_root2,-one_over_root2, 0, 0, 0,-1; @@ -358,62 +356,62 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P m -3 // x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // z,x,y - lmats[1] << 0, 0, 1, + lmats(1) << 0, 0, 1, 1, 0, 0, 0, 1, 0; // y,z,x - lmats[2] << 0, 1, 0, + lmats(2) << 0, 1, 0, 0, 0, 1, 1, 0, 0; // -y,-z,x - lmats[3] << 0,-1, 0, + lmats(3) << 0,-1, 0, 0, 0,-1, 1, 0, 0; // z,-x,-y - lmats[4] << 0, 0, 1, + lmats(4) << 0, 0, 1, -1, 0, 0, 0,-1, 0; // -y,z,-x - lmats[5] << 0,-1, 0, + lmats(5) << 0,-1, 0, 0, 0, 1, -1, 0, 0; // -z,-x,y - lmats[6] << 0, 0,-1, + lmats(6) << 0, 0,-1, -1, 0, 0, 0, 1, 0; // -z,x,-y - lmats[7] << 0, 0,-1, + lmats(7) << 0, 0,-1, 1, 0, 0, 0,-1, 0; // y,-z,-x - lmats[8] << 0, 1, 0, + lmats(8) << 0, 1, 0, 0, 0,-1, -1, 0, 0; // x,-y,-z - lmats[9] << 1, 0, 0, + lmats(9) << 1, 0, 0, 0,-1, 0, 0, 0,-1; // -x,y,-z - lmats[10] << -1, 0, 0, + lmats(10) << -1, 0, 0, 0, 1, 0, 0, 0,-1; // -x,-y,z - lmats[11] << -1, 0, 0, + lmats(11) << -1, 0, 0, 0,-1, 0, 0, 0, 1; @@ -423,122 +421,122 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { // P m -3 m // 
x,y,z - lmats[0] << 1, 0, 0, + lmats(0) << 1, 0, 0, 0, 1, 0, 0, 0, 1; // x,-z,y - lmats[1] << 1, 0, 0, + lmats(1) << 1, 0, 0, 0, 0,-1, 0, 1, 0; // x,z,-y - lmats[2] << 1, 0, 0, + lmats(2) << 1, 0, 0, 0, 0, 1, 0,-1, 0; // z,y,-x - lmats[3] << 0, 0, 1, + lmats(3) << 0, 0, 1, 0, 1, 0, -1, 0, 0; // -z,y,x - lmats[4] << 0, 0,-1, + lmats(4) << 0, 0,-1, 0, 1, 0, 1, 0, 0; // -y,x,z - lmats[5] << 0,-1, 0, + lmats(5) << 0,-1, 0, 1, 0, 0, 0, 0, 1; // y,-x,z - lmats[6] << 0, 1, 0, + lmats(6) << 0, 1, 0, -1, 0, 0, 0, 0, 1; // z,x,y - lmats[7] << 0, 0, 1, + lmats(7) << 0, 0, 1, 1, 0, 0, 0, 1, 0; // y,z,x - lmats[8] << 0, 1, 0, + lmats(8) << 0, 1, 0, 0, 0, 1, 1, 0, 0; // -y,-z,x - lmats[9] << 0,-1, 0, + lmats(9) << 0,-1, 0, 0, 0,-1, 1, 0, 0; // z,-x,-y - lmats[10] << 0, 0, 1, + lmats(10) << 0, 0, 1, -1, 0, 0, 0,-1, 0; // -y,z,-x - lmats[11] << 0,-1, 0, + lmats(11) << 0,-1, 0, 0, 0, 1, -1, 0, 0; // -z,-x,y - lmats[12] << 0, 0,-1, + lmats(12) << 0, 0,-1, -1, 0, 0, 0, 1, 0; // -z,x,-y - lmats[13] << 0, 0,-1, + lmats(13) << 0, 0,-1, 1, 0, 0, 0,-1, 0; // y,-z,-x - lmats[14] << 0, 1, 0, + lmats(14) << 0, 1, 0, 0, 0,-1, -1, 0, 0; // x,-y,-z - lmats[15] << 1, 0, 0, + lmats(15) << 1, 0, 0, 0,-1, 0, 0, 0,-1; // -x,y,-z - lmats[16] << -1, 0, 0, + lmats(16) << -1, 0, 0, 0, 1, 0, 0, 0,-1; // -x,-y,z - lmats[17] << -1, 0, 0, + lmats(17) << -1, 0, 0, 0,-1, 0, 0, 0, 1; // y,x,-z - lmats[18] << 0, 1, 0, + lmats(18) << 0, 1, 0, 1, 0, 0, 0, 0,-1; // -y,-x,-z - lmats[19] << 0,-1, 0, + lmats(19) << 0,-1, 0, -1, 0, 0, 0, 0,-1; // z,-y,x - lmats[20] << 0, 0, 1, + lmats(20) << 0, 0, 1, 0,-1, 0, 1, 0, 0; // -z,-y,-x - lmats[21] << 0, 0,-1, + lmats(21) << 0, 0,-1, 0,-1, 0, -1, 0, 0; // -x,z,y - lmats[22] << -1, 0, 0, + lmats(22) << -1, 0, 0, 0, 0, 1, 0, 1, 0; // -x,-z,-y - lmats[23] << -1, 0, 0, + lmats(23) << -1, 0, 0, 0, 0,-1, 0,-1, 0; @@ -546,87 +544,78 @@ int gen_laue_mats(int laue_group_num, KOKKOS_MAT3 *lmats, KOKKOS_MAT3 rpa) { } for (int i_mat=0; i_mat < num_mats; i_mat ++){ - lmats[i_mat] = lmats[i_mat] * rpa; + lmats(i_mat) = lmats(i_mat) * rpa; } return num_mats; }; KOKKOS_FUNCTION -void calc_diffuse_at_hkl(KOKKOS_VEC3 H_vec, KOKKOS_VEC3 H0, KOKKOS_VEC3 dHH, KOKKOS_VEC3 Hmin, KOKKOS_VEC3 Hmax, KOKKOS_VEC3 Hrange, KOKKOS_MAT3 Ainv, const vector_cudareal_t FhklLinear, int num_laue_mats, const KOKKOS_MAT3 *laue_mats, KOKKOS_MAT3 anisoG_local, KOKKOS_MAT3 anisoU_local, const KOKKOS_MAT3 *dG_dgam, bool refine_diffuse, CUDAREAL *I0, CUDAREAL *step_diffuse_param){ - CUDAREAL four_mpi_sq = 4.*M_PI*M_PI; - // loop over laue matrices - int num_stencil_points = (2*dHH[0] + 1) * (2*dHH[1] + 1) * (2*dHH[2] + 1); - bool h_bounded= (H0[0]+dHH[0]<=Hmax[0]) && (H0[0]-dHH[0]>=Hmin[0]) ; - bool k_bounded= (H0[1]+dHH[1]<=Hmax[1]) && (H0[1]-dHH[1]>=Hmin[1]) ; - bool l_bounded= (H0[2]+dHH[2]<=Hmax[2]) && (H0[2]-dHH[2]>=Hmin[2]) ; - if (h_bounded && k_bounded && l_bounded) { - int Fhkl_linear_index_0 = (H0[0]-Hmin[0]) * Hrange[1] * Hrange[2] - + (H0[1]-Hmin[1]) * Hrange[2] + (H0[2]-Hmin[2]); - CUDAREAL _F_cell_0 = FhklLinear(Fhkl_linear_index_0); - KOKKOS_MAT3 Ginv = anisoG_local.inverse(); - CUDAREAL anisoG_determ = anisoG_local.determinant(); - for (int hh=-dHH[0]; hh <= dHH[0]; hh++){ - for (int kk=-dHH[1]; kk <= dHH[1]; kk++){ - for (int ll=-dHH[2]; ll <= dHH[2]; ll++){ - CUDAREAL ID_this = 0; - CUDAREAL step_diffuse_param_this[6] = {0,0,0,0,0,0}; - int Fhkl_linear_index_this = (H0[0]+hh-Hmin[0]) * Hrange[1] * Hrange[2] - + (H0[1]+kk-Hmin[1]) * Hrange[2] + (H0[2]+ll-Hmin[2]); - CUDAREAL _F_cell_this = FhklLinear(Fhkl_linear_index_this); 
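// A minimal, self-contained sketch (hypothetical values and helper names, not part of this patch)
// checking that the incremental FhklLinear index used in the rewritten calc_diffuse_at_hkl further
// below matches the explicit (H0+dH - Hmin)-based form being removed here; the per-offset weight
// itself, (F(H0+dH)/F(H0))^2 / num_laue_mats / num_stencil_points, is unchanged by the rewrite.
#if 0
#include <cassert>
// Same layout assumed as FhklLinear in this file: h varies slowest, l varies fastest.
static int linear_index(int h, int k, int l,
                        int h_min, int k_min, int l_min,
                        int k_range, int l_range) {
    return (h - h_min) * k_range * l_range + (k - k_min) * l_range + (l - l_min);
}
int main() {
    const int h0 = 3, k0 = -2, l0 = 5;       // nearest Bragg index H0 (illustrative)
    const int hh = 1, kk = -1, ll = 2;       // stencil offset dH (illustrative)
    const int h_min = -10, k_min = -10, l_min = -10;
    const int k_range = 21, l_range = 21;
    const int base = linear_index(h0, k0, l0, h_min, k_min, l_min, k_range, l_range);
    const int full = linear_index(h0 + hh, k0 + kk, l0 + ll, h_min, k_min, l_min, k_range, l_range);
    // Incremental form used in the new kernel: offset relative to the index of H0.
    const int incremental = hh * k_range * l_range + kk * l_range + ll + base;
    assert(full == incremental);
    return 0;
}
#endif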
- CUDAREAL _this_diffuse_scale; - if (_F_cell_0 != 0.0) - _this_diffuse_scale = _F_cell_this/_F_cell_0; - else - _this_diffuse_scale = 1.0; - - _this_diffuse_scale *= _this_diffuse_scale/(CUDAREAL)num_laue_mats/ - (CUDAREAL)num_stencil_points; - // Use (a-b, a+b, c) as the principal axes of the diffuse model - // TODO: Add an option to select (a, b, c) as the principal axes - - for ( int iL = 0; iL < num_laue_mats; iL++ ){ - KOKKOS_VEC3 Q0 =Ainv*laue_mats[iL]*H0; - CUDAREAL exparg = four_mpi_sq*Q0.dot(anisoU_local*Q0); - CUDAREAL dwf = exp(-exparg); - KOKKOS_VEC3 H0_offset(H0[0]+hh, H0[1]+kk, H0[2]+ll); - KOKKOS_VEC3 delta_H_offset = H_vec - H0_offset; - KOKKOS_VEC3 delta_Q = Ainv*laue_mats[iL]*delta_H_offset; - KOKKOS_VEC3 anisoG_q = anisoG_local*delta_Q; - - CUDAREAL V_dot_V = anisoG_q.dot(anisoG_q); - CUDAREAL gamma_portion_denom = (1.+ V_dot_V* four_mpi_sq); - gamma_portion_denom *= gamma_portion_denom; - CUDAREAL gamma_portion = 8.*M_PI*anisoG_determ / - gamma_portion_denom; - CUDAREAL this_I_latt_diffuse = dwf*exparg*gamma_portion; - - ID_this += this_I_latt_diffuse; - if (refine_diffuse){ // add the contributions to diffuse scattering gradients here - for (int i_gam=0; i_gam<3; i_gam++){ - KOKKOS_VEC3 dV = dG_dgam[i_gam]*delta_Q; - CUDAREAL V_dot_dV = anisoG_q.dot(dV); - CUDAREAL deriv = (Ginv*dG_dgam[i_gam]).trace() - 4.*four_mpi_sq*V_dot_dV/(1+four_mpi_sq*V_dot_V); - step_diffuse_param_this[i_gam] += gamma_portion*deriv*dwf*exparg; - } - KOKKOS_MAT3 dU_dsigma; - // dU_dsigma << 0,0,0,0,0,0,0,0,0; - for (int i_sig = 0;i_sig<3; i_sig++){ - dU_dsigma(i_sig, i_sig) = 2.*sqrt(anisoU_local(i_sig,i_sig)); - CUDAREAL dexparg = four_mpi_sq*Q0.dot(dU_dsigma*Q0); - dU_dsigma(i_sig, i_sig) = 0.; - step_diffuse_param_this[i_sig+3] += gamma_portion*dwf*dexparg*(1. 
- exparg); - } - } - } // end loop over iL (laue group mats) - // Update the lattice interference term here to include diffuse scattering (F_latt squared) - *I0 += ID_this * _this_diffuse_scale; - - for (int idp=0; idp < 6; idp++) - step_diffuse_param[idp] += step_diffuse_param_this[idp]*_this_diffuse_scale; - } // end ll loop - } // end kk loop - } // end hh loop - } // end if bounded +void calc_diffuse_at_hkl(KOKKOS_VEC3 H_vec, KOKKOS_VEC3 H0, KOKKOS_VEC3 dHH, int h_min, int k_min, int l_min, int h_max, int k_max, int l_max, int h_range, int k_range, int l_range, KOKKOS_MAT3 Ainv, const vector_cudareal_t FhklLinear, int num_laue_mats, const vector_mat3_t laue_mats, KOKKOS_MAT3 anisoG_local, vector_cudareal_t dG_trace, CUDAREAL anisoG_determ, KOKKOS_MAT3 anisoU_local, const vector_vec3_t dG_dgam, bool refine_diffuse, CUDAREAL *I0, CUDAREAL *step_diffuse_param){ + constexpr CUDAREAL four_mpi_sq = 4.*M_PI*M_PI; + // loop over laue matrices + int num_stencil_points = (2*dHH[0] + 1) * (2*dHH[1] + 1) * (2*dHH[2] + 1); + bool h_bounded= (H0[0]+dHH[0]<=h_max) && (H0[0]-dHH[0]>=h_min) ; + bool k_bounded= (H0[1]+dHH[1]<=k_max) && (H0[1]-dHH[1]>=k_min) ; + bool l_bounded= (H0[2]+dHH[2]<=l_max) && (H0[2]-dHH[2]>=l_min) ; + if (h_bounded && k_bounded && l_bounded) { + int Fhkl_linear_index_0 = (H0[0]-h_min) * k_range * l_range + (H0[1]-k_min) * l_range + (H0[2]-l_min); + const CUDAREAL _F_cell_0 = FhklLinear(Fhkl_linear_index_0); + for (int hh=-dHH[0]; hh <= dHH[0]; hh++){ + for (int kk=-dHH[1]; kk <= dHH[1]; kk++){ + for (int ll=-dHH[2]; ll <= dHH[2]; ll++){ + CUDAREAL ID_this = 0; + CUDAREAL step_diffuse_param_this[6] = {0,0,0,0,0,0}; + const int Fhkl_linear_index_this = hh * k_range * l_range + kk * l_range + ll + Fhkl_linear_index_0; + const CUDAREAL _F_cell_this = FhklLinear(Fhkl_linear_index_this); + CUDAREAL _this_diffuse_scale; + if (_F_cell_0 != 0.0) + _this_diffuse_scale = _F_cell_this/_F_cell_0; + else + _this_diffuse_scale = 1.0; + + _this_diffuse_scale *= _this_diffuse_scale/(CUDAREAL)num_laue_mats/(CUDAREAL)num_stencil_points; + + const KOKKOS_VEC3 H0_offset(H0[0]+hh, H0[1]+kk, H0[2]+ll); + const KOKKOS_VEC3 delta_H_offset = H_vec - H0_offset; + + for ( int iL = 0; iL < num_laue_mats; iL++ ){ + const KOKKOS_VEC3 Q0 = laue_mats(iL)*H0; + const CUDAREAL exparg = four_mpi_sq*Q0.dot(anisoU_local*Q0); + const CUDAREAL dwf = exp(-exparg); + const KOKKOS_VEC3 delta_Q = laue_mats(iL)*delta_H_offset; + const KOKKOS_VEC3 anisoG_q = anisoG_local*delta_Q; + + const CUDAREAL V_dot_V = anisoG_q.length_sqr(); + const CUDAREAL gamma_portion_denom = 1 / (1.+ V_dot_V* four_mpi_sq); + const CUDAREAL gamma_portion = 8.*M_PI*anisoG_determ * gamma_portion_denom * gamma_portion_denom; + const CUDAREAL this_I_latt_diffuse = dwf*exparg*gamma_portion; + + ID_this += this_I_latt_diffuse; + if (refine_diffuse){ // add the contributions to diffuse scattering gradients here + for (int i_gam=0; i_gam<3; i_gam++){ + const CUDAREAL dV = dG_dgam(i_gam).dot(delta_Q); + const CUDAREAL V_dot_dV = anisoG_q[i_gam] * dV; + const CUDAREAL deriv = dG_trace(i_gam) - 4.*four_mpi_sq*V_dot_dV*gamma_portion_denom; + step_diffuse_param_this[i_gam] += gamma_portion*deriv*dwf*exparg; + } + for (int i_sig = 0;i_sig<3; i_sig++){ + const CUDAREAL dexparg = 2 * four_mpi_sq * sqrt(anisoU_local(i_sig,i_sig)) * Q0[i_sig] * Q0[i_sig]; + step_diffuse_param_this[i_sig+3] += gamma_portion*dwf*dexparg*(1. 
- exparg); + } + } + } // end loop over iL (laue group mats) + // Update the lattice interference term here to include diffuse scattering (F_latt squared) + *I0 += ID_this * _this_diffuse_scale; + + if (refine_diffuse) { + for (int idp=0; idp < 6; idp++) { + step_diffuse_param[idp] += step_diffuse_param_this[idp]*_this_diffuse_scale; + } + } + } // end ll loop + } // end kk loop + } // end hh loop + } // end if bounded } #endif diff --git a/simtbx/diffBragg/src/util.h b/simtbx/diffBragg/src/util.h index 3cf9360e55..ec197c1dc9 100644 --- a/simtbx/diffBragg/src/util.h +++ b/simtbx/diffBragg/src/util.h @@ -8,6 +8,7 @@ #include #include #include +#include #ifndef CUDAREAL #define CUDAREAL double @@ -19,14 +20,35 @@ typedef Eigen::Matrix MAT3; typedef std::vector > eigMat3_vec; typedef std::vector > eigVec3_vec; +inline void easy_time(double& timer, struct timeval& t, bool recording){ + double before_sec = t.tv_sec; + double before_usec = t.tv_usec; + gettimeofday(&t, 0); + double time = (1000000.0 * (t.tv_sec - before_sec) + t.tv_usec - before_usec) / 1000.0; + if (recording) + timer += time; +} + struct timer_variables{ - CUDAREAL add_spots_pre=0; // times the initializations for add spots kernel - CUDAREAL add_spots_post=0; // times the copies that occur after add spots kernel - CUDAREAL add_spots_kernel_wrapper=0; // times the add spots kernel overall, either CPU or GPU - CUDAREAL cuda_alloc=0; // times the allocation of the device - CUDAREAL cuda_copy_to_dev=0; // times the copying from host to device - CUDAREAL cuda_copy_from_dev=0; // times the copying back from device to host - CUDAREAL cuda_kernel=0; // times the GPU kernel + double add_spots_pre=0; // times the initializations for add spots kernel + double add_spots_post=0; // times the copies that occur after add spots kernel + double add_spots_kernel_wrapper=0; // times the add spots kernel overall, either CPU or GPU + double cuda_alloc=0; // times the allocation of the device + double cuda_copy_to_dev=0; // times the copying from host to device + double cuda_copy_from_dev=0; // times the copying back from device to host + double cuda_kernel=0; // times the GPU kernel + double copy_sources=0; + double copy_Fhkl_scale=0; + double copy_umats=0; + double copy_amats=0; + double copy_bmats=0; + double copy_rotmats=0; + double copy_det=0; + double copy_nomhkl=0; + double copy_flags=0; + double copy_fhkl=0; + double copy_detderiv=0; + double copy_pfs=0; int timings=0; // how many times these variables were incremented bool recording=true; }; @@ -72,16 +94,18 @@ struct cuda_flags{ int Npix_to_allocate; // how much space to allocate for simulating forward model and gradients // these following flags indicate whether to update quantities on the GPU device prior to running the kernel // ( of course they are all set prior to running the kernel for the first time) - bool update_step_positions; // step arrays - bool update_panels_fasts_slows; // pixels to simulatoe (panel id, fast scan, slow scan) - bool update_sources; // beam sources - bool update_umats; // umatrices for mosaic blocks - bool update_dB_mats; // derivative of the orthogonalization matrix (for unit cell derivatives) - bool update_rotmats; // rotation matrices (for Umat derivatives) - bool update_Fhkl; // structure factors - bool update_detector; // detector vectors (origin, slow-axis, fast-axis, orth-axis) - bool update_refine_flags; // refinement flags (in case one is iteratively freezing parameters) - bool update_panel_deriv_vecs; // if one is refining the detector vectors) + 
bool update_step_positions = false; // step arrays + bool update_panels_fasts_slows = false; // pixels to simulatoe (panel id, fast scan, slow scan) + bool update_sources = false; // beam sources + bool update_umats = false; // umatrices for mosaic blocks + bool update_dB_mats = false; // derivative of the orthogonalization matrix (for unit cell derivatives) + bool update_rotmats = false; // rotation matrices (for Umat derivatives) + bool update_Fhkl = false; // structure factors + bool update_Fhkl_scales = false; // structure factors + bool update_Fhkl_channels = false; // structure factors + bool update_detector = false; // detector vectors (origin, slow-axis, fast-axis, orth-axis) + bool update_refine_flags = false; // refinement flags (in case one is iteratively freezing parameters) + bool update_panel_deriv_vecs = false; // if one is refining the detector vectors) }; struct flags{ @@ -91,30 +115,30 @@ struct flags{ bool using_trusted_mask=false; bool Fhkl_gradient_mode=false; bool wavelength_img=false; - bool track_Fhkl; // for CPU kernel only, track the HKLS evaluated in the inner most loop - bool printout; // whether to printout debug info for a pixel - bool nopolar; // disable polarization effects - bool point_pixel; // approximate solid angle effects - bool only_save_omega_kahn; // only save the polarization and solid angle corrections (deprecated) - bool compute_curvatures; // whether to compute the curvatures in addition to gradients - bool isotropic_ncells; // one mosaic domain parameter - bool complex_miller; // is the miller array complex (such thet Fhkl_linear and Fhkl2_linear are both defined) - bool no_Nabc_scale; // no Nabc prefactor - bool refine_diffuse; // flag for computing diffuse gradients + bool track_Fhkl = false; // for CPU kernel only, track the HKLS evaluated in the inner most loop + bool printout = false; // whether to printout debug info for a pixel + bool nopolar = false; // disable polarization effects + bool point_pixel = false; // approximate solid angle effects + bool only_save_omega_kahn = false; // only save the polarization and solid angle corrections (deprecated) + bool compute_curvatures = false; // whether to compute the curvatures in addition to gradients + bool isotropic_ncells = false; // one mosaic domain parameter + bool complex_miller = false; // is the miller array complex (such thet Fhkl_linear and Fhkl2_linear are both defined) + bool no_Nabc_scale = false; // no Nabc prefactor + bool refine_diffuse = false; // flag for computing diffuse gradients std::vector refine_Bmat; // Bmatrix std::vector refine_Ncells; // mosaic domain size - bool refine_Ncells_def; // mosaic domain size off diag + bool refine_Ncells_def = false; // mosaic domain size off diag std::vector refine_panel_origin; // panel shift std::vector refine_panel_rot; // detector panel rotation - bool refine_fcell; // structure factor + bool refine_fcell = false; // structure factor std::vector refine_lambda; // spectrum affine correction - bool refine_eta; // mosaic spread + bool refine_eta = false; // mosaic spread std::vector refine_Umat; // missetting angle umatrix - bool refine_fp_fdp; // fprime and fbl prime - bool use_lambda_coefficients; // affine correction lam0 , lam1 - bool oversample_omega; // omega is computed separately for each sub-pixel - int printout_fpixel, printout_spixel; // debug printout pixel (fast scan, slow scan) // TODO add panel id - int verbose; // nanoBragg verbosity flag + bool refine_fp_fdp = false; // fprime and fbl prime + bool 
use_lambda_coefficients = false; // affine correction lam0 , lam1 + bool oversample_omega = false; // omega is computed separately for each sub-pixel + int printout_fpixel = 0, printout_spixel = 0; // debug printout pixel (fast scan, slow scan) // TODO add panel id + int verbose = 0; // nanoBragg verbosity flag bool use_diffuse = false; // model diffuse bool only_diffuse = false; // model diffuse scattering (experimental) bool refine_Icell = false; // option to refine the structure factor intensity directly (F_cell^2) @@ -203,7 +227,10 @@ struct beam{ struct detector{ std::vector > dF_vecs; // derivative of the panel fast direction std::vector > dS_vecs; // derivative of the panel slow direction - CUDAREAL detector_thickstep, detector_thicksteps, detector_thick, detector_attnlen; + CUDAREAL detector_thickstep; + int detector_thicksteps; + CUDAREAL detector_thick; + CUDAREAL detector_attnlen; std::vector close_distances; // offsets to the detector origins (Z direction) int oversample; // determines the pixel subsampling rate CUDAREAL subpixel_size, pixel_size; diff --git a/simtbx/diffBragg/src/util_kokkos.h b/simtbx/diffBragg/src/util_kokkos.h index f74a6d35be..7f0a8da0df 100644 --- a/simtbx/diffBragg/src/util_kokkos.h +++ b/simtbx/diffBragg/src/util_kokkos.h @@ -23,29 +23,37 @@ inline KOKKOS_VEC3 to_vec3(const Eigen::Vector3d& v) { inline KOKKOS_MAT3 to_mat3(const Eigen::Matrix3d& m) { // Eigen matrix is column-major! - return KOKKOS_MAT3(m(0, 0), m(0, 1), m(0, 2), m(1, 0), m(1, 1), m(1, 2), m(2, 0), m(2, 1), m(2, 2)); + return KOKKOS_MAT3(m(0, 0), m(0, 1), m(0, 2), + m(1, 0), m(1, 1), m(1, 2), + m(2, 0), m(2, 1), m(2, 2)); } template -inline void transfer(T& target, U& source, int size=-1) { - const int length = size>=0 ? size : source.size(); +inline void transfer(T& dst, U& src, int size=-1) { + const int length = size>=0 ? 
size : src.size(); for (int i=0; i -inline void transfer_KOKKOS_VEC3(T& target, U& source) { - for (int i=0; i -inline void transfer_KOKKOS_MAT3(T& target, U& source) { - for (int i=0; i dF_vecs; // derivative of the panel fast direction std::vector dS_vecs; // derivative of the panel slow direction - CUDAREAL detector_thickstep, detector_thicksteps, detector_thick, detector_attnlen; + CUDAREAL detector_thickstep; + int detector_thicksteps; + CUDAREAL detector_thick; + CUDAREAL detector_attnlen; std::vector close_distances; // offsets to the detector origins (Z direction) int oversample; // determines the pixel subsampling rate CUDAREAL subpixel_size, pixel_size; @@ -268,4 +279,34 @@ struct kokkos_detector { }; +struct kokkos_manager { + KOKKOS_VEC3 rot; + double ucell[6] = {0, 0, 0, 0, 0, 0}; + double Ncells[6] = {0, 0, 0, 0, 0, 0}; + KOKKOS_VEC3 pan_orig; + KOKKOS_VEC3 pan_rot; + double fcell = 0; + KOKKOS_VEC3 eta; + double lambda[2] = {0, 0}; + double fp_fdp[2] = {0, 0}; + double diffuse[6] = {0, 0, 0, 0, 0, 0}; + + KOKKOS_INLINE_FUNCTION void reset() { + for (int i=0; i<6; ++i) { + ucell[i] = 0; + Ncells[i] = 0; + diffuse[i] = 0; + } + for (int i=0; i<2; ++i) { + lambda[i] = 0; + fp_fdp[i] = 0; + } + rot.zero(); + pan_orig.zero(); + pan_rot.zero(); + eta.zero(); + fcell = 0; + } +}; + #endif diff --git a/simtbx/diffBragg/stage_two_utils.py b/simtbx/diffBragg/stage_two_utils.py index bc7df9f761..a73c8ac30c 100644 --- a/simtbx/diffBragg/stage_two_utils.py +++ b/simtbx/diffBragg/stage_two_utils.py @@ -113,11 +113,8 @@ def PAR_from_params(params, experiment, best=None): PAR.Ndef = [None]*3 PAR.eta = [None]*3 PAR.RotXYZ_params = [None]*3 - - if not params.use_restraints or params.fix.ucell: - # dummie values: - params.centers.ucell = [1, 1, 1, 1, 1, 1] - params.betas.ucell = [1,1,1,1,1,1] + PAR.diffuse_sigma = [None]*3 + PAR.diffuse_gamma = [None]*3 eta_min = params.mins.eta_abc init_eta = params.init.eta_abc if best is None else best.eta_abc.values[0] @@ -126,28 +123,46 @@ def PAR_from_params(params, experiment, best=None): if not params.simulator.crystal.num_mosaicity_samples == 1: raise ValueError("if all eta_abc are 0,0,0, num_mosaicity_samples should be 1") + # TODO allow setting diffuse gamma/sigma from stage 1 (e.g. 
from the `best` dataframe) + init_diffuse_sigma = params.init.diffuse_sigma + init_diffuse_gamma = params.init.diffuse_gamma + for i in range(3): initN = params.init.Nabc[i] if best is None else best.ncells.values[0][i] PAR.Nabc[i] = ParameterType(init=initN, minval=params.mins.Nabc[i], maxval=params.maxs.Nabc[i], fix=params.fix.Nabc, sigma=params.sigmas.Nabc[i], - center=params.centers.Nabc[i], beta=params.betas.Nabc[i]) + center=params.centers.Nabc[i] if params.centers.Nabc is not None else None, + beta=params.betas.Nabc[i] if params.betas.Nabc is not None else None) initN = params.init.Ndef[i] if best is None else best.ncells_def.values[0][i] PAR.Ndef[i] = ParameterType(init=initN, minval=params.mins.Ndef[i], maxval=params.maxs.Ndef[i], fix=params.fix.Ndef, sigma=params.sigmas.Ndef[i], - center=params.centers.Ndef[i], beta=params.betas.Ndef[i]) + center=params.centers.Ndef[i] if params.centers.Ndef is not None else None, + beta=params.betas.Ndef[i] if params.betas.Ndef is not None else None) PAR.RotXYZ_params[i] = ParameterType(init=0, minval=params.mins.RotXYZ[i], maxval=params.maxs.RotXYZ[i], fix=params.fix.RotXYZ, sigma=params.sigmas.RotXYZ[i], - center=0, beta=params.betas.RotXYZ) + center=0 if params.betas.RotXYZ is not None else None, beta=params.betas.RotXYZ) PAR.eta[i] = ParameterType(init=init_eta[i], minval=eta_min[i], maxval=params.maxs.eta_abc[i], fix=params.fix.eta_abc, sigma=params.sigmas.eta_abc[i], - center=params.betas.eta_abc[i], beta=params.betas.eta_abc[i]) + center=params.betas.eta_abc[i] if params.centers.eta_abc is not None else None, + beta=params.betas.eta_abc[i] if params.betas.eta_abc is not None else None) # TODO: diffuse scattering terms + PAR.diffuse_sigma[i] = ParameterType(init=init_diffuse_sigma[i], minval=params.mins.diffuse_sigma[i], + maxval=params.maxs.diffuse_sigma[i], fix=params.fix.diffuse_sigma, + sigma=params.sigmas.diffuse_sigma[i], + center=params.centers.diffuse_sigma[i] if params.centers.diffuse_sigma is not None else None, + beta=params.betas.diffuse_sigma[i] if params.betas.diffuse_sigma is not None else None) + + PAR.diffuse_gamma[i] = ParameterType(init=init_diffuse_gamma[i], minval=params.mins.diffuse_gamma[i], + maxval=params.maxs.diffuse_gamma[i], fix=params.fix.diffuse_gamma, + sigma=params.sigmas.diffuse_gamma[i], + center=params.centers.diffuse_gamma[i] if params.centers.diffuse_gamma is not None else None, + beta=params.betas.diffuse_gamma[i] if params.betas.diffuse_gamma is not None else None) # unit cell parameters ucell_man = utils.manager_from_crystal(experiment.crystal) # Note ucell man contains the best parameters (if best is not None) @@ -157,12 +172,34 @@ def PAR_from_params(params, experiment, best=None): if "Ang" in name: minval = val - ucell_vary_perc * val maxval = val + ucell_vary_perc * val + if name == 'a_Ang': + cent = params.centers.ucell_a + beta = params.betas.ucell_a + elif name == 'b_Ang': + cent = params.centers.ucell_b + beta = params.betas.ucell_b + else: + cent = params.centers.ucell_c + beta = params.betas.ucell_c else: val_in_deg = val * 180 / np.pi minval = (val_in_deg - params.ucell_ang_abs) * np.pi / 180. maxval = (val_in_deg + params.ucell_ang_abs) * np.pi / 180. 
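The "value if params.centers.X is not None else None" guard introduced here repeats for Nabc, Ndef, eta_abc, the new diffuse_sigma/diffuse_gamma terms and, further down, the spectrum coefficients, so restraint centers and betas simply pass through as None when no restraint is configured. A small helper in that spirit (purely illustrative, not part of the patch) makes the intent explicit; note also that the eta_abc entry reads its center from params.betas.eta_abc rather than params.centers.eta_abc, carried over from the old code, which may be intentional but is worth a second look.

    def restraint_term(values, i):
        # values is e.g. params.centers.Nabc; the phil scope leaves it as None
        # when no restraint is requested for that parameter group
        return None if values is None else values[i]

    # e.g. center=restraint_term(params.centers.Nabc, i),
    #      beta=restraint_term(params.betas.Nabc, i)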
+ if name == 'alpha_rad': + cent = params.centers.ucell_alpha + beta = params.betas.ucell_alpha + elif name == 'beta_rad': + cent = params.centers.ucell_beta + beta = params.betas.ucell_beta + else: + cent = params.centers.ucell_gamma + beta = params.betas.ucell_gamma + assert cent is not None + assert beta is not None + cent = cent * np.pi / 180. + p = ParameterType(init=val, minval=minval, maxval=maxval, fix=params.fix.ucell, sigma=params.sigmas.ucell[i_uc], - center=params.centers.ucell[i_uc], beta=params.betas.ucell[i_uc]) + center=cent, beta=beta) PAR.ucell.append(p) PAR.ucell_man = ucell_man @@ -173,7 +210,7 @@ def PAR_from_params(params, experiment, best=None): center=params.centers.detz_shift, beta=params.betas.detz_shift) PAR.B = ParameterType(init=params.init.B, sigma=params.sigmas.B, minval=params.mins.B, maxval=params.maxs.B, fix=True, - center=0, beta=1e8) + center=params.centers.B, beta=params.betas.B) lam0, lam1 = params.init.spec if best is not None: @@ -181,8 +218,8 @@ def PAR_from_params(params, experiment, best=None): PAR.spec_coef = [] for i_p, init_val in enumerate((lam0, lam1)): p = ParameterType(init=init_val, sigma=params.sigmas.spec[i_p], - center=params.centers.spec[i_p], - beta=params.betas.spec[i_p], + center=params.centers.spec[i_p] if params.centers.spec is not None else None, + beta=params.betas.spec[i_p] if params.betas.spec is not None else None, fix=params.fix.spec, minval=params.mins.spec[i_p], maxval=params.maxs.spec[i_p]) PAR.spec_coef.append(p) @@ -203,3 +240,5 @@ def __init__(self): self.detz_shift = None self.paneRot = None self.PanXYZ = None + self.diffuse_sigma = None + self.diffuse_gamma = None diff --git a/simtbx/diffBragg/tests/tst_diffBragg_Fcell_deriv.py b/simtbx/diffBragg/tests/tst_diffBragg_Fcell_deriv.py index a24b0bee4a..45e0b6ae98 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_Fcell_deriv.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_Fcell_deriv.py @@ -152,6 +152,7 @@ assert l.rvalue > .9999 # this is definitely a line! assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope # line should go through origin print("Error versus parameter shift fits a line with slope=%2.7g and Correleation Coef=%2.7g" % (l.slope, l.rvalue)) print("OK!") for name in find_diffBragg_instances(globals()): del globals()[name] diff --git a/simtbx/diffBragg/tests/tst_diffBragg_detdist_derivatives.py b/simtbx/diffBragg/tests/tst_diffBragg_detdist_derivatives.py index 55f30ef84f..7c14318a5d 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_detdist_derivatives.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_detdist_derivatives.py @@ -372,6 +372,8 @@ assert l.rvalue > .99 assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope + if args.curvatures: if args.plotlines: @@ -387,5 +389,7 @@ assert l.rvalue > .99 assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope + print("OK!") for name in find_diffBragg_instances(globals()): del globals()[name] diff --git a/simtbx/diffBragg/tests/tst_diffBragg_diffuse_properties.py b/simtbx/diffBragg/tests/tst_diffBragg_diffuse_properties.py index ca64bc1e98..c09f28597f 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_diffuse_properties.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_diffuse_properties.py @@ -122,6 +122,7 @@ assert l.rvalue > .9999 # this is definitely a line! 
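The l.intercept < 0.1*l.slope assertion being added across these derivative tests tightens the usual finite-difference check: each test perturbs one parameter over a range of step sizes, records the discrepancy between the finite-difference and analytical derivatives, and regresses that error against the step size. For a correct first-order derivative the error is, to leading order, proportional to the step, so in addition to a positive slope and r close to 1 the fitted line should pass essentially through the origin. A standalone sketch of the shared pattern, with synthetic numbers rather than values from any of the tests:

    import numpy as np
    from scipy.stats import linregress

    # stand-ins for the per-test data: parameter shifts and |finite diff - analytic| errors
    shifts = np.array([1, 2, 4, 8, 16]) * 1e-4
    errors = 3.0 * shifts + np.random.default_rng(0).normal(0, 1e-7, size=shifts.size)

    l = linregress(shifts, errors)
    assert l.rvalue > 0.99              # error grows linearly with the shift
    assert l.slope > 0
    assert l.pvalue < 1e-6
    assert l.intercept < 0.1 * l.slope  # and the line passes (nearly) through the origin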
assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope # line should go through origin det_sh = 1024, 1024 print("OK") diff --git a/simtbx/diffBragg/tests/tst_diffBragg_eta_derivs.py b/simtbx/diffBragg/tests/tst_diffBragg_eta_derivs.py index ab3a3c5dd8..d1c0c79819 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_eta_derivs.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_eta_derivs.py @@ -99,7 +99,7 @@ SIM.add_air = True SIM.add_water = True SIM.include_noise = True - SIM.D.use_cuda = args.kokkos + SIM.D.use_gpu = args.kokkos SIM.D.compute_curvatures = args.curvatures SIM.D.add_diffBragg_spots() @@ -196,10 +196,12 @@ assert l.rvalue > .99, "%f" % l.rvalue assert l.slope > 0, "%f" % l.slope assert l.pvalue < 1e-6, "%f" % l.pvalue + assert l.intercept < 0.1*l.slope, "%f" % l.intercept if args.curvatures: l = linregress(all_shifts2, all_errors2) assert l.rvalue > .9999 # this is definitely a line! assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope # line should go through origin print("OK") for name in find_diffBragg_instances(globals()): del globals()[name] diff --git a/simtbx/diffBragg/tests/tst_diffBragg_hopper_refine.py b/simtbx/diffBragg/tests/tst_diffBragg_hopper_refine.py index 1609175af1..8c4e0f12ea 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_hopper_refine.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_hopper_refine.py @@ -194,6 +194,13 @@ P.fix.detz_shift = True P.ftol=1e-15 + if args.perturb == ["detz_shift"]: + P.fix.detz_shift = False + P.fix.ucell=True + P.fix.Nabc=True + P.fix.G=True + P.fix.RotXYZ=True + E.detector = SIM.detector E.beam = SIM.D.beam E.imageset = make_imageset([img], E.beam, E.detector) @@ -219,6 +226,7 @@ P.simulator.structure_factors.mtz_name = mtz_name P.simulator.structure_factors.mtz_column = "F(+),F(-)" P.niter = 0 + P.sigmas.RotXYZ = [1,1,1] P.logging.parameters=True P.niter_per_J = 1 P.method="L-BFGS-B" @@ -268,7 +276,10 @@ # TODO open the pandas output file and optimized expt in outdir and verify the optimized parameters are similar to ground exit() + P.record_device_timings = True Eopt,_, Mod, SIM_used_by_hopper, x = hopper_utils.refine(E, refls, P, spec=spec, return_modeler=True) + if SIM_used_by_hopper.D.record_timings: + SIM_used_by_hopper.D.show_timings(MPI_RANK=0) G, rotX,rotY, rotZ, Na,Nb,Nc,_,_,_,_,_,_,_,_,_,a,b,c,al,be,ga,detz_shift = hopper_utils.get_param_from_x(x, Mod) eta_abc_opt = hopper_utils.get_mosaicity_from_x(x, Mod, SIM_used_by_hopper) diff --git a/simtbx/diffBragg/tests/tst_diffBragg_hopper_refine_Fhkl.py b/simtbx/diffBragg/tests/tst_diffBragg_hopper_refine_Fhkl.py index 5846564feb..5be8e6b274 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_hopper_refine_Fhkl.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_hopper_refine_Fhkl.py @@ -129,6 +129,7 @@ SIM.D.raw_pixels *= 0 P = phil_scope.extract() + P.debug_mode=True E = Experiment() P.init.G = SIM.D.spot_scale @@ -215,7 +216,9 @@ P.outdir="_temp_fhkl_refine" if args.maxiter is not None: P.lbfgs_maxiter = args.maxiter + P.record_device_timings = True Eopt,_, Mod,SIM_from_hopper, x = hopper_utils.refine(E, refls, P, return_modeler=True, free_mem=False) + SIM_from_hopper.D.show_timings(0) logging.disable() print("\nResults\n<><><><><><>") @@ -331,10 +334,18 @@ def compute_r_factor_with_gt(corrections): df[refl_col] = [input_refl] P.refiner.load_data_from_refl = True P.refiner.check_expt_format = False + + #from simtbx.diffBragg import mpi_logger + #P.logging.rank0_level="high" + #mpi_logger.setup_logging_from_params(P) modelers = 
load_inputs(df, P, exper_key="opt_exp_name", refls_key=refl_col) modelers.outdir=P.outdir modelers.prep_for_refinement() + print("Minimizing using hopper_ensemble_utils...") modelers.Minimize(save=True) + if modelers.SIM.D.record_timings: + modelers.SIM.D.show_timings(MPI_RANK=0) + print("Done!") from iotbx.reflection_file_reader import any_reflection_file opt_F = any_reflection_file("_temp_fhkl_refine/optimized_channel0.mtz").as_miller_arrays()[0] diff --git a/simtbx/diffBragg/tests/tst_diffBragg_lambda_coefficients.py b/simtbx/diffBragg/tests/tst_diffBragg_lambda_coefficients.py index 386f6cd212..da7557b3b4 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_lambda_coefficients.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_lambda_coefficients.py @@ -155,6 +155,7 @@ assert l.rvalue > .9999 # this is definitely a line! assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope # line should go through origin print("OK!") for name in find_diffBragg_instances(globals()): del globals()[name] diff --git a/simtbx/diffBragg/tests/tst_diffBragg_ncells_offdiag_property.py b/simtbx/diffBragg/tests/tst_diffBragg_ncells_offdiag_property.py index 9bf3dfd801..4084080cf6 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_ncells_offdiag_property.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_ncells_offdiag_property.py @@ -136,11 +136,13 @@ assert l.rvalue > .999 # this is definitely a line! assert l.slope > 0 assert l.pvalue < 1e-5 + assert l.intercept < 0.1*l.slope # line should go through origin if args.curvatures: l = linregress(shifts2, all_error2) assert l.rvalue > .999 # this is definitely a line! assert l.slope > 0 assert l.pvalue < 1e-5 + assert l.intercept < 0.1*l.slope # line should go through origin print("OK!") for name in find_diffBragg_instances(globals()): del globals()[name] diff --git a/simtbx/diffBragg/tests/tst_diffBragg_ncells_property.py b/simtbx/diffBragg/tests/tst_diffBragg_ncells_property.py index 70e3d28eb7..c3310fb5ca 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_ncells_property.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_ncells_property.py @@ -128,11 +128,13 @@ assert l.rvalue > .9999, l # this is definitely a line! assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope # line should go through origin if args.curvatures: l = linregress(shifts2, all_error2) assert l.rvalue > .9999 # this is definitely a line! assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope # line should go through origin print("OK!") for name in find_diffBragg_instances(globals()): del globals()[name] diff --git a/simtbx/diffBragg/tests/tst_diffBragg_ncells_property_anisotropic.py b/simtbx/diffBragg/tests/tst_diffBragg_ncells_property_anisotropic.py index 8c9dee4013..42a2011ecf 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_ncells_property_anisotropic.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_ncells_property_anisotropic.py @@ -137,11 +137,13 @@ assert l.rvalue > .9999 # this is definitely a line! assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope # line should go through origin if args.curvatures: l = linregress(shifts2, all_error2) assert l.rvalue > .9999 # this is definitely a line! 
assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope # line should go through origin print("OK!") for name in find_diffBragg_instances(globals()): del globals()[name] diff --git a/simtbx/diffBragg/tests/tst_diffBragg_panelXY_derivs.py b/simtbx/diffBragg/tests/tst_diffBragg_panelXY_derivs.py index 2578fa7136..f542ed9372 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_panelXY_derivs.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_panelXY_derivs.py @@ -121,6 +121,7 @@ assert l.rvalue > .99 assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope # line should go through origin print("OK!") for name in find_diffBragg_instances(globals()): del globals()[name] diff --git a/simtbx/diffBragg/tests/tst_diffBragg_rotXYZ.py b/simtbx/diffBragg/tests/tst_diffBragg_rotXYZ.py index 730f8f9db9..2e93bc87e1 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_rotXYZ.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_rotXYZ.py @@ -24,7 +24,7 @@ def main(): print (angles_XYZ*180 / np.pi) D = get_diffBragg_instance() - D.use_cuda = args.kokkos + D.use_gpu = args.kokkos rotX, rotY, rotZ = 0, 1, 2 D.refine(rotX) # rotX diff --git a/simtbx/diffBragg/tests/tst_diffBragg_rotXYZ_deriv.py b/simtbx/diffBragg/tests/tst_diffBragg_rotXYZ_deriv.py index 0bd0b89f9f..d8009ed6fa 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_rotXYZ_deriv.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_rotXYZ_deriv.py @@ -59,7 +59,7 @@ D.refine(rot_idx) D.initialize_managers() D.set_value(rot_idx, 0) - D.use_cuda = args.kokkos + D.use_gpu = args.kokkos #D.printout_pixel_fastslow = 786, 567 D.add_diffBragg_spots() img0 = D.raw_pixels_roi.as_numpy_array() @@ -157,11 +157,13 @@ assert l.rvalue > .9999, "%2.7g" % l.rvalue assert l.slope > 0, "%2.7g" % l.slope assert l.pvalue < 1e-6, "%2.7g" % l.pvalue + assert l.intercept < 0.1*l.slope, "%2.7g" % l.intercept if args.curvatures: l = stats.linregress(delta_h2, error_vals2) assert l.rvalue > .9999, "2nd deriv rvalue %2.7g" % l.rvalue assert l.slope > 0, "2nd deriv slope %2.7g" % l.slope assert l.pvalue < 1e-6, "2nd deriv pvalue %2.7g" % l.pvalue + assert l.intercept < 0.1*l.slope, "2nd deriv pvalue %2.7g" % l.intercept if args.kokkos: D.gpu_free() print("OK!") diff --git a/simtbx/diffBragg/tests/tst_diffBragg_unitcell_property.py b/simtbx/diffBragg/tests/tst_diffBragg_unitcell_property.py index d138a671db..90ef89420d 100644 --- a/simtbx/diffBragg/tests/tst_diffBragg_unitcell_property.py +++ b/simtbx/diffBragg/tests/tst_diffBragg_unitcell_property.py @@ -1,12 +1,10 @@ from __future__ import division -##from simtbx.kokkos import gpu_instance -#kokkos_run = gpu_instance(deviceId = 0) from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument("--plot", action='store_true') parser.add_argument("--crystalsystem", default='tetragonal', - choices=["monoclinic", "tetragonal"]) + choices=["monoclinic", "tetragonal", "hexagonal"]) parser.add_argument("--curvatures", action='store_true') parser.add_argument("--kokkos", action="store_true") args = parser.parse_args() @@ -38,6 +36,9 @@ if args.crystalsystem=="tetragonal": ucell = (55, 55, 77, 90, 90, 90) symbol = "P43212" + elif args.crystalsystem=="hexagonal": + ucell = (55, 55, 77, 90, 90, 120) + symbol = "P6522" else: # args.crystalsystem == "monoclinic" ucell = (70, 60, 50, 90.0, 110, 90.0) symbol = "C121" @@ -76,6 +77,7 @@ # and our dxtbx crystal created above D = SIM.D D.progress_meter = True + D.compute_curvatures = args.curvatures # STEP6: # initialize the derivative managers for the unit cell 
parameters @@ -178,26 +180,26 @@ cc_vals2.append(r2) if args.plot: plt.subplot(121) - plt.imshow(finite_deriv) + plt.imshow(finite_deriv.reshape((img_sh))) plt.title("finite diff") plt.subplot(122) - plt.imshow(analy_deriv) + plt.imshow(analy_deriv.reshape((img_sh))) plt.title("analytical") plt.draw() plt.suptitle("Shift %d / %d" % (i_shift+1, len(shifts))) - plt.pause(0.8) + plt.pause(1.2) if args.curvatures: plt.subplot(121) - plt.imshow(finite_second_deriv) + plt.imshow(finite_second_deriv.reshape((img_sh))) plt.title("finite second diff") plt.subplot(122) - plt.imshow(second_derivs[i_param]) + plt.imshow(second_derivs[i_param].reshape((img_sh))) plt.title("analytical") plt.draw() plt.suptitle("Shift %d / %d" % (i_shift + 1, len(shifts))) - plt.pause(0.8) + plt.pause(1.2) l = linregress(h_vals, error) @@ -205,6 +207,7 @@ assert l.rvalue > .99 assert l.slope > 0 assert l.pvalue < 1e-6 + assert l.intercept < 0.1*l.slope if args.curvatures: l2 = linregress(np.array(h_vals)**2, error2) @@ -212,6 +215,7 @@ assert l2.rvalue > .99 assert l2.slope > 0 assert l2.pvalue < 1e-6 + assert l2.intercept < 0.1*l2.slope if args.plot: plt.close() diff --git a/simtbx/diffBragg/utils.py b/simtbx/diffBragg/utils.py index 3692633de0..28e7857820 100644 --- a/simtbx/diffBragg/utils.py +++ b/simtbx/diffBragg/utils.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, division, print_function import os +import socket import sys import re from io import StringIO @@ -29,6 +30,8 @@ from cctbx.eltbx import henke from simtbx.diffBragg import psf from dials.algorithms.shoebox import MaskCode +from xfel.merging.application.utils.memory_usage import get_memory_usage + import logging MAIN_LOGGER = logging.getLogger("diffBragg.main") @@ -81,14 +84,22 @@ def label_background_pixels(roi_img, thresh=3.5, iterations=1, only_high=True): while iterations > 0: if background_pixels is None: outliers = is_outlier(img1, thresh) - m = np.median(img1[~outliers]) + inlier_vals = img1[~outliers] + if inlier_vals.size: + m = np.median(inlier_vals) + else: + m = np.nan if only_high: outliers = np.logical_and(outliers, img1 > m) background_pixels = ~outliers else: where_bg = np.where(background_pixels)[0] outliers = is_outlier(img1[background_pixels], thresh) - m = np.median(img1[background_pixels][~outliers]) + inlier_vals = img1[background_pixels][~outliers] + if inlier_vals.size: + m = np.median(inlier_vals) + else: + m = np.nan if only_high: outliers = np.logical_and(outliers, img1[background_pixels] > m) background_pixels[where_bg[outliers]] = False @@ -101,10 +112,16 @@ def is_outlier(points, thresh=3.5): """http://stackoverflow.com/a/22357811/2077270""" if len(points.shape) == 1: points = points[:, None] - median = np.median(points, axis=0) + if points.size: + median = np.median(points, axis=0) + else: + median = np.nan diff = np.sum((points - median) ** 2, axis=-1) diff = np.sqrt(diff) - med_abs_deviation = np.median(diff) + if diff.size: + med_abs_deviation = np.median(diff) + else: + med_abs_deviation = np.nan if med_abs_deviation == 0: return np.zeros(points.shape[0], bool) @@ -452,13 +469,13 @@ def get_roi_background_and_selection_flags(refls, imgs, shoebox_sz=10, reject_ed selection_flags = [] num_roi_negative_bg = 0 num_roi_nan_bg = 0 - background = np.ones(imgs.shape)*-1 + background = np.full_like(imgs, -1, dtype=float) i_roi = 0 while i_roi < len(rois): roi = rois[i_roi] i1, i2, j1, j2 = roi is_selected = True - MAIN_LOGGER.debug("Reflection %d bounded by x1=%d,x2=%d,y1=%d,y2=%d" % (i_roi, i1,i2,j1,j2)) + 
refl_bbox_str = "Reflection %d bounded by x1=%d,x2=%d,y1=%d,y2=%d" % (i_roi, i1,i2,j1,j2) if is_on_edge[i_roi] and reject_edge_reflections: MAIN_LOGGER.debug("Reflection %d is on edge" % i_roi) is_selected = False @@ -474,6 +491,14 @@ def get_roi_background_and_selection_flags(refls, imgs, shoebox_sz=10, reject_ed MAIN_LOGGER.debug("reflection %d has too many (%d) hot pixels (%d allowed)!" % (i_roi, num_hotpix, min_trusted_pix_per_roi)) is_selected = False + # Before padding and fitting, test for overlaps and shrink if needed + is_overlapping = not np.all(background[pid, j1:j2, i1:i2] == -1) + if not allow_overlaps and is_overlapping: + MAIN_LOGGER.debug("region of interest already accounted for roi size= %d %d" % (i2-i1, j2-j1)) + rois[i_roi] = (i1 + 1, i2, j1 + 1, j2) if (i1 + i2) % 2 \ + else (i1, i2 - 1, j1, j2 - 1) # shrink alternately from corners + continue + dimY, dimX = imgs[pid].shape j1_nopad = j1 i1_nopad = i1 @@ -487,6 +512,10 @@ def get_roi_background_and_selection_flags(refls, imgs, shoebox_sz=10, reject_ed shoebox = imgs[pid, j1:j2, i1:i2] + if shoebox.size < 4: + MAIN_LOGGER.debug("reflection %d has negative background" % i_roi) + is_selected = False + if not isinstance(sigma_rdout, float) and not isinstance(sigma_rdout, int): shoebox_sigma_readout = sigma_rdout[pid, j1:j2, i1:i2] else: @@ -495,13 +524,19 @@ def get_roi_background_and_selection_flags(refls, imgs, shoebox_sz=10, reject_ed if background_mask is not None: is_background = background_mask[pid, j1:j2, i1:i2] else: - is_background = label_background_pixels(shoebox,thresh=bg_thresh, iterations=2, only_high=only_high) + if shoebox.shape== (0,0): + is_background = shoebox.copy().astype(bool) + else: + is_background = label_background_pixels(shoebox,thresh=bg_thresh, iterations=2, only_high=only_high) Ycoords, Xcoords = np.indices((j2-j1, i2-i1)) if use_robust_estimation: bg_pixels = shoebox[is_background] - bg_signal = np.median(bg_pixels) + if not bg_pixels.size: + bg_signal = np.nan + else: + bg_signal = np.median(bg_pixels) if bg_signal < 0: num_roi_negative_bg += 1 if set_negative_bg_to_zero: @@ -509,6 +544,8 @@ def get_roi_background_and_selection_flags(refls, imgs, shoebox_sz=10, reject_ed elif skip_roi_with_negative_bg: MAIN_LOGGER.debug("reflection %d has negative background" % i_roi) is_selected = False + elif np.isnan(bg_signal): + is_selected = False tilt_a, tilt_b, tilt_c = 0, 0, bg_signal covariance = None @@ -524,7 +561,7 @@ def get_roi_background_and_selection_flags(refls, imgs, shoebox_sz=10, reject_ed MAIN_LOGGER.debug("tilt fit failed for reflection %d, probably too few pixels" % i_roi) tilt_plane = np.zeros_like(Xcoords) else: - MAIN_LOGGER.debug("successfully fit tilt plane") + #MAIN_LOGGER.debug("successfully fit tilt plane") (tilt_a, tilt_b, tilt_c), covariance = fit_results tilt_plane = tilt_a * Xcoords + tilt_b * Ycoords + tilt_c if np.any(np.isnan(tilt_plane)) and is_selected: @@ -536,14 +573,6 @@ def get_roi_background_and_selection_flags(refls, imgs, shoebox_sz=10, reject_ed MAIN_LOGGER.debug("reflection %d has tilt plane that dips below 0" % i_roi) is_selected = False - is_overlapping = not np.all(background[pid, j1_nopad:j2_nopad, i1_nopad:i2_nopad] == -1) - - if not allow_overlaps and is_overlapping: - # NOTE : move away from this option, it potentially moves the pixel centroid outside of the ROI (in very rare instances) - MAIN_LOGGER.debug("region of interest already accounted for roi size= %d %d" % (i2_nopad-i1_nopad, j2_nopad-j1_nopad)) - rois[i_roi] = 
i1_nopad+1,i2_nopad,j1_nopad+1,j2_nopad - continue - # unpadded ROI dimension roi_dimY = j2_nopad-j1_nopad roi_dimX = i2_nopad-i1_nopad @@ -561,6 +590,8 @@ def get_roi_background_and_selection_flags(refls, imgs, shoebox_sz=10, reject_ed kept_rois.append(roi) panel_ids.append(pid) selection_flags.append(is_selected) + if not is_selected: + MAIN_LOGGER.debug("--> %s was not selected for above reasons" % refl_bbox_str) i_roi += 1 MAIN_LOGGER.debug("Number of skipped ROI with negative BGs: %d / %d" % (num_roi_negative_bg, len(rois))) @@ -650,7 +681,7 @@ def fit_plane_equation_to_background_pixels(shoebox_img, fit_sel, sigma_rdout=3, # vector of residuals r = rho_bg - np.dot(A, (t1, t2, t3)) Nbg = len(rho_bg) - with np.errstate(invalid='ignore'): + with np.errstate(divide='ignore', invalid='ignore'): r_fact = np.dot(r.T, np.dot(W, r)) / (Nbg - 3) # 3 parameters fit var_covar = AWA_inv * r_fact # TODO: check for correlations in the off diagonal elems @@ -705,6 +736,7 @@ def image_data_from_expt(expt, as_double=True): raise ValueError("imageset should have only 1 shot. This expt has imageset with %d shots" % len(iset)) try: flex_data = iset.get_raw_data(0) + except Exception as err: assert str(type(err)) == "", "something weird going on with imageset data" flex_data = iset.get_raw_data() @@ -748,6 +780,9 @@ def simulator_for_refinement(expt, params): else: MAIN_LOGGER.info("Will not use mosaic models, as simulator.crystal.num_mosaicity_samples=1") + if not params.fix.eta_abc: + assert SIM.D.mosaic_domains > 1 + if not params.fix.diffuse_gamma or not params.fix.diffuse_sigma: assert params.use_diffuse_models SIM.D.use_diffuse = params.use_diffuse_models @@ -1385,15 +1420,20 @@ def refls_to_hkl(refls, detector, beam, crystal, return np.vstack(HKL).T, np.vstack(HKLi).T -def get_panels_fasts_slows(expt, pids, rois): +def get_panels_fasts_slows(expt, pids, rois, img_sh=None): """ :param expt: dxtbx experiment :param pids: panel ids :param rois: regions of interest + :param img_sh: 3-tuple Npan, Nslow, Nfast :return: """ - npan = len(expt.detector) - nfast, nslow = expt.detector[0].get_image_size() + if expt is not None: + npan = len(expt.detector) + nfast, nslow = expt.detector[0].get_image_size() + else: + assert img_sh is not None + npan, nslow, nfast = img_sh MASK = np.zeros((npan, nslow, nfast), bool) ROI_ID = np.zeros((npan, nslow, nfast), 'uint16') #ROI_ID = NP_ONES((npan, nslow, nfast), 'uint16') * mx @@ -1727,3 +1767,52 @@ def find_diffBragg_instances(globe_objs): if "simtbx_diffBragg_ext.diffBragg" in str(obj): inst_names.append(name) return inst_names + + +def memory_report(prefix='Memory usage'): + """Return a string documenting memory usage; to be used with LOGGER.info""" + memory_usage_in_gb = get_memory_usage() / 1024. + host = socket.gethostname() + return "%s: %f GB on node %s" % (prefix, memory_usage_in_gb, host) + + +def smooth(x, beta=10.0, window_size=11): + """ + https://glowingpython.blogspot.com/2012/02/convolution-with-numpy.html + + Apply a Kaiser window smoothing convolution. + + Parameters + ---------- + x : ndarray, float + The array to smooth. + + Optional Parameters + ------------------- + beta : float + Parameter controlling the strength of the smoothing -- bigger beta + results in a smoother function. + window_size : int + The size of the Kaiser window to apply, i.e. the number of neighboring + points used in the smoothing. + + Returns + ------- + smoothed : ndarray, float + A smoothed version of `x`. 
+ """ + + # make sure the window size is odd + if window_size % 2 == 0: + window_size += 1 + + # apply the smoothing function + s = np.r_[x[window_size - 1:0:-1], x, x[-1:-window_size:-1]] + w = np.kaiser(window_size, beta) + y = np.convolve(w / w.sum(), s, mode='valid') + + # remove the extra array length convolve adds + b = int((window_size - 1) / 2) + smoothed = y[b:len(y) - b] + + return smoothed diff --git a/simtbx/gpu/simulation.cu b/simtbx/gpu/simulation.cu index af29f670e7..24b2274cb5 100644 --- a/simtbx/gpu/simulation.cu +++ b/simtbx/gpu/simulation.cu @@ -92,7 +92,8 @@ namespace af = scitbx::af; simtbx::gpu::gpu_detector & gdt, double const& weight ){ - cudaSafeCall(cudaSetDevice(SIM.device_Id)); + SCITBX_ASSERT(SIM.device_Id == stash_device_Id); + cudaSafeCall(cudaSetDevice(stash_device_Id)); // transfer source_I, source_lambda // the int arguments are for sizes of the arrays @@ -107,7 +108,7 @@ namespace af = scitbx::af; cu_current_channel_Fhkl = gec.d_channel_Fhkl[ichannel]; cudaDeviceProp deviceProps = { 0 }; - cudaSafeCall(cudaGetDeviceProperties(&deviceProps, SIM.device_Id)); + cudaSafeCall(cudaGetDeviceProperties(&deviceProps, stash_device_Id)); int smCount = deviceProps.multiProcessorCount; dim3 threadsPerBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); dim3 numBlocks(smCount * 8, 1); @@ -185,7 +186,8 @@ namespace af = scitbx::af; simtbx::gpu::gpu_detector & gdt, af::shared const active_pixel_list ){ - cudaSafeCall(cudaSetDevice(SIM.device_Id)); + SCITBX_ASSERT(SIM.device_Id == stash_device_Id); + cudaSafeCall(cudaSetDevice(stash_device_Id)); gdt.set_active_pixels_on_GPU(active_pixel_list); @@ -198,7 +200,7 @@ namespace af = scitbx::af; cu_current_channel_Fhkl = gec.d_channel_Fhkl[ichannel]; cudaDeviceProp deviceProps = { 0 }; - cudaSafeCall(cudaGetDeviceProperties(&deviceProps, SIM.device_Id)); + cudaSafeCall(cudaGetDeviceProperties(&deviceProps, stash_device_Id)); int smCount = deviceProps.multiProcessorCount; dim3 threadsPerBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); dim3 numBlocks(smCount * 8, 1); @@ -249,7 +251,8 @@ namespace af = scitbx::af; void exascale_api::add_background(simtbx::gpu::gpu_detector & gdt, int const& override_source){ - cudaSafeCall(cudaSetDevice(SIM.device_Id)); + SCITBX_ASSERT(SIM.device_Id == stash_device_Id); + cudaSafeCall(cudaSetDevice(stash_device_Id)); // transfer source_I, source_lambda // the int arguments are for sizes of the arrays @@ -265,7 +268,7 @@ namespace af = scitbx::af; cudaSafeCall(cudaMemcpyVectorDoubleToDevice(cu_Fbg_of, SIM.Fbg_of, SIM.stols)); cudaDeviceProp deviceProps = { 0 }; - cudaSafeCall(cudaGetDeviceProperties(&deviceProps, SIM.device_Id)); + cudaSafeCall(cudaGetDeviceProperties(&deviceProps, stash_device_Id)); int smCount = deviceProps.multiProcessorCount; dim3 threadsPerBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); dim3 numBlocks(smCount * 8, 1); @@ -314,7 +317,8 @@ namespace af = scitbx::af; void exascale_api::allocate(){ - cudaSafeCall(cudaSetDevice(SIM.device_Id)); + SCITBX_ASSERT(SIM.device_Id == stash_device_Id); + cudaSafeCall(cudaSetDevice(stash_device_Id)); /* water_size not defined in class, CLI argument, defaults to 0 */ double water_size = 0.0; @@ -386,7 +390,7 @@ namespace af = scitbx::af; }; exascale_api::~exascale_api(){ - cudaSafeCall(cudaSetDevice(SIM.device_Id)); + cudaSafeCall(cudaSetDevice(stash_device_Id)); cudaSafeCall(cudaFree(cu_beam_vector)); cudaSafeCall(cudaFree(cu_spindle_vector)); diff --git a/simtbx/gpu/simulation.h b/simtbx/gpu/simulation.h index 8cf384f862..b518196494 100644 
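The smooth() helper added to simtbx/diffBragg/utils.py above mirrors the signal at both ends before convolving with a normalized Kaiser window, so the output keeps the length of the input. A quick sanity check, assuming the patched module is importable:

    import numpy as np
    from simtbx.diffBragg.utils import smooth  # helper added in this patch

    rng = np.random.default_rng(0)
    noisy = np.sin(np.linspace(0, 4 * np.pi, 200)) + 0.3 * rng.normal(size=200)
    smoothed = smooth(noisy, beta=10.0, window_size=11)

    # the reflection padding preserves the length; the Kaiser window mainly
    # attenuates the high-frequency noise while leaving the slow sine intact
    assert smoothed.shape == noisy.shape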
--- a/simtbx/gpu/simulation.h +++ b/simtbx/gpu/simulation.h @@ -14,7 +14,7 @@ namespace af = scitbx::af; struct exascale_api { inline exascale_api(const simtbx::nanoBragg::nanoBragg& nB): - SIM(nB){ + SIM(nB),stash_device_Id(nB.device_Id){ } void show(); @@ -38,6 +38,7 @@ struct exascale_api { ~exascale_api(); const simtbx::nanoBragg::nanoBragg& SIM; + const int stash_device_Id; // must remain the same after initialization CUDAREAL * cu_current_channel_Fhkl; CUDAREAL cu_subpixel_size; diff --git a/simtbx/kokkos/SConscript b/simtbx/kokkos/SConscript index e9bcae54b6..2407718563 100644 --- a/simtbx/kokkos/SConscript +++ b/simtbx/kokkos/SConscript @@ -1,5 +1,4 @@ import os -import subprocess from shutil import copy, which import libtbx.load_env diff --git a/simtbx/kokkos/detector.cpp b/simtbx/kokkos/detector.cpp index 85715e3077..0c310551ca 100644 --- a/simtbx/kokkos/detector.cpp +++ b/simtbx/kokkos/detector.cpp @@ -16,50 +16,46 @@ using Kokkos::deep_copy; using Kokkos::create_mirror_view; using Kokkos::parallel_for; +auto get_kokkos_vec3 = [](auto&& src) { return vec3(src[0], src[1], src[2]); }; + namespace simtbx { namespace Kokkos { packed_metrology::packed_metrology(dxtbx::model::Detector const & arg_detector, dxtbx::model::Beam const & arg_beam) { for (std::size_t panel_id = 0; panel_id < arg_detector.size(); panel_id++){ - // helper code arising from the nanoBragg constructor, with user_beam=True - typedef scitbx::vec3 vec3; + // helper code arising from the nanoBragg constructor, with user_beam=True // DETECTOR properties // typically: 1 0 0 - vec3 fdet_vector = arg_detector[panel_id].get_fast_axis(); - fdet_vector = fdet_vector.normalize(); + vec3 fdet_vector = get_kokkos_vec3(arg_detector[panel_id].get_fast_axis()); + fdet_vector.normalize(); // typically: 0 -1 0 - vec3 sdet_vector = arg_detector[panel_id].get_slow_axis(); - sdet_vector = sdet_vector.normalize(); + vec3 sdet_vector = get_kokkos_vec3(arg_detector[panel_id].get_slow_axis()); + sdet_vector.normalize(); // set orthogonal vector to the detector pixel array vec3 odet_vector = fdet_vector.cross(sdet_vector); - odet_vector = odet_vector.normalize(); + odet_vector.normalize(); // dxtbx origin is location of outer corner of the first pixel - vec3 pix0_vector = arg_detector[panel_id].get_origin()/1000.0; + vec3 pix0_vector = get_kokkos_vec3(arg_detector[panel_id].get_origin()/1000.0); // what is the point of closest approach between sample and detector? - double close_distance = pix0_vector * odet_vector; + double close_distance = pix0_vector.dot(odet_vector); if (close_distance < 0){ bool verbose = false; if(verbose)printf("WARNING: dxtbx model is lefthanded. Inverting odet_vector.\n"); odet_vector = -1. 
* odet_vector; - close_distance = -1*close_distance; + close_distance = -1 * close_distance; } - sdet.push_back(sdet_vector.length()); - fdet.push_back(fdet_vector.length()); - odet.push_back(odet_vector.length()); - pix0.push_back(0.); - for (std::size_t idx_vec = 0; idx_vec < 3; idx_vec++){ - sdet.push_back(sdet_vector[idx_vec]); - fdet.push_back(fdet_vector[idx_vec]); - odet.push_back(odet_vector[idx_vec]); - pix0.push_back(pix0_vector[idx_vec]); - } + sdet.push_back(sdet_vector); + fdet.push_back(fdet_vector); + odet.push_back(odet_vector); + pix0.push_back(pix0_vector); + // set beam centre scitbx::vec2 dials_bc=arg_detector[panel_id].get_beam_centre(arg_beam.get_s0()); dists.push_back(close_distance); @@ -69,28 +65,28 @@ namespace simtbx { namespace Kokkos { }; packed_metrology::packed_metrology(const simtbx::nanoBragg::nanoBragg& nB){ - for (std::size_t idx_vec = 0; idx_vec < 4; idx_vec++){ - sdet.push_back(nB.sdet_vector[idx_vec]); - fdet.push_back(nB.fdet_vector[idx_vec]); - odet.push_back(nB.odet_vector[idx_vec]); - pix0.push_back(nB.pix0_vector[idx_vec]); - } - dists.push_back(nB.close_distance); - Xbeam.push_back(nB.Xbeam); - Ybeam.push_back(nB.Ybeam); + // Careful, 4-vectors! [length, x, y, z] + auto get_kokkos_vec3 = [](auto& src) { return vec3(src[1], src[2], src[3]); }; + sdet.push_back( get_kokkos_vec3(nB.sdet_vector) ); + fdet.push_back( get_kokkos_vec3(nB.fdet_vector) ); + odet.push_back( get_kokkos_vec3(nB.odet_vector) ); + pix0.push_back( get_kokkos_vec3(nB.pix0_vector) ); + dists.push_back(nB.close_distance); + Xbeam.push_back(nB.Xbeam); + Ybeam.push_back(nB.Ybeam); } void packed_metrology::show() const { for (std::size_t idx_p = 0; idx_p < Xbeam.size(); idx_p++){ printf(" Panel %3ld\n",idx_p); - printf(" Panel %3ld sdet %9.6f %9.6f %9.6f %9.6f fdet %9.6f %9.6f %9.6f %9.6f\n", - idx_p,sdet[4*idx_p+0],sdet[4*idx_p+1],sdet[4*idx_p+2],sdet[4*idx_p+3], - fdet[4*idx_p+0],fdet[4*idx_p+1],fdet[4*idx_p+2],fdet[4*idx_p+3] + printf(" Panel %3ld sdet %9.6f %9.6f %9.6f fdet %9.6f %9.6f %9.6f\n", + idx_p,sdet[idx_p][0],sdet[idx_p][1],sdet[idx_p][2], + fdet[idx_p][0],fdet[idx_p][1],fdet[idx_p][2] ); - printf(" Panel %3ld odet %9.6f %9.6f %9.6f %9.6f pix0 %9.6f %9.6f %9.6f %9.6f\n", - idx_p,odet[4*idx_p+0],odet[4*idx_p+1],odet[4*idx_p+2],odet[4*idx_p+3], - pix0[4*idx_p+0],pix0[4*idx_p+1],pix0[4*idx_p+2],pix0[4*idx_p+3] + printf(" Panel %3ld odet %9.6f %9.6f %9.6f pix0 %9.6f %9.6f %9.6f\n", + idx_p,odet[idx_p][0],odet[idx_p][1],odet[idx_p][2], + pix0[idx_p][0],pix0[idx_p][1],pix0[idx_p][2] ); printf(" Panel %3ld beam %11.8f %11.8f\n",idx_p,Xbeam[idx_p],Ybeam[idx_p]); } diff --git a/simtbx/kokkos/detector.h b/simtbx/kokkos/detector.h index ced5c4b87d..03e3913e17 100644 --- a/simtbx/kokkos/detector.h +++ b/simtbx/kokkos/detector.h @@ -7,7 +7,7 @@ // 3) the associated simulated data, in process of being accumulated by kernel calls // 4) mask data // 5) possibly other metadata - +#include #include "scitbx/array_family/shared.h" #include "scitbx/array_family/flex_types.h" @@ -15,8 +15,12 @@ #include "dxtbx/model/beam.h" #include "simtbx/nanoBragg/nanoBragg.h" #include "kokkostbx/kokkos_types.h" +#include "kokkostbx/kokkos_vector3.h" +#include "kokkostbx/kokkos_matrix3.h" + +using vec3 = kokkostbx::vector3; +using mat3 = kokkostbx::matrix3; -#include namespace simtbx { namespace Kokkos { @@ -27,10 +31,10 @@ struct packed_metrology{ packed_metrology(dxtbx::model::Detector const &,dxtbx::model::Beam const &); packed_metrology(const simtbx::nanoBragg::nanoBragg& nB); void show() const; - 
af::sharedsdet; - af::sharedfdet; - af::sharedodet; - af::sharedpix0; + af::sharedsdet; + af::sharedfdet; + af::sharedodet; + af::sharedpix0; af::shareddists; af::sharedXbeam; af::sharedYbeam; @@ -83,10 +87,10 @@ struct kokkos_detector{ vector_float_t m_floatimage = vector_float_t("m_floatimage", 0); // all-panel packed GPU representation of the multi-panel metrology - vector_cudareal_t m_sdet_vector = vector_cudareal_t("m_sdet_vector", 0); - vector_cudareal_t m_fdet_vector = vector_cudareal_t("m_fdet_vector", 0); - vector_cudareal_t m_odet_vector = vector_cudareal_t("m_odet_vector", 0); - vector_cudareal_t m_pix0_vector = vector_cudareal_t("m_pix0_vector", 0); + view_1d_t m_sdet_vector = view_1d_t("m_sdet_vector", 0); + view_1d_t m_fdet_vector = view_1d_t("m_fdet_vector", 0); + view_1d_t m_odet_vector = view_1d_t("m_odet_vector", 0); + view_1d_t m_pix0_vector = view_1d_t("m_pix0_vector", 0); vector_cudareal_t m_distance = vector_cudareal_t("m_distance", 0); vector_cudareal_t m_Xbeam = vector_cudareal_t("m_Xbeam", 0); vector_cudareal_t m_Ybeam = vector_cudareal_t("m_Ybeam", 0); diff --git a/simtbx/kokkos/kernel_math.h b/simtbx/kokkos/kernel_math.h index b140ec91d9..f55e38013e 100644 --- a/simtbx/kokkos/kernel_math.h +++ b/simtbx/kokkos/kernel_math.h @@ -24,6 +24,7 @@ KOKKOS_INLINE_FUNCTION static void cross_product(CUDAREAL *x, CUDAREAL *y, CUDAR /* rotate a 3-vector about a unit vector axis */ KOKKOS_INLINE_FUNCTION static CUDAREAL *rotate_axis(const vector_cudareal_t v, CUDAREAL * newv, const vector_cudareal_t axis, const CUDAREAL phi); KOKKOS_INLINE_FUNCTION static CUDAREAL *rotate_axis(const CUDAREAL * v, CUDAREAL * newv, const vector_cudareal_t axis, const CUDAREAL phi); +KOKKOS_INLINE_FUNCTION static vector3 rotate_axis(const vector3& v, vector3& newv, const vector3& axis, const CUDAREAL phi); /* rotate a 3-vector using a 9-element unitary matrix */ KOKKOS_INLINE_FUNCTION static void rotate_umat(CUDAREAL * v, CUDAREAL *newv, const CUDAREAL * __restrict__ umat); // measure magnitude of vector and put it in 0th element @@ -201,6 +202,18 @@ KOKKOS_INLINE_FUNCTION CUDAREAL *rotate_axis(const CUDAREAL * v, CUDAREAL * newv return newv; } +/* rotate a point about a unit vector axis */ +KOKKOS_INLINE_FUNCTION vector3 rotate_axis(const vector3& v, vector3& newv, const vector3& axis, const CUDAREAL phi) { + + const CUDAREAL sinphi = sin(phi); + const CUDAREAL cosphi = cos(phi); + const CUDAREAL dot = axis.dot(v) * (1.0 - cosphi); + + newv = axis * dot + v * cosphi + axis.cross(v) * sinphi; + + return newv; +} + /* rotate a vector using a 9-element unitary matrix */ KOKKOS_INLINE_FUNCTION void rotate_umat(CUDAREAL * v, CUDAREAL *newv, const CUDAREAL * __restrict__ umat) { diff --git a/simtbx/kokkos/kokkos_instance.cpp b/simtbx/kokkos/kokkos_instance.cpp index 2624459b8e..d0b467501d 100644 --- a/simtbx/kokkos/kokkos_instance.cpp +++ b/simtbx/kokkos/kokkos_instance.cpp @@ -1,6 +1,6 @@ #include "simtbx/kokkos/kokkos_instance.h" -using Kokkos::InitArguments; +using Kokkos::InitializationSettings; using Kokkos::initialize; using Kokkos::finalize; @@ -15,11 +15,9 @@ namespace Kokkos { } kokkos_instance::kokkos_instance(int const& t_deviceID) { - InitArguments kokkos_init; - kokkos_init.device_id = t_deviceID; - if (!m_isInitialized) { - initialize(kokkos_init); + initialize(InitializationSettings() + .set_device_id(t_deviceID)); m_isInitialized = true; m_isFinalized = false; diff --git a/simtbx/kokkos/simulation.cpp b/simtbx/kokkos/simulation.cpp index dc3d5f9aa4..23f960ff08 100644 --- 
a/simtbx/kokkos/simulation.cpp
+++ b/simtbx/kokkos/simulation.cpp
@@ -100,8 +100,6 @@ namespace Kokkos {
   simtbx::Kokkos::kokkos_detector & kdt,
   double const& weight
   ){
-    // cudaSafeCall(cudaSetDevice(SIM.device_Id));
-
    // transfer source_I, source_lambda
    // the int arguments are for sizes of the arrays
    int source_count = SIM.sources;
@@ -110,18 +108,15 @@ namespace Kokkos {
    for (std::size_t iwt = 0; iwt < source_count; iwt++){wptr[iwt] = weight*(SIM.source_I[iwt]);}
    kokkostbx::transfer_double2kokkos(m_source_I, wptr, source_count);
    kokkostbx::transfer_double2kokkos(m_source_lambda, SIM.source_lambda, source_count);
-    // cudaSafeCall(cudaMemcpyVectorDoubleToDevice(cu_source_I, SIM.source_I, SIM.sources));
-    // cudaSafeCall(cudaMemcpyVectorDoubleToDevice(cu_source_lambda, SIM.source_lambda, SIM.sources));
+
+    ::Kokkos::resize(m_crystal_orientation, SIM.phisteps, SIM.mosaic_domains, 3);
+    calc_CrystalOrientations(
+      SIM.phi0, SIM.phistep, SIM.phisteps, m_spindle_vector, m_a0, m_b0, m_c0, SIM.mosaic_spread,
+      SIM.mosaic_domains, m_mosaic_umats, m_crystal_orientation);
    // magic happens here(?): take pointer from singleton, temporarily use it for add Bragg iteration:
    vector_cudareal_t current_channel_Fhkl = kec.d_channel_Fhkl[ichannel];
-    // cudaDeviceProp deviceProps = { 0 };
-    // cudaSafeCall(cudaGetDeviceProperties(&deviceProps, SIM.device_Id));
-    // int smCount = deviceProps.multiProcessorCount;
-    // dim3 threadsPerBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y);
-    // dim3 numBlocks(smCount * 8, 1);
-
    std::size_t panel_size = kdt.m_slow_dim_size * kdt.m_fast_dim_size;

    // the for loop around panels. Offsets given.
@@ -132,16 +127,14 @@ namespace Kokkos {
      SIM.roi_xmax, SIM.roi_ymin, SIM.roi_ymax, SIM.oversample, SIM.point_pixel,
      SIM.pixel_size, m_subpixel_size, m_steps, SIM.detector_thickstep,
      SIM.detector_thicksteps, SIM.detector_thick, SIM.detector_attnlen,
-      extract_subview(kdt.m_sdet_vector, panel_id, m_vector_length),
-      extract_subview(kdt.m_fdet_vector, panel_id, m_vector_length),
-      extract_subview(kdt.m_odet_vector, panel_id, m_vector_length),
-      extract_subview(kdt.m_pix0_vector, panel_id, m_vector_length),
+      extract_subview(kdt.m_sdet_vector, panel_id, 1),
+      extract_subview(kdt.m_fdet_vector, panel_id, 1),
+      extract_subview(kdt.m_odet_vector, panel_id, 1),
+      extract_subview(kdt.m_pix0_vector, panel_id, 1),
      SIM.curved_detector, kdt.metrology.dists[panel_id], kdt.metrology.dists[panel_id],
      m_beam_vector, kdt.metrology.Xbeam[panel_id], kdt.metrology.Ybeam[panel_id],
-      SIM.dmin, SIM.phi0, SIM.phistep, SIM.phisteps, m_spindle_vector,
-      SIM.sources, m_source_X, m_source_Y, m_source_Z,
-      m_source_I, m_source_lambda, m_a0, m_b0,
-      m_c0, SIM.xtal_shape, SIM.mosaic_spread, SIM.mosaic_domains, m_mosaic_umats,
+      SIM.dmin, SIM.phisteps, SIM.sources, m_source_X, m_source_Y, m_source_Z,
+      m_source_I, m_source_lambda, SIM.xtal_shape, SIM.mosaic_domains, m_crystal_orientation,
      SIM.Na, SIM.Nb, SIM.Nc, SIM.V_cell,
      m_water_size, m_water_F, m_water_MW, simtbx::nanoBragg::r_e_sqr,
      SIM.fluence, simtbx::nanoBragg::Avogadro, SIM.spot_scale, SIM.integral_form, SIM.default_F,
@@ -380,10 +373,10 @@ namespace Kokkos {
      SIM.oversample, override_source, SIM.pixel_size,
      kdt.m_slow_dim_size, kdt.m_fast_dim_size,
      SIM.detector_thicksteps, SIM.detector_thickstep, SIM.detector_attnlen,
-      extract_subview(kdt.m_sdet_vector, panel_id, m_vector_length),
-      extract_subview(kdt.m_fdet_vector, panel_id, m_vector_length),
-      extract_subview(kdt.m_odet_vector, panel_id, m_vector_length),
-      extract_subview(kdt.m_pix0_vector, panel_id, m_vector_length),
+      extract_subview(kdt.m_sdet_vector, panel_id, 1),
+      extract_subview(kdt.m_fdet_vector, panel_id, 1),
+      extract_subview(kdt.m_odet_vector, panel_id, 1),
+      extract_subview(kdt.m_pix0_vector, panel_id, 1),
      kdt.metrology.dists[panel_id],
      SIM.point_pixel, SIM.detector_thick, m_source_X, m_source_Y, m_source_Z,
      m_source_lambda, m_source_I,
diff --git a/simtbx/kokkos/simulation.h b/simtbx/kokkos/simulation.h
index 23f8cc0ddb..f1fc1c68ca 100644
--- a/simtbx/kokkos/simulation.h
+++ b/simtbx/kokkos/simulation.h
@@ -7,6 +7,12 @@
 #include "simtbx/kokkos/structure_factors.h"
 #include "simtbx/kokkos/detector.h"
 #include "kokkostbx/kokkos_types.h"
+#include "kokkostbx/kokkos_vector3.h"
+#include "kokkostbx/kokkos_matrix3.h"
+
+using vec3 = kokkostbx::vector3<CUDAREAL>;
+using mat3 = kokkostbx::matrix3<CUDAREAL>;
+using crystal_orientation_t = Kokkos::View; // [phisteps, domains, 3]

 namespace simtbx { namespace Kokkos {

@@ -62,6 +68,7 @@ struct exascale_api {
   vector_cudareal_t m_source_I = vector_cudareal_t("m_source_I", 0);
   vector_cudareal_t m_source_lambda = vector_cudareal_t("m_source_lambda", 0);
   vector_cudareal_t m_mosaic_umats = vector_cudareal_t("m_mosaic_umats", 0);
+  crystal_orientation_t m_crystal_orientation = crystal_orientation_t("m_crystal_orientation", 0, 0, 3);
   CUDAREAL m_water_size = 0;
   CUDAREAL m_water_F = 0;
   CUDAREAL m_water_MW = 0;
diff --git a/simtbx/kokkos/simulation_kernels.h b/simtbx/kokkos/simulation_kernels.h
index 84c4152423..eb368a95db 100644
--- a/simtbx/kokkos/simulation_kernels.h
+++ b/simtbx/kokkos/simulation_kernels.h
@@ -16,25 +16,72 @@ using simtbx::nanoBragg::GAUSS;
 using simtbx::nanoBragg::GAUSS_ARGCHK;
 using simtbx::nanoBragg::TOPHAT;

+void calc_CrystalOrientations(CUDAREAL phi0,
+                              CUDAREAL phistep,
+                              int phisteps,
+                              const vector_cudareal_t spindle_vector,
+                              const vector_cudareal_t a0,
+                              const vector_cudareal_t b0,
+                              const vector_cudareal_t c0,
+                              CUDAREAL mosaic_spread,
+                              int mosaic_domains,
+                              const vector_cudareal_t mosaic_umats,
+                              crystal_orientation_t crystal_orientation) {
+
+  Kokkos::parallel_for("calc_CrystalOrientation", phisteps, KOKKOS_LAMBDA(const int& phi_tic) {
+    // sweep over phi angles
+    CUDAREAL phi = phistep * phi_tic + phi0;
+
+    vec3 spindle_vector_tmp {spindle_vector(1), spindle_vector(2), spindle_vector(3)};
+    vec3 a0_tmp {a0(1), a0(2), a0(3)};
+    vec3 b0_tmp {b0(1), b0(2), b0(3)};
+    vec3 c0_tmp {c0(1), c0(2), c0(3)};
+
+    // rotate about spindle if necessary
+    vec3 ap = a0_tmp.rotate_around_axis(spindle_vector_tmp, phi);
+    vec3 bp = b0_tmp.rotate_around_axis(spindle_vector_tmp, phi);
+    vec3 cp = c0_tmp.rotate_around_axis(spindle_vector_tmp, phi);
+
+    // enumerate mosaic domains
+    for (int mos_tic = 0; mos_tic < mosaic_domains; ++mos_tic) {
+      // apply mosaic rotation after phi rotation
+      vec3 a, b, c;
+
+      if (mosaic_spread > 0.0) {
+        mat3 umat;
+        for (int i=0; i<9; ++i) {
+          umat[i] = mosaic_umats(mos_tic * 9 + i);
+        }
+        a = umat.dot(ap);
+        b = umat.dot(bp);
+        c = umat.dot(cp);
+      } else {
+        a = ap;
+        b = bp;
+        c = cp;
+      }
+      crystal_orientation(phi_tic, mos_tic, 0) = a;
+      crystal_orientation(phi_tic, mos_tic, 1) = b;
+      crystal_orientation(phi_tic, mos_tic, 2) = c;
+    }
+  });
+}
+
 void kokkosSpotsKernel(int spixels, int fpixels, int roi_xmin, int roi_xmax,
               int roi_ymin, int roi_ymax, int oversample, int point_pixel,
               CUDAREAL pixel_size, CUDAREAL subpixel_size, int steps,
               CUDAREAL detector_thickstep, int detector_thicksteps,
               CUDAREAL detector_thick, CUDAREAL detector_mu,
-              const vector_cudareal_t sdet_vector, const vector_cudareal_t fdet_vector,
-              const vector_cudareal_t odet_vector, const vector_cudareal_t pix0_vector,
+              const view_1d_t sdet_vector, const view_1d_t fdet_vector,
+              const view_1d_t odet_vector, const view_1d_t pix0_vector,
               int curved_detector, CUDAREAL distance, CUDAREAL close_distance,
               const vector_cudareal_t beam_vector,
-              CUDAREAL Xbeam, CUDAREAL Ybeam, CUDAREAL dmin, CUDAREAL phi0, CUDAREAL phistep,
-              int phisteps, const vector_cudareal_t spindle_vector, int sources,
+              CUDAREAL Xbeam, CUDAREAL Ybeam, CUDAREAL dmin, int phisteps, int sources,
               const vector_cudareal_t source_X, const vector_cudareal_t source_Y,
               const vector_cudareal_t source_Z,
               const vector_cudareal_t source_I, const vector_cudareal_t source_lambda,
-              const vector_cudareal_t a0, const vector_cudareal_t b0,
-              const vector_cudareal_t c0, shapetype xtal_shape, CUDAREAL mosaic_spread,
-              int mosaic_domains, const vector_cudareal_t mosaic_umats,
-              CUDAREAL Na, CUDAREAL Nb,
-              CUDAREAL Nc, CUDAREAL V_cell,
+              shapetype xtal_shape, int mosaic_domains, crystal_orientation_t crystal_orientation,
+              CUDAREAL Na, CUDAREAL Nb, CUDAREAL Nc, CUDAREAL V_cell,
               CUDAREAL water_size, CUDAREAL water_F, CUDAREAL water_MW, CUDAREAL r_e_sqr,
               CUDAREAL fluence, CUDAREAL Avogadro, CUDAREAL spot_scale, int integral_form,
               CUDAREAL default_F,
@@ -57,23 +104,17 @@ void kokkosSpotsKernel(int spixels, int fpixels, int roi_xmin, int roi_xmax,

   const int total_pixels = spixels * fpixels;

-  // add background from something amorphous
-  CUDAREAL F_bg = water_F;
-  CUDAREAL I_bg = F_bg * F_bg * r_e_sqr * fluence * water_size * water_size * water_size * 1e6 * Avogadro / water_MW;
+  const CUDAREAL distance_r = 1 / distance;
+  const CUDAREAL dmin_r = (dmin > 0.0) ? 1/dmin : 0.0;

-  Kokkos::parallel_for("kokkosSpotsKernel", total_pixels, KOKKOS_LAMBDA(const int& pixIdx) {
+  // add background from something amorphous, precalculate scaling
+  const CUDAREAL F_bg = water_F;
+  const CUDAREAL I_bg = F_bg * F_bg * r_e_sqr * fluence * water_size * water_size * water_size * 1e6 * Avogadro / water_MW;
+  const CUDAREAL I_factor = r_e_sqr * spot_scale * fluence / steps;

-    vec3 sdet_tmp {sdet_vector(1), sdet_vector(2), sdet_vector(3)};
-    vec3 fdet_tmp {fdet_vector(1), fdet_vector(2), fdet_vector(3)};
-    vec3 odet_tmp {odet_vector(1), odet_vector(2), odet_vector(3)};
-    vec3 pix0_tmp {pix0_vector(1), pix0_vector(2), pix0_vector(3)};
+  Kokkos::parallel_for("kokkosSpotsKernel", total_pixels, KOKKOS_LAMBDA(const int& pixIdx) {
     vec3 beam_vector_tmp {beam_vector(1), beam_vector(2), beam_vector(3)};
-    vec3 spindle_vector_tmp {spindle_vector(1), spindle_vector(2), spindle_vector(3)};
-
-    vec3 a0_tmp {a0(1), a0(2), a0(3)};
-    vec3 b0_tmp {b0(1), b0(2), b0(3)};
-    vec3 c0_tmp {c0(1), c0(2), c0(3)};

     vec3 polar_vector_tmp {polar_vector(1), polar_vector(2), polar_vector(3)};

@@ -127,38 +168,35 @@ void kokkosSpotsKernel(int spixels, int fpixels, int roi_xmin, int roi_xmax,
          // pixel_Y = Sdet-Ybeam;
          // pixel_Z = Fdet-Xbeam;
          vec3 pixel_pos;
-          pixel_pos += Fdet * fdet_tmp;
-          pixel_pos += Sdet * sdet_tmp;
-          pixel_pos += Odet * odet_tmp;
-          pixel_pos += pix0_tmp;
-
-          CUDAREAL pixel_pos_tmp[] = {0, pixel_pos[0], pixel_pos[1], pixel_pos[2]};
+          pixel_pos += Fdet * fdet_vector(0);
+          pixel_pos += Sdet * sdet_vector(0);
+          pixel_pos += Odet * odet_vector(0);
+          pixel_pos += pix0_vector(0);

          if (curved_detector) {
            // construct detector pixel that is always "distance" from the sample
            vec3 dbvector = distance * vec3{beam_vector(1), beam_vector(2), beam_vector(3)};
            // treat detector pixel coordinates as radians
-            vec3 newvector = dbvector.rotate_around_axis(sdet_tmp, pixel_pos.y_val() / distance );
-            pixel_pos = newvector.rotate_around_axis(fdet_tmp, pixel_pos.z_val() / distance );
+            vec3 newvector = dbvector.rotate_around_axis(sdet_vector(0), pixel_pos.y_val() * distance_r );
+            pixel_pos = newvector.rotate_around_axis(fdet_vector(0), pixel_pos.z_val() * distance_r );
          }

          // construct the diffracted-beam unit vector to this sub-pixel
-          CUDAREAL airpath = pixel_pos.length();
+          CUDAREAL airpath_r = 1 / pixel_pos.length();
          vec3 diffracted = pixel_pos.get_unit_vector();

          // solid angle subtended by a pixel: (pix/airpath)^2*cos(2theta)
-          CUDAREAL omega_pixel = pixel_size * pixel_size / airpath / airpath * close_distance / airpath;
+          CUDAREAL omega_pixel = pixel_size * pixel_size * airpath_r * airpath_r * close_distance * airpath_r;
          // option to turn off obliquity effect, inverse-square-law only
          if (point_pixel) {
-            omega_pixel = 1.0 / airpath / airpath;
+            omega_pixel = airpath_r * airpath_r;
          }

          // now calculate detector thickness effects
          CUDAREAL capture_fraction = 1.0;
          if (detector_thick > 0.0 && detector_mu> 0.0) {
            // inverse of effective thickness increase
-            vec3 odet{odet_vector(1), odet_vector(2), odet_vector(3)};
-            CUDAREAL parallax = odet.dot(diffracted);
+            CUDAREAL parallax = odet_vector(0).dot(diffracted);
            capture_fraction = exp(-thick_tic * detector_thickstep / detector_mu / parallax) - exp(-(thick_tic + 1) * detector_thickstep / detector_mu / parallax);
          }
@@ -182,7 +220,8 @@ void kokkosSpotsKernel(int spixels, int fpixels, int roi_xmin, int roi_xmax,
            // rough cut to speed things up when we aren't using whole detector
            if (dmin > 0.0 && stol > 0.0) {
-              if (dmin > 0.5 / stol) {
+              // use reciprocal of (dmin > 0.5 / stol)
+              if (dmin_r <= 2 * stol) {
                continue;
              }
            }
@@ -197,34 +236,12 @@ void kokkosSpotsKernel(int spixels, int fpixels, int roi_xmin, int roi_xmax,
            // sweep over phi angles
            for (int phi_tic = 0; phi_tic < phisteps; ++phi_tic) {
-              CUDAREAL phi = phistep * phi_tic + phi0;
-
-              // rotate about spindle if necessary
-              vec3 ap = a0_tmp.rotate_around_axis(spindle_vector_tmp, phi);
-              vec3 bp = b0_tmp.rotate_around_axis(spindle_vector_tmp, phi);
-              vec3 cp = c0_tmp.rotate_around_axis(spindle_vector_tmp, phi);
-
              // enumerate mosaic domains
              for (int mos_tic = 0; mos_tic < mosaic_domains; ++mos_tic) {
                // apply mosaic rotation after phi rotation
-                // CUDAREAL a[4];
-                // CUDAREAL b[4];
-                // CUDAREAL c[4];
-                vec3 a, b, c;
-
-                if (mosaic_spread > 0.0) {
-                  mat3 umat;
-                  for (int i=0; i<9; ++i) {
-                    umat[i] = mosaic_umats(mos_tic * 9 + i);
-                  }
-                  a = umat.dot(ap);
-                  b = umat.dot(bp);
-                  c = umat.dot(cp);
-                } else {
-                  a = ap;
-                  b = bp;
-                  c = cp;
-                }
+                auto a = crystal_orientation(phi_tic, mos_tic, 0);
+                auto b = crystal_orientation(phi_tic, mos_tic, 1);
+                auto c = crystal_orientation(phi_tic, mos_tic, 2);

                // construct fractional Miller indicies
                CUDAREAL h = a.dot(scattering);
@@ -257,7 +274,10 @@ void kokkosSpotsKernel(int spixels, int fpixels, int roi_xmin, int roi_xmax,
                  }
                } else {
                  // handy radius in reciprocal space, squared
-                  hrad_sqr = (h - h0) * (h - h0) * Na * Na + (k - k0) * (k - k0) * Nb * Nb + (l - l0) * (l - l0) * Nc * Nc;
+                  const CUDAREAL hrad = (h - h0) * Na;
+                  const CUDAREAL krad = (k - k0) * Nb;
+                  const CUDAREAL lrad = (l - l0) * Nc;
+                  hrad_sqr = hrad * hrad + krad * krad + lrad * lrad;
                }
                if (xtal_shape == ROUND) {
                  // use sinc3 for elliptical xtal shape,
@@ -304,19 +324,13 @@ void kokkosSpotsKernel(int spixels, int fpixels, int roi_xmin, int roi_xmax,
                // convert amplitudes into intensity (photons per steradian)
                I += F_cell * F_cell * F_latt * F_latt * source_fraction * capture_fraction * omega_pixel;
                omega_sub_reduction += omega_pixel;
-              }
-              // end of mosaic loop
-            }
-            // end of phi loop
-          }
-          // end of source loop
-        }
-        // end of detector thickness loop
-      }
-      // end of sub-pixel y loop
-    }
-    // end of sub-pixel x loop
-    const double photons = I_bg + (r_e_sqr * spot_scale * fluence * polar * I) / steps;
+              } // end of mosaic loop
+            } // end of phi loop
+          } // end of source loop
+        } // end of detector thickness loop
+      } // end of sub-pixel y loop
+    } // end of sub-pixel x loop
+    const double photons = I_bg + I_factor * polar * I;
    floatimage( pixIdx ) = photons;
    omega_reduction( pixIdx ) = omega_sub_reduction; // shared contention
    max_I_x_reduction( pixIdx ) = max_I_x_sub_reduction;
@@ -330,8 +344,8 @@ void debranch_maskall_Kernel(int npanels, int spixels, int fpixels, int total_pi
              CUDAREAL pixel_size, CUDAREAL subpixel_size, int steps,
              CUDAREAL detector_thickstep, int detector_thicksteps,
              CUDAREAL detector_thick, CUDAREAL detector_mu, const int vec_len,
-              const vector_cudareal_t sdet_vector, const vector_cudareal_t fdet_vector,
-              const vector_cudareal_t odet_vector, const vector_cudareal_t pix0_vector,
+              const view_1d_t sdet_vector, const view_1d_t fdet_vector,
+              const view_1d_t odet_vector, const view_1d_t pix0_vector,
              const vector_cudareal_t distance, const vector_cudareal_t close_distance,
              const vector_cudareal_t beam_vector,
              const vector_cudareal_t Xbeam, const vector_cudareal_t Ybeam, // not even used, after all the work
@@ -419,19 +433,18 @@ void debranch_maskall_Kernel(int npanels, int spixels, int fpixels, int total_pi
          // pixel_Z = Fdet-Xbeam;
          //CUDAREAL * pixel_pos = tmpVector1;
          CUDAREAL pixel_pos[4];
-          int iVL = vec_len * i_panel;
-          pixel_pos[1] = Fdet * fdet_vector(iVL+1)
-                       + Sdet * sdet_vector(iVL+1)
-                       + Odet * odet_vector(iVL+1)
-                       + pix0_vector(iVL+1); // X
-          pixel_pos[2] = Fdet * fdet_vector(iVL+2)
-                       + Sdet * sdet_vector(iVL+2)
-                       + Odet * odet_vector(iVL+2)
-                       + pix0_vector(iVL+2); // Y
-          pixel_pos[3] = Fdet * fdet_vector(iVL+3)
-                       + Sdet * sdet_vector(iVL+3)
-                       + Odet * odet_vector(iVL+3)
-                       + pix0_vector(iVL+3); // Z
+          pixel_pos[1] = Fdet * fdet_vector(i_panel)[0]
+                       + Sdet * sdet_vector(i_panel)[0]
+                       + Odet * odet_vector(i_panel)[0]
+                       + pix0_vector(i_panel)[0]; // X
+          pixel_pos[2] = Fdet * fdet_vector(i_panel)[1]
+                       + Sdet * sdet_vector(i_panel)[1]
+                       + Odet * odet_vector(i_panel)[1]
+                       + pix0_vector(i_panel)[1]; // Y
+          pixel_pos[3] = Fdet * fdet_vector(i_panel)[2]
+                       + Sdet * sdet_vector(i_panel)[2]
+                       + Odet * odet_vector(i_panel)[2]
+                       + pix0_vector(i_panel)[2]; // Z

          // construct the diffracted-beam unit vector to this sub-pixel
          //CUDAREAL * diffracted = tmpVector2;
@@ -450,9 +463,9 @@ void debranch_maskall_Kernel(int npanels, int spixels, int fpixels, int total_pi
          if (detector_thick > 0.0 && detector_mu> 0.0) {
            // inverse of effective thickness increase
            CUDAREAL odet[4];
-            odet[1] = odet_vector(iVL+1);
-            odet[2] = odet_vector(iVL+2);
-            odet[3] = odet_vector(iVL+3);
+            odet[1] = odet_vector(i_panel)[0];
+            odet[2] = odet_vector(i_panel)[1];
+            odet[3] = odet_vector(i_panel)[2];
            CUDAREAL parallax = dot_product(odet, diffracted);
            capture_fraction = exp(-thick_tic * detector_thickstep / detector_mu / parallax) - exp(-(thick_tic + 1) * detector_thickstep / detector_mu / parallax);
@@ -615,8 +628,8 @@ void add_array( view_1d_t lhs, const view_1d_t rhs ) {

 void add_background_kokkos_kernel(int sources, int nanoBragg_oversample, int override_source,
              CUDAREAL pixel_size, int spixels, int fpixels, int detector_thicksteps,
              CUDAREAL detector_thickstep, CUDAREAL detector_attnlen,
-              const vector_cudareal_t sdet_vector, const vector_cudareal_t fdet_vector,
-              const vector_cudareal_t odet_vector, const vector_cudareal_t pix0_vector,
+              const view_1d_t sdet_vector, const view_1d_t fdet_vector,
+              const view_1d_t odet_vector, const view_1d_t pix0_vector,
              CUDAREAL close_distance, int point_pixel, CUDAREAL detector_thick,
              const vector_cudareal_t source_X, const vector_cudareal_t source_Y,
              const vector_cudareal_t source_Z,
@@ -653,11 +666,6 @@ void add_background_kokkos_kernel(int sources, int nanoBragg_oversample, int ove
   // const int stride = fstride * sstride;

   Kokkos::parallel_for("add_background", total_pixels, KOKKOS_LAMBDA(const int& pixIdx) {
-    vec3 sdet_tmp {sdet_vector(1), sdet_vector(2), sdet_vector(3)};
-    vec3 fdet_tmp {fdet_vector(1), fdet_vector(2), fdet_vector(3)};
-    vec3 odet_tmp {odet_vector(1), odet_vector(2), odet_vector(3)};
-    vec3 pix0_tmp {pix0_vector(1), pix0_vector(2), pix0_vector(3)};
-
    vec3 polar_vector_tmp {polar_vector(1), polar_vector(2), polar_vector(3)};

    const int fpixel = pixIdx % fpixels;
@@ -675,19 +683,11 @@ void add_background_kokkos_kernel(int sources, int nanoBragg_oversample, int ove
      for(int thick_tic=0; thick_tic 0.0){
        // inverse of effective thickness increase
        // CUDAREAL parallax = diffracted[1] * odet_vector(1) + diffracted[2] * odet_vector(2) + diffracted[3] * odet_vector(3);
-        CUDAREAL parallax = diffracted.dot(odet_tmp);
+        CUDAREAL parallax = diffracted.dot(odet_vector(0));
        capture_fraction = exp(-thick_tic*detector_thickstep/detector_attnlen/parallax) -exp(-(thick_tic+1)*detector_thickstep/detector_attnlen/parallax);
      }
@@ -710,26 +710,15 @@ void add_background_kokkos_kernel(int sources, int nanoBragg_oversample, int ove
      for(int source=source_start; source 0 : value of the multiplicative factor
+        intfile_scale = 1 (default): do not apply a factor
+        intfile_scale = 0 : compute a reasonable scale factor to set max pixel to 55000; given by get_intfile_scale()
+        cbf_int: boolean flag, write the cbf using 32-bit int precision
+        """
+        temp = self.cbf_int
+        self.cbf_int = cbf_int
+
+        if intfile_scale != 1.0:
+            cache_pixels = self.raw_pixels
+            if intfile_scale > 0: self.raw_pixels = self.raw_pixels * intfile_scale
+            else: self.raw_pixels = self.raw_pixels * self.get_intfile_scale()
+            # print("switch to scaled")
+
+        if toggle_conventions:
+            # switch to DIALS convention before writing CBF
+            CURRENT_CONV = self.beamcenter_convention
+            self.beamcenter_convention=DIALS
+
+        imgset = self.imageset
+        writer = cbf_writer.FullCBFWriter(imageset=imgset)
+        cbf = writer.get_cbf_handle(index=0, header_only=True)
+        data = imgset.get_raw_data(0)
+        writer.add_data_to_cbf(cbf, data=data)
+        writer.write_cbf(cbf_filename, cbf=cbf)
+
+        if toggle_conventions:
+            self.beamcenter_convention=CURRENT_CONV
+
+        if intfile_scale != 1.0:
+            self.raw_pixels = cache_pixels
+            # print("switch back to cached")
+
+        self.cbf_int = temp
+
+    def to_nexus_nxmx(self, nxmx_filename, toggle_conventions=False, intfile_scale=1.0):
+        """write a NeXus NXmx-format image file to disk from the raw pixel array
+        intfile_scale: multiplicative factor applied to raw pixels before output
+        intfile_scale > 0 : value of the multiplicative factor
+        intfile_scale = 1 (default): do not apply a factor
@@ -285,8 +354,18 @@ def to_cbf(self, cbf_filename, toggle_conventions=False, intfile_scale=1.0):
      CURRENT_CONV = self.beamcenter_convention
      self.beamcenter_convention=DIALS

-    writer = cbf_writer.FullCBFWriter(imageset=self.imageset)
-    writer.write_cbf(cbf_filename, index=0)
+    params = nxmx_writer.phil_scope.fetch(parse("""
+      output_file=%s
+      nexus_details {
+        instrument_name=nanoBragg
+        source_name=nanoBragg
+        start_time=NA
+        end_time_estimated=NA
+        sample_name=nanoBragg
+      }
+      """%nxmx_filename)).extract()
+    writer = nxmx_writer.NXmxWriter(params)
+    writer(imageset=self.imageset)

    if toggle_conventions:
      self.beamcenter_convention=CURRENT_CONV
@@ -295,6 +374,22 @@ def to_cbf(self, cbf_filename, toggle_conventions=False, intfile_scale=1.0):
      self.raw_pixels = cache_pixels
      # print("switch back to cached")

+def nexus_factory(nxmx_filename):
+  params = nxmx_writer.phil_scope.fetch(parse("""
+    output_file=%s
+    nexus_details {
+      instrument_name=nanoBragg
+      source_name=nanoBragg
+      start_time=NA
+      end_time_estimated=NA
+      sample_name=nanoBragg
+    }
+    dtype=int32
+    """%nxmx_filename)).extract()
+  writer = nxmx_writer.NXmxWriter(params)
+  return writer
+
+
 def make_imageset(data, beam, detector):
   format_class = FormatBraggInMemoryMultiPanel(data)
   reader = MemReaderNamedPath("virtual_Bragg_path", [format_class])
@@ -332,7 +427,7 @@ def get_mask(self, goniometer=None):
    """dummie place holder for mask, consider using internal nanoBragg mask"""
    return self.mask

-class FormatBraggInMemory:
+class FormatBraggInMemory(Format):

  def __init__(self, raw_pixels):
    self.raw_pixels = raw_pixels
@@ -358,6 +453,10 @@ def get_mask(self, goniometer=None):
    """dummie place holder for mask, consider using internal nanoBragg mask"""
    return self.mask,

+  @classmethod
+  def get_instance(Class, filename, **kwargs):
+    return Class(raw_pixels = kwargs.pop('raw_pixels'), **kwargs)
+
  #def paths(self):
  #  return ["InMemoryBraggPath"] # TODO: CBFLib complains if no datablock path provided which comes from path
diff --git a/simtbx/nanoBragg/anisotropic_mosaicity.py b/simtbx/nanoBragg/anisotropic_mosaicity.py
index edf3d20643..3934117b47 100644
--- a/simtbx/nanoBragg/anisotropic_mosaicity.py
+++ b/simtbx/nanoBragg/anisotropic_mosaicity.py
@@ -72,8 +72,8 @@ def _compute(rot_ax, ang_idx, eta_eff, Cvec, derivs=None, second_derivs=None):
    dU_d_theta = rot_ax.axis_and_angle_as_r3_derivative_wrt_angle(rot_sign*rot_ang, deg=False)  # 1st deriv
    d2U_d_theta2 = rot_ax.axis_and_angle_as_r3_derivative_wrt_angle(rot_sign*rot_ang, deg=False, second_order=True)  # second deriv
    for d, d2 in zip(d_theta_d_eta, dsquared_theta_d_eta_squared):
-      dU_d_eta = rot_sign*dU_d_theta*d
-      d2U_d_eta2 = d2U_d_theta2*(d**2) + dU_d_theta*d2
+      dU_d_eta = dU_d_theta*(rot_sign*d)
+      d2U_d_eta2 = d2U_d_theta2*(d**2) + dU_d_theta*(rot_sign*d2)
      Uprimes.append(dU_d_eta)
      Udblprimes.append(d2U_d_eta2)
diff --git a/simtbx/nanoBragg/nanoBragg.cpp b/simtbx/nanoBragg/nanoBragg.cpp
index 2c4948c4d9..14ba8b26b8 100644
--- a/simtbx/nanoBragg/nanoBragg.cpp
+++ b/simtbx/nanoBragg/nanoBragg.cpp
@@ -216,6 +216,9 @@ nanoBragg::init_defaults()
        exit(9);
    };

+    /* write cbf files in int precision? */
+    cbf_int = false;
+
    /* optional file stuff, to be removed eventually? */
    matfilename = NULL;
    hklfilename = NULL;
diff --git a/simtbx/nanoBragg/nanoBragg.h b/simtbx/nanoBragg/nanoBragg.h
index 00963071c5..bd1c1a5cc6 100644
--- a/simtbx/nanoBragg/nanoBragg.h
+++ b/simtbx/nanoBragg/nanoBragg.h
@@ -366,6 +366,7 @@ class nanoBragg {
    af::flex_double raw_pixels;
    unsigned short int *intimage;
    unsigned char *pgmimage;
+    bool cbf_int; // if saving the cbf file raw pixels will be converted to int
//    char *byte_order; // = get_byte_order();

    /* optional input image to extract background? */
//    SMVinfo imginfile;
diff --git a/simtbx/nanoBragg/nanoBragg_ext.cpp b/simtbx/nanoBragg/nanoBragg_ext.cpp
index 197170d696..269c11634f 100644
--- a/simtbx/nanoBragg/nanoBragg_ext.cpp
+++ b/simtbx/nanoBragg/nanoBragg_ext.cpp
@@ -1915,6 +1915,11 @@ printf("DEBUG: pythony_stolFbg[1]=(%g,%g)\n",nanoBragg.pythony_stolFbg[1][0],nan
                    make_setter(&nanoBragg::raw_pixels,dcp()),
                    "2D flex array representing floating-point pixel values, this is expected photons before you call add_noise(), which converts it into detector pixel values, or ADU")

+      .add_property("cbf_int",
+                    make_getter(&nanoBragg::cbf_int,rbv()),
+                    make_setter(&nanoBragg::cbf_int,dcp()),
+                    "Write the cbf file using to_cbf with int32 precision")
+
      /* print to screen a summary of all initialized parameters */
      .def("show_params",&nanoBragg::show_params,
           "print out all simulation parameters, just like the standalone program")
diff --git a/simtbx/nanoBragg/sim_data.py b/simtbx/nanoBragg/sim_data.py
index ac7d5cc56b..f40c2581da 100644
--- a/simtbx/nanoBragg/sim_data.py
+++ b/simtbx/nanoBragg/sim_data.py
@@ -360,6 +360,8 @@ def get_detector_corner_res(self):

  def update_Fhkl_tuple(self):
    if self.crystal.miller_array is not None:
+      if np.all(self.crystal.miller_array.data().as_numpy_array()==0):
+        raise ValueError("Seems all miller indices are 0")
      d_max, _ = self.crystal.miller_array.resolution_range()
      d_min = self.get_detector_corner_res()
      ma_on_detector = self.crystal.miller_array.resolution_filter(d_min=d_min, d_max=d_max)
@@ -470,8 +472,12 @@ def _init_diffBragg_umats(self):
    if self.crystal.anisotropic_mos_spread_deg is not None:
      if tuple(self.crystal.anisotropic_mos_spread_deg) == (0,0,0) and self.crystal.n_mos_domains != 1:
        raise ValueError("If more than 1 mosaic domain are passed, must set a positive value for anisotropic_mos_spread")
-      self.D.has_anisotropic_mosaic_spread = True
-      mosaicity = self.crystal.anisotropic_mos_spread_deg
+      if len(set(self.crystal.anisotropic_mos_spread_deg))==1:
+        self.D.has_anisotropic_mosaic_spread = False
+        mosaicity = self.crystal.anisotropic_mos_spread_deg[0]
+      else:
+        self.D.has_anisotropic_mosaic_spread = True
+        mosaicity = self.crystal.anisotropic_mos_spread_deg
    else:
      self.D.has_anisotropic_mosaic_spread = False
      mosaicity = self.crystal.mos_spread_deg
diff --git a/simtbx/nanoBragg/tst_gauss_argchk.py b/simtbx/nanoBragg/tst_gauss_argchk.py
index 87368ac24f..91ba3043ff 100644
--- a/simtbx/nanoBragg/tst_gauss_argchk.py
+++ b/simtbx/nanoBragg/tst_gauss_argchk.py
@@ -231,6 +231,7 @@ def simple_monochromatic_case(bragg_engine, BEAM, DETECTOR, CRYSTAL, SF_model, a
    SIM.to_smv_format(fileout="test_full_001.img", intfile_scale=output_scale)
    assert approx_equal(SIM.raw_pixels, SIM2.raw_pixels)
    SIM.to_cbf("test_full_001.cbf", intfile_scale=output_scale)
+    SIM.to_nexus_nxmx("test_full_001.h5", intfile_scale=output_scale)

 if runmode=="GPU":
  bragg_engine = nanoBragg.add_nanoBragg_spots_cuda
diff --git a/simtbx/run_tests.py b/simtbx/run_tests.py
index e333d6ca87..7add96ae7c 100644
--- a/simtbx/run_tests.py
+++ b/simtbx/run_tests.py
@@ -33,6 +33,7 @@
  ["$D/diffBragg/tests/tst_diffBragg_ncells_property_anisotropic.py", "--idx 1"],
  ["$D/diffBragg/tests/tst_diffBragg_ncells_property_anisotropic.py", "--idx 2"],
  ["$D/diffBragg/tests/tst_diffBragg_unitcell_property.py", "--crystalsystem tetragonal" ],
+  ["$D/diffBragg/tests/tst_diffBragg_unitcell_property.py", "--crystalsystem hexagonal" ],
  ["$D/diffBragg/tests/tst_diffBragg_unitcell_property.py", "--crystalsystem monoclinic" ],
["$D/diffBragg/tests/tst_diffBragg_lambda_coefficients.py", "--idx 0"], ["$D/diffBragg/tests/tst_diffBragg_lambda_coefficients.py", "--idx 1"], diff --git a/xfel/merging/application/errors/error_modifier_ev11.py b/xfel/merging/application/errors/error_modifier_ev11.py index 004c9539d8..cf72dbb1c6 100644 --- a/xfel/merging/application/errors/error_modifier_ev11.py +++ b/xfel/merging/application/errors/error_modifier_ev11.py @@ -325,7 +325,7 @@ def calculate_initial_ev11_parameters(self): # Calculate initial EV11 parameters self.sfac = 1/slope self.sadd = offset - self.sb = math.sqrt(self.sadd) + self.sb = math.sqrt(self.sadd) if self.sadd > 0 else 0 ''' if True: