diff --git a/examples/2_Intermediate/stage_two_optimization_planar_coils.py b/examples/2_Intermediate/stage_two_optimization_planar_coils.py index 630b10655..bdc8a2aff 100755 --- a/examples/2_Intermediate/stage_two_optimization_planar_coils.py +++ b/examples/2_Intermediate/stage_two_optimization_planar_coils.py @@ -100,7 +100,7 @@ # Create the initial coils: base_curves = create_equally_spaced_planar_curves( ncoils, s.nfp, stellsym=True, R0=R0, R1=R1, order=order, - jax_flag=True + jax_flag=False ) # for i in range(len(base_curves)): # base_curves[i].set('x' + str(2 * order + 1), np.random.rand(1) - 0.5) diff --git a/examples/3_Advanced/QH_reactorscale_DA.py b/examples/3_Advanced/QH_reactorscale_DA.py index f5e057651..fef898a02 100644 --- a/examples/3_Advanced/QH_reactorscale_DA.py +++ b/examples/3_Advanced/QH_reactorscale_DA.py @@ -99,7 +99,7 @@ def initialize_coils_QH(TEST_DIR, s): base_curves = create_equally_spaced_curves( ncoils, s.nfp, stellsym=True, R0=R0, R1=R1, order=order, numquadpoints=256, - jax_flag=True, + jax_flag=False, ) base_currents = [(Current(total_current / ncoils * 1e-7) * 1e7) for _ in range(ncoils - 1)] @@ -142,12 +142,12 @@ def initialize_coils_QH(TEST_DIR, s): aa = 0.05 bb = 0.05 -Nx = 6 +Nx = 7 Ny = Nx Nz = Nx # Create the initial coils: base_curves, all_curves = create_planar_curves_between_two_toroidal_surfaces( - s, s_inner, s_outer, Nx, Ny, Nz, order=order, coil_coil_flag=True, jax_flag=True, + s, s_inner, s_outer, Nx, Ny, Nz, order=order, coil_coil_flag=True, jax_flag=False, # numquadpoints=10 # Defaults is (order + 1) * 40 so this halves it ) import warnings @@ -309,8 +309,8 @@ def pointData_forces_torques(coils, allcoils, aprimes, bprimes, nturns_list): NetTorques=coil_net_torques(coils, coils + coils_TF, regularization_rect(np.ones(len(coils)) * aa, np.ones(len(coils)) * bb), np.ones(len(coils)) * nturns) ) # Force and Torque calculations spawn a bunch of spurious BiotSavart child objects -- erase them! -for c in (coils + coils_TF): - c._children = set() +# for c in (coils + coils_TF): +# c._children = set() pointData = {"B_N": np.sum(btot.B().reshape((nphi, ntheta, 3)) * s.unitnormal(), axis=2)[:, :, None]} s.to_vtk(OUT_DIR + "surf_init_DA", extra_data=pointData) @@ -346,7 +346,7 @@ def pointData_forces_torques(coils, allcoils, aprimes, bprimes, nturns_list): # While the coil array is not moving around, they cannot # interlink. -linkNum = LinkingNumber(curves + curves_TF) +linkNum = LinkingNumber(curves + curves_TF, downsample=4) ##### Note need coils_TF + coils below!!!!!!! # Jforce2 = sum([LpCurveForce(c, coils + coils_TF, @@ -359,7 +359,7 @@ def pointData_forces_torques(coils, allcoils, aprimes, bprimes, nturns_list): regularization_list2 = np.ones(len(coils_TF)) * regularization_rect(a, b) # Jforce = MixedLpCurveForce(coils, coils_TF, regularization_list, regularization_list2) # [SquaredMeanForce2(c, coils) for c in (base_coils)] # Jforce = MixedSquaredMeanForce(coils, coils_TF) -Jforce = sum([LpCurveForce(c, coils + coils_TF, regularization_rect(a_list[i], b_list[i]), p=4, threshold=4e5 * 100) for i, c in enumerate(base_coils + base_coils_TF)]) +Jforce = sum([LpCurveForce(c, coils + coils_TF, regularization_rect(a_list[i], b_list[i]), p=4, threshold=4e5 * 100, downsample=1) for i, c in enumerate(base_coils + base_coils_TF)]) Jforce2 = sum([SquaredMeanForce(c, coils + coils_TF) for c in (base_coils + base_coils_TF)]) Jtorque = sum([LpCurveTorque(c, coils + coils_TF, regularization_rect(a_list[i], b_list[i]), p=2, threshold=4e5 * 100) for i, c in enumerate(base_coils + base_coils_TF)]) # Jtorque = sum([LpCurveTorque(c, coils + coils_TF, regularization_rect(a_list[i], b_list[i]), p=2, threshold=1e5 * 100) for i, c in enumerate(base_coils + base_coils_TF)]) @@ -407,29 +407,27 @@ def pointData_forces_torques(coils, allcoils, aprimes, bprimes, nturns_list): # print(btot.ancestors,len(btot.ancestors)) # print(JF.ancestors,len(JF.ancestors)) +# Force and Torque calculations using JaxCurves spawn a bunch of spurious BiotSavart child objects +# erase them! +# for c in (coils + coils_TF): +# c._children = set() def fun(dofs): + pr = cProfile.Profile() + pr.enable() JF.x = dofs - # pr = cProfile.Profile() - # pr.enable() J = JF.J() grad = JF.dJ() - # pr.disable() - # sio = io.StringIO() - # sortby = SortKey.CUMULATIVE - # ps = pstats.Stats(pr, stream=sio).sort_stats(sortby) - # ps.print_stats(20) - # print(sio.getvalue()) - # exit() jf = Jf.J() + # print(JF._children, btot._children, btot.coils[0]._children) length_val = LENGTH_WEIGHT.value * Jlength.J() cc_val = CC_WEIGHT * Jccdist.J() cs_val = CS_WEIGHT * Jcsdist.J() link_val = LINK_WEIGHT * linkNum.J() forces_val = FORCE_WEIGHT.value * Jforce.J() - forces_val2 = FORCE_WEIGHT2.value * Jforce2.J() - torques_val = TORQUE_WEIGHT.value * Jtorque.J() - torques_val2 = TORQUE_WEIGHT2.value * Jtorque2.J() + # forces_val2 = FORCE_WEIGHT2.value * Jforce2.J() + # torques_val = TORQUE_WEIGHT.value * Jtorque.J() + # torques_val2 = TORQUE_WEIGHT2.value * Jtorque2.J() BdotN = np.mean(np.abs(np.sum(btot.B().reshape((nphi, ntheta, 3)) * s.unitnormal(), axis=2))) BdotN_over_B = np.mean(np.abs(np.sum(btot.B().reshape((nphi, ntheta, 3)) * s.unitnormal(), axis=2)) ) / np.mean(btot.AbsB()) @@ -442,18 +440,27 @@ def fun(dofs): valuestr += f", csObj={cs_val:.2e}" valuestr += f", Lk1Obj={link_val:.2e}" valuestr += f", forceObj={forces_val:.2e}" - valuestr += f", forceObj2={forces_val2:.2e}" - valuestr += f", torqueObj={torques_val:.2e}" - valuestr += f", torqueObj2={torques_val2:.2e}" + # valuestr += f", forceObj2={forces_val2:.2e}" + # valuestr += f", torqueObj={torques_val:.2e}" + # valuestr += f", torqueObj2={torques_val2:.2e}" outstr += f", F={Jforce.J():.2e}" - outstr += f", Fnet={Jforce2.J():.2e}" - outstr += f", T={Jtorque.J():.2e}" - outstr += f", Tnet={Jtorque2.J():.2e}" + # outstr += f", Fnet={Jforce2.J():.2e}" + # outstr += f", T={Jtorque.J():.2e}" + # outstr += f", Tnet={Jtorque2.J():.2e}" outstr += f", C-C-Sep={Jccdist.shortest_distance():.2f}, C-S-Sep={Jcsdist2.shortest_distance():.2f}" outstr += f", Link Number = {linkNum.J()}" outstr += f", ║∇J║={np.linalg.norm(grad):.1e}" print(outstr) print(valuestr) + pr.disable() + sio = io.StringIO() + sortby = SortKey.CUMULATIVE + ps = pstats.Stats(pr, stream=sio).sort_stats(sortby) + ps.print_stats(20) + print(sio.getvalue()) + # for c in (coils + coils_TF): + # c._children = set() + # exit() return J, grad @@ -511,14 +518,14 @@ def fun(dofs): Jlength.dJ() t2 = time.time() print('sum(Jls_TF) time = ', t2 - t1, ' s') -# t1 = time.time() -# Jforce.J() -# t2 = time.time() -# print('Jforces time = ', t2 - t1, ' s') -# t1 = time.time() -# Jforce.dJ() -# t2 = time.time() -# print('dJforces time = ', t2 - t1, ' s') +t1 = time.time() +Jforce.J() +t2 = time.time() +print('Jforces time = ', t2 - t1, ' s') +t1 = time.time() +Jforce.dJ() +t2 = time.time() +print('dJforces time = ', t2 - t1, ' s') # t1 = time.time() # Jforce2.J() # t2 = time.time() @@ -563,6 +570,8 @@ def fun(dofs): NetForces=coil_net_forces(coils_TF, coils + coils_TF, regularization_rect(np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b), np.ones(len(coils_TF)) * nturns_TF), NetTorques=coil_net_torques(coils_TF, coils + coils_TF, regularization_rect(np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b), np.ones(len(coils_TF)) * nturns_TF), ) + # for c in (coils + coils_TF): + # c._children = set() btot.set_points(s_plot.gamma().reshape((-1, 3))) pointData = {"B_N": np.sum(btot.B().reshape((qphi, qtheta, 3)) * s_plot.unitnormal(), axis=2)[:, :, None]} diff --git a/examples/3_Advanced/QH_reactorscale_notfixed.py b/examples/3_Advanced/QH_reactorscale_notfixed.py index 5f1952b01..cb4e01450 100644 --- a/examples/3_Advanced/QH_reactorscale_notfixed.py +++ b/examples/3_Advanced/QH_reactorscale_notfixed.py @@ -22,6 +22,7 @@ SurfaceRZFourier, curves_to_vtk, create_equally_spaced_planar_curves, create_planar_curves_between_two_toroidal_surfaces ) +from simsopt.geo import create_equally_spaced_curves from simsopt.objectives import Weight, SquaredFlux, QuadraticPenalty from simsopt.util import in_github_actions import cProfile @@ -30,7 +31,7 @@ t1 = time.time() # Number of Fourier modes describing each Cartesian component of each coil: -order = 0 +# order = 0 # File for the desired boundary magnetic surface: TEST_DIR = (Path(__file__).parent / ".." / ".." / "tests" / "test_files").resolve() @@ -85,10 +86,10 @@ def initialize_coils_QH(TEST_DIR, s): from simsopt.geo import curves_to_vtk # generate planar TF coils - ncoils = 2 + ncoils = 20 R0 = s.get_rc(0, 0) * 1 R1 = s.get_rc(1, 0) * 4 - order = 8 + order = 2 from simsopt.mhd.vmec import Vmec vmec_file = 'wout_LandremanPaul2021_QH_reactorScale_lowres_reference.nc' @@ -96,10 +97,10 @@ def initialize_coils_QH(TEST_DIR, s): print('Total current = ', total_current) # Only need Jax flag for CurvePlanarFourier class - base_curves = create_equally_spaced_curves( + base_curves = create_equally_spaced_planar_curves( ncoils, s.nfp, stellsym=True, R0=R0, R1=R1, order=order, numquadpoints=256, - jax_flag=True, + jax_flag=False, ) base_currents = [(Current(total_current / ncoils * 1e-7) * 1e7) for _ in range(ncoils - 1)] @@ -135,94 +136,98 @@ def initialize_coils_QH(TEST_DIR, s): # Only need this if make self forces and TVE nonzero in the objective! a = 0.2 b = 0.2 -nturns = 100 +# nturns = 100 nturns_TF = 200 # wire cross section for the dipole coils should be more like 5 cm x 5 cm -aa = 0.05 -bb = 0.05 - -Nx = 6 -Ny = Nx -Nz = Nx -# Create the initial coils: -base_curves, all_curves = create_planar_curves_between_two_toroidal_surfaces( - s, s_inner, s_outer, Nx, Ny, Nz, order=order, coil_coil_flag=True, jax_flag=True, - # numquadpoints=10 # Defaults is (order + 1) * 40 so this halves it -) -import warnings - -keep_inds = [] -for ii in range(len(base_curves)): - counter = 0 - for i in range(base_curves[0].gamma().shape[0]): - eps = 0.05 - for j in range(len(base_curves_TF)): - for k in range(base_curves_TF[j].gamma().shape[0]): - dij = np.sqrt(np.sum((base_curves[ii].gamma()[i, :] - base_curves_TF[j].gamma()[k, :]) ** 2)) - conflict_bool = (dij < (1.0 + eps) * base_curves[0].x[0]) - if conflict_bool: - print('bad indices = ', i, j, dij, base_curves[0].x[0]) - warnings.warn( - 'There is a PSC coil initialized such that it is within a radius' - 'of a TF coil. Deleting these PSCs now.') - counter += 1 - break - if counter == 0: - keep_inds.append(ii) - -print(keep_inds) -base_curves = np.array(base_curves)[keep_inds] - -ncoils = len(base_curves) -print('Ncoils = ', ncoils) -coil_normals = np.zeros((ncoils, 3)) -plasma_points = s.gamma().reshape(-1, 3) -plasma_unitnormals = s.unitnormal().reshape(-1, 3) -for i in range(ncoils): - point = (base_curves[i].get_dofs()[-3:]) - dists = np.sum((point - plasma_points) ** 2, axis=-1) - min_ind = np.argmin(dists) - coil_normals[i, :] = plasma_unitnormals[min_ind, :] - # coil_normals[i, :] = (plasma_points[min_ind, :] - point) -coil_normals = coil_normals / np.linalg.norm(coil_normals, axis=-1)[:, None] -# alphas = np.arctan2( +# aa = 0.05 +# bb = 0.05 + +# Nx = 6 +# Ny = Nx +# Nz = Nx +# # Create the initial coils: +# base_curves, all_curves = create_planar_curves_between_two_toroidal_surfaces( +# s, s_inner, s_outer, Nx, Ny, Nz, order=order, coil_coil_flag=True, jax_flag=True, +# ) +# base_curves = create_equally_spaced_curves( +# 40, s.nfp, stellsym=True, +# R0=s.get_rc(0, 0), R1=40, order=3, numquadpoints=256, +# jax_flag=True, +# ) +# import warnings + +# keep_inds = [] +# for ii in range(len(base_curves)): +# counter = 0 +# for i in range(base_curves[0].gamma().shape[0]): +# eps = 0.05 +# for j in range(len(base_curves_TF)): +# for k in range(base_curves_TF[j].gamma().shape[0]): +# dij = np.sqrt(np.sum((base_curves[ii].gamma()[i, :] - base_curves_TF[j].gamma()[k, :]) ** 2)) +# conflict_bool = (dij < (1.0 + eps) * base_curves[0].x[0]) +# if conflict_bool: +# print('bad indices = ', i, j, dij, base_curves[0].x[0]) +# warnings.warn( +# 'There is a PSC coil initialized such that it is within a radius' +# 'of a TF coil. Deleting these PSCs now.') +# counter += 1 +# break +# if counter == 0: +# keep_inds.append(ii) + +# print(keep_inds) +# base_curves = np.array(base_curves)[keep_inds] + +# ncoils = len(base_curves) +# print('Ncoils = ', ncoils) +# coil_normals = np.zeros((ncoils, 3)) +# plasma_points = s.gamma().reshape(-1, 3) +# plasma_unitnormals = s.unitnormal().reshape(-1, 3) +# for i in range(ncoils): +# point = (base_curves[i].get_dofs()[-3:]) +# dists = np.sum((point - plasma_points) ** 2, axis=-1) +# min_ind = np.argmin(dists) +# coil_normals[i, :] = plasma_unitnormals[min_ind, :] +# # coil_normals[i, :] = (plasma_points[min_ind, :] - point) +# coil_normals = coil_normals / np.linalg.norm(coil_normals, axis=-1)[:, None] +# # alphas = np.arctan2( +# # -coil_normals[:, 1], +# # np.sqrt(coil_normals[:, 0] ** 2 + coil_normals[:, 2] ** 2)) +# # deltas = np.arcsin(coil_normals[:, 0] / \ +# # np.sqrt(coil_normals[:, 0] ** 2 + coil_normals[:, 2] ** 2)) +# alphas = np.arcsin( # -coil_normals[:, 1], -# np.sqrt(coil_normals[:, 0] ** 2 + coil_normals[:, 2] ** 2)) -# deltas = np.arcsin(coil_normals[:, 0] / \ -# np.sqrt(coil_normals[:, 0] ** 2 + coil_normals[:, 2] ** 2)) -alphas = np.arcsin( - -coil_normals[:, 1], - ) -deltas = np.arctan2(coil_normals[:, 0], coil_normals[:, 2]) -for i in range(len(base_curves)): - alpha2 = alphas[i] / 2.0 - delta2 = deltas[i] / 2.0 - calpha2 = np.cos(alpha2) - salpha2 = np.sin(alpha2) - cdelta2 = np.cos(delta2) - sdelta2 = np.sin(delta2) - base_curves[i].set('x' + str(2 * order + 1), calpha2 * cdelta2) - base_curves[i].set('x' + str(2 * order + 2), salpha2 * cdelta2) - base_curves[i].set('x' + str(2 * order + 3), calpha2 * sdelta2) - base_curves[i].set('x' + str(2 * order + 4), -salpha2 * sdelta2) - # Fix orientations of each coil - base_curves[i].fix('x' + str(2 * order + 1)) - base_curves[i].fix('x' + str(2 * order + 2)) - base_curves[i].fix('x' + str(2 * order + 3)) - base_curves[i].fix('x' + str(2 * order + 4)) - - # Fix shape of each coil - for j in range(2 * order + 1): - base_curves[i].fix('x' + str(j)) - # Fix center points of each coil - base_curves[i].fix('x' + str(2 * order + 5)) - base_curves[i].fix('x' + str(2 * order + 6)) - base_curves[i].fix('x' + str(2 * order + 7)) -base_currents = [Current(1e-1) * 2e7 for i in range(ncoils)] - -coils = coils_via_symmetries(base_curves, base_currents, s.nfp, True) -base_coils = coils[:ncoils] +# ) +# deltas = np.arctan2(coil_normals[:, 0], coil_normals[:, 2]) +# for i in range(len(base_curves)): +# alpha2 = alphas[i] / 2.0 +# delta2 = deltas[i] / 2.0 +# calpha2 = np.cos(alpha2) +# salpha2 = np.sin(alpha2) +# cdelta2 = np.cos(delta2) +# sdelta2 = np.sin(delta2) +# base_curves[i].set('x' + str(2 * order + 1), calpha2 * cdelta2) +# base_curves[i].set('x' + str(2 * order + 2), salpha2 * cdelta2) +# base_curves[i].set('x' + str(2 * order + 3), calpha2 * sdelta2) +# base_curves[i].set('x' + str(2 * order + 4), -salpha2 * sdelta2) +# # Fix orientations of each coil +# base_curves[i].fix('x' + str(2 * order + 1)) +# base_curves[i].fix('x' + str(2 * order + 2)) +# base_curves[i].fix('x' + str(2 * order + 3)) +# base_curves[i].fix('x' + str(2 * order + 4)) + +# # Fix shape of each coil +# for j in range(2 * order + 1): +# base_curves[i].fix('x' + str(j)) +# # Fix center points of each coil +# base_curves[i].fix('x' + str(2 * order + 5)) +# base_curves[i].fix('x' + str(2 * order + 6)) +# base_curves[i].fix('x' + str(2 * order + 7)) +# base_currents = [Current(1e-1) * 2e7 for i in range(ncoils)] + +# coils = coils_via_symmetries(base_curves, base_currents, s.nfp, True) +# base_coils = coils[:ncoils] def pointData_forces_torques(coils, allcoils, aprimes, bprimes, nturns_list): contig = np.ascontiguousarray @@ -250,17 +255,14 @@ def pointData_forces_torques(coils, allcoils, aprimes, bprimes, nturns_list): "Pointwise_Torques": (contig(torques[:, 0]), contig(torques[:, 1]), contig(torques[:, 2]))} return point_data -bs = BiotSavart(coils) # + coils_TF) -btot = bs + bs_TF +# bs = BiotSavart(coils) # + coils_TF) +btot = bs_TF calculate_on_axis_B(btot, s) btot.set_points(s.gamma().reshape((-1, 3))) -bs.set_points(s.gamma().reshape((-1, 3))) -curves = [c.curve for c in coils] -currents = [c.current.get_value() for c in coils] -a_list = np.hstack((np.ones(len(coils)) * aa, np.ones(len(coils_TF)) * a)) -b_list = np.hstack((np.ones(len(coils)) * bb, np.ones(len(coils_TF)) * b)) -base_a_list = np.hstack((np.ones(len(base_coils)) * aa, np.ones(len(base_coils_TF)) * a)) -base_b_list = np.hstack((np.ones(len(base_coils)) * bb, np.ones(len(base_coils_TF)) * b)) +# a_list = np.hstack((np.ones(len(coils)) * aa, np.ones(len(coils_TF)) * a)) +# b_list = np.hstack((np.ones(len(coils)) * bb, np.ones(len(coils_TF)) * b)) +# base_a_list = np.hstack((np.ones(len(base_coils)) * aa, np.ones(len(base_coils_TF)) * a)) +# base_b_list = np.hstack((np.ones(len(base_coils)) * bb, np.ones(len(base_coils_TF)) * b)) LENGTH_WEIGHT = Weight(0.001) LENGTH_TARGET = 80 @@ -293,32 +295,23 @@ def pointData_forces_torques(coils, allcoils, aprimes, bprimes, nturns_list): # curves_TF, # OUT_DIR + "curves_TF_0", # close=True, -# extra_point_data=pointData_forces_torques(coils_TF, coils + coils_TF, np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b, np.ones(len(coils_TF)) * nturns_TF), +# extra_point_data=pointData_forces_torques(coils_TF, coils_TF, np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b, np.ones(len(coils_TF)) * nturns_TF), # I=currents_TF, -# NetForces=coil_net_forces(coils_TF, coils + coils_TF, regularization_rect(np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b), np.ones(len(coils_TF)) * nturns_TF), -# NetTorques=coil_net_torques(coils_TF, coils + coils_TF, regularization_rect(np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b), np.ones(len(coils_TF)) * nturns_TF) +# NetForces=coil_net_forces(coils_TF, coils_TF, regularization_rect(np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b), np.ones(len(coils_TF)) * nturns_TF), +# NetTorques=coil_net_torques(coils_TF, coils_TF, regularization_rect(np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b), np.ones(len(coils_TF)) * nturns_TF) # ) # curves_to_vtk( # curves, # OUT_DIR + "curves_0", # close=True, -# extra_point_data=pointData_forces_torques(coils, coils + coils_TF, np.ones(len(coils)) * aa, np.ones(len(coils)) * bb, np.ones(len(coils)) * nturns), +# extra_point_data=pointData_forces_torques(coils, coils_TF, np.ones(len(coils)) * aa, np.ones(len(coils)) * bb, np.ones(len(coils)) * nturns), # I=currents, -# NetForces=coil_net_forces(coils, coils + coils_TF, regularization_rect(np.ones(len(coils)) * aa, np.ones(len(coils)) * bb), np.ones(len(coils)) * nturns), -# NetTorques=coil_net_torques(coils, coils + coils_TF, regularization_rect(np.ones(len(coils)) * aa, np.ones(len(coils)) * bb), np.ones(len(coils)) * nturns) +# NetForces=coil_net_forces(coils, coils_TF, regularization_rect(np.ones(len(coils)) * aa, np.ones(len(coils)) * bb), np.ones(len(coils)) * nturns), +# NetTorques=coil_net_torques(coils, coils_TF, regularization_rect(np.ones(len(coils)) * aa, np.ones(len(coils)) * bb), np.ones(len(coils)) * nturns) # ) # Force and Torque calculations spawn a bunch of spurious BiotSavart child objects -- erase them! -# for c in (coils + coils_TF): +# for c in (coils_TF): # c._children = set() - -pointData = {"B_N": np.sum(btot.B().reshape((nphi, ntheta, 3)) * s.unitnormal(), axis=2)[:, :, None]} -s.to_vtk(OUT_DIR + "surf_init_DA", extra_data=pointData) - -btot.set_points(s_plot.gamma().reshape((-1, 3))) -pointData = {"B_N": np.sum(btot.B().reshape((qphi, qtheta, 3)) * s_plot.unitnormal(), axis=2)[:, :, None]} -s_plot.to_vtk(OUT_DIR + "surf_full_init_DA", extra_data=pointData) -btot.set_points(s.gamma().reshape((-1, 3))) - # Repeat for whole B field pointData = {"B_N": np.sum(btot.B().reshape((nphi, ntheta, 3)) * s.unitnormal(), axis=2)[:, :, None]} s.to_vtk(OUT_DIR + "surf_init", extra_data=pointData) @@ -341,37 +334,40 @@ def pointData_forces_torques(coils, allcoils, aprimes, bprimes, nturns_list): ### Jcc below removed the dipoles! Jccdist = CurveCurveDistance(curves_TF, CC_THRESHOLD, num_basecurves=len(coils_TF)) Jcsdist = CurveSurfaceDistance(curves_TF, s, CS_THRESHOLD) -Jcsdist2 = CurveSurfaceDistance(curves + curves_TF, s, CS_THRESHOLD) +# Jcsdist2 = CurveSurfaceDistance(curves_TF, s, CS_THRESHOLD) # While the coil array is not moving around, they cannot # interlink. -linkNum = LinkingNumber(curves + curves_TF, downsample=2) +linkNum = LinkingNumber(curves_TF, downsample=4) ##### Note need coils_TF + coils below!!!!!!! -# Jforce2 = sum([LpCurveForce(c, coils + coils_TF, +# Jforce2 = sum([LpCurveForce(c, coils_TF, # regularization=regularization_rect(base_a_list[i], base_b_list[i]), -# p=2, threshold=1e8) for i, c in enumerate(base_coils + base_coils_TF)]) +# p=2, threshold=1e8) for i, c in enumerate(base_coils_TF)]) # Jforce2 = LpCurveForce2(coils, coils_TF, p=2, threshold=1e8) -# Jforce = sum([SquaredMeanForce(c, coils + coils_TF) for c in (base_coils + base_coils_TF)]) +# Jforce = sum([SquaredMeanForce(c, coils_TF) for c in (base_coils_TF)]) -regularization_list = np.ones(len(coils)) * regularization_rect(aa, bb) -regularization_list2 = np.ones(len(coils_TF)) * regularization_rect(a, b) +# regularization_list = np.ones(len(coils)) * regularization_rect(aa, bb) +# regularization_list2 = np.ones(len(coils_TF)) * regularization_rect(a, b) # Jforce = MixedLpCurveForce(coils, coils_TF, regularization_list, regularization_list2) # [SquaredMeanForce2(c, coils) for c in (base_coils)] # Jforce = MixedSquaredMeanForce(coils, coils_TF) # Jforce = MixedLpCurveForce(coils, coils_TF, regularization_list, regularization_list2, p=4, threshold=4e5 * 100, downsample=4) -Jforce = sum([LpCurveForce(c, coils + coils_TF, regularization_rect(a_list[i], b_list[i]), p=4, threshold=4e5 * 100, downsample=4) for i, c in enumerate(base_coils + base_coils_TF)]) -# Jforce2 = sum([SquaredMeanForce(c, coils + coils_TF) for c in (base_coils + base_coils_TF)]) -# Jtorque = sum([LpCurveTorque(c, coils + coils_TF, regularization_rect(a_list[i], b_list[i]), p=2, threshold=4e5 * 100) for i, c in enumerate(base_coils + base_coils_TF)]) -# Jtorque = sum([LpCurveTorque(c, coils + coils_TF, regularization_rect(a_list[i], b_list[i]), p=2, threshold=1e5 * 100) for i, c in enumerate(base_coils + base_coils_TF)]) +Jforce = sum([LpCurveForce(c, coils_TF, regularization_rect(a, b), p=4, threshold=4e5 * 100, downsample=4) for i, c in enumerate(base_coils_TF)]) +# Jforce2 = sum([SquaredMeanForce(c, coils_TF) for c in (base_coils_TF)]) +# Jtorque = sum([LpCurveTorque(c, coils_TF, regularization_rect(a_list[i], b_list[i]), p=2, threshold=4e5 * 100) for i, c in enumerate(base_coils_TF)]) +# Jtorque = sum([LpCurveTorque(c, coils_TF, regularization_rect(a_list[i], b_list[i]), p=2, threshold=1e5 * 100) for i, c in enumerate(base_coils_TF)]) -# Jtorque2 = sum([SquaredMeanTorque(c, coils + coils_TF) for c in (base_coils_TF)]) +# Jtorque2 = sum([SquaredMeanTorque(c, coils_TF) for c in (base_coils_TF)]) -# Jtorque2 = sum([SquaredMeanTorque(c, coils + coils_TF) for c in (base_coils + base_coils_TF)]) +# Jtorque2 = sum([SquaredMeanTorque(c, coils_TF) for c in (base_coils_TF)]) # Jtorque = SquaredMeanTorque2(coils, coils_TF) # [SquaredMeanForce2(c, coils) for c in (base_coils)] -# Jtorque = [SquaredMeanTorque(c, coils + coils_TF) for c in (base_coils + base_coils_TF)] +# Jtorque = [SquaredMeanTorque(c, coils_TF) for c in (base_coils_TF)] +for c in coils_TF: + # print(c._children) + c._children = set() # set(list(c._children)[0:1]) JF = Jf \ + CC_WEIGHT * Jccdist \ @@ -403,50 +399,51 @@ def pointData_forces_torques(coils, allcoils, aprimes, bprimes, nturns_list): # pass directly to scipy.optimize.minimize -print('Timing calls: ') -t1 = time.time() -Jf.J() -t2 = time.time() -print('Jf time = ', t2 - t1, ' s') -t1 = time.time() -Jf.dJ() -t2 = time.time() -print('dJf time = ', t2 - t1, ' s') -t1 = time.time() -Jccdist.J() -Jccdist.dJ() -t2 = time.time() -print('Jcc time = ', t2 - t1, ' s') -t1 = time.time() -Jcsdist.J() -Jcsdist.dJ() -t2 = time.time() -print('Jcs time = ', t2 - t1, ' s') -t1 = time.time() -linkNum.J() -linkNum.dJ() -t2 = time.time() -print('linkNum time = ', t2 - t1, ' s') -t1 = time.time() -Jlength.J() -Jlength.dJ() -t2 = time.time() -print('sum(Jls_TF) time = ', t2 - t1, ' s') +# print('Timing calls: ') +# t1 = time.time() +# Jf.J() +# t2 = time.time() +# print('Jf time = ', t2 - t1, ' s') +# t1 = time.time() +# Jf.dJ() +# t2 = time.time() +# print('dJf time = ', t2 - t1, ' s') +# t1 = time.time() +# Jccdist.J() +# Jccdist.dJ() +# t2 = time.time() +# print('Jcc time = ', t2 - t1, ' s') +# t1 = time.time() +# Jcsdist.J() +# Jcsdist.dJ() +# t2 = time.time() +# print('Jcs time = ', t2 - t1, ' s') +# t1 = time.time() +# linkNum.J() +# linkNum.dJ() +# t2 = time.time() +# print('linkNum time = ', t2 - t1, ' s') +# t1 = time.time() +# Jlength.J() +# Jlength.dJ() +# t2 = time.time() +# print('sum(Jls_TF) time = ', t2 - t1, ' s') # t1 = time.time() # print(Jforce.J()) # print(Jforce.dJ()) # t2 = time.time() # print('Jforce time = ', t2 - t1, ' s') -t1 = time.time() -JF.J() -JF.dJ() -t2 = time.time() -print('JF time = ', t2 - t1, ' s') +# t1 = time.time() +# JF.J() +# JF.dJ() +# t2 = time.time() +# print('JF time = ', t2 - t1, ' s') import pstats, io from pstats import SortKey # print(btot.ancestors,len(btot.ancestors)) # print(JF.ancestors,len(JF.ancestors)) +# Jforce._children = set() def fun(dofs): pr = cProfile.Profile() @@ -455,26 +452,32 @@ def fun(dofs): J = JF.J() grad = JF.dJ() jf = Jf.J() - length_val = LENGTH_WEIGHT.value * Jlength.J() - cc_val = CC_WEIGHT * Jccdist.J() - cs_val = CS_WEIGHT * Jcsdist.J() - link_val = LINK_WEIGHT * linkNum.J() + # print(JF._children, btot._children, btot.coils[-1]._children, + # btot.coils[-1].curve._children, btot._coils[-1]._children, + # Jforce._children) + # length_val = LENGTH_WEIGHT.value * Jlength.J() + # cc_val = CC_WEIGHT * Jccdist.J() + # cs_val = CS_WEIGHT * Jcsdist.J() + # link_val = LINK_WEIGHT * linkNum.J() forces_val = FORCE_WEIGHT.value * Jforce.J() + # print(JF._children, btot._children, btot.coils[-1]._children, + # btot.coils[-1].curve._children, btot._coils[-1]._children, + # Jforce._children) # Jforce3.dJ() # forces_val2 = FORCE_WEIGHT2.value * Jforce2.J() # torques_val = TORQUE_WEIGHT.value * Jtorque.J() # torques_val2 = TORQUE_WEIGHT2.value * Jtorque2.J() - BdotN = np.mean(np.abs(np.sum(btot.B().reshape((nphi, ntheta, 3)) * s.unitnormal(), axis=2))) - BdotN_over_B = np.mean(np.abs(np.sum(btot.B().reshape((nphi, ntheta, 3)) * s.unitnormal(), axis=2)) - ) / np.mean(btot.AbsB()) - outstr = f"J={J:.1e}, Jf={jf:.1e}, ⟨B·n⟩={BdotN:.1e}, ⟨B·n⟩/⟨B⟩={BdotN_over_B:.1e}" + # BdotN = np.mean(np.abs(np.sum(btot.B().reshape((nphi, ntheta, 3)) * s.unitnormal(), axis=2))) + # BdotN_over_B = np.mean(np.abs(np.sum(btot.B().reshape((nphi, ntheta, 3)) * s.unitnormal(), axis=2)) + # ) / np.mean(btot.AbsB()) + outstr = f"J={J:.1e}, Jf={jf:.1e}" #, ⟨B·n⟩={BdotN:.1e}, ⟨B·n⟩/⟨B⟩={BdotN_over_B:.1e}" valuestr = f"J={J:.2e}, Jf={jf:.2e}" - cl_string = ", ".join([f"{J.J():.1f}" for J in Jls_TF]) - outstr += f", Len=sum([{cl_string}])={sum(J.J() for J in Jls_TF):.2f}" - valuestr += f", LenObj={length_val:.2e}" - valuestr += f", ccObj={cc_val:.2e}" - valuestr += f", csObj={cs_val:.2e}" - valuestr += f", Lk1Obj={link_val:.2e}" + # cl_string = ", ".join([f"{J.J():.1f}" for J in Jls_TF]) + # outstr += f", Len=sum([{cl_string}])={sum(J.J() for J in Jls_TF):.2f}" + # valuestr += f", LenObj={length_val:.2e}" + # valuestr += f", ccObj={cc_val:.2e}" + # valuestr += f", csObj={cs_val:.2e}" + # valuestr += f", Lk1Obj={link_val:.2e}" valuestr += f", forceObj={forces_val:.2e}" # valuestr += f", forceObj2={forces_val2:.2e}" # valuestr += f", torqueObj={torques_val:.2e}" @@ -483,9 +486,9 @@ def fun(dofs): # outstr += f", Fnet={Jforce2.J():.2e}" # outstr += f", T={Jtorque.J():.2e}" # outstr += f", Tnet={Jtorque2.J():.2e}" - outstr += f", C-C-Sep={Jccdist.shortest_distance():.2f}, C-S-Sep={Jcsdist2.shortest_distance():.2f}" - outstr += f", Link Number = {linkNum.J()}" - outstr += f", ║∇J║={np.linalg.norm(grad):.1e}" + # outstr += f", C-C-Sep={Jccdist.shortest_distance():.2f}, C-S-Sep={Jcsdist.shortest_distance():.2f}" + # outstr += f", Link Number = {linkNum.J()}" + # outstr += f", ║∇J║={np.linalg.norm(grad):.1e}" # print(coils[0]._children, coils_TF[0]._children, JF._children, Jforce._children) print(outstr) print(valuestr) @@ -528,46 +531,46 @@ def fun(dofs): """) -print('Timing calls: ') -t1 = time.time() -Jf.J() -t2 = time.time() -print('Jf time = ', t2 - t1, ' s') -t1 = time.time() -Jf.dJ() -t2 = time.time() -print('dJf time = ', t2 - t1, ' s') -t1 = time.time() -Jccdist.J() -Jccdist.dJ() -t2 = time.time() -print('Jcc time = ', t2 - t1, ' s') -t1 = time.time() -Jcsdist.J() -Jcsdist.dJ() -t2 = time.time() -print('Jcs time = ', t2 - t1, ' s') -t1 = time.time() -linkNum.J() -linkNum.dJ() -t2 = time.time() -print('linkNum time = ', t2 - t1, ' s') -t1 = time.time() -Jlength.J() -Jlength.dJ() -t2 = time.time() -print('sum(Jls_TF) time = ', t2 - t1, ' s') +# print('Timing calls: ') +# t1 = time.time() +# Jf.J() +# t2 = time.time() +# print('Jf time = ', t2 - t1, ' s') +# t1 = time.time() +# Jf.dJ() +# t2 = time.time() +# print('dJf time = ', t2 - t1, ' s') +# t1 = time.time() +# Jccdist.J() +# Jccdist.dJ() +# t2 = time.time() +# print('Jcc time = ', t2 - t1, ' s') +# t1 = time.time() +# Jcsdist.J() +# Jcsdist.dJ() +# t2 = time.time() +# print('Jcs time = ', t2 - t1, ' s') +# t1 = time.time() +# linkNum.J() +# linkNum.dJ() +# t2 = time.time() +# print('linkNum time = ', t2 - t1, ' s') +# t1 = time.time() +# Jlength.J() +# Jlength.dJ() +# t2 = time.time() +# print('sum(Jls_TF) time = ', t2 - t1, ' s') # t1 = time.time() # print(Jforce.J()) # print(Jforce.dJ()) # t2 = time.time() # print('Jforce time = ', t2 - t1, ' s') -t1 = time.time() -JF.J() -JF.dJ() -t2 = time.time() -print('JF time = ', t2 - t1, ' s') # t1 = time.time() +# JF.J() +# JF.dJ() +# t2 = time.time() +# print('JF time = ', t2 - t1, ' s') +# # t1 = time.time() # Jforce.J() # t2 = time.time() # print('Jforces time = ', t2 - t1, ' s') @@ -605,19 +608,19 @@ def fun(dofs): # [c.curve for c in bs.coils], # OUT_DIR + "curves_{0:d}".format(i), # close=True, - # extra_point_data=pointData_forces_torques(coils, coils + coils_TF, np.ones(len(coils)) * aa, np.ones(len(coils)) * bb, np.ones(len(coils)) * nturns), + # extra_point_data=pointData_forces_torques(coils, coils_TF, np.ones(len(coils)) * aa, np.ones(len(coils)) * bb, np.ones(len(coils)) * nturns), # I=dipole_currents, - # NetForces=coil_net_forces(coils, coils + coils_TF, regularization_rect(np.ones(len(coils)) * aa, np.ones(len(coils)) * bb), np.ones(len(coils)) * nturns), - # NetTorques=coil_net_torques(coils, coils + coils_TF, regularization_rect(np.ones(len(coils)) * aa, np.ones(len(coils)) * bb), np.ones(len(coils)) * nturns), + # NetForces=coil_net_forces(coils, coils_TF, regularization_rect(np.ones(len(coils)) * aa, np.ones(len(coils)) * bb), np.ones(len(coils)) * nturns), + # NetTorques=coil_net_torques(coils, coils_TF, regularization_rect(np.ones(len(coils)) * aa, np.ones(len(coils)) * bb), np.ones(len(coils)) * nturns), # ) # curves_to_vtk( # [c.curve for c in bs_TF.coils], # OUT_DIR + "curves_TF_{0:d}".format(i), # close=True, - # extra_point_data=pointData_forces_torques(coils_TF, coils + coils_TF, np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b, np.ones(len(coils_TF)) * nturns_TF), + # extra_point_data=pointData_forces_torques(coils_TF, coils_TF, np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b, np.ones(len(coils_TF)) * nturns_TF), # I=[c.current.get_value() for c in bs_TF.coils], - # NetForces=coil_net_forces(coils_TF, coils + coils_TF, regularization_rect(np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b), np.ones(len(coils_TF)) * nturns_TF), - # NetTorques=coil_net_torques(coils_TF, coils + coils_TF, regularization_rect(np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b), np.ones(len(coils_TF)) * nturns_TF), + # NetForces=coil_net_forces(coils_TF, coils_TF, regularization_rect(np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b), np.ones(len(coils_TF)) * nturns_TF), + # NetTorques=coil_net_torques(coils_TF, coils_TF, regularization_rect(np.ones(len(coils_TF)) * a, np.ones(len(coils_TF)) * b), np.ones(len(coils_TF)) * nturns_TF), # ) btot.set_points(s_plot.gamma().reshape((-1, 3))) diff --git a/src/simsopt/field/biotsavart.py b/src/simsopt/field/biotsavart.py index 0cf2001ec..abd52d666 100644 --- a/src/simsopt/field/biotsavart.py +++ b/src/simsopt/field/biotsavart.py @@ -37,14 +37,16 @@ def __init__(self, coils): # self.B_vjp_jax = jit(lambda v: self.B_vjp_pure(v)) def dB_by_dcoilcurrents(self, compute_derivatives=0): - points = self.get_points_cart_ref() - npoints = len(points) - ncoils = len(self._coils) - if any([not self.fieldcache_get_status(f'B_{i}') for i in range(ncoils)]): - assert compute_derivatives >= 0 - self.compute(compute_derivatives) - self._dB_by_dcoilcurrents = [self.fieldcache_get_or_create(f'B_{i}', [npoints, 3]) for i in range(ncoils)] - return self._dB_by_dcoilcurrents + # points = self.get_points_cart_ref() + npoints = len(self.get_points_cart_ref()) + # ncoils = len(self._coils) + # print(any([not self.fieldcache_get_status(f'B_{i}') for i in range(ncoils)])) + # if any([not self.fieldcache_get_status(f'B_{i}') for i in range(ncoils)]): + # assert compute_derivatives >= 0 + # self.compute(compute_derivatives) + # self._dB_by_dcoilcurrents = [self.fieldcache_get_or_create(f'B_{i}', [npoints, 3]) for i in range(ncoils)] + # print(self._dB_by_dcoilcurrents) + return [self.fieldcache_get_or_create(f'B_{i}', [npoints, 3]) for i in range(len(self._coils))] #self._dB_by_dcoilcurrents def d2B_by_dXdcoilcurrents(self, compute_derivatives=1): points = self.get_points_cart_ref() @@ -113,7 +115,6 @@ def B_vjp(self, v): """ coils = self._coils - # t1 = time.time() gammas = [coil.curve.gamma() for coil in coils] gammadashs = [coil.curve.gammadash() for coil in coils] currents = [coil.current.get_value() for coil in coils] @@ -123,18 +124,8 @@ def B_vjp(self, v): points = self.get_points_cart_ref() sopp.biot_savart_vjp_graph(points, gammas, gammadashs, currents, v, res_gamma, res_gammadash, [], [], []) - # t2 = time.time() - # print(t2 - t1) - # t1 = time.time() dB_by_dcoilcurrents = self.dB_by_dcoilcurrents() - # # res_current = np.sum(np.sum(v[None, :, :] * np.array(self.dB_by_dcoilcurrents()), axis=-1), axis=-1) res_current = [np.sum(v * self.dB_by_dcoilcurrents()[i]) for i in range(len(dB_by_dcoilcurrents))] - # t2 = time.time() - # print(t2 - t1) - # t1 = time.time() - # sum([coils[i].vjp(res_gamma[i], res_gammadash[i], np.asarray([res_current[i]])) for i in range(len(coils))]) - # t2 = time.time() - # print(t2 - t1) return sum([coils[i].vjp(res_gamma[i], res_gammadash[i], np.asarray([res_current[i]])) for i in range(len(coils))]) def B_vjp_pure(self, v): diff --git a/src/simsopt/field/coil.py b/src/simsopt/field/coil.py index 800895738..3cb041310 100644 --- a/src/simsopt/field/coil.py +++ b/src/simsopt/field/coil.py @@ -34,18 +34,6 @@ def __init__(self, curve, current): Optimizable.__init__(self, depends_on=[curve, current]) def vjp(self, v_gamma, v_gammadash, v_current): - # t1 = time.time() - # self.curve.dgamma_by_dcoeff_vjp(v_gamma) - # t2 = time.time() - # print('here = ', t2 - t1) - # t1 = time.time() - # self.curve.dgammadash_by_dcoeff_vjp(v_gammadash) - # t2 = time.time() - # print(t2 - t1) - # t1 = time.time() - # self.current.vjp(v_current) - # t2 = time.time() - # print(t2 - t1) return self.curve.dgamma_by_dcoeff_vjp(v_gamma) \ + self.curve.dgammadash_by_dcoeff_vjp(v_gammadash) \ + self.current.vjp(v_current) diff --git a/src/simsopt/field/force.py b/src/simsopt/field/force.py index b8bc6c286..af198cd6b 100644 --- a/src/simsopt/field/force.py +++ b/src/simsopt/field/force.py @@ -5,6 +5,7 @@ import jax.numpy as jnp from jax import grad from simsopt._core.derivative import Derivative +import simsoptpp as sopp from .biotsavart import BiotSavart from .selffield import B_regularized_pure, B_regularized, regularization_circ, regularization_rect from ..geo.jit import jit @@ -141,7 +142,7 @@ def __init__(self, coil, allcoils, regularization, p=2.0, threshold=0.0, downsam self.coil = coil self.allcoils = allcoils self.othercoils = [c for c in allcoils if c is not coil] - # self.biotsavart = BiotSavart(self.othercoils) + self.biotsavart = BiotSavart(self.othercoils) quadpoints = self.coil.curve.quadpoints self.downsample = downsample @@ -185,19 +186,21 @@ def __init__(self, coil, allcoils, regularization, p=2.0, threshold=0.0, downsam super().__init__(depends_on=allcoils) def J(self): - biotsavart = BiotSavart(self.othercoils) - biotsavart.set_points(self.coil.curve.gamma()) # biotsavart._children = set() + self.biotsavart.set_points(self.coil.curve.gamma()) args = [ self.coil.curve.gamma(), self.coil.curve.gammadash(), self.coil.curve.gammadashdash(), self.coil.current.get_value(), - biotsavart.B(), + self.biotsavart.B(), self.downsample ] - # biotsavart._children = set() + #### ABSOLUTELY ESSENTIAL LINES BELOW + # Otherwise optimizable references multiply + # like crazy as number of coils increases + self.biotsavart._children = set() for c in self.othercoils: c._children = set() c.curve._children = set() @@ -207,35 +210,74 @@ def J(self): @derivative_dec def dJ(self): - biotsavart = BiotSavart(self.othercoils) - biotsavart.set_points(self.coil.curve.gamma()) + # biotsavart._children = set() + self.biotsavart.set_points(self.coil.curve.gamma()) args = [ self.coil.curve.gamma(), self.coil.curve.gammadash(), self.coil.curve.gammadashdash(), self.coil.current.get_value(), - biotsavart.B(), + self.biotsavart.B(), self.downsample ] dJ_dB = self.dJ_dB_mutual(*args) - dB_dX = biotsavart.dB_by_dX() + dB_dX = self.biotsavart.dB_by_dX() dJ_dX = np.einsum('ij,ikj->ik', dJ_dB, dB_dX) - + + # coils = self.othercoils + # gammas = [coil.curve.gamma() for coil in coils] + # gammadashs = [coil.curve.gammadash() for coil in coils] + # currents = [coil.current.get_value() for coil in coils] + # res_gamma = [np.zeros_like(gamma) for gamma in gammas] + # res_gammadash = [np.zeros_like(gammadash) for gammadash in gammadashs] + # points = self.coil.curve.gamma() + # sopp.biot_savart_vjp_graph(points, gammas, gammadashs, currents, dJ_dB, + # res_gamma, res_gammadash, [], [], []) + # dB_by_dcoilcurrents = self.biotsavart.dB_by_dcoilcurrents() + # res_current = [np.sum(dJ_dB * dB_by_dcoilcurrents[i]) for i in range(len(dB_by_dcoilcurrents))] + # res_current = np.zeros(len(coils)) + # B_vjp = sum([coils[i].vjp(res_gamma[i], + # res_gammadash[i], + # np.asarray([res_current[i]])) for i in range(len(coils))]) + + + # print(self.othercoils[0]._children) + # print(B_vjp, B_vjp._children) dJ = ( self.coil.curve.dgamma_by_dcoeff_vjp(self.dJ_dgamma(*args) + dJ_dX) + self.coil.curve.dgammadash_by_dcoeff_vjp(self.dJ_dgammadash(*args)) + self.coil.curve.dgammadashdash_by_dcoeff_vjp(self.dJ_dgammadashdash(*args)) + self.coil.current.vjp(jnp.asarray([self.dJ_dcurrent(*args)])) - + biotsavart.B_vjp(dJ_dB) + + self.biotsavart.B_vjp(dJ_dB) ) - # biotsavart._children = set() + #### ABSOLUTELY ESSENTIAL LINES BELOW + # Otherwise optimizable references multiply + # like crazy as number of coils increases + self.biotsavart._children = set() for c in self.othercoils: c._children = set() c.curve._children = set() c.current._children = set() + # dJ = ( + # self.coil.curve.dgamma_by_dcoeff_vjp(self.dJ_dgamma(*args) + dJ_dX) + # + self.coil.curve.dgammadash_by_dcoeff_vjp(self.dJ_dgammadash(*args)) + # + self.coil.current.vjp(jnp.asarray([self.dJ_dcurrent(*args)])) + # + self.coil.curve.dgammadashdash_by_dcoeff_vjp(self.dJ_dgammadashdash(*args)) + # + biotsavart.B_vjp(dJ_dB) + # # ) + + #### Needed if JaxCurves are used? + # self.biotsavart._children = set() + # for c in self.othercoils: + # c._children = set() + # c.curve._children = set() + # c.current._children = set() + + # print(B_vjp, coils[0]._children) + # print(biotsavart.coils[0]._children, self.coil._children, self.coil.curve._children) return dJ @@ -342,12 +384,31 @@ def dJ(self): dB_dX = self.biotsavart.dB_by_dX() dJ_dX = np.einsum('ij,ikj->ik', dJ_dB, dB_dX) + coils = self.othercoils + gammas = [coil.curve.gamma() for coil in coils] + gammadashs = [coil.curve.gammadash() for coil in coils] + currents = [coil.current.get_value() for coil in coils] + res_gamma = [np.zeros_like(gamma) for gamma in gammas] + res_gammadash = [np.zeros_like(gammadash) for gammadash in gammadashs] + points = self.coil.curve.gamma() + sopp.biot_savart_vjp_graph(points, gammas, gammadashs, currents, v, + res_gamma, res_gammadash, [], [], []) + dB_by_dcoilcurrents = self.biotsavart.dB_by_dcoilcurrents() + res_current = [np.sum(v * dB_by_dcoilcurrents[i]) for i in range(len(dB_by_dcoilcurrents))] + # B_vjp = sum([coils[i].vjp(res_gamma[i], res_gammadash[i], np.asarray([res_current[i]])) for i in range(len(coils))]) + # for c in coils: + # c._children = set() + B_vjp = sum([coils[i].vjp(res_gamma[i], res_gammadash[i], np.asarray([res_current[i]])) for i in range(len(coils))]) + + print(self.othercoils[0]._children) + print(B_vjp, B_vjp._children) return ( self.coil.curve.dgamma_by_dcoeff_vjp(self.dJ_dgamma(*args) + dJ_dX) + self.coil.curve.dgammadash_by_dcoeff_vjp(self.dJ_dgammadash(*args)) + self.coil.curve.dgammadashdash_by_dcoeff_vjp(self.dJ_dgammadashdash(*args)) + self.coil.current.vjp(jnp.asarray([self.dJ_dcurrent(*args)])) - + self.biotsavart.B_vjp(dJ_dB) + + B_vjp + # + self.biotsavart.B_vjp(dJ_dB) ) return_fn_map = {'J': J, 'dJ': dJ} diff --git a/src/simsopt/geo/curve.py b/src/simsopt/geo/curve.py index 9bc79341e..b3deda774 100644 --- a/src/simsopt/geo/curve.py +++ b/src/simsopt/geo/curve.py @@ -470,15 +470,6 @@ def gamma_impl(self, gamma, quadpoints): def incremental_arclength_pure(self, dofs): gammadash = self.gammadash_jax(dofs) return jnp.linalg.norm(gammadash, axis=1) - - @property - def qps(self): - return self.quadpoints - - @qps.setter - def qps(self, new_quadpoints): - self.quadpoints = new_quadpoints - self.numquadpoints = len(new_quadpoints) def incremental_arclength(self): return self.incremental_arclength_jax(self.get_dofs()) @@ -518,7 +509,7 @@ def dgamma_by_dcoeff_impl(self, dgamma_by_dcoeff): where :math:`\mathbf{c}` are the curve dofs, and :math:`\Gamma` are the x, y, z coordinates of the curve. """ - dgamma_by_dcoeff[:, :, :] = self.dgamma_by_dcoeff_jax(self.get_dofs()) + dgamma_by_dcoeff[:, :, :] = dgamma_by_dcoeff_jax(self.get_dofs()) def dgamma_by_dcoeff_vjp_impl(self, v): r""" @@ -563,7 +554,6 @@ def dgammadash_by_dcoeff_vjp_impl(self, v): where :math:`\mathbf{c}` are the curve dofs, and :math:`\Gamma` are the x, y, z coordinates of the curve. """ - return self.dgammadash_by_dcoeff_vjp_jax(self.get_dofs(), v) def gammadashdash_impl(self, gammadashdash): diff --git a/src/simsopt/geo/curveplanarfourier.py b/src/simsopt/geo/curveplanarfourier.py index 7c5d16143..eb75f3d1a 100644 --- a/src/simsopt/geo/curveplanarfourier.py +++ b/src/simsopt/geo/curveplanarfourier.py @@ -143,7 +143,7 @@ def get_dofs(self): """ This function returns the dofs associated to this object. """ - return self.dof_list + return np.array(self.dof_list) def set_dofs_impl(self, dofs): """ diff --git a/src/simsoptpp/curveplanarfourier.cpp b/src/simsoptpp/curveplanarfourier.cpp index 9e11bc3e8..3f2e3192d 100644 --- a/src/simsoptpp/curveplanarfourier.cpp +++ b/src/simsoptpp/curveplanarfourier.cpp @@ -16,10 +16,9 @@ void CurvePlanarFourier::gamma_impl(Array& data, Array& quadpoints) { double sinphi, cosphi, siniphi, cosiphi; int numquadpoints = quadpoints.size(); data *= 0; - Array q_norm = q * inv_magnitude(); - +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { double phi = 2 * M_PI * quadpoints[k]; double cosphi = cos(phi); @@ -33,6 +32,7 @@ void CurvePlanarFourier::gamma_impl(Array& data, Array& quadpoints) { data(k, 1) += (rc[i] * cosiphi + rs[i-1] * siniphi) * sinphi; } } +#pragma omp parallel for schedule(static) for (int m = 0; m < numquadpoints; ++m) { double i = data(m, 0); double j = data(m, 1); @@ -53,6 +53,7 @@ void CurvePlanarFourier::gammadash_impl(Array& data) { Array q_norm = q * inv_sqrt_s; double cosiphi, siniphi; +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { double phi = 2 * M_PI * quadpoints[k]; double cosphi = cos(phi); @@ -70,6 +71,7 @@ void CurvePlanarFourier::gammadash_impl(Array& data) { } data *= (2*M_PI); +#pragma omp parallel for schedule(static) for (int m = 0; m < numquadpoints; ++m) { double i = data(m, 0); double j = data(m, 1); @@ -90,6 +92,7 @@ void CurvePlanarFourier::gammadashdash_impl(Array& data) { Array q_norm = q * inv_magnitude(); double cosiphi, siniphi; +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { double phi = 2 * M_PI * quadpoints[k]; double cosphi = cos(phi); @@ -106,6 +109,7 @@ void CurvePlanarFourier::gammadashdash_impl(Array& data) { } } data *= 2*M_PI*2*M_PI; +#pragma omp parallel for schedule(static) for (int m = 0; m < numquadpoints; ++m) { double i = data(m, 0); double j = data(m, 1); @@ -126,6 +130,7 @@ void CurvePlanarFourier::gammadashdashdash_impl(Array& data) { Array q_norm = q * inv_magnitude(); double cosiphi, siniphi; +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { double phi = 2 * M_PI * quadpoints[k]; double cosphi = cos(phi); @@ -153,6 +158,7 @@ void CurvePlanarFourier::gammadashdashdash_impl(Array& data) { } } data *= 2*M_PI*2*M_PI*2*M_PI; +#pragma omp parallel for schedule(static) for (int m = 0; m < numquadpoints; ++m) { double i = data(m, 0); double j = data(m, 1); @@ -173,6 +179,7 @@ void CurvePlanarFourier::dgamma_by_dcoeff_impl(Array& data) { Array q_norm = q * inv_magnitude(); double cosnphi, sinnphi; +#pragma omp parallel for schedule(static) for (int m = 0; m < numquadpoints; ++m) { double phi = 2 * M_PI * quadpoints[m]; int counter = 0; @@ -211,6 +218,7 @@ void CurvePlanarFourier::dgamma_by_dcoeff_impl(Array& data) { counter++; } + // i and j represent X0 and Y0 here before applying rotation i = rc[0] * cosphi; j = rc[0] * sinphi; k = 0; @@ -224,279 +232,57 @@ void CurvePlanarFourier::dgamma_by_dcoeff_impl(Array& data) { double inv_sqrt_s = inv_magnitude(); data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] + 0.5 * q_norm[3])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[0] - 0.5 * q_norm[3])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[0] + 0.5 * q_norm[2]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[0] - 0.5 * q_norm[1])) + * inv_sqrt_s; counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - 0.5 * q_norm[2])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3] - 1.0) * q_norm[1] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[1] - 0.5 * q_norm[2])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[1] - 0.5 * q_norm[3]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[1] - 0.5 * q_norm[0])) + * inv_sqrt_s; + counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3] - 1.0) * q_norm[2] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - 0.5 * q_norm[1])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[2] - 0.5 * q_norm[1])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] + 0.5 * q_norm[0]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[2] - 0.5 * q_norm[3])) + * inv_sqrt_s; + counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - + (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3] - 1.0) * q_norm[3] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] + 0.5 * q_norm[0])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3] - 1.0) * q_norm[3] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[3] - 0.5 * q_norm[0])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[3] - 0.5 * q_norm[1]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[3] - 0.5 * q_norm[2])) + * inv_sqrt_s; counter++; @@ -517,6 +303,7 @@ void CurvePlanarFourier::dgammadash_by_dcoeff_impl(Array& data) { Array q_norm = q * inv_magnitude(); double cosnphi, sinnphi; +#pragma omp parallel for schedule(static) for (int m = 0; m < numquadpoints; ++m) { double phi = 2 * M_PI * quadpoints[m]; double cosphi = cos(phi); @@ -577,279 +364,57 @@ void CurvePlanarFourier::dgammadash_by_dcoeff_impl(Array& data) { double inv_sqrt_s = inv_magnitude(); data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] + 0.5 * q_norm[3])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[0] - 0.5 * q_norm[3])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[0] + 0.5 * q_norm[2]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[0] - 0.5 * q_norm[1])) + * inv_sqrt_s; counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - 0.5 * q_norm[2])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3] - 1.0) * q_norm[1] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[1] - 0.5 * q_norm[2])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[1] - 0.5 * q_norm[3]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[1] - 0.5 * q_norm[0])) + * inv_sqrt_s; + counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3] - 1.0) * q_norm[2] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - 0.5 * q_norm[1])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[2] - 0.5 * q_norm[1])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] + 0.5 * q_norm[0]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[2] - 0.5 * q_norm[3])) + * inv_sqrt_s; + counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - + (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3] - 1.0) * q_norm[3] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] + 0.5 * q_norm[0])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3] - 1.0) * q_norm[3] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[3] - 0.5 * q_norm[0])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[3] - 0.5 * q_norm[1]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[3] - 0.5 * q_norm[2])) + * inv_sqrt_s; counter++; for (int i = 0; i < 2; ++i) { @@ -870,6 +435,7 @@ void CurvePlanarFourier::dgammadashdash_by_dcoeff_impl(Array& data) { Array q_norm = q * inv_magnitude(); double cosnphi, sinnphi; +#pragma omp parallel for schedule(static) for (int m = 0; m < numquadpoints; ++m) { double phi = 2 * M_PI * quadpoints[m]; double cosphi = cos(phi); @@ -931,279 +497,57 @@ void CurvePlanarFourier::dgammadashdash_by_dcoeff_impl(Array& data) { double inv_sqrt_s = inv_magnitude(); data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] + 0.5 * q_norm[3])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[0] - 0.5 * q_norm[3])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[0] + 0.5 * q_norm[2]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[0] - 0.5 * q_norm[1])) + * inv_sqrt_s; counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - 0.5 * q_norm[2])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3] - 1.0) * q_norm[1] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[1] - 0.5 * q_norm[2])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[1] - 0.5 * q_norm[3]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[1] - 0.5 * q_norm[0])) + * inv_sqrt_s; + counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3] - 1.0) * q_norm[2] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - 0.5 * q_norm[1])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[2] - 0.5 * q_norm[1])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] + 0.5 * q_norm[0]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[2] - 0.5 * q_norm[3])) + * inv_sqrt_s; + counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - + (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3] - 1.0) * q_norm[3] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] + 0.5 * q_norm[0])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3] - 1.0) * q_norm[3] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[3] - 0.5 * q_norm[0])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[3] - 0.5 * q_norm[1]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[3] - 0.5 * q_norm[2])) + * inv_sqrt_s; counter++; for (int i = 0; i < 3; ++i) { @@ -1223,6 +567,7 @@ void CurvePlanarFourier::dgammadashdashdash_by_dcoeff_impl(Array& data) { Array q_norm = q * inv_magnitude(); double cosnphi, sinnphi; +#pragma omp parallel for schedule(static) for (int m = 0; m < numquadpoints; ++m) { double phi = 2 * M_PI * quadpoints[m]; double cosphi = cos(phi); @@ -1306,279 +651,57 @@ void CurvePlanarFourier::dgammadashdashdash_by_dcoeff_impl(Array& data) { double inv_sqrt_s = inv_magnitude(); data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (inv_sqrt_s - q[0] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[0] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] + 0.5 * q_norm[3])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[0] - 0.5 * q_norm[3])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[0] + 0.5 * q_norm[2]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[0] - 0.5 * q_norm[1])) + * inv_sqrt_s; counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (inv_sqrt_s - q[1] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[1] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - 0.5 * q_norm[2])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3] - 1.0) * q_norm[1] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[1] - 0.5 * q_norm[2])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[1] - 0.5 * q_norm[3]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[1] - 0.5 * q_norm[0])) + * inv_sqrt_s; + counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (inv_sqrt_s - q[2] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (- q[3] * q[2] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3] - 1.0) * q_norm[2] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - 0.5 * q_norm[1])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[2] - 0.5 * q_norm[1])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] + 0.5 * q_norm[0]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[2] - 0.5 * q_norm[3])) + * inv_sqrt_s; + counter++; - data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[0] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[0] - - 2 * j * q_norm[3]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - + (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[2]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[0] * q_norm[0] + q_norm[1] * q_norm[1]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[1]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[1] + q_norm[0] * q_norm[0]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] - - 2 * j * q_norm[0]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 1, counter) = (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[0] - + 2 * i * q_norm[3] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[0]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[2] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[1] - - 4 * j * q_norm[1]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[2] - + 2 * i * q_norm[1] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[2]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[2] + q_norm[3] * q_norm[0]) * q_norm[3] - + 2 * i * q_norm[0] - + 4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3]) * q_norm[3] - - 4 * j * q_norm[3]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); - - - data(m, 2, counter) = (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[0] - - 2 * i * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[0] - + 2 * j * q_norm[1]) - * (- q[0] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 4 * i * (q_norm[1] * q_norm[3] - q_norm[2] * q_norm[0]) * q_norm[1] - + 2 * i * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[1] - + 2 * j * q_norm[0]) - * (- q[1] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (- 2 * i * q_norm[0] - - 4 * i * (q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[2] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[2] - + 2 * j * q_norm[3]) - * (- q[2] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s) - + - (2 * i * q_norm[1] - + 4 * i * (q_norm[2] * q_norm[0] - q_norm[1] * q_norm[3]) * q_norm[3] - - 4 * j * (q_norm[1] * q_norm[0] + q_norm[2] * q_norm[3]) * q_norm[3] - + 2 * j * q_norm[2]) - * (inv_sqrt_s - q[3] * q[3] * inv_sqrt_s * inv_sqrt_s * inv_sqrt_s); + data(m, 0, counter) = (4 * i * (q_norm[2] * q_norm[2] + q_norm[3] * q_norm[3] - 1.0) * q_norm[3] + - 4 * j * ((q_norm[1] * q_norm[2] - q_norm[0] * q_norm[3]) * q_norm[3] + 0.5 * q_norm[0])) + * inv_sqrt_s; + + data(m, 1, counter) = (4 * j * (q_norm[1] * q_norm[1] + q_norm[3] * q_norm[3] - 1.0) * q_norm[3] + - 4 * i * ((q_norm[1] * q_norm[2] + q_norm[0] * q_norm[3]) * q_norm[3] - 0.5 * q_norm[0])) + * inv_sqrt_s; + + data(m, 2, counter) = (- 4 * i * ((q_norm[1] * q_norm[3] - q_norm[0] * q_norm[2]) * q_norm[3] - 0.5 * q_norm[1]) + - 4 * j * ((q_norm[2] * q_norm[3] + q_norm[0] * q_norm[1]) * q_norm[3] - 0.5 * q_norm[2])) + * inv_sqrt_s; counter++; for (int i = 0; i < 3; ++i) { diff --git a/src/simsoptpp/curvexyzfourier.cpp b/src/simsoptpp/curvexyzfourier.cpp index 1f51aa9c1..bae6886ef 100644 --- a/src/simsoptpp/curvexyzfourier.cpp +++ b/src/simsoptpp/curvexyzfourier.cpp @@ -4,6 +4,7 @@ template void CurveXYZFourier::gamma_impl(Array& data, Array& quadpoints) { int numquadpoints = quadpoints.size(); data *= 0; +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { for (int i = 0; i < 3; ++i) { data(k, i) += dofs[i][0]; @@ -18,6 +19,7 @@ void CurveXYZFourier::gamma_impl(Array& data, Array& quadpoints) { template void CurveXYZFourier::gammadash_impl(Array& data) { data *= 0; +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { for (int i = 0; i < 3; ++i) { for (int j = 1; j < order+1; ++j) { @@ -31,6 +33,7 @@ void CurveXYZFourier::gammadash_impl(Array& data) { template void CurveXYZFourier::gammadashdash_impl(Array& data) { data *= 0; +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { for (int i = 0; i < 3; ++i) { for (int j = 1; j < order+1; ++j) { @@ -44,6 +47,7 @@ void CurveXYZFourier::gammadashdash_impl(Array& data) { template void CurveXYZFourier::gammadashdashdash_impl(Array& data) { data *= 0; +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { for (int i = 0; i < 3; ++i) { for (int j = 1; j < order+1; ++j) { @@ -56,6 +60,7 @@ void CurveXYZFourier::gammadashdashdash_impl(Array& data) { template void CurveXYZFourier::dgamma_by_dcoeff_impl(Array& data) { +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { for (int i = 0; i < 3; ++i) { data(k, i, i*(2*order+1)) = 1.; @@ -69,6 +74,7 @@ void CurveXYZFourier::dgamma_by_dcoeff_impl(Array& data) { template void CurveXYZFourier::dgammadash_by_dcoeff_impl(Array& data) { +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { for (int i = 0; i < 3; ++i) { for (int j = 1; j < order+1; ++j) { @@ -81,6 +87,7 @@ void CurveXYZFourier::dgammadash_by_dcoeff_impl(Array& data) { template void CurveXYZFourier::dgammadashdash_by_dcoeff_impl(Array& data) { +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { for (int i = 0; i < 3; ++i) { for (int j = 1; j < order+1; ++j) { @@ -93,6 +100,7 @@ void CurveXYZFourier::dgammadashdash_by_dcoeff_impl(Array& data) { template void CurveXYZFourier::dgammadashdashdash_by_dcoeff_impl(Array& data) { +#pragma omp parallel for schedule(static) for (int k = 0; k < numquadpoints; ++k) { for (int i = 0; i < 3; ++i) { for (int j = 1; j < order+1; ++j) {