diff --git a/field/src/field.rs b/field/src/field.rs index 3f7a9acc..dd04f9bd 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -340,25 +340,24 @@ where // Compute Green's fct evaluations let kernel = self.compute_kernel(order, &conv_grid, kernel_point); - let padded_kernel = pad3(&kernel, (p - m, p - m, p - m), (0, 0, 0)); - let mut padded_kernel = flip3(&padded_kernel); + let mut kernel = flip3(&kernel); // Compute FFT of padded kernel - let mut padded_kernel_hat = Array3D::>::new((p, p, p / 2 + 1)); + let mut kernel_hat = Array3D::>::new((p, p, p / 2 + 1)); T::rfft3_fftw( - padded_kernel.get_data_mut(), - padded_kernel_hat.get_data_mut(), + kernel.get_data_mut(), + kernel_hat.get_data_mut(), &[p, p, p], ); - kernel_data_vec[i].push(padded_kernel_hat); + kernel_data_vec[i].push(kernel_hat); } else { // Fill with zeros when interaction doesn't exist let n = 2 * order - 1; let p = n + 1; - let padded_kernel_hat_zeros = Array3D::>::new((p, p, p / 2 + 1)); - kernel_data_vec[i].push(padded_kernel_hat_zeros); + let kernel_hat_zeros = Array3D::>::new((p, p, p / 2 + 1)); + kernel_data_vec[i].push(kernel_hat_zeros); } } } @@ -422,8 +421,8 @@ where let mut result = FftFieldTranslationKiFmm { alpha, kernel, - surf_to_conv_map: HashMap::default(), - conv_to_surf_map: HashMap::default(), + surf_to_conv_map:Vec::default(), + conv_to_surf_map: Vec::default(), operator_data: FftM2lOperatorData::default(), transfer_vectors: Vec::default(), }; @@ -447,37 +446,58 @@ where /// * `order` - The expansion order for the multipole and local expansions. pub fn compute_surf_to_conv_map( order: usize, - ) -> (HashMap, HashMap) { + ) -> (Vec, Vec) { // Number of points along each axis of convolution grid let n = 2 * order - 1; + let npad = n + 1; + + let nsurf_grid = 6 * (order - 1).pow(2) + 2; // Index maps between surface and convolution grids - let mut surf_to_conv: HashMap = HashMap::new(); - let mut conv_to_surf: HashMap = HashMap::new(); + let mut surf_to_conv = vec![0usize; nsurf_grid]; + let mut conv_to_surf = vec![0usize; nsurf_grid]; // Initialise surface grid index let mut surf_index = 0; // The boundaries of the surface grid when embedded within the convolution grid - let lower = order - 1; - let upper = 2 * order - 2; + let lower = order; + let upper = 2 * order - 1; - for k in 0..n { - for j in 0..n { - for i in 0..n { - let conv_index = i + n * j + n * n * k; + for k in 0..npad { + for j in 0..npad { + for i in 0..npad { + let conv_index = i + npad * j + npad * npad * k; if (i >= lower && j >= lower && (k == lower || k == upper)) || (j >= lower && k >= lower && (i == lower || i == upper)) || (k >= lower && i >= lower && (j == lower || j == upper)) { - surf_to_conv.insert(surf_index, conv_index); - conv_to_surf.insert(conv_index, surf_index); + surf_to_conv[surf_index] = conv_index; surf_index += 1; } } } } + let lower = 0; + let upper = order - 1; + let mut surf_index = 0; + + for k in 0..npad { + for j in 0..npad { + for i in 0..npad { + let conv_index = i + npad * j + npad * npad * k; + if (i <= upper && j <= upper && (k == lower || k == upper)) + || (j <= upper && k <= upper && (i == lower || i == upper)) + || (k <= upper && i <= upper && (j == lower || j == upper)) + { + conv_to_surf[surf_index] = conv_index; + surf_index += 1; + } + } + } + } + (surf_to_conv, conv_to_surf) } @@ -494,11 +514,12 @@ where target_pt: [T; 3], ) -> Array3D { let n = 2 * order - 1; - let mut result = Array3D::::new((n, n, n)); - let nconv = n.pow(3); + let npad = n + 1; - let mut kernel_evals = vec![T::zero(); nconv]; + let mut result = Array3D::::new((npad, npad, npad)); + let nconv = n.pow(3); + let mut kernel_evals = vec![T::zero(); nconv]; self.kernel.assemble_st( EvalType::Value, convolution_grid, @@ -506,7 +527,15 @@ where &mut kernel_evals[..], ); - result.get_data_mut().copy_from_slice(&kernel_evals[..]); + for k in 0..n { + for j in 0..n { + for i in 0..n { + let conv_idx = i + j*n + k*n*n; + let save_idx = i+ j*npad + k*npad*npad; + result.get_data_mut()[save_idx..(save_idx+1)].copy_from_slice(&kernel_evals[(conv_idx)..(conv_idx+1)]); + } + } + } result } @@ -518,26 +547,14 @@ where /// * `charges` - A vector of charges. pub fn compute_signal(&self, order: usize, charges: &[T]) -> Array3D { let n = 2 * order - 1; - let n_tot = n * n * n; - let mut result = Array3D::new((n, n, n)); + let npad = n + 1; + + let mut result = Array3D::new((npad, npad, npad)); - let mut tmp = vec![T::zero(); n_tot]; - - for k in 0..n { - for j in 0..n { - for i in 0..n { - let conv_index = i + n * j + n * n * k; - if let Some(surf_index) = self.conv_to_surf_map.get(&conv_index) { - tmp[conv_index] = charges[*surf_index]; - } else { - tmp[conv_index] = T::zero(); - } - } - } + for (i, &j) in self.surf_to_conv_map.iter().enumerate() { + result.get_data_mut()[j] = charges[i]; } - result.get_data_mut().copy_from_slice(&tmp[..]); - result } } @@ -795,23 +812,19 @@ mod test { // Compute kernel from source/target pair let test_kernel = fft.compute_kernel(order, &conv_grid, kernel_point); - let (m, n, o) = test_kernel.shape(); - let p = m + 1; - let q = n + 1; - let r = o + 1; + let &(m, n, o) = test_kernel.shape(); - let padded_kernel = pad3(&test_kernel, (p - m, q - n, r - o), (0, 0, 0)); - let mut padded_kernel = flip3(&padded_kernel); + let mut test_kernel = flip3(&test_kernel); // Compute FFT of padded kernel - let mut padded_kernel_hat = Array3D::::new((p, q, r / 2 + 1)); + let mut test_kernel_hat = Array3D::::new((m, n, o / 2 + 1)); f64::rfft3_fftw( - padded_kernel.get_data_mut(), - padded_kernel_hat.get_data_mut(), - &[p, q, r], + test_kernel.get_data_mut(), + test_kernel_hat.get_data_mut(), + &[m, n, o], ); - for (p, t) in padded_kernel_hat.get_data().iter().zip(kernel_hat.iter()) { + for (p, t) in test_kernel_hat.get_data().iter().zip(kernel_hat.iter()) { assert!((p - t).norm() < 1e-6) } } @@ -903,20 +916,14 @@ mod test { let transfer_vector = &all_transfer_vectors[idx]; // Compute FFT of the representative signal - let signal = fft.compute_signal(order, multipole.data()); + let mut signal = fft.compute_signal(order, multipole.data()); let &(m, n, o) = signal.shape(); - let p = m + 1; - let q = n + 1; - let r = o + 1; - let pad_size = (p - m, q - n, r - o); - let pad_index = (p - m, q - n, r - o); - let mut padded_signal = pad3(&signal, pad_size, pad_index); - let mut padded_signal_hat = Array3D::::new((p, q, r / 2 + 1)); + let mut signal_hat = Array3D::::new((m, n, o / 2 + 1)); f64::rfft3_fftw( - padded_signal.get_data_mut(), - padded_signal_hat.get_data_mut(), - &[p, q, r], + signal.get_data_mut(), + signal_hat.get_data_mut(), + &[m, n, o], ); let source_equivalent_surface = transfer_vector @@ -953,51 +960,39 @@ mod test { // Compute kernel let kernel = fft.compute_kernel(order, &conv_grid, kernel_point); - let (m, n, o) = kernel.shape(); - let p = m + 1; - let q = n + 1; - let r = o + 1; + let &(m, n, o) = kernel.shape(); - let padded_kernel = pad3(&kernel, (p - m, q - n, r - o), (0, 0, 0)); - let mut padded_kernel = flip3(&padded_kernel); + let mut kernel = flip3(&kernel); // Compute FFT of padded kernel - let mut padded_kernel_hat = Array3D::::new((p, q, r / 2 + 1)); + let mut kernel_hat = Array3D::::new((m, n, o / 2 + 1)); f64::rfft3_fftw( - padded_kernel.get_data_mut(), - padded_kernel_hat.get_data_mut(), - &[p, q, r], + kernel.get_data_mut(), + kernel_hat.get_data_mut(), + &[m, n, o], ); // Compute convolution - let hadamard_product = padded_signal_hat + let hadamard_product = signal_hat .get_data() .iter() - .zip(padded_kernel_hat.get_data().iter()) + .zip(kernel_hat.get_data().iter()) .map(|(a, b)| a * b) .collect_vec(); - let mut hadamard_product = Array3D::from_data(hadamard_product, (p, q, r / 2 + 1)); + let mut hadamard_product = Array3D::from_data(hadamard_product, (m, n, o / 2 + 1)); - let mut potentials = Array3D::new((p, q, r)); + let mut potentials = Array3D::new((m, n, o)); f64::irfft3_fftw( hadamard_product.get_data_mut(), potentials.get_data_mut(), - &[p, q, r], + &[m, n, o], ); - let (_, multi_indices) = MortonKey::surface_grid::(order); - - let mut tmp = Vec::new(); - let ntargets = multi_indices.len() / 3; - let xs = &multi_indices[0..ntargets]; - let ys = &multi_indices[ntargets..2 * ntargets]; - let zs = &multi_indices[2 * ntargets..]; - - for i in 0..ntargets { - let val = potentials.get(zs[i], ys[i], xs[i]).unwrap(); - tmp.push(*val); + let mut result = vec![0f64; ntargets]; + for (i, &idx) in fft.conv_to_surf_map.iter().enumerate() { + result[i] = potentials.get_data()[idx]; } // Get direct evaluations for testing @@ -1010,7 +1005,7 @@ mod test { &mut direct[..], ); - let abs_error: f64 = tmp + let abs_error: f64 = result .iter() .zip(direct.iter()) .map(|(a, b)| (a - b).abs()) diff --git a/field/src/types.rs b/field/src/types.rs index 9b82f2df..5c1a3cda 100644 --- a/field/src/types.rs +++ b/field/src/types.rs @@ -33,10 +33,10 @@ where pub alpha: T, /// Map between indices of surface convolution grid points. - pub surf_to_conv_map: HashMap, + pub surf_to_conv_map: Vec, /// Map between indices of convolution and surface grid points. - pub conv_to_surf_map: HashMap, + pub conv_to_surf_map: Vec, /// Precomputed data required for FFT compressed M2L interaction. pub operator_data: FftM2lOperatorData>, diff --git a/fmm/src/field_translation/hashmap/source_to_target.rs b/fmm/src/field_translation/hashmap/source_to_target.rs index d179bd5e..8788cb8c 100644 --- a/fmm/src/field_translation/hashmap/source_to_target.rs +++ b/fmm/src/field_translation/hashmap/source_to_target.rs @@ -403,58 +403,58 @@ where &[p, q, r], ); - // Compute local expansion coefficients and save to data tree - let (_, multi_indices) = MortonKey::surface_grid::(self.fmm.order); - - let check_potentials = global_check_potentials - // .data() - .chunks_exact(size) - .flat_map(|chunk| { - let m = 2 * self.fmm.order - 1; - let p = m + 1; - let mut potentials = Array3D::new((p, p, p)); - potentials.get_data_mut().copy_from_slice(chunk); - - let mut tmp = Vec::new(); - let ntargets = multi_indices.len() / 3; - let xs = &multi_indices[0..ntargets]; - let ys = &multi_indices[ntargets..2 * ntargets]; - let zs = &multi_indices[2 * ntargets..]; - - for i in 0..ntargets { - let val = potentials.get(zs[i], ys[i], xs[i]).unwrap(); - tmp.push(*val); - } - tmp - }) - .collect_vec(); - - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - let check_potentials = unsafe { - rlst_pointer_mat!['a, U, check_potentials.as_ptr(), (ncoeffs, ntargets), (1, ncoeffs)] - }; - - let mut tmp = self - .fmm - .dc2e_inv_1 - .dot(&self.fmm.dc2e_inv_2.dot(&check_potentials)) - .eval(); - tmp.data_mut() - .iter_mut() - .for_each(|d| *d *= self.fmm.kernel.scale(level)); - let locals = tmp; - - for (i, target) in targets.iter().enumerate() { - let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); - let mut target_local_lock = target_local_arc.lock().unwrap(); - - let top_left = (0, i); - let dim = (ncoeffs, 1); - let target_local_owned = locals.block(top_left, dim); - - *target_local_lock.deref_mut() = - (target_local_lock.deref() + target_local_owned).eval(); - } + // // Compute local expansion coefficients and save to data tree + // let (_, multi_indices) = MortonKey::surface_grid::(self.fmm.order); + + // let check_potentials = global_check_potentials + // // .data() + // .chunks_exact(size) + // .flat_map(|chunk| { + // let m = 2 * self.fmm.order - 1; + // let p = m + 1; + // let mut potentials = Array3D::new((p, p, p)); + // potentials.get_data_mut().copy_from_slice(chunk); + + // let mut tmp = Vec::new(); + // let ntargets = multi_indices.len() / 3; + // let xs = &multi_indices[0..ntargets]; + // let ys = &multi_indices[ntargets..2 * ntargets]; + // let zs = &multi_indices[2 * ntargets..]; + + // for i in 0..ntargets { + // let val = potentials.get(zs[i], ys[i], xs[i]).unwrap(); + // tmp.push(*val); + // } + // tmp + // }) + // .collect_vec(); + + // let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + // let check_potentials = unsafe { + // rlst_pointer_mat!['a, U, check_potentials.as_ptr(), (ncoeffs, ntargets), (1, ncoeffs)] + // }; + + // let mut tmp = self + // .fmm + // .dc2e_inv_1 + // .dot(&self.fmm.dc2e_inv_2.dot(&check_potentials)) + // .eval(); + // tmp.data_mut() + // .iter_mut() + // .for_each(|d| *d *= self.fmm.kernel.scale(level)); + // let locals = tmp; + + // for (i, target) in targets.iter().enumerate() { + // let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); + // let mut target_local_lock = target_local_arc.lock().unwrap(); + + // let top_left = (0, i); + // let dim = (ncoeffs, 1); + // let target_local_owned = locals.block(top_left, dim); + + // *target_local_lock.deref_mut() = + // (target_local_lock.deref() + target_local_owned).eval(); + // } } fn m2l_scale(&self, level: u64) -> U { diff --git a/fmm/src/field_translation/linear/source_to_target.rs b/fmm/src/field_translation/linear/source_to_target.rs index 26777865..3b6c6683 100644 --- a/fmm/src/field_translation/linear/source_to_target.rs +++ b/fmm/src/field_translation/linear/source_to_target.rs @@ -406,60 +406,60 @@ where &[p, q, r], ); - // Compute local expansion coefficients and save to data tree - let (_, multi_indices) = MortonKey::surface_grid::(self.fmm.order); - - let check_potentials = global_check_potentials - .chunks_exact(size) - .flat_map(|chunk| { - let m = 2 * self.fmm.order - 1; - let p = m + 1; - let mut potentials = Array3D::new((p, p, p)); - potentials.get_data_mut().copy_from_slice(chunk); - - let mut tmp = Vec::new(); - let ntargets = multi_indices.len() / 3; - let xs = &multi_indices[0..ntargets]; - let ys = &multi_indices[ntargets..2 * ntargets]; - let zs = &multi_indices[2 * ntargets..]; - - for i in 0..ntargets { - let val = potentials.get(zs[i], ys[i], xs[i]).unwrap(); - tmp.push(*val); - } - tmp - }) - .collect_vec(); - - // This should be blocked and use blas3 - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - let check_potentials = unsafe { - rlst_pointer_mat!['a, U, check_potentials.as_ptr(), (ncoeffs, ntargets), (1, ncoeffs)] - }; - - let mut tmp = self - .fmm - .dc2e_inv_1 - .dot(&self.fmm.dc2e_inv_2.dot(&check_potentials)) - .eval(); - - tmp.data_mut() - .iter_mut() - .for_each(|d| *d *= self.fmm.kernel.scale(level)); - - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - - // Add result - tmp.data() - .par_chunks_exact(ncoeffs) - .zip(self.level_locals[level as usize].into_par_iter()) - .for_each(|(result, local)| unsafe { - let mut ptr = local.raw; - for &r in result.iter().take(ncoeffs) { - *ptr += r; - ptr = ptr.add(1) - } - }); + // // Compute local expansion coefficients and save to data tree + // let (_, multi_indices) = MortonKey::surface_grid::(self.fmm.order); + + // let check_potentials = global_check_potentials + // .chunks_exact(size) + // .flat_map(|chunk| { + // let m = 2 * self.fmm.order - 1; + // let p = m + 1; + // let mut potentials = Array3D::new((p, p, p)); + // potentials.get_data_mut().copy_from_slice(chunk); + + // let mut tmp = Vec::new(); + // let ntargets = multi_indices.len() / 3; + // let xs = &multi_indices[0..ntargets]; + // let ys = &multi_indices[ntargets..2 * ntargets]; + // let zs = &multi_indices[2 * ntargets..]; + + // for i in 0..ntargets { + // let val = potentials.get(zs[i], ys[i], xs[i]).unwrap(); + // tmp.push(*val); + // } + // tmp + // }) + // .collect_vec(); + + // // This should be blocked and use blas3 + // let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + // let check_potentials = unsafe { + // rlst_pointer_mat!['a, U, check_potentials.as_ptr(), (ncoeffs, ntargets), (1, ncoeffs)] + // }; + + // let mut tmp = self + // .fmm + // .dc2e_inv_1 + // .dot(&self.fmm.dc2e_inv_2.dot(&check_potentials)) + // .eval(); + + // tmp.data_mut() + // .iter_mut() + // .for_each(|d| *d *= self.fmm.kernel.scale(level)); + + // let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + // // Add result + // tmp.data() + // .par_chunks_exact(ncoeffs) + // .zip(self.level_locals[level as usize].into_par_iter()) + // .for_each(|(result, local)| unsafe { + // let mut ptr = local.raw; + // for &r in result.iter().take(ncoeffs) { + // *ptr += r; + // ptr = ptr.add(1) + // } + // }); } fn m2l_scale(&self, level: u64) -> U { diff --git a/tree/src/implementations/helpers.rs b/tree/src/implementations/helpers.rs index 9b3d8183..23dd94b8 100644 --- a/tree/src/implementations/helpers.rs +++ b/tree/src/implementations/helpers.rs @@ -124,72 +124,6 @@ pub fn find_corners(coordinates: &[T]) -> Vec { corners } -/// We need to quickly map between corners and their corresponding positions in the surface grid via -/// their respective indices. -/// -/// # Arguments -/// * `order` - the order of expansions used in constructing the surface grid -pub fn map_corners_to_surface(order: usize) -> (HashMap, HashMap) -where - T: Float + std::ops::SubAssign + std::ops::MulAssign, -{ - let (_, surface_multindex) = MortonKey::surface_grid::(order); - - let nsurf = surface_multindex.len() / 3; - let ncorners = 8; - let corners_multindex = [ - 0, - order - 1, - 0, - order - 1, - 0, - order - 1, - 0, - order - 1, - 0, - 0, - order - 1, - order - 1, - 0, - 0, - order - 1, - order - 1, - 0, - 0, - 0, - 0, - order - 1, - order - 1, - order - 1, - order - 1, - ]; - - let mut _map = HashMap::new(); - let mut _inv_map = HashMap::new(); - - for i in 0..nsurf { - let s = [ - surface_multindex[i], - surface_multindex[nsurf + i], - surface_multindex[2 * nsurf + i], - ]; - - for j in 0..ncorners { - let c = [ - corners_multindex[j], - corners_multindex[8 + j], - corners_multindex[16 + j], - ]; - - if s[0] == c[0] && s[1] == c[1] && s[2] == c[2] { - _map.insert(i, j); - _inv_map.insert(j, i); - } - } - } - - (_map, _inv_map) -} #[cfg(test)] mod test { @@ -200,10 +134,10 @@ mod test { #[test] fn test_find_corners() { let order = 5; - let (grid_1, _) = MortonKey::surface_grid::(order); + let grid_1 = MortonKey::surface_grid::(order); let order = 2; - let (grid_2, _) = MortonKey::surface_grid::(order); + let grid_2 = MortonKey::surface_grid::(order); let corners_1 = find_corners(&grid_1); let corners_2 = find_corners(&grid_2); diff --git a/tree/src/implementations/impl_morton.rs b/tree/src/implementations/impl_morton.rs index b2765282..5a49b49d 100644 --- a/tree/src/implementations/impl_morton.rs +++ b/tree/src/implementations/impl_morton.rs @@ -850,7 +850,7 @@ impl MortonKey { /// /// # Arguments /// * `order` - The expansion order being used in the FMM simulation. - pub fn surface_grid(order: usize) -> (Vec, Vec) + pub fn surface_grid(order: usize) -> Vec where T: Float + std::ops::MulAssign + std::ops::SubAssign + ToPrimitive, { @@ -883,12 +883,7 @@ impl MortonKey { } } - // Map surface points to multi-indices - let surface_idxs = surface - .iter() - .clone() - .map(|&x| x.to_usize().unwrap()) - .collect(); + // Shift and scale surface so that it's centered at the origin and has side length of 1 let two = T::from(2.0).unwrap(); @@ -898,7 +893,7 @@ impl MortonKey { surface.iter_mut().for_each(|point| *point -= T::one()); - (surface, surface_idxs) + surface } /// Compute a surface grid centered at this Morton Key, used in the discretisation of Fast Multipole @@ -951,7 +946,7 @@ impl MortonKey { where T: Float + std::ops::MulAssign + std::ops::SubAssign + Default + Scalar, { - let (surface, _) = MortonKey::surface_grid(order); + let surface = MortonKey::surface_grid(order); self.scale_surface::(surface, domain, alpha) } @@ -1811,9 +1806,8 @@ mod test { let surface = key.compute_surface(&domain, order, alpha); assert_eq!(surface.len(), ncoeffs * dim); - let (surface, surface_idxs) = MortonKey::surface_grid::(order); + let surface = MortonKey::surface_grid::(order); assert_eq!(surface.len(), ncoeffs * dim); - assert_eq!(surface_idxs.len(), ncoeffs * dim); let mut expected = vec![[0usize; 3]; ncoeffs]; let lower = 0; @@ -1833,16 +1827,7 @@ mod test { } } - // Test ordering. - for i in 0..ncoeffs { - let point = vec![ - surface_idxs[i], - surface_idxs[i + ncoeffs], - surface_idxs[i + 2 * ncoeffs], - ]; - assert_eq!(point, expected[i]); - } - + // Test scaling let level = 2; let key = MortonKey::from_point(&point, &domain, level);