diff --git a/field/src/fft.rs b/field/src/fft.rs index 56ede31a..cb3c5934 100644 --- a/field/src/fft.rs +++ b/field/src/fft.rs @@ -3,31 +3,47 @@ use fftw::{plan::*, types::*}; use num::Complex; use rayon::prelude::*; -use crate::types::{FftMatrixc32, FftMatrixc64, FftMatrixf32, FftMatrixf64}; - -pub trait Fft +pub trait Fft where Self: Sized, { - /// Compute a Real FFT over a rlst matrix which stores data corresponding to multiple 3 dimensional arrays of shape `shape`, stored in column major order. + /// Compute a parallel real to complex FFT over a slice which stores data corresponding to multiple 3 dimensional arrays of shape `shape`, stored in column major order. /// This function is multithreaded, and uses the FFTW library. /// /// # Arguments /// * `input` - Input slice of real data, corresponding to a 3D array stored in column major order. /// * `output` - Output slice. /// * `shape` - Shape of input data. - fn rfft3_fftw_par_vec(input: &mut DtypeReal, output: &mut DtypeCplx, shape: &[usize]); + fn rfft3_fftw_par_slice(input: &mut [Self], output: &mut [Complex], shape: &[usize]); - /// Compute an inverse Real FFT over a rlst matrix which stores data corresponding to multiple 3 dimensional arrays of shape `shape`, stored in column major order. + /// Compute a real to complex FFT over a slice which stores data corresponding to multiple 3 dimensional arrays of shape `shape`, stored in column major order. + /// This function is multithreaded, and uses the FFTW library. + /// + /// # Arguments + /// * `input` - Input slice of real data, corresponding to a 3D array stored in column major order. + /// * `output` - Output slice. + /// * `shape` - Shape of input data. + fn rfft3_fftw_slice(input: &mut [Self], output: &mut [Complex], shape: &[usize]); + + /// Compute a parallel complex to real inverse FFT over a slice which stores data corresponding to multiple 3 dimensional arrays of shape `shape`, stored in column major order. 
/// This function is multithreaded, and uses the FFTW library. /// /// # Arguments /// * `input` - Input slice of complex data, corresponding to an FFT of a 3D array stored in column major order. /// * `output` - Output slice. /// * `shape` - Shape of output data. - fn irfft_fftw_par_vec(input: &mut DtypeCplx, output: &mut DtypeReal, shape: &[usize]); + fn irfft3_fftw_par_slice(input: &mut [Complex], output: &mut [Self], shape: &[usize]); - /// Compute a Real FFT of an input slice corresponding to a 3D array stored in column major format, specified by `shape` using the FFTW library. + /// Compute a complex to real inverse FFT over a slice which stores data corresponding to multiple 3 dimensional arrays of shape `shape`, stored in column major order. + /// This function is multithreaded, and uses the FFTW library. + /// + /// # Arguments + /// * `input` - Input slice of complex data, corresponding to an FFT of a 3D array stored in column major order. + /// * `output` - Output slice. + /// * `shape` - Shape of output data. + fn irfft3_fftw_slice(input: &mut [Complex], output: &mut [Self], shape: &[usize]); + + /// Compute a real to complex FFT of an input slice corresponding to a 3D array stored in column major format, specified by `shape` using the FFTW library. /// /// # Arguments /// * `input` - Input slice of real data, corresponding to a 3D array stored in column major order. @@ -35,7 +51,7 @@ where /// * `shape` - Shape of input data. fn rfft3_fftw(input: &mut [Self], output: &mut [Complex], shape: &[usize]); - /// Compute an inverse Real FFT of an input slice corresponding to the FFT of a 3D array stored in column major format, specified by `shape` using the FFTW library. + /// Compute a complex to real inverse FFT of an input slice corresponding to the FFT of a 3D array stored in column major format, specified by `shape` using the FFTW library. /// This function normalises the output. 
/// /// # Arguments @@ -45,120 +61,8 @@ where fn irfft3_fftw(input: &mut [Complex], output: &mut [Self], shape: &[usize]); } -// impl Fft for f32 { -// fn rfft3_fftw_par_vec(input: &mut FftMatrixf32, output: &mut FftMatrixc32, shape: &[usize]) { -// let size: usize = shape.iter().product(); -// let size_d = shape.last().unwrap(); -// let size_real = (size / size_d) * (size_d / 2 + 1); -// let plan: R2CPlan32 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); - -// let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter(); -// let it_out = output -// .data_mut() -// .par_chunks_exact_mut(size_real) -// .into_par_iter(); - -// it_inp.zip(it_out).for_each(|(inp, out)| { -// let _ = plan.r2c(inp, out); -// }); -// } - -// fn irfft_fftw_par_vec(input: &mut FftMatrixc32, output: &mut FftMatrixf32, shape: &[usize]) { -// let size: usize = shape.iter().product(); -// let size_d = shape.last().unwrap(); -// let size_real = (size / size_d) * (size_d / 2 + 1); -// let plan: C2RPlan32 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap(); - -// let it_inp = input -// .data_mut() -// .par_chunks_exact_mut(size_real) -// .into_par_iter(); -// let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter(); - -// it_inp.zip(it_out).for_each(|(inp, out)| { -// let _ = plan.c2r(inp, out); -// // Normalise output -// out.iter_mut() -// .for_each(|value| *value *= 1.0 / (size as f32)); -// }) -// } - -// fn rfft3_fftw(input: &mut [Self], output: &mut [Complex], shape: &[usize]) { -// assert!(shape.len() == 3); -// let plan: R2CPlan32 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); -// let _ = plan.r2c(input, output); -// } - -// fn irfft3_fftw(input: &mut [Complex], output: &mut [Self], shape: &[usize]) { -// assert!(shape.len() == 3); -// let size: usize = shape.iter().product(); -// let plan: C2RPlan32 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap(); -// let _ = plan.c2r(input, output); -// // Normalise -// output -// .iter_mut() -// .for_each(|value| 
*value *= 1.0 / (size as f32)); -// } -// } - -// impl Fft for f64 { -// fn rfft3_fftw_par_vec(input: &mut FftMatrixf64, output: &mut FftMatrixc64, shape: &[usize]) { -// let size: usize = shape.iter().product(); -// let size_d = shape.last().unwrap(); -// let size_real = (size / size_d) * (size_d / 2 + 1); -// let plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); - -// let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter(); -// let it_out = output -// .data_mut() -// .par_chunks_exact_mut(size_real) -// .into_par_iter(); - -// it_inp.zip(it_out).for_each(|(inp, out)| { -// let _ = plan.r2c(inp, out); -// }); -// } - -// fn irfft_fftw_par_vec(input: &mut FftMatrixc64, output: &mut FftMatrixf64, shape: &[usize]) { -// let size: usize = shape.iter().product(); -// let size_d = shape.last().unwrap(); -// let size_real = (size / size_d) * (size_d / 2 + 1); -// let plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap(); - -// let it_inp = input -// .data_mut() -// .par_chunks_exact_mut(size_real) -// .into_par_iter(); -// let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter(); - -// it_inp.zip(it_out).for_each(|(inp, out)| { -// let _ = plan.c2r(inp, out); -// // Normalise output -// out.iter_mut() -// .for_each(|value| *value *= 1.0 / (size as f64)); -// }) -// } - -// fn rfft3_fftw(input: &mut [Self], output: &mut [Complex], shape: &[usize]) { -// assert!(shape.len() == 3); -// let plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); -// let _ = plan.r2c(input, output); -// } - -// fn irfft3_fftw(input: &mut [Complex], output: &mut [Self], shape: &[usize]) { -// assert!(shape.len() == 3); -// let size: usize = shape.iter().product(); -// let plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap(); -// let _ = plan.c2r(input, output); -// // Normalise -// output -// .iter_mut() -// .for_each(|value| *value *= 1.0 / (size as f64)); -// } -// } - -impl Fft for f32 { - fn 
rfft3_fftw_par_vec(input: &mut FftMatrixf32, output: &mut FftMatrixc32, shape: &[usize]) { +impl Fft for f32 { + fn rfft3_fftw_par_slice(input: &mut [Self], output: &mut [Complex], shape: &[usize]) { let size: usize = shape.iter().product(); let size_d = shape.last().unwrap(); let size_real = (size / size_d) * (size_d / 2 + 1); @@ -172,7 +76,7 @@ impl Fft for f32 { }); } - fn irfft_fftw_par_vec(input: &mut FftMatrixc32, output: &mut FftMatrixf32, shape: &[usize]) { + fn irfft3_fftw_par_slice(input: &mut [Complex], output: &mut [Self], shape: &[usize]) { let size: usize = shape.iter().product(); let size_d = shape.last().unwrap(); let size_real = (size / size_d) * (size_d / 2 + 1); @@ -189,6 +93,37 @@ impl Fft for f32 { }) } + fn rfft3_fftw_slice(input: &mut [Self], output: &mut [Complex], shape: &[usize]) { + let size: usize = shape.iter().product(); + let size_d = shape.last().unwrap(); + let size_real = (size / size_d) * (size_d / 2 + 1); + let plan: R2CPlan32 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); + + let it_inp = input.chunks_exact_mut(size); + let it_out = output.chunks_exact_mut(size_real); + + it_inp.zip(it_out).for_each(|(inp, out)| { + let _ = plan.r2c(inp, out); + }); + } + + fn irfft3_fftw_slice(input: &mut [Complex], output: &mut [Self], shape: &[usize]) { + let size: usize = shape.iter().product(); + let size_d = shape.last().unwrap(); + let size_real = (size / size_d) * (size_d / 2 + 1); + let plan: C2RPlan32 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap(); + + let it_inp = input.chunks_exact_mut(size_real); + let it_out = output.chunks_exact_mut(size); + + it_inp.zip(it_out).for_each(|(inp, out)| { + let _ = plan.c2r(inp, out); + // Normalise output + out.iter_mut() + .for_each(|value| *value *= 1.0 / (size as f32)); + }) + } + fn rfft3_fftw(input: &mut [Self], output: &mut [Complex], shape: &[usize]) { assert!(shape.len() == 3); let plan: R2CPlan32 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); @@ -207,8 +142,8 @@ impl Fft for f32 
{ } } -impl Fft for f64 { - fn rfft3_fftw_par_vec(input: &mut FftMatrixf64, output: &mut FftMatrixc64, shape: &[usize]) { +impl Fft for f64 { + fn rfft3_fftw_par_slice(input: &mut [Self], output: &mut [Complex], shape: &[usize]) { let size: usize = shape.iter().product(); let size_d = shape.last().unwrap(); let size_real = (size / size_d) * (size_d / 2 + 1); @@ -222,7 +157,7 @@ impl Fft for f64 { }); } - fn irfft_fftw_par_vec(input: &mut FftMatrixc64, output: &mut FftMatrixf64, shape: &[usize]) { + fn irfft3_fftw_par_slice(input: &mut [Complex], output: &mut [Self], shape: &[usize]) { let size: usize = shape.iter().product(); let size_d = shape.last().unwrap(); let size_real = (size / size_d) * (size_d / 2 + 1); @@ -239,6 +174,37 @@ impl Fft for f64 { }) } + fn rfft3_fftw_slice(input: &mut [Self], output: &mut [Complex], shape: &[usize]) { + let size: usize = shape.iter().product(); + let size_d = shape.last().unwrap(); + let size_real = (size / size_d) * (size_d / 2 + 1); + let plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); + + let it_inp = input.chunks_exact_mut(size); + let it_out = output.chunks_exact_mut(size_real); + + it_inp.zip(it_out).for_each(|(inp, out)| { + let _ = plan.r2c(inp, out); + }); + } + + fn irfft3_fftw_slice(input: &mut [Complex], output: &mut [Self], shape: &[usize]) { + let size: usize = shape.iter().product(); + let size_d = shape.last().unwrap(); + let size_real = (size / size_d) * (size_d / 2 + 1); + let plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap(); + + let it_inp = input.chunks_exact_mut(size_real); + let it_out = output.chunks_exact_mut(size); + + it_inp.zip(it_out).for_each(|(inp, out)| { + let _ = plan.c2r(inp, out); + // Normalise output + out.iter_mut() + .for_each(|value| *value *= 1.0 / (size as f64)); + }) + } + fn rfft3_fftw(input: &mut [Self], output: &mut [Complex], shape: &[usize]) { assert!(shape.len() == 3); let plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); diff 
--git a/field/src/field.rs b/field/src/field.rs index 3f7a9acc..4545509a 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -15,7 +15,7 @@ use rlst::{ Shape, VectorContainer, }, }; -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; use bempp_tools::Array3D; use bempp_traits::{ @@ -26,12 +26,12 @@ use bempp_tree::{ }; use crate::{ - array::{flip3, pad3}, + array::flip3, fft::Fft, transfer_vector::compute_transfer_vectors, types::{ - FftFieldTranslationKiFmm, FftM2lOperatorData, FftMatrix, SvdFieldTranslationKiFmm, - SvdM2lOperatorData, TransferVector, + FftFieldTranslationKiFmm, FftM2lOperatorData, SvdFieldTranslationKiFmm, SvdM2lOperatorData, + TransferVector, }, }; @@ -218,7 +218,7 @@ where impl FieldTranslationData for FftFieldTranslationKiFmm where - T: Scalar + Float + Default + Fft, FftMatrix>>, + T: Scalar + Float + Default + Fft, Complex: Scalar, U: Kernel + Default, { @@ -340,25 +340,20 @@ where // Compute Green's fct evaluations let kernel = self.compute_kernel(order, &conv_grid, kernel_point); - let padded_kernel = pad3(&kernel, (p - m, p - m, p - m), (0, 0, 0)); - let mut padded_kernel = flip3(&padded_kernel); + let mut kernel = flip3(&kernel); // Compute FFT of padded kernel - let mut padded_kernel_hat = Array3D::>::new((p, p, p / 2 + 1)); + let mut kernel_hat = Array3D::>::new((p, p, p / 2 + 1)); - T::rfft3_fftw( - padded_kernel.get_data_mut(), - padded_kernel_hat.get_data_mut(), - &[p, p, p], - ); + T::rfft3_fftw(kernel.get_data_mut(), kernel_hat.get_data_mut(), &[p, p, p]); - kernel_data_vec[i].push(padded_kernel_hat); + kernel_data_vec[i].push(kernel_hat); } else { // Fill with zeros when interaction doesn't exist let n = 2 * order - 1; let p = n + 1; - let padded_kernel_hat_zeros = Array3D::>::new((p, p, p / 2 + 1)); - kernel_data_vec[i].push(padded_kernel_hat_zeros); + let kernel_hat_zeros = Array3D::>::new((p, p, p / 2 + 1)); + kernel_data_vec[i].push(kernel_hat_zeros); } } } @@ -396,7 +391,7 @@ where 
FftM2lOperatorData { kernel_data, - kernel_data_rearranged, + kernel_data_f: kernel_data_rearranged, } } @@ -407,7 +402,7 @@ where impl FftFieldTranslationKiFmm where - T: Float + Scalar + Default + Fft, FftMatrix>>, + T: Float + Scalar + Default + Fft, Complex: Scalar, U: Kernel + Default, { @@ -422,8 +417,8 @@ where let mut result = FftFieldTranslationKiFmm { alpha, kernel, - surf_to_conv_map: HashMap::default(), - conv_to_surf_map: HashMap::default(), + surf_to_conv_map: Vec::default(), + conv_to_surf_map: Vec::default(), operator_data: FftM2lOperatorData::default(), transfer_vectors: Vec::default(), }; @@ -445,33 +440,52 @@ where /// /// # Arguments /// * `order` - The expansion order for the multipole and local expansions. - pub fn compute_surf_to_conv_map( - order: usize, - ) -> (HashMap, HashMap) { + pub fn compute_surf_to_conv_map(order: usize) -> (Vec, Vec) { // Number of points along each axis of convolution grid let n = 2 * order - 1; + let npad = n + 1; + + let nsurf_grid = 6 * (order - 1).pow(2) + 2; // Index maps between surface and convolution grids - let mut surf_to_conv: HashMap = HashMap::new(); - let mut conv_to_surf: HashMap = HashMap::new(); + let mut surf_to_conv = vec![0usize; nsurf_grid]; + let mut conv_to_surf = vec![0usize; nsurf_grid]; // Initialise surface grid index let mut surf_index = 0; // The boundaries of the surface grid when embedded within the convolution grid - let lower = order - 1; - let upper = 2 * order - 2; + let lower = order; + let upper = 2 * order - 1; - for k in 0..n { - for j in 0..n { - for i in 0..n { - let conv_index = i + n * j + n * n * k; + for k in 0..npad { + for j in 0..npad { + for i in 0..npad { + let conv_index = i + npad * j + npad * npad * k; if (i >= lower && j >= lower && (k == lower || k == upper)) || (j >= lower && k >= lower && (i == lower || i == upper)) || (k >= lower && i >= lower && (j == lower || j == upper)) { - surf_to_conv.insert(surf_index, conv_index); - conv_to_surf.insert(conv_index, 
surf_index); + surf_to_conv[surf_index] = conv_index; + surf_index += 1; + } + } + } + } + + let lower = 0; + let upper = order - 1; + let mut surf_index = 0; + + for k in 0..npad { + for j in 0..npad { + for i in 0..npad { + let conv_index = i + npad * j + npad * npad * k; + if (i <= upper && j <= upper && (k == lower || k == upper)) + || (j <= upper && k <= upper && (i == lower || i == upper)) + || (k <= upper && i <= upper && (j == lower || j == upper)) + { + conv_to_surf[surf_index] = conv_index; surf_index += 1; } } @@ -494,11 +508,12 @@ where target_pt: [T; 3], ) -> Array3D { let n = 2 * order - 1; - let mut result = Array3D::::new((n, n, n)); - let nconv = n.pow(3); + let npad = n + 1; - let mut kernel_evals = vec![T::zero(); nconv]; + let mut result = Array3D::::new((npad, npad, npad)); + let nconv = n.pow(3); + let mut kernel_evals = vec![T::zero(); nconv]; self.kernel.assemble_st( EvalType::Value, convolution_grid, @@ -506,7 +521,16 @@ where &mut kernel_evals[..], ); - result.get_data_mut().copy_from_slice(&kernel_evals[..]); + for k in 0..n { + for j in 0..n { + for i in 0..n { + let conv_idx = i + j * n + k * n * n; + let save_idx = i + j * npad + k * npad * npad; + result.get_data_mut()[save_idx..(save_idx + 1)] + .copy_from_slice(&kernel_evals[(conv_idx)..(conv_idx + 1)]); + } + } + } result } @@ -518,26 +542,14 @@ where /// * `charges` - A vector of charges. 
pub fn compute_signal(&self, order: usize, charges: &[T]) -> Array3D { let n = 2 * order - 1; - let n_tot = n * n * n; - let mut result = Array3D::new((n, n, n)); + let npad = n + 1; - let mut tmp = vec![T::zero(); n_tot]; + let mut result = Array3D::new((npad, npad, npad)); - for k in 0..n { - for j in 0..n { - for i in 0..n { - let conv_index = i + n * j + n * n * k; - if let Some(surf_index) = self.conv_to_surf_map.get(&conv_index) { - tmp[conv_index] = charges[*surf_index]; - } else { - tmp[conv_index] = T::zero(); - } - } - } + for (i, &j) in self.surf_to_conv_map.iter().enumerate() { + result.get_data_mut()[j] = charges[i]; } - result.get_data_mut().copy_from_slice(&tmp[..]); - result } } @@ -795,23 +807,19 @@ mod test { // Compute kernel from source/target pair let test_kernel = fft.compute_kernel(order, &conv_grid, kernel_point); - let (m, n, o) = test_kernel.shape(); - let p = m + 1; - let q = n + 1; - let r = o + 1; + let &(m, n, o) = test_kernel.shape(); - let padded_kernel = pad3(&test_kernel, (p - m, q - n, r - o), (0, 0, 0)); - let mut padded_kernel = flip3(&padded_kernel); + let mut test_kernel = flip3(&test_kernel); // Compute FFT of padded kernel - let mut padded_kernel_hat = Array3D::::new((p, q, r / 2 + 1)); + let mut test_kernel_hat = Array3D::::new((m, n, o / 2 + 1)); f64::rfft3_fftw( - padded_kernel.get_data_mut(), - padded_kernel_hat.get_data_mut(), - &[p, q, r], + test_kernel.get_data_mut(), + test_kernel_hat.get_data_mut(), + &[m, n, o], ); - for (p, t) in padded_kernel_hat.get_data().iter().zip(kernel_hat.iter()) { + for (p, t) in test_kernel_hat.get_data().iter().zip(kernel_hat.iter()) { assert!((p - t).norm() < 1e-6) } } @@ -903,21 +911,11 @@ mod test { let transfer_vector = &all_transfer_vectors[idx]; // Compute FFT of the representative signal - let signal = fft.compute_signal(order, multipole.data()); + let mut signal = fft.compute_signal(order, multipole.data()); let &(m, n, o) = signal.shape(); - let p = m + 1; - let q = n + 1; - 
let r = o + 1; - let pad_size = (p - m, q - n, r - o); - let pad_index = (p - m, q - n, r - o); - let mut padded_signal = pad3(&signal, pad_size, pad_index); - let mut padded_signal_hat = Array3D::::new((p, q, r / 2 + 1)); + let mut signal_hat = Array3D::::new((m, n, o / 2 + 1)); - f64::rfft3_fftw( - padded_signal.get_data_mut(), - padded_signal_hat.get_data_mut(), - &[p, q, r], - ); + f64::rfft3_fftw(signal.get_data_mut(), signal_hat.get_data_mut(), &[m, n, o]); let source_equivalent_surface = transfer_vector .source @@ -953,51 +951,35 @@ mod test { // Compute kernel let kernel = fft.compute_kernel(order, &conv_grid, kernel_point); - let (m, n, o) = kernel.shape(); - let p = m + 1; - let q = n + 1; - let r = o + 1; + let &(m, n, o) = kernel.shape(); - let padded_kernel = pad3(&kernel, (p - m, q - n, r - o), (0, 0, 0)); - let mut padded_kernel = flip3(&padded_kernel); + let mut kernel = flip3(&kernel); // Compute FFT of padded kernel - let mut padded_kernel_hat = Array3D::::new((p, q, r / 2 + 1)); - f64::rfft3_fftw( - padded_kernel.get_data_mut(), - padded_kernel_hat.get_data_mut(), - &[p, q, r], - ); + let mut kernel_hat = Array3D::::new((m, n, o / 2 + 1)); + f64::rfft3_fftw(kernel.get_data_mut(), kernel_hat.get_data_mut(), &[m, n, o]); // Compute convolution - let hadamard_product = padded_signal_hat + let hadamard_product = signal_hat .get_data() .iter() - .zip(padded_kernel_hat.get_data().iter()) + .zip(kernel_hat.get_data().iter()) .map(|(a, b)| a * b) .collect_vec(); - let mut hadamard_product = Array3D::from_data(hadamard_product, (p, q, r / 2 + 1)); + let mut hadamard_product = Array3D::from_data(hadamard_product, (m, n, o / 2 + 1)); - let mut potentials = Array3D::new((p, q, r)); + let mut potentials = Array3D::new((m, n, o)); f64::irfft3_fftw( hadamard_product.get_data_mut(), potentials.get_data_mut(), - &[p, q, r], + &[m, n, o], ); - let (_, multi_indices) = MortonKey::surface_grid::(order); - - let mut tmp = Vec::new(); - let ntargets = 
multi_indices.len() / 3; - let xs = &multi_indices[0..ntargets]; - let ys = &multi_indices[ntargets..2 * ntargets]; - let zs = &multi_indices[2 * ntargets..]; - - for i in 0..ntargets { - let val = potentials.get(zs[i], ys[i], xs[i]).unwrap(); - tmp.push(*val); + let mut result = vec![0f64; ntargets]; + for (i, &idx) in fft.conv_to_surf_map.iter().enumerate() { + result[i] = potentials.get_data()[idx]; } // Get direct evaluations for testing @@ -1010,7 +992,7 @@ mod test { &mut direct[..], ); - let abs_error: f64 = tmp + let abs_error: f64 = result .iter() .zip(direct.iter()) .map(|(a, b)| (a - b).abs()) diff --git a/field/src/types.rs b/field/src/types.rs index 9b82f2df..61177fa7 100644 --- a/field/src/types.rs +++ b/field/src/types.rs @@ -1,7 +1,4 @@ //! Types for storing field translation data. -use std::collections::HashMap; - -use cauchy::{c32, c64}; use num::{Complex, Float}; use rlst::{ common::traits::{Eval, NewLikeSelf}, @@ -33,10 +30,10 @@ where pub alpha: T, /// Map between indices of surface convolution grid points. - pub surf_to_conv_map: HashMap, + pub surf_to_conv_map: Vec, /// Map between indices of convolution and surface grid points. - pub conv_to_surf_map: HashMap, + pub conv_to_surf_map: Vec, /// Precomputed data required for FFT compressed M2L interaction. pub operator_data: FftM2lOperatorData>, @@ -90,7 +87,9 @@ pub struct TransferVector { pub struct FftM2lOperatorData { // FFT of unique kernel evaluations for each transfer vector in a halo of a sibling set pub kernel_data: FftKernelData, - pub kernel_data_rearranged: FftKernelData, + + // FFT of unique kernel evaluations for each transfer vector in a halo of a sibling set, re-arranged in frequency order + pub kernel_data_f: FftKernelData, } /// Container to store precomputed data required for SVD field translations. 
@@ -126,23 +125,3 @@ where } } } - -/// Type alias for real coefficients for into FFTW wrappers -// pub type FftMatrixf64 = Matrix, Dynamic>, Dynamic>; -pub type FftMatrixf64 = Vec; - -/// Type alias for real coefficients for into FFTW wrappers -// pub type FftMatrixf32 = Matrix, Dynamic>, Dynamic>; -pub type FftMatrixf32 = Vec; - -/// Type alias for complex coefficients for FFTW wrappers -// pub type FftMatrixc64 = Matrix, Dynamic>, Dynamic>; -pub type FftMatrixc64 = Vec; - -/// Type alias for complex coefficients for FFTW wrappers -// pub type FftMatrixc32 = Matrix, Dynamic>, Dynamic>; -pub type FftMatrixc32 = Vec; - -/// Type alias for real coefficients for into FFTW wrappers -// pub type FftMatrix = Matrix, Dynamic>, Dynamic>; -pub type FftMatrix = Vec; diff --git a/fmm/src/field_translation/hadamard.rs b/fmm/src/field_translation/hadamard.rs new file mode 100644 index 00000000..0b2a843e --- /dev/null +++ b/fmm/src/field_translation/hadamard.rs @@ -0,0 +1,24 @@ +//! Implementations of 8x8 operation for Hadamard product in FFT based M2L operations. + +use bempp_traits::types::Scalar; +use num::complex::Complex; + +#[inline(always)] +pub unsafe fn matmul8x8x2( + kernel_data_freq: &[Complex], + signal: &[Complex], + save_locations: &mut [Complex], + scale: Complex, +) where + U: Scalar, +{ + for j in 0..8 { + let kernel_data_j = &kernel_data_freq[j * 8..(j + 1) * 8]; + let sig = signal[j]; + + save_locations + .iter_mut() + .zip(kernel_data_j.iter()) + .for_each(|(sav, &ker)| *sav += scale * ker * sig) + } +} diff --git a/fmm/src/field_translation/hashmap/source.rs b/fmm/src/field_translation/hashmap/source.rs deleted file mode 100644 index 136fbb8a..00000000 --- a/fmm/src/field_translation/hashmap/source.rs +++ /dev/null @@ -1,210 +0,0 @@ -//! Implementation of Source and Target translations, as well as Source to Target translation. 
-use std::{ - ops::{Deref, DerefMut}, - sync::Arc, -}; - -use itertools::Itertools; -use num::Float; -use rayon::prelude::*; - -use bempp_traits::{ - field::FieldTranslationData, - fmm::{Fmm, SourceTranslation}, - kernel::{Kernel, ScaleInvariantKernel}, - tree::Tree, - types::EvalType, -}; -use bempp_tree::types::single_node::SingleNodeTree; - -use rlst::{ - common::traits::*, - dense::{rlst_col_vec, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer}, -}; - -use crate::types::{FmmDataHashmap, KiFmmHashMap}; - -impl SourceTranslation for FmmDataHashmap, T, U, V>, V> -where - T: Kernel + ScaleInvariantKernel + std::marker::Send + std::marker::Sync, - U: FieldTranslationData + std::marker::Sync + std::marker::Send, - V: Scalar + Float + Default + std::marker::Sync + std::marker::Send, - V: MultiplyAdd< - V, - VectorContainer, - VectorContainer, - VectorContainer, - Dynamic, - Dynamic, - Dynamic, - >, -{ - /// Point to multipole evaluations, multithreaded over each leaf box. - fn p2m<'a>(&self) { - if let Some(leaves) = self.fmm.tree().get_all_leaves() { - leaves.par_iter().for_each(move |&leaf| { - - let leaf_multipole_arc = Arc::clone(self.multipoles.get(&leaf).unwrap()); - - if let Some(leaf_points) = self.points.get(&leaf) { - let leaf_charges_arc = Arc::clone(self.charges.get(&leaf).unwrap()); - - // Lookup data - let leaf_coordinates = leaf_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let nsources = leaf_coordinates.len() / self.fmm.kernel.space_dimension(); - - let leaf_coordinates = unsafe { - rlst_pointer_mat!['a, V, leaf_coordinates.as_ptr(), (nsources, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] - }.eval(); - - let upward_check_surface = leaf.compute_surface( - &self.fmm.tree().domain, - self.fmm.order, - self.fmm.alpha_outer, - ); - let ntargets = upward_check_surface.len() / self.fmm.kernel.space_dimension(); - - let leaf_charges = 
leaf_charges_arc.deref(); - - // Calculate check potential - let mut check_potential = rlst_col_vec![V, ntargets]; - - self.fmm.kernel.evaluate_st( - EvalType::Value, - leaf_coordinates.data(), - &upward_check_surface[..], - &leaf_charges[..], - check_potential.data_mut(), - ); - - let mut tmp = self.fmm.uc2e_inv_1.dot(&self.fmm.uc2e_inv_2.dot(&check_potential)).eval(); - tmp.data_mut().iter_mut().for_each(|d| *d *= self.fmm.kernel.scale(leaf.level())); - let leaf_multipole_owned = tmp; - let mut leaf_multipole_lock = leaf_multipole_arc.lock().unwrap(); - *leaf_multipole_lock.deref_mut() = (leaf_multipole_lock.deref() + leaf_multipole_owned).eval(); - } - }); - } - } - - /// Multipole to multipole translations, multithreaded over all boxes at a given level. - fn m2m<'a>(&self, level: u64) { - // Parallelise over nodes at a given level - if let Some(sources) = self.fmm.tree().get_keys(level) { - sources.par_iter().for_each(move |&source| { - let operator_index = source.siblings().iter().position(|&x| x == source).unwrap(); - let source_multipole_arc = Arc::clone(self.multipoles.get(&source).unwrap()); - let target_multipole_arc = - Arc::clone(self.multipoles.get(&source.parent()).unwrap()); - - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - let target_multipole_owned = - self.fmm.m2m[operator_index].dot(&source_multipole_lock); - - let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); - - *target_multipole_lock.deref_mut() = - (target_multipole_lock.deref() + target_multipole_owned).eval(); - }) - } - } -} - -#[cfg(test)] -mod test { - - use super::*; - - use crate::charge::build_charge_dict; - use bempp_field::types::SvdFieldTranslationKiFmm; - use bempp_kernel::laplace_3d::Laplace3dKernel; - use bempp_tree::{constants::ROOT, implementations::helpers::points_fixture}; - - #[test] - fn test_upward_pass() { - let npoints = 10000; - let points = points_fixture(npoints, None, None); - let global_idxs = (0..npoints).collect_vec(); 
- let charges = vec![1.0; npoints]; - - let kernel = Laplace3dKernel::::default(); - let order = 6; - let alpha_inner = 1.05; - let alpha_outer = 2.95; - let adaptive = false; - let k = 1000; - let ncrit = 150; - let depth = 3; - - // Create a tree - let tree = SingleNodeTree::new( - points.data(), - adaptive, - Some(ncrit), - Some(depth), - &global_idxs[..], - ); - - // Precompute the M2L data - let m2l_data_svd = SvdFieldTranslationKiFmm::new( - kernel.clone(), - Some(k), - order, - *tree.get_domain(), - alpha_inner, - ); - let fmm = KiFmmHashMap::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - - // Form charge dict, matching charges with their associated global indices - let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); - - // Associate data with the FMM - let datatree = FmmDataHashmap::new(fmm, &charge_dict); - - // Upward pass - { - datatree.p2m(); - - for level in (1..=depth).rev() { - datatree.m2m(level); - } - } - - let multipole = datatree.multipoles.get(&ROOT).unwrap(); - - let surface = - ROOT.compute_surface(&datatree.fmm.tree().domain, order, datatree.fmm.alpha_inner); - - let test_point = vec![100000., 0., 0.]; - - let mut expected = vec![0.]; - let mut found = vec![0.]; - - let kernel = Laplace3dKernel::::default(); - kernel.evaluate_st( - EvalType::Value, - points.data(), - &test_point, - &charges, - &mut expected, - ); - - kernel.evaluate_st( - EvalType::Value, - &surface, - &test_point, - multipole.lock().unwrap().data(), - &mut found, - ); - - let abs_error = (expected[0] - found[0]).abs(); - let rel_error = abs_error / expected[0]; - assert!(rel_error <= 1e-5); - } -} diff --git a/fmm/src/field_translation/hashmap/source_to_target.rs b/fmm/src/field_translation/hashmap/source_to_target.rs deleted file mode 100644 index d179bd5e..00000000 --- a/fmm/src/field_translation/hashmap/source_to_target.rs +++ /dev/null @@ -1,472 +0,0 @@ -//! 
Implementation of Source and Target translations, as well as Source to Target translation. -use std::{ - collections::HashMap, - ops::{Deref, DerefMut}, - sync::{Arc, Mutex, RwLock}, -}; - -use bempp_tools::Array3D; -use itertools::Itertools; -use num::{Complex, Float}; -use rayon::prelude::*; - -use bempp_field::{ - array::pad3, - fft::Fft, - types::{FftFieldTranslationKiFmm, FftMatrix, SvdFieldTranslationKiFmm}, -}; - -use bempp_traits::{ - arrays::Array3DAccess, - field::{FieldTranslation, FieldTranslationData}, - fmm::{Fmm, InteractionLists}, - kernel::{Kernel, ScaleInvariantKernel}, - tree::Tree, -}; -use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; - -use rlst::{ - algorithms::{linalg::DenseMatrixLinAlgBuilder, traits::svd::Svd}, - common::traits::*, - dense::{ - rlst_col_vec, rlst_dynamic_mat, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, Shape, - VectorContainer, - }, -}; - -use crate::types::{FmmDataHashmap, KiFmmHashMap, SendPtr, SendPtrMut}; - -/// Implement the multipole to local translation operator for an SVD accelerated KiFMM on a single node. 
-impl FieldTranslation - for FmmDataHashmap, T, SvdFieldTranslationKiFmm, U>, U> -where - T: Kernel - + ScaleInvariantKernel - + std::marker::Send - + std::marker::Sync - + Default, - DenseMatrixLinAlgBuilder: Svd, - U: Scalar, - U: Float - + Default - + MultiplyAdd< - U, - VectorContainer, - VectorContainer, - VectorContainer, - Dynamic, - Dynamic, - Dynamic, - >, - U: std::marker::Send + std::marker::Sync + Default, -{ - fn m2l<'a>(&self, level: u64) { - let Some(targets) = self.fmm.tree().get_keys(level) else { - return; - }; - let mut transfer_vector_to_m2l = - HashMap::>>>::new(); - - for tv in self.fmm.m2l.transfer_vectors.iter() { - transfer_vector_to_m2l.insert(tv.hash, Arc::new(Mutex::new(Vec::new()))); - } - - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - - targets.par_iter().enumerate().for_each(|(_i, &target)| { - if let Some(v_list) = self.fmm.get_v_list(&target) { - let calculated_transfer_vectors = v_list - .iter() - .map(|source| target.find_transfer_vector(source)) - .collect::>(); - for (transfer_vector, &source) in - calculated_transfer_vectors.iter().zip(v_list.iter()) - { - let m2l_arc = Arc::clone(transfer_vector_to_m2l.get(transfer_vector).unwrap()); - let mut m2l_lock = m2l_arc.lock().unwrap(); - m2l_lock.push((source, target)); - } - } - }); - - let mut transfer_vector_to_m2l_rw_lock = - HashMap::>>>::new(); - - // Find all multipole expansions and allocate - for (&transfer_vector, m2l_arc) in transfer_vector_to_m2l.iter() { - transfer_vector_to_m2l_rw_lock.insert( - transfer_vector, - Arc::new(RwLock::new(m2l_arc.lock().unwrap().clone())), - ); - } - - transfer_vector_to_m2l_rw_lock - .par_iter() - .for_each(|(transfer_vector, m2l_arc)| { - let c_idx = self - .fmm - .m2l - .transfer_vectors - .iter() - .position(|x| x.hash == *transfer_vector) - .unwrap(); - - let (nrows, _) = self.fmm.m2l.operator_data.c.shape(); - let top_left = (0, c_idx * self.fmm.m2l.k); - let dim = (nrows, self.fmm.m2l.k); - - let c_sub = 
self.fmm.m2l.operator_data.c.block(top_left, dim); - - let m2l_rw = m2l_arc.read().unwrap(); - let mut multipoles = rlst_dynamic_mat![U, (self.fmm.m2l.k, m2l_rw.len())]; - - for (i, (source, _)) in m2l_rw.iter().enumerate() { - let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - // Compressed multipole - let compressed_source_multipole_owned = self - .fmm - .m2l - .operator_data - .st_block - .dot(&source_multipole_lock) - .eval(); - - let first = i * self.fmm.m2l.k; - let last = first + self.fmm.m2l.k; - - let multipole_slice = multipoles.get_slice_mut(first, last); - multipole_slice.copy_from_slice(compressed_source_multipole_owned.data()); - } - - // Compute convolution - let compressed_check_potential_owned = c_sub.dot(&multipoles); - - // Post process to find check potential - let check_potential_owned = self - .fmm - .m2l - .operator_data - .u - .dot(&compressed_check_potential_owned) - .eval(); - - let mut tmp = self - .fmm - .dc2e_inv_1 - .dot(&self.fmm.dc2e_inv_2.dot(&check_potential_owned)); - - tmp.data_mut() - .iter_mut() - .for_each(|d| *d *= self.fmm.kernel.scale(level) * self.m2l_scale(level)); - let locals_owned = tmp; - - // Assign locals - for (i, (_, target)) in m2l_rw.iter().enumerate() { - let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); - let mut target_local_lock = target_local_arc.lock().unwrap(); - - let top_left = (0, i); - let dim = (ncoeffs, 1); - let target_local_owned = locals_owned.block(top_left, dim); - - *target_local_lock.deref_mut() = - (target_local_lock.deref() + target_local_owned).eval(); - } - }); - } - - fn m2l_scale(&self, level: u64) -> U { - if level < 2 { - panic!("M2L only perfomed on level 2 and below") - } - - if level == 2 { - U::from(1. 
/ 2.).unwrap() - } else { - let two = U::from(2.0).unwrap(); - Scalar::powf(two, U::from(level - 3).unwrap()) - } - } -} - -/// Implement the multipole to local translation operator for an FFT accelerated KiFMM on a single node. -impl FieldTranslation - for FmmDataHashmap, T, FftFieldTranslationKiFmm, U>, U> -where - T: Kernel - + ScaleInvariantKernel - + std::marker::Send - + std::marker::Sync - + Default, - U: Scalar - + Float - + Default - + std::marker::Send - + std::marker::Sync - + Fft, FftMatrix>>, - Complex: Scalar, - U: MultiplyAdd< - U, - VectorContainer, - VectorContainer, - VectorContainer, - Dynamic, - Dynamic, - Dynamic, - >, -{ - fn m2l<'a>(&self, level: u64) { - let Some(targets) = self.fmm.tree().get_keys(level) else { - return; - }; - - // Form signals to use for convolution first - let n = 2 * self.fmm.order - 1; - let ntargets = targets.len(); - - // Pad the signal - let &(m, n, o) = &(n, n, n); - - let p = m + 1; - let q = n + 1; - let r = o + 1; - let size = p * q * r; - let size_real = p * q * (r / 2 + 1); - let pad_size = (p - m, q - n, r - o); - let pad_index = (p - m, q - n, r - o); - // let mut padded_signals = rlst_col_vec![U, size * ntargets]; - let mut padded_signals = vec![U::zero(); size * ntargets]; - - // let chunks = padded_signals.data_mut().par_chunks_exact_mut(size); - let chunks = padded_signals.par_chunks_exact_mut(size); - - let range = (0..chunks.len()).into_par_iter(); - range.zip(chunks).for_each(|(i, chunk)| { - let target = targets[i]; - let source_multipole_arc = Arc::clone(self.multipoles.get(&target).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - let signal = self - .fmm - .m2l - .compute_signal(self.fmm.order, source_multipole_lock.data()); - - let padded_signal = pad3(&signal, pad_size, pad_index); - - chunk.copy_from_slice(padded_signal.get_data()); - }); - // let mut padded_signals_hat = rlst_col_vec![Complex, size_real * ntargets]; - let mut padded_signals_hat = 
vec![Complex::::default(); size_real * ntargets]; - - U::rfft3_fftw_par_vec(&mut padded_signals, &mut padded_signals_hat, &[p, q, r]); - - let kernel_data_halo = &self.fmm.m2l.operator_data.kernel_data_rearranged; - let ntargets = targets.len(); - let nparents = ntargets / 8; - // let mut global_check_potentials_hat = rlst_col_vec![Complex, size_real * ntargets]; - // let mut global_check_potentials = rlst_col_vec![U, size * ntargets]; - let mut global_check_potentials_hat = vec![Complex::::default(); size_real * ntargets]; - let mut global_check_potentials = vec![U::default(); size * ntargets]; - - // Get check potentials in frequency order - let mut global_check_potentials_hat_freq = vec![Vec::new(); size_real]; - - unsafe { - // let ptr = global_check_potentials_hat.get_pointer_mut(); - let ptr = global_check_potentials_hat.as_mut_ptr(); - for (i, elem) in global_check_potentials_hat_freq - .iter_mut() - .enumerate() - .take(size_real) - { - for j in 0..ntargets { - let raw = ptr.offset((j * size_real + i).try_into().unwrap()); - let send_ptr = SendPtrMut { raw }; - elem.push(send_ptr); - } - } - } - - // Get signals into frequency order - let mut padded_signals_hat_freq = vec![Vec::new(); size_real]; - let zero = rlst_col_vec![Complex, 8]; - unsafe { - // let ptr = padded_signals_hat.get_pointer(); - let ptr = padded_signals_hat.as_ptr(); - - for (i, elem) in padded_signals_hat_freq - .iter_mut() - .enumerate() - .take(size_real) - { - for j in 0..ntargets { - let raw = ptr.offset((j * size_real + i).try_into().unwrap()); - let send_ptr = SendPtr { raw }; - elem.push(send_ptr); - } - // put in a bunch of zeros at the end - let ptr = zero.get_pointer(); - for _ in 0..8 { - let send_ptr = SendPtr { raw: ptr }; - elem.push(send_ptr) - } - } - } - - // Create a map between targets and index positions in vec of len 'ntargets' - let mut target_map = HashMap::new(); - - for (i, t) in targets.iter().enumerate() { - target_map.insert(t, i); - } - - // Find all the 
displacements used for saving results - let mut all_displacements = Vec::new(); - targets.chunks_exact(8).for_each(|sibling_chunk| { - // not in Morton order (refer to sort method when called on 'neighbours') - let parent_neighbours: Vec> = - sibling_chunk[0].parent().all_neighbors(); - - let displacements = parent_neighbours - .iter() - .map(|pn| { - let mut tmp = Vec::new(); - if let Some(pn) = pn { - if self.fmm.tree.keys_set.contains(pn) { - let mut children = pn.children(); - children.sort(); - for child in children { - // tmp.push(*target_map.get(&child).unwrap() as i64) - tmp.push(*target_map.get(&child).unwrap()) - } - } else { - for i in 0..8 { - tmp.push(ntargets + i) - } - } - } else { - for i in 0..8 { - tmp.push(ntargets + i) - } - } - - assert!(tmp.len() == 8); - tmp - }) - .collect_vec(); - all_displacements.push(displacements); - }); - - let scale = Complex::from(self.m2l_scale(level)); - - (0..size_real).into_par_iter().for_each(|freq| { - // Extract frequency component of signal (ntargets long) - let padded_signal_freq = &padded_signals_hat_freq[freq]; - - // Extract frequency components of save locations (ntargets long) - let check_potential_freq = &global_check_potentials_hat_freq[freq]; - - (0..nparents).for_each(|sibling_idx| { - // lookup associated save locations for our current sibling set - let save_locations = - &check_potential_freq[(sibling_idx * 8)..(sibling_idx + 1) * 8]; - let save_locations_raw = save_locations.iter().map(|s| s.raw).collect_vec(); - - // for each halo position compute convolutions to a given sibling set - for (i, kernel_data) in kernel_data_halo.iter().enumerate().take(26) { - let frequency_offset = 64 * freq; - let kernel_data_i = &kernel_data[frequency_offset..(frequency_offset + 64)]; - - // Find displacements for signal being translated - let displacements = &all_displacements[sibling_idx][i]; - - // Lookup signal to be translated if a translation is to be performed - let signal = 
&padded_signal_freq[(displacements[0])..=(displacements[7])]; - for j in 0..8 { - let kernel_data_ij = &kernel_data_i[j * 8..(j + 1) * 8]; - let sig = signal[j].raw; - unsafe { - save_locations_raw - .iter() - .zip(kernel_data_ij.iter()) - .for_each(|(&sav, &ker)| *sav += scale * ker * *sig) - } - } // inner loop - } - }); // over each sibling set - }); - - U::irfft_fftw_par_vec( - &mut global_check_potentials_hat, - &mut global_check_potentials, - &[p, q, r], - ); - - // Compute local expansion coefficients and save to data tree - let (_, multi_indices) = MortonKey::surface_grid::(self.fmm.order); - - let check_potentials = global_check_potentials - // .data() - .chunks_exact(size) - .flat_map(|chunk| { - let m = 2 * self.fmm.order - 1; - let p = m + 1; - let mut potentials = Array3D::new((p, p, p)); - potentials.get_data_mut().copy_from_slice(chunk); - - let mut tmp = Vec::new(); - let ntargets = multi_indices.len() / 3; - let xs = &multi_indices[0..ntargets]; - let ys = &multi_indices[ntargets..2 * ntargets]; - let zs = &multi_indices[2 * ntargets..]; - - for i in 0..ntargets { - let val = potentials.get(zs[i], ys[i], xs[i]).unwrap(); - tmp.push(*val); - } - tmp - }) - .collect_vec(); - - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - let check_potentials = unsafe { - rlst_pointer_mat!['a, U, check_potentials.as_ptr(), (ncoeffs, ntargets), (1, ncoeffs)] - }; - - let mut tmp = self - .fmm - .dc2e_inv_1 - .dot(&self.fmm.dc2e_inv_2.dot(&check_potentials)) - .eval(); - tmp.data_mut() - .iter_mut() - .for_each(|d| *d *= self.fmm.kernel.scale(level)); - let locals = tmp; - - for (i, target) in targets.iter().enumerate() { - let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); - let mut target_local_lock = target_local_arc.lock().unwrap(); - - let top_left = (0, i); - let dim = (ncoeffs, 1); - let target_local_owned = locals.block(top_left, dim); - - *target_local_lock.deref_mut() = - (target_local_lock.deref() + target_local_owned).eval(); - } - 
} - - fn m2l_scale(&self, level: u64) -> U { - if level < 2 { - panic!("M2L only perfomed on level 2 and below") - } - - if level == 2 { - U::from(1. / 2.).unwrap() - } else { - let two = U::from(2.0).unwrap(); - Scalar::powf(two, U::from(level - 3).unwrap()) - } - } -} diff --git a/fmm/src/field_translation/hashmap/target.rs b/fmm/src/field_translation/hashmap/target.rs deleted file mode 100644 index 0d3b538e..00000000 --- a/fmm/src/field_translation/hashmap/target.rs +++ /dev/null @@ -1,278 +0,0 @@ -//! Implementation of Source and Target translations, as well as Source to Target translation. -use std::{ - ops::{Deref, DerefMut}, - sync::Arc, -}; - -use itertools::Itertools; -use num::Float; -use rayon::prelude::*; - -use bempp_traits::{ - field::FieldTranslationData, - fmm::{Fmm, InteractionLists, TargetTranslation}, - kernel::{Kernel, ScaleInvariantKernel}, - tree::Tree, - types::EvalType, -}; -use bempp_tree::types::single_node::SingleNodeTree; - -use rlst::{ - common::traits::*, - dense::{rlst_col_vec, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer}, -}; - -use crate::types::{FmmDataHashmap, KiFmmHashMap}; - -impl TargetTranslation for FmmDataHashmap, T, U, V>, V> -where - T: Kernel + ScaleInvariantKernel + std::marker::Send + std::marker::Sync, - U: FieldTranslationData + std::marker::Sync + std::marker::Send, - V: Scalar + Float + Default + std::marker::Sync + std::marker::Send, - V: MultiplyAdd< - V, - VectorContainer, - VectorContainer, - VectorContainer, - Dynamic, - Dynamic, - Dynamic, - >, -{ - fn l2l(&self, level: u64) { - if let Some(targets) = self.fmm.tree().get_keys(level) { - targets.par_iter().for_each(move |&target| { - let source_local_arc = Arc::clone(self.locals.get(&target.parent()).unwrap()); - let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); - - let operator_index = target.siblings().iter().position(|&x| x == target).unwrap(); - - let source_local_lock = source_local_arc.lock().unwrap(); - - let 
target_local_owned = self.fmm.l2l[operator_index].dot(&source_local_lock); - let mut target_local_lock = target_local_arc.lock().unwrap(); - - *target_local_lock.deref_mut() = - (target_local_lock.deref() + target_local_owned).eval(); - }) - } - } - - fn m2p<'a>(&self) { - if let Some(targets) = self.fmm.tree().get_all_leaves() { - targets.par_iter().for_each(move |&target| { - - - if let Some(points) = self.fmm.tree().get_points(&target) { - let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); - if let Some(w_list) = self.fmm.get_w_list(&target) { - for source in w_list.iter() { - let source_multipole_arc = - Arc::clone(self.multipoles.get(source).unwrap()); - - let upward_equivalent_surface = source.compute_surface( - self.fmm.tree().get_domain(), - self.fmm.order(), - self.fmm.alpha_inner, - ); - - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - let target_coordinates = points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); - - let target_coordinates = unsafe { - rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] - }.eval(); - - - let mut target_potential = rlst_col_vec![V, ntargets]; - - self.fmm.kernel.evaluate_st( - EvalType::Value, - &upward_equivalent_surface[..], - target_coordinates.data(), - source_multipole_lock.data(), - target_potential.data_mut(), - ); - - let mut target_potential_lock = target_potential_arc.lock().unwrap(); - - *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); - } - } - } - } -) - } - } - - fn l2p<'a>(&self) { - if let Some(targets) = self.fmm.tree().get_all_leaves() { - targets.par_iter().for_each(move |&leaf| { - let source_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); - - if let Some(target_points) = 
self.fmm.tree().get_points(&leaf) { - let target_potential_arc = Arc::clone(self.potentials.get(&leaf).unwrap()); - // Lookup data - let target_coordinates = target_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); - - let target_coordinates = unsafe { - rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] - }.eval(); - - let downward_equivalent_surface = leaf.compute_surface( - &self.fmm.tree().domain, - self.fmm.order, - self.fmm.alpha_outer, - ); - - let source_local_lock = source_local_arc.lock().unwrap(); - - let mut target_potential = rlst_col_vec![V, ntargets]; - - self.fmm.kernel.evaluate_st( - EvalType::Value, - &downward_equivalent_surface[..], - target_coordinates.data(), - source_local_lock.data(), - target_potential.data_mut(), - ); - - let mut target_potential_lock = target_potential_arc.lock().unwrap(); - - *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); - } - }) - } - } - - fn p2l<'a>(&self) { - if let Some(targets) = self.fmm.tree().get_all_leaves() { - targets.par_iter().for_each(move |&leaf| { - let target_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); - - if let Some(x_list) = self.fmm.get_x_list(&leaf) { - for source in x_list.iter() { - if let Some(source_points) = self.fmm.tree().get_points(source) { - let source_coordinates = source_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); - - let source_coordinates = unsafe { - rlst_pointer_mat!['a, V, source_coordinates.as_ptr(), (nsources, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] - }.eval(); - - let source_charges = self.charges.get(source).unwrap(); - - let 
downward_check_surface = leaf.compute_surface( - &self.fmm.tree().domain, - self.fmm.order, - self.fmm.alpha_inner, - ); - - let ntargets = downward_check_surface.len() / self.fmm.kernel.space_dimension(); - let mut downward_check_potential = rlst_col_vec![V, ntargets]; - - self.fmm.kernel.evaluate_st( - EvalType::Value, - source_coordinates.data(), - &downward_check_surface[..], - &source_charges[..], - downward_check_potential.data_mut() - ); - - - let mut target_local_lock = target_local_arc.lock().unwrap(); - let mut tmp = self.fmm.dc2e_inv_1.dot(&self.fmm.dc2e_inv_2.dot(&downward_check_potential)).eval(); - tmp.data_mut().iter_mut().for_each(|d| *d *= self.fmm.kernel.scale(leaf.level())); - let target_local_owned = tmp; - *target_local_lock.deref_mut() = (target_local_lock.deref() + target_local_owned).eval(); - } - } - } - }) - } - } - - fn p2p<'a>(&self) { - if let Some(targets) = self.fmm.tree().get_all_leaves() { - targets.par_iter().for_each(move |&target| { - - if let Some(target_points) = self.fmm.tree().get_points(&target) { - let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); - let target_coordinates = target_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let ntargets= target_coordinates.len() / self.fmm.kernel.space_dimension(); - - let target_coordinates = unsafe { - rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] - }.eval(); - - if let Some(u_list) = self.fmm.get_u_list(&target) { - - for source in u_list.iter() { - if let Some(source_points) = self.fmm.tree().get_points(source) { - let source_coordinates = source_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); - - let source_coordinates = unsafe { - rlst_pointer_mat!['a, V, 
source_coordinates.as_ptr(), (nsources, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] - }.eval(); - - let source_charges_arc = - Arc::clone(self.charges.get(source).unwrap()); - - let mut target_potential = rlst_col_vec![V, ntargets]; - - self.fmm.kernel.evaluate_st( - EvalType::Value, - source_coordinates.data(), - target_coordinates.data(), - &source_charges_arc[..], - target_potential.data_mut(), - ); - - let mut target_potential_lock = - target_potential_arc.lock().unwrap(); - - *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); - } - } - - - let target_potential_lock = - target_potential_arc.lock().unwrap(); - - if target.morton == 3 { - println!("local result {:?}", target_potential_lock.data()); - } - } - } - }) - } - } -} diff --git a/fmm/src/field_translation/linear/source_to_target.rs b/fmm/src/field_translation/linear/source_to_target.rs deleted file mode 100644 index 26777865..00000000 --- a/fmm/src/field_translation/linear/source_to_target.rs +++ /dev/null @@ -1,611 +0,0 @@ -//! kiFMM based on simple linear data structures that minimises memory allocations, maximises cache re-use. 
-use bempp_tools::Array3D; - -use itertools::Itertools; -use num::{Complex, Float}; -use rayon::prelude::*; -use std::collections::{HashMap, HashSet}; - -use bempp_field::{ - array::pad3, - fft::Fft, - types::{FftFieldTranslationKiFmm, FftMatrix, SvdFieldTranslationKiFmm}, -}; - -use bempp_traits::{ - arrays::Array3DAccess, - field::{FieldTranslation, FieldTranslationData}, - fmm::Fmm, - kernel::{Kernel, ScaleInvariantKernel}, - tree::Tree, -}; -use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; - -use crate::types::{FmmDataLinear, KiFmmLinear, SendPtrMut}; -use rlst::{ - algorithms::{linalg::DenseMatrixLinAlgBuilder, traits::svd::Svd}, - common::traits::*, - dense::{rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer}, -}; - -pub fn size_real(order: usize) -> usize { - let m = 2 * order - 1; // Size of each dimension of 3D kernel/signal - let pad_size = 1; - let p = m + pad_size; // Size of each dimension of padded 3D kernel/signal - p * p * (p / 2 + 1) // Number of Fourier coefficients when working with real data -} - -pub fn nleaves(level: usize) -> usize { - 8i32.pow(level as u32) as usize -} - -pub fn nparents(level: usize) -> usize { - 8i32.pow((level - 1) as u32) as usize -} - -pub fn signal_freq_order_cplx_optimized( - order: usize, - level: usize, - signal: &[Complex], -) -> Vec>> -where - U: Scalar, -{ - let size_real = size_real(order); - let nleaves = nleaves(level); - - (0..size_real) - .map(|i| { - let mut tmp = (0..nleaves) - .map(|j| signal[j * size_real + i]) - .collect::>(); - - // Pad with zeros - tmp.extend(std::iter::repeat(Complex::new(U::zero(), U::zero())).take(8)); - - tmp - }) - .collect() -} - -pub unsafe fn check_potentials_freq_cplx( - order: usize, - level: usize, - check_potentials: &mut Vec>, -) -> Vec>>> -where - U: Scalar, -{ - let size_real = size_real(order); // Number of Fourier coefficients when working with real data - let nleaves = nleaves(level); - let mut check_potentials_freq = 
vec![Vec::new(); size_real]; - - let ptr = check_potentials.as_mut_ptr(); - - for (i, elem) in check_potentials_freq.iter_mut().enumerate().take(size_real) { - for j in 0..nleaves { - let raw = ptr.offset((j * size_real + i).try_into().unwrap()); - let send_ptr = SendPtrMut { raw }; - elem.push(send_ptr); - } - } - - check_potentials_freq -} - -pub fn displacements( - tree: &SingleNodeTree, - level: u64, - target_map_leaves: &HashMap<&MortonKey, usize>, -) -> Vec>> -where - U: Float + Default + Scalar, -{ - let leaves = tree.get_keys(level).unwrap(); - let nleaves = leaves.len(); - - let mut all_displacements = Vec::new(); - - leaves.chunks_exact(8).for_each(|sibling_chunk| { - let parent_neighbours = sibling_chunk[0].parent().all_neighbors(); - let displacements = parent_neighbours - .iter() - .map(|pn| { - let mut tmp = Vec::new(); - - if let Some(pn) = pn { - if tree.keys_set.contains(pn) { - let mut children = pn.children(); - children.sort(); - for child in children.iter() { - tmp.push(*target_map_leaves.get(child).unwrap()) - } - } else { - for i in 0..8 { - tmp.push(nleaves + i); - } - } - } else { - for i in 0..8 { - tmp.push(nleaves + i) - } - } - tmp - }) - .collect_vec(); - all_displacements.push(displacements) - }); - - all_displacements -} - -pub fn chunked_displacements( - level: usize, - chunksize: usize, - displacements: &[Vec>], -) -> (Vec>>, Vec>) { - let mut all_save_locations = Vec::new(); - let mut all_displacements_chunked = Vec::new(); // indexed by chunk index - let nparents = nparents(level); - let nsiblings = 8; - - (0..nparents).step_by(chunksize).for_each(|chunk_start| { - let chunk_end = std::cmp::min(chunksize + chunk_start, nparents); - - // lookup save locations (head) - let save_locations = (chunk_start..chunk_end) - .map(|sibling_idx| sibling_idx * nsiblings) - .collect_vec(); - all_save_locations.push(save_locations); - - // 26 long - let mut tmp = Vec::new(); - for i in 0..26 { - // chunk_size long - let tmp2 = 
(chunk_start..chunk_end) - .map(|sibling_idx| { - // head - displacements[sibling_idx][i][0] - }) - .collect_vec(); - - tmp.push(tmp2); - } - all_displacements_chunked.push(tmp); - }); - - (all_displacements_chunked, all_save_locations) -} - -#[inline(always)] -pub unsafe fn matmul8x8x2_cplx_simple_local( - kernel_data_freq: &[Complex], - signal: &[Complex], - save_locations: &mut [Complex], - scale: Complex, -) where - U: Scalar, -{ - for j in 0..8 { - let kernel_data_ij = &kernel_data_freq[j * 8..(j + 1) * 8]; - let sig = signal[j]; - - save_locations - .iter_mut() - .zip(kernel_data_ij.iter()) - .for_each(|(sav, &ker)| *sav += scale * ker * sig) - } // inner loop -} -#[allow(clippy::too_many_arguments)] -#[inline(always)] -pub fn m2l_cplx_chunked( - order: usize, - level: usize, - signal_freq_order: &[Vec>], - check_potential_freq_order: &[Vec>>], - kernel_data_halo: &[Vec>], - all_displacements_chunked: &[Vec>], - all_save_locations_chunked: &[Vec], - chunksize: usize, - scale: Complex, -) where - U: Scalar + Sync, -{ - let nparents = nparents(level); - let size_real = size_real(order); - - (0..size_real).into_par_iter().for_each(|freq| { - // Extract frequency component of signal (ntargets long) - let padded_signal_freq = &signal_freq_order[freq]; - - // Extract frequency components of save locations (ntargets long) - let check_potential_freq = &check_potential_freq_order[freq]; - - let zero = U::from(0.).unwrap(); - - (0..nparents) - .step_by(chunksize) - .enumerate() - .for_each(|(c, _chunk_start)| { - let first = all_save_locations_chunked[c].first().unwrap(); - let last = all_save_locations_chunked[c].last().unwrap(); - let save_locations = &check_potential_freq[*first..*last + 8]; - let save_locations_raw = save_locations.iter().map(|s| s.raw).collect_vec(); - let mut local_save_locations = vec![Complex::new(zero, zero); 8 * chunksize]; - - for (i, kernel_data) in kernel_data_halo.iter().enumerate().take(26) { - let frequency_offset = 64 * freq; - let 
kernel_data_freq = - &kernel_data[frequency_offset..(frequency_offset + 64)].to_vec(); - - // lookup signals - let disps = &all_displacements_chunked[c][i]; - - // Expect this to be 8*chunksize - let signals = disps - .iter() - .map(|d| &padded_signal_freq[*d..d + 8]) - .collect_vec(); - - for j in 0..chunksize { - unsafe { - matmul8x8x2_cplx_simple_local( - kernel_data_freq, - signals[j], - &mut local_save_locations[j * 8..(j + 1) * 8], - scale, - ) - }; - } - } - - unsafe { - save_locations_raw - .iter() - .zip(local_save_locations.iter()) - .for_each(|(&glob, &loc)| { - *glob += loc; - }); - } - }); - }); -} - -/// Implement the multipole to local translation operator for an FFT accelerated KiFMM on a single node. -impl FieldTranslation - for FmmDataLinear, T, FftFieldTranslationKiFmm, U>, U> -where - T: Kernel - + ScaleInvariantKernel - + std::marker::Send - + std::marker::Sync - + Default, - U: Scalar - + Float - + Default - + std::marker::Send - + std::marker::Sync - + Fft, FftMatrix>>, - Complex: Scalar, - U: MultiplyAdd< - U, - VectorContainer, - VectorContainer, - VectorContainer, - Dynamic, - Dynamic, - Dynamic, - >, -{ - fn m2l<'a>(&self, level: u64) { - let Some(targets) = self.fmm.tree().get_keys(level) else { - return; - }; - // let s = Instant::now(); - // Form signals to use for convolution first - let n = 2 * self.fmm.order - 1; - let ntargets = targets.len(); - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - - // Pad the signal - let &(m, n, o) = &(n, n, n); - - let p = m + 1; - let q = n + 1; - let r = o + 1; - let size = p * q * r; - let size_real = p * q * (r / 2 + 1); - let pad_size = (p - m, q - n, r - o); - let pad_index = (p - m, q - n, r - o); - let mut padded_signals = vec![U::default(); size * ntargets]; - - let padded_signals_chunks = padded_signals.par_chunks_exact_mut(size); - - let ntargets = targets.len(); - let min = &targets[0]; - let max = &targets[ntargets - 1]; - let min_idx = 
self.fmm.tree().key_to_index.get(min).unwrap(); - let max_idx = self.fmm.tree().key_to_index.get(max).unwrap(); - - let multipoles = &self.multipoles[min_idx * ncoeffs..(max_idx + 1) * ncoeffs]; - let multipoles_chunks = multipoles.par_chunks_exact(ncoeffs); - - padded_signals_chunks - .zip(multipoles_chunks) - .for_each(|(padded_signal, multipole)| { - let signal = self.fmm.m2l.compute_signal(self.fmm.order, multipole); - - let tmp = pad3(&signal, pad_size, pad_index); - - padded_signal.copy_from_slice(tmp.get_data()); - }); - - let mut padded_signals_hat = vec![Complex::::default(); size_real * ntargets]; - - U::rfft3_fftw_par_vec(&mut padded_signals, &mut padded_signals_hat, &[p, q, r]); - - let ntargets = targets.len(); - let mut global_check_potentials_hat = vec![Complex::::default(); size_real * ntargets]; - let mut global_check_potentials = vec![U::default(); size * ntargets]; - - // Get check potentials in frequency order - let global_check_potentials_hat_freq = unsafe { - check_potentials_freq_cplx( - self.fmm.order, - level as usize, - &mut global_check_potentials_hat, - ) - }; - - let padded_signals_hat_freq = signal_freq_order_cplx_optimized( - self.fmm.order, - level as usize, - &padded_signals_hat[..], - ); - - // Create a map between targets and index positions in vec of len 'ntargets' - let mut target_map = HashMap::new(); - - for (i, t) in targets.iter().enumerate() { - target_map.insert(t, i); - } - - let chunksize; - if level == 2 { - chunksize = 8; - } else if level == 3 { - chunksize = 64 - } else { - chunksize = 128 - } - - let all_displacements = displacements(&self.fmm.tree, level, &target_map); - - let (chunked_displacements, chunked_save_locations) = - chunked_displacements(level as usize, chunksize, &all_displacements); - - let scale = Complex::from(self.m2l_scale(level)); - - let kernel_data_halo = &self.fmm.m2l.operator_data.kernel_data_rearranged; - - m2l_cplx_chunked( - self.fmm.order, - level as usize, - &padded_signals_hat_freq, - 
&global_check_potentials_hat_freq, - kernel_data_halo, - &chunked_displacements, - &chunked_save_locations, - chunksize, - scale, - ); - - U::irfft_fftw_par_vec( - &mut global_check_potentials_hat, - &mut global_check_potentials, - &[p, q, r], - ); - - // Compute local expansion coefficients and save to data tree - let (_, multi_indices) = MortonKey::surface_grid::(self.fmm.order); - - let check_potentials = global_check_potentials - .chunks_exact(size) - .flat_map(|chunk| { - let m = 2 * self.fmm.order - 1; - let p = m + 1; - let mut potentials = Array3D::new((p, p, p)); - potentials.get_data_mut().copy_from_slice(chunk); - - let mut tmp = Vec::new(); - let ntargets = multi_indices.len() / 3; - let xs = &multi_indices[0..ntargets]; - let ys = &multi_indices[ntargets..2 * ntargets]; - let zs = &multi_indices[2 * ntargets..]; - - for i in 0..ntargets { - let val = potentials.get(zs[i], ys[i], xs[i]).unwrap(); - tmp.push(*val); - } - tmp - }) - .collect_vec(); - - // This should be blocked and use blas3 - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - let check_potentials = unsafe { - rlst_pointer_mat!['a, U, check_potentials.as_ptr(), (ncoeffs, ntargets), (1, ncoeffs)] - }; - - let mut tmp = self - .fmm - .dc2e_inv_1 - .dot(&self.fmm.dc2e_inv_2.dot(&check_potentials)) - .eval(); - - tmp.data_mut() - .iter_mut() - .for_each(|d| *d *= self.fmm.kernel.scale(level)); - - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - - // Add result - tmp.data() - .par_chunks_exact(ncoeffs) - .zip(self.level_locals[level as usize].into_par_iter()) - .for_each(|(result, local)| unsafe { - let mut ptr = local.raw; - for &r in result.iter().take(ncoeffs) { - *ptr += r; - ptr = ptr.add(1) - } - }); - } - - fn m2l_scale(&self, level: u64) -> U { - if level < 2 { - panic!("M2L only perfomed on level 2 and below") - } - - if level == 2 { - U::from(1. 
/ 2.).unwrap() - } else { - let two = U::from(2.0).unwrap(); - Scalar::powf(two, U::from(level - 3).unwrap()) - } - } -} - -/// Implement the multipole to local translation operator for an SVD accelerated KiFMM on a single node. -impl FieldTranslation - for FmmDataLinear, T, SvdFieldTranslationKiFmm, U>, U> -where - T: Kernel - + ScaleInvariantKernel - + std::marker::Send - + std::marker::Sync - + Default, - DenseMatrixLinAlgBuilder: Svd, - U: Scalar, - U: Float - + Default - + MultiplyAdd< - U, - VectorContainer, - VectorContainer, - VectorContainer, - Dynamic, - Dynamic, - Dynamic, - >, - U: std::marker::Send + std::marker::Sync + Default, -{ - fn m2l<'a>(&self, level: u64) { - let Some(sources) = self.fmm.tree().get_keys(level) else { - return; - }; - - let nsources = sources.len(); - - let mut source_map = HashMap::new(); - - for (i, t) in sources.iter().enumerate() { - source_map.insert(t, i); - } - - let mut target_indices = vec![vec![-1i64; nsources]; 316]; - - // Need to identify all save locations in a pre-processing step. 
- for (j, source) in sources.iter().enumerate() { - let v_list = source - .parent() - .neighbors() - .iter() - .flat_map(|pn| pn.children()) - .filter(|pnc| !source.is_adjacent(pnc)) - .collect_vec(); - - let transfer_vectors = v_list - .iter() - .map(|target| target.find_transfer_vector(source)) - .collect_vec(); - - let mut transfer_vectors_map = HashMap::new(); - for (i, v) in transfer_vectors.iter().enumerate() { - transfer_vectors_map.insert(v, i); - } - - let transfer_vectors_set: HashSet<_> = transfer_vectors.iter().collect(); - - for (i, tv) in self.fmm.m2l.transfer_vectors.iter().enumerate() { - if transfer_vectors_set.contains(&tv.hash) { - let target = &v_list[*transfer_vectors_map.get(&tv.hash).unwrap()]; - let target_index = source_map.get(target).unwrap(); - target_indices[i][j] = *target_index as i64; - } - } - } - - // Interpret multipoles as a matrix - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - let multipoles = unsafe { - rlst_pointer_mat!['a, U, self.level_multipoles[level as usize][0].raw, (ncoeffs, nsources), (1, ncoeffs)] - }; - - let (nrows, _) = self.fmm.m2l.operator_data.c.shape(); - let dim = (nrows, self.fmm.m2l.k); - - let mut compressed_multipoles = self.fmm.m2l.operator_data.st_block.dot(&multipoles); - - compressed_multipoles - .data_mut() - .iter_mut() - .for_each(|d| *d *= self.fmm.kernel.scale(level) * self.m2l_scale(level)); - - (0..316).into_par_iter().for_each(|c_idx| { - let top_left = (0, c_idx * self.fmm.m2l.k); - let c_sub = self.fmm.m2l.operator_data.c.block(top_left, dim); - - let locals = self.fmm.dc2e_inv_1.dot( - &self.fmm.dc2e_inv_2.dot( - &self - .fmm - .m2l - .operator_data - .u - .dot(&c_sub.dot(&compressed_multipoles)), - ), - ); - - let displacements = &target_indices[c_idx]; - - for (result_idx, &save_idx) in displacements.iter().enumerate() { - if save_idx > -1 { - let save_idx = save_idx as usize; - let mut local_ptr = self.level_locals[(level) as usize][save_idx].raw; - let res = 
&locals.data()[result_idx * ncoeffs..(result_idx + 1) * ncoeffs]; - - unsafe { - for &r in res.iter() { - *local_ptr += r; - local_ptr = local_ptr.add(1); - } - } - } - } - }) - } - - fn m2l_scale(&self, level: u64) -> U { - if level < 2 { - panic!("M2L only perfomed on level 2 and below") - } - - if level == 2 { - U::from(1. / 2.).unwrap() - } else { - let two = U::from(2.0).unwrap(); - Scalar::powf(two, U::from(level - 3).unwrap()) - } - } -} diff --git a/fmm/src/field_translation/linear/source.rs b/fmm/src/field_translation/source.rs similarity index 100% rename from fmm/src/field_translation/linear/source.rs rename to fmm/src/field_translation/source.rs diff --git a/fmm/src/field_translation/source_to_target.rs b/fmm/src/field_translation/source_to_target.rs new file mode 100644 index 00000000..2d7734e6 --- /dev/null +++ b/fmm/src/field_translation/source_to_target.rs @@ -0,0 +1,482 @@ +//! kiFMM based on simple linear data structures that minimises memory allocations, maximises cache re-use. 
+use itertools::Itertools; +use num::{Complex, Float}; +use rayon::prelude::*; +use std::collections::{HashMap, HashSet}; + +use bempp_field::{ + fft::Fft, + types::{FftFieldTranslationKiFmm, SvdFieldTranslationKiFmm}, +}; + +use bempp_traits::{ + field::{FieldTranslation, FieldTranslationData}, + fmm::Fmm, + kernel::{Kernel, ScaleInvariantKernel}, + tree::Tree, +}; +use bempp_tree::types::single_node::SingleNodeTree; + +use crate::types::{FmmDataLinear, KiFmmLinear, SendPtrMut}; +use rlst::{ + algorithms::{linalg::DenseMatrixLinAlgBuilder, traits::svd::Svd}, + common::traits::*, + dense::{rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer}, +}; + +use super::hadamard::matmul8x8x2; + +// pub fn size_real(order: usize) -> usize { +// let m = 2 * order - 1; // Size of each dimension of 3D kernel/signal +// let pad_size = 1; +// let p = m + pad_size; // Size of each dimension of padded 3D kernel/signal +// p * p * (p / 2 + 1) // Number of Fourier coefficients when working with real data +// } + +// pub fn nparents(level: usize) -> usize { +// 8i32.pow((level - 1) as u32) as usize +// } + +fn displacements(tree: &SingleNodeTree, level: u64) -> Vec> +where + U: Float + Default + Scalar, +{ + let parents = tree.get_keys(level - 1).unwrap(); + let nparents = parents.len(); + let nneighbors = 26; // Number of neighors for a given box + + let mut target_map = HashMap::new(); + + for (i, parent) in parents.iter().enumerate() { + target_map.insert(parent, i); + } + + let mut result = vec![Vec::new(); nneighbors]; + + let parent_neighbours = parents + .iter() + .map(|parent| parent.all_neighbors()) + .collect_vec(); + + for i in 0..nneighbors { + for all_neighbours in parent_neighbours.iter().take(nparents) { + if let Some(neighbour) = all_neighbours[i] { + result[i].push(*target_map.get(&neighbour).unwrap()) + } else { + result[i].push(nparents); + } + } + } + + result +} + +/// Implement the multipole to local translation operator for an FFT accelerated KiFMM on 
a single node. +impl FieldTranslation + for FmmDataLinear, T, FftFieldTranslationKiFmm, U>, U> +where + T: Kernel + + ScaleInvariantKernel + + std::marker::Send + + std::marker::Sync + + Default, + U: Scalar + Float + Default + std::marker::Send + std::marker::Sync + Fft, + Complex: Scalar, + U: MultiplyAdd< + U, + VectorContainer, + VectorContainer, + VectorContainer, + Dynamic, + Dynamic, + Dynamic, + >, +{ + fn m2l<'a>(&self, level: u64) { + let Some(targets) = self.fmm.tree().get_keys(level) else { + return; + }; + let n = 2 * self.fmm.order - 1; + let npad = n + 1; + + let nparents = self.fmm.tree().get_keys(level - 1).unwrap().len(); + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + let nsiblings = 8; + let nzeros = 8; + let size = npad * npad * npad; + let size_real = npad * npad * (npad / 2 + 1); + let all_displacements = displacements(self.fmm.tree(), level); + + let ntargets = targets.len(); + let min = &targets[0]; + let max = &targets[ntargets - 1]; + let min_idx = self.fmm.tree().key_to_index.get(min).unwrap(); + let max_idx = self.fmm.tree().key_to_index.get(max).unwrap(); + + let multipoles = &self.multipoles[min_idx * ncoeffs..(max_idx + 1) * ncoeffs]; + + //////////////////////////////////////////////////////////////////////////////////// + // Pre-process to setup data structures for M2L kernel + //////////////////////////////////////////////////////////////////////////////////// + + // Allocation of FFT of multipoles on convolution grid, in frequency order + let mut signals_hat_f_buffer = vec![U::zero(); size_real * (ntargets + nzeros) * 2]; + let signals_hat_f: &mut [Complex]; + unsafe { + let ptr = signals_hat_f_buffer.as_mut_ptr() as *mut Complex; + signals_hat_f = std::slice::from_raw_parts_mut(ptr, size_real * (ntargets + nzeros)); + } + + // A thread safe mutable pointer for saving to this vector + let raw = signals_hat_f.as_mut_ptr(); + let signals_hat_f_ptr = SendPtrMut { raw }; + + // Pre processing chunk size, in terms of number of 
parents + let chunksize; + if level == 2 { + chunksize = 8; // Maximum size at level 2 + } else if level == 3 { + chunksize = 64 // Maximum size at level 3 + } else { + chunksize = 128 // Little cache benefit found beyond this + } + + // Pre-processing to find FFT + multipoles + .par_chunks_exact(ncoeffs * nsiblings * chunksize) + .enumerate() + .for_each(|(i, multipole_chunk)| { + // Place Signal on convolution grid + let mut signal_chunk = vec![U::zero(); size * nsiblings * chunksize]; + + for i in 0..nsiblings * chunksize { + let multipole = &multipole_chunk[i * ncoeffs..(i + 1) * ncoeffs]; + let signal = &mut signal_chunk[i * size..(i + 1) * size]; + for (surf_idx, &conv_idx) in self.fmm.m2l.surf_to_conv_map.iter().enumerate() { + signal[conv_idx] = multipole[surf_idx] + } + } + + // Temporary buffer to hold results of FFT + let signal_hat_chunk_buffer = + vec![U::zero(); size_real * nsiblings * chunksize * 2]; + let signal_hat_chunk_c; + unsafe { + let ptr = signal_hat_chunk_buffer.as_ptr() as *mut Complex; + signal_hat_chunk_c = + std::slice::from_raw_parts_mut(ptr, size_real * nsiblings * chunksize); + } + + U::rfft3_fftw_slice(&mut signal_chunk, signal_hat_chunk_c, &[npad, npad, npad]); + + // Re-order the temporary buffer into frequency order before flushing to main memory + let signal_hat_chunk_f_buffer = + vec![U::zero(); size_real * nsiblings * chunksize * 2]; + let signal_hat_chunk_f_c; + unsafe { + let ptr = signal_hat_chunk_f_buffer.as_ptr() as *mut Complex; + signal_hat_chunk_f_c = + std::slice::from_raw_parts_mut(ptr, size_real * nsiblings * chunksize); + } + + for i in 0..size_real { + for j in 0..nsiblings * chunksize { + signal_hat_chunk_f_c[nsiblings * chunksize * i + j] = + signal_hat_chunk_c[size_real * j + i] + } + } + + // Storing the results of the FFT in frequency order + unsafe { + let sibling_offset = i * nsiblings * chunksize; + + // Pointer to storage buffer for frequency ordered FFT of signals + let ptr = signals_hat_f_ptr; + + for i 
in 0..size_real { + let frequency_offset = i * (ntargets + nzeros); + + // Head of buffer for each frequency + let mut head = ptr.raw.add(frequency_offset).add(sibling_offset); + + // Store results for this frequency for this sibling set chunk + let results_i = &signal_hat_chunk_f_c + [i * nsiblings * chunksize..(i + 1) * nsiblings * chunksize]; + + for &res in results_i { + *head += res; + head = head.add(1); + } + } + } + }); + + // Allocate check potentials (implicitly in frequency order) + let mut check_potentials_hat_f_buffer = vec![U::zero(); 2 * size_real * ntargets]; + let check_potentials_hat_f: &mut [Complex]; + unsafe { + let ptr = check_potentials_hat_f_buffer.as_mut_ptr() as *mut Complex; + check_potentials_hat_f = std::slice::from_raw_parts_mut(ptr, size_real * ntargets); + } + + //////////////////////////////////////////////////////////////////////////////////// + // M2L Kernel + //////////////////////////////////////////////////////////////////////////////////// + let scale = Complex::from(self.m2l_scale(level) * self.fmm.kernel.scale(level)); + let kernel_data_f = &self.fmm.m2l.operator_data.kernel_data_f; + let max_chunksize = 512; + + (0..size_real) + .into_par_iter() + .zip(signals_hat_f.par_chunks_exact(ntargets + nzeros)) + .zip(check_potentials_hat_f.par_chunks_exact_mut(ntargets)) + .for_each(|((freq, signal_f), check_potential_hat_f)| { + (0..nparents) + .step_by(max_chunksize) + .for_each(|chunk_start| { + let chunk_end = std::cmp::min(chunk_start + max_chunksize, nparents); + + let save_locations = + &mut check_potential_hat_f[chunk_start * 8..(chunk_end) * 8]; + + for (i, kernel_f) in kernel_data_f.iter().enumerate().take(26) { + let frequency_offset = 64 * freq; + let k_f = &kernel_f[frequency_offset..(frequency_offset + 64)].to_vec(); + + // Lookup signals + let displacements = &all_displacements[i][chunk_start..chunk_end]; + + for j in 0..(chunk_end - chunk_start) { + let displacement = displacements[j]; + let s_f = 
&signal_f[displacement * 8..(displacement + 1) * 8]; + + unsafe { + matmul8x8x2( + k_f, + s_f, + &mut save_locations[j * 8..(j + 1) * 8], + scale, + ) + } + } + } + }); + }); + + //////////////////////////////////////////////////////////////////////////////////// + // Post processing to find local expansions from check potentials + //////////////////////////////////////////////////////////////////////////////////// + + // Get check potentials back into target order from frequency order + let mut check_potential_hat = vec![U::zero(); size_real * ntargets * 2]; + let mut check_potential = vec![U::zero(); size * ntargets]; + let check_potential_hat_c; + unsafe { + let ptr = check_potential_hat.as_mut_ptr() as *mut Complex; + check_potential_hat_c = std::slice::from_raw_parts_mut(ptr, size_real) + } + + check_potential_hat_c + .par_chunks_exact_mut(size_real) + .enumerate() + .for_each(|(i, check_potential_hat_chunk)| { + // Lookup all frequencies for this target box + for j in 0..size_real { + check_potential_hat_chunk[j] = check_potentials_hat_f[j * ntargets + i] + } + }); + + // Compute inverse FFT + U::irfft3_fftw_par_slice( + check_potential_hat_c, + &mut check_potential, + &[npad, npad, npad], + ); + + // TODO: Experiment with chunk size for post processing + check_potential + .par_chunks_exact(nsiblings * size) + .zip(self.level_locals[level as usize].par_chunks_exact(nsiblings)) + .for_each(|(check_potential_chunk, local_ptrs)| { + // Map to surface grid + let mut potential_buffer = vec![U::zero(); ncoeffs * nsiblings]; + for i in 0..nsiblings { + let tmp = &mut potential_buffer[i * ncoeffs..(i + 1) * ncoeffs]; + let check_potential = &check_potential_chunk[i * size..(i + 1) * size]; + for (surf_idx, &conv_idx) in self.fmm.m2l.conv_to_surf_map.iter().enumerate() { + tmp[surf_idx] = check_potential[conv_idx]; + } + } + + // Can now find local expansion coefficients + let potential_chunk = unsafe { + rlst_pointer_mat!['a, U, potential_buffer.as_ptr(), (ncoeffs, 
nsiblings), (1, ncoeffs)] + }; + + let local_chunk = self + .fmm + .dc2e_inv_1 + .dot(&self.fmm.dc2e_inv_2.dot(&potential_chunk)) + .eval(); + + local_chunk + .data() + .chunks_exact(ncoeffs) + .zip(local_ptrs) + .for_each(|(result, local)| unsafe { + let mut ptr = local.raw; + for &r in result.iter().take(ncoeffs) { + *ptr += r; + ptr = ptr.add(1) + } + }); + }); + } + + fn m2l_scale(&self, level: u64) -> U { + if level < 2 { + panic!("M2L only perfomed on level 2 and below") + } + + if level == 2 { + U::from(1. / 2.).unwrap() + } else { + let two = U::from(2.0).unwrap(); + Scalar::powf(two, U::from(level - 3).unwrap()) + } + } +} + +/// Implement the multipole to local translation operator for an SVD accelerated KiFMM on a single node. +impl FieldTranslation + for FmmDataLinear, T, SvdFieldTranslationKiFmm, U>, U> +where + T: Kernel + + ScaleInvariantKernel + + std::marker::Send + + std::marker::Sync + + Default, + DenseMatrixLinAlgBuilder: Svd, + U: Scalar, + U: Float + + Default + + MultiplyAdd< + U, + VectorContainer, + VectorContainer, + VectorContainer, + Dynamic, + Dynamic, + Dynamic, + >, + U: std::marker::Send + std::marker::Sync + Default, +{ + fn m2l<'a>(&self, level: u64) { + let Some(sources) = self.fmm.tree().get_keys(level) else { + return; + }; + + let nsources = sources.len(); + + let mut source_map = HashMap::new(); + + for (i, t) in sources.iter().enumerate() { + source_map.insert(t, i); + } + + let mut target_indices = vec![vec![-1i64; nsources]; 316]; + + // Need to identify all save locations in a pre-processing step. 
+ for (j, source) in sources.iter().enumerate() { + let v_list = source + .parent() + .neighbors() + .iter() + .flat_map(|pn| pn.children()) + .filter(|pnc| !source.is_adjacent(pnc)) + .collect_vec(); + + let transfer_vectors = v_list + .iter() + .map(|target| target.find_transfer_vector(source)) + .collect_vec(); + + let mut transfer_vectors_map = HashMap::new(); + for (i, v) in transfer_vectors.iter().enumerate() { + transfer_vectors_map.insert(v, i); + } + + let transfer_vectors_set: HashSet<_> = transfer_vectors.iter().collect(); + + for (i, tv) in self.fmm.m2l.transfer_vectors.iter().enumerate() { + if transfer_vectors_set.contains(&tv.hash) { + let target = &v_list[*transfer_vectors_map.get(&tv.hash).unwrap()]; + let target_index = source_map.get(target).unwrap(); + target_indices[i][j] = *target_index as i64; + } + } + } + + // Interpret multipoles as a matrix + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + let multipoles = unsafe { + rlst_pointer_mat!['a, U, self.level_multipoles[level as usize][0].raw, (ncoeffs, nsources), (1, ncoeffs)] + }; + + let (nrows, _) = self.fmm.m2l.operator_data.c.shape(); + let dim = (nrows, self.fmm.m2l.k); + + let mut compressed_multipoles = self.fmm.m2l.operator_data.st_block.dot(&multipoles); + + compressed_multipoles + .data_mut() + .iter_mut() + .for_each(|d| *d *= self.fmm.kernel.scale(level) * self.m2l_scale(level)); + + (0..316).into_par_iter().for_each(|c_idx| { + let top_left = (0, c_idx * self.fmm.m2l.k); + let c_sub = self.fmm.m2l.operator_data.c.block(top_left, dim); + + let locals = self.fmm.dc2e_inv_1.dot( + &self.fmm.dc2e_inv_2.dot( + &self + .fmm + .m2l + .operator_data + .u + .dot(&c_sub.dot(&compressed_multipoles)), + ), + ); + + let displacements = &target_indices[c_idx]; + + for (result_idx, &save_idx) in displacements.iter().enumerate() { + if save_idx > -1 { + let save_idx = save_idx as usize; + let mut local_ptr = self.level_locals[(level) as usize][save_idx].raw; + let res = 
&locals.data()[result_idx * ncoeffs..(result_idx + 1) * ncoeffs]; + + unsafe { + for &r in res.iter() { + *local_ptr += r; + local_ptr = local_ptr.add(1); + } + } + } + } + }) + } + + fn m2l_scale(&self, level: u64) -> U { + if level < 2 { + panic!("M2L only perfomed on level 2 and below") + } + + if level == 2 { + U::from(1. / 2.).unwrap() + } else { + let two = U::from(2.0).unwrap(); + Scalar::powf(two, U::from(level - 3).unwrap()) + } + } +} diff --git a/fmm/src/field_translation/linear/target.rs b/fmm/src/field_translation/target.rs similarity index 100% rename from fmm/src/field_translation/linear/target.rs rename to fmm/src/field_translation/target.rs diff --git a/fmm/src/fmm/linear.rs b/fmm/src/fmm.rs similarity index 91% rename from fmm/src/fmm/linear.rs rename to fmm/src/fmm.rs index ece8beb3..ebf2d916 100644 --- a/fmm/src/fmm/linear.rs +++ b/fmm/src/fmm.rs @@ -590,7 +590,7 @@ mod test { use crate::charge::build_charge_dict; #[test] - fn test_fmm_data_linear() { + fn test_fmm_data() { let npoints = 10000; let points = points_fixture::(npoints, None, None); let global_idxs = (0..npoints).collect_vec(); @@ -678,7 +678,7 @@ mod test { } #[test] - fn test_fmm_linear_fft_f64() { + fn test_fmm_fft_f64() { let npoints = 10000; let points = points_fixture::(npoints, None, None); let global_idxs = (0..npoints).collect_vec(); @@ -757,7 +757,86 @@ mod test { } #[test] - fn test_fmm_linear_svd_f64() { + fn test_fmm_fft_f32() { + let npoints = 10000; + let points = points_fixture::(npoints, None, None); + let global_idxs = (0..npoints).collect_vec(); + let charges = vec![1.0; npoints]; + + let order = 6; + let alpha_inner = 1.05; + let alpha_outer = 2.95; + let adaptive = false; + let ncrit = 150; + + let depth = 3; + let kernel = Laplace3dKernel::default(); + + let tree = SingleNodeTree::new( + points.data(), + adaptive, + Some(ncrit), + Some(depth), + &global_idxs[..], + ); + + let m2l_data_fft = + FftFieldTranslationKiFmm::new(kernel.clone(), order, 
*tree.get_domain(), alpha_inner); + + let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); + + // Form charge dict, matching charges with their associated global indices + let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); + + let datatree = FmmDataLinear::new(fmm, &charge_dict).unwrap(); + + datatree.run(false); + + // Test that direct computation is close to the FMM. + let leaf = &datatree.fmm.tree.get_all_leaves().unwrap()[0]; + let leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap(); + + let (l, r) = datatree.charge_index_pointer[*leaf_idx]; + + let potentials = &datatree.potentials[l..r]; + + let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap(); + let (l, r) = datatree.charge_index_pointer[*leaf_idx]; + let leaf_coordinates = &coordinates[l * 3..r * 3]; + + let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); + + let leaf_coordinates = unsafe { + rlst_pointer_mat!['static, f32, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] + }.eval(); + + let mut direct = vec![0f32; ntargets]; + let all_point_coordinates = points_fixture::(npoints, None, None); + + let all_charges = charge_dict.into_values().collect_vec(); + + let kernel = Laplace3dKernel::default(); + + kernel.evaluate_st( + EvalType::Value, + all_point_coordinates.data(), + leaf_coordinates.data(), + &all_charges[..], + &mut direct[..], + ); + + let abs_error: f32 = potentials + .iter() + .zip(direct.iter()) + .map(|(a, b)| (a - b).abs()) + .sum(); + let rel_error: f32 = abs_error / (direct.iter().sum::()); + + assert!(rel_error <= 1e-5); + } + + #[test] + fn test_fmm_svd_f64() { let npoints = 10000; let points = points_fixture::(npoints, None, None); let global_idxs = (0..npoints).collect_vec(); @@ -837,8 +916,6 @@ mod test { .sum(); let rel_error: f64 = abs_error / (direct.iter().sum::()); - println!("rel_error {:?}", 
rel_error); - assert!(rel_error <= 1e-6); } } diff --git a/fmm/src/fmm/hashmap.rs b/fmm/src/fmm/hashmap.rs deleted file mode 100644 index 9526da98..00000000 --- a/fmm/src/fmm/hashmap.rs +++ /dev/null @@ -1,835 +0,0 @@ -//! Implementation of constructors for FMMs as well as the implementation of FmmData, Fmm traits. -use cauchy::Scalar; -use itertools::Itertools; -use num::{Float, ToPrimitive}; -use std::{ - collections::HashMap, - sync::{Arc, Mutex}, - time::Instant, -}; - -use rlst::{ - algorithms::{linalg::DenseMatrixLinAlgBuilder, traits::svd::Svd}, - common::traits::{Eval, NewLikeSelf, Transpose}, - dense::{ - rlst_col_vec, rlst_dynamic_mat, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, - VectorContainer, - }, -}; - -use bempp_traits::{ - field::{FieldTranslation, FieldTranslationData}, - fmm::{Fmm, FmmLoop, KiFmm as KiFmmTrait, SourceTranslation, TargetTranslation, TimeDict}, - kernel::{Kernel, ScaleInvariantKernel}, - tree::Tree, - types::EvalType, -}; -use bempp_tree::{constants::ROOT, types::single_node::SingleNodeTree}; - -use crate::pinv::{pinv, SvdScalar}; -use crate::types::{C2EType, ChargeDict, FmmDataHashmap, KiFmmHashMap}; - -/// Implementation of constructor for single node KiFMM -impl<'a, T, U, V> KiFmmHashMap, T, U, V> -where - T: Kernel + ScaleInvariantKernel, - U: FieldTranslationData, - V: Scalar + Default + Float, - SvdScalar: PartialOrd, - SvdScalar: Scalar + Float + ToPrimitive, - DenseMatrixLinAlgBuilder: Svd, - V: MultiplyAdd< - V, - VectorContainer, - VectorContainer, - VectorContainer, - Dynamic, - Dynamic, - Dynamic, - >, - SvdScalar: MultiplyAdd< - SvdScalar, - VectorContainer>, - VectorContainer>, - VectorContainer>, - Dynamic, - Dynamic, - Dynamic, - >, -{ - /// Constructor for single node kernel independent FMM (KiFMM). This object contains all the precomputed operator matrices and metadata, as well as references to - /// the associated single node octree, and the associated kernel function. 
- /// - /// # Arguments - /// * `order` - The expansion order for the multipole and local expansions. - /// * `alpha_inner` - The ratio of the inner check surface diamater in comparison to the surface discretising a box. - /// * `alpha_order` - The ratio of the outer check surface diamater in comparison to the surface discretising a box. - /// * `kernel` - The kernel function for this FMM. - /// * `tree` - The type of tree associated with this FMM, can be single or multi node. - /// * `m2l` - The M2L operator matrices, as well as metadata associated with this FMM. - pub fn new( - order: usize, - alpha_inner: V, - alpha_outer: V, - kernel: T, - tree: SingleNodeTree, - m2l: U, - ) -> Self { - let upward_equivalent_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); - let upward_check_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_outer); - let downward_equivalent_surface = - ROOT.compute_surface(tree.get_domain(), order, alpha_outer); - let downward_check_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); - - let nequiv_surface = upward_equivalent_surface.len() / kernel.space_dimension(); - let ncheck_surface = upward_check_surface.len() / kernel.space_dimension(); - - // Store in RLST matrices - let upward_equivalent_surface = unsafe { - rlst_pointer_mat!['a, ::Real, upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] - }; - let upward_check_surface = unsafe { - rlst_pointer_mat!['a, ::Real, upward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] - }; - let downward_equivalent_surface = unsafe { - rlst_pointer_mat!['a, ::Real, downward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] - }; - let downward_check_surface = unsafe { - rlst_pointer_mat!['a, ::Real, downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] - }; - - // Compute 
upward check to equivalent, and downward check to equivalent Gram matrices - // as well as their inverses using DGESVD. - let mut uc2e = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; - kernel.assemble_st( - EvalType::Value, - upward_equivalent_surface.data(), - upward_check_surface.data(), - uc2e.data_mut(), - ); - - // Need to tranapose so that rows correspond to targets and columns to sources - let uc2e = uc2e.transpose().eval(); - - let mut dc2e = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; - kernel.assemble_st( - EvalType::Value, - downward_equivalent_surface.data(), - downward_check_surface.data(), - dc2e.data_mut(), - ); - - // Need to tranapose so that rows correspond to targets and columns to sources - let dc2e = dc2e.transpose().eval(); - - let (s, ut, v) = pinv::(&uc2e, None, None).unwrap(); - - let mut mat_s = rlst_dynamic_mat![SvdScalar, (s.len(), s.len())]; - for i in 0..s.len() { - mat_s[[i, i]] = SvdScalar::::from_real(s[i]); - } - let uc2e_inv_1 = v.dot(&mat_s); - let uc2e_inv_2 = ut; - - let uc2e_inv_1_shape = uc2e_inv_1.shape(); - let uc2e_inv_2_shape = uc2e_inv_2.shape(); - - let uc2e_inv_1 = uc2e_inv_1 - .data() - .iter() - .map(|x| V::from(*x).unwrap()) - .collect_vec(); - let uc2e_inv_1 = unsafe { - rlst_pointer_mat!['a, V, uc2e_inv_1.as_ptr(), uc2e_inv_1_shape, (1, uc2e_inv_1_shape.0)] - } - .eval(); - let uc2e_inv_2 = uc2e_inv_2 - .data() - .iter() - .map(|x| V::from(*x).unwrap()) - .collect_vec(); - let uc2e_inv_2 = unsafe { - rlst_pointer_mat!['a, V, uc2e_inv_2.as_ptr(), uc2e_inv_2_shape, (1, uc2e_inv_2_shape.0)] - } - .eval(); - - let (s, ut, v) = pinv::(&dc2e, None, None).unwrap(); - - let mut mat_s = rlst_dynamic_mat![SvdScalar, (s.len(), s.len())]; - for i in 0..s.len() { - mat_s[[i, i]] = SvdScalar::::from_real(s[i]); - } - - let dc2e_inv_1 = v.dot(&mat_s); - let dc2e_inv_2 = ut; - - let dc2e_inv_1_shape = dc2e_inv_1.shape(); - let dc2e_inv_2_shape = dc2e_inv_2.shape(); - - let dc2e_inv_1 = dc2e_inv_1 - .data() - 
.iter() - .map(|x| V::from(*x).unwrap()) - .collect_vec(); - let dc2e_inv_1 = unsafe { - rlst_pointer_mat!['a, V, dc2e_inv_1.as_ptr(), dc2e_inv_1_shape, (1, dc2e_inv_1_shape.0)] - } - .eval(); - let dc2e_inv_2 = dc2e_inv_2 - .data() - .iter() - .map(|x| V::from(*x).unwrap()) - .collect_vec(); - let dc2e_inv_2 = unsafe { - rlst_pointer_mat!['a, V, dc2e_inv_2.as_ptr(), dc2e_inv_2_shape, (1, dc2e_inv_2_shape.0)] - } - .eval(); - - // Calculate M2M/L2L matrices - let children = ROOT.children(); - let mut m2m: Vec> = Vec::new(); - let mut l2l: Vec> = Vec::new(); - - for child in children.iter() { - let child_upward_equivalent_surface = - child.compute_surface(tree.get_domain(), order, alpha_inner); - let child_downward_check_surface = - child.compute_surface(tree.get_domain(), order, alpha_inner); - let child_upward_equivalent_surface = unsafe { - rlst_pointer_mat!['a, ::Real, child_upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] - }; - let child_downward_check_surface = unsafe { - rlst_pointer_mat!['a, ::Real, child_downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] - }; - - let mut pc2ce = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; - - kernel.assemble_st( - EvalType::Value, - child_upward_equivalent_surface.data(), - upward_check_surface.data(), - pc2ce.data_mut(), - ); - - // Need to transpose so that rows correspond to targets, and columns to sources - let pc2ce = pc2ce.transpose().eval(); - - m2m.push(uc2e_inv_1.dot(&uc2e_inv_2.dot(&pc2ce)).eval()); - - let mut cc2pe = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; - - kernel.assemble_st( - EvalType::Value, - downward_equivalent_surface.data(), - child_downward_check_surface.data(), - cc2pe.data_mut(), - ); - - // Need to transpose so that rows correspond to targets, and columns to sources - let cc2pe = cc2pe.transpose().eval(); - let mut tmp = dc2e_inv_1.dot(&dc2e_inv_2.dot(&cc2pe)).eval(); - 
tmp.data_mut() - .iter_mut() - .for_each(|d| *d *= kernel.scale(child.level())); - l2l.push(tmp); - } - - Self { - order, - uc2e_inv_1, - uc2e_inv_2, - dc2e_inv_1, - dc2e_inv_2, - alpha_inner, - alpha_outer, - m2m, - l2l, - kernel, - tree, - m2l, - } - } -} - -/// Implementation of the data structure to store the data for the single node KiFMM. -impl FmmDataHashmap, T, U, V>, V> -where - T: Kernel, - U: FieldTranslationData, - V: Float + Scalar + Default, -{ - /// Constructor fo the KiFMM's associated FmmData on a single node. - /// - /// # Arguments - /// `fmm` - A single node KiFMM object. - /// `global_charges` - The charge data associated to the point data via unique global indices. - pub fn new( - fmm: KiFmmHashMap, T, U, V>, - global_charges: &ChargeDict, - ) -> Self { - let mut multipoles = HashMap::new(); - let mut locals = HashMap::new(); - let mut potentials = HashMap::new(); - let mut points = HashMap::new(); - let mut charges = HashMap::new(); - - let ncoeffs = fmm.m2l.ncoeffs(fmm.order); - - let dummy = rlst_col_vec![V, ncoeffs]; - - if let Some(keys) = fmm.tree().get_all_keys() { - for key in keys.iter() { - multipoles.insert(*key, Arc::new(Mutex::new(dummy.new_like_self().eval()))); - locals.insert(*key, Arc::new(Mutex::new(dummy.new_like_self().eval()))); - if let Some(point_data) = fmm.tree().get_points(key) { - points.insert(*key, point_data.iter().cloned().collect_vec()); - - let npoints = point_data.len(); - potentials.insert(*key, Arc::new(Mutex::new(rlst_col_vec![V, npoints]))); - - // Lookup indices and store with charges - let mut tmp_idx = Vec::new(); - for point in point_data.iter() { - tmp_idx.push(point.global_idx) - } - let mut tmp_charges = vec![V::zero(); point_data.len()]; - for i in 0..tmp_idx.len() { - tmp_charges[i] = *global_charges.get(&tmp_idx[i]).unwrap(); - } - - charges.insert(*key, Arc::new(tmp_charges)); - } - } - } - - Self { - fmm, - multipoles, - locals, - potentials, - points, - charges, - } - } -} - -impl KiFmmTrait 
for KiFmmHashMap -where - T: Tree, - U: Kernel, - V: FieldTranslationData, - W: Scalar + Float + Default, -{ - fn alpha_inner(&self) -> <::Kernel as Kernel>::T { - self.alpha_inner - } - - fn alpha_outer(&self) -> <::Kernel as Kernel>::T { - self.alpha_outer - } -} - -impl Fmm for KiFmmHashMap -where - T: Tree, - U: Kernel, - V: FieldTranslationData, - W: Scalar + Float + Default, -{ - type Tree = T; - type Kernel = U; - - fn order(&self) -> usize { - self.order - } - - fn kernel(&self) -> &Self::Kernel { - &self.kernel - } - - fn tree(&self) -> &Self::Tree { - &self.tree - } -} - -impl FmmLoop for FmmDataHashmap -where - T: Fmm, - U: Scalar + Float + Default, - FmmDataHashmap: SourceTranslation + FieldTranslation + TargetTranslation, -{ - fn upward_pass(&self, time: bool) -> Option { - match time { - true => { - let mut times = TimeDict::default(); - // Particle to Multipole - let start = Instant::now(); - self.p2m(); - times.insert("p2m".to_string(), start.elapsed().as_millis()); - - // Multipole to Multipole - let depth = self.fmm.tree().get_depth(); - let start = Instant::now(); - for level in (1..=depth).rev() { - self.m2m(level) - } - times.insert("m2m".to_string(), start.elapsed().as_millis()); - Some(times) - } - false => { - // Particle to Multipole - self.p2m(); - - // Multipole to Multipole - let depth = self.fmm.tree().get_depth(); - for level in (1..=depth).rev() { - self.m2m(level) - } - None - } - } - } - - fn downward_pass(&self, time: bool) -> Option { - let depth = self.fmm.tree().get_depth(); - - match time { - true => { - let mut times = TimeDict::default(); - let mut l2l_time = 0; - let mut m2l_time = 0; - - for level in 2..=depth { - if level > 2 { - let start = Instant::now(); - self.l2l(level); - l2l_time += start.elapsed().as_millis(); - } - - let start = Instant::now(); - self.m2l(level); - m2l_time += start.elapsed().as_millis(); - } - - times.insert("l2l".to_string(), l2l_time); - times.insert("m2l".to_string(), m2l_time); - - // Leaf 
level computations - let start = Instant::now(); - self.p2l(); - times.insert("p2l".to_string(), start.elapsed().as_millis()); - - // Sum all potential contributions - let start = Instant::now(); - self.m2p(); - times.insert("m2p".to_string(), start.elapsed().as_millis()); - - let start = Instant::now(); - self.p2p(); - times.insert("p2p".to_string(), start.elapsed().as_millis()); - - let start = Instant::now(); - self.l2p(); - times.insert("l2p".to_string(), start.elapsed().as_millis()); - - Some(times) - } - false => { - for level in 2..=depth { - if level > 2 { - self.l2l(level); - } - self.m2l(level); - } - // Leaf level computations - self.p2l(); - - // Sum all potential contributions - self.m2p(); - self.p2p(); - self.l2p(); - - None - } - } - } - - fn run(&self, time: bool) -> Option { - let t1 = self.upward_pass(time); - let t2 = self.downward_pass(time); - - if let (Some(mut t1), Some(t2)) = (t1, t2) { - t1.extend(t2); - Some(t1) - } else { - None - } - } -} - -#[cfg(test)] -mod test { - use super::*; - - use std::env; - - use bempp_field::types::{FftFieldTranslationKiFmm, SvdFieldTranslationKiFmm}; - use bempp_kernel::laplace_3d::Laplace3dKernel; - use bempp_tree::implementations::helpers::points_fixture; - - use crate::charge::build_charge_dict; - - #[test] - fn test_fmm_svd_f64() { - // Set OMP threads to avoid thrashing during matrix-matrix products in M2L. 
- env::set_var("OMP_NUM_THREADS", "1"); - - // Generate a set of point data - let npoints = 10000; - let points = points_fixture(npoints, None, None); - let global_idxs = (0..npoints).collect_vec(); - let charges = vec![1.0; npoints]; - - // Setup a FMM experiment - let order = 6; - let alpha_inner = 1.05; - let alpha_outer = 2.95; - let adaptive = false; - let k = 1000; - let ncrit = 150; - let depth = 3; - - // Create a kernel - let kernel = Laplace3dKernel::::default(); - - // Create a tree - let tree = SingleNodeTree::new( - points.data(), - adaptive, - Some(ncrit), - Some(depth), - &global_idxs[..], - ); - - // Precompute the M2L data - let m2l_data_svd = SvdFieldTranslationKiFmm::new( - kernel.clone(), - Some(k), - order, - *tree.get_domain(), - alpha_inner, - ); - - // Create an FMM - let fmm = KiFmmHashMap::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - - // Form charge dict, matching charges with their associated global indices - let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); - - // Associate data with the FMM - let datatree = FmmDataHashmap::new(fmm, &charge_dict); - - // Run the experiment - datatree.run(false); - - // Test that direct computation is close to the FMM. 
- let leaf = &datatree.fmm.tree.get_all_leaves().unwrap()[0]; - - let potentials = datatree.potentials.get(leaf).unwrap().lock().unwrap(); - let pts = datatree.fmm.tree().get_points(leaf).unwrap(); - - let leaf_coordinates = pts - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); - - let leaf_coordinates = unsafe { - rlst_pointer_mat!['static, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] - }.eval(); - - let mut direct = vec![0f64; pts.len()]; - let all_point_coordinates = points_fixture(npoints, None, None); - - let all_charges = charge_dict.into_values().collect_vec(); - - let kernel = Laplace3dKernel::::default(); - - kernel.evaluate_st( - EvalType::Value, - all_point_coordinates.data(), - leaf_coordinates.data(), - &all_charges[..], - &mut direct[..], - ); - - let abs_error: f64 = potentials - .data() - .iter() - .zip(direct.iter()) - .map(|(a, b)| (a - b).abs()) - .sum(); - let rel_error: f64 = abs_error / (direct.iter().sum::()); - - assert!(rel_error <= 1e-6); - } - - #[test] - fn test_fmm_svd_f32() { - // Set OMP threads to avoid thrashing during matrix-matrix products in M2L. 
- env::set_var("OMP_NUM_THREADS", "1"); - - // Generate a set of point data - let npoints = 10000; - let points = points_fixture::(npoints, None, None); - let global_idxs = (0..npoints).collect_vec(); - let charges = vec![1.0_f32; npoints]; - - // Setup a FMM experiment - let order = 6; - let alpha_inner = 1.05; - let alpha_outer = 2.95; - let adaptive = false; - let k = 1000; - let ncrit = 150; - let depth = 3; - - // Create a kernel - let kernel = Laplace3dKernel::default(); - - // Create a tree - let tree = SingleNodeTree::new( - points.data(), - adaptive, - Some(ncrit), - Some(depth), - &global_idxs[..], - ); - - // Precompute the M2L data - let m2l_data_svd = SvdFieldTranslationKiFmm::new( - kernel.clone(), - Some(k), - order, - *tree.get_domain(), - alpha_inner, - ); - - // Create an FMM - let fmm = KiFmmHashMap::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - - // Form charge dict, matching charges with their associated global indices - let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); - - // Associate data with the FMM - let datatree = FmmDataHashmap::new(fmm, &charge_dict); - - // Run the experiment - datatree.run(true); - - // Test that direct computation is close to the FMM. 
- let leaf = &datatree.fmm.tree.get_keys(depth).unwrap()[0]; - - let potentials = datatree.potentials.get(leaf).unwrap().lock().unwrap(); - let pts = datatree.fmm.tree().get_points(leaf).unwrap(); - - let leaf_coordinates = pts - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); - - let leaf_coordinates = unsafe { - rlst_pointer_mat!['static, f32, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] - }.eval(); - - let mut direct = vec![0_f32; pts.len()]; - let all_point_coordinates = points_fixture(npoints, None, None); - - let all_charges = charge_dict.into_values().collect_vec(); - - let kernel = Laplace3dKernel::default(); - - kernel.evaluate_st( - EvalType::Value, - all_point_coordinates.data(), - leaf_coordinates.data(), - &all_charges[..], - &mut direct[..], - ); - - let abs_error: f32 = potentials - .data() - .iter() - .zip(direct.iter()) - .map(|(a, b)| (a - b).abs()) - .sum(); - let rel_error: f32 = abs_error / (direct.iter().sum::()); - - assert!(rel_error <= 1e-4); - } - - #[test] - fn test_fmm_fft_f64() { - let npoints = 10000; - let points = points_fixture::(npoints, None, None); - let global_idxs = (0..npoints).collect_vec(); - let charges = vec![1.0; npoints]; - - let order = 6; - let alpha_inner = 1.05; - let alpha_outer = 2.95; - let adaptive = false; - let ncrit = 150; - - let depth = 3; - let kernel = Laplace3dKernel::::default(); - - let tree = SingleNodeTree::new( - points.data(), - adaptive, - Some(ncrit), - Some(depth), - &global_idxs[..], - ); - - let m2l_data_fft = - FftFieldTranslationKiFmm::new(kernel.clone(), order, *tree.get_domain(), alpha_inner); - - let fmm = KiFmmHashMap::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); - - // Form charge dict, matching charges with their associated global indices - let charge_dict = 
build_charge_dict(&global_idxs[..], &charges[..]); - - let datatree = FmmDataHashmap::new(fmm, &charge_dict); - - let s = Instant::now(); - let times = datatree.run(true); - println!("runtime {:?} operators {:?}", s.elapsed(), times.unwrap()); - - let leaf = &datatree.fmm.tree.get_all_leaves().unwrap()[0]; - - let potentials = datatree.potentials.get(leaf).unwrap().lock().unwrap(); - - let pts = datatree.fmm.tree().get_points(leaf).unwrap(); - - let leaf_coordinates = pts - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); - - let leaf_coordinates = unsafe { - rlst_pointer_mat!['static, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] - }.eval(); - - let mut direct = vec![0f64; pts.len()]; - let all_point_coordinates = points_fixture(npoints, None, None); - - let all_charges = charge_dict.into_values().collect_vec(); - - let kernel = Laplace3dKernel::::default(); - - kernel.evaluate_st( - EvalType::Value, - all_point_coordinates.data(), - leaf_coordinates.data(), - &all_charges[..], - &mut direct[..], - ); - - let abs_error: f64 = potentials - .data() - .iter() - .zip(direct.iter()) - .map(|(a, b)| (a - b).abs()) - .sum(); - - let rel_error: f64 = abs_error / (direct.iter().sum::()); - assert!(rel_error <= 1e-6); - } - - #[test] - fn test_fmm_fft_f32() { - let npoints = 10000; - let points = points_fixture::(npoints, None, None); - let global_idxs = (0..npoints).collect_vec(); - let charges = vec![1.0; npoints]; - - let order = 6; - let alpha_inner = 1.05; - let alpha_outer = 2.95; - let adaptive = false; - let ncrit = 150; - let depth = 3; - let kernel = Laplace3dKernel::::default(); - - let tree = SingleNodeTree::new( - points.data(), - adaptive, - Some(ncrit), - Some(depth), - &global_idxs[..], - ); - - let m2l_data_fft = - 
FftFieldTranslationKiFmm::new(kernel.clone(), order, *tree.get_domain(), alpha_inner); - - let fmm = KiFmmHashMap::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); - - // Form charge dict, matching charges with their associated global indices - let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); - - let datatree = FmmDataHashmap::new(fmm, &charge_dict); - - datatree.run(false); - - let leaf = &datatree.fmm.tree.get_keys(depth).unwrap()[0]; - - let potentials = datatree.potentials.get(leaf).unwrap().lock().unwrap(); - let pts = datatree.fmm.tree().get_points(leaf).unwrap(); - - let leaf_coordinates = pts - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); - - let leaf_coordinates = unsafe { - rlst_pointer_mat!['static, f32, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] - }.eval(); - - let mut direct = vec![0f32; pts.len()]; - let all_point_coordinates = points_fixture(npoints, None, None); - - let all_charges = charge_dict.into_values().collect_vec(); - - let kernel = Laplace3dKernel::default(); - - kernel.evaluate_st( - EvalType::Value, - all_point_coordinates.data(), - leaf_coordinates.data(), - &all_charges[..], - &mut direct[..], - ); - - let abs_error: f32 = potentials - .data() - .iter() - .zip(direct.iter()) - .map(|(a, b)| (a - b).abs()) - .sum(); - - let rel_error = abs_error / (direct.iter().sum::()); - assert!(rel_error <= 1e-4); - } -} diff --git a/fmm/src/lib.rs b/fmm/src/lib.rs index 87567237..e543c2a9 100644 --- a/fmm/src/lib.rs +++ b/fmm/src/lib.rs @@ -1,24 +1,14 @@ //! A a general framework for implementing Fast Multipole Methods. 
pub mod charge; pub mod constants; +pub mod fmm; pub mod interaction_lists; pub mod pinv; pub mod types; mod field_translation { - pub mod hashmap { - pub mod source; - pub mod source_to_target; - pub mod target; - } - pub mod linear { - pub mod source; - pub mod source_to_target; - pub mod target; - } -} - -mod fmm { - pub mod hashmap; - pub mod linear; + pub mod hadamard; + pub mod source; + pub mod source_to_target; + pub mod target; } diff --git a/tree/src/constants.rs b/tree/src/constants.rs index 39f89b44..e2cf0b49 100644 --- a/tree/src/constants.rs +++ b/tree/src/constants.rs @@ -23,32 +23,32 @@ pub const ROOT: MortonKey = MortonKey { morton: 0, }; -/// Transfer vectors in component form to nearest octant neighbours. +/// Transfer vectors in component form to nearest octant neighbours, in Morton order. pub const DIRECTIONS: [[i64; 3]; 26] = [ [-1, -1, -1], [-1, -1, 0], [-1, -1, 1], [-1, 0, -1], + [-1, 1, -1], [-1, 0, 0], [-1, 0, 1], - [-1, 1, -1], [-1, 1, 0], [-1, 1, 1], [0, -1, -1], + [1, -1, -1], [0, -1, 0], [0, -1, 1], + [1, -1, 0], + [1, -1, 1], [0, 0, -1], - [0, 0, 1], [0, 1, -1], + [1, 0, -1], + [1, 1, -1], + [0, 0, 1], [0, 1, 0], [0, 1, 1], - [1, -1, -1], - [1, -1, 0], - [1, -1, 1], - [1, 0, -1], [1, 0, 0], [1, 0, 1], - [1, 1, -1], [1, 1, 0], [1, 1, 1], ]; diff --git a/tree/src/implementations/helpers.rs b/tree/src/implementations/helpers.rs index 9b3d8183..a05e17cc 100644 --- a/tree/src/implementations/helpers.rs +++ b/tree/src/implementations/helpers.rs @@ -1,8 +1,6 @@ //! Helper functions used in testing tree implementations, specifically test point generators, //! as well as helpers for handling surfaces that discretise a box corresponding to a Morton key. 
-use std::collections::HashMap; - use bempp_traits::types::Scalar; use num::Float; use rand::prelude::*; @@ -10,8 +8,6 @@ use rand::SeedableRng; use rlst::dense::{base_matrix::BaseMatrix, rlst_dynamic_mat, Dynamic, Matrix, VectorContainer}; -use crate::types::morton::MortonKey; - /// Alias for an rlst container for point data. pub type PointsMat = Matrix, Dynamic>, Dynamic>; @@ -124,73 +120,6 @@ pub fn find_corners(coordinates: &[T]) -> Vec { corners } -/// We need to quickly map between corners and their corresponding positions in the surface grid via -/// their respective indices. -/// -/// # Arguments -/// * `order` - the order of expansions used in constructing the surface grid -pub fn map_corners_to_surface(order: usize) -> (HashMap, HashMap) -where - T: Float + std::ops::SubAssign + std::ops::MulAssign, -{ - let (_, surface_multindex) = MortonKey::surface_grid::(order); - - let nsurf = surface_multindex.len() / 3; - let ncorners = 8; - let corners_multindex = [ - 0, - order - 1, - 0, - order - 1, - 0, - order - 1, - 0, - order - 1, - 0, - 0, - order - 1, - order - 1, - 0, - 0, - order - 1, - order - 1, - 0, - 0, - 0, - 0, - order - 1, - order - 1, - order - 1, - order - 1, - ]; - - let mut _map = HashMap::new(); - let mut _inv_map = HashMap::new(); - - for i in 0..nsurf { - let s = [ - surface_multindex[i], - surface_multindex[nsurf + i], - surface_multindex[2 * nsurf + i], - ]; - - for j in 0..ncorners { - let c = [ - corners_multindex[j], - corners_multindex[8 + j], - corners_multindex[16 + j], - ]; - - if s[0] == c[0] && s[1] == c[1] && s[2] == c[2] { - _map.insert(i, j); - _inv_map.insert(j, i); - } - } - } - - (_map, _inv_map) -} - #[cfg(test)] mod test { @@ -200,10 +129,10 @@ mod test { #[test] fn test_find_corners() { let order = 5; - let (grid_1, _) = MortonKey::surface_grid::(order); + let grid_1 = MortonKey::surface_grid::(order); let order = 2; - let (grid_2, _) = MortonKey::surface_grid::(order); + let grid_2 = MortonKey::surface_grid::(order); 
let corners_1 = find_corners(&grid_1); let corners_2 = find_corners(&grid_2); diff --git a/tree/src/implementations/impl_morton.rs b/tree/src/implementations/impl_morton.rs index b2765282..4f2a1f78 100644 --- a/tree/src/implementations/impl_morton.rs +++ b/tree/src/implementations/impl_morton.rs @@ -850,7 +850,7 @@ impl MortonKey { /// /// # Arguments /// * `order` - The expansion order being used in the FMM simulation. - pub fn surface_grid(order: usize) -> (Vec, Vec) + pub fn surface_grid(order: usize) -> Vec where T: Float + std::ops::MulAssign + std::ops::SubAssign + ToPrimitive, { @@ -883,12 +883,6 @@ impl MortonKey { } } - // Map surface points to multi-indices - let surface_idxs = surface - .iter() - .clone() - .map(|&x| x.to_usize().unwrap()) - .collect(); // Shift and scale surface so that it's centered at the origin and has side length of 1 let two = T::from(2.0).unwrap(); @@ -898,7 +892,7 @@ impl MortonKey { surface.iter_mut().for_each(|point| *point -= T::one()); - (surface, surface_idxs) + surface } /// Compute a surface grid centered at this Morton Key, used in the discretisation of Fast Multipole @@ -951,7 +945,7 @@ impl MortonKey { where T: Float + std::ops::MulAssign + std::ops::SubAssign + Default + Scalar, { - let (surface, _) = MortonKey::surface_grid(order); + let surface = MortonKey::surface_grid(order); self.scale_surface::(surface, domain, alpha) } @@ -1385,13 +1379,18 @@ mod test { for i in 0..26 { assert!(expected[i] == result[i]); } + + // Test that they are in Morton order + for i in 0..25 { + assert!(expected[i + 1] >= expected[i]) + } } // More complex case, in the middle of the tree { let parent = key.parent().parent().parent(); - let mut result = parent.neighbors(); - result.sort(); + let result = parent.neighbors(); + // result.sort(); // Test that we get the expected number of neighbors assert!(result.len() == 26); @@ -1449,7 +1448,13 @@ mod test { for i in 0..26 { assert!(expected[i] == result[i]); } + + // Test that they are in 
Morton order + for i in 0..25 { + assert!(result[i + 1] >= result[i]) + } } + // assert!(false) } #[test] @@ -1811,9 +1816,8 @@ mod test { let surface = key.compute_surface(&domain, order, alpha); assert_eq!(surface.len(), ncoeffs * dim); - let (surface, surface_idxs) = MortonKey::surface_grid::(order); + let surface = MortonKey::surface_grid::(order); assert_eq!(surface.len(), ncoeffs * dim); - assert_eq!(surface_idxs.len(), ncoeffs * dim); let mut expected = vec![[0usize; 3]; ncoeffs]; let lower = 0; @@ -1833,16 +1837,6 @@ mod test { } } - // Test ordering. - for i in 0..ncoeffs { - let point = vec![ - surface_idxs[i], - surface_idxs[i + ncoeffs], - surface_idxs[i + 2 * ncoeffs], - ]; - assert_eq!(point, expected[i]); - } - // Test scaling let level = 2; let key = MortonKey::from_point(&point, &domain, level); diff --git a/tree/src/implementations/impl_single_node.rs b/tree/src/implementations/impl_single_node.rs index 8d50d6a1..07533013 100644 --- a/tree/src/implementations/impl_single_node.rs +++ b/tree/src/implementations/impl_single_node.rs @@ -986,4 +986,28 @@ mod test { } assert_eq!(tot, npoints); } + + #[test] + fn test_siblings_in_tree() { + // Test that siblings lie adjacently in a tree. + + let npoints = 10000; + let points = points_fixture::(npoints, None, None); + let global_idxs = (0..npoints).collect_vec(); + let depth = 3; + let tree = SingleNodeTree::new(points.data(), false, None, Some(depth), &global_idxs); + + let keys = tree.get_keys(3).unwrap(); + + // Pick out some random indices to test + let idx_vec = [0, 8, 16, 32]; + + for idx in idx_vec { + let found = keys[idx].siblings(); + + for i in 0..8 { + assert!(found[i] == keys[idx + i]) + } + } + } }