diff --git a/field/src/fft.rs b/field/src/fft.rs
index 021df0c6..56ede31a 100644
--- a/field/src/fft.rs
+++ b/field/src/fft.rs
@@ -4,5 +4,4 @@
 use num::Complex;
 use rayon::prelude::*;
 
 use crate::types::{FftMatrixc32, FftMatrixc64, FftMatrixf32, FftMatrixf64};
-use rlst::dense::RawAccessMut;
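The batched transforms implemented below operate on flat buffers that hold many signals back to back. The following sketch of the intended call pattern is not part of the patch; the helper name forward_batch and its shapes are illustrative, and it assumes the Fft trait and the Vec-backed type aliases introduced in this patch.

use bempp_field::fft::Fft;
use bempp_field::types::{FftMatrixc64, FftMatrixf64};
use num::Complex;

// Illustrative only: push `nsignals` padded boxes of shape [p, q, r] through the
// parallel forward transform. An r2c FFT packs each signal of p * q * r reals
// into p * q * (r / 2 + 1) complex coefficients, which fixes the buffer sizes.
fn forward_batch(signals: &mut FftMatrixf64, shape: &[usize; 3], nsignals: usize) -> FftMatrixc64 {
    let size: usize = shape.iter().product();
    let size_real = shape[0] * shape[1] * (shape[2] / 2 + 1);
    assert_eq!(signals.len(), size * nsignals);

    let mut spectra: FftMatrixc64 = vec![Complex::<f64>::default(); size_real * nsignals];
    f64::rfft3_fftw_par_vec(signals, &mut spectra, shape);
    spectra
}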
@@ -53,11 +52,8 @@ impl Fft<FftMatrixf32, FftMatrixc32> for f32 {
         let size_real = (size / size_d) * (size_d / 2 + 1);
         let plan: R2CPlan32 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();
 
-        let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter();
-        let it_out = output
-            .data_mut()
-            .par_chunks_exact_mut(size_real)
-            .into_par_iter();
+        let it_inp = input.par_chunks_exact_mut(size).into_par_iter();
+        let it_out = output.par_chunks_exact_mut(size_real).into_par_iter();
 
         it_inp.zip(it_out).for_each(|(inp, out)| {
             let _ = plan.r2c(inp, out);
@@ -70,11 +66,8 @@ impl Fft<FftMatrixf32, FftMatrixc32> for f32 {
         let size_real = (size / size_d) * (size_d / 2 + 1);
         let plan: C2RPlan32 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();
 
-        let it_inp = input
-            .data_mut()
-            .par_chunks_exact_mut(size_real)
-            .into_par_iter();
-        let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter();
+        let it_inp = input.par_chunks_exact_mut(size_real).into_par_iter();
+        let it_out = output.par_chunks_exact_mut(size).into_par_iter();
 
         it_inp.zip(it_out).for_each(|(inp, out)| {
             let _ = plan.c2r(inp, out);
@@ -109,27 +102,22 @@ impl Fft<FftMatrixf64, FftMatrixc64> for f64 {
         let size_real = (size / size_d) * (size_d / 2 + 1);
         let plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();
 
-        let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter();
-        let it_out = output
-            .data_mut()
-            .par_chunks_exact_mut(size_real)
-            .into_par_iter();
+        let it_inp = input.par_chunks_exact_mut(size).into_par_iter();
+        let it_out = output.par_chunks_exact_mut(size_real).into_par_iter();
 
         it_inp.zip(it_out).for_each(|(inp, out)| {
             let _ = plan.r2c(inp, out);
         });
     }
+
     fn irfft_fftw_par_vec(input: &mut FftMatrixc64, output: &mut FftMatrixf64, shape: &[usize]) {
         let size: usize = shape.iter().product();
         let size_d = shape.last().unwrap();
         let size_real = (size / size_d) * (size_d / 2 + 1);
         let plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();
 
-        let it_inp = input
-            .data_mut()
-            .par_chunks_exact_mut(size_real)
-            .into_par_iter();
-        let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter();
+        let it_inp = input.par_chunks_exact_mut(size_real).into_par_iter();
+        let it_out = output.par_chunks_exact_mut(size).into_par_iter();
 
         it_inp.zip(it_out).for_each(|(inp, out)| {
             let _ = plan.c2r(inp, out);
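The 1.0 / size normalisation applied after each inverse transform above follows FFTW's convention: an unnormalised forward/inverse pair scales the data by the transform size. The single-transform sketch below illustrates this using the same fftw crate calls as the patch; the function name and tolerance are illustrative, and, as in the patch, plain Vec buffers are handed to a plan created with aligned.

use fftw::plan::{C2RPlan, C2RPlan64, R2CPlan, R2CPlan64};
use fftw::types::Flag;
use num::Complex;

fn round_trip() {
    let shape = [4, 4, 4];
    let size: usize = shape.iter().product();
    let size_real = shape[0] * shape[1] * (shape[2] / 2 + 1);

    // Plan first: Flag::MEASURE may overwrite buffer contents during planning.
    let mut fwd: R2CPlan64 = R2CPlan::aligned(&shape, Flag::MEASURE).unwrap();
    let mut inv: C2RPlan64 = C2RPlan::aligned(&shape, Flag::MEASURE).unwrap();

    let mut signal: Vec<f64> = (0..size).map(|i| i as f64).collect();
    let original = signal.clone();
    let mut spectrum = vec![Complex::<f64>::default(); size_real];

    fwd.r2c(&mut signal, &mut spectrum).unwrap();
    inv.c2r(&mut spectrum, &mut signal).unwrap();

    // c2r is unnormalised, hence the explicit 1/size factor.
    signal.iter_mut().for_each(|v| *v *= 1.0 / size as f64);

    for (a, b) in signal.iter().zip(original.iter()) {
        assert!((a - b).abs() < 1e-10);
    }
}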
diff --git a/field/src/types.rs b/field/src/types.rs
index 2723b5a1..9b82f2df 100644
--- a/field/src/types.rs
+++ b/field/src/types.rs
@@ -128,16 +128,16 @@ where
 }
 
 /// Type alias for real coefficients for FFTW wrappers
-pub type FftMatrixf64 = Matrix<f64, BaseMatrix<f64, VectorContainer<f64>, Dynamic>, Dynamic>;
+pub type FftMatrixf64 = Vec<f64>;
 
 /// Type alias for real coefficients for FFTW wrappers
-pub type FftMatrixf32 = Matrix<f32, BaseMatrix<f32, VectorContainer<f32>, Dynamic>, Dynamic>;
+pub type FftMatrixf32 = Vec<f32>;
 
 /// Type alias for complex coefficients for FFTW wrappers
-pub type FftMatrixc64 = Matrix<c64, BaseMatrix<c64, VectorContainer<c64>, Dynamic>, Dynamic>;
+pub type FftMatrixc64 = Vec<Complex<f64>>;
 
 /// Type alias for complex coefficients for FFTW wrappers
-pub type FftMatrixc32 = Matrix<c32, BaseMatrix<c32, VectorContainer<c32>, Dynamic>, Dynamic>;
+pub type FftMatrixc32 = Vec<Complex<f32>>;
 
 /// Type alias for real coefficients for FFTW wrappers
-pub type FftMatrix<T> = Matrix<T, BaseMatrix<T, VectorContainer<T>, Dynamic>, Dynamic>;
+pub type FftMatrix<T> = Vec<T>;
diff --git a/fmm/src/constants.rs b/fmm/src/constants.rs
index c64cf8c6..2d47f96e 100644
--- a/fmm/src/constants.rs
+++ b/fmm/src/constants.rs
@@ -1,4 +1,4 @@
 //! Crate wide constants
-/// Size of cache in bytes to use for blocking purposes during the M2L sparsification via an FFT
-pub const CACHE_SIZE: usize = 512;
+/// Maximum number of leaf boxes that a single P2M work item processes at once
+pub const P2M_MAX_CHUNK_SIZE: usize = 256;
diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs
deleted file mode 100644
index b3057c5a..00000000
--- a/fmm/src/field_translation.rs
+++ /dev/null
@@ -1,813 +0,0 @@
-//! Implementation of field translations for each FMM.
-use std::{
-    collections::HashMap,
-    ops::{Deref, DerefMut},
-    sync::{Arc, Mutex, RwLock},
-};
-
-use bempp_tools::Array3D;
-use itertools::Itertools;
-use num::{Complex, Float};
-use rayon::prelude::*;
-
-use bempp_field::{
-    array::pad3,
-    fft::Fft,
-    types::{FftFieldTranslationKiFmm, FftMatrix, SvdFieldTranslationKiFmm},
-};
-
-use bempp_traits::{
-    arrays::Array3DAccess,
-    field::{FieldTranslation, FieldTranslationData},
-    fmm::{Fmm, InteractionLists, SourceTranslation, TargetTranslation},
-    kernel::{Kernel, KernelScale},
-    tree::Tree,
-    types::EvalType,
-};
-use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree};
-
-use rlst::{
-    algorithms::{linalg::DenseMatrixLinAlgBuilder, traits::svd::Svd},
-    common::traits::*,
-    dense::{
-        rlst_col_vec, rlst_dynamic_mat, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, Shape,
-        VectorContainer,
-    },
-};
-
-use crate::types::{FmmData, KiFmm};
-
-impl<T, U, V> SourceTranslation for FmmData<KiFmm<SingleNodeTree<V>, T, U, V>, V>
-where
-    T: Kernel<T = V> + KernelScale<T = V> + std::marker::Send + std::marker::Sync,
-    U: FieldTranslationData<T> + std::marker::Sync + std::marker::Send,
-    V: Scalar + Float + Default + std::marker::Sync + std::marker::Send,
-    V: MultiplyAdd<
-        V,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        Dynamic,
-        Dynamic,
-        Dynamic,
-    >,
-{
-    /// Point to multipole evaluations, multithreaded over each leaf box.
- fn p2m<'a>(&self) { - if let Some(leaves) = self.fmm.tree().get_leaves() { - leaves.par_iter().for_each(move |&leaf| { - let leaf_multipole_arc = Arc::clone(self.multipoles.get(&leaf).unwrap()); - let fmm_arc = Arc::clone(&self.fmm); - - if let Some(leaf_points) = self.points.get(&leaf) { - let leaf_charges_arc = Arc::clone(self.charges.get(&leaf).unwrap()); - - // Lookup data - let leaf_coordinates = leaf_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let nsources = leaf_coordinates.len() / self.fmm.kernel.space_dimension(); - - let leaf_coordinates = unsafe { - rlst_pointer_mat!['a, V, leaf_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - let upward_check_surface = leaf.compute_surface( - &fmm_arc.tree().domain, - fmm_arc.order, - fmm_arc.alpha_outer, - ); - let ntargets = upward_check_surface.len() / fmm_arc.kernel.space_dimension(); - - let leaf_charges = leaf_charges_arc.deref(); - - // Calculate check potential - let mut check_potential = rlst_col_vec![V, ntargets]; - - fmm_arc.kernel.evaluate_st( - EvalType::Value, - leaf_coordinates.data(), - &upward_check_surface[..], - &leaf_charges[..], - check_potential.data_mut(), - ); - - let mut tmp = fmm_arc.uc2e_inv_1.dot(&fmm_arc.uc2e_inv_2.dot(&check_potential)).eval(); - tmp.data_mut().iter_mut().for_each(|d| *d *= fmm_arc.kernel.scale(leaf.level())); - let leaf_multipole_owned = tmp; - let mut leaf_multipole_lock = leaf_multipole_arc.lock().unwrap(); - *leaf_multipole_lock.deref_mut() = (leaf_multipole_lock.deref() + leaf_multipole_owned).eval(); - } - }); - } - } - - /// Multipole to multipole translations, multithreaded over all boxes at a given level. - fn m2m<'a>(&self, level: u64) { - // Parallelise over nodes at a given level - if let Some(sources) = self.fmm.tree().get_keys(level) { - sources.par_iter().for_each(move |&source| { - let operator_index = source.siblings().iter().position(|&x| x == source).unwrap(); - let source_multipole_arc = Arc::clone(self.multipoles.get(&source).unwrap()); - let target_multipole_arc = - Arc::clone(self.multipoles.get(&source.parent()).unwrap()); - let fmm_arc = Arc::clone(&self.fmm); - - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - let target_multipole_owned = - fmm_arc.m2m[operator_index].dot(&source_multipole_lock); - - let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); - - *target_multipole_lock.deref_mut() = - (target_multipole_lock.deref() + target_multipole_owned).eval(); - }) - } - } -} - -impl TargetTranslation for FmmData, T, U, V>, V> -where - T: Kernel + KernelScale + std::marker::Send + std::marker::Sync, - U: FieldTranslationData + std::marker::Sync + std::marker::Send, - V: Scalar + Float + Default + std::marker::Sync + std::marker::Send, - V: MultiplyAdd< - V, - VectorContainer, - VectorContainer, - VectorContainer, - Dynamic, - Dynamic, - Dynamic, - >, -{ - fn l2l(&self, level: u64) { - if let Some(targets) = self.fmm.tree().get_keys(level) { - targets.par_iter().for_each(move |&target| { - let source_local_arc = Arc::clone(self.locals.get(&target.parent()).unwrap()); - let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); - let fmm = Arc::clone(&self.fmm); - - let operator_index = target.siblings().iter().position(|&x| x == target).unwrap(); - - let source_local_lock = source_local_arc.lock().unwrap(); - - let target_local_owned = fmm.l2l[operator_index].dot(&source_local_lock); - let mut 
target_local_lock = target_local_arc.lock().unwrap(); - - *target_local_lock.deref_mut() = - (target_local_lock.deref() + target_local_owned).eval(); - }) - } - } - - fn m2p<'a>(&self) { - if let Some(targets) = self.fmm.tree().get_leaves() { - targets.par_iter().for_each(move |&target| { - - let fmm_arc = Arc::clone(&self.fmm); - - if let Some(points) = fmm_arc.tree().get_points(&target) { - let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); - if let Some(w_list) = fmm_arc.get_w_list(&target) { - for source in w_list.iter() { - let source_multipole_arc = - Arc::clone(self.multipoles.get(source).unwrap()); - - let upward_equivalent_surface = source.compute_surface( - fmm_arc.tree().get_domain(), - fmm_arc.order(), - fmm_arc.alpha_inner, - ); - - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - let target_coordinates = points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); - - let target_coordinates = unsafe { - rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - - let mut target_potential = rlst_col_vec![V, ntargets]; - - fmm_arc.kernel.evaluate_st( - EvalType::Value, - &upward_equivalent_surface[..], - target_coordinates.data(), - source_multipole_lock.data(), - target_potential.data_mut(), - ); - - let mut target_potential_lock = target_potential_arc.lock().unwrap(); - - *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); - } - } - } - } -) - } - } - - fn l2p<'a>(&self) { - if let Some(targets) = self.fmm.tree().get_leaves() { - targets.par_iter().for_each(move |&leaf| { - let fmm_arc = Arc::clone(&self.fmm); - let source_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); - - if let Some(target_points) = fmm_arc.tree().get_points(&leaf) { - let target_potential_arc = Arc::clone(self.potentials.get(&leaf).unwrap()); - // Lookup data - let target_coordinates = target_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); - - let target_coordinates = unsafe { - rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - let downward_equivalent_surface = leaf.compute_surface( - &fmm_arc.tree().domain, - fmm_arc.order, - fmm_arc.alpha_outer, - ); - - let source_local_lock = source_local_arc.lock().unwrap(); - - let mut target_potential = rlst_col_vec![V, ntargets]; - - fmm_arc.kernel.evaluate_st( - EvalType::Value, - &downward_equivalent_surface[..], - target_coordinates.data(), - source_local_lock.data(), - target_potential.data_mut(), - ); - - let mut target_potential_lock = target_potential_arc.lock().unwrap(); - - *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); - } - }) - } - } - - fn p2l<'a>(&self) { - if let Some(targets) = self.fmm.tree().get_leaves() { - targets.par_iter().for_each(move |&leaf| { - let fmm_arc = Arc::clone(&self.fmm); - let target_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); - - if let Some(x_list) = fmm_arc.get_x_list(&leaf) { - for source in x_list.iter() { - if let Some(source_points) = fmm_arc.tree().get_points(source) { - let source_coordinates = source_points - .iter() 
- .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); - - let source_coordinates = unsafe { - rlst_pointer_mat!['a, V, source_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - let source_charges = self.charges.get(source).unwrap(); - - let downward_check_surface = leaf.compute_surface( - &fmm_arc.tree().domain, - fmm_arc.order, - fmm_arc.alpha_inner, - ); - - let ntargets = downward_check_surface.len() / fmm_arc.kernel.space_dimension(); - let mut downward_check_potential = rlst_col_vec![V, ntargets]; - - fmm_arc.kernel.evaluate_st( - EvalType::Value, - source_coordinates.data(), - &downward_check_surface[..], - &source_charges[..], - downward_check_potential.data_mut() - ); - - - let mut target_local_lock = target_local_arc.lock().unwrap(); - let mut tmp = fmm_arc.dc2e_inv_1.dot(&fmm_arc.dc2e_inv_2.dot(&downward_check_potential)).eval(); - tmp.data_mut().iter_mut().for_each(|d| *d *= fmm_arc.kernel.scale(leaf.level())); - let target_local_owned = tmp; - *target_local_lock.deref_mut() = (target_local_lock.deref() + target_local_owned).eval(); - } - } - } - }) - } - } - - fn p2p<'a>(&self) { - if let Some(targets) = self.fmm.tree().get_leaves() { - targets.par_iter().for_each(move |&target| { - let fmm_arc = Arc::clone(&self.fmm); - - if let Some(target_points) = fmm_arc.tree().get_points(&target) { - let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); - let target_coordinates = target_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let ntargets= target_coordinates.len() / self.fmm.kernel.space_dimension(); - - let target_coordinates = unsafe { - rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - if let Some(u_list) = fmm_arc.get_u_list(&target) { - for source in u_list.iter() { - if let Some(source_points) = fmm_arc.tree().get_points(source) { - let source_coordinates = source_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); - - let source_coordinates = unsafe { - rlst_pointer_mat!['a, V, source_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - let source_charges_arc = - Arc::clone(self.charges.get(source).unwrap()); - - let mut target_potential = rlst_col_vec![V, ntargets]; - - fmm_arc.kernel.evaluate_st( - EvalType::Value, - source_coordinates.data(), - target_coordinates.data(), - &source_charges_arc[..], - target_potential.data_mut(), - ); - - let mut target_potential_lock = - target_potential_arc.lock().unwrap(); - - *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); - } - } - } - } - }) - } - } -} - -/// Implement the multipole to local translation operator for an SVD accelerated KiFMM on a single node. 
-impl FieldTranslation - for FmmData, T, SvdFieldTranslationKiFmm, U>, U> -where - T: Kernel + KernelScale + std::marker::Send + std::marker::Sync + Default, - DenseMatrixLinAlgBuilder: Svd, - U: Scalar, - U: Float - + Default - + MultiplyAdd< - U, - VectorContainer, - VectorContainer, - VectorContainer, - Dynamic, - Dynamic, - Dynamic, - >, - U: std::marker::Send + std::marker::Sync + Default, -{ - fn m2l<'a>(&self, level: u64) { - let Some(targets) = self.fmm.tree().get_keys(level) else { - return; - }; - let mut transfer_vector_to_m2l = - HashMap::>>>::new(); - - for tv in self.fmm.m2l.transfer_vectors.iter() { - transfer_vector_to_m2l.insert(tv.hash, Arc::new(Mutex::new(Vec::new()))); - } - - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - - targets.par_iter().enumerate().for_each(|(_i, &target)| { - if let Some(v_list) = self.fmm.get_v_list(&target) { - let calculated_transfer_vectors = v_list - .iter() - .map(|source| target.find_transfer_vector(source)) - .collect::>(); - for (transfer_vector, &source) in - calculated_transfer_vectors.iter().zip(v_list.iter()) - { - let m2l_arc = Arc::clone(transfer_vector_to_m2l.get(transfer_vector).unwrap()); - let mut m2l_lock = m2l_arc.lock().unwrap(); - m2l_lock.push((source, target)); - } - } - }); - - let mut transfer_vector_to_m2l_rw_lock = - HashMap::>>>::new(); - - // Find all multipole expansions and allocate - for (&transfer_vector, m2l_arc) in transfer_vector_to_m2l.iter() { - transfer_vector_to_m2l_rw_lock.insert( - transfer_vector, - Arc::new(RwLock::new(m2l_arc.lock().unwrap().clone())), - ); - } - - transfer_vector_to_m2l_rw_lock - .par_iter() - .for_each(|(transfer_vector, m2l_arc)| { - let c_idx = self - .fmm - .m2l - .transfer_vectors - .iter() - .position(|x| x.hash == *transfer_vector) - .unwrap(); - - let (nrows, _) = self.fmm.m2l.operator_data.c.shape(); - let top_left = (0, c_idx * self.fmm.m2l.k); - let dim = (nrows, self.fmm.m2l.k); - - let c_sub = self.fmm.m2l.operator_data.c.block(top_left, dim); - - let m2l_rw = m2l_arc.read().unwrap(); - let mut multipoles = rlst_dynamic_mat![U, (self.fmm.m2l.k, m2l_rw.len())]; - - for (i, (source, _)) in m2l_rw.iter().enumerate() { - let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - // Compressed multipole - let compressed_source_multipole_owned = self - .fmm - .m2l - .operator_data - .st_block - .dot(&source_multipole_lock) - .eval(); - - let first = i * self.fmm.m2l.k; - let last = first + self.fmm.m2l.k; - - let multipole_slice = multipoles.get_slice_mut(first, last); - multipole_slice.copy_from_slice(compressed_source_multipole_owned.data()); - } - - // Compute convolution - let compressed_check_potential_owned = c_sub.dot(&multipoles); - - // Post process to find check potential - let check_potential_owned = self - .fmm - .m2l - .operator_data - .u - .dot(&compressed_check_potential_owned) - .eval(); - - let mut tmp = self - .fmm - .dc2e_inv_1 - .dot(&self.fmm.dc2e_inv_2.dot(&check_potential_owned)); - tmp.data_mut() - .iter_mut() - .for_each(|d| *d *= self.fmm.kernel.scale(level) * self.m2l_scale(level)); - let locals_owned = tmp; - - // Assign locals - for (i, (_, target)) in m2l_rw.iter().enumerate() { - let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); - let mut target_local_lock = target_local_arc.lock().unwrap(); - - let top_left = (0, i); - let dim = (ncoeffs, 1); - let target_local_owned = locals_owned.block(top_left, dim); - - 
*target_local_lock.deref_mut() = - (target_local_lock.deref() + target_local_owned).eval(); - } - }); - } - - fn m2l_scale(&self, level: u64) -> U { - if level < 2 { - panic!("M2L only perfomed on level 2 and below") - } - - if level == 2 { - U::from(1. / 2.).unwrap() - } else { - let two = U::from(2.0).unwrap(); - Scalar::powf(two, U::from(level - 3).unwrap()) - } - } -} - -#[derive(Clone, Debug, Copy)] -struct SendPtrMut { - raw: *mut T, -} - -unsafe impl Sync for SendPtrMut {} - -#[derive(Clone, Debug, Copy)] -struct SendPtr { - raw: *const T, -} - -unsafe impl Sync for SendPtr {} - -/// Implement the multipole to local translation operator for an FFT accelerated KiFMM on a single node. -impl FieldTranslation - for FmmData, T, FftFieldTranslationKiFmm, U>, U> -where - T: Kernel + KernelScale + std::marker::Send + std::marker::Sync + Default, - U: Scalar - + Float - + Default - + std::marker::Send - + std::marker::Sync - + Fft, FftMatrix>>, - Complex: Scalar, - U: MultiplyAdd< - U, - VectorContainer, - VectorContainer, - VectorContainer, - Dynamic, - Dynamic, - Dynamic, - >, -{ - fn m2l<'a>(&self, level: u64) { - let Some(targets) = self.fmm.tree().get_keys(level) else { - return; - }; - - // Form signals to use for convolution first - let n = 2 * self.fmm.order - 1; - let ntargets = targets.len(); - - // Pad the signal - let &(m, n, o) = &(n, n, n); - - let p = m + 1; - let q = n + 1; - let r = o + 1; - let size = p * q * r; - let size_real = p * q * (r / 2 + 1); - let pad_size = (p - m, q - n, r - o); - let pad_index = (p - m, q - n, r - o); - let mut padded_signals = rlst_col_vec![U, size * ntargets]; - - let chunks = padded_signals.data_mut().par_chunks_exact_mut(size); - - let range = (0..chunks.len()).into_par_iter(); - range.zip(chunks).for_each(|(i, chunk)| { - let fmm_arc = Arc::clone(&self.fmm); - let target = targets[i]; - let source_multipole_arc = Arc::clone(self.multipoles.get(&target).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - let signal = fmm_arc - .m2l - .compute_signal(fmm_arc.order, source_multipole_lock.data()); - - let padded_signal = pad3(&signal, pad_size, pad_index); - - chunk.copy_from_slice(padded_signal.get_data()); - }); - let mut padded_signals_hat = rlst_col_vec![Complex, size_real * ntargets]; - - U::rfft3_fftw_par_vec(&mut padded_signals, &mut padded_signals_hat, &[p, q, r]); - - let kernel_data_halo = &self.fmm.m2l.operator_data.kernel_data_rearranged; - let ntargets = targets.len(); - let nparents = ntargets / 8; - let mut global_check_potentials_hat = rlst_col_vec![Complex, size_real * ntargets]; - let mut global_check_potentials = rlst_col_vec![U, size * ntargets]; - - // Get check potentials in frequency order - let mut global_check_potentials_hat_freq = vec![Vec::new(); size_real]; - - unsafe { - let ptr = global_check_potentials_hat.get_pointer_mut(); - for (i, elem) in global_check_potentials_hat_freq - .iter_mut() - .enumerate() - .take(size_real) - { - for j in 0..ntargets { - let raw = ptr.offset((j * size_real + i).try_into().unwrap()); - let send_ptr = SendPtrMut { raw }; - elem.push(send_ptr); - } - } - } - - // Get signals into frequency order - let mut padded_signals_hat_freq = vec![Vec::new(); size_real]; - let zero = rlst_col_vec![Complex, 8]; - unsafe { - let ptr = padded_signals_hat.get_pointer(); - - for (i, elem) in padded_signals_hat_freq - .iter_mut() - .enumerate() - .take(size_real) - { - for j in 0..ntargets { - let raw = ptr.offset((j * size_real + i).try_into().unwrap()); - let send_ptr = 
SendPtr { raw }; - elem.push(send_ptr); - } - // put in a bunch of zeros at the end - let ptr = zero.get_pointer(); - for _ in 0..8 { - let send_ptr = SendPtr { raw: ptr }; - elem.push(send_ptr) - } - } - } - - // Create a map between targets and index positions in vec of len 'ntargets' - let mut target_map = HashMap::new(); - - for (i, t) in targets.iter().enumerate() { - target_map.insert(t, i); - } - - // Find all the displacements used for saving results - let mut all_displacements = Vec::new(); - targets.chunks_exact(8).for_each(|sibling_chunk| { - // not in Morton order (refer to sort method when called on 'neighbours') - let parent_neighbours: Vec> = - sibling_chunk[0].parent().all_neighbors(); - - let displacements = parent_neighbours - .iter() - .map(|pn| { - let mut tmp = Vec::new(); - if let Some(pn) = pn { - if self.fmm.tree.keys_set.contains(pn) { - let mut children = pn.children(); - children.sort(); - for child in children { - // tmp.push(*target_map.get(&child).unwrap() as i64) - tmp.push(*target_map.get(&child).unwrap()) - } - } else { - for i in 0..8 { - tmp.push(ntargets + i) - } - } - } else { - for i in 0..8 { - tmp.push(ntargets + i) - } - } - - assert!(tmp.len() == 8); - tmp - }) - .collect_vec(); - all_displacements.push(displacements); - }); - - let scale = Complex::from(self.m2l_scale(level)); - - (0..size_real).into_par_iter().for_each(|freq| { - // Extract frequency component of signal (ntargets long) - let padded_signal_freq = &padded_signals_hat_freq[freq]; - - // Extract frequency components of save locations (ntargets long) - let check_potential_freq = &global_check_potentials_hat_freq[freq]; - - (0..nparents).for_each(|sibling_idx| { - // lookup associated save locations for our current sibling set - let save_locations = - &check_potential_freq[(sibling_idx * 8)..(sibling_idx + 1) * 8]; - let save_locations_raw = save_locations.iter().map(|s| s.raw).collect_vec(); - - // for each halo position compute convolutions to a given sibling set - for (i, kernel_data) in kernel_data_halo.iter().enumerate().take(26) { - let frequency_offset = 64 * freq; - let kernel_data_i = &kernel_data[frequency_offset..(frequency_offset + 64)]; - - // Find displacements for signal being translated - let displacements = &all_displacements[sibling_idx][i]; - - // Lookup signal to be translated if a translation is to be performed - let signal = &padded_signal_freq[(displacements[0])..=(displacements[7])]; - for j in 0..8 { - let kernel_data_ij = &kernel_data_i[j * 8..(j + 1) * 8]; - let sig = signal[j].raw; - unsafe { - save_locations_raw - .iter() - .zip(kernel_data_ij.iter()) - .for_each(|(&sav, &ker)| *sav += scale * ker * *sig) - } - } // inner loop - } - }); // over each sibling set - }); - - U::irfft_fftw_par_vec( - &mut global_check_potentials_hat, - &mut global_check_potentials, - &[p, q, r], - ); - - // Compute local expansion coefficients and save to data tree - let (_, multi_indices) = MortonKey::surface_grid::(self.fmm.order); - - let check_potentials = global_check_potentials - .data() - .chunks_exact(size) - .flat_map(|chunk| { - let m = 2 * self.fmm.order - 1; - let p = m + 1; - let mut potentials = Array3D::new((p, p, p)); - potentials.get_data_mut().copy_from_slice(chunk); - - let mut tmp = Vec::new(); - let ntargets = multi_indices.len() / 3; - let xs = &multi_indices[0..ntargets]; - let ys = &multi_indices[ntargets..2 * ntargets]; - let zs = &multi_indices[2 * ntargets..]; - - for i in 0..ntargets { - let val = potentials.get(zs[i], ys[i], xs[i]).unwrap(); - 
tmp.push(*val); - } - tmp - }) - .collect_vec(); - - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - let check_potentials = unsafe { - rlst_pointer_mat!['a, U, check_potentials.as_ptr(), (ncoeffs, ntargets), (1, ncoeffs)] - }; - - let mut tmp = self - .fmm - .dc2e_inv_1 - .dot(&self.fmm.dc2e_inv_2.dot(&check_potentials)) - .eval(); - tmp.data_mut() - .iter_mut() - .for_each(|d| *d *= self.fmm.kernel.scale(level)); - let locals = tmp; - - for (i, target) in targets.iter().enumerate() { - let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); - let mut target_local_lock = target_local_arc.lock().unwrap(); - - let top_left = (0, i); - let dim = (ncoeffs, 1); - let target_local_owned = locals.block(top_left, dim); - - *target_local_lock.deref_mut() = - (target_local_lock.deref() + target_local_owned).eval(); - } - } - - fn m2l_scale(&self, level: u64) -> U { - if level < 2 { - panic!("M2L only perfomed on level 2 and below") - } - - if level == 2 { - U::from(1. / 2.).unwrap() - } else { - let two = U::from(2.0).unwrap(); - Scalar::powf(two, U::from(level - 3).unwrap()) - } - } -} diff --git a/fmm/src/field_translation/hashmap/source.rs b/fmm/src/field_translation/hashmap/source.rs new file mode 100644 index 00000000..136fbb8a --- /dev/null +++ b/fmm/src/field_translation/hashmap/source.rs @@ -0,0 +1,210 @@ +//! Implementation of Source and Target translations, as well as Source to Target translation. +use std::{ + ops::{Deref, DerefMut}, + sync::Arc, +}; + +use itertools::Itertools; +use num::Float; +use rayon::prelude::*; + +use bempp_traits::{ + field::FieldTranslationData, + fmm::{Fmm, SourceTranslation}, + kernel::{Kernel, ScaleInvariantKernel}, + tree::Tree, + types::EvalType, +}; +use bempp_tree::types::single_node::SingleNodeTree; + +use rlst::{ + common::traits::*, + dense::{rlst_col_vec, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer}, +}; + +use crate::types::{FmmDataHashmap, KiFmmHashMap}; + +impl SourceTranslation for FmmDataHashmap, T, U, V>, V> +where + T: Kernel + ScaleInvariantKernel + std::marker::Send + std::marker::Sync, + U: FieldTranslationData + std::marker::Sync + std::marker::Send, + V: Scalar + Float + Default + std::marker::Sync + std::marker::Send, + V: MultiplyAdd< + V, + VectorContainer, + VectorContainer, + VectorContainer, + Dynamic, + Dynamic, + Dynamic, + >, +{ + /// Point to multipole evaluations, multithreaded over each leaf box. 
+ fn p2m<'a>(&self) { + if let Some(leaves) = self.fmm.tree().get_all_leaves() { + leaves.par_iter().for_each(move |&leaf| { + + let leaf_multipole_arc = Arc::clone(self.multipoles.get(&leaf).unwrap()); + + if let Some(leaf_points) = self.points.get(&leaf) { + let leaf_charges_arc = Arc::clone(self.charges.get(&leaf).unwrap()); + + // Lookup data + let leaf_coordinates = leaf_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let nsources = leaf_coordinates.len() / self.fmm.kernel.space_dimension(); + + let leaf_coordinates = unsafe { + rlst_pointer_mat!['a, V, leaf_coordinates.as_ptr(), (nsources, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] + }.eval(); + + let upward_check_surface = leaf.compute_surface( + &self.fmm.tree().domain, + self.fmm.order, + self.fmm.alpha_outer, + ); + let ntargets = upward_check_surface.len() / self.fmm.kernel.space_dimension(); + + let leaf_charges = leaf_charges_arc.deref(); + + // Calculate check potential + let mut check_potential = rlst_col_vec![V, ntargets]; + + self.fmm.kernel.evaluate_st( + EvalType::Value, + leaf_coordinates.data(), + &upward_check_surface[..], + &leaf_charges[..], + check_potential.data_mut(), + ); + + let mut tmp = self.fmm.uc2e_inv_1.dot(&self.fmm.uc2e_inv_2.dot(&check_potential)).eval(); + tmp.data_mut().iter_mut().for_each(|d| *d *= self.fmm.kernel.scale(leaf.level())); + let leaf_multipole_owned = tmp; + let mut leaf_multipole_lock = leaf_multipole_arc.lock().unwrap(); + *leaf_multipole_lock.deref_mut() = (leaf_multipole_lock.deref() + leaf_multipole_owned).eval(); + } + }); + } + } + + /// Multipole to multipole translations, multithreaded over all boxes at a given level. + fn m2m<'a>(&self, level: u64) { + // Parallelise over nodes at a given level + if let Some(sources) = self.fmm.tree().get_keys(level) { + sources.par_iter().for_each(move |&source| { + let operator_index = source.siblings().iter().position(|&x| x == source).unwrap(); + let source_multipole_arc = Arc::clone(self.multipoles.get(&source).unwrap()); + let target_multipole_arc = + Arc::clone(self.multipoles.get(&source.parent()).unwrap()); + + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + + let target_multipole_owned = + self.fmm.m2m[operator_index].dot(&source_multipole_lock); + + let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); + + *target_multipole_lock.deref_mut() = + (target_multipole_lock.deref() + target_multipole_owned).eval(); + }) + } + } +} + +#[cfg(test)] +mod test { + + use super::*; + + use crate::charge::build_charge_dict; + use bempp_field::types::SvdFieldTranslationKiFmm; + use bempp_kernel::laplace_3d::Laplace3dKernel; + use bempp_tree::{constants::ROOT, implementations::helpers::points_fixture}; + + #[test] + fn test_upward_pass() { + let npoints = 10000; + let points = points_fixture(npoints, None, None); + let global_idxs = (0..npoints).collect_vec(); + let charges = vec![1.0; npoints]; + + let kernel = Laplace3dKernel::::default(); + let order = 6; + let alpha_inner = 1.05; + let alpha_outer = 2.95; + let adaptive = false; + let k = 1000; + let ncrit = 150; + let depth = 3; + + // Create a tree + let tree = SingleNodeTree::new( + points.data(), + adaptive, + Some(ncrit), + Some(depth), + &global_idxs[..], + ); + + // Precompute the M2L data + let m2l_data_svd = SvdFieldTranslationKiFmm::new( + kernel.clone(), + Some(k), + order, + *tree.get_domain(), + alpha_inner, + ); + let fmm = KiFmmHashMap::new(order, 
alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); + + // Form charge dict, matching charges with their associated global indices + let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); + + // Associate data with the FMM + let datatree = FmmDataHashmap::new(fmm, &charge_dict); + + // Upward pass + { + datatree.p2m(); + + for level in (1..=depth).rev() { + datatree.m2m(level); + } + } + + let multipole = datatree.multipoles.get(&ROOT).unwrap(); + + let surface = + ROOT.compute_surface(&datatree.fmm.tree().domain, order, datatree.fmm.alpha_inner); + + let test_point = vec![100000., 0., 0.]; + + let mut expected = vec![0.]; + let mut found = vec![0.]; + + let kernel = Laplace3dKernel::::default(); + kernel.evaluate_st( + EvalType::Value, + points.data(), + &test_point, + &charges, + &mut expected, + ); + + kernel.evaluate_st( + EvalType::Value, + &surface, + &test_point, + multipole.lock().unwrap().data(), + &mut found, + ); + + let abs_error = (expected[0] - found[0]).abs(); + let rel_error = abs_error / expected[0]; + assert!(rel_error <= 1e-5); + } +} diff --git a/fmm/src/field_translation/hashmap/source_to_target.rs b/fmm/src/field_translation/hashmap/source_to_target.rs new file mode 100644 index 00000000..ca624415 --- /dev/null +++ b/fmm/src/field_translation/hashmap/source_to_target.rs @@ -0,0 +1,471 @@ +//! Implementation of Source and Target translations, as well as Source to Target translation. +use std::{ + collections::HashMap, + ops::{Deref, DerefMut}, + sync::{Arc, Mutex, RwLock}, +}; + +use bempp_tools::Array3D; +use itertools::Itertools; +use num::{Complex, Float}; +use rayon::prelude::*; + +use bempp_field::{ + array::pad3, + fft::Fft, + types::{FftFieldTranslationKiFmm, FftMatrix, SvdFieldTranslationKiFmm}, +}; + +use bempp_traits::{ + arrays::Array3DAccess, + field::{FieldTranslation, FieldTranslationData}, + fmm::{Fmm, InteractionLists}, + kernel::{Kernel, ScaleInvariantKernel}, + tree::Tree, +}; +use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; + +use rlst::{ + algorithms::{linalg::DenseMatrixLinAlgBuilder, traits::svd::Svd}, + common::traits::*, + dense::{ + rlst_col_vec, rlst_dynamic_mat, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, Shape, + VectorContainer, + }, +}; + +use crate::types::{FmmDataHashmap, KiFmmHashMap, SendPtr, SendPtrMut}; + +/// Implement the multipole to local translation operator for an SVD accelerated KiFMM on a single node. 
+impl FieldTranslation + for FmmDataHashmap, T, SvdFieldTranslationKiFmm, U>, U> +where + T: Kernel + + ScaleInvariantKernel + + std::marker::Send + + std::marker::Sync + + Default, + DenseMatrixLinAlgBuilder: Svd, + U: Scalar, + U: Float + + Default + + MultiplyAdd< + U, + VectorContainer, + VectorContainer, + VectorContainer, + Dynamic, + Dynamic, + Dynamic, + >, + U: std::marker::Send + std::marker::Sync + Default, +{ + fn m2l<'a>(&self, level: u64) { + let Some(targets) = self.fmm.tree().get_keys(level) else { + return; + }; + let mut transfer_vector_to_m2l = + HashMap::>>>::new(); + + for tv in self.fmm.m2l.transfer_vectors.iter() { + transfer_vector_to_m2l.insert(tv.hash, Arc::new(Mutex::new(Vec::new()))); + } + + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + targets.par_iter().enumerate().for_each(|(_i, &target)| { + if let Some(v_list) = self.fmm.get_v_list(&target) { + let calculated_transfer_vectors = v_list + .iter() + .map(|source| target.find_transfer_vector(source)) + .collect::>(); + for (transfer_vector, &source) in + calculated_transfer_vectors.iter().zip(v_list.iter()) + { + let m2l_arc = Arc::clone(transfer_vector_to_m2l.get(transfer_vector).unwrap()); + let mut m2l_lock = m2l_arc.lock().unwrap(); + m2l_lock.push((source, target)); + } + } + }); + + let mut transfer_vector_to_m2l_rw_lock = + HashMap::>>>::new(); + + // Find all multipole expansions and allocate + for (&transfer_vector, m2l_arc) in transfer_vector_to_m2l.iter() { + transfer_vector_to_m2l_rw_lock.insert( + transfer_vector, + Arc::new(RwLock::new(m2l_arc.lock().unwrap().clone())), + ); + } + + transfer_vector_to_m2l_rw_lock + .par_iter() + .for_each(|(transfer_vector, m2l_arc)| { + let c_idx = self + .fmm + .m2l + .transfer_vectors + .iter() + .position(|x| x.hash == *transfer_vector) + .unwrap(); + + let (nrows, _) = self.fmm.m2l.operator_data.c.shape(); + let top_left = (0, c_idx * self.fmm.m2l.k); + let dim = (nrows, self.fmm.m2l.k); + + let c_sub = self.fmm.m2l.operator_data.c.block(top_left, dim); + + let m2l_rw = m2l_arc.read().unwrap(); + let mut multipoles = rlst_dynamic_mat![U, (self.fmm.m2l.k, m2l_rw.len())]; + + for (i, (source, _)) in m2l_rw.iter().enumerate() { + let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + + // Compressed multipole + let compressed_source_multipole_owned = self + .fmm + .m2l + .operator_data + .st_block + .dot(&source_multipole_lock) + .eval(); + + let first = i * self.fmm.m2l.k; + let last = first + self.fmm.m2l.k; + + let multipole_slice = multipoles.get_slice_mut(first, last); + multipole_slice.copy_from_slice(compressed_source_multipole_owned.data()); + } + + // Compute convolution + let compressed_check_potential_owned = c_sub.dot(&multipoles); + + // Post process to find check potential + let check_potential_owned = self + .fmm + .m2l + .operator_data + .u + .dot(&compressed_check_potential_owned) + .eval(); + + let mut tmp = self + .fmm + .dc2e_inv_1 + .dot(&self.fmm.dc2e_inv_2.dot(&check_potential_owned)); + tmp.data_mut() + .iter_mut() + .for_each(|d| *d *= self.fmm.kernel.scale(level) * self.m2l_scale(level)); + let locals_owned = tmp; + + // Assign locals + for (i, (_, target)) in m2l_rw.iter().enumerate() { + let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); + let mut target_local_lock = target_local_arc.lock().unwrap(); + + let top_left = (0, i); + let dim = (ncoeffs, 1); + let target_local_owned = locals_owned.block(top_left, dim); + 
+ *target_local_lock.deref_mut() = + (target_local_lock.deref() + target_local_owned).eval(); + } + }); + } + + fn m2l_scale(&self, level: u64) -> U { + if level < 2 { + panic!("M2L only perfomed on level 2 and below") + } + + if level == 2 { + U::from(1. / 2.).unwrap() + } else { + let two = U::from(2.0).unwrap(); + Scalar::powf(two, U::from(level - 3).unwrap()) + } + } +} + +/// Implement the multipole to local translation operator for an FFT accelerated KiFMM on a single node. +impl FieldTranslation + for FmmDataHashmap, T, FftFieldTranslationKiFmm, U>, U> +where + T: Kernel + + ScaleInvariantKernel + + std::marker::Send + + std::marker::Sync + + Default, + U: Scalar + + Float + + Default + + std::marker::Send + + std::marker::Sync + + Fft, FftMatrix>>, + Complex: Scalar, + U: MultiplyAdd< + U, + VectorContainer, + VectorContainer, + VectorContainer, + Dynamic, + Dynamic, + Dynamic, + >, +{ + fn m2l<'a>(&self, level: u64) { + let Some(targets) = self.fmm.tree().get_keys(level) else { + return; + }; + + // Form signals to use for convolution first + let n = 2 * self.fmm.order - 1; + let ntargets = targets.len(); + + // Pad the signal + let &(m, n, o) = &(n, n, n); + + let p = m + 1; + let q = n + 1; + let r = o + 1; + let size = p * q * r; + let size_real = p * q * (r / 2 + 1); + let pad_size = (p - m, q - n, r - o); + let pad_index = (p - m, q - n, r - o); + // let mut padded_signals = rlst_col_vec![U, size * ntargets]; + let mut padded_signals = vec![U::zero(); size * ntargets]; + + // let chunks = padded_signals.data_mut().par_chunks_exact_mut(size); + let chunks = padded_signals.par_chunks_exact_mut(size); + + let range = (0..chunks.len()).into_par_iter(); + range.zip(chunks).for_each(|(i, chunk)| { + let target = targets[i]; + let source_multipole_arc = Arc::clone(self.multipoles.get(&target).unwrap()); + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + let signal = self + .fmm + .m2l + .compute_signal(self.fmm.order, source_multipole_lock.data()); + + let padded_signal = pad3(&signal, pad_size, pad_index); + + chunk.copy_from_slice(padded_signal.get_data()); + }); + // let mut padded_signals_hat = rlst_col_vec![Complex, size_real * ntargets]; + let mut padded_signals_hat = vec![Complex::::default(); size_real * ntargets]; + + U::rfft3_fftw_par_vec(&mut padded_signals, &mut padded_signals_hat, &[p, q, r]); + + let kernel_data_halo = &self.fmm.m2l.operator_data.kernel_data_rearranged; + let ntargets = targets.len(); + let nparents = ntargets / 8; + // let mut global_check_potentials_hat = rlst_col_vec![Complex, size_real * ntargets]; + // let mut global_check_potentials = rlst_col_vec![U, size * ntargets]; + let mut global_check_potentials_hat = vec![Complex::::default(); size_real * ntargets]; + let mut global_check_potentials = vec![U::default(); size * ntargets]; + + // Get check potentials in frequency order + let mut global_check_potentials_hat_freq = vec![Vec::new(); size_real]; + + unsafe { + // let ptr = global_check_potentials_hat.get_pointer_mut(); + let ptr = global_check_potentials_hat.as_mut_ptr(); + for (i, elem) in global_check_potentials_hat_freq + .iter_mut() + .enumerate() + .take(size_real) + { + for j in 0..ntargets { + let raw = ptr.offset((j * size_real + i).try_into().unwrap()); + let send_ptr = SendPtrMut { raw }; + elem.push(send_ptr); + } + } + } + + // Get signals into frequency order + let mut padded_signals_hat_freq = vec![Vec::new(); size_real]; + let zero = rlst_col_vec![Complex, 8]; + unsafe { + // let ptr = 
padded_signals_hat.get_pointer(); + let ptr = padded_signals_hat.as_ptr(); + + for (i, elem) in padded_signals_hat_freq + .iter_mut() + .enumerate() + .take(size_real) + { + for j in 0..ntargets { + let raw = ptr.offset((j * size_real + i).try_into().unwrap()); + let send_ptr = SendPtr { raw }; + elem.push(send_ptr); + } + // put in a bunch of zeros at the end + let ptr = zero.get_pointer(); + for _ in 0..8 { + let send_ptr = SendPtr { raw: ptr }; + elem.push(send_ptr) + } + } + } + + // Create a map between targets and index positions in vec of len 'ntargets' + let mut target_map = HashMap::new(); + + for (i, t) in targets.iter().enumerate() { + target_map.insert(t, i); + } + + // Find all the displacements used for saving results + let mut all_displacements = Vec::new(); + targets.chunks_exact(8).for_each(|sibling_chunk| { + // not in Morton order (refer to sort method when called on 'neighbours') + let parent_neighbours: Vec> = + sibling_chunk[0].parent().all_neighbors(); + + let displacements = parent_neighbours + .iter() + .map(|pn| { + let mut tmp = Vec::new(); + if let Some(pn) = pn { + if self.fmm.tree.keys_set.contains(pn) { + let mut children = pn.children(); + children.sort(); + for child in children { + // tmp.push(*target_map.get(&child).unwrap() as i64) + tmp.push(*target_map.get(&child).unwrap()) + } + } else { + for i in 0..8 { + tmp.push(ntargets + i) + } + } + } else { + for i in 0..8 { + tmp.push(ntargets + i) + } + } + + assert!(tmp.len() == 8); + tmp + }) + .collect_vec(); + all_displacements.push(displacements); + }); + + let scale = Complex::from(self.m2l_scale(level)); + + (0..size_real).into_par_iter().for_each(|freq| { + // Extract frequency component of signal (ntargets long) + let padded_signal_freq = &padded_signals_hat_freq[freq]; + + // Extract frequency components of save locations (ntargets long) + let check_potential_freq = &global_check_potentials_hat_freq[freq]; + + (0..nparents).for_each(|sibling_idx| { + // lookup associated save locations for our current sibling set + let save_locations = + &check_potential_freq[(sibling_idx * 8)..(sibling_idx + 1) * 8]; + let save_locations_raw = save_locations.iter().map(|s| s.raw).collect_vec(); + + // for each halo position compute convolutions to a given sibling set + for (i, kernel_data) in kernel_data_halo.iter().enumerate().take(26) { + let frequency_offset = 64 * freq; + let kernel_data_i = &kernel_data[frequency_offset..(frequency_offset + 64)]; + + // Find displacements for signal being translated + let displacements = &all_displacements[sibling_idx][i]; + + // Lookup signal to be translated if a translation is to be performed + let signal = &padded_signal_freq[(displacements[0])..=(displacements[7])]; + for j in 0..8 { + let kernel_data_ij = &kernel_data_i[j * 8..(j + 1) * 8]; + let sig = signal[j].raw; + unsafe { + save_locations_raw + .iter() + .zip(kernel_data_ij.iter()) + .for_each(|(&sav, &ker)| *sav += scale * ker * *sig) + } + } // inner loop + } + }); // over each sibling set + }); + + U::irfft_fftw_par_vec( + &mut global_check_potentials_hat, + &mut global_check_potentials, + &[p, q, r], + ); + + // Compute local expansion coefficients and save to data tree + let (_, multi_indices) = MortonKey::surface_grid::(self.fmm.order); + + let check_potentials = global_check_potentials + // .data() + .chunks_exact(size) + .flat_map(|chunk| { + let m = 2 * self.fmm.order - 1; + let p = m + 1; + let mut potentials = Array3D::new((p, p, p)); + potentials.get_data_mut().copy_from_slice(chunk); + + let mut tmp = 
Vec::new(); + let ntargets = multi_indices.len() / 3; + let xs = &multi_indices[0..ntargets]; + let ys = &multi_indices[ntargets..2 * ntargets]; + let zs = &multi_indices[2 * ntargets..]; + + for i in 0..ntargets { + let val = potentials.get(zs[i], ys[i], xs[i]).unwrap(); + tmp.push(*val); + } + tmp + }) + .collect_vec(); + + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + let check_potentials = unsafe { + rlst_pointer_mat!['a, U, check_potentials.as_ptr(), (ncoeffs, ntargets), (1, ncoeffs)] + }; + + let mut tmp = self + .fmm + .dc2e_inv_1 + .dot(&self.fmm.dc2e_inv_2.dot(&check_potentials)) + .eval(); + tmp.data_mut() + .iter_mut() + .for_each(|d| *d *= self.fmm.kernel.scale(level)); + let locals = tmp; + + for (i, target) in targets.iter().enumerate() { + let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); + let mut target_local_lock = target_local_arc.lock().unwrap(); + + let top_left = (0, i); + let dim = (ncoeffs, 1); + let target_local_owned = locals.block(top_left, dim); + + *target_local_lock.deref_mut() = + (target_local_lock.deref() + target_local_owned).eval(); + } + } + + fn m2l_scale(&self, level: u64) -> U { + if level < 2 { + panic!("M2L only perfomed on level 2 and below") + } + + if level == 2 { + U::from(1. / 2.).unwrap() + } else { + let two = U::from(2.0).unwrap(); + Scalar::powf(two, U::from(level - 3).unwrap()) + } + } +} diff --git a/fmm/src/field_translation/hashmap/target.rs b/fmm/src/field_translation/hashmap/target.rs new file mode 100644 index 00000000..0d3b538e --- /dev/null +++ b/fmm/src/field_translation/hashmap/target.rs @@ -0,0 +1,278 @@ +//! Implementation of Source and Target translations, as well as Source to Target translation. +use std::{ + ops::{Deref, DerefMut}, + sync::Arc, +}; + +use itertools::Itertools; +use num::Float; +use rayon::prelude::*; + +use bempp_traits::{ + field::FieldTranslationData, + fmm::{Fmm, InteractionLists, TargetTranslation}, + kernel::{Kernel, ScaleInvariantKernel}, + tree::Tree, + types::EvalType, +}; +use bempp_tree::types::single_node::SingleNodeTree; + +use rlst::{ + common::traits::*, + dense::{rlst_col_vec, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer}, +}; + +use crate::types::{FmmDataHashmap, KiFmmHashMap}; + +impl TargetTranslation for FmmDataHashmap, T, U, V>, V> +where + T: Kernel + ScaleInvariantKernel + std::marker::Send + std::marker::Sync, + U: FieldTranslationData + std::marker::Sync + std::marker::Send, + V: Scalar + Float + Default + std::marker::Sync + std::marker::Send, + V: MultiplyAdd< + V, + VectorContainer, + VectorContainer, + VectorContainer, + Dynamic, + Dynamic, + Dynamic, + >, +{ + fn l2l(&self, level: u64) { + if let Some(targets) = self.fmm.tree().get_keys(level) { + targets.par_iter().for_each(move |&target| { + let source_local_arc = Arc::clone(self.locals.get(&target.parent()).unwrap()); + let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); + + let operator_index = target.siblings().iter().position(|&x| x == target).unwrap(); + + let source_local_lock = source_local_arc.lock().unwrap(); + + let target_local_owned = self.fmm.l2l[operator_index].dot(&source_local_lock); + let mut target_local_lock = target_local_arc.lock().unwrap(); + + *target_local_lock.deref_mut() = + (target_local_lock.deref() + target_local_owned).eval(); + }) + } + } + + fn m2p<'a>(&self) { + if let Some(targets) = self.fmm.tree().get_all_leaves() { + targets.par_iter().for_each(move |&target| { + + + if let Some(points) = self.fmm.tree().get_points(&target) { + 
let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); + if let Some(w_list) = self.fmm.get_w_list(&target) { + for source in w_list.iter() { + let source_multipole_arc = + Arc::clone(self.multipoles.get(source).unwrap()); + + let upward_equivalent_surface = source.compute_surface( + self.fmm.tree().get_domain(), + self.fmm.order(), + self.fmm.alpha_inner, + ); + + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + + let target_coordinates = points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); + + let target_coordinates = unsafe { + rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] + }.eval(); + + + let mut target_potential = rlst_col_vec![V, ntargets]; + + self.fmm.kernel.evaluate_st( + EvalType::Value, + &upward_equivalent_surface[..], + target_coordinates.data(), + source_multipole_lock.data(), + target_potential.data_mut(), + ); + + let mut target_potential_lock = target_potential_arc.lock().unwrap(); + + *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); + } + } + } + } +) + } + } + + fn l2p<'a>(&self) { + if let Some(targets) = self.fmm.tree().get_all_leaves() { + targets.par_iter().for_each(move |&leaf| { + let source_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); + + if let Some(target_points) = self.fmm.tree().get_points(&leaf) { + let target_potential_arc = Arc::clone(self.potentials.get(&leaf).unwrap()); + // Lookup data + let target_coordinates = target_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); + + let target_coordinates = unsafe { + rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] + }.eval(); + + let downward_equivalent_surface = leaf.compute_surface( + &self.fmm.tree().domain, + self.fmm.order, + self.fmm.alpha_outer, + ); + + let source_local_lock = source_local_arc.lock().unwrap(); + + let mut target_potential = rlst_col_vec![V, ntargets]; + + self.fmm.kernel.evaluate_st( + EvalType::Value, + &downward_equivalent_surface[..], + target_coordinates.data(), + source_local_lock.data(), + target_potential.data_mut(), + ); + + let mut target_potential_lock = target_potential_arc.lock().unwrap(); + + *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); + } + }) + } + } + + fn p2l<'a>(&self) { + if let Some(targets) = self.fmm.tree().get_all_leaves() { + targets.par_iter().for_each(move |&leaf| { + let target_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); + + if let Some(x_list) = self.fmm.get_x_list(&leaf) { + for source in x_list.iter() { + if let Some(source_points) = self.fmm.tree().get_points(source) { + let source_coordinates = source_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); + + let source_coordinates = unsafe { + rlst_pointer_mat!['a, V, source_coordinates.as_ptr(), (nsources, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] + }.eval(); + + let source_charges = self.charges.get(source).unwrap(); + + let downward_check_surface = 
leaf.compute_surface( + &self.fmm.tree().domain, + self.fmm.order, + self.fmm.alpha_inner, + ); + + let ntargets = downward_check_surface.len() / self.fmm.kernel.space_dimension(); + let mut downward_check_potential = rlst_col_vec![V, ntargets]; + + self.fmm.kernel.evaluate_st( + EvalType::Value, + source_coordinates.data(), + &downward_check_surface[..], + &source_charges[..], + downward_check_potential.data_mut(), + ); + + let mut target_local_lock = target_local_arc.lock().unwrap(); + let mut tmp = self.fmm.dc2e_inv_1.dot(&self.fmm.dc2e_inv_2.dot(&downward_check_potential)).eval(); + tmp.data_mut().iter_mut().for_each(|d| *d *= self.fmm.kernel.scale(leaf.level())); + let target_local_owned = tmp; + *target_local_lock.deref_mut() = (target_local_lock.deref() + target_local_owned).eval(); + } + } + } + }) + } + } + + fn p2p<'a>(&self) { + if let Some(targets) = self.fmm.tree().get_all_leaves() { + targets.par_iter().for_each(move |&target| { + if let Some(target_points) = self.fmm.tree().get_points(&target) { + let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); + let target_coordinates = target_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); + + let target_coordinates = unsafe { + rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] + }.eval(); + + if let Some(u_list) = self.fmm.get_u_list(&target) { + for source in u_list.iter() { + if let Some(source_points) = self.fmm.tree().get_points(source) { + let source_coordinates = source_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); + + let source_coordinates = unsafe { + rlst_pointer_mat!['a, V, source_coordinates.as_ptr(), (nsources, self.fmm.kernel.space_dimension()), (self.fmm.kernel.space_dimension(), 1)] + }.eval(); + + let source_charges_arc = + Arc::clone(self.charges.get(source).unwrap()); + + let mut target_potential = rlst_col_vec![V, ntargets]; + + self.fmm.kernel.evaluate_st( + EvalType::Value, + source_coordinates.data(), + target_coordinates.data(), + &source_charges_arc[..], + target_potential.data_mut(), + ); + + let mut target_potential_lock = + target_potential_arc.lock().unwrap(); + + *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); + } + } + } + } + }) + } + } +} diff --git a/fmm/src/field_translation/linear/source.rs b/fmm/src/field_translation/linear/source.rs new file mode 100644 index 00000000..a9a19319 --- /dev/null +++ b/fmm/src/field_translation/linear/source.rs @@ -0,0 +1,258 @@ +//! kiFMM based on simple linear data structures that minimises memory allocations, maximises cache re-use. 
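The source-translation module below leans on a small chunking helper, `find_chunk_size`, which returns the largest divisor of `n` that does not exceed `max_chunk_size`, so that parallel chunks tile the leaf array exactly. A minimal standalone sketch of its behaviour, with usage checks:

```rust
/// Largest divisor of `n` not exceeding `max_chunk_size`; falls back to 1
/// (e.g. when `n` is prime), so chunked iteration always tiles `n` exactly.
fn find_chunk_size(n: usize, max_chunk_size: usize) -> usize {
    (1..=max_chunk_size).rev().find(|d| n % d == 0).unwrap_or(1)
}

fn main() {
    assert_eq!(find_chunk_size(512, 8), 8); // 8 divides 512
    assert_eq!(find_chunk_size(100, 8), 5); // largest divisor of 100 that is <= 8
    assert_eq!(find_chunk_size(97, 8), 1); // 97 is prime
    println!("chunk sizes tile the input exactly");
}
```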
+use num::Float; +use rayon::prelude::*; + +use bempp_traits::{ + field::FieldTranslationData, + fmm::{Fmm, SourceTranslation}, + kernel::{Kernel, ScaleInvariantKernel}, + tree::Tree, + types::EvalType, +}; +use bempp_tree::types::single_node::SingleNodeTree; + +use crate::{ + constants::P2M_MAX_CHUNK_SIZE, + types::{FmmDataLinear, KiFmmLinear}, +}; +use rlst::{ + common::traits::*, + dense::{rlst_col_vec, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer}, +}; + +/// Find the largest divisor of `n` that does not exceed `max_chunk_size` +fn find_chunk_size(n: usize, max_chunk_size: usize) -> usize { + let max_divisor = max_chunk_size; + for divisor in (1..=max_divisor).rev() { + if n % divisor == 0 { + return divisor; + } + } + 1 // No divisor found in range, fall back to single-box chunks +} + +impl SourceTranslation for FmmDataLinear, T, U, V>, V> +where + T: Kernel + ScaleInvariantKernel + std::marker::Send + std::marker::Sync, + U: FieldTranslationData + std::marker::Sync + std::marker::Send, + V: Scalar + Float + Default + std::marker::Sync + std::marker::Send, + V: MultiplyAdd< + V, + VectorContainer, + VectorContainer, + VectorContainer, + Dynamic, + Dynamic, + Dynamic, + >, +{ + /// Point to multipole evaluations, multithreaded over each leaf box. + fn p2m<'a>(&self) { + // Iterate over leaf boxes + if let Some(leaves) = self.fmm.tree().get_all_leaves() { + let nleaves = leaves.len(); + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + let surface_size = ncoeffs * self.fmm.kernel.space_dimension(); + + let mut check_potentials = rlst_col_vec![V, nleaves * ncoeffs]; + let coordinates = self.fmm.tree().get_all_coordinates().unwrap(); + let dim = self.fmm.kernel.space_dimension(); + + check_potentials + .data_mut() + .par_chunks_exact_mut(ncoeffs) + .zip(self.leaf_upward_surfaces.par_chunks_exact(surface_size)) + .zip(&self.charge_index_pointer) + .for_each( + |((check_potential, upward_check_surface), charge_index_pointer)| { + let charges = &self.charges[charge_index_pointer.0..charge_index_pointer.1]; + let coordinates = &coordinates + [charge_index_pointer.0 * dim..charge_index_pointer.1 * dim]; + + let nsources = coordinates.len() / dim; + + let coordinates = unsafe { + rlst_pointer_mat!['a, V, coordinates.as_ptr(), (nsources, dim), (dim, 1)] + }.eval(); + + if nsources > 0 { + self.fmm.kernel.evaluate_st( + EvalType::Value, + coordinates.data(), + upward_check_surface, + charges, + check_potential, + ); + } + }, + ); + + // Now compute the multipole expansions, processing chunk_size boxes at a time. 
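The accumulation loop that follows writes each column of the `ncoeffs x chunk_size` operator product back through raw multipole pointers, one box per column. A safe, self-contained sketch of the same column-wise accumulation pattern (container names hypothetical):

```rust
/// Accumulate each column of a column-major (ncoeffs x chunk) matrix `tmp`
/// into the corresponding per-box coefficient slice, mirroring the raw
/// pointer loop in `p2m`.
fn accumulate_columns(tmp: &[f64], ncoeffs: usize, multipoles: &mut [Vec<f64>]) {
    for (i, multipole) in multipoles.iter_mut().enumerate() {
        let col = &tmp[i * ncoeffs..(i + 1) * ncoeffs];
        for (m, &t) in multipole.iter_mut().zip(col) {
            *m += t;
        }
    }
}

fn main() {
    let ncoeffs = 3;
    let tmp = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]; // two columns, column-major
    let mut multipoles = vec![vec![0.0; ncoeffs]; 2];
    accumulate_columns(&tmp, ncoeffs, &mut multipoles);
    assert_eq!(multipoles[0], vec![1.0, 2.0, 3.0]);
    assert_eq!(multipoles[1], vec![4.0, 5.0, 6.0]);
}
```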
+ let chunk_size = find_chunk_size(nleaves, P2M_MAX_CHUNK_SIZE); + + check_potentials + .data() + .par_chunks_exact(ncoeffs*chunk_size) + .zip(self.leaf_multipoles.par_chunks_exact(chunk_size)) + .zip(self.scales.par_chunks_exact(ncoeffs*chunk_size)) + .for_each(|((check_potential, multipole_ptrs), scale)| { + + let check_potential = unsafe { rlst_pointer_mat!['a, V, check_potential.as_ptr(), (ncoeffs, chunk_size), (1, ncoeffs)] }; + let scale = unsafe {rlst_pointer_mat!['a, V, scale.as_ptr(), (ncoeffs, chunk_size), (1, ncoeffs)]}.eval(); + + let tmp = (self.fmm.uc2e_inv_1.dot(&self.fmm.uc2e_inv_2.dot(&check_potential.cmp_wise_product(&scale)))).eval(); + + unsafe { + for (i, multipole_ptr) in multipole_ptrs.iter().enumerate().take(chunk_size) { + let mut ptr = multipole_ptr.raw; + for j in 0..ncoeffs { + *ptr += tmp.data()[i*ncoeffs+j]; + ptr = ptr.add(1); + } + } + } + }) + } + } + + /// Multipole to multipole translations, multithreaded over all boxes at a given level. + fn m2m<'a>(&self, level: u64) { + if let Some(sources) = self.fmm.tree().get_keys(level) { + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + let nsources = sources.len(); + let min = &sources[0]; + let max = &sources[nsources - 1]; + let min_idx = self.fmm.tree().key_to_index.get(min).unwrap(); + let max_idx = self.fmm.tree().key_to_index.get(max).unwrap(); + + let multipoles = &self.multipoles[min_idx * ncoeffs..(max_idx + 1) * ncoeffs]; + + let nsiblings = 8; + let mut max_chunk_size = 8_i32.pow((level - 1).try_into().unwrap()) as usize; + + if max_chunk_size > P2M_MAX_CHUNK_SIZE { + max_chunk_size = P2M_MAX_CHUNK_SIZE; + } + let chunk_size = find_chunk_size(nsources, max_chunk_size); + + multipoles + .par_chunks_exact(nsiblings * ncoeffs*chunk_size) + .zip(self.level_multipoles[(level - 1) as usize].par_chunks_exact(chunk_size)) + .for_each(|(multipole_chunk, parent)| { + + unsafe { + let tmp = rlst_pointer_mat!['a, V, multipole_chunk.as_ptr(), (ncoeffs*nsiblings, chunk_size), (1, ncoeffs*nsiblings)]; + let tmp = self.fmm.m2m.dot(&tmp).eval(); + + for (i, par) in parent.iter().enumerate().take(chunk_size) { + let mut ptr = par.raw; + for j in 0..ncoeffs { + *ptr += tmp.data()[(i*ncoeffs)+j]; + ptr = ptr.add(1) + } + } + } + }) + } + } +} + +#[cfg(test)] +mod test { + + use super::*; + + use itertools::Itertools; + + use crate::charge::build_charge_dict; + use bempp_field::types::SvdFieldTranslationKiFmm; + use bempp_kernel::laplace_3d::Laplace3dKernel; + use bempp_tree::{constants::ROOT, implementations::helpers::points_fixture}; + + #[test] + fn test_upward_pass() { + let npoints = 10000; + let points = points_fixture(npoints, None, None); + let global_idxs = (0..npoints).collect_vec(); + let charges = vec![1.0; npoints]; + + let kernel = Laplace3dKernel::::default(); + let order = 6; + let alpha_inner = 1.05; + let alpha_outer = 2.95; + let adaptive = false; + let k = 1000; + let ncrit = 150; + let depth = 3; + + // Create a tree + let tree = SingleNodeTree::new( + points.data(), + adaptive, + Some(ncrit), + Some(depth), + &global_idxs[..], + ); + + // Precompute the M2L data + let m2l_data_svd = SvdFieldTranslationKiFmm::new( + kernel.clone(), + Some(k), + order, + *tree.get_domain(), + alpha_inner, + ); + let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); + + // Form charge dict, matching charges with their associated global indices + let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); + + // Associate data with the FMM + let datatree = 
FmmDataLinear::new(fmm, &charge_dict).unwrap(); + + // Upward pass + { + datatree.p2m(); + + for level in (1..=depth).rev() { + datatree.m2m(level); + } + } + + let midx = datatree.fmm.tree().key_to_index.get(&ROOT).unwrap(); + let ncoeffs = datatree.fmm.m2l.ncoeffs(datatree.fmm.order); + let multipole = &datatree.multipoles[midx * ncoeffs..(midx + 1) * ncoeffs]; + + let surface = + ROOT.compute_surface(&datatree.fmm.tree().domain, order, datatree.fmm.alpha_inner); + + let test_point = vec![100000., 0., 0.]; + + let mut expected = vec![0.]; + let mut found = vec![0.]; + + let kernel = Laplace3dKernel::::default(); + kernel.evaluate_st( + EvalType::Value, + points.data(), + &test_point, + &charges, + &mut expected, + ); + + kernel.evaluate_st( + EvalType::Value, + &surface, + &test_point, + multipole, + &mut found, + ); + + let abs_error = (expected[0] - found[0]).abs(); + let rel_error = abs_error / expected[0]; + assert!(rel_error <= 1e-5); + } +} diff --git a/fmm/src/field_translation/linear/source_to_target.rs b/fmm/src/field_translation/linear/source_to_target.rs new file mode 100644 index 00000000..ee4402e7 --- /dev/null +++ b/fmm/src/field_translation/linear/source_to_target.rs @@ -0,0 +1,526 @@ +//! kiFMM based on simple linear data structures that minimises memory allocations, maximises cache re-use. +use bempp_tools::Array3D; + +use itertools::Itertools; +use num::{Complex, Float}; +use rayon::prelude::*; +use std::collections::HashMap; + +use bempp_field::{ + array::pad3, + fft::Fft, + types::{FftFieldTranslationKiFmm, FftMatrix, SvdFieldTranslationKiFmm}, +}; + +use bempp_traits::{ + arrays::Array3DAccess, + field::{FieldTranslation, FieldTranslationData}, + fmm::Fmm, + kernel::{Kernel, ScaleInvariantKernel}, + tree::Tree, +}; +use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; + +use crate::types::{FmmDataLinear, KiFmmLinear, SendPtrMut}; +use rlst::{ + algorithms::{linalg::DenseMatrixLinAlgBuilder, traits::svd::Svd}, + common::traits::*, + dense::{rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer}, +}; + +pub fn size_real(order: usize) -> usize { + let m = 2 * order - 1; // Size of each dimension of 3D kernel/signal + let pad_size = 1; + let p = m + pad_size; // Size of each dimension of padded 3D kernel/signal + p * p * (p / 2 + 1) // Number of Fourier coefficients when working with real data +} + +pub fn nleaves(level: usize) -> usize { + 8i32.pow(level as u32) as usize +} + +pub fn nparents(level: usize) -> usize { + 8i32.pow((level - 1) as u32) as usize +} + +pub fn signal_freq_order_cplx_optimized( + order: usize, + level: usize, + signal: &[Complex], +) -> Vec>> +where + U: Scalar, +{ + let size_real = size_real(order); + let nleaves = nleaves(level); + + (0..size_real) + .map(|i| { + let mut tmp = (0..nleaves) + .map(|j| signal[j * size_real + i]) + .collect::>(); + + // Pad with zeros + tmp.extend(std::iter::repeat(Complex::new(U::zero(), U::zero())).take(8)); + + tmp + }) + .collect() +} + +pub unsafe fn check_potentials_freq_cplx( + order: usize, + level: usize, + check_potentials: &mut Vec>, +) -> Vec>>> +where + U: Scalar, +{ + let size_real = size_real(order); // Number of Fourier coefficients when working with real data + let nleaves = nleaves(level); + let mut check_potentials_freq = vec![Vec::new(); size_real]; + + let ptr = check_potentials.as_mut_ptr(); + + for (i, elem) in check_potentials_freq.iter_mut().enumerate().take(size_real) { + for j in 0..nleaves { + let raw = ptr.offset((j * size_real + i).try_into().unwrap()); + 
let send_ptr = SendPtrMut { raw }; + elem.push(send_ptr); + } + } + + check_potentials_freq +} + +pub fn displacements( + tree: &SingleNodeTree, + level: u64, + target_map_leaves: &HashMap<&MortonKey, usize>, +) -> Vec>> +where + U: Float + Default + Scalar, +{ + let leaves = tree.get_keys(level).unwrap(); + let nleaves = leaves.len(); + + let mut all_displacements = Vec::new(); + + leaves.chunks_exact(8).for_each(|sibling_chunk| { + let parent_neighbours = sibling_chunk[0].parent().all_neighbors(); + let displacements = parent_neighbours + .iter() + .map(|pn| { + let mut tmp = Vec::new(); + + if let Some(pn) = pn { + if tree.keys_set.contains(pn) { + let mut children = pn.children(); + children.sort(); + for child in children.iter() { + tmp.push(*target_map_leaves.get(child).unwrap()) + } + } else { + for i in 0..8 { + tmp.push(nleaves + i); + } + } + } else { + for i in 0..8 { + tmp.push(nleaves + i) + } + } + tmp + }) + .collect_vec(); + all_displacements.push(displacements) + }); + + all_displacements +} + +pub fn chunked_displacements( + level: usize, + chunksize: usize, + displacements: &[Vec>], +) -> (Vec>>, Vec>) { + let mut all_save_locations = Vec::new(); + let mut all_displacements_chunked = Vec::new(); // indexed by chunk index + let nparents = nparents(level); + let nsiblings = 8; + + (0..nparents).step_by(chunksize).for_each(|chunk_start| { + let chunk_end = std::cmp::min(chunksize + chunk_start, nparents); + + // lookup save locations (head) + let save_locations = (chunk_start..chunk_end) + .map(|sibling_idx| sibling_idx * nsiblings) + .collect_vec(); + all_save_locations.push(save_locations); + + // 26 long + let mut tmp = Vec::new(); + for i in 0..26 { + // chunk_size long + let tmp2 = (chunk_start..chunk_end) + .map(|sibling_idx| { + // head + displacements[sibling_idx][i][0] + }) + .collect_vec(); + + tmp.push(tmp2); + } + all_displacements_chunked.push(tmp); + }); + + (all_displacements_chunked, all_save_locations) +} + +#[inline(always)] +pub unsafe fn matmul8x8x2_cplx_simple_local( + kernel_data_freq: &[Complex], + signal: &[Complex], + save_locations: &mut [Complex], + scale: Complex, +) where + U: Scalar, +{ + for j in 0..8 { + let kernel_data_ij = &kernel_data_freq[j * 8..(j + 1) * 8]; + let sig = signal[j]; + + save_locations + .iter_mut() + .zip(kernel_data_ij.iter()) + .for_each(|(sav, &ker)| *sav += scale * ker * sig) + } // inner loop +} +#[allow(clippy::too_many_arguments)] +#[inline(always)] +pub fn m2l_cplx_chunked( + order: usize, + level: usize, + signal_freq_order: &[Vec>], + check_potential_freq_order: &[Vec>>], + kernel_data_halo: &[Vec>], + all_displacements_chunked: &[Vec>], + all_save_locations_chunked: &[Vec], + chunksize: usize, + scale: Complex, +) where + U: Scalar + Sync, +{ + let nparents = nparents(level); + let size_real = size_real(order); + + (0..size_real).into_par_iter().for_each(|freq| { + // Extract frequency component of signal (ntargets long) + let padded_signal_freq = &signal_freq_order[freq]; + + // Extract frequency components of save locations (ntargets long) + let check_potential_freq = &check_potential_freq_order[freq]; + + let zero = U::from(0.).unwrap(); + + (0..nparents) + .step_by(chunksize) + .enumerate() + .for_each(|(c, _chunk_start)| { + let first = all_save_locations_chunked[c].first().unwrap(); + let last = all_save_locations_chunked[c].last().unwrap(); + let save_locations = &check_potential_freq[*first..*last + 8]; + let save_locations_raw = save_locations.iter().map(|s| s.raw).collect_vec(); + let mut 
local_save_locations = vec![Complex::new(zero, zero); 8 * chunksize]; + + for (i, kernel_data) in kernel_data_halo.iter().enumerate().take(26) { + let frequency_offset = 64 * freq; + let kernel_data_freq = + &kernel_data[frequency_offset..(frequency_offset + 64)].to_vec(); + + // lookup signals + let disps = &all_displacements_chunked[c][i]; + + // Expect this to be 8*chunksize + let signals = disps + .iter() + .map(|d| &padded_signal_freq[*d..d + 8]) + .collect_vec(); + + for j in 0..chunksize { + unsafe { + matmul8x8x2_cplx_simple_local( + kernel_data_freq, + signals[j], + &mut local_save_locations[j * 8..(j + 1) * 8], + scale, + ) + }; + } + } + + unsafe { + save_locations_raw + .iter() + .zip(local_save_locations.iter()) + .for_each(|(&glob, &loc)| { + *glob += loc; + }); + } + }); + }); +} + +/// Implement the multipole to local translation operator for an FFT accelerated KiFMM on a single node. +impl FieldTranslation + for FmmDataLinear, T, FftFieldTranslationKiFmm, U>, U> +where + T: Kernel + + ScaleInvariantKernel + + std::marker::Send + + std::marker::Sync + + Default, + U: Scalar + + Float + + Default + + std::marker::Send + + std::marker::Sync + + Fft, FftMatrix>>, + Complex: Scalar, + U: MultiplyAdd< + U, + VectorContainer, + VectorContainer, + VectorContainer, + Dynamic, + Dynamic, + Dynamic, + >, +{ + fn m2l<'a>(&self, level: u64) { + let Some(targets) = self.fmm.tree().get_keys(level) else { + return; + }; + + // Form signals to use for convolution first + let n = 2 * self.fmm.order - 1; + let ntargets = targets.len(); + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + // Pad the signal + let &(m, n, o) = &(n, n, n); + + let p = m + 1; + let q = n + 1; + let r = o + 1; + let size = p * q * r; + let size_real = p * q * (r / 2 + 1); + let pad_size = (p - m, q - n, r - o); + let pad_index = (p - m, q - n, r - o); + let mut padded_signals = vec![U::default(); size * ntargets]; + + let padded_signals_chunks = padded_signals.par_chunks_exact_mut(size); + + let min = &targets[0]; + let max = &targets[ntargets - 1]; + let min_idx = self.fmm.tree().key_to_index.get(min).unwrap(); + let max_idx = self.fmm.tree().key_to_index.get(max).unwrap(); + + let multipoles = &self.multipoles[min_idx * ncoeffs..(max_idx + 1) * ncoeffs]; + let multipoles_chunks = multipoles.par_chunks_exact(ncoeffs); + + padded_signals_chunks + .zip(multipoles_chunks) + .for_each(|(padded_signal, multipole)| { + let signal = self.fmm.m2l.compute_signal(self.fmm.order, multipole); + + let tmp = pad3(&signal, pad_size, pad_index); + + padded_signal.copy_from_slice(tmp.get_data()); + }); + + let mut padded_signals_hat = vec![Complex::::default(); size_real * ntargets]; + + U::rfft3_fftw_par_vec(&mut padded_signals, &mut padded_signals_hat, &[p, q, r]); + + let mut global_check_potentials_hat = vec![Complex::::default(); size_real * ntargets]; + let mut global_check_potentials = vec![U::default(); size * ntargets]; + + // Get check potentials in frequency order + let global_check_potentials_hat_freq = unsafe { + check_potentials_freq_cplx( + self.fmm.order, + level as usize, + &mut global_check_potentials_hat, + ) + }; + + let padded_signals_hat_freq = signal_freq_order_cplx_optimized( + self.fmm.order, + level as usize, + &padded_signals_hat[..], + ); + + // Create a map between targets and index positions in vec of len 'ntargets' + let mut target_map = HashMap::new(); + + for (i, t) in targets.iter().enumerate() { + 
target_map.insert(t, i); + } + + let chunksize = if level == 2 { 8 } else if level == 3 { 64 } else { 128 }; + + let all_displacements = displacements(&self.fmm.tree, level, &target_map); + + let (chunked_displacements, chunked_save_locations) = + chunked_displacements(level as usize, chunksize, &all_displacements); + + let scale = Complex::from(self.m2l_scale(level)); + + let kernel_data_halo = &self.fmm.m2l.operator_data.kernel_data_rearranged; + + m2l_cplx_chunked( + self.fmm.order, + level as usize, + &padded_signals_hat_freq, + &global_check_potentials_hat_freq, + kernel_data_halo, + &chunked_displacements, + &chunked_save_locations, + chunksize, + scale, + ); + + U::irfft_fftw_par_vec( + &mut global_check_potentials_hat, + &mut global_check_potentials, + &[p, q, r], + ); + + // Compute local expansion coefficients and save to data tree + let (_, multi_indices) = MortonKey::surface_grid::(self.fmm.order); + + let check_potentials = global_check_potentials + .chunks_exact(size) + .flat_map(|chunk| { + let m = 2 * self.fmm.order - 1; + let p = m + 1; + let mut potentials = Array3D::new((p, p, p)); + potentials.get_data_mut().copy_from_slice(chunk); + + let mut tmp = Vec::new(); + let ntargets = multi_indices.len() / 3; + let xs = &multi_indices[0..ntargets]; + let ys = &multi_indices[ntargets..2 * ntargets]; + let zs = &multi_indices[2 * ntargets..]; + + for i in 0..ntargets { + let val = potentials.get(zs[i], ys[i], xs[i]).unwrap(); + tmp.push(*val); + } + tmp + }) + .collect_vec(); + + // This should be blocked and use blas3 + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + let check_potentials = unsafe { + rlst_pointer_mat!['a, U, check_potentials.as_ptr(), (ncoeffs, ntargets), (1, ncoeffs)] + }; + + let mut tmp = self + .fmm + .dc2e_inv_1 + .dot(&self.fmm.dc2e_inv_2.dot(&check_potentials)) + .eval(); + + tmp.data_mut() + .iter_mut() + .for_each(|d| *d *= self.fmm.kernel.scale(level)); + + // Add result + tmp.data() + .par_chunks_exact(ncoeffs) + .zip(self.level_locals[level as usize].into_par_iter()) + .for_each(|(result, local)| unsafe { + let mut ptr = local.raw; + for &r in result.iter().take(ncoeffs) { + *ptr += r; + ptr = ptr.add(1) + } + }); + } + + fn m2l_scale(&self, level: u64) -> U { + if level < 2 { + panic!("M2L only performed on level 2 and below") + } + + if level == 2 { + U::from(1. / 2.).unwrap() + } else { + let two = U::from(2.0).unwrap(); + Scalar::powf(two, U::from(level - 3).unwrap()) + } + } +} + +/// Implement the multipole to local translation operator for an SVD accelerated KiFMM on a single node. 
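Both field-translation implementations in this file share the level-dependent kernel scaling implemented by `m2l_scale`: 1/2 on level 2, doubling with each further level of refinement. A concrete `f64` rendering with the first few values worked out:

```rust
/// Level-dependent M2L scaling for a scale-invariant kernel, mirroring
/// `m2l_scale` above: 1/2 at level 2, then 2^(level - 3) for deeper levels.
fn m2l_scale(level: u64) -> f64 {
    assert!(level >= 2, "M2L only performed on level 2 and below");
    if level == 2 {
        0.5
    } else {
        2f64.powi(level as i32 - 3)
    }
}

fn main() {
    let scales: Vec<f64> = (2..=6).map(m2l_scale).collect();
    assert_eq!(scales, vec![0.5, 1.0, 2.0, 4.0, 8.0]);
    println!("m2l scales for levels 2..=6: {scales:?}");
}
```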
+impl FieldTranslation + for FmmDataLinear, T, SvdFieldTranslationKiFmm, U>, U> +where + T: Kernel + + ScaleInvariantKernel + + std::marker::Send + + std::marker::Sync + + Default, + DenseMatrixLinAlgBuilder: Svd, + U: Scalar, + U: Float + + Default + + MultiplyAdd< + U, + VectorContainer, + VectorContainer, + VectorContainer, + Dynamic, + Dynamic, + Dynamic, + >, + U: std::marker::Send + std::marker::Sync + Default, +{ + fn m2l<'a>(&self, level: u64) { + let Some(_targets) = self.fmm.tree().get_keys(level) else { + return; + }; + } + + fn m2l_scale(&self, level: u64) -> U { + if level < 2 { + panic!("M2L only performed on level 2 and below") + } + + if level == 2 { + U::from(1. / 2.).unwrap() + } else { + let two = U::from(2.0).unwrap(); + Scalar::powf(two, U::from(level - 3).unwrap()) + } + } +} diff --git a/fmm/src/field_translation/linear/target.rs b/fmm/src/field_translation/linear/target.rs new file mode 100644 index 00000000..fc2d9cb8 --- /dev/null +++ b/fmm/src/field_translation/linear/target.rs @@ -0,0 +1,226 @@ +//! kiFMM based on simple linear data structures that minimises memory allocations, maximises cache re-use. + +use itertools::Itertools; +use num::Float; +use rayon::prelude::*; + +use bempp_traits::{ + field::FieldTranslationData, + fmm::{Fmm, InteractionLists, TargetTranslation}, + kernel::{Kernel, ScaleInvariantKernel}, + tree::Tree, + types::EvalType, +}; +use bempp_tree::types::single_node::SingleNodeTree; + +use crate::{ + constants::P2M_MAX_CHUNK_SIZE, + types::{FmmDataLinear, KiFmmLinear}, +}; + +use rlst::{ + common::traits::*, + dense::{rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer}, +}; + +/// Find the largest divisor of `n` that does not exceed `max_chunk_size` +fn find_chunk_size(n: usize, max_chunk_size: usize) -> usize { + let max_divisor = max_chunk_size; + for divisor in (1..=max_divisor).rev() { + if n % divisor == 0 { + return divisor; + } + } + 1 // No divisor found in range, fall back to single-box chunks +} + +// Try this two different ways, ignoring w,x lists and also including them +impl TargetTranslation for FmmDataLinear, T, U, V>, V> +where + T: Kernel + ScaleInvariantKernel + std::marker::Send + std::marker::Sync, + U: FieldTranslationData + std::marker::Sync + std::marker::Send, + V: Scalar + Float + Default + std::marker::Sync + std::marker::Send, + V: MultiplyAdd< + V, + VectorContainer, + VectorContainer, + VectorContainer, + Dynamic, + Dynamic, + Dynamic, + >, +{ + fn l2l<'a>(&self, level: u64) { + if let Some(parent_sources) = self.fmm.tree().get_keys(level - 1) { + if let Some(_child_targets) = self.fmm.tree().get_keys(level) { + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + let nsources = parent_sources.len(); + let min_parent = &parent_sources[0]; + let max_parent = &parent_sources[nsources - 1]; + + let min_parent_idx = self.fmm.tree().key_to_index.get(min_parent).unwrap(); + let max_parent_idx = self.fmm.tree().key_to_index.get(max_parent).unwrap(); + + let parent_locals = + &self.locals[min_parent_idx * ncoeffs..(max_parent_idx + 1) * ncoeffs]; + + let child_locals = &self.level_locals[level as usize]; + + let mut max_chunk_size = 8_i32.pow(level.try_into().unwrap()) as usize; + + if max_chunk_size > P2M_MAX_CHUNK_SIZE { + max_chunk_size = P2M_MAX_CHUNK_SIZE; + } + let chunk_size = find_chunk_size(nsources, max_chunk_size); + let nsiblings = 8; + + parent_locals + .par_chunks_exact(ncoeffs*chunk_size) + .zip(child_locals.par_chunks_exact(chunk_size*nsiblings)) + .for_each(|(parent_local_chunk, 
children_locals)| { + + let parent_local_chunk = unsafe { rlst_pointer_mat!['a, V, parent_local_chunk.as_ptr(), (ncoeffs, chunk_size), (1, ncoeffs)] }; + + for i in 0..8 { + // Evaluate all L2L for this position in local chunk + let tmp = self.fmm.l2l[i].dot(&parent_local_chunk).eval(); + + // Assign for each child in this chunk at this position + for j in 0..chunk_size { + let chunk_displacement = j*nsiblings; + let child_displacement = chunk_displacement + i; + let mut ptr = children_locals[child_displacement].raw; + + unsafe { + for k in 0..ncoeffs { + *ptr += tmp.data()[(j*ncoeffs)+k]; + ptr = ptr.add(1); + } + } + + } + } + }); + } + } + } + + fn m2p<'a>(&self) {} + + fn l2p<'a>(&self) { + if let Some(_leaves) = self.fmm.tree().get_all_leaves() { + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + let coordinates = self.fmm.tree().get_all_coordinates().unwrap(); + let dim = self.fmm.kernel.space_dimension(); + let surface_size = ncoeffs * dim; + + self.leaf_upward_surfaces + .par_chunks_exact(surface_size) + .zip(self.leaf_locals.into_par_iter()) + .zip(&self.charge_index_pointer) + .zip(&self.potentials_send_pointers) + .for_each( + |( + ((leaf_downward_equivalent_surface, local_ptr), charge_index_pointer), + potential_send_ptr, + )| { + let target_coordinates = &coordinates + [charge_index_pointer.0 * dim..charge_index_pointer.1 * dim]; + let ntargets = target_coordinates.len() / dim; + let target_coordinates = unsafe { + rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, dim), (dim, 1)] + }.eval(); + + let local_expansion = + unsafe { rlst_pointer_mat!['a, V, local_ptr.raw, (ncoeffs, 1), (1, ncoeffs) ]}; + + + // Compute direct + if ntargets > 0 { + let result = unsafe { std::slice::from_raw_parts_mut(potential_send_ptr.raw, ntargets)}; + + self.fmm.kernel.evaluate_st( + EvalType::Value, + leaf_downward_equivalent_surface, + target_coordinates.data(), + local_expansion.data(), + result, + ); + + } + }, + ); + } + } + + fn p2l<'a>(&self) {} + + fn p2p<'a>(&self) { + if let Some(leaves) = self.fmm.tree().get_all_leaves() { + let dim = self.fmm.kernel.space_dimension(); + + let coordinates = self.fmm.tree().get_all_coordinates().unwrap(); + + leaves + .par_iter() + .zip(&self.charge_index_pointer) + .zip(&self.potentials_send_pointers) + .for_each(|((leaf, charge_index_pointer), potential_send_pointer)| { + let targets = + &coordinates[charge_index_pointer.0 * dim..charge_index_pointer.1 * dim]; + let ntargets = targets.len() / dim; + let targets = unsafe { + rlst_pointer_mat!['a, V, targets.as_ptr(), (ntargets, dim), (dim, 1)] + }.eval(); + + if ntargets > 0 { + + if let Some(u_list) = self.fmm.get_u_list(leaf) { + + let u_list_indices = u_list + .iter() + .filter_map(|k| self.fmm.tree().get_leaf_index(k)); + + let charges = u_list_indices + .clone() + .map(|&idx| { + let index_pointer = &self.charge_index_pointer[idx]; + &self.charges[index_pointer.0..index_pointer.1] + }) + .collect_vec(); + + let sources_coordinates = u_list_indices + .into_iter() + .map(|&idx| { + let index_pointer = &self.charge_index_pointer[idx]; + &coordinates[index_pointer.0 * dim..index_pointer.1 * dim] + }) + .collect_vec(); + + for (&charges, sources) in charges.iter().zip(sources_coordinates) { + let nsources = sources.len() / dim; + let sources = unsafe { + rlst_pointer_mat!['a, V, sources.as_ptr(), (nsources, dim), (dim, 1)] + }.eval(); + + + if nsources > 0 { + let result = unsafe { std::slice::from_raw_parts_mut(potential_send_pointer.raw, ntargets)}; + 
self.fmm.kernel.evaluate_st( + EvalType::Value, + sources.data(), + // sources, + targets.data(), + charges, + result, + ); + } + } + } + } + }) + } + } +} diff --git a/fmm/src/fmm.rs b/fmm/src/fmm/hashmap.rs similarity index 84% rename from fmm/src/fmm.rs rename to fmm/src/fmm/hashmap.rs index 5454e62f..be18c1b1 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm/hashmap.rs @@ -19,39 +19,39 @@ use rlst::{ use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, - fmm::{Fmm, FmmLoop, SourceTranslation, TargetTranslation, TimeDict}, - kernel::{Kernel, KernelScale}, + fmm::{Fmm, FmmLoop, KiFmm as KiFmmTrait, SourceTranslation, TargetTranslation, TimeDict}, + kernel::{Kernel, ScaleInvariantKernel}, tree::Tree, types::EvalType, }; use bempp_tree::{constants::ROOT, types::single_node::SingleNodeTree}; use crate::pinv::{pinv, SvdScalar}; -use crate::types::{C2EType, ChargeDict, FmmData, KiFmm}; +use crate::types::{C2EType, ChargeDict, FmmDataHashmap, KiFmmHashMap}; /// Implementation of constructor for single node KiFMM -impl<'a, T, U, W> KiFmm, T, U, W> +impl<'a, T, U, V> KiFmmHashMap, T, U, V> where - T: Kernel + KernelScale, + T: Kernel + ScaleInvariantKernel, U: FieldTranslationData, - W: Scalar + Default + Float, - SvdScalar: PartialOrd, - SvdScalar: Scalar + Float + ToPrimitive, - DenseMatrixLinAlgBuilder: Svd, - W: MultiplyAdd< - W, - VectorContainer, - VectorContainer, - VectorContainer, + V: Scalar + Default + Float, + SvdScalar: PartialOrd, + SvdScalar: Scalar + Float + ToPrimitive, + DenseMatrixLinAlgBuilder: Svd, + V: MultiplyAdd< + V, + VectorContainer, + VectorContainer, + VectorContainer, Dynamic, Dynamic, Dynamic, >, - SvdScalar: MultiplyAdd< - SvdScalar, - VectorContainer>, - VectorContainer>, - VectorContainer>, + SvdScalar: MultiplyAdd< + SvdScalar, + VectorContainer>, + VectorContainer>, + VectorContainer>, Dynamic, Dynamic, Dynamic, @@ -69,10 +69,10 @@ where /// * `m2l` - The M2L operator matrices, as well as metadata associated with this FMM. 
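A note on the operator setup in this constructor (and its linear counterpart later in the diff): rather than forming a single dense pseudo-inverse, each check-to-equivalent Gram matrix is kept as two factors. Assuming `pinv` returns the truncated SVD factors with reciprocal singular values in `s`, which is how the factors are consumed everywhere else in the diff, the stored pair corresponds to:

```latex
K_{\mathrm{c2e}}^{+} \;=\; \underbrace{V\,\Sigma^{-1}}_{\texttt{uc2e\_inv\_1}}\;\underbrace{U^{T}}_{\texttt{uc2e\_inv\_2}},
\qquad
q_{\mathrm{equiv}} \;=\; K_{\mathrm{c2e}}^{+}\,\phi_{\mathrm{check}} .
```

This is why downstream code always applies the pair as two products, e.g. `self.fmm.dc2e_inv_1.dot(&self.fmm.dc2e_inv_2.dot(&check_potentials))`.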
pub fn new( order: usize, - alpha_inner: W, - alpha_outer: W, + alpha_inner: V, + alpha_outer: V, kernel: T, - tree: SingleNodeTree, + tree: SingleNodeTree, m2l: U, ) -> Self { let upward_equivalent_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); @@ -86,21 +86,21 @@ where // Store in RLST matrices let upward_equivalent_surface = unsafe { - rlst_pointer_mat!['a, ::Real, upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] + rlst_pointer_mat!['a, ::Real, upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] }; let upward_check_surface = unsafe { - rlst_pointer_mat!['a, ::Real, upward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] + rlst_pointer_mat!['a, ::Real, upward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] }; let downward_equivalent_surface = unsafe { - rlst_pointer_mat!['a, ::Real, downward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] + rlst_pointer_mat!['a, ::Real, downward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] }; let downward_check_surface = unsafe { - rlst_pointer_mat!['a, ::Real, downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] + rlst_pointer_mat!['a, ::Real, downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] }; // Compute upward check to equivalent, and downward check to equivalent Gram matrices // as well as their inverses using DGESVD. - let mut uc2e = rlst_dynamic_mat![W, (ncheck_surface, nequiv_surface)]; + let mut uc2e = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; kernel.assemble_st( EvalType::Value, upward_equivalent_surface.data(), @@ -111,7 +111,7 @@ where // Need to tranapose so that rows correspond to targets and columns to sources let uc2e = uc2e.transpose().eval(); - let mut dc2e = rlst_dynamic_mat![W, (ncheck_surface, nequiv_surface)]; + let mut dc2e = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; kernel.assemble_st( EvalType::Value, downward_equivalent_surface.data(), @@ -122,11 +122,11 @@ where // Need to tranapose so that rows correspond to targets and columns to sources let dc2e = dc2e.transpose().eval(); - let (s, ut, v) = pinv::(&uc2e, None, None).unwrap(); + let (s, ut, v) = pinv::(&uc2e, None, None).unwrap(); - let mut mat_s = rlst_dynamic_mat![SvdScalar, (s.len(), s.len())]; + let mut mat_s = rlst_dynamic_mat![SvdScalar, (s.len(), s.len())]; for i in 0..s.len() { - mat_s[[i, i]] = SvdScalar::::from_real(s[i]); + mat_s[[i, i]] = SvdScalar::::from_real(s[i]); } let uc2e_inv_1 = v.dot(&mat_s); let uc2e_inv_2 = ut; @@ -137,27 +137,27 @@ where let uc2e_inv_1 = uc2e_inv_1 .data() .iter() - .map(|x| W::from(*x).unwrap()) + .map(|x| V::from(*x).unwrap()) .collect_vec(); let uc2e_inv_1 = unsafe { - rlst_pointer_mat!['a, W, uc2e_inv_1.as_ptr(), uc2e_inv_1_shape, (1, uc2e_inv_1_shape.0)] + rlst_pointer_mat!['a, V, uc2e_inv_1.as_ptr(), uc2e_inv_1_shape, (1, uc2e_inv_1_shape.0)] } .eval(); let uc2e_inv_2 = uc2e_inv_2 .data() .iter() - .map(|x| W::from(*x).unwrap()) + .map(|x| V::from(*x).unwrap()) .collect_vec(); let uc2e_inv_2 = unsafe { - rlst_pointer_mat!['a, W, uc2e_inv_2.as_ptr(), uc2e_inv_2_shape, (1, uc2e_inv_2_shape.0)] + rlst_pointer_mat!['a, V, uc2e_inv_2.as_ptr(), uc2e_inv_2_shape, (1, uc2e_inv_2_shape.0)] } .eval(); - let (s, ut, v) = 
pinv::(&dc2e, None, None).unwrap(); + let (s, ut, v) = pinv::(&dc2e, None, None).unwrap(); - let mut mat_s = rlst_dynamic_mat![SvdScalar, (s.len(), s.len())]; + let mut mat_s = rlst_dynamic_mat![SvdScalar, (s.len(), s.len())]; for i in 0..s.len() { - mat_s[[i, i]] = SvdScalar::::from_real(s[i]); + mat_s[[i, i]] = SvdScalar::::from_real(s[i]); } let dc2e_inv_1 = v.dot(&mat_s); @@ -169,26 +169,26 @@ where let dc2e_inv_1 = dc2e_inv_1 .data() .iter() - .map(|x| W::from(*x).unwrap()) + .map(|x| V::from(*x).unwrap()) .collect_vec(); let dc2e_inv_1 = unsafe { - rlst_pointer_mat!['a, W, dc2e_inv_1.as_ptr(), dc2e_inv_1_shape, (1, dc2e_inv_1_shape.0)] + rlst_pointer_mat!['a, V, dc2e_inv_1.as_ptr(), dc2e_inv_1_shape, (1, dc2e_inv_1_shape.0)] } .eval(); let dc2e_inv_2 = dc2e_inv_2 .data() .iter() - .map(|x| W::from(*x).unwrap()) + .map(|x| V::from(*x).unwrap()) .collect_vec(); let dc2e_inv_2 = unsafe { - rlst_pointer_mat!['a, W, dc2e_inv_2.as_ptr(), dc2e_inv_2_shape, (1, dc2e_inv_2_shape.0)] + rlst_pointer_mat!['a, V, dc2e_inv_2.as_ptr(), dc2e_inv_2_shape, (1, dc2e_inv_2_shape.0)] } .eval(); // Calculate M2M/L2L matrices let children = ROOT.children(); - let mut m2m: Vec> = Vec::new(); - let mut l2l: Vec> = Vec::new(); + let mut m2m: Vec> = Vec::new(); + let mut l2l: Vec> = Vec::new(); for child in children.iter() { let child_upward_equivalent_surface = @@ -196,13 +196,13 @@ where let child_downward_check_surface = child.compute_surface(tree.get_domain(), order, alpha_inner); let child_upward_equivalent_surface = unsafe { - rlst_pointer_mat!['a, ::Real, child_upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] + rlst_pointer_mat!['a, ::Real, child_upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] }; let child_downward_check_surface = unsafe { - rlst_pointer_mat!['a, ::Real, child_downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] + rlst_pointer_mat!['a, ::Real, child_downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] }; - let mut pc2ce = rlst_dynamic_mat![W, (ncheck_surface, nequiv_surface)]; + let mut pc2ce = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; kernel.assemble_st( EvalType::Value, @@ -216,7 +216,7 @@ where m2m.push(uc2e_inv_1.dot(&uc2e_inv_2.dot(&pc2ce)).eval()); - let mut cc2pe = rlst_dynamic_mat![W, (ncheck_surface, nequiv_surface)]; + let mut cc2pe = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; kernel.assemble_st( EvalType::Value, @@ -252,7 +252,7 @@ where } /// Implementation of the data structure to store the data for the single node KiFMM. -impl FmmData, T, U, V>, V> +impl FmmDataHashmap, T, U, V>, V> where T: Kernel, U: FieldTranslationData, @@ -263,7 +263,10 @@ where /// # Arguments /// `fmm` - A single node KiFMM object. /// `global_charges` - The charge data associated to the point data via unique global indices. 
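The `FmmLoop` hunks further down also swap the timing flag from `Option<bool>` to a plain `bool`, returning per-operator wall-clock times only when requested. A minimal sketch of the pattern, where `TimeDict` is assumed here to be a string-keyed map of millisecond counts, matching how it is filled in the hunks below:

```rust
use std::collections::HashMap;
use std::time::Instant;

// Assumed concrete shape of TimeDict for this sketch.
type TimeDict = HashMap<String, u128>;

fn run(time: bool) -> Option<TimeDict> {
    if time {
        let mut times = TimeDict::default();
        let start = Instant::now();
        // ... operator work (p2m, m2m, ...) would run here ...
        times.insert("p2m".to_string(), start.elapsed().as_millis());
        Some(times)
    } else {
        // Same work, no bookkeeping.
        None
    }
}

fn main() {
    assert!(run(false).is_none());
    let times = run(true).unwrap();
    assert!(times.contains_key("p2m"));
    println!("timed run: {times:?}");
}
```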
- pub fn new(fmm: KiFmm, T, U, V>, global_charges: &ChargeDict) -> Self { + pub fn new( + fmm: KiFmmHashMap, T, U, V>, + global_charges: &ChargeDict, + ) -> Self { let mut multipoles = HashMap::new(); let mut locals = HashMap::new(); let mut potentials = HashMap::new(); @@ -299,8 +302,6 @@ where } } - let fmm = Arc::new(fmm); - Self { fmm, multipoles, @@ -312,7 +313,23 @@ where } } -impl Fmm for KiFmm +impl KiFmmTrait for KiFmmHashMap +where + T: Tree, + U: Kernel, + V: FieldTranslationData, + W: Scalar + Float + Default, +{ + fn alpha_inner(&self) -> <::Kernel as Kernel>::T { + self.alpha_inner + } + + fn alpha_outer(&self) -> <::Kernel as Kernel>::T { + self.alpha_outer + } +} + +impl Fmm for KiFmmHashMap where T: Tree, U: Kernel, @@ -335,15 +352,15 @@ where } } -impl FmmLoop for FmmData +impl FmmLoop for FmmDataHashmap where T: Fmm, U: Scalar + Float + Default, - FmmData: SourceTranslation + FieldTranslation + TargetTranslation, + FmmDataHashmap: SourceTranslation + FieldTranslation + TargetTranslation, { - fn upward_pass(&self, time: Option) -> Option { + fn upward_pass(&self, time: bool) -> Option { match time { - Some(true) => { + true => { let mut times = TimeDict::default(); // Particle to Multipole let start = Instant::now(); @@ -359,7 +376,7 @@ where times.insert("m2m".to_string(), start.elapsed().as_millis()); Some(times) } - Some(false) | None => { + false => { // Particle to Multipole self.p2m(); @@ -373,11 +390,11 @@ where } } - fn downward_pass(&self, time: Option) -> Option { + fn downward_pass(&self, time: bool) -> Option { let depth = self.fmm.tree().get_depth(); match time { - Some(true) => { + true => { let mut times = TimeDict::default(); let mut l2l_time = 0; let mut m2l_time = 0; @@ -417,7 +434,7 @@ where Some(times) } - Some(false) | None => { + false => { for level in 2..=depth { if level > 2 { self.l2l(level); @@ -437,7 +454,7 @@ where } } - fn run(&self, time: Option) -> Option { + fn run(&self, time: bool) -> Option { let t1 = self.upward_pass(time); let t2 = self.downward_pass(time); @@ -504,16 +521,16 @@ mod test { ); // Create an FMM - let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); + let fmm = KiFmmHashMap::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); // Form charge dict, matching charges with their associated global indices let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); // Associate data with the FMM - let datatree = FmmData::new(fmm, &charge_dict); + let datatree = FmmDataHashmap::new(fmm, &charge_dict); // Run the experiment - datatree.run(None); + datatree.run(false); // Test that direct computation is close to the FMM. let leaf = &datatree.fmm.tree.get_keys(depth).unwrap()[0]; @@ -601,16 +618,16 @@ mod test { ); // Create an FMM - let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); + let fmm = KiFmmHashMap::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); // Form charge dict, matching charges with their associated global indices let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); // Associate data with the FMM - let datatree = FmmData::new(fmm, &charge_dict); + let datatree = FmmDataHashmap::new(fmm, &charge_dict); // Run the experiment - datatree.run(Some(true)); + datatree.run(true); // Test that direct computation is close to the FMM. 
let leaf = &datatree.fmm.tree.get_keys(depth).unwrap()[0]; @@ -659,7 +676,7 @@ mod test { #[test] fn test_fmm_fft_f64() { let npoints = 10000; - let points = points_fixture(npoints, None, None); + let points = points_fixture::(npoints, None, None); let global_idxs = (0..npoints).collect_vec(); let charges = vec![1.0; npoints]; @@ -668,6 +685,7 @@ mod test { let alpha_outer = 2.95; let adaptive = false; let ncrit = 150; + let depth = 3; let kernel = Laplace3dKernel::::default(); @@ -682,18 +700,21 @@ mod test { let m2l_data_fft = FftFieldTranslationKiFmm::new(kernel.clone(), order, *tree.get_domain(), alpha_inner); - let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); + let fmm = KiFmmHashMap::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); // Form charge dict, matching charges with their associated global indices let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); - let datatree = FmmData::new(fmm, &charge_dict); + let datatree = FmmDataHashmap::new(fmm, &charge_dict); - datatree.run(Some(true)); + let s = Instant::now(); + let times = datatree.run(true); + println!("runtime {:?} operators {:?}", s.elapsed(), times.unwrap()); - let leaf = &datatree.fmm.tree.get_keys(depth).unwrap()[0]; + let leaf = &datatree.fmm.tree.get_all_leaves().unwrap()[0]; let potentials = datatree.potentials.get(leaf).unwrap().lock().unwrap(); + let pts = datatree.fmm.tree().get_points(leaf).unwrap(); let leaf_coordinates = pts @@ -760,14 +781,14 @@ mod test { let m2l_data_fft = FftFieldTranslationKiFmm::new(kernel.clone(), order, *tree.get_domain(), alpha_inner); - let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); + let fmm = KiFmmHashMap::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); // Form charge dict, matching charges with their associated global indices let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); - let datatree = FmmData::new(fmm, &charge_dict); + let datatree = FmmDataHashmap::new(fmm, &charge_dict); - datatree.run(None); + datatree.run(false); let leaf = &datatree.fmm.tree.get_keys(depth).unwrap()[0]; diff --git a/fmm/src/fmm/linear.rs b/fmm/src/fmm/linear.rs new file mode 100644 index 00000000..144f9789 --- /dev/null +++ b/fmm/src/fmm/linear.rs @@ -0,0 +1,758 @@ +//! Implementation of constructors for FMMs as well as the implementation of FmmData, Fmm traits. 
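The linear variant implemented below drops the per-box `Arc<Mutex<...>>` storage of the hashmap variant in favour of flat coefficient vectors plus `SendPtrMut` raw pointers into them, so Rayon tasks can write disjoint blocks without locking. A minimal sketch of that pattern, assuming `rayon` is available and every pointer addresses a disjoint region, as the octree indexing guarantees in the real code:

```rust
use rayon::prelude::*;

#[derive(Clone, Copy)]
struct SendPtrMut<T> {
    raw: *mut T,
}

// Safety: only ever used with pointers into pairwise-disjoint blocks.
unsafe impl<T> Send for SendPtrMut<T> {}
unsafe impl<T> Sync for SendPtrMut<T> {}

fn main() {
    let ncoeffs = 4;
    let nkeys = 8;
    let mut coeffs = vec![0.0f64; ncoeffs * nkeys];
    let base = coeffs.as_mut_ptr();

    // One pointer per key, each addressing its own ncoeffs-long block.
    let ptrs: Vec<SendPtrMut<f64>> = (0..nkeys)
        .map(|i| SendPtrMut { raw: unsafe { base.add(i * ncoeffs) } })
        .collect();

    ptrs.par_iter().enumerate().for_each(|(i, p)| {
        let mut ptr = p.raw;
        for j in 0..ncoeffs {
            // Each task writes only within its own disjoint block.
            unsafe {
                *ptr += (i * ncoeffs + j) as f64;
                ptr = ptr.add(1);
            }
        }
    });

    assert_eq!(coeffs[5], 5.0);
    println!("filled {} coefficients in parallel", coeffs.len());
}
```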
+use cauchy::Scalar; +use itertools::Itertools; +use num::{Float, ToPrimitive}; +use std::time::Instant; + +use rlst::{ + algorithms::{linalg::DenseMatrixLinAlgBuilder, traits::svd::Svd}, + common::traits::{Eval, Transpose}, + dense::{rlst_dynamic_mat, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer}, +}; + +use bempp_traits::{ + field::{FieldTranslation, FieldTranslationData}, + fmm::{Fmm, FmmLoop, KiFmm as KiFmmTrait, SourceTranslation, TargetTranslation, TimeDict}, + kernel::{Kernel, ScaleInvariantKernel}, + tree::Tree, + types::EvalType, +}; +use bempp_tree::{constants::ROOT, types::single_node::SingleNodeTree}; + +use crate::types::{ChargeDict, FmmDataLinear, SendPtrMut}; +use crate::{ + pinv::{pinv, SvdScalar}, + types::KiFmmLinear, +}; + +/// Implementation of constructor for single node KiFMM +impl<'a, T, U, V> KiFmmLinear, T, U, V> +where + T: Kernel + ScaleInvariantKernel, + U: FieldTranslationData, + V: Scalar + Default + Float, + SvdScalar: PartialOrd, + SvdScalar: Scalar + Float + ToPrimitive, + DenseMatrixLinAlgBuilder: Svd, + V: MultiplyAdd< + V, + VectorContainer, + VectorContainer, + VectorContainer, + Dynamic, + Dynamic, + Dynamic, + >, + SvdScalar: MultiplyAdd< + SvdScalar, + VectorContainer>, + VectorContainer>, + VectorContainer>, + Dynamic, + Dynamic, + Dynamic, + >, +{ + /// Constructor for single node kernel independent FMM (KiFMM). This object contains all the precomputed operator matrices and metadata, as well as references to + /// the associated single node octree, and the associated kernel function. + /// + /// # Arguments + /// * `order` - The expansion order for the multipole and local expansions. + /// * `alpha_inner` - The ratio of the inner check surface diameter in comparison to the surface discretising a box. + /// * `alpha_outer` - The ratio of the outer check surface diameter in comparison to the surface discretising a box. + /// * `kernel` - The kernel function for this FMM. + /// * `tree` - The type of tree associated with this FMM, can be single or multi node. + /// * `m2l` - The M2L operator matrices, as well as metadata associated with this FMM. 
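One structural difference from the hashmap constructor is visible below: the eight child M2M operators are packed side by side into a single `(nequiv_surface, 8 * nequiv_surface)` matrix, so translating a whole sibling set of multipoles `q_1, ..., q_8` to their parent becomes a single matrix product:

```latex
\big[\, M_1 \;\; M_2 \;\; \cdots \;\; M_8 \,\big]
\begin{bmatrix} q_1 \\ q_2 \\ \vdots \\ q_8 \end{bmatrix}
\;=\; \sum_{i=1}^{8} M_i\, q_i .
```

Stacking `chunk_size` sibling sets as columns then turns a whole level into a few large GEMMs, which is what the chunked loop in `m2m` (linear/source.rs above) exploits.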
+ pub fn new( + order: usize, + alpha_inner: V, + alpha_outer: V, + kernel: T, + tree: SingleNodeTree, + m2l: U, + ) -> Self { + let upward_equivalent_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); + let upward_check_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_outer); + let downward_equivalent_surface = + ROOT.compute_surface(tree.get_domain(), order, alpha_outer); + let downward_check_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); + + let nequiv_surface = upward_equivalent_surface.len() / kernel.space_dimension(); + let ncheck_surface = upward_check_surface.len() / kernel.space_dimension(); + + // Store in RLST matrices + let upward_equivalent_surface = unsafe { + rlst_pointer_mat!['a, ::Real, upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] + }; + let upward_check_surface = unsafe { + rlst_pointer_mat!['a, ::Real, upward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] + }; + let downward_equivalent_surface = unsafe { + rlst_pointer_mat!['a, ::Real, downward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] + }; + let downward_check_surface = unsafe { + rlst_pointer_mat!['a, ::Real, downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] + }; + + // Compute upward check to equivalent, and downward check to equivalent Gram matrices + // as well as their inverses using DGESVD. + let mut uc2e = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; + kernel.assemble_st( + EvalType::Value, + upward_equivalent_surface.data(), + upward_check_surface.data(), + uc2e.data_mut(), + ); + + // Need to transpose so that rows correspond to targets and columns to sources + let uc2e = uc2e.transpose().eval(); + + let mut dc2e = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; + kernel.assemble_st( + EvalType::Value, + downward_equivalent_surface.data(), + downward_check_surface.data(), + dc2e.data_mut(), + ); + + // Need to transpose so that rows correspond to targets and columns to sources + let dc2e = dc2e.transpose().eval(); + + let (s, ut, v) = pinv::(&uc2e, None, None).unwrap(); + + let mut mat_s = rlst_dynamic_mat![SvdScalar, (s.len(), s.len())]; + for i in 0..s.len() { + mat_s[[i, i]] = SvdScalar::::from_real(s[i]); + } + let uc2e_inv_1 = v.dot(&mat_s); + let uc2e_inv_2 = ut; + + let uc2e_inv_1_shape = uc2e_inv_1.shape(); + let uc2e_inv_2_shape = uc2e_inv_2.shape(); + + let uc2e_inv_1 = uc2e_inv_1 + .data() + .iter() + .map(|x| V::from(*x).unwrap()) + .collect_vec(); + let uc2e_inv_1 = unsafe { + rlst_pointer_mat!['a, V, uc2e_inv_1.as_ptr(), uc2e_inv_1_shape, (1, uc2e_inv_1_shape.0)] + } + .eval(); + let uc2e_inv_2 = uc2e_inv_2 + .data() + .iter() + .map(|x| V::from(*x).unwrap()) + .collect_vec(); + let uc2e_inv_2 = unsafe { + rlst_pointer_mat!['a, V, uc2e_inv_2.as_ptr(), uc2e_inv_2_shape, (1, uc2e_inv_2_shape.0)] + } + .eval(); + + let (s, ut, v) = pinv::(&dc2e, None, None).unwrap(); + + let mut mat_s = rlst_dynamic_mat![SvdScalar, (s.len(), s.len())]; + for i in 0..s.len() { + mat_s[[i, i]] = SvdScalar::::from_real(s[i]); + } + + let dc2e_inv_1 = v.dot(&mat_s); + let dc2e_inv_2 = ut; + + let dc2e_inv_1_shape = dc2e_inv_1.shape(); + let dc2e_inv_2_shape = dc2e_inv_2.shape(); + + let dc2e_inv_1 = dc2e_inv_1 + .data() + .iter() + .map(|x| V::from(*x).unwrap()) + .collect_vec(); + let dc2e_inv_1 = unsafe { + rlst_pointer_mat!['a, V, 
dc2e_inv_1.as_ptr(), dc2e_inv_1_shape, (1, dc2e_inv_1_shape.0)] + } + .eval(); + let dc2e_inv_2 = dc2e_inv_2 + .data() + .iter() + .map(|x| V::from(*x).unwrap()) + .collect_vec(); + let dc2e_inv_2 = unsafe { + rlst_pointer_mat!['a, V, dc2e_inv_2.as_ptr(), dc2e_inv_2_shape, (1, dc2e_inv_2_shape.0)] + } + .eval(); + + // Calculate M2M/L2L matrices + let children = ROOT.children(); + let mut m2m = rlst_dynamic_mat![V, (nequiv_surface, 8 * nequiv_surface)]; + let mut l2l = Vec::new(); + + for (i, child) in children.iter().enumerate() { + let child_upward_equivalent_surface = + child.compute_surface(tree.get_domain(), order, alpha_inner); + let child_downward_check_surface = + child.compute_surface(tree.get_domain(), order, alpha_inner); + let child_upward_equivalent_surface = unsafe { + rlst_pointer_mat!['a, ::Real, child_upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] + }; + let child_downward_check_surface = unsafe { + rlst_pointer_mat!['a, ::Real, child_downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] + }; + + let mut pc2ce = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; + + kernel.assemble_st( + EvalType::Value, + child_upward_equivalent_surface.data(), + upward_check_surface.data(), + pc2ce.data_mut(), + ); + + // Need to transpose so that rows correspond to targets, and columns to sources + let pc2ce = pc2ce.transpose().eval(); + + let tmp = uc2e_inv_1.dot(&uc2e_inv_2.dot(&pc2ce)).eval(); + let l = i * nequiv_surface * nequiv_surface; + let r = l + nequiv_surface * nequiv_surface; + + m2m.data_mut()[l..r].copy_from_slice(tmp.data()); + + let mut cc2pe = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)]; + + kernel.assemble_st( + EvalType::Value, + downward_equivalent_surface.data(), + child_downward_check_surface.data(), + cc2pe.data_mut(), + ); + + // Need to transpose so that rows correspond to targets, and columns to sources + let cc2pe = cc2pe.transpose().eval(); + let mut tmp = dc2e_inv_1.dot(&dc2e_inv_2.dot(&cc2pe)).eval(); + tmp.data_mut() + .iter_mut() + .for_each(|d| *d *= kernel.scale(child.level())); + + l2l.push(tmp); + } + + Self { + order, + uc2e_inv_1, + uc2e_inv_2, + dc2e_inv_1, + dc2e_inv_2, + alpha_inner, + alpha_outer, + m2m, + l2l, + kernel, + tree, + m2l, + } + } +} + +/// Implementation of the data structure to store the data for the single node KiFMM. +impl FmmDataLinear, T, U, V>, V> +where + T: Kernel + ScaleInvariantKernel, + U: FieldTranslationData, + V: Float + Scalar + Default, +{ + /// Constructor for the KiFMM's associated FmmData on a single node. + /// + /// # Arguments + /// `fmm` - A single node KiFMM object. + /// `global_charges` - The charge data associated to the point data via unique global indices. 
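`FmmDataLinear::new`, which follows, also builds `charge_index_pointer`: one `(start, end)` pair per leaf into the flat, leaf-ordered charge and coordinate arrays, so kernels can slice point data without hashmap lookups. A minimal sketch of that construction (point counts hypothetical):

```rust
/// Per-leaf (start, end) ranges from per-leaf point counts, mirroring the
/// index-pointer construction in `FmmDataLinear::new`.
fn charge_index_pointer(npoints_per_leaf: &[usize]) -> Vec<(usize, usize)> {
    let mut bounds = Vec::with_capacity(npoints_per_leaf.len());
    let mut start = 0;
    for &n in npoints_per_leaf {
        bounds.push((start, start + n));
        start += n;
    }
    bounds
}

fn main() {
    let bounds = charge_index_pointer(&[3, 0, 2]);
    assert_eq!(bounds, vec![(0, 3), (3, 3), (3, 5)]);
    // An empty leaf yields an empty slice: charges[3..3].
}
```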
+ pub fn new( + fmm: KiFmmLinear, T, U, V>, + global_charges: &ChargeDict, + ) -> Result { + if let Some(keys) = fmm.tree().get_all_keys() { + let ncoeffs = fmm.m2l.ncoeffs(fmm.order); + let nkeys = keys.len(); + let leaves = fmm.tree().get_all_leaves().unwrap(); + let nleaves = leaves.len(); + let npoints = fmm.tree().get_all_points().unwrap().len(); + + let multipoles = vec![V::default(); ncoeffs * nkeys]; + let locals = vec![V::default(); ncoeffs * nkeys]; + + let mut potentials = vec![V::default(); npoints]; + let mut potentials_send_pointers = vec![SendPtrMut::default(); nleaves]; + + let mut charges = vec![V::default(); npoints]; + let global_indices = vec![0usize; npoints]; + + // Lookup leaf coordinates, and assign charges from within the data tree. + for (i, g_idx) in fmm + .tree() + .get_all_global_indices() + .unwrap() + .iter() + .enumerate() + { + let charge = global_charges.get(g_idx).unwrap(); + charges[i] = *charge; + } + + let mut level_multipoles = vec![Vec::new(); (fmm.tree().get_depth() + 1) as usize]; + let mut level_locals = vec![Vec::new(); (fmm.tree().get_depth() + 1) as usize]; + for level in 0..=fmm.tree().get_depth() { + let keys = fmm.tree().get_keys(level).unwrap(); + + let mut tmp_multipoles = Vec::new(); + let mut tmp_locals = Vec::new(); + for key in keys.iter() { + let idx = fmm.tree().key_to_index.get(key).unwrap(); + unsafe { + let raw = multipoles.as_ptr().add(ncoeffs * idx) as *mut V; + tmp_multipoles.push(SendPtrMut { raw }); + let raw = locals.as_ptr().add(ncoeffs * idx) as *mut V; + tmp_locals.push(SendPtrMut { raw }) + } + } + level_multipoles[level as usize] = tmp_multipoles; + level_locals[level as usize] = tmp_locals; + } + + let mut leaf_multipoles = Vec::new(); + let mut leaf_locals = Vec::new(); + for (i, key) in fmm.tree().get_all_keys().unwrap().iter().enumerate() { + if fmm.tree().get_all_leaves_set().contains(key) { + unsafe { + let raw = multipoles.as_ptr().add(i * ncoeffs) as *mut V; + leaf_multipoles.push(SendPtrMut { raw }); + let raw = locals.as_ptr().add(i * ncoeffs) as *mut V; + leaf_locals.push(SendPtrMut { raw }); + } + } + } + + // Create an index pointer for the charge data + let mut index_pointer = 0; + let mut charge_index_pointer = vec![(0usize, 0usize); nleaves]; + + let mut potential_raw_pointer = potentials.as_mut_ptr(); + + let mut scales = vec![V::default(); nleaves * ncoeffs]; + for (i, leaf) in leaves.iter().enumerate() { + // Assign scales + let l = i * ncoeffs; + let r = l + ncoeffs; + scales[l..r] + .copy_from_slice(vec![fmm.kernel.scale(leaf.level()); ncoeffs].as_slice()); + + // Assign potential pointers + let npoints; + if let Some(points) = fmm.tree().get_points(leaf) { + npoints = points.len(); + } else { + npoints = 0; + } + + potentials_send_pointers[i] = SendPtrMut { + raw: potential_raw_pointer, + }; + + let bounds = (index_pointer, index_pointer + npoints); + charge_index_pointer[i] = bounds; + index_pointer += npoints; + unsafe { potential_raw_pointer = potential_raw_pointer.add(npoints) }; + } + + let dim = fmm.kernel().space_dimension(); + let mut upward_surfaces = vec![V::default(); ncoeffs * nkeys * dim]; + let mut downward_surfaces = vec![V::default(); ncoeffs * nkeys * dim]; + + // For each key form both upward and downward check surfaces + for (i, key) in keys.iter().enumerate() { + let upward_surface = + key.compute_surface(fmm.tree().get_domain(), fmm.order(), fmm.alpha_outer()); + + let downward_surface = + key.compute_surface(fmm.tree().get_domain(), fmm.order(), fmm.alpha_inner()); + + let l = 
i * ncoeffs * dim; + let r = l + ncoeffs * dim; + + upward_surfaces[l..r].copy_from_slice(&upward_surface); + downward_surfaces[l..r].copy_from_slice(&downward_surface); + } + + let mut leaf_upward_surfaces = vec![V::default(); ncoeffs * nleaves * dim]; + let mut leaf_downward_surfaces = vec![V::default(); ncoeffs * nleaves * dim]; + for (i, leaf) in leaves.iter().enumerate() { + let upward_surface = + leaf.compute_surface(fmm.tree().get_domain(), fmm.order(), fmm.alpha_outer()); + + let downward_surface = + leaf.compute_surface(fmm.tree().get_domain(), fmm.order(), fmm.alpha_outer()); + + let l = i * ncoeffs * dim; + let r = l + ncoeffs * dim; + leaf_upward_surfaces[l..r].copy_from_slice(&upward_surface); + leaf_downward_surfaces[l..r].copy_from_slice(&downward_surface); + } + + return Ok(Self { + fmm, + multipoles, + level_multipoles, + leaf_multipoles, + locals, + level_locals, + leaf_locals, + upward_surfaces, + downward_surfaces, + leaf_upward_surfaces, + leaf_downward_surfaces, + potentials, + potentials_send_pointers, + charges, + charge_index_pointer, + scales, + global_indices, + }); + } + + Err("Not a valid tree".to_string()) + } +} + +impl KiFmmTrait for KiFmmLinear +where + T: Tree, + U: Kernel, + V: FieldTranslationData, + W: Scalar + Float + Default, +{ + fn alpha_inner(&self) -> <::Kernel as Kernel>::T { + self.alpha_inner + } + + fn alpha_outer(&self) -> <::Kernel as Kernel>::T { + self.alpha_outer + } +} + +impl Fmm for KiFmmLinear +where + T: Tree, + U: Kernel, + V: FieldTranslationData, + W: Scalar + Float + Default, +{ + type Tree = T; + type Kernel = U; + + fn order(&self) -> usize { + self.order + } + + fn kernel(&self) -> &Self::Kernel { + &self.kernel + } + + fn tree(&self) -> &Self::Tree { + &self.tree + } +} + +impl FmmLoop for FmmDataLinear +where + T: Fmm, + U: Scalar + Float + Default, + FmmDataLinear: SourceTranslation + FieldTranslation + TargetTranslation, +{ + fn upward_pass(&self, time: bool) -> Option { + match time { + true => { + let mut times = TimeDict::default(); + // Particle to Multipole + let start = Instant::now(); + self.p2m(); + times.insert("p2m".to_string(), start.elapsed().as_millis()); + + // Multipole to Multipole + let depth = self.fmm.tree().get_depth(); + let start = Instant::now(); + for level in (1..=depth).rev() { + self.m2m(level) + } + times.insert("m2m".to_string(), start.elapsed().as_millis()); + Some(times) + } + false => { + // Particle to Multipole + self.p2m(); + + // Multipole to Multipole + let depth = self.fmm.tree().get_depth(); + for level in (1..=depth).rev() { + self.m2m(level) + } + None + } + } + } + + fn downward_pass(&self, time: bool) -> Option { + let depth = self.fmm.tree().get_depth(); + + match time { + true => { + let mut times = TimeDict::default(); + let mut l2l_time = 0; + let mut m2l_time = 0; + + for level in 2..=depth { + if level > 2 { + let start = Instant::now(); + self.l2l(level); + l2l_time += start.elapsed().as_millis(); + } + + let start = Instant::now(); + self.m2l(level); + m2l_time += start.elapsed().as_millis(); + } + + times.insert("l2l".to_string(), l2l_time); + times.insert("m2l".to_string(), m2l_time); + + // Leaf level computations + let start = Instant::now(); + self.p2l(); + times.insert("p2l".to_string(), start.elapsed().as_millis()); + + // Sum all potential contributions + let start = Instant::now(); + self.m2p(); + times.insert("m2p".to_string(), start.elapsed().as_millis()); + + let start = Instant::now(); + self.p2p(); + times.insert("p2p".to_string(), 
start.elapsed().as_millis());
+
+                let start = Instant::now();
+                self.l2p();
+                times.insert("l2p".to_string(), start.elapsed().as_millis());
+
+                Some(times)
+            }
+            false => {
+                for level in 2..=depth {
+                    if level > 2 {
+                        self.l2l(level);
+                    }
+                    self.m2l(level);
+                }
+                // Leaf level computations
+                // self.p2l();
+
+                // Sum all potential contributions
+                // self.m2p();
+                self.p2p();
+                self.l2p();
+
+                None
+            }
+        }
+    }
+
+    fn run(&self, time: bool) -> Option<TimeDict> {
+        let t1 = self.upward_pass(time);
+        let t2 = self.downward_pass(time);
+
+        if let (Some(mut t1), Some(t2)) = (t1, t2) {
+            t1.extend(t2);
+            Some(t1)
+        } else {
+            None
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+
+    use super::*;
+
+    use bempp_field::types::FftFieldTranslationKiFmm;
+    use bempp_kernel::laplace_3d::Laplace3dKernel;
+    use bempp_tree::implementations::helpers::points_fixture;
+
+    use crate::charge::build_charge_dict;
+
+    #[test]
+    fn test_fmm_data_linear() {
+        let npoints = 10000;
+        let points = points_fixture::<f64>(npoints, None, None);
+        let global_idxs = (0..npoints).collect_vec();
+        let charges = vec![1.0; npoints];
+
+        let order = 8;
+        let alpha_inner = 1.05;
+        let alpha_outer = 2.95;
+        let ncrit = 150;
+        let depth = 3;
+
+        // Uniform trees
+        {
+            let adaptive = false;
+            let kernel = Laplace3dKernel::default();
+
+            let tree = SingleNodeTree::new(
+                points.data(),
+                adaptive,
+                Some(ncrit),
+                Some(depth),
+                &global_idxs[..],
+            );
+
+            let m2l_data_fft = FftFieldTranslationKiFmm::new(
+                kernel.clone(),
+                order,
+                *tree.get_domain(),
+                alpha_inner,
+            );
+
+            let fmm =
+                KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft);
+            // Form charge dict, matching charges with their associated global indices
+            let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]);
+
+            let datatree = FmmDataLinear::new(fmm, &charge_dict).unwrap();
+
+            let ncoeffs = datatree.fmm.m2l.ncoeffs(order);
+            let nleaves = datatree.fmm.tree().get_all_leaves_set().len();
+            let nkeys = datatree.fmm.tree().get_all_keys_set().len();
+
+            // Test that the number of coefficients is being correctly assigned
+            assert_eq!(datatree.multipoles.len(), ncoeffs * nkeys);
+            assert_eq!(datatree.leaf_multipoles.len(), nleaves);
+
+            // Test that leaf indices are being mapped correctly to leaf multipoles
+            let idx = 0;
+            let leaf_key = &datatree.fmm.tree().get_all_leaves().unwrap()[idx];
+            let &leaf_idx = datatree.fmm.tree().get_index(leaf_key).unwrap();
+            unsafe {
+                let result = datatree.multipoles.as_ptr().add(leaf_idx * ncoeffs);
+                let expected =
+                    datatree.multipoles[leaf_idx * ncoeffs..(leaf_idx + 1) * ncoeffs].as_ptr();
+                assert_eq!(result, expected);
+
+                let result = datatree.locals.as_ptr().add(leaf_idx * ncoeffs);
+                let expected =
+                    datatree.locals[leaf_idx * ncoeffs..(leaf_idx + 1) * ncoeffs].as_ptr();
+                assert_eq!(result, expected);
+            }
+
+            // Test that level expansion information refers to the correct memory, and has the correct shape
+            assert_eq!(
+                datatree.level_multipoles.len() as u64,
+                datatree.fmm.tree().get_depth() + 1
+            );
+            assert_eq!(
+                datatree.level_locals.len() as u64,
+                datatree.fmm.tree().get_depth() + 1
+            );
+
+            assert_eq!(
+                datatree.level_multipoles[(datatree.fmm.tree().get_depth()) as usize].len(),
+                nleaves
+            );
+            assert_eq!(
+                datatree.level_locals[(datatree.fmm.tree().get_depth()) as usize].len(),
+                nleaves
+            );
+
+            // Test that points are being assigned correctly
+            assert_eq!(datatree.potentials_send_pointers.len(), nleaves);
+            assert_eq!(datatree.potentials.len(), npoints);
+        }
+    }
+
+    #[test]
+    fn test_fmm_linear() {
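// NB: in the untimed branch of downward_pass above, p2l and m2p are
// currently commented out, so the timed and untimed downward passes do not
// apply the same set of operators; this appears to be a work-in-progress
// state of the diff rather than an intentional difference.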
let npoints = 10000;
+        let points = points_fixture::<f64>(npoints, None, None);
+        let global_idxs = (0..npoints).collect_vec();
+        let charges = vec![1.0; npoints];
+
+        let order = 6;
+        let alpha_inner = 1.05;
+        let alpha_outer = 2.95;
+        let adaptive = false;
+        let ncrit = 150;
+
+        let depth = 3;
+        let kernel = Laplace3dKernel::default();
+
+        let tree = SingleNodeTree::new(
+            points.data(),
+            adaptive,
+            Some(ncrit),
+            Some(depth),
+            &global_idxs[..],
+        );
+
+        let m2l_data_fft =
+            FftFieldTranslationKiFmm::new(kernel.clone(), order, *tree.get_domain(), alpha_inner);
+
+        let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft);
+
+        // Form charge dict, matching charges with their associated global indices
+        let charge_dict = build_charge_dict(&global_idxs[..], &charges[..]);
+
+        let datatree = FmmDataLinear::new(fmm, &charge_dict).unwrap();
+
+        datatree.run(false);
+
+        // Test that direct computation is close to the FMM.
+        let leaf = &datatree.fmm.tree.get_all_leaves().unwrap()[0];
+        let leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
+
+        let (l, r) = datatree.charge_index_pointer[*leaf_idx];
+
+        let potentials = &datatree.potentials[l..r];
+
+        let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
+        let leaf_coordinates = &coordinates[l * 3..r * 3];
+
+        let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension();
+
+        let leaf_coordinates = unsafe {
+            rlst_pointer_mat!['static, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)]
+        }.eval();
+
+        let mut direct = vec![0f64; ntargets];
+        let all_point_coordinates = points_fixture::<f64>(npoints, None, None);
+
+        let all_charges = charge_dict.into_values().collect_vec();
+
+        let kernel = Laplace3dKernel::default();
+
+        kernel.evaluate_st(
+            EvalType::Value,
+            all_point_coordinates.data(),
+            leaf_coordinates.data(),
+            &all_charges[..],
+            &mut direct[..],
+        );
+
+        let abs_error: f64 = potentials
+            .iter()
+            .zip(direct.iter())
+            .map(|(a, b)| (a - b).abs())
+            .sum();
+        let rel_error: f64 = abs_error / (direct.iter().sum::<f64>());
+
+        assert!(rel_error <= 1e-6);
+    }
+}
diff --git a/fmm/src/helpers.rs b/fmm/src/helpers.rs
new file mode 100644
index 00000000..a8fe1eb1
--- /dev/null
+++ b/fmm/src/helpers.rs
@@ -0,0 +1,10 @@
+/// Find the largest divisor of `n` that does not exceed `max_chunk_size`,
+/// by trial division downwards from `max_chunk_size`.
+pub fn find_chunk_size(n: usize, max_chunk_size: usize) -> usize {
+    for divisor in (1..=max_chunk_size).rev() {
+        if n % divisor == 0 {
+            return divisor;
+        }
+    }
+    1 // `n` has no divisor in range, fall back to a chunk size of 1
+}
diff --git a/fmm/src/interaction_lists.rs b/fmm/src/interaction_lists.rs
index 76979c10..8d9768ea 100644
--- a/fmm/src/interaction_lists.rs
+++ b/fmm/src/interaction_lists.rs
@@ -8,9 +8,131 @@ use bempp_traits::{
 use bempp_tree::types::morton::{MortonKey, MortonKeys};
 use num::Float;
 
-use crate::types::KiFmm;
+use crate::types::{KiFmmHashMap, KiFmmLinear};
 
-impl<T, U, V, W> InteractionLists for KiFmm<T, U, V, W>
+impl<T, U, V, W> InteractionLists for KiFmmHashMap<T, U, V, W>
+where
+    T: Tree<NodeIndex = MortonKey, NodeIndices = MortonKeys>,
+    U: Kernel<T = W>,
+    V: FieldTranslationData<U>,
+    W: Scalar + Float + Default,
+{
+    type Tree = T;
+
+    fn get_u_list(
+        &self,
+        key: &<Self::Tree as Tree>::NodeIndex,
+    ) -> Option<<Self::Tree as Tree>::NodeIndices> {
+        let mut u_list = Vec::<MortonKey>::new();
+        let neighbours = key.neighbors();
+
+        // Child level
+        let mut neighbors_children_adj: Vec<MortonKey> = neighbours
+            .iter()
+            .flat_map(|n| n.children())
+            .filter(|nc|
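// A worked example of `find_chunk_size` above: find_chunk_size(120, 16)
// returns 15, the largest divisor of 120 not exceeding 16, while
// find_chunk_size(7, 4) returns 1 because 7 is prime. A hypothetical call
// site (names not taken from this diff) that partitions keys into equally
// sized, exactly divisible chunks:
//
//     let chunk_size = find_chunk_size(nkeys, 64);
//     keys.par_chunks_exact(chunk_size)
//         .for_each(|chunk| { /* translate one block of keys */ });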
self.tree.get_all_keys_set().contains(nc) && key.is_adjacent(nc))
+            .collect();
+
+        // Key level
+        let mut neighbors_adj: Vec<MortonKey> = neighbours
+            .iter()
+            .filter(|n| self.tree.get_all_keys_set().contains(n) && key.is_adjacent(n))
+            .cloned()
+            .collect();
+
+        // Parent level
+        let mut parent_neighbours_adj: Vec<MortonKey> = key
+            .parent()
+            .neighbors()
+            .into_iter()
+            .filter(|pn| self.tree.get_all_keys_set().contains(pn) && key.is_adjacent(pn))
+            .collect();
+
+        u_list.append(&mut neighbors_children_adj);
+        u_list.append(&mut neighbors_adj);
+        u_list.append(&mut parent_neighbours_adj);
+        u_list.push(*key);
+
+        if !u_list.is_empty() {
+            Some(MortonKeys {
+                keys: u_list,
+                index: 0,
+            })
+        } else {
+            None
+        }
+    }
+
+    fn get_v_list(
+        &self,
+        key: &<Self::Tree as Tree>::NodeIndex,
+    ) -> Option<<Self::Tree as Tree>::NodeIndices> {
+        if key.level() >= 2 {
+            let v_list = key
+                .parent()
+                .neighbors()
+                .iter()
+                .flat_map(|pn| pn.children())
+                .filter(|pnc| self.tree.get_all_keys_set().contains(pnc) && !key.is_adjacent(pnc))
+                .collect_vec();
+
+            if !v_list.is_empty() {
+                return Some(MortonKeys {
+                    keys: v_list,
+                    index: 0,
+                });
+            } else {
+                return None;
+            }
+        }
+        None
+    }
+
+    fn get_w_list(
+        &self,
+        key: &<Self::Tree as Tree>::NodeIndex,
+    ) -> Option<<Self::Tree as Tree>::NodeIndices> {
+        // Child level
+        let w_list = key
+            .neighbors()
+            .iter()
+            .flat_map(|n| n.children())
+            .filter(|nc| self.tree.get_all_keys_set().contains(nc) && !key.is_adjacent(nc))
+            .collect_vec();
+
+        if !w_list.is_empty() {
+            Some(MortonKeys {
+                keys: w_list,
+                index: 0,
+            })
+        } else {
+            None
+        }
+    }
+
+    fn get_x_list(
+        &self,
+        key: &<Self::Tree as Tree>::NodeIndex,
+    ) -> Option<<Self::Tree as Tree>::NodeIndices> {
+        let x_list = key
+            .parent()
+            .neighbors()
+            .into_iter()
+            .filter(|pn| self.tree.get_all_keys_set().contains(pn) && !key.is_adjacent(pn))
+            .collect_vec();
+
+        if !x_list.is_empty() {
+            Some(MortonKeys {
+                keys: x_list,
+                index: 0,
+            })
+        } else {
+            None
+        }
+    }
+}
+
+impl<T, U, V, W> InteractionLists for KiFmmLinear<T, U, V, W>
 where
     T: Tree<NodeIndex = MortonKey, NodeIndices = MortonKeys>,
     U: Kernel<T = W>,
diff --git a/fmm/src/lib.rs b/fmm/src/lib.rs
index fbc31a22..87567237 100644
--- a/fmm/src/lib.rs
+++ b/fmm/src/lib.rs
@@ -1,8 +1,24 @@
 //! A general framework for implementing Fast Multipole Methods.
 pub mod charge;
 pub mod constants;
-pub mod field_translation;
-pub mod fmm;
 pub mod interaction_lists;
 pub mod pinv;
 pub mod types;
+
+mod field_translation {
+    pub mod hashmap {
+        pub mod source;
+        pub mod source_to_target;
+        pub mod target;
+    }
+    pub mod linear {
+        pub mod source;
+        pub mod source_to_target;
+        pub mod target;
+    }
+}
+
+mod fmm {
+    pub mod hashmap;
+    pub mod linear;
+}
diff --git a/fmm/src/types.rs b/fmm/src/types.rs
index 1b9441a1..62259781 100644
--- a/fmm/src/types.rs
+++ b/fmm/src/types.rs
@@ -6,7 +6,7 @@ use std::{
 use bempp_traits::{field::FieldTranslationData, fmm::Fmm, kernel::Kernel, tree::Tree};
 use bempp_tree::types::{morton::MortonKey, point::Point};
 use cauchy::Scalar;
-use num::Float;
+use num::{Complex, Float};
 use rlst::dense::traits::*;
 use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix};
 use rlst::{self};
@@ -30,13 +30,13 @@ pub type Potentials<T> = Matrix<T, BaseMatrix<T, VectorContainer<T>, Dynamic>, D
 pub type C2EType<T> = Matrix<T, BaseMatrix<T, VectorContainer<T>, Dynamic>, Dynamic>;
 
 /// Type to store data associated with an FMM in.
-pub struct FmmData<T, U>
+pub struct FmmDataHashmap<T, U>
 where
     T: Fmm,
     U: Scalar + Float + Default,
 {
     /// The associated FMM object, which implements an FMM interface
-    pub fmm: Arc<T>,
+    pub fmm: T,
 
     /// The multipole expansion data at each box.
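// Recap of the interaction lists defined in interaction_lists.rs above, in
// standard FMM terminology: the U list collects adjacent keys at the child,
// same and parent levels (near field, handled by P2P); the V list is the
// non-adjacent children of the parent's neighbours (handled by M2L); the W
// list is the non-adjacent children of a leaf's neighbours (M2P); and the X
// list is the non-adjacent neighbours of the parent (P2L).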
 pub multipoles: HashMap<MortonKey, Arc<Mutex<Vec<U>>>>,
@@ -54,8 +54,65 @@ where
     pub charges: HashMap<MortonKey, Arc<Vec<U>>>,
 }
 
+pub struct FmmDataLinear<T, U>
+where
+    T: Fmm,
+    U: Scalar + Float + Default,
+{
+    /// The associated FMM object, which implements an FMM interface
+    pub fmm: T,
+
+    /// The multipole expansion data at each box.
+    pub multipoles: Vec<U>,
+
+    /// Multipole expansions at leaf level
+    pub leaf_multipoles: Vec<SendPtrMut<U>>,
+
+    /// Multipole expansions at each level
+    pub level_multipoles: Vec<Vec<SendPtrMut<U>>>,
+
+    /// The local expansion at each box
+    pub locals: Vec<U>,
+
+    /// Local expansions at the leaf level
+    pub leaf_locals: Vec<SendPtrMut<U>>,
+
+    /// The local expansion data at each level.
+    pub level_locals: Vec<Vec<SendPtrMut<U>>>,
+
+    /// The evaluated potentials at each leaf box.
+    pub potentials: Vec<U>,
+
+    /// Pointers to the evaluated potentials at each leaf box.
+    pub potentials_send_pointers: Vec<SendPtrMut<U>>,
+
+    /// All upward surfaces
+    pub upward_surfaces: Vec<U>,
+
+    /// All downward surfaces
+    pub downward_surfaces: Vec<U>,
+
+    /// Leaf upward surfaces
+    pub leaf_upward_surfaces: Vec<U>,
+
+    /// Leaf downward surfaces
+    pub leaf_downward_surfaces: Vec<U>,
+
+    /// The charge data at each leaf box.
+    pub charges: Vec<U>,
+
+    /// Index pointer between leaf keys and charges
+    pub charge_index_pointer: Vec<(usize, usize)>,
+
+    /// Scales of each leaf operator
+    pub scales: Vec<U>,
+
+    /// Global indices of each charge
+    pub global_indices: Vec<usize>,
+}
+
 /// Type to store data associated with the kernel independent (KiFMM) in.
-pub struct KiFmm<T, U, V, W>
+pub struct KiFmmHashMap<T, U, V, W>
 where
     T: Tree,
     U: Kernel<T = W>,
     V: FieldTranslationData<U>,
     W: Scalar + Float + Default,
 {
@@ -97,10 +154,78 @@ where
     pub m2l: V,
 }
 
-pub trait SameType {
-    type Other;
+/// Type to store data associated with the kernel independent (KiFMM) in.
+pub struct KiFmmLinear<T, U, V, W>
+where
+    T: Tree,
+    U: Kernel<T = W>,
+    V: FieldTranslationData<U>,
+    W: Scalar + Float + Default,
+{
+    /// The expansion order
+    pub order: usize,
+
+    /// The pseudo-inverse of the dense interaction matrix between the upward check and upward equivalent surfaces.
+    /// Store in two parts to avoid propagating error from computing pseudo-inverse
+    pub uc2e_inv_1: C2EType<W>,
+    pub uc2e_inv_2: C2EType<W>,
+
+    /// The pseudo-inverse of the dense interaction matrix between the downward check and downward equivalent surfaces.
+    /// Store in two parts to avoid propagating error from computing pseudo-inverse
+    pub dc2e_inv_1: C2EType<W>,
+    pub dc2e_inv_2: C2EType<W>,
+
+    /// The ratio of the inner check surface diameter in comparison to the surface discretising a box.
+    pub alpha_inner: W,
+
+    /// The ratio of the outer check surface diameter in comparison to the surface discretising a box.
+    pub alpha_outer: W,
+
+    /// The multipole to multipole operator matrices, each index is associated with a child box (in sequential Morton order),
+    pub m2m: C2EType<W>,
+
+    /// The local to local operator matrices, each index is associated with a child box (in sequential Morton order).
+    pub l2l: Vec<C2EType<W>>,
+
+    /// The tree (single or multi node) associated with this FMM
+    pub tree: T,
+
+    /// The kernel associated with this FMM.
+    pub kernel: U,
+
+    /// The M2L operator matrices, as well as metadata associated with this FMM.
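// The "Linear" types above are the flat counterparts of the hashmap
// variants: where FmmDataHashmap keeps one HashMap entry per key,
// FmmDataLinear keeps a single contiguous Vec per quantity and addresses it
// through the key-to-index maps introduced in this diff, e.g. (sketch):
//
//     let &idx = tree.get_index(&key).unwrap();
//     let multipole = &multipoles[idx * ncoeffs..(idx + 1) * ncoeffs];
//
// trading hash lookups and per-key Arc<Mutex<...>> locking for pointer
// arithmetic that parallelises cleanly with rayon.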
+ pub m2l: V, +} + +/// A threadsafe mutable raw pointer +#[derive(Clone, Debug, Copy)] +pub struct SendPtrMut { + pub raw: *mut T, } -impl SameType for T { - type Other = T; +unsafe impl Sync for SendPtrMut {} +unsafe impl Send for SendPtrMut> {} + +impl Default for SendPtrMut { + fn default() -> Self { + SendPtrMut { + raw: std::ptr::null_mut(), + } + } +} + +/// A threadsafe raw pointer +#[derive(Clone, Debug, Copy)] +pub struct SendPtr { + pub raw: *const T, +} + +unsafe impl Sync for SendPtr {} + +impl Default for SendPtr { + fn default() -> Self { + SendPtr { + raw: std::ptr::null(), + } + } } diff --git a/kernel/src/laplace_3d.rs b/kernel/src/laplace_3d.rs index bfc95c9f..7d4e7dd9 100644 --- a/kernel/src/laplace_3d.rs +++ b/kernel/src/laplace_3d.rs @@ -4,7 +4,7 @@ use std::marker::PhantomData; use crate::helpers::{check_dimensions_assemble, check_dimensions_evaluate}; use bempp_traits::{ - kernel::{Kernel, KernelScale}, + kernel::{Kernel, ScaleInvariantKernel}, types::{EvalType, KernelType, Scalar}, }; use num::traits::FloatConst; @@ -16,7 +16,7 @@ pub struct Laplace3dKernel { _phantom_t: std::marker::PhantomData, } -impl> KernelScale for Laplace3dKernel { +impl> ScaleInvariantKernel for Laplace3dKernel { type T = T; fn scale(&self, level: u64) -> Self::T { @@ -209,7 +209,7 @@ pub fn evaluate_laplace_one_target( my_result += charges[index].mul_real(inv_diff_norm); } - result[0] = my_result.mul_real(m_inv_4pi); + result[0] += my_result.mul_real(m_inv_4pi); } EvalType::ValueDeriv => { // Cannot simply use an array my_result as this is not @@ -240,10 +240,10 @@ pub fn evaluate_laplace_one_target( my_result3 += charges[index].mul_real(diff2 * inv_diff_norm_cubed); } - result[0] = my_result0.mul_real(m_inv_4pi); - result[1] = my_result1.mul_real(m_inv_4pi); - result[2] = my_result2.mul_real(m_inv_4pi); - result[3] = my_result3.mul_real(m_inv_4pi); + result[0] += my_result0.mul_real(m_inv_4pi); + result[1] += my_result1.mul_real(m_inv_4pi); + result[2] += my_result2.mul_real(m_inv_4pi); + result[3] += my_result3.mul_real(m_inv_4pi); } } } @@ -381,7 +381,7 @@ mod test { let sources = rlst::dense::rlst_rand_mat![f64, (nsources, 3)]; let targets = rlst::dense::rlst_rand_mat![f64, (ntargets, 3)]; let charges = rlst::dense::rlst_rand_col_vec![f64, nsources]; - let mut green_value = rlst::dense::rlst_rand_col_vec![f64, ntargets]; + let mut green_value = rlst::dense::rlst_col_vec![f64, ntargets]; Laplace3dKernel::::default().evaluate_st( EvalType::Value, diff --git a/traits/src/fmm.rs b/traits/src/fmm.rs index ea4f695c..454cefd2 100644 --- a/traits/src/fmm.rs +++ b/traits/src/fmm.rs @@ -54,6 +54,15 @@ pub trait Fmm { fn tree(&self) -> &Self::Tree; } +pub trait KiFmm +where + Self: Fmm, +{ + fn alpha_inner(&self) -> <::Kernel as Kernel>::T; + + fn alpha_outer(&self) -> <::Kernel as Kernel>::T; +} + /// Dictionary containing timings pub type TimeDict = HashMap; @@ -63,19 +72,19 @@ pub trait FmmLoop { /// /// # Arguments /// `time` - If true, method returns a dictionary of times for the downward pass operators. - fn upward_pass(&self, time: Option) -> Option; + fn upward_pass(&self, time: bool) -> Option; /// Compute the downward pass, optionally collect timing for each operator. /// /// # Arguments /// `time` - If true, method returns a dictionary of times for the upward pass operators. - fn downward_pass(&self, time: Option) -> Option; + fn downward_pass(&self, time: bool) -> Option; /// Compute the upward and downward pass, optionally collect timing for each operator. 
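// Two behavioural changes worth noting above: (i) evaluate_laplace_one_target
// now accumulates into its output (`+=` rather than `=`), so callers must
// zero-initialise the result buffer, which is why the test swaps
// rlst_rand_col_vec! for rlst_col_vec!; (ii) the timing flag on FmmLoop is
// now a plain bool where it previously took an Option.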
/// /// # Arguments /// `time` - If true, method returns a dictionary of times for all operators. - fn run(&self, time: Option) -> Option; + fn run(&self, time: bool) -> Option; } /// Interface to compute interaction lists given a tree. diff --git a/traits/src/kernel.rs b/traits/src/kernel.rs index 15cb3b5c..06a35114 100644 --- a/traits/src/kernel.rs +++ b/traits/src/kernel.rs @@ -94,7 +94,7 @@ pub trait Kernel: Sync { } /// Scaling required by the FMM to apply kernel to each octree level. -pub trait KernelScale { +pub trait ScaleInvariantKernel { /// The kernel is generic over data type. type T: Scalar; diff --git a/traits/src/tree.rs b/traits/src/tree.rs index af82d487..115b4ad8 100644 --- a/traits/src/tree.rs +++ b/traits/src/tree.rs @@ -2,6 +2,9 @@ use std::collections::HashSet; use std::hash::Hash; +use cauchy::Scalar; +use num::Float; + /// Tree is the trait interface for distributed octrees implemented by Rusty Fast Solvers. /// This trait makes no assumptions about the downstream usage of a struct implementing Tree, /// it simply provides methods for accessing tree nodes, and associated data, and is generically @@ -10,22 +13,16 @@ pub trait Tree { /// The computational domain defined by the tree. type Domain; + type Precision: Scalar + Float + Default; + /// The type of points that define a tree. - type Point; + type Point: Point; /// Slice of points. type PointSlice<'a>: IntoIterator where Self: 'a; - /// Data to be attached to each point. - type PointData; - - /// A slice of point data. - type PointDataSlice<'a>: IntoIterator - where - Self: 'a; - /// A tree node. type NodeIndex: MortonKeyInterface; @@ -49,7 +46,7 @@ pub trait Tree { fn get_depth(&self) -> u64; /// Get a reference to all leaves, gets local keys in multi-node setting. - fn get_leaves(&self) -> Option>; + fn get_all_leaves(&self) -> Option>; /// Get a reference to keys at a given level, gets local keys in a multi-node setting. fn get_keys(&self, level: u64) -> Option>; @@ -66,9 +63,30 @@ pub trait Tree { /// Gets a reference to the points contained with a leaf node. fn get_points<'a>(&'a self, key: &Self::NodeIndex) -> Option>; + /// Gets a reference to the points contained with a leaf node. + fn get_all_points(&self) -> Option>; + + /// Gets a reference to the coordinates contained with a leaf node. + fn get_coordinates<'a>(&'a self, key: &Self::NodeIndex) -> Option<&'a [Self::Precision]>; + + /// Gets a reference to the coordinates contained in across tree (local in multinode setting) + fn get_all_coordinates(&self) -> Option<&[Self::Precision]>; + + /// Gets global indices at a leaf (local in multinode setting) + fn get_global_indices<'a>(&'a self, key: &Self::NodeIndex) -> Option<&'a [usize]>; + + /// Gets all global indices (local in multinode setting) + fn get_all_global_indices(&self) -> Option<&[usize]>; + /// Get domain defined by the points, gets global domain in multi-node setting. fn get_domain(&self) -> &'_ Self::Domain; + /// Get a map from the key to index position in sorted keys + fn get_index(&self, key: &Self::NodeIndex) -> Option<&usize>; + + /// Get a map from the key to leaf index position in sorted leaves + fn get_leaf_index(&self, key: &Self::NodeIndex) -> Option<&usize>; + /// Checks whether a a given node corresponds to a leaf fn is_leaf(&self, key: &Self::NodeIndex) -> bool; @@ -76,6 +94,12 @@ pub trait Tree { fn is_node(&self, key: &Self::NodeIndex) -> bool; } +pub trait Point +where + T: Scalar + Float + Default, +{ +} + /// A minimal interface for Morton Key like nodes. 
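// On the KernelScale to ScaleInvariantKernel rename above: the Laplace
// kernel is homogeneous, K(a r) = K(r) / a, so operators assembled for one
// octree level can be reused at the next by applying a per-level scale
// factor; the new trait name makes explicit that scale() is only meaningful
// for kernels with this scale-invariance property.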
pub trait MortonKeyInterface where diff --git a/tree/examples/test_adaptive.rs b/tree/examples/test_adaptive.rs index 65998399..07813ef7 100644 --- a/tree/examples/test_adaptive.rs +++ b/tree/examples/test_adaptive.rs @@ -1,119 +1,121 @@ -//? mpirun -n {{NPROCESSES}} --features "mpi" -#![allow(unused_imports)] - -#[cfg(feature = "mpi")] -use mpi::{environment::Universe, topology::UserCommunicator, traits::*}; - -use bempp_tree::implementations::helpers::points_fixture; - -#[cfg(feature = "mpi")] -use bempp_tree::types::{domain::Domain, morton::MortonKey, multi_node::MultiNodeTree}; - -use bempp_traits::types::Scalar; -use rlst::dense::RawAccess; - -use rand::distributions::uniform::SampleUniform; - -use num::traits::Float; - -/// Test that the leaves on separate nodes do not overlap. +// //? mpirun -n {{NPROCESSES}} --features "mpi" +// #![allow(unused_imports)] + +// #[cfg(feature = "mpi")] +// use mpi::{environment::Universe, topology::UserCommunicator, traits::*}; + +// use bempp_tree::implementations::helpers::points_fixture; + +// #[cfg(feature = "mpi")] +// use bempp_tree::types::{domain::Domain, morton::MortonKey, multi_node::MultiNodeTree}; + +// use bempp_traits::types::Scalar; +// use rlst::dense::RawAccess; + +// use rand::distributions::uniform::SampleUniform; + +// use num::traits::Float; + +// /// Test that the leaves on separate nodes do not overlap. +// #[cfg(feature = "mpi")] +// fn test_no_overlaps>( +// world: &UserCommunicator, +// tree: &MultiNodeTree, +// ) { +// // Communicate bounds from each process +// let max = tree.leaves.iter().max().unwrap(); +// let min = tree.leaves.iter().min().unwrap(); + +// // Gather all bounds at root +// let size = world.size(); +// let rank = world.rank(); + +// let next_rank = if rank + 1 < size { rank + 1 } else { 0 }; +// let previous_rank = if rank > 0 { rank - 1 } else { size - 1 }; + +// let previous_process = world.process_at_rank(previous_rank); +// let next_process = world.process_at_rank(next_rank); + +// // Send max to partner +// if rank < (size - 1) { +// next_process.send(max); +// } + +// let mut partner_max = MortonKey::default(); + +// if rank > 0 { +// previous_process.receive_into(&mut partner_max); +// } + +// // Test that the partner's minimum node is greater than the process's maximum node +// if rank > 0 { +// assert!(partner_max < *min) +// } +// } + +// /// Test that the globally defined domain contains all the points at a given node. 
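// The overlap check above relies on leaves being distributed to ranks in
// Morton order: each rank forwards its largest key to its successor, and if
// every received key compares strictly less than the local minimum, the leaf
// ranges held by neighbouring ranks cannot overlap.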
+// #[cfg(feature = "mpi")] +// fn test_global_bounds( +// world: &UserCommunicator, +// ) { +// let npoints = 10000; +// let points = points_fixture::(npoints, None, None); + +// let comm = world.duplicate(); + +// let domain = Domain::from_global_points(points.data(), &comm); + +// // Test that all local points are contained within the global domain +// for i in 0..npoints { +// let x = points.data()[i]; +// let y = points.data()[i + npoints]; +// let z = points.data()[i + 2 * npoints]; + +// assert!(domain.origin[0] <= x && x <= domain.origin[0] + domain.diameter[0]); +// assert!(domain.origin[1] <= y && y <= domain.origin[1] + domain.diameter[1]); +// assert!(domain.origin[2] <= z && z <= domain.origin[2] + domain.diameter[2]); +// } +// } + +// #[cfg(feature = "mpi")] +// fn main() { +// // Setup an MPI environment +// let universe: Universe = mpi::initialize().unwrap(); +// let world = universe.world(); +// let comm = world.duplicate(); + +// // Setup tree parameters +// let adaptive = true; +// let n_crit = Some(50); +// let depth: Option<_> = None; +// let n_points = 10000; +// let k = 2; + +// let points = points_fixture::(n_points, None, None); +// let global_idxs: Vec<_> = (0..n_points).collect(); + +// let tree = MultiNodeTree::new( +// &comm, +// points.data(), +// adaptive, +// n_crit, +// depth, +// k, +// &global_idxs, +// ); + +// test_global_bounds::(&comm); +// if world.rank() == 0 { +// println!("\t ... test_global_bounds passed on adaptive tree"); +// } + +// test_no_overlaps(&comm, &tree); +// if world.rank() == 0 { +// println!("\t ... test_no_overlaps passed on adaptive tree"); +// } +// } #[cfg(feature = "mpi")] -fn test_no_overlaps>( - world: &UserCommunicator, - tree: &MultiNodeTree, -) { - // Communicate bounds from each process - let max = tree.leaves.iter().max().unwrap(); - let min = tree.leaves.iter().min().unwrap(); - - // Gather all bounds at root - let size = world.size(); - let rank = world.rank(); - - let next_rank = if rank + 1 < size { rank + 1 } else { 0 }; - let previous_rank = if rank > 0 { rank - 1 } else { size - 1 }; - - let previous_process = world.process_at_rank(previous_rank); - let next_process = world.process_at_rank(next_rank); - - // Send max to partner - if rank < (size - 1) { - next_process.send(max); - } - - let mut partner_max = MortonKey::default(); - - if rank > 0 { - previous_process.receive_into(&mut partner_max); - } - - // Test that the partner's minimum node is greater than the process's maximum node - if rank > 0 { - assert!(partner_max < *min) - } -} - -/// Test that the globally defined domain contains all the points at a given node. 
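// The x/y/z indexing in test_global_bounds above reflects points_fixture's
// column-major layout: all x coordinates come first, then all y, then all z,
// so point i is (data[i], data[i + npoints], data[i + 2 * npoints]). This is
// distinct from the interleaved [x, y, z] per-point layout used by the
// coordinates buffers added to the trees later in this diff.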
-#[cfg(feature = "mpi")] -fn test_global_bounds( - world: &UserCommunicator, -) { - let npoints = 10000; - let points = points_fixture::(npoints, None, None); - - let comm = world.duplicate(); - - let domain = Domain::from_global_points(points.data(), &comm); - - // Test that all local points are contained within the global domain - for i in 0..npoints { - let x = points.data()[i]; - let y = points.data()[i + npoints]; - let z = points.data()[i + 2 * npoints]; - - assert!(domain.origin[0] <= x && x <= domain.origin[0] + domain.diameter[0]); - assert!(domain.origin[1] <= y && y <= domain.origin[1] + domain.diameter[1]); - assert!(domain.origin[2] <= z && z <= domain.origin[2] + domain.diameter[2]); - } -} - -#[cfg(feature = "mpi")] -fn main() { - // Setup an MPI environment - let universe: Universe = mpi::initialize().unwrap(); - let world = universe.world(); - let comm = world.duplicate(); - - // Setup tree parameters - let adaptive = true; - let n_crit = Some(50); - let depth: Option<_> = None; - let n_points = 10000; - let k = 2; - - let points = points_fixture::(n_points, None, None); - let global_idxs: Vec<_> = (0..n_points).collect(); - - let tree = MultiNodeTree::new( - &comm, - points.data(), - adaptive, - n_crit, - depth, - k, - &global_idxs, - ); - - test_global_bounds::(&comm); - if world.rank() == 0 { - println!("\t ... test_global_bounds passed on adaptive tree"); - } - - test_no_overlaps(&comm, &tree); - if world.rank() == 0 { - println!("\t ... test_no_overlaps passed on adaptive tree"); - } -} +fn main() {} #[cfg(not(feature = "mpi"))] fn main() {} diff --git a/tree/examples/test_uniform.rs b/tree/examples/test_uniform.rs index fc5c86c4..44f670ca 100644 --- a/tree/examples/test_uniform.rs +++ b/tree/examples/test_uniform.rs @@ -1,113 +1,116 @@ -// ? mpirun -n {{NPROCESSES}} --features "mpi" -#![allow(unused_imports)] +// // ? mpirun -n {{NPROCESSES}} --features "mpi" +// #![allow(unused_imports)] + +// #[cfg(feature = "mpi")] +// use mpi::{environment::Universe, topology::UserCommunicator, traits::*}; + +// use bempp_traits::tree::Tree; +// use bempp_traits::types::Scalar; + +// use rand::distributions::uniform::SampleUniform; + +// #[cfg(feature = "mpi")] +// use bempp_tree::types::{domain::Domain, morton::MortonKey, multi_node::MultiNodeTree}; + +// use bempp_tree::implementations::helpers::points_fixture; +// use rlst::dense::RawAccess; + +// use num::traits::Float; + +// /// Test that the leaves on separate nodes do not overlap. 
+// #[cfg(feature = "mpi")] +// fn test_no_overlaps>( +// world: &UserCommunicator, +// tree: &MultiNodeTree, +// ) { +// // Communicate bounds from each process +// let max = tree.get_all_leaves_set().iter().max().unwrap(); +// let min = tree.get_all_leaves_set().iter().min().unwrap(); + +// // Gather all bounds at root +// let size = world.size(); +// let rank = world.rank(); + +// let next_rank = if rank + 1 < size { rank + 1 } else { 0 }; +// let previous_rank = if rank > 0 { rank - 1 } else { size - 1 }; + +// let previous_process = world.process_at_rank(previous_rank); +// let next_process = world.process_at_rank(next_rank); + +// // Send min to partner +// if rank > 0 { +// previous_process.send(min); +// } + +// let mut partner_min = MortonKey::default(); + +// if rank < (size - 1) { +// next_process.receive_into(&mut partner_min); +// } + +// // Test that the partner's minimum node is greater than the process's maximum node +// if rank < size - 1 { +// assert!(max < &partner_min) +// } +// } + +// /// Test that the globally defined domain contains all the points at a given node. +// #[cfg(feature = "mpi")] +// fn test_global_bounds( +// world: &UserCommunicator, +// ) { +// let npoints = 10000; +// let points = points_fixture::(npoints, None, None); + +// let comm = world.duplicate(); + +// let domain = Domain::from_global_points(points.data(), &comm); + +// // Test that all local points are contained within the global domain +// for i in 0..npoints { +// let x = points.data()[i]; +// let y = points.data()[i + npoints]; +// let z = points.data()[i + 2 * npoints]; + +// assert!(domain.origin[0] <= x && x <= domain.origin[0] + domain.diameter[0]); +// assert!(domain.origin[1] <= y && y <= domain.origin[1] + domain.diameter[1]); +// assert!(domain.origin[2] <= z && z <= domain.origin[2] + domain.diameter[2]); +// } +// } + +// #[cfg(feature = "mpi")] +// fn main() { +// // Setup an MPI environment +// let universe: Universe = mpi::initialize().unwrap(); +// let world = universe.world(); +// let comm = world.duplicate(); + +// // Setup tree parameters +// let adaptive = false; +// let k = 2; +// let depth = Some(3); +// let n_points = 10000; + +// // Generate some random test data local to each process +// let points = points_fixture::(n_points, None, None); +// let global_idxs: Vec<_> = (0..n_points).collect(); + +// // Create a uniform tree +// let tree = MultiNodeTree::new(&comm, points.data(), adaptive, None, depth, k, &global_idxs); + +// test_global_bounds::(&comm); +// if world.rank() == 0 { +// println!("\t ... test_global_bounds passed on uniform tree"); +// } + +// test_no_overlaps(&comm, &tree); +// if world.rank() == 0 { +// println!("\t ... test_no_overlaps passed on uniform tree"); +// } +// } -#[cfg(feature = "mpi")] -use mpi::{environment::Universe, topology::UserCommunicator, traits::*}; - -use bempp_traits::tree::Tree; -use bempp_traits::types::Scalar; - -use rand::distributions::uniform::SampleUniform; - -#[cfg(feature = "mpi")] -use bempp_tree::types::{domain::Domain, morton::MortonKey, multi_node::MultiNodeTree}; - -use bempp_tree::implementations::helpers::points_fixture; -use rlst::dense::RawAccess; - -use num::traits::Float; - -/// Test that the leaves on separate nodes do not overlap. 
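// This uniform-tree variant of the overlap test sends each rank's minimum
// key backwards rather than its maximum forwards; both directions establish
// the same invariant, that leaf ranges are disjoint and ordered by rank.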
-#[cfg(feature = "mpi")] -fn test_no_overlaps>( - world: &UserCommunicator, - tree: &MultiNodeTree, -) { - // Communicate bounds from each process - let max = tree.get_all_leaves_set().iter().max().unwrap(); - let min = tree.get_all_leaves_set().iter().min().unwrap(); - - // Gather all bounds at root - let size = world.size(); - let rank = world.rank(); - - let next_rank = if rank + 1 < size { rank + 1 } else { 0 }; - let previous_rank = if rank > 0 { rank - 1 } else { size - 1 }; - - let previous_process = world.process_at_rank(previous_rank); - let next_process = world.process_at_rank(next_rank); - - // Send min to partner - if rank > 0 { - previous_process.send(min); - } - - let mut partner_min = MortonKey::default(); - - if rank < (size - 1) { - next_process.receive_into(&mut partner_min); - } - - // Test that the partner's minimum node is greater than the process's maximum node - if rank < size - 1 { - assert!(max < &partner_min) - } -} - -/// Test that the globally defined domain contains all the points at a given node. -#[cfg(feature = "mpi")] -fn test_global_bounds( - world: &UserCommunicator, -) { - let npoints = 10000; - let points = points_fixture::(npoints, None, None); - - let comm = world.duplicate(); - - let domain = Domain::from_global_points(points.data(), &comm); - - // Test that all local points are contained within the global domain - for i in 0..npoints { - let x = points.data()[i]; - let y = points.data()[i + npoints]; - let z = points.data()[i + 2 * npoints]; - - assert!(domain.origin[0] <= x && x <= domain.origin[0] + domain.diameter[0]); - assert!(domain.origin[1] <= y && y <= domain.origin[1] + domain.diameter[1]); - assert!(domain.origin[2] <= z && z <= domain.origin[2] + domain.diameter[2]); - } -} +#[cfg(not(feature = "mpi"))] +fn main() {} #[cfg(feature = "mpi")] -fn main() { - // Setup an MPI environment - let universe: Universe = mpi::initialize().unwrap(); - let world = universe.world(); - let comm = world.duplicate(); - - // Setup tree parameters - let adaptive = false; - let k = 2; - let depth = Some(3); - let n_points = 10000; - - // Generate some random test data local to each process - let points = points_fixture::(n_points, None, None); - let global_idxs: Vec<_> = (0..n_points).collect(); - - // Create a uniform tree - let tree = MultiNodeTree::new(&comm, points.data(), adaptive, None, depth, k, &global_idxs); - - test_global_bounds::(&comm); - if world.rank() == 0 { - println!("\t ... test_global_bounds passed on uniform tree"); - } - - test_no_overlaps(&comm, &tree); - if world.rank() == 0 { - println!("\t ... 
test_no_overlaps passed on uniform tree"); - } -} - -#[cfg(not(feature = "mpi"))] fn main() {} diff --git a/tree/src/implementations/impl_multi_node.rs b/tree/src/implementations/impl_multi_node.rs index 0b979197..3ad53857 100644 --- a/tree/src/implementations/impl_multi_node.rs +++ b/tree/src/implementations/impl_multi_node.rs @@ -4,19 +4,21 @@ use itertools::Itertools; use std::collections::{HashMap, HashSet}; use std::fmt::Debug; -use mpi::{collective::SystemOperation, topology::UserCommunicator, traits::*, Rank}; +use mpi::{ + // collective::SystemOperation, + topology::UserCommunicator, + traits::*, + Rank, +}; use num::traits::Float; use hyksort::hyksort; -use bempp_traits::tree::Tree; +// use bempp_traits::tree::Tree; use crate::{ constants::{DEEPEST_LEVEL, DEFAULT_LEVEL, NCRIT, ROOT}, - implementations::{ - impl_morton::{complete_region, encode_anchor}, - mpi_helpers::all_to_allv_sparse, - }, + implementations::impl_morton::{complete_region, encode_anchor}, types::{ domain::Domain, morton::{KeyType, MortonKey, MortonKeys}, @@ -166,11 +168,21 @@ where subset.sort(); } + let coordinates = points + .points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + let global_indices = points.points.iter().map(|p| p.global_idx).collect_vec(); + MultiNodeTree { world: world.duplicate(), depth, domain: *domain, points, + coordinates, + global_indices, leaves, keys, leaves_to_points, @@ -361,11 +373,21 @@ where subset.sort(); } + let coordinates = points + .points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + let global_indices = points.points.iter().map(|p| p.global_idx).collect_vec(); + MultiNodeTree { world: world.duplicate(), depth, domain: *domain, points, + coordinates, + global_indices, leaves: globally_balanced, keys, leaves_to_points, @@ -544,293 +566,319 @@ where } } -impl Tree for MultiNodeTree -where - T: Float + Default + Scalar, -{ - type Domain = Domain; - type NodeIndex = MortonKey; - type NodeIndexSlice<'a> = &'a [MortonKey] - where T: 'a; - type NodeIndices = MortonKeys; - type Point = Point; - type PointSlice<'a> = &'a [Point] - where T: 'a; - type PointData = f64; - type PointDataSlice<'a> = &'a [f64] - where T: 'a; - type GlobalIndex = usize; - type GlobalIndexSlice<'a> = &'a [usize] - where T: 'a; - - fn get_depth(&self) -> u64 { - self.depth - } - - fn get_domain(&self) -> &'_ Self::Domain { - &self.domain - } - - fn get_keys(&self, level: u64) -> Option> { - if let Some(&(l, r)) = self.levels_to_keys.get(&level) { - Some(&self.keys[l..r]) - } else { - None - } - } - - fn get_all_keys(&self) -> Option> { - Some(&self.keys) - } - - fn get_all_keys_set(&self) -> &'_ HashSet { - &self.keys_set - } - - fn get_all_leaves_set(&self) -> &'_ HashSet { - &self.leaves_set - } - - fn get_leaves(&self) -> Option> { - Some(&self.leaves) - } - - fn get_points<'a>(&'a self, key: &Self::NodeIndex) -> Option> { - if let Some(&(l, r)) = self.leaves_to_points.get(key) { - Some(&self.points.points[l..r]) - } else { - None - } - } - - fn is_leaf(&self, key: &Self::NodeIndex) -> bool { - self.leaves_set.contains(key) - } - - fn is_node(&self, key: &Self::NodeIndex) -> bool { - self.keys_set.contains(key) - } -} - -impl MultiNodeTree -where - T: Float + Default + Equivalence + Scalar, -{ - /// Create a locally essential tree (LET) for use in Fast Multipole Methods (FMMs). 
- /// - /// The idea is to communicate the required point and octant data across the distributed tree prior - /// to the running of the upward pass so that multipole expansions can be constructed independently - /// on each processor at the leaf level, and for the final potential evaluation each process already - /// contains its required point data for near field calculations. - pub fn create_let(&self) -> MultiNodeTree { - // Communicate ranges globally using AllGather - let rank = self.world.rank(); - let size = self.world.size(); - - let mut ranges = vec![0 as KeyType; (size as usize) * 3]; - - self.world.all_gather_into(&self.range, &mut ranges); - - // Calculate users for each key in local tree - let mut users: Vec> = Vec::new(); - let mut key_packet_destinations = vec![0 as Rank; size as usize]; - let mut leaf_packet_destinations = vec![0 as Rank; size as usize]; - - for key in self.keys_set.iter() { - let mut user_tmp: Vec = Vec::new(); - - // Loop over all ranges, each key may be used by multiple processes - for chunk in ranges.chunks_exact(3) { - let rank = chunk[0] as Rank; - let min = MortonKey::from_morton(chunk[1]); - let max = MortonKey::from_morton(chunk[2]); - - // Check if ranges overlap with the neighbors of the key's parent - if rank != self.world.rank() { - if key.level() > 1 { - let colleagues_parent: Vec = key.parent().neighbors(); - let (cp_min, cp_max) = ( - colleagues_parent.iter().min(), - colleagues_parent.iter().max(), - ); - - if let (Some(cp_min), Some(cp_max)) = (cp_min, cp_max) { - if (cp_min >= &min) && (cp_max <= &max) - || (cp_min <= &min) && (cp_max >= &min) && (cp_max <= &max) - || (cp_min >= &min) && (cp_min <= &max) && (cp_max >= &max) - || (cp_min <= &min) && (cp_max >= &max) - { - user_tmp.push(rank); - - // Mark ranks as users of keys/leaves from this process - if key_packet_destinations[rank as usize] == 0 { - key_packet_destinations[rank as usize] = 1 - } - - if leaf_packet_destinations[rank as usize] == 0 - && self.leaves_set.contains(key) - { - leaf_packet_destinations[rank as usize] = 1; - } - } - } - } - // If the key is at level one its parent is the root node, so by definition - // it will always overlap with a range - else if key.level() <= 1 { - user_tmp.push(rank); - if key_packet_destinations[rank as usize] == 0 { - key_packet_destinations[rank as usize] = 1 - } - - if leaf_packet_destinations[rank as usize] == 0 - && self.leaves_set.contains(key) - { - leaf_packet_destinations[rank as usize] = 1; - } - } - } - } - users.push(user_tmp); - } - - // Communicate number of packets being received by each process globally - let mut keys_to_receive = vec![0i32; size as usize]; - self.world.all_reduce_into( - &key_packet_destinations, - &mut keys_to_receive, - SystemOperation::sum(), - ); - - let mut leaves_to_receive = vec![0i32; size as usize]; - self.world.all_reduce_into( - &leaf_packet_destinations, - &mut leaves_to_receive, - SystemOperation::sum(), - ); - - // Calculate the number of receives that this process is expecting - let recv_count_keys = keys_to_receive[rank as usize]; - let recv_count_leaves = leaves_to_receive[rank as usize]; - - // Filter for packet destinations - key_packet_destinations = key_packet_destinations - .into_iter() - .enumerate() - .filter(|(_, x)| x > &0) - .map(|(i, _)| i as Rank) - .collect(); - - leaf_packet_destinations = leaf_packet_destinations - .into_iter() - .enumerate() - .filter(|(_, x)| x > &0) - .map(|(i, _)| i as Rank) - .collect(); - - let mut key_packets: Vec> = Vec::new(); - let mut 
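// The four-clause range check in create_let above is the expanded form of
// the standard interval intersection test: [cp_min, cp_max] overlaps
// [min, max] exactly when cp_min <= max && min <= cp_max; the four cases
// enumerate which endpoints fall inside which interval.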
leaf_packets: Vec> = Vec::new(); - let mut point_packets: Vec>> = Vec::new(); - - let mut key_packet_destinations_filt: Vec = Vec::new(); - let mut leaf_packet_destinations_filt: Vec = Vec::new(); - - // Form packets for each send - for &rank in key_packet_destinations.iter() { - let key_packet: Vec = self - .keys_set - .iter() - .zip(users.iter()) - .filter(|(_, user)| user.contains(&rank)) - .map(|(k, _)| *k) - .collect(); - let key_packet_set: HashSet = key_packet.iter().cloned().collect(); - - if !key_packet.is_empty() { - key_packets.push(key_packet); - key_packet_destinations_filt.push(rank); - } - - if leaf_packet_destinations.contains(&rank) { - let leaf_packet: Vec = key_packet_set - .intersection(&self.leaves_set) - .cloned() - .collect(); - - let point_packet: Vec> = leaf_packet - .iter() - .flat_map(|leaf| self.get_points(leaf).unwrap().to_vec()) - .collect(); - - if !leaf_packet.is_empty() { - leaf_packets.push(leaf_packet); - point_packets.push(point_packet); - leaf_packet_destinations_filt.push(rank); - } - } - } - - // Communicate keys, leaves and points - let received_leaves = all_to_allv_sparse( - &self.world, - &leaf_packets, - &leaf_packet_destinations_filt, - &recv_count_leaves, - ); - - let mut received_points = all_to_allv_sparse( - &self.world, - &point_packets, - &leaf_packet_destinations_filt, - &recv_count_leaves, - ); - - let received_keys = all_to_allv_sparse( - &self.world, - &key_packets, - &key_packet_destinations_filt, - &recv_count_keys, - ); - - // Group received points by received leaves - received_points.sort(); - - let mut leaves_to_points = HashMap::new(); - let mut curr = received_points[0]; - let mut curr_idx = 0; - - for (i, point) in received_points.iter().enumerate() { - if point.encoded_key != curr.encoded_key { - leaves_to_points.insert(curr.encoded_key, (curr_idx, i)); - curr_idx = i; - curr = *point; - } - } - leaves_to_points.insert(curr.encoded_key, (curr_idx, received_points.len())); - - let locally_essential_tree = MultiNodeTree { - range: self.range, - world: self.world.duplicate(), - depth: self.depth, - domain: self.domain, - leaves_to_points, - levels_to_keys: HashMap::default(), - leaves_set: received_leaves.iter().cloned().collect(), - keys_set: received_keys.iter().cloned().collect(), - points: Points { - points: received_points, - index: 0, - }, - leaves: MortonKeys { - keys: received_leaves, - index: 0, - }, - keys: MortonKeys { - keys: received_keys, - index: 0, - }, - }; - - locally_essential_tree - } -} +// impl Tree for MultiNodeTree +// where +// T: Float + Default + Scalar, +// { +// type Precision = T; +// type Domain = Domain; +// type NodeIndex = MortonKey; +// type NodeIndexSlice<'a> = &'a [MortonKey] +// where T: 'a; +// type NodeIndices = MortonKeys; +// type Point = Point; +// type PointSlice<'a> = &'a [Point] +// where T: 'a; +// type GlobalIndex = usize; +// type GlobalIndexSlice<'a> = &'a [usize] +// where T: 'a; + +// fn get_depth(&self) -> u64 { +// self.depth +// } + +// fn get_domain(&self) -> &'_ Self::Domain { +// &self.domain +// } + +// fn get_keys(&self, level: u64) -> Option> { +// if let Some(&(l, r)) = self.levels_to_keys.get(&level) { +// Some(&self.keys[l..r]) +// } else { +// None +// } +// } + +// fn get_all_keys(&self) -> Option> { +// Some(&self.keys) +// } + +// fn get_all_keys_set(&self) -> &'_ HashSet { +// &self.keys_set +// } + +// fn get_all_leaves_set(&self) -> &'_ HashSet { +// &self.leaves_set +// } + +// fn get_all_leaves(&self) -> Option> { +// Some(&self.leaves) +// } + +// fn 
get_points<'a>(&'a self, key: &Self::NodeIndex) -> Option> { +// if let Some(&(l, r)) = self.leaves_to_points.get(key) { +// Some(&self.points.points[l..r]) +// } else { +// None +// } +// } + +// fn get_all_points<'a>(&'a self) -> Option> { +// Some(&self.points.points) +// } + +// fn get_coordinates<'a>(&'a self, key: &Self::NodeIndex) -> Option<&'a [Self::Precision]> { +// if let Some(&(l, r)) = self.leaves_to_points.get(key) { +// Some(&self.coordinates[l * 3..r * 3]) +// } else { +// None +// } +// } + +// fn get_all_coordinates<'a>(&'a self) -> Option<&'a [Self::Precision]> { +// Some(&self.coordinates) +// } + +// fn get_global_indices<'a>(&'a self, key: &Self::NodeIndex) -> Option<&'a [usize]> { +// if let Some(&(l, r)) = self.leaves_to_points.get(key) { +// Some(&self.global_indices[l..r]) +// } else { +// None +// } +// } + +// fn get_all_global_indices<'a>(&'a self) -> Option<&'a [usize]> { +// Some(&self.global_indices) +// } + +// fn is_leaf(&self, key: &Self::NodeIndex) -> bool { +// self.leaves_set.contains(key) +// } + +// fn is_node(&self, key: &Self::NodeIndex) -> bool { +// self.keys_set.contains(key) +// } +// } + +// impl MultiNodeTree +// where +// T: Float + Default + Equivalence + Scalar, +// { +// /// Create a locally essential tree (LET) for use in Fast Multipole Methods (FMMs). +// /// +// /// The idea is to communicate the required point and octant data across the distributed tree prior +// /// to the running of the upward pass so that multipole expansions can be constructed independently +// /// on each processor at the leaf level, and for the final potential evaluation each process already +// /// contains its required point data for near field calculations. +// pub fn create_let(&self) -> MultiNodeTree { +// // Communicate ranges globally using AllGather +// let rank = self.world.rank(); +// let size = self.world.size(); + +// let mut ranges = vec![0 as KeyType; (size as usize) * 3]; + +// self.world.all_gather_into(&self.range, &mut ranges); + +// // Calculate users for each key in local tree +// let mut users: Vec> = Vec::new(); +// let mut key_packet_destinations = vec![0 as Rank; size as usize]; +// let mut leaf_packet_destinations = vec![0 as Rank; size as usize]; + +// for key in self.keys_set.iter() { +// let mut user_tmp: Vec = Vec::new(); + +// // Loop over all ranges, each key may be used by multiple processes +// for chunk in ranges.chunks_exact(3) { +// let rank = chunk[0] as Rank; +// let min = MortonKey::from_morton(chunk[1]); +// let max = MortonKey::from_morton(chunk[2]); + +// // Check if ranges overlap with the neighbors of the key's parent +// if rank != self.world.rank() { +// if key.level() > 1 { +// let colleagues_parent: Vec = key.parent().neighbors(); +// let (cp_min, cp_max) = ( +// colleagues_parent.iter().min(), +// colleagues_parent.iter().max(), +// ); + +// if let (Some(cp_min), Some(cp_max)) = (cp_min, cp_max) { +// if (cp_min >= &min) && (cp_max <= &max) +// || (cp_min <= &min) && (cp_max >= &min) && (cp_max <= &max) +// || (cp_min >= &min) && (cp_min <= &max) && (cp_max >= &max) +// || (cp_min <= &min) && (cp_max >= &max) +// { +// user_tmp.push(rank); + +// // Mark ranks as users of keys/leaves from this process +// if key_packet_destinations[rank as usize] == 0 { +// key_packet_destinations[rank as usize] = 1 +// } + +// if leaf_packet_destinations[rank as usize] == 0 +// && self.leaves_set.contains(key) +// { +// leaf_packet_destinations[rank as usize] = 1; +// } +// } +// } +// } +// // If the key is at level one its 
parent is the root node, so by definition +// // it will always overlap with a range +// else if key.level() <= 1 { +// user_tmp.push(rank); +// if key_packet_destinations[rank as usize] == 0 { +// key_packet_destinations[rank as usize] = 1 +// } + +// if leaf_packet_destinations[rank as usize] == 0 +// && self.leaves_set.contains(key) +// { +// leaf_packet_destinations[rank as usize] = 1; +// } +// } +// } +// } +// users.push(user_tmp); +// } + +// // Communicate number of packets being received by each process globally +// let mut keys_to_receive = vec![0i32; size as usize]; +// self.world.all_reduce_into( +// &key_packet_destinations, +// &mut keys_to_receive, +// SystemOperation::sum(), +// ); + +// let mut leaves_to_receive = vec![0i32; size as usize]; +// self.world.all_reduce_into( +// &leaf_packet_destinations, +// &mut leaves_to_receive, +// SystemOperation::sum(), +// ); + +// // Calculate the number of receives that this process is expecting +// let recv_count_keys = keys_to_receive[rank as usize]; +// let recv_count_leaves = leaves_to_receive[rank as usize]; + +// // Filter for packet destinations +// key_packet_destinations = key_packet_destinations +// .into_iter() +// .enumerate() +// .filter(|(_, x)| x > &0) +// .map(|(i, _)| i as Rank) +// .collect(); + +// leaf_packet_destinations = leaf_packet_destinations +// .into_iter() +// .enumerate() +// .filter(|(_, x)| x > &0) +// .map(|(i, _)| i as Rank) +// .collect(); + +// let mut key_packets: Vec> = Vec::new(); +// let mut leaf_packets: Vec> = Vec::new(); +// let mut point_packets: Vec>> = Vec::new(); + +// let mut key_packet_destinations_filt: Vec = Vec::new(); +// let mut leaf_packet_destinations_filt: Vec = Vec::new(); + +// // Form packets for each send +// for &rank in key_packet_destinations.iter() { +// let key_packet: Vec = self +// .keys_set +// .iter() +// .zip(users.iter()) +// .filter(|(_, user)| user.contains(&rank)) +// .map(|(k, _)| *k) +// .collect(); +// let key_packet_set: HashSet = key_packet.iter().cloned().collect(); + +// if !key_packet.is_empty() { +// key_packets.push(key_packet); +// key_packet_destinations_filt.push(rank); +// } + +// if leaf_packet_destinations.contains(&rank) { +// let leaf_packet: Vec = key_packet_set +// .intersection(&self.leaves_set) +// .cloned() +// .collect(); + +// let point_packet: Vec> = leaf_packet +// .iter() +// .flat_map(|leaf| self.get_points(leaf).unwrap().to_vec()) +// .collect(); + +// if !leaf_packet.is_empty() { +// leaf_packets.push(leaf_packet); +// point_packets.push(point_packet); +// leaf_packet_destinations_filt.push(rank); +// } +// } +// } + +// // Communicate keys, leaves and points +// let received_leaves = all_to_allv_sparse( +// &self.world, +// &leaf_packets, +// &leaf_packet_destinations_filt, +// &recv_count_leaves, +// ); + +// let mut received_points = all_to_allv_sparse( +// &self.world, +// &point_packets, +// &leaf_packet_destinations_filt, +// &recv_count_leaves, +// ); + +// let received_keys = all_to_allv_sparse( +// &self.world, +// &key_packets, +// &key_packet_destinations_filt, +// &recv_count_keys, +// ); + +// // Group received points by received leaves +// received_points.sort(); + +// let mut leaves_to_points = HashMap::new(); +// let mut curr = received_points[0]; +// let mut curr_idx = 0; + +// for (i, point) in received_points.iter().enumerate() { +// if point.encoded_key != curr.encoded_key { +// leaves_to_points.insert(curr.encoded_key, (curr_idx, i)); +// curr_idx = i; +// curr = *point; +// } +// } +// 
leaves_to_points.insert(curr.encoded_key, (curr_idx, received_points.len())); + +// let locally_essential_tree = MultiNodeTree { +// range: self.range, +// world: self.world.duplicate(), +// depth: self.depth, +// domain: self.domain, +// leaves_to_points, +// levels_to_keys: HashMap::default(), +// leaves_set: received_leaves.iter().cloned().collect(), +// keys_set: received_keys.iter().cloned().collect(), +// points: Points { +// points: received_points, +// index: 0, +// }, +// leaves: MortonKeys { +// keys: received_leaves, +// index: 0, +// }, +// keys: MortonKeys { +// keys: received_keys, +// index: 0, +// }, +// }; + +// locally_essential_tree +// } +// } diff --git a/tree/src/implementations/impl_point.rs b/tree/src/implementations/impl_point.rs index 89c4f62b..896f2c0f 100644 --- a/tree/src/implementations/impl_point.rs +++ b/tree/src/implementations/impl_point.rs @@ -2,7 +2,9 @@ use std::cmp::Ordering; use std::hash::{Hash, Hasher}; +use bempp_traits::tree::Point as PointInterface; use bempp_traits::types::Scalar; +use num::Float; use crate::types::point::{Point, Points}; @@ -65,6 +67,8 @@ where } } +impl PointInterface for Point where T: Scalar + Float + Default {} + // impl Iterator for Points { // type Item = Point; diff --git a/tree/src/implementations/impl_single_node.rs b/tree/src/implementations/impl_single_node.rs index b9c6a1a3..8d50d6a1 100644 --- a/tree/src/implementations/impl_single_node.rs +++ b/tree/src/implementations/impl_single_node.rs @@ -90,7 +90,7 @@ where leaves_to_points.insert(curr.encoded_key, (curr_idx, points.points.len())); // Add unmapped leaves - let leaves = MortonKeys { + let mut leaves = MortonKeys { keys: leaves_to_points .keys() .cloned() @@ -99,6 +99,9 @@ where index: 0, }; + // Sort leaves before returning + leaves.sort(); + // Find all keys in tree let tmp: HashSet = leaves .iter() @@ -135,13 +138,38 @@ where subset.sort(); } + let coordinates = points + .points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let global_indices = points.points.iter().map(|p| p.global_idx).collect_vec(); + + let mut key_to_index = HashMap::new(); + + for (i, key) in keys.iter().enumerate() { + key_to_index.insert(*key, i); + } + + let mut leaf_to_index = HashMap::new(); + + for (i, key) in leaves.iter().enumerate() { + leaf_to_index.insert(*key, i); + } + SingleNodeTree { depth, points, + coordinates, + global_indices, domain: *domain, leaves, keys, leaves_to_points, + key_to_index, + leaf_to_index, leaves_set, keys_set, levels_to_keys, @@ -225,7 +253,7 @@ where leaves_to_points.insert(curr.encoded_key, (curr_idx, points.points.len())); // Add unmapped leaves - let leaves = MortonKeys { + let mut leaves = MortonKeys { keys: leaves_to_points .keys() .cloned() @@ -234,6 +262,9 @@ where index: 0, }; + // Sort leaves before returning + leaves.sort(); + // Find all keys in tree let tmp: HashSet = leaves .iter() @@ -278,13 +309,36 @@ where subset.sort(); } + let coordinates = points + .points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + let global_indices = points.points.iter().map(|p| p.global_idx).collect_vec(); + + let mut key_to_index = HashMap::new(); + + for (i, key) in keys.iter().enumerate() { + key_to_index.insert(*key, i); + } + + let mut leaf_to_index = HashMap::new(); + + for (i, key) in leaves.iter().enumerate() { + leaf_to_index.insert(*key, i); + } SingleNodeTree { depth, points, + coordinates, + global_indices, domain: *domain, leaves, keys, 
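// key_to_index and leaf_to_index, built above by enumerating the sorted key
// and leaf arrays, give O(1) lookup from a MortonKey to its position; this
// is what allows the linear FMM data structures earlier in this diff to
// address their flat expansion buffers directly via get_index and
// get_leaf_index.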
leaves_to_points, + key_to_index, + leaf_to_index, leaves_set, keys_set, levels_to_keys, @@ -507,18 +561,17 @@ impl Tree for SingleNodeTree where T: Float + Default + Scalar, { + type Precision = T; type Domain = Domain; type NodeIndex = MortonKey; type NodeIndexSlice<'a> = &'a [MortonKey] where T: 'a; type NodeIndices = MortonKeys; - type Point = Point; + type Point = Point; type PointSlice<'a> = &'a [Point] where T: 'a; - type PointData = f64; - type PointDataSlice<'a> = &'a [f64] - where T: 'a; + type GlobalIndex = usize; type GlobalIndexSlice<'a> = &'a [usize] where T: 'a; @@ -551,7 +604,7 @@ where &self.leaves_set } - fn get_leaves(&self) -> Option> { + fn get_all_leaves(&self) -> Option> { Some(&self.leaves) } @@ -563,6 +616,42 @@ where } } + fn get_all_points(&self) -> Option> { + Some(&self.points.points) + } + + fn get_coordinates<'a>(&'a self, key: &Self::NodeIndex) -> Option<&'a [Self::Precision]> { + if let Some(&(l, r)) = self.leaves_to_points.get(key) { + Some(&self.coordinates[l * 3..r * 3]) + } else { + None + } + } + + fn get_all_coordinates(&self) -> Option<&[Self::Precision]> { + Some(&self.coordinates) + } + + fn get_global_indices<'a>(&'a self, key: &Self::NodeIndex) -> Option<&'a [usize]> { + if let Some(&(l, r)) = self.leaves_to_points.get(key) { + Some(&self.global_indices[l..r]) + } else { + None + } + } + + fn get_all_global_indices(&self) -> Option<&[usize]> { + Some(&self.global_indices) + } + + fn get_index(&self, key: &Self::NodeIndex) -> Option<&usize> { + self.key_to_index.get(key) + } + + fn get_leaf_index(&self, key: &Self::NodeIndex) -> Option<&usize> { + self.leaf_to_index.get(key) + } + fn is_leaf(&self, key: &Self::NodeIndex) -> bool { self.leaves_set.contains(key) } @@ -593,7 +682,7 @@ mod test { // Test that the tree really is uniform let levels: Vec = tree - .get_leaves() + .get_all_leaves() .unwrap() .iter() .map(|node| node.level()) @@ -611,7 +700,7 @@ mod test { // Test that the tree really is uniform let levels: Vec = tree - .get_leaves() + .get_all_leaves() .unwrap() .iter() .map(|node| node.level()) @@ -647,7 +736,7 @@ mod test { // Test that tree is not uniform let levels: Vec = tree - .get_leaves() + .get_all_leaves() .unwrap() .iter() .map(|node| node.level()) @@ -687,8 +776,8 @@ mod test { let global_idxs = (0..npoints).collect_vec(); let uniform = SingleNodeTree::new(points.data(), false, Some(150), Some(4), &global_idxs); let adaptive = SingleNodeTree::new(points.data(), true, Some(150), None, &global_idxs); - test_no_overlaps_helper(uniform.get_leaves().unwrap()); - test_no_overlaps_helper(adaptive.get_leaves().unwrap()); + test_no_overlaps_helper(uniform.get_all_leaves().unwrap()); + test_no_overlaps_helper(adaptive.get_all_leaves().unwrap()); } #[test] diff --git a/tree/src/types/multi_node.rs b/tree/src/types/multi_node.rs index 6c383fcc..62dd966f 100644 --- a/tree/src/types/multi_node.rs +++ b/tree/src/types/multi_node.rs @@ -29,6 +29,12 @@ where /// A vector of Cartesian points. pub points: Points, + /// All coordinates + pub coordinates: Vec, + + /// All global indices + pub global_indices: Vec, + /// The leaves that span the tree. pub leaves: MortonKeys, diff --git a/tree/src/types/single_node.rs b/tree/src/types/single_node.rs index 5e53fc20..b4803d91 100644 --- a/tree/src/types/single_node.rs +++ b/tree/src/types/single_node.rs @@ -25,6 +25,12 @@ where /// All Points. 
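// get_coordinates above maps a leaf's point range (l, r) to the slice
// coordinates[l * 3..r * 3], i.e. three interleaved values per point, built
// by flat-mapping each point's [x, y, z] coordinate during construction.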
pub points: Points, + /// All coordinates + pub coordinates: Vec, + + /// All global indices + pub global_indices: Vec, + /// The leaves that span the tree, and associated Point data. pub leaves: MortonKeys, @@ -37,6 +43,12 @@ where /// Associate levels with key indices. pub levels_to_keys: HashMap, + /// Map between a key and its index + pub key_to_index: HashMap, + + /// Map between a leaf and its index + pub leaf_to_index: HashMap, + /// All leaves, returned as a set. pub leaves_set: HashSet,