From 17b85689670d96ff9dd1e06ae3be2ea91f79152d Mon Sep 17 00:00:00 2001
From: Srinath Kailasa
Date: Mon, 22 Jan 2024 13:26:46 +0000
Subject: [PATCH] Add individual low-rank approximations for M2L

---
 field/src/field.rs                            | 145 +++++
 field/src/types.rs                            |  39 ++
 fmm/examples/single_node.rs                   |  11 +-
 fmm/examples/single_node_matrix.rs            |  17 +-
 .../source_to_target/svd_ia.rs                | 504 ++++++++++++++++++
 fmm/src/fmm.rs                                | 105 +++-
 fmm/src/lib.rs                                |   1 +
 7 files changed, 815 insertions(+), 7 deletions(-)
 create mode 100644 fmm/src/field_translation/source_to_target/svd_ia.rs

diff --git a/field/src/field.rs b/field/src/field.rs
index f5c4c533..e6a1f91b 100644
--- a/field/src/field.rs
+++ b/field/src/field.rs
 use bempp_tree::{
     implementations::helpers::find_corners, types::domain::Domain, types::morton::MortonKey,
 };
 
+use crate::types::{SvdFieldTranslationKiFmmIA, SvdM2lOperatorDataIA};
 use crate::{
     array::flip3,
     fft::Fft,
     }
 }
 
+impl<T, U> FieldTranslationData<U> for SvdFieldTranslationKiFmmIA<T, U>
+where
+    T: Float + Default,
+    T: Scalar<Real = T> + Gemm,
+    U: Kernel<T = T> + Default,
+    Array<T, BaseArray<T, VectorContainer<T>, 2>, 2>: MatrixSvd,
+{
+    type TransferVector = Vec<TransferVector>;
+    type M2LOperators = SvdM2lOperatorDataIA<T>;
+    type Domain = Domain<T>;
+
+    fn ncoeffs(&self, order: usize) -> usize {
+        6 * (order - 1).pow(2) + 2
+    }
+
+    fn compute_m2l_operators<'a>(&self, order: usize, domain: Self::Domain) -> Self::M2LOperators {
+        // Unique M2L interactions are computed at Level 3, the shallowest level at which
+        // all transfer vectors appear.
+
+        // Compute interaction matrices between sources and unique targets, defined by the
+        // unique transfer vectors.
+        let nrows = self.ncoeffs(order);
+        let ncols = self.ncoeffs(order);
+
+        let mut u = Vec::new();
+        let mut vt = Vec::new();
+
+        for t in self.transfer_vectors.iter() {
+            let source_equivalent_surface = t.source.compute_surface(&domain, order, self.alpha);
+            let target_check_surface = t.target.compute_surface(&domain, order, self.alpha);
+
+            let mut tmp_gram_t = rlst_dynamic_array2!(T, [nrows, ncols]);
+
+            self.kernel.assemble_st(
+                EvalType::Value,
+                &target_check_surface[..],
+                &source_equivalent_surface[..],
+                tmp_gram_t.data_mut(),
+            );
+
+            let mut u_i = rlst_dynamic_array2!(T, [nrows, self.k]);
+            let mut sigma_i = vec![T::zero(); self.k];
+            let mut vt_i = rlst_dynamic_array2!(T, [self.k, ncols]);
+
+            tmp_gram_t
+                .into_svd_alloc(
+                    u_i.view_mut(),
+                    vt_i.view_mut(),
+                    &mut sigma_i,
+                    SvdMode::Full,
+                )
+                .unwrap();
+
+            // Truncate such that the specified fraction of the energy of the singular
+            // values is retained.
+            let rank = retain_energy(&sigma_i, self.threshold);
+
+            let mut u_i_compressed = rlst_dynamic_array2!(T, [nrows, rank]);
+            let mut vt_i_trunc = rlst_dynamic_array2!(T, [rank, ncols]);
+
+            let mut sigma_mat_i_compressed = rlst_dynamic_array2!(T, [rank, rank]);
+
+            u_i_compressed.fill_from(u_i.into_subview([0, 0], [nrows, rank]));
+            vt_i_trunc.fill_from(vt_i.into_subview([0, 0], [rank, ncols]));
+
+            for (j, s) in sigma_i.iter().enumerate().take(rank) {
+                unsafe {
+                    *sigma_mat_i_compressed.get_unchecked_mut([j, j]) = *s;
+                }
+            }
+
+            // Pre-multiply the truncated right singular vectors by the retained singular
+            // values, so that only two matrix products are needed at application time.
+            let vt_i_compressed = empty_array::<T, 2>().simple_mult_into_resize(
+                sigma_mat_i_compressed.view(),
+                vt_i_trunc.view(),
+            );
+
+            // Store compressed M2L operators.
+            u.push(u_i_compressed);
+            vt.push(vt_i_compressed);
+        }
+
+        SvdM2lOperatorDataIA { u, vt }
+    }
+}
+
+fn retain_energy<T: Scalar<Real = T> + Gemm>(
+    singular_values: &[T],
+    percentage: T,
+) -> usize {
+    // Calculate the total energy.
+    let total_energy: T = singular_values.iter().map(|&s| s * s).sum();
+
+    // Calculate the threshold energy to retain.
+    let threshold_energy = total_energy * percentage;
+
+    // Find the smallest leading set of singular values that retains the desired energy.
+    let mut cumulative_energy = T::zero();
+    for (i, &value) in singular_values.iter().enumerate() {
+        cumulative_energy += value * value;
+        if cumulative_energy >= threshold_energy {
+            return i + 1;
+        }
+    }
+
+    singular_values.len()
+}
+
 impl<T, U> SvdFieldTranslationKiFmm<T, U>
 where
     T: Float + Default,
     }
 }
 
+impl<T, U> SvdFieldTranslationKiFmmIA<T, U>
+where
+    T: Float + Default,
+    T: Scalar<Real = T> + rlst_blis::interface::gemm::Gemm,
+    U: Kernel<T = T> + Default,
+    Array<T, BaseArray<T, VectorContainer<T>, 2>, 2>: MatrixSvd,
+{
+    /// Constructor for the individual SVD field translation struct for the kernel independent FMM (KiFMM).
+    ///
+    /// # Arguments
+    /// * `kernel` - The kernel being used, only compatible with homogeneous, translationally invariant kernels.
+    /// * `threshold` - The fraction of the energy of the singular values of each M2L operator retained after truncation.
+    /// * `order` - The expansion order for the multipole and local expansions.
+    /// * `domain` - Domain associated with the global point set.
+    /// * `alpha` - The multiplier being used to modify the diameter of the surface grid uniformly along each coordinate axis.
+    pub fn new(kernel: U, threshold: T, order: usize, domain: Domain<T>, alpha: T) -> Self {
+        let mut result = SvdFieldTranslationKiFmmIA {
+            alpha,
+            k: 0,
+            threshold,
+            kernel,
+            operator_data: SvdM2lOperatorDataIA::default(),
+            transfer_vectors: vec![],
+        };
+
+        let ncoeffs = result.ncoeffs(order);
+        // The maximum possible rank is the number of expansion coefficients.
+        result.k = ncoeffs;
+        result.transfer_vectors = compute_transfer_vectors();
+        result.operator_data = result.compute_m2l_operators(order, domain);
+
+        result
+    }
+}
+
 impl<T, U> FieldTranslationData<U> for FftFieldTranslationKiFmm<T, U>
 where
     T: Scalar<Real = T> + Float + Default + Fft,
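The truncation rule in `retain_energy` is easy to sanity-check in isolation. Below is a minimal, self-contained sketch of the same energy criterion over plain `f64` slices; the helper name `truncation_rank` is illustrative and not part of the patch.

// Keep the smallest leading set of singular values whose squared sum
// reaches a fraction `threshold` of the total squared sum.
fn truncation_rank(singular_values: &[f64], threshold: f64) -> usize {
    let total: f64 = singular_values.iter().map(|s| s * s).sum();
    let mut cumulative = 0.0;
    for (i, s) in singular_values.iter().enumerate() {
        cumulative += s * s;
        if cumulative >= total * threshold {
            return i + 1;
        }
    }
    singular_values.len()
}

fn main() {
    // Rapidly decaying spectrum: the first two values carry ~99% of the energy.
    let sigma = [10.0, 5.0, 1.0, 0.1, 0.01];
    assert_eq!(truncation_rank(&sigma, 0.95), 2); // 100 + 25 >= 0.95 * 126.0101
    assert_eq!(truncation_rank(&sigma, 0.9999), 3);
}

Because each M2L operator is truncated independently, well-separated interactions with fast singular-value decay end up with much smaller ranks than the single global cutoff `k` used by `SvdFieldTranslationKiFmm`.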
diff --git a/field/src/types.rs b/field/src/types.rs
index 7b80a8b9..fd750b40 100644
--- a/field/src/types.rs
+++ b/field/src/types.rs
     pub kernel: U,
 }
 
+/// A type to store the M2L field translation metadata and data for an SVD-based sparsification,
+/// computed individually for each transfer vector, in the kernel independent FMM.
+pub struct SvdFieldTranslationKiFmmIA<T, U>
+where
+    T: Scalar<Real = T> + Float + Default + rlst_blis::interface::gemm::Gemm,
+    U: Kernel<T = T> + Default,
+{
+    /// Amount to dilate inner check surface by when computing operator.
+    pub alpha: T,
+
+    /// Maximum rank taken for SVD compression.
+    pub k: usize,
+
+    /// Fraction of the energy of each M2L operator retained in SVD compression.
+    pub threshold: T,
+
+    /// Precomputed data required for SVD compressed M2L interaction.
+    pub operator_data: SvdM2lOperatorDataIA<T>,
+
+    /// Unique transfer vectors to look up unique M2L kernel interactions.
+    pub transfer_vectors: Vec<TransferVector>,
+
+    /// The associated kernel with this translation operator.
+    pub kernel: U,
+}
+
 /// A type to store a transfer vector between a `source` and `target` Morton key.
 #[derive(Debug)]
 pub struct TransferVector {
         SvdM2lOperatorData { u, st_block, c }
     }
 }
+
+/// Container to store precomputed data required for the individual SVD field translations,
+/// with one truncated SVD stored per unique transfer vector.
+#[derive(Default)]
+pub struct SvdM2lOperatorDataIA<T>
+where
+    T: Scalar,
+{
+    /// Left singular vectors from the SVD of each M2L matrix, truncated to the rank that
+    /// retains the energy threshold.
+    pub u: Vec<SvdM2lEntry<T>>,
+
+    /// Right singular vectors from the SVD of each M2L matrix, truncated to the same rank
+    /// and pre-multiplied by the retained singular values.
+    pub vt: Vec<SvdM2lEntry<T>>,
+}
diff --git a/fmm/examples/single_node.rs b/fmm/examples/single_node.rs
index 1e62d371..003ae547 100644
--- a/fmm/examples/single_node.rs
+++ b/fmm/examples/single_node.rs
 use itertools::Itertools;
 
 use rlst_dense::traits::RawAccess;
 
-use bempp_field::types::FftFieldTranslationKiFmm;
+use bempp_field::types::{FftFieldTranslationKiFmm, SvdFieldTranslationKiFmm, SvdFieldTranslationKiFmmIA};
 use bempp_fmm::{
     charge::build_charge_dict,
     types::{FmmDataUniform, KiFmmLinear},
     let kernel = Laplace3dKernel::default();
     let m2l_data: FftFieldTranslationKiFmm<f64, Laplace3dKernel<f64>> =
         FftFieldTranslationKiFmm::new(kernel.clone(), order, *tree.get_domain(), alpha_inner);
+
     // let m2l_data = SvdFieldTranslationKiFmm::new(
     //     kernel.clone(),
     //     Some(80),
     //     order,
     //     *tree.get_domain(),
     //     alpha_inner,
     // );
 
+    // let m2l_data = SvdFieldTranslationKiFmmIA::new(
+    //     kernel.clone(),
+    //     0.9999,
+    //     order,
+    //     *tree.get_domain(),
+    //     alpha_inner,
+    // );
+
     let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
 
     // Form charge dict, matching charges with their associated global indices
diff --git a/fmm/examples/single_node_matrix.rs b/fmm/examples/single_node_matrix.rs
index e025c227..a6d5d4e6 100644
--- a/fmm/examples/single_node_matrix.rs
+++ b/fmm/examples/single_node_matrix.rs
 use itertools::Itertools;
 
 use rlst_dense::traits::RawAccess;
 
-use bempp_field::types::SvdFieldTranslationKiFmm;
+use bempp_field::types::{SvdFieldTranslationKiFmm, SvdFieldTranslationKiFmmIA};
 use bempp_fmm::charge::build_charge_dict;
 use bempp_kernel::laplace_3d::Laplace3dKernel;
 use bempp_traits::{fmm::FmmLoop, tree::Tree};
     // Test matrix input
     let points = points_fixture::<f64>(npoints, None, None);
-    let ncharge_vecs = 3;
+    let ncharge_vecs = 5;
     let depth = 4;
 
     let mut charge_mat = vec![vec![0.0; npoints]; ncharge_vecs];
     let tree = SingleNodeTree::new(points.data(), false, None, Some(depth), &global_idxs, true);
 
     // Precompute the M2L data
-    let m2l_data = SvdFieldTranslationKiFmm::new(
-        kernel.clone(),
-        Some(80),
-        order,
-        *tree.get_domain(),
-        alpha_inner,
-    );
+    // let m2l_data = SvdFieldTranslationKiFmm::new(
+    //     kernel.clone(),
+    //     Some(80),
+    //     order,
+    //     *tree.get_domain(),
+    //     alpha_inner,
+    // );
+    let m2l_data = SvdFieldTranslationKiFmmIA::new(
+        kernel.clone(),
+        0.9999,
+        order,
+        *tree.get_domain(),
+        alpha_inner,
+    );
 
     let fmm = bempp_fmm::types::KiFmmLinearMatrix::new(
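At application time, each stored pair `(u[i], vt[i])` is applied right-to-left, which is what makes the compression pay off. A minimal sketch of that two-stage product with plain Vec-of-rows matrices (illustrative names, not the crate's rlst types):

// Applying a rank-k factorisation K ≈ U * (Σ Vᵀ) to a multipole vector m as
// U * ((Σ Vᵀ) * m) costs O(n·k) rather than the O(n²) of the dense kernel matrix.
fn matvec(a: &[Vec<f64>], x: &[f64]) -> Vec<f64> {
    a.iter()
        .map(|row| row.iter().zip(x).map(|(aij, xj)| aij * xj).sum())
        .collect()
}

fn apply_compressed_m2l(u: &[Vec<f64>], sigma_vt: &[Vec<f64>], multipole: &[f64]) -> Vec<f64> {
    let compressed = matvec(sigma_vt, multipole); // k entries
    matvec(u, &compressed) // n entries
}

fn main() {
    // Rank-1 example: u = [1, 2]ᵀ, (Σ vᵀ) = [3, 4].
    let u = vec![vec![1.0], vec![2.0]];
    let sigma_vt = vec![vec![3.0, 4.0]];
    let local = apply_compressed_m2l(&u, &sigma_vt, &[1.0, 1.0]);
    assert_eq!(local, vec![7.0, 14.0]); // (3 + 4) = 7, then [1, 2] * 7
}

The new module below implements exactly this nesting with rlst arrays, wrapped in the check-to-equivalent conversions `dc2e_inv_1` and `dc2e_inv_2`.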
diff --git a/fmm/src/field_translation/source_to_target/svd_ia.rs b/fmm/src/field_translation/source_to_target/svd_ia.rs
new file mode 100644
index 00000000..53efbcd6
--- /dev/null
+++ b/fmm/src/field_translation/source_to_target/svd_ia.rs
+//! Multipole to Local field translations for uniform and matrix-input Kernel Independent FMMs,
+//! using an individual low-rank approximation for each transfer vector.
+use itertools::Itertools;
+use num::Float;
+use rayon::prelude::*;
+use std::collections::{HashMap, HashSet};
+use std::sync::Mutex;
+
+use bempp_traits::{
+    field::{FieldTranslation, FieldTranslationData},
+    fmm::Fmm,
+    kernel::{Kernel, ScaleInvariantKernel},
+    tree::Tree,
+    types::Scalar,
+};
+use bempp_tree::types::single_node::SingleNodeTree;
+
+use crate::types::{FmmDataUniform, KiFmmLinear};
+
+use rlst_dense::{
+    array::{empty_array, Array},
+    base_array::BaseArray,
+    data_container::VectorContainer,
+    rlst_array_from_slice2, rlst_dynamic_array2,
+    traits::{MatrixSvd, MultIntoResize, RawAccess, RawAccessMut},
+};
+
+pub mod matrix {
+    use bempp_field::types::SvdFieldTranslationKiFmmIA;
+
+    use crate::types::{FmmDataUniformMatrix, KiFmmLinearMatrix};
+
+    use super::*;
+
+    impl<T, U>
+        FmmDataUniformMatrix<
+            KiFmmLinearMatrix<SingleNodeTree<U>, T, SvdFieldTranslationKiFmmIA<U, T>, U>,
+            U,
+        >
+    where
+        T: Kernel<T = U>
+            + ScaleInvariantKernel<T = U>
+            + std::marker::Send
+            + std::marker::Sync
+            + Default,
+        U: Scalar<Real = U> + rlst_blis::interface::gemm::Gemm,
+        U: Float + Default,
+        U: std::marker::Send + std::marker::Sync + Default,
+        Array<U, BaseArray<U, VectorContainer<U>, 2>, 2>: MatrixSvd,
+    {
+        fn displacements(&self, level: u64) -> Vec<Mutex<Vec<i64>>> {
+            let sources = self.fmm.tree().get_keys(level).unwrap();
+            let nsources = sources.len();
+
+            let all_displacements = vec![vec![-1i64; nsources]; 316];
+            let all_displacements = all_displacements.into_iter().map(Mutex::new).collect_vec();
+
+            sources.into_par_iter().enumerate().for_each(|(j, source)| {
+                let v_list = source
+                    .parent()
+                    .neighbors()
+                    .iter()
+                    .flat_map(|pn| pn.children())
+                    .filter(|pnc| {
+                        !source.is_adjacent(pnc) && self.fmm.tree().get_all_keys_set().contains(pnc)
+                    })
+                    .collect_vec();
+
+                let transfer_vectors = v_list
+                    .iter()
+                    .map(|target| target.find_transfer_vector(source))
+                    .collect_vec();
+
+                let mut transfer_vectors_map = HashMap::new();
+                for (i, v) in transfer_vectors.iter().enumerate() {
+                    transfer_vectors_map.insert(v, i);
+                }
+
+                let transfer_vectors_set: HashSet<_> = transfer_vectors.iter().cloned().collect();
+
+                for (i, tv) in self.fmm.m2l.transfer_vectors.iter().enumerate() {
+                    let mut all_displacements_lock = all_displacements[i].lock().unwrap();
+
+                    if transfer_vectors_set.contains(&tv.hash) {
+                        let target = &v_list[*transfer_vectors_map.get(&tv.hash).unwrap()];
+                        let target_index = self.level_index_pointer[level as usize]
+                            .get(target)
+                            .unwrap();
+                        all_displacements_lock[j] = *target_index as i64;
+                    }
+                }
+            });
+
+            all_displacements
+        }
+    }
+
+    /// Implement the multipole to local translation operator for an SVD accelerated KiFMM
+    /// on a single node, with a truncated SVD stored for each transfer vector individually.
+    impl<T, U> FieldTranslation<U>
+        for FmmDataUniformMatrix<
+            KiFmmLinearMatrix<SingleNodeTree<U>, T, SvdFieldTranslationKiFmmIA<U, T>, U>,
+            U,
+        >
+    where
+        T: Kernel<T = U>
+            + ScaleInvariantKernel<T = U>
+            + std::marker::Send
+            + std::marker::Sync
+            + Default,
+        U: Scalar<Real = U> + rlst_blis::interface::gemm::Gemm,
+        U: Float + Default,
+        U: std::marker::Send + std::marker::Sync + Default,
+        Array<U, BaseArray<U, VectorContainer<U>, 2>, 2>: MatrixSvd,
+    {
+        fn p2l(&self, _level: u64) {}
+
+        fn m2l<'a>(&self, level: u64) {
+            let Some(sources) = self.fmm.tree().get_keys(level) else {
+                return;
+            };
+
+            let nsources = sources.len();
+
+            let all_displacements = self.displacements(level);
+
+            // Interpret multipoles as a matrix, with columns ordered by source box and
+            // charge vector.
+            let multipoles = rlst_array_from_slice2!(
+                U,
+                unsafe {
+                    std::slice::from_raw_parts(
+                        self.level_multipoles[level as usize][0][0].raw,
+                        self.ncoeffs * nsources * self.ncharge_vectors,
+                    )
+                },
+                [self.ncoeffs, nsources * self.ncharge_vectors]
+            );
+
+            let level_locals = self.level_locals[level as usize]
+                .iter()
+                .map(Mutex::new)
+                .collect_vec();
+
+            let multipole_idxs = all_displacements
+                .iter()
+                .map(|displacements| {
+                    displacements
+                        .lock()
+                        .unwrap()
+                        .iter()
+                        .enumerate()
+                        .filter(|(_, &d)| d != -1)
+                        .map(|(i, _)| i)
+                        .collect_vec()
+                })
+                .collect_vec();
+
+            let local_idxs = all_displacements
+                .iter()
+                .map(|displacements| {
+                    displacements
+                        .lock()
+                        .unwrap()
+                        .iter()
+                        .enumerate()
+                        .filter(|(_, &d)| d != -1)
+                        .map(|(_, &j)| j as usize)
+                        .collect_vec()
+                })
+                .collect_vec();
+
+            let scale = self.fmm.kernel.scale(level) * self.m2l_scale(level);
+
+            (0..316)
+                .into_par_iter()
+                .zip(multipole_idxs)
+                .zip(local_idxs)
+                .for_each(|((c_idx, multipole_idxs), local_idxs)| {
+                    let mut multipoles_subset = rlst_dynamic_array2!(
+                        U,
+                        [self.ncoeffs, multipole_idxs.len() * self.ncharge_vectors]
+                    );
+
+                    for (local_multipole_idx, &global_multipole_idx) in
+                        multipole_idxs.iter().enumerate()
+                    {
+                        let key_displacement_global =
+                            global_multipole_idx * self.ncoeffs * self.ncharge_vectors;
+
+                        let key_displacement_local =
+                            local_multipole_idx * self.ncoeffs * self.ncharge_vectors;
+
+                        for charge_vec_idx in 0..self.ncharge_vectors {
+                            let charge_vec_displacement = charge_vec_idx * self.ncoeffs;
+
+                            multipoles_subset.data_mut()[key_displacement_local
+                                + charge_vec_displacement
+                                ..key_displacement_local
+                                    + charge_vec_displacement
+                                    + self.ncoeffs]
+                                .copy_from_slice(
+                                    &multipoles.data()[key_displacement_global
+                                        + charge_vec_displacement
+                                        ..key_displacement_global
+                                            + charge_vec_displacement
+                                            + self.ncoeffs],
+                                );
+                        }
+                    }
+
+                    // Scale the multipole data by the level-dependent kernel and M2L scales,
+                    // as in the uniform implementation below.
+                    multipoles_subset
+                        .data_mut()
+                        .iter_mut()
+                        .for_each(|m| *m *= scale);
+
+                    // Apply the compressed operator for this transfer vector right-to-left.
+                    let u_sub = &self.fmm.m2l.operator_data.u[c_idx];
+                    let vt_sub = &self.fmm.m2l.operator_data.vt[c_idx];
+
+                    let locals = empty_array::<U, 2>().simple_mult_into_resize(
+                        self.fmm.dc2e_inv_1.view(),
+                        empty_array::<U, 2>().simple_mult_into_resize(
+                            self.fmm.dc2e_inv_2.view(),
+                            empty_array::<U, 2>().simple_mult_into_resize(
+                                u_sub.view(),
+                                empty_array::<U, 2>().simple_mult_into_resize(
+                                    vt_sub.view(),
+                                    multipoles_subset.view(),
+                                ),
+                            ),
+                        ),
+                    );
+
+                    for (local_multipole_idx, &global_local_idx) in local_idxs.iter().enumerate() {
+                        let local_lock = level_locals[global_local_idx].lock().unwrap();
+                        for charge_vec_idx in 0..self.ncharge_vectors {
+                            let local_send_ptr = local_lock[charge_vec_idx];
+                            let local_ptr = local_send_ptr.raw;
+
+                            let local =
+                                unsafe { std::slice::from_raw_parts_mut(local_ptr, self.ncoeffs) };
+
+                            let key_displacement =
+                                local_multipole_idx * self.ncoeffs * self.ncharge_vectors;
+                            let charge_vec_displacement = charge_vec_idx * self.ncoeffs;
+
+                            let result = &locals.data()[key_displacement + charge_vec_displacement
+                                ..key_displacement + charge_vec_displacement + self.ncoeffs];
+                            local.iter_mut().zip(result).for_each(|(l, r)| *l += *r);
+                        }
+                    }
+                })
+        }
+
+        fn m2l_scale(&self, level: u64) -> U {
+            if level < 2 {
+                panic!("M2L is only performed on levels 2 and deeper")
+            }
+
+            if level == 2 {
+                U::from(1. / 2.).unwrap()
+            } else {
+                let two = U::from(2.0).unwrap();
+                Scalar::powf(two, U::from(level - 3).unwrap())
+            }
+        }
+    }
+}
+
+pub mod uniform {
+    use bempp_field::types::SvdFieldTranslationKiFmmIA;
+    use rlst_dense::traits::DefaultIteratorMut;
+
+    use super::*;
+
+    impl<T, U>
+        FmmDataUniform<KiFmmLinear<SingleNodeTree<U>, T, SvdFieldTranslationKiFmmIA<U, T>, U>, U>
+    where
+        T: Kernel<T = U>
+            + ScaleInvariantKernel<T = U>
+            + std::marker::Send
+            + std::marker::Sync
+            + Default,
+        U: Scalar<Real = U> + rlst_blis::interface::gemm::Gemm,
+        U: Float + Default,
+        U: std::marker::Send + std::marker::Sync + Default,
+        Array<U, BaseArray<U, VectorContainer<U>, 2>, 2>: MatrixSvd,
+    {
+        fn displacements(&self, level: u64) -> Vec<Mutex<Vec<i64>>> {
+            let sources = self.fmm.tree().get_keys(level).unwrap();
+            let nsources = sources.len();
+
+            let all_displacements = vec![vec![-1i64; nsources]; 316];
+            let all_displacements = all_displacements.into_iter().map(Mutex::new).collect_vec();
+
+            sources.into_par_iter().enumerate().for_each(|(j, source)| {
+                let v_list = source
+                    .parent()
+                    .neighbors()
+                    .iter()
+                    .flat_map(|pn| pn.children())
+                    .filter(|pnc| {
+                        !source.is_adjacent(pnc) && self.fmm.tree().get_all_keys_set().contains(pnc)
+                    })
+                    .collect_vec();
+
+                let transfer_vectors = v_list
+                    .iter()
+                    .map(|target| target.find_transfer_vector(source))
+                    .collect_vec();
+
+                let mut transfer_vectors_map = HashMap::new();
+                for (i, v) in transfer_vectors.iter().enumerate() {
+                    transfer_vectors_map.insert(v, i);
+                }
+
+                let transfer_vectors_set: HashSet<_> = transfer_vectors.iter().cloned().collect();
+
+                for (i, tv) in self.fmm.m2l.transfer_vectors.iter().enumerate() {
+                    let mut all_displacements_lock = all_displacements[i].lock().unwrap();
+
+                    if transfer_vectors_set.contains(&tv.hash) {
+                        let target = &v_list[*transfer_vectors_map.get(&tv.hash).unwrap()];
+                        let target_index = self.level_index_pointer[level as usize]
+                            .get(target)
+                            .unwrap();
+                        all_displacements_lock[j] = *target_index as i64;
+                    }
+                }
+            });
+
+            all_displacements
+        }
+    }
+
+    /// Implement the multipole to local translation operator for an SVD accelerated KiFMM
+    /// on a single node, with a truncated SVD stored for each transfer vector individually.
+    impl<T, U> FieldTranslation<U>
+        for FmmDataUniform<KiFmmLinear<SingleNodeTree<U>, T, SvdFieldTranslationKiFmmIA<U, T>, U>, U>
+    where
+        T: Kernel<T = U>
+            + ScaleInvariantKernel<T = U>
+            + std::marker::Send
+            + std::marker::Sync
+            + Default,
+        U: Scalar<Real = U> + rlst_blis::interface::gemm::Gemm,
+        U: Float + Default,
+        U: std::marker::Send + std::marker::Sync + Default,
+        Array<U, BaseArray<U, VectorContainer<U>, 2>, 2>: MatrixSvd,
+    {
+        fn p2l(&self, _level: u64) {}
+
+        fn m2l<'a>(&self, level: u64) {
+            let Some(sources) = self.fmm.tree().get_keys(level) else {
+                return;
+            };
+
+            let nsources = sources.len();
+
+            let all_displacements = self.displacements(level);
+
+            // Interpret multipoles as a matrix, with one column per source box.
+            let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order);
+
+            let multipoles = rlst_array_from_slice2!(
+                U,
+                unsafe {
+                    std::slice::from_raw_parts(
+                        self.level_multipoles[level as usize][0].raw,
+                        ncoeffs * nsources,
+                    )
+                },
+                [ncoeffs, nsources]
+            );
+
+            let level_locals = self.level_locals[level as usize]
+                .iter()
+                .map(Mutex::new)
+                .collect_vec();
+
+            let multipole_idxs = all_displacements
+                .iter()
+                .map(|displacements| {
+                    displacements
+                        .lock()
+                        .unwrap()
+                        .iter()
+                        .enumerate()
+                        .filter(|(_, &d)| d != -1)
+                        .map(|(i, _)| i)
+                        .collect_vec()
+                })
+                .collect_vec();
+
+            let local_idxs = all_displacements
+                .iter()
+                .map(|displacements| {
+                    displacements
+                        .lock()
+                        .unwrap()
+                        .iter()
+                        .enumerate()
+                        .filter(|(_, &d)| d != -1)
+                        .map(|(_, &j)| j as usize)
+                        .collect_vec()
+                })
+                .collect_vec();
+
+            let scale = self.fmm.kernel.scale(level) * self.m2l_scale(level);
+
+            (0..316)
+                .into_par_iter()
+                .zip(multipole_idxs)
+                .zip(local_idxs)
+                .for_each(|((c_idx, multipole_idxs), local_idxs)| {
+                    let mut multipoles_subset =
+                        rlst_dynamic_array2!(U, [ncoeffs, multipole_idxs.len()]);
+
+                    for (i, &multipole_idx) in multipole_idxs.iter().enumerate() {
+                        multipoles_subset.data_mut()[i * ncoeffs..(i + 1) * ncoeffs]
+                            .copy_from_slice(
+                                &multipoles.data()
+                                    [multipole_idx * ncoeffs..(multipole_idx + 1) * ncoeffs],
+                            );
+                    }
+
+                    // Scale the multipole data by the level-dependent kernel and M2L scales.
+                    multipoles_subset.iter_mut().for_each(|m| *m *= scale);
+
+                    // Apply the compressed operator for this transfer vector right-to-left.
+                    let u_sub = &self.fmm.m2l.operator_data.u[c_idx];
+                    let vt_sub = &self.fmm.m2l.operator_data.vt[c_idx];
+
+                    let locals = empty_array::<U, 2>().simple_mult_into_resize(
+                        self.fmm.dc2e_inv_1.view(),
+                        empty_array::<U, 2>().simple_mult_into_resize(
+                            self.fmm.dc2e_inv_2.view(),
+                            empty_array::<U, 2>().simple_mult_into_resize(
+                                u_sub.view(),
+                                empty_array::<U, 2>().simple_mult_into_resize(
+                                    vt_sub.view(),
+                                    multipoles_subset.view(),
+                                ),
+                            ),
+                        ),
+                    );
+
+                    for (multipole_idx, &local_idx) in local_idxs.iter().enumerate() {
+                        let local_lock = level_locals[local_idx].lock().unwrap();
+                        let local_ptr = local_lock.raw;
+                        let local = unsafe { std::slice::from_raw_parts_mut(local_ptr, ncoeffs) };
+
+                        let res =
+                            &locals.data()[multipole_idx * ncoeffs..(multipole_idx + 1) * ncoeffs];
+                        local.iter_mut().zip(res).for_each(|(l, r)| *l += *r);
+                    }
+                });
+        }
+
+        fn m2l_scale(&self, level: u64) -> U {
+            if level < 2 {
+                panic!("M2L is only performed on levels 2 and deeper")
+            }
+
+            if level == 2 {
+                U::from(1. / 2.).unwrap()
+            } else {
+                let two = U::from(2.0).unwrap();
+                Scalar::powf(two, U::from(level - 3).unwrap())
+            }
+        }
+    }
+}
diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs
index 43c81fe1..3bd3b24e 100644
--- a/fmm/src/fmm.rs
+++ b/fmm/src/fmm.rs
     use super::*;
 
     use rlst_dense::rlst_array_from_slice2;
 
-    use bempp_field::types::{FftFieldTranslationKiFmm, SvdFieldTranslationKiFmm};
+    use bempp_field::types::{FftFieldTranslationKiFmm, SvdFieldTranslationKiFmm, SvdFieldTranslationKiFmmIA};
     use bempp_kernel::laplace_3d::Laplace3dKernel;
     use bempp_tree::implementations::helpers::{points_fixture, points_fixture_sphere};
 
         assert!(rel_error <= 1e-5);
     }
 
+    #[allow(clippy::too_many_arguments)]
     fn test_uniform_f64_svd(
         points: &Array<f64, BaseArray<f64, VectorContainer<f64>, 2>, 2>,
         assert!(rel_error <= 1e-5);
     }
 
+    #[allow(clippy::too_many_arguments)]
+    fn test_uniform_f64_svd_ia(
+        points: &Array<f64, BaseArray<f64, VectorContainer<f64>, 2>, 2>,
+        charges: &[f64],
+        global_idxs: &[usize],
+        order: usize,
+        alpha_inner: f64,
+        alpha_outer: f64,
+        sparse: bool,
+        depth: u64,
+    ) {
+        // Test with the individual SVD field translation
+        let tree =
+            SingleNodeTree::new(points.data(), false, None, Some(depth), global_idxs, sparse);
+
+        let kernel = Laplace3dKernel::default();
+
+        let m2l_data = SvdFieldTranslationKiFmmIA::new(
+            kernel.clone(),
+            0.999,
+            order,
+            *tree.get_domain(),
+            alpha_inner,
+        );
+
+        let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
+
+        // Form charge dict, matching charges with their associated global indices
+        let charge_dict = build_charge_dict(global_idxs, charges);
+
+        let datatree = FmmDataUniform::new(fmm, &charge_dict).unwrap();
+
+        datatree.run(false);
+
+        // Test that direct computation is close to the FMM.
+        let mut test_idx_vec = Vec::new();
+        for (idx, index_pointer) in datatree.charge_index_pointer.iter().enumerate() {
+            if index_pointer.1 - index_pointer.0 > 0 {
+                test_idx_vec.push(idx);
+            }
+        }
+        let leaf = &datatree.fmm.tree().get_all_leaves().unwrap()[test_idx_vec[0]];
+
+        let leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
+
+        let (l, r) = datatree.charge_index_pointer[*leaf_idx];
+
+        let potentials = &datatree.potentials[l..r];
+
+        let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
+        let leaf_coordinates_row_major = &coordinates[l * 3..r * 3];
+
+        let dim = datatree.fmm.kernel.space_dimension();
+        let ntargets = leaf_coordinates_row_major.len() / dim;
+
+        let leaf_coordinates_row_major =
+            rlst_array_from_slice2!(f64, leaf_coordinates_row_major, [ntargets, dim], [dim, 1]);
+        let mut leaf_coordinates_col_major = rlst_dynamic_array2!(f64, [ntargets, dim]);
+        leaf_coordinates_col_major.fill_from(leaf_coordinates_row_major.view());
+
+        let mut direct = vec![0f64; ntargets];
+
+        let all_charges = charge_dict.into_values().collect_vec();
+
+        datatree.fmm.kernel().evaluate_st(
+            EvalType::Value,
+            points.data(),
+            leaf_coordinates_col_major.data(),
+            &all_charges,
+            &mut direct,
+        );
+
+        let abs_error: f64 = potentials
+            .iter()
+            .zip(direct.iter())
+            .map(|(a, b)| (a - b).abs())
+            .sum();
+        let rel_error: f64 = abs_error / (direct.iter().sum::<f64>());
+
+        assert!(rel_error <= 1e-5);
+    }
+
     #[allow(clippy::too_many_arguments)]
     fn test_adaptive_f64_svd(
         points: Array<f64, BaseArray<f64, VectorContainer<f64>, 2>, 2>,
             false,
             3,
         );
+
+        test_uniform_f64_svd_ia(
+            &points_sphere,
+            &charges,
+            &global_idxs,
+            order,
+            alpha_inner,
+            alpha_outer,
+            false,
+            3,
+        );
     }
 
     #[test]
     fn test_uniform_box_fft() {
diff --git a/fmm/src/lib.rs b/fmm/src/lib.rs
index 7fffb85f..281e4668 100644
--- a/fmm/src/lib.rs
+++ b/fmm/src/lib.rs
 mod field_translation {
     pub mod source_to_target {
         pub mod fft;
         pub mod svd;
+        pub mod svd_ia;
     }
     pub mod target;
 }
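For reference, the level-dependent scaling used by both `m2l_scale` implementations above can be checked in isolation. A minimal sketch for the `f64` case; the constants mirror the patch's implementation and combine with the kernel's own `scale(level)` at application time:

// 1/2 at level 2, then 2^(level - 3) for deeper levels.
fn m2l_scale(level: u64) -> f64 {
    assert!(level >= 2, "M2L is only performed on levels 2 and deeper");
    if level == 2 {
        0.5
    } else {
        2f64.powi(level as i32 - 3)
    }
}

fn main() {
    for level in 2..=5 {
        println!("level {level}: scale {}", m2l_scale(level)); // 0.5, 1, 2, 4
    }
}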