From a6a4142e47fab525bbb4fe0fbf5330ee1ae54284 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Fri, 30 Jun 2023 17:05:28 +0100 Subject: [PATCH 01/40] Start making surface grids flat --- tree/src/implementations/impl_morton.rs | 158 +++++++++++++++++++++--- 1 file changed, 138 insertions(+), 20 deletions(-) diff --git a/tree/src/implementations/impl_morton.rs b/tree/src/implementations/impl_morton.rs index 379e40f4..f4d16095 100644 --- a/tree/src/implementations/impl_morton.rs +++ b/tree/src/implementations/impl_morton.rs @@ -679,12 +679,13 @@ impl MortonKey { &self, order: usize, domain: &Domain, - surface: &[[f64; 3]], + surface: &[f64], alpha: f64, ) -> Vec<[f64; 3]> { // Number of convolution points along each axis let n = 2 * order - 1; - let mut grid = vec![[0f64; 3]; n.pow(3)]; + let dim = 3; + let mut grid = vec![[0f64; 3]; n.pow(dim)]; for i in 0..n { for j in 0..n { @@ -694,6 +695,7 @@ impl MortonKey { } } + // Dilate convolution grid let diameter = self .diameter(domain) .iter() @@ -720,14 +722,14 @@ impl MortonKey { .unwrap(); let max_conv_point = grid[max_index]; - let sums: Vec = surface.iter().map(|point| point.iter().sum()).collect(); + let sums: Vec = surface.chunks(dim as usize).map(|point| point.iter().sum()).collect(); let max_index = sums .iter() .enumerate() .max_by(|a, b| a.1.partial_cmp(b.1).unwrap()) .map(|(index, _)| index) .unwrap(); - let max_surface_point = surface[max_index]; + let max_surface_point = [surface[max_index*dim as usize], surface[max_index*(dim as usize)+1], surface[max_index*(dim as usize)+2]]; let diff = max_conv_point .iter() @@ -745,15 +747,18 @@ impl MortonKey { grid } - pub fn surface_grid(&self, order: usize) -> (Vec<[f64; 3]>, Vec<[usize; 3]>) { + pub fn surface_grid(&self, order: usize) -> (Vec, Vec) { + + let dim = 3; let n_coeffs = 6 * (order - 1).pow(2) + 2; - let mut surface: Vec<[f64; 3]> = vec![[0f64; 3]; n_coeffs]; + let mut surface: Vec = vec![0f64; dim*n_coeffs]; let lower = 0; let upper = order - 1; let mut idx = 0; + // Generate surface points on a grid scaled by the order for i in 0..order { for j in 0..order { for k in 0..order { @@ -761,57 +766,60 @@ impl MortonKey { || (j >= lower && k >= lower && (i == lower || i == upper)) || (k >= lower && i >= lower && (j == lower || j == upper)) { - surface[idx] = [i as f64, j as f64, k as f64]; + surface[dim*idx] = i as f64; + surface[dim*idx+1] = j as f64; + surface[dim*idx+2] = k as f64; idx += 1; } } } } + // Map surface points to indices let surface_idxs = surface .iter() .clone() - .map(|&[a, b, c]| [a as usize, b as usize, c as usize]) + .map(|&x| x as usize) .collect(); // Shift and scale surface so that it's centered at the origin and has side length of 1 surface.iter_mut().for_each(|point| { - point - .iter_mut() - .for_each(|value| *value *= 2.0 / (order as f64 - 1.0)); + *point *= 2.0/ (order as f64 -1.0); }); surface .iter_mut() - .for_each(|point| point.iter_mut().for_each(|value| *value -= 1.0)); + .for_each(|point| *point-= 1.0); (surface, surface_idxs) } pub fn scale_surface( &self, - surface: Vec<[f64; 3]>, + surface: Vec, domain: &Domain, alpha: f64, - ) -> Vec<[f64; 3]> { + ) -> Vec { + let dim = 3; // Translate box to specified centre, and scale let scaled_diameter = self.diameter(domain); let dilated_diameter = scaled_diameter.map(|d| d * alpha); - let mut scaled_surface = vec![[0f64; 3]; surface.len()]; + let mut scaled_surface = vec![0f64; surface.len()]; let centre = self.centre(domain); - for i in 0..surface.len() { - scaled_surface[i][0] = (surface[i][0] * (dilated_diameter[0] / 2.0)) + centre[0]; - scaled_surface[i][1] = (surface[i][1] * (dilated_diameter[1] / 2.0)) + centre[1]; - scaled_surface[i][2] = (surface[i][2] * (dilated_diameter[2] / 2.0)) + centre[2]; + let n = surface.len() / 3; + for i in 0..n { + scaled_surface[i*dim] = (surface[i*dim] * (dilated_diameter[0] / 2.0)) + centre[0]; + scaled_surface[i*dim+1] = (surface[i*dim+1] * (dilated_diameter[1] / 2.0)) + centre[1]; + scaled_surface[i*dim+2] = (surface[i*dim+2] * (dilated_diameter[2] / 2.0)) + centre[2]; } scaled_surface } - pub fn compute_surface(&self, domain: &Domain, order: usize, alpha: f64) -> Vec<[f64; 3]> { + pub fn compute_surface(&self, domain: &Domain, order: usize, alpha: f64) -> Vec { let (surface, _) = self.surface_grid(order); self.scale_surface(surface, domain, alpha) @@ -1664,4 +1672,114 @@ mod test { let sibling = key.siblings()[0]; key.find_transfer_vector(&sibling); } + + #[test] + fn test_surface_grid() { + let point = [0.5, 0.5, 0.5]; + let domain = Domain { + origin: [0., 0., 0.], + diameter: [1., 1., 1.], + }; + let key = MortonKey::from_point(&point, &domain, 0); + + let order = 2; + let alpha = 1.; + let dim = 3; + let ncoeffs = 6*(order-1 as usize).pow(2) + 2; + + // Test lengths + let surface = key.compute_surface(&domain, order, alpha); + assert_eq!(surface.len(), ncoeffs*dim); + + let (surface, surface_idxs) = key.surface_grid(order); + assert_eq!(surface.len(), ncoeffs*dim); + assert_eq!(surface_idxs.len(), ncoeffs*dim); + + let mut expected = vec![[0usize; 3]; ncoeffs]; + let lower = 0; + let upper = order -1; + let mut idx = 0; + for i in 0..order { + for j in 0..order { + for k in 0..order { + if (i >= lower && j >= lower && (k == lower || k == upper)) + || (j >= lower && k >= lower && (i == lower || i == upper)) + || (k >= lower && i >= lower && (j == lower || j == upper)) { + expected[idx] = [i, j, k]; + idx += 1; + } + } + } + } + + // Test ordering. + for i in 0..ncoeffs { + let point = vec![surface_idxs[i*dim], surface_idxs[i*dim+1], surface_idxs[i*dim+2]]; + assert_eq!(point, expected[i]); + } + + // Test scaling + let level = 2; + let key = MortonKey::from_point(&point, &domain, level); + let surface = key.compute_surface(&domain, order, alpha); + + let min_x = surface.iter(). + step_by(3).fold(f64::INFINITY, |a, &b| a.min(b)); + + let max_x = surface.iter(). + step_by(3).fold(0f64, |a, &b| a.max(b)); + + let diam_x = max_x-min_x; + + let expected = key.diameter(&domain)[0]; + assert_eq!(diam_x, expected); + + // Test shifting + let point = [0.1, 0.2, 0.3]; + let level = 2; + let key = MortonKey::from_point(&point, &domain, level); + let surface = key.compute_surface(&domain, order, alpha); + let scaled_surface = key.scale_surface(surface.clone(), &domain, alpha); + let expected = key.centre(&domain); + + let c_x = surface.iter() + .step_by(3).fold(0f64, |a, &b| a+b) / (ncoeffs as f64); + let c_y = surface.iter() + .skip(1).step_by(3).fold(0f64, |a, &b| a+b) / (ncoeffs as f64); + let c_z = surface.iter() + .skip(2).step_by(3).fold(0f64, |a, &b| a+b) / (ncoeffs as f64); + + let result = vec![c_x, c_y, c_z]; + + assert_eq!(result, expected); + } + + #[test] + fn test_convolution_grid() { + let point = [0.5, 0.5, 0.5]; + let domain = Domain { + origin: [0., 0., 0.], + diameter: [1., 1., 1.], + }; + + let order = 3; + let alpha = 1.0; + + let key = MortonKey::from_point(&point, &domain, 0); + + + let surface = key.compute_surface(&domain, order, alpha); + let conv_grid = key.convolution_grid(order, &domain, &surface, alpha); + + // Test that surface grid is embedded in convolution grid + let surf_grid: Vec<[f64; 3]> = surface + .chunks_exact(3) + .map(|chunk| { + [chunk[0], chunk[1], chunk[2]] + }) + .collect(); + + assert!(surf_grid.iter().all(|point| conv_grid.contains(point))); + + } } From 4950c84e54a13649e21dbac12e66d9785a047e3c Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Fri, 30 Jun 2023 18:32:38 +0100 Subject: [PATCH 02/40] Add flat surface creation --- field/src/lib.rs | 29 +--- fmm/src/fmm.rs | 180 ++++++++++-------------- tree/src/implementations/impl_morton.rs | 2 +- 3 files changed, 83 insertions(+), 128 deletions(-) diff --git a/field/src/lib.rs b/field/src/lib.rs index c7cb45b8..d8978d73 100644 --- a/field/src/lib.rs +++ b/field/src/lib.rs @@ -206,18 +206,15 @@ where t.target .compute_surface(&domain, expansion_order, self.alpha); // Find min target - let sums: Vec = target_check_surface - .iter() - .map(|point| point.iter().sum()) - .collect_vec(); + let sums: Vec = target_check_surface.chunks(self.kernel.dim()).map(|point| point.iter().sum()).collect(); let min_index = sums .iter() .enumerate() .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) .map(|(index, _)| index) .unwrap(); - let min_target = target_check_surface[min_index]; - + let min_target = [target_check_surface[min_index*self.kernel.dim() as usize], target_check_surface[min_index*(self.kernel.dim() as usize)+1], target_check_surface[min_index*(self.kernel.dim() as usize)+2]]; + let kernel = self.compute_kernel(expansion_order, &conv_grid_sources, min_target); let m = kernel.len(); let n = kernel[0].len(); @@ -310,17 +307,11 @@ where for (i, t) in self.transfer_vectors.iter().enumerate() { let source_equivalent_surface = t .source - .compute_surface(&domain, expansion_order, self.alpha) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(&domain, expansion_order, self.alpha); let target_check_surface = t .target - .compute_surface(&domain, expansion_order, self.alpha) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(&domain, expansion_order, self.alpha); let mut tmp_gram = Vec::new(); self.kernel.gram( @@ -408,17 +399,11 @@ where for (i, t) in self.transfer_vectors.iter().enumerate() { let source_equivalent_surface = t .source - .compute_surface(&domain, expansion_order, self.alpha) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(&domain, expansion_order, self.alpha); let target_check_surface = t .target - .compute_surface(&domain, expansion_order, self.alpha) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(&domain, expansion_order, self.alpha); let mut tmp_gram = Vec::new(); self.kernel.gram( diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 834f7c3a..b9413deb 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -75,28 +75,16 @@ where m2l: U, ) -> Self { let upward_equivalent_surface = ROOT - .compute_surface(tree.get_domain(), order, alpha_inner) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(tree.get_domain(), order, alpha_inner); let upward_check_surface = ROOT - .compute_surface(tree.get_domain(), order, alpha_outer) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(tree.get_domain(), order, alpha_outer); let downward_equivalent_surface = ROOT - .compute_surface(tree.get_domain(), order, alpha_outer) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(tree.get_domain(), order, alpha_outer); let downward_check_surface = ROOT - .compute_surface(tree.get_domain(), order, alpha_inner) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(tree.get_domain(), order, alpha_inner); // Compute upward check to equivalent, and downward check to equivalent Gram matrices // as well as their inverses using DGESVD. @@ -137,16 +125,10 @@ where for child in children.iter() { let child_upward_equivalent_surface = child - .compute_surface(tree.get_domain(), order, alpha_inner) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(tree.get_domain(), order, alpha_inner); let child_downward_check_surface = child - .compute_surface(tree.get_domain(), order, alpha_inner) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(tree.get_domain(), order, alpha_inner); let mut pc2ce = Vec::new(); kernel.gram( @@ -245,10 +227,7 @@ where .collect_vec(); let upward_check_surface = leaf - .compute_surface(&fmm_arc.tree().domain, fmm_arc.order, fmm_arc.alpha_outer) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(&fmm_arc.tree().domain, fmm_arc.order, fmm_arc.alpha_outer); let leaf_charges_view = ArrayView::from(leaf_charges_arc.deref()); let leaf_charges_slice = leaf_charges_view.as_slice().unwrap(); @@ -371,10 +350,7 @@ where fmm_arc.tree().get_domain(), fmm_arc.order(), fmm_arc.alpha_inner, - ) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + ); let source_multipole_lock = source_multipole_arc.lock().unwrap(); let source_multipole_view = @@ -430,10 +406,7 @@ where .collect_vec(); let downward_equivalent_surface = leaf - .compute_surface(&fmm_arc.tree().domain, fmm_arc.order, fmm_arc.alpha_outer) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + .compute_surface(&fmm_arc.tree().domain, fmm_arc.order, fmm_arc.alpha_outer); let source_local_lock = source_local_arc.lock().unwrap(); let source_local_ref = ArrayView::from(source_local_lock.deref()); @@ -488,10 +461,7 @@ where &fmm_arc.tree().domain, fmm_arc.order, fmm_arc.alpha_inner, - ) - .into_iter() - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + ); let mut downward_check_potential = vec![0f64; downward_check_surface.len() / fmm_arc.kernel().dim()]; @@ -872,14 +842,14 @@ where let (_, target_surface_idxs) = target.surface_grid(fmm_arc.order); let mut tmp = Vec::new(); - for index in target_surface_idxs.iter() { + for index in target_surface_idxs.chunks_exact(fmm_arc.kernel.dim()) { let element = check_potential[[index[0], index[1], index[2]]]; tmp.push(element); } // Compute local coefficients from check potentials let check_potential = - Array::from_shape_vec(target_surface_idxs.len(), tmp).unwrap(); + Array::from_shape_vec(target_surface_idxs.len() / fmm_arc.kernel.dim(), tmp).unwrap(); // Compute local let target_local_owned = self.m2l_scale(target.level()) @@ -1165,25 +1135,25 @@ mod test { #[test] fn test_fmm() { - // let npoints = 10000; - // let points = points_fixture(npoints); - // let points_clone = points.clone(); - // let depth = 4; - // let n_crit = 150; + let npoints = 100000; + let points = points_fixture(npoints); + let points_clone = points.clone(); + let depth = 4; + let n_crit = 150; - // let order = 4; - // let alpha_inner = 1.05; - // let alpha_outer = 2.9; - // let adaptive = true; - // // let k = 453; + let order = 5; + let alpha_inner = 1.05; + let alpha_outer = 2.9; + let adaptive = true; + let k = 453; - // let kernel = LaplaceKernel::new(3, false, 3); + let kernel = LaplaceKernel::new(3, false, 3); - // let start = Instant::now(); - // let tree = SingleNodeTree::new(&points, adaptive, Some(n_crit), Some(depth)); - // println!("Tree = {:?}ms", start.elapsed().as_millis()); + let start = Instant::now(); + let tree = SingleNodeTree::new(&points, adaptive, Some(n_crit), Some(depth)); + println!("Tree = {:?}ms", start.elapsed().as_millis()); - // let start = Instant::now(); + let start = Instant::now(); // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( // kernel.clone(), @@ -1200,64 +1170,64 @@ mod test { // tree.get_domain().clone(), // alpha_inner, // ); - // println!("SVD operators = {:?}ms", start.elapsed().as_millis()); + println!("SVD operators = {:?}ms", start.elapsed().as_millis()); - // let start = Instant::now(); - // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( - // kernel.clone(), - // order, - // tree.get_domain().clone(), - // alpha_inner, - // ); - // println!("FFT operators = {:?}ms", start.elapsed().as_millis()); - - // let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); + let start = Instant::now(); + let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( + kernel.clone(), + order, + tree.get_domain().clone(), + alpha_inner, + ); + println!("FFT operators = {:?}ms", start.elapsed().as_millis()); - // let charges = Charges::new(); + let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); - // let datatree = FmmData::new(fmm, charges); + let charges = Charges::new(); - // datatree.run(); + let datatree = FmmData::new(fmm, charges); - // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + datatree.run(); - // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); - // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; - // let mut direct = vec![0f64; pts.len()]; - // let all_point_coordinates = points_clone - // .iter() - // .map(|p| p.coordinate) - // .flat_map(|[x, y, z]| vec![x, y, z]) - // .collect_vec(); + let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); + let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); - // let leaf_coordinates = pts - // .iter() - // .map(|p| p.coordinate) - // .flat_map(|[x, y, z]| vec![x, y, z]) - // .collect_vec(); - // let all_charges = vec![1f64; points_clone.len()]; + let mut direct = vec![0f64; pts.len()]; + let all_point_coordinates = points_clone + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); - // let kernel = LaplaceKernel { - // dim: 3, - // is_singular: false, - // value_dimension: 3, - // }; - // kernel.potential( - // &all_point_coordinates[..], - // &all_charges[..], - // &leaf_coordinates[..], - // &mut direct[..], - // ); + let leaf_coordinates = pts + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + let all_charges = vec![1f64; points_clone.len()]; + + let kernel = LaplaceKernel { + dim: 3, + is_singular: false, + value_dimension: 3, + }; + kernel.potential( + &all_point_coordinates[..], + &all_charges[..], + &leaf_coordinates[..], + &mut direct[..], + ); - // let abs_error: f64 = potentials - // .iter() - // .zip(direct.iter()) - // .map(|(a, b)| (a - b).abs()) - // .sum(); - // let rel_error: f64 = abs_error / (direct.iter().sum::()); + let abs_error: f64 = potentials + .iter() + .zip(direct.iter()) + .map(|(a, b)| (a - b).abs()) + .sum(); + let rel_error: f64 = abs_error / (direct.iter().sum::()); - // println!("p={:?} rel_error={:?}\n", order, rel_error); - // assert!(false) + println!("p={:?} rel_error={:?}\n", order, rel_error); + assert!(false) } } diff --git a/tree/src/implementations/impl_morton.rs b/tree/src/implementations/impl_morton.rs index f4d16095..2f4578a7 100644 --- a/tree/src/implementations/impl_morton.rs +++ b/tree/src/implementations/impl_morton.rs @@ -1782,4 +1782,4 @@ mod test { assert!(surf_grid.iter().all(|point| conv_grid.contains(point))); } -} +} \ No newline at end of file From 3077210e14c3afd81757894254992d45037b7fca Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Fri, 30 Jun 2023 18:32:56 +0100 Subject: [PATCH 03/40] Fmt --- field/src/lib.rs | 37 +++++---- fmm/src/fmm.rs | 64 ++++++++------- tree/src/implementations/impl_morton.rs | 104 ++++++++++++------------ 3 files changed, 106 insertions(+), 99 deletions(-) diff --git a/field/src/lib.rs b/field/src/lib.rs index d8978d73..e1591295 100644 --- a/field/src/lib.rs +++ b/field/src/lib.rs @@ -206,15 +206,22 @@ where t.target .compute_surface(&domain, expansion_order, self.alpha); // Find min target - let sums: Vec = target_check_surface.chunks(self.kernel.dim()).map(|point| point.iter().sum()).collect(); + let sums: Vec = target_check_surface + .chunks(self.kernel.dim()) + .map(|point| point.iter().sum()) + .collect(); let min_index = sums .iter() .enumerate() .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) .map(|(index, _)| index) .unwrap(); - let min_target = [target_check_surface[min_index*self.kernel.dim() as usize], target_check_surface[min_index*(self.kernel.dim() as usize)+1], target_check_surface[min_index*(self.kernel.dim() as usize)+2]]; - + let min_target = [ + target_check_surface[min_index * self.kernel.dim() as usize], + target_check_surface[min_index * (self.kernel.dim() as usize) + 1], + target_check_surface[min_index * (self.kernel.dim() as usize) + 2], + ]; + let kernel = self.compute_kernel(expansion_order, &conv_grid_sources, min_target); let m = kernel.len(); let n = kernel[0].len(); @@ -305,13 +312,13 @@ where Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); for (i, t) in self.transfer_vectors.iter().enumerate() { - let source_equivalent_surface = t - .source - .compute_surface(&domain, expansion_order, self.alpha); + let source_equivalent_surface = + t.source + .compute_surface(&domain, expansion_order, self.alpha); - let target_check_surface = t - .target - .compute_surface(&domain, expansion_order, self.alpha); + let target_check_surface = + t.target + .compute_surface(&domain, expansion_order, self.alpha); let mut tmp_gram = Vec::new(); self.kernel.gram( @@ -397,13 +404,13 @@ where Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); for (i, t) in self.transfer_vectors.iter().enumerate() { - let source_equivalent_surface = t - .source - .compute_surface(&domain, expansion_order, self.alpha); + let source_equivalent_surface = + t.source + .compute_surface(&domain, expansion_order, self.alpha); - let target_check_surface = t - .target - .compute_surface(&domain, expansion_order, self.alpha); + let target_check_surface = + t.target + .compute_surface(&domain, expansion_order, self.alpha); let mut tmp_gram = Vec::new(); self.kernel.gram( diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index b9413deb..f3c2c542 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -74,17 +74,14 @@ where tree: SingleNodeTree, m2l: U, ) -> Self { - let upward_equivalent_surface = ROOT - .compute_surface(tree.get_domain(), order, alpha_inner); + let upward_equivalent_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); - let upward_check_surface = ROOT - .compute_surface(tree.get_domain(), order, alpha_outer); + let upward_check_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_outer); - let downward_equivalent_surface = ROOT - .compute_surface(tree.get_domain(), order, alpha_outer); + let downward_equivalent_surface = + ROOT.compute_surface(tree.get_domain(), order, alpha_outer); - let downward_check_surface = ROOT - .compute_surface(tree.get_domain(), order, alpha_inner); + let downward_check_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); // Compute upward check to equivalent, and downward check to equivalent Gram matrices // as well as their inverses using DGESVD. @@ -124,11 +121,11 @@ where let children = ROOT.children(); for child in children.iter() { - let child_upward_equivalent_surface = child - .compute_surface(tree.get_domain(), order, alpha_inner); + let child_upward_equivalent_surface = + child.compute_surface(tree.get_domain(), order, alpha_inner); - let child_downward_check_surface = child - .compute_surface(tree.get_domain(), order, alpha_inner); + let child_downward_check_surface = + child.compute_surface(tree.get_domain(), order, alpha_inner); let mut pc2ce = Vec::new(); kernel.gram( @@ -226,8 +223,11 @@ where .flat_map(|[x, y, z]| vec![x, y, z]) .collect_vec(); - let upward_check_surface = leaf - .compute_surface(&fmm_arc.tree().domain, fmm_arc.order, fmm_arc.alpha_outer); + let upward_check_surface = leaf.compute_surface( + &fmm_arc.tree().domain, + fmm_arc.order, + fmm_arc.alpha_outer, + ); let leaf_charges_view = ArrayView::from(leaf_charges_arc.deref()); let leaf_charges_slice = leaf_charges_view.as_slice().unwrap(); @@ -345,12 +345,11 @@ where let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); - let upward_equivalent_surface = source - .compute_surface( - fmm_arc.tree().get_domain(), - fmm_arc.order(), - fmm_arc.alpha_inner, - ); + let upward_equivalent_surface = source.compute_surface( + fmm_arc.tree().get_domain(), + fmm_arc.order(), + fmm_arc.alpha_inner, + ); let source_multipole_lock = source_multipole_arc.lock().unwrap(); let source_multipole_view = @@ -405,8 +404,11 @@ where .flat_map(|[x, y, z]| vec![x, y, z]) .collect_vec(); - let downward_equivalent_surface = leaf - .compute_surface(&fmm_arc.tree().domain, fmm_arc.order, fmm_arc.alpha_outer); + let downward_equivalent_surface = leaf.compute_surface( + &fmm_arc.tree().domain, + fmm_arc.order, + fmm_arc.alpha_outer, + ); let source_local_lock = source_local_arc.lock().unwrap(); let source_local_ref = ArrayView::from(source_local_lock.deref()); @@ -456,12 +458,11 @@ where let source_charges_view = ArrayView::from(source_charges.deref()); let source_charges_slice = source_charges_view.as_slice().unwrap(); - let downward_check_surface = leaf - .compute_surface( - &fmm_arc.tree().domain, - fmm_arc.order, - fmm_arc.alpha_inner, - ); + let downward_check_surface = leaf.compute_surface( + &fmm_arc.tree().domain, + fmm_arc.order, + fmm_arc.alpha_inner, + ); let mut downward_check_potential = vec![0f64; downward_check_surface.len() / fmm_arc.kernel().dim()]; @@ -848,8 +849,11 @@ where } // Compute local coefficients from check potentials - let check_potential = - Array::from_shape_vec(target_surface_idxs.len() / fmm_arc.kernel.dim(), tmp).unwrap(); + let check_potential = Array::from_shape_vec( + target_surface_idxs.len() / fmm_arc.kernel.dim(), + tmp, + ) + .unwrap(); // Compute local let target_local_owned = self.m2l_scale(target.level()) diff --git a/tree/src/implementations/impl_morton.rs b/tree/src/implementations/impl_morton.rs index 2f4578a7..f8b147c0 100644 --- a/tree/src/implementations/impl_morton.rs +++ b/tree/src/implementations/impl_morton.rs @@ -722,14 +722,21 @@ impl MortonKey { .unwrap(); let max_conv_point = grid[max_index]; - let sums: Vec = surface.chunks(dim as usize).map(|point| point.iter().sum()).collect(); + let sums: Vec = surface + .chunks(dim as usize) + .map(|point| point.iter().sum()) + .collect(); let max_index = sums .iter() .enumerate() .max_by(|a, b| a.1.partial_cmp(b.1).unwrap()) .map(|(index, _)| index) .unwrap(); - let max_surface_point = [surface[max_index*dim as usize], surface[max_index*(dim as usize)+1], surface[max_index*(dim as usize)+2]]; + let max_surface_point = [ + surface[max_index * dim as usize], + surface[max_index * (dim as usize) + 1], + surface[max_index * (dim as usize) + 2], + ]; let diff = max_conv_point .iter() @@ -748,11 +755,10 @@ impl MortonKey { } pub fn surface_grid(&self, order: usize) -> (Vec, Vec) { - let dim = 3; let n_coeffs = 6 * (order - 1).pow(2) + 2; - let mut surface: Vec = vec![0f64; dim*n_coeffs]; + let mut surface: Vec = vec![0f64; dim * n_coeffs]; let lower = 0; let upper = order - 1; @@ -766,9 +772,9 @@ impl MortonKey { || (j >= lower && k >= lower && (i == lower || i == upper)) || (k >= lower && i >= lower && (j == lower || j == upper)) { - surface[dim*idx] = i as f64; - surface[dim*idx+1] = j as f64; - surface[dim*idx+2] = k as f64; + surface[dim * idx] = i as f64; + surface[dim * idx + 1] = j as f64; + surface[dim * idx + 2] = k as f64; idx += 1; } } @@ -776,30 +782,19 @@ impl MortonKey { } // Map surface points to indices - let surface_idxs = surface - .iter() - .clone() - .map(|&x| x as usize) - .collect(); + let surface_idxs = surface.iter().clone().map(|&x| x as usize).collect(); // Shift and scale surface so that it's centered at the origin and has side length of 1 surface.iter_mut().for_each(|point| { - *point *= 2.0/ (order as f64 -1.0); + *point *= 2.0 / (order as f64 - 1.0); }); - surface - .iter_mut() - .for_each(|point| *point-= 1.0); + surface.iter_mut().for_each(|point| *point -= 1.0); (surface, surface_idxs) } - pub fn scale_surface( - &self, - surface: Vec, - domain: &Domain, - alpha: f64, - ) -> Vec { + pub fn scale_surface(&self, surface: Vec, domain: &Domain, alpha: f64) -> Vec { let dim = 3; // Translate box to specified centre, and scale let scaled_diameter = self.diameter(domain); @@ -811,9 +806,11 @@ impl MortonKey { let n = surface.len() / 3; for i in 0..n { - scaled_surface[i*dim] = (surface[i*dim] * (dilated_diameter[0] / 2.0)) + centre[0]; - scaled_surface[i*dim+1] = (surface[i*dim+1] * (dilated_diameter[1] / 2.0)) + centre[1]; - scaled_surface[i*dim+2] = (surface[i*dim+2] * (dilated_diameter[2] / 2.0)) + centre[2]; + scaled_surface[i * dim] = (surface[i * dim] * (dilated_diameter[0] / 2.0)) + centre[0]; + scaled_surface[i * dim + 1] = + (surface[i * dim + 1] * (dilated_diameter[1] / 2.0)) + centre[1]; + scaled_surface[i * dim + 2] = + (surface[i * dim + 2] * (dilated_diameter[2] / 2.0)) + centre[2]; } scaled_surface @@ -1681,30 +1678,31 @@ mod test { diameter: [1., 1., 1.], }; let key = MortonKey::from_point(&point, &domain, 0); - + let order = 2; let alpha = 1.; let dim = 3; - let ncoeffs = 6*(order-1 as usize).pow(2) + 2; + let ncoeffs = 6 * (order - 1 as usize).pow(2) + 2; // Test lengths let surface = key.compute_surface(&domain, order, alpha); - assert_eq!(surface.len(), ncoeffs*dim); - + assert_eq!(surface.len(), ncoeffs * dim); + let (surface, surface_idxs) = key.surface_grid(order); - assert_eq!(surface.len(), ncoeffs*dim); - assert_eq!(surface_idxs.len(), ncoeffs*dim); + assert_eq!(surface.len(), ncoeffs * dim); + assert_eq!(surface_idxs.len(), ncoeffs * dim); let mut expected = vec![[0usize; 3]; ncoeffs]; let lower = 0; - let upper = order -1; + let upper = order - 1; let mut idx = 0; for i in 0..order { for j in 0..order { for k in 0..order { if (i >= lower && j >= lower && (k == lower || k == upper)) - || (j >= lower && k >= lower && (i == lower || i == upper)) - || (k >= lower && i >= lower && (j == lower || j == upper)) { + || (j >= lower && k >= lower && (i == lower || i == upper)) + || (k >= lower && i >= lower && (j == lower || j == upper)) + { expected[idx] = [i, j, k]; idx += 1; } @@ -1714,7 +1712,11 @@ mod test { // Test ordering. for i in 0..ncoeffs { - let point = vec![surface_idxs[i*dim], surface_idxs[i*dim+1], surface_idxs[i*dim+2]]; + let point = vec![ + surface_idxs[i * dim], + surface_idxs[i * dim + 1], + surface_idxs[i * dim + 2], + ]; assert_eq!(point, expected[i]); } @@ -1723,18 +1725,19 @@ mod test { let key = MortonKey::from_point(&point, &domain, level); let surface = key.compute_surface(&domain, order, alpha); - let min_x = surface.iter(). - step_by(3).fold(f64::INFINITY, |a, &b| a.min(b)); + let min_x = surface + .iter() + .step_by(3) + .fold(f64::INFINITY, |a, &b| a.min(b)); - let max_x = surface.iter(). - step_by(3).fold(0f64, |a, &b| a.max(b)); + let max_x = surface.iter().step_by(3).fold(0f64, |a, &b| a.max(b)); - let diam_x = max_x-min_x; + let diam_x = max_x - min_x; let expected = key.diameter(&domain)[0]; assert_eq!(diam_x, expected); - // Test shifting + // Test shifting let point = [0.1, 0.2, 0.3]; let level = 2; let key = MortonKey::from_point(&point, &domain, level); @@ -1742,15 +1745,12 @@ mod test { let scaled_surface = key.scale_surface(surface.clone(), &domain, alpha); let expected = key.centre(&domain); - let c_x = surface.iter() - .step_by(3).fold(0f64, |a, &b| a+b) / (ncoeffs as f64); - let c_y = surface.iter() - .skip(1).step_by(3).fold(0f64, |a, &b| a+b) / (ncoeffs as f64); - let c_z = surface.iter() - .skip(2).step_by(3).fold(0f64, |a, &b| a+b) / (ncoeffs as f64); + let c_x = surface.iter().step_by(3).fold(0f64, |a, &b| a + b) / (ncoeffs as f64); + let c_y = surface.iter().skip(1).step_by(3).fold(0f64, |a, &b| a + b) / (ncoeffs as f64); + let c_z = surface.iter().skip(2).step_by(3).fold(0f64, |a, &b| a + b) / (ncoeffs as f64); let result = vec![c_x, c_y, c_z]; - + assert_eq!(result, expected); } @@ -1767,19 +1767,15 @@ mod test { let key = MortonKey::from_point(&point, &domain, 0); - let surface = key.compute_surface(&domain, order, alpha); let conv_grid = key.convolution_grid(order, &domain, &surface, alpha); // Test that surface grid is embedded in convolution grid let surf_grid: Vec<[f64; 3]> = surface .chunks_exact(3) - .map(|chunk| { - [chunk[0], chunk[1], chunk[2]] - }) + .map(|chunk| [chunk[0], chunk[1], chunk[2]]) .collect(); assert!(surf_grid.iter().all(|point| conv_grid.contains(point))); - } -} \ No newline at end of file +} From a7c573938595e6020f041b46dc98451ff40a2d95 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Mon, 3 Jul 2023 18:27:17 +0100 Subject: [PATCH 04/40] Start making field compatible with rlst --- field/Cargo.toml | 4 +- field/src/lib.rs | 850 +++++++++++++----------- kernel/src/helpers.rs | 7 +- kernel/src/laplace_3d.rs | 154 ++++- kernel/src/traits.rs | 114 ++-- kernel/src/types.rs | 42 +- traits/Cargo.toml | 1 + traits/src/field.rs | 1 + traits/src/kernel.rs | 102 ++- tree/src/implementations/impl_morton.rs | 69 +- 10 files changed, 842 insertions(+), 502 deletions(-) diff --git a/field/Cargo.toml b/field/Cargo.toml index 299cffc2..8823352f 100644 --- a/field/Cargo.toml +++ b/field/Cargo.toml @@ -22,9 +22,11 @@ crate-type = ["lib", "cdylib"] [dependencies] bempp-traits = { path = "../traits" } bempp-tree = { path = "../tree" } +bempp-kernel = { path = "../kernel" } itertools = "0.10" ndarray = { version = "*", features = ["blas"]} ndarray-linalg = { version = "*", features = ["openblas-system"] } ndarray-ndimage = "0.3.0" ndrustfft = "0.4.0" -num = "0.4" \ No newline at end of file +num = "0.4" +rlst = {git = "https://github.com/linalg-rs/rlst.git" } \ No newline at end of file diff --git a/field/src/lib.rs b/field/src/lib.rs index e1591295..7e235a20 100644 --- a/field/src/lib.rs +++ b/field/src/lib.rs @@ -6,11 +6,20 @@ use ndarray_linalg::SVDDC; use ndarray_ndimage::{pad, PadMode}; use ndrustfft::{ndfft, ndfft_r2c, Complex, FftHandler, R2cFftHandler}; -use bempp_traits::{field::FieldTranslationData, kernel::Kernel}; +use rlst; +use rlst::common::traits::{NewLikeSelf, Transpose, NewLikeTranspose}; +use rlst::common::{tools::PrettyPrint, traits::{Copy, Eval}}; +use rlst::dense::{traits::*, rlst_fixed_mat, rlst_mat, rlst_pointer_mat, Shape, Dot}; +use rlst::algorithms::traits::svd::{Svd, Mode}; +use rlst::algorithms::linalg::LinAlg; +use rlst::dense::{matrix::{Matrix}, base_matrix::{BaseMatrix}, data_container::{VectorContainer}}; + +use bempp_traits::{field::FieldTranslationData, kernel::{Kernel, EvalType, KernelType}, types::{Scalar}}; use bempp_tree::types::{domain::Domain, morton::MortonKey}; type FftM2LEntry = ArrayBase>, Dim<[usize; 3]>>; -type SvdM2lEntry = ArrayBase, Dim<[usize; 2]>>; +type SvdM2lEntry = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; + #[derive(Default)] pub struct FftFieldTranslationNaiveKiFmm @@ -34,7 +43,7 @@ where pub kernel: T, } -#[derive(Default)] +// #[derive(Default)] pub struct SvdFieldTranslationNaiveKiFmm where T: Kernel + Default, @@ -54,7 +63,7 @@ where pub kernel: T, } -#[derive(Default)] +// #[derive(Default)] pub struct SvdFieldTranslationKiFmm where T: Kernel + Default, @@ -174,113 +183,117 @@ pub fn compute_transfer_vectors() -> Vec { result } -impl FieldTranslationData for FftFieldTranslationNaiveKiFmm -where - T: Kernel + Default, -{ - type Domain = Domain; - type M2LOperators = Vec>, Dim<[usize; 3]>>>; - type TransferVector = Vec; - - fn compute_m2l_operators( - &self, - expansion_order: usize, - domain: Self::Domain, - ) -> Self::M2LOperators { - type TranslationType = ArrayBase>, Dim<[usize; 3]>>; - let mut result: Vec = Vec::new(); - - for t in self.transfer_vectors.iter() { - let source_equivalent_surface = - t.source - .compute_surface(&domain, expansion_order, self.alpha); - - let conv_grid_sources = t.source.convolution_grid( - expansion_order, - &domain, - &source_equivalent_surface, - self.alpha, - ); - - let target_check_surface = - t.target - .compute_surface(&domain, expansion_order, self.alpha); - // Find min target - let sums: Vec = target_check_surface - .chunks(self.kernel.dim()) - .map(|point| point.iter().sum()) - .collect(); - let min_index = sums - .iter() - .enumerate() - .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) - .map(|(index, _)| index) - .unwrap(); - let min_target = [ - target_check_surface[min_index * self.kernel.dim() as usize], - target_check_surface[min_index * (self.kernel.dim() as usize) + 1], - target_check_surface[min_index * (self.kernel.dim() as usize) + 2], - ]; - - let kernel = self.compute_kernel(expansion_order, &conv_grid_sources, min_target); - let m = kernel.len(); - let n = kernel[0].len(); - let k = kernel[0][0].len(); - - // Precompute and store the FFT of each unique kernel interaction - let kernel = - Array3::from_shape_vec((m, n, k), kernel.into_iter().flatten().flatten().collect()) - .unwrap(); - - // Begin by calculating pad lengths along each dimension - let p = 2 * m; - let q = 2 * n; - let r = 2 * k; - - let padding = [[0, p - m], [0, q - n], [0, r - k]]; - - let padded_kernel = pad(&kernel, &padding, PadMode::Constant(0.)); - - // Flip the kernel - let padded_kernel = padded_kernel.slice(s![..;-1,..;-1,..;-1]).to_owned(); - let mut padded_kernel_hat: Array3> = Array3::zeros((p, q, r / 2 + 1)); - - // Compute FFT of kernel for this transfer vector - { - // 1. Init the handlers for FFTs along each axis - let mut handler_ax0 = FftHandler::::new(p); - let mut handler_ax1 = FftHandler::::new(q); - let mut handler_ax2 = R2cFftHandler::::new(r); - - // 2. Compute the transform along each axis - let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); - ndfft_r2c(&padded_kernel, &mut tmp1, &mut handler_ax2, 2); - let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); - ndfft(&tmp1, &mut tmp2, &mut handler_ax1, 1); - ndfft(&tmp2, &mut padded_kernel_hat, &mut handler_ax0, 0); - } - - // Store FFT of kernel for this transfer vector - { - result.push(padded_kernel_hat); - } - } - - result - } - - fn compute_transfer_vectors(&self) -> Self::TransferVector { - compute_transfer_vectors() - } - - fn ncoeffs(&self, expansion_order: usize) -> usize { - 6 * (expansion_order - 1).pow(2) + 2 - } -} +// impl FieldTranslationData for FftFieldTranslationNaiveKiFmm +// where +// T: Kernel + Default, +// { +// type Domain = Domain; +// type M2LOperators = Vec>, Dim<[usize; 3]>>>; +// type TransferVector = Vec; + +// fn compute_m2l_operators( +// &self, +// expansion_order: usize, +// domain: Self::Domain, +// ) -> Self::M2LOperators { +// type TranslationType = ArrayBase>, Dim<[usize; 3]>>; +// let mut result: Vec = Vec::new(); + +// for t in self.transfer_vectors.iter() { +// let source_equivalent_surface = +// t.source +// .compute_surface(&domain, expansion_order, self.alpha); + +// let conv_grid_sources = t.source.convolution_grid( +// expansion_order, +// &domain, +// &source_equivalent_surface, +// self.alpha, +// ); + +// let target_check_surface = t.target.compute_surface(&domain, expansion_order, self.alpha); + +// // TODO: Remove dim +// let dim = 3; +// // Find min target +// let ncoeffs: usize = target_check_surface.len() / dim; +// let sums: Vec<_> = (0..ncoeffs) +// .map(|i| target_check_surface[i] + target_check_surface[ncoeffs + i] + target_check_surface[2*ncoeffs + i]) +// .collect(); + +// let min_index = sums +// .iter() +// .enumerate() +// .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) +// .map(|(index, _)| index) +// .unwrap(); + +// let min_target = [ +// target_check_surface[min_index], +// target_check_surface[min_index + ncoeffs], +// target_check_surface[min_index + 2 * ncoeffs], +// ]; + +// // TODO: Fix compute_kernel to work with new kernel +// let kernel = self.compute_kernel(expansion_order, &conv_grid_sources, min_target); +// let m = kernel.len(); +// let n = kernel[0].len(); +// let k = kernel[0][0].len(); + +// // Precompute and store the FFT of each unique kernel interaction +// let kernel = +// Array3::from_shape_vec((m, n, k), kernel.into_iter().flatten().flatten().collect()) +// .unwrap(); + +// // Begin by calculating pad lengths along each dimension +// let p = 2 * m; +// let q = 2 * n; +// let r = 2 * k; + +// let padding = [[0, p - m], [0, q - n], [0, r - k]]; + +// let padded_kernel = pad(&kernel, &padding, PadMode::Constant(0.)); + +// // Flip the kernel +// let padded_kernel = padded_kernel.slice(s![..;-1,..;-1,..;-1]).to_owned(); +// let mut padded_kernel_hat: Array3> = Array3::zeros((p, q, r / 2 + 1)); + +// // Compute FFT of kernel for this transfer vector +// { +// // 1. Init the handlers for FFTs along each axis +// let mut handler_ax0 = FftHandler::::new(p); +// let mut handler_ax1 = FftHandler::::new(q); +// let mut handler_ax2 = R2cFftHandler::::new(r); + +// // 2. Compute the transform along each axis +// let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); +// ndfft_r2c(&padded_kernel, &mut tmp1, &mut handler_ax2, 2); +// let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); +// ndfft(&tmp1, &mut tmp2, &mut handler_ax1, 1); +// ndfft(&tmp2, &mut padded_kernel_hat, &mut handler_ax0, 0); +// } + +// // Store FFT of kernel for this transfer vector +// { +// result.push(padded_kernel_hat); +// } +// } + +// result +// } + +// fn compute_transfer_vectors(&self) -> Self::TransferVector { +// compute_transfer_vectors() +// } + +// fn ncoeffs(&self, expansion_order: usize) -> usize { +// 6 * (expansion_order - 1).pow(2) + 2 +// } +// } impl FieldTranslationData for SvdFieldTranslationKiFmm where - T: Kernel + Default, + T: Kernel + Default, { type TransferVector = Vec; type M2LOperators = (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry); @@ -294,212 +307,269 @@ where 6 * (expansion_order - 1).pow(2) + 2 } - fn compute_m2l_operators( + fn compute_m2l_operators<'a>( &self, expansion_order: usize, domain: Self::Domain, ) -> Self::M2LOperators { + // ){ // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) // Compute interaction matrices between source and unique targets, defined by unique transfer vectors let nrows = self.ncoeffs(expansion_order); let ncols = self.ncoeffs(expansion_order); - let mut se2tc_fat: SvdM2lEntry = - Array2::zeros((nrows, ncols * self.transfer_vectors.len())); + // let mut se2tc_fat: SvdM2lEntry = + // Array2::zeros((nrows, ncols * self.transfer_vectors.len())); + let ntransfer_vectors = self.transfer_vectors.len(); + let mut se2tc_fat = rlst_mat![f64, (nrows, ncols*ntransfer_vectors)]; - let mut se2tc_thin: SvdM2lEntry = - Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); + // let mut se2tc_thin: SvdM2lEntry = + // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); + let mut se2tc_thin= rlst_mat![f64, (nrows*ntransfer_vectors, ncols)]; - for (i, t) in self.transfer_vectors.iter().enumerate() { - let source_equivalent_surface = - t.source - .compute_surface(&domain, expansion_order, self.alpha); - let target_check_surface = - t.target - .compute_surface(&domain, expansion_order, self.alpha); + for (i, t) in self.transfer_vectors.iter().enumerate() { + let source_equivalent_surface = t.source.compute_surface(&domain, expansion_order, self.alpha); + let nsources = source_equivalent_surface.len() / self.kernel.space_dimension(); + let source_equivalent_surface = unsafe { rlst_pointer_mat!['a, f64, source_equivalent_surface.as_ptr(), (nsources, self.kernel.space_dimension()), (1, nsources)] }; + + let target_check_surface = t.target.compute_surface(&domain, expansion_order, self.alpha); + let ntargets = target_check_surface.len() / self.kernel.space_dimension(); + let target_check_surface = unsafe { rlst_pointer_mat!['a, f64, target_check_surface.as_ptr(), (ntargets, self.kernel.space_dimension()), (1, ntargets)] }; + + let mut tmp_gram = rlst_mat![f64, (ntargets, nsources)]; - let mut tmp_gram = Vec::new(); self.kernel.gram( - &source_equivalent_surface[..], - &target_check_surface[..], - &mut tmp_gram, + EvalType::Value, + source_equivalent_surface.data(), + target_check_surface.data(), + tmp_gram.data_mut(), ); - let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); + // // let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); let lidx_sources = i * ncols; let ridx_sources = lidx_sources + ncols; - se2tc_fat - .slice_mut(s![.., lidx_sources..ridx_sources]) - .assign(&tmp_gram); - - se2tc_thin - .slice_mut(s![lidx_sources..ridx_sources, ..]) - .assign(&tmp_gram); - } - - let left: usize = 0; - let right: usize = std::cmp::min(self.k, nrows); - - let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - - let u = u.unwrap().slice(s![.., left..right]).to_owned(); - let sigma = Array2::from_diag(&sigma.slice(s![left..right])); - let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); - - let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - - let st = st.unwrap().slice(s![left..right, ..]).to_owned(); + let block_size = nrows*ncols; + let start_idx = i * block_size; + let end_idx = start_idx+block_size; + let mut block = se2tc_fat.get_slice_mut(start_idx, end_idx); + block.copy_from_slice(tmp_gram.data_mut()); + + // // se2tc_fat + // // .slice_mut(s![.., lidx_sources..ridx_sources]) + // // .assign(&tmp_gram); + + // se2tc_fat.block_mut((0, lidx_sources), tmp_gram.shape()); + for j in 0..ncols { + let start_idx = j * ntransfer_vectors * nrows + i * nrows; + let end_idx = start_idx + nrows; + let mut block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); + let mut gram_column = tmp_gram.get_slice_mut(j*ncols, j*ncols+ncols); + block_column.copy_from_slice(gram_column); + } - // Store compressed M2L operators - let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); - for i in 0..self.transfer_vectors.len() { - let v_lidx = i * ncols; - let v_ridx = v_lidx + ncols; - let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); - let tmp = sigma.dot(&vt_sub.dot(&st.t())); - let lidx = i * self.k; - let ridx = lidx + self.k; - - c.slice_mut(s![.., lidx..ridx]).assign(&tmp); } - (u, st, c) - } -} - -impl FieldTranslationData for SvdFieldTranslationNaiveKiFmm -where - T: Kernel + Default, -{ - type TransferVector = Vec; - type M2LOperators = (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry); - type Domain = Domain; - - fn compute_transfer_vectors(&self) -> Self::TransferVector { - compute_transfer_vectors() - } - - fn ncoeffs(&self, expansion_order: usize) -> usize { - 6 * (expansion_order - 1).pow(2) + 2 - } - - fn compute_m2l_operators( - &self, - expansion_order: usize, - domain: Self::Domain, - ) -> Self::M2LOperators { - // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) - - // Compute interaction matrices between source and unique targets, defined by unique transfer vectors - let nrows = self.ncoeffs(expansion_order); - let ncols = self.ncoeffs(expansion_order); - - let mut se2tc_fat: SvdM2lEntry = - Array2::zeros((nrows, ncols * self.transfer_vectors.len())); - - let mut se2tc_thin: SvdM2lEntry = - Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); - for (i, t) in self.transfer_vectors.iter().enumerate() { - let source_equivalent_surface = - t.source - .compute_surface(&domain, expansion_order, self.alpha); - - let target_check_surface = - t.target - .compute_surface(&domain, expansion_order, self.alpha); - - let mut tmp_gram = Vec::new(); - self.kernel.gram( - &source_equivalent_surface[..], - &target_check_surface[..], - &mut tmp_gram, - ); - - let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); - let lidx_sources = i * ncols; - let ridx_sources = lidx_sources + ncols; + let left: usize = 0; + let right: usize = self.k; + let (sigma, u, vt) = se2tc_fat.linalg().svd(Mode::All, Mode::Slim).unwrap(); - se2tc_fat - .slice_mut(s![.., lidx_sources..ridx_sources]) - .assign(&tmp_gram); + let u = u.unwrap(); + let vt = vt.unwrap(); - se2tc_thin - .slice_mut(s![lidx_sources..ridx_sources, ..]) - .assign(&tmp_gram); + // Keep 'k' singular values + let mut sigma_mat = rlst_mat![f64, (self.k, self.k)]; + for i in 0..self.k { + sigma_mat[[i, i]] = sigma[i] } - let left: usize = 0; - let right: usize = std::cmp::min(self.k, nrows); + let (mu, nu) = u.shape(); + let u = u.block((0, 0), (mu, self.k)).eval(); - let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + let (mvt, nvt) = vt.shape(); + let vt = vt.block((0, 0), (right, nvt)).eval(); + // println!("u {:?} {:?} {:?}", u.shape(), sigma_mat.shape(), vt.shape()); + + // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // let u = u.unwrap().slice(s![.., left..right]).to_owned(); + // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); + // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); - let u = u.unwrap().slice(s![.., left..right]).to_owned(); - let sigma = Array2::from_diag(&sigma.slice(s![left..right])); - let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); + let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::All, Mode::All).unwrap(); + let st = st.unwrap(); + let (mst, nst) = st.shape(); + let st_block = st.block((0, 0), (self.k, nst)); + let s = st_block.transpose().eval(); + let st = st.block((0, 0), (self.k, nst)).eval(); - let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - - let st = st.unwrap().slice(s![left..right, ..]).to_owned(); + // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); // Store compressed M2L operators - let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); + // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); + let mut c = rlst_mat![f64, (self.k, self.k*ntransfer_vectors)]; + for i in 0..self.transfer_vectors.len() { - let v_lidx = i * ncols; - let v_ridx = v_lidx + ncols; - let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); - let tmp = sigma.dot(&vt_sub.dot(&st.t())); - let lidx = i * self.k; - let ridx = lidx + self.k; - - c.slice_mut(s![.., lidx..ridx]).assign(&tmp); + // let v_lidx = i * ncols; + // let v_ridx = v_lidx + ncols; + // let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); + + // let block_size = right*ncols; + // let start_idx = i * block_size; + // let end_idx = start_idx+block_size; + let top_left = (0, i*ncols); + let dim = (self.k, ncols); + let vt_block = vt.block(top_left, dim); + + let tmp = sigma_mat.dot(&vt_block.dot(&s)); + // let tmp = sigma.dot(&vt_sub.dot(&st.t())); + // let lidx = i * self.k; + // let ridx = lidx + self.k; + + let top_left = (0, i*self.k); + let dim = (self.k, ncols); + // let mut c_block =; + c.block_mut(top_left, dim).data_mut().copy_from_slice(tmp.data()); + + // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); } (u, st, c) + // assert!(false) } } -impl SvdFieldTranslationNaiveKiFmm -where - T: Kernel + Default, -{ - pub fn new( - kernel: T, - k: Option, - expansion_order: usize, - domain: Domain, - alpha: f64, - ) -> Self { - let mut result = SvdFieldTranslationNaiveKiFmm::default(); - - if let Some(k) = k { - // Compression rank <= number of coefficients - let ncoeffs = result.ncoeffs(expansion_order); - if k <= ncoeffs { - result.k = k - } else { - result.k = ncoeffs; - } - } else { - // TODO: Should be data driven if nothing is provided by the user - result.k = 50; - } - - result.alpha = alpha; - result.kernel = kernel; - result.transfer_vectors = result.compute_transfer_vectors(); - result.m2l = result.compute_m2l_operators(expansion_order, domain); - - result - } -} +// impl FieldTranslationData for SvdFieldTranslationNaiveKiFmm +// where +// T: Kernel + Default, +// { +// type TransferVector = Vec; +// type M2LOperators = (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry); +// type Domain = Domain; + +// fn compute_transfer_vectors(&self) -> Self::TransferVector { +// compute_transfer_vectors() +// } + +// fn ncoeffs(&self, expansion_order: usize) -> usize { +// 6 * (expansion_order - 1).pow(2) + 2 +// } + +// fn compute_m2l_operators( +// &self, +// expansion_order: usize, +// domain: Self::Domain, +// ) -> Self::M2LOperators { +// // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) + +// // Compute interaction matrices between source and unique targets, defined by unique transfer vectors +// let nrows = self.ncoeffs(expansion_order); +// let ncols = self.ncoeffs(expansion_order); + +// let mut se2tc_fat: SvdM2lEntry = +// Array2::zeros((nrows, ncols * self.transfer_vectors.len())); + +// let mut se2tc_thin: SvdM2lEntry = +// Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); + +// for (i, t) in self.transfer_vectors.iter().enumerate() { +// let source_equivalent_surface = +// t.source.compute_surface(&domain, expansion_order, self.alpha); + +// let target_check_surface = +// t.target.compute_surface(&domain, expansion_order, self.alpha); + +// let mut tmp_gram = Vec::new(); +// self.kernel.gram( +// &source_equivalent_surface[..], +// &target_check_surface[..], +// &mut tmp_gram, +// ); + +// let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); +// let lidx_sources = i * ncols; +// let ridx_sources = lidx_sources + ncols; + +// se2tc_fat +// .slice_mut(s![.., lidx_sources..ridx_sources]) +// .assign(&tmp_gram); + +// se2tc_thin +// .slice_mut(s![lidx_sources..ridx_sources, ..]) +// .assign(&tmp_gram); +// } + +// let left: usize = 0; +// let right: usize = std::cmp::min(self.k, nrows); + +// let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + +// let u = u.unwrap().slice(s![.., left..right]).to_owned(); +// let sigma = Array2::from_diag(&sigma.slice(s![left..right])); +// let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); + +// let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + +// let st = st.unwrap().slice(s![left..right, ..]).to_owned(); + +// // Store compressed M2L operators +// let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); +// for i in 0..self.transfer_vectors.len() { +// let v_lidx = i * ncols; +// let v_ridx = v_lidx + ncols; +// let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); +// let tmp = sigma.dot(&vt_sub.dot(&st.t())); +// let lidx = i * self.k; +// let ridx = lidx + self.k; + +// c.slice_mut(s![.., lidx..ridx]).assign(&tmp); +// } + +// (u, st, c) +// } +// } + +// impl SvdFieldTranslationNaiveKiFmm +// where +// T: Kernel + Default, +// { +// pub fn new( +// kernel: T, +// k: Option, +// expansion_order: usize, +// domain: Domain, +// alpha: f64, +// ) -> Self { +// let mut result = SvdFieldTranslationNaiveKiFmm::default(); + +// if let Some(k) = k { +// // Compression rank <= number of coefficients +// let ncoeffs = result.ncoeffs(expansion_order); +// if k <= ncoeffs { +// result.k = k +// } else { +// result.k = ncoeffs; +// } +// } else { +// // TODO: Should be data driven if nothing is provided by the user +// result.k = 50; +// } + +// result.alpha = alpha; +// result.kernel = kernel; +// result.transfer_vectors = result.compute_transfer_vectors(); +// result.m2l = result.compute_m2l_operators(expansion_order, domain); + +// result +// } +// } impl SvdFieldTranslationKiFmm where - T: Kernel + Default, + T: Kernel + Default, { pub fn new( kernel: T, @@ -508,7 +578,17 @@ where domain: Domain, alpha: f64, ) -> Self { - let mut result = SvdFieldTranslationKiFmm::default(); + + let dummy = rlst_mat![f64, (1,1)]; + + // TODO: There should be a default for matrices to make code cleaner. + let mut result = SvdFieldTranslationKiFmm { + alpha, + k: 100, + kernel, + m2l: (dummy.new_like_self().eval(), dummy.new_like_self().eval(), dummy.new_like_self().eval()), + transfer_vectors: Vec::new() + }; if let Some(k) = k { // Compression rank <= number of coefficients @@ -523,8 +603,6 @@ where result.k = 50; } - result.alpha = alpha; - result.kernel = kernel; result.transfer_vectors = result.compute_transfer_vectors(); result.m2l = result.compute_m2l_operators(expansion_order, domain); @@ -532,102 +610,132 @@ where } } -impl FftFieldTranslationNaiveKiFmm -where - T: Kernel + Default, -{ - pub fn new(kernel: T, expansion_order: usize, domain: Domain, alpha: f64) -> Self { - let mut result = FftFieldTranslationNaiveKiFmm::default(); - - // Create maps between surface and convolution grids - let (surf_to_conv, conv_to_surf) = - FftFieldTranslationNaiveKiFmm::::compute_surf_to_conv_map(expansion_order); - result.surf_to_conv_map = surf_to_conv; - result.conv_to_surf_map = conv_to_surf; - - result.kernel = kernel; - - result.alpha = alpha; - result.transfer_vectors = result.compute_transfer_vectors(); - result.m2l = result.compute_m2l_operators(expansion_order, domain); - - result - } - - pub fn compute_surf_to_conv_map( - expansion_order: usize, - ) -> (HashMap, HashMap) { - let n = 2 * expansion_order - 1; - - // Index maps between surface and convolution grids - let mut surf_to_conv: HashMap = HashMap::new(); - let mut conv_to_surf: HashMap = HashMap::new(); - - // Initialise surface grid index - let mut surf_index = 0; - - // The boundaries of the surface grid - let lower = expansion_order - 1; - let upper = 2 * expansion_order - 2; - - // Iterate through the entire convolution grid marking the boundaries - // This makes the map much easier to understand and debug - for i in 0..n { - for j in 0..n { - for k in 0..n { - let conv_idx = i * n * n + j * n + k; - if (i >= lower && j >= lower && (k == lower || k == upper)) - || (j >= lower && k >= lower && (i == lower || i == upper)) - || (k >= lower && i >= lower && (j == lower || j == upper)) - { - surf_to_conv.insert(surf_index, conv_idx); - conv_to_surf.insert(conv_idx, surf_index); - surf_index += 1; - } - } - } - } - - (surf_to_conv, conv_to_surf) - } - - pub fn compute_kernel( - &self, - expansion_order: usize, - convolution_grid: &[[f64; 3]], - min_target: [f64; 3], - ) -> Vec>> { - let n = 2 * expansion_order - 1; - let mut result = vec![vec![vec![0f64; n]; n]; n]; - - for (i, result_i) in result.iter_mut().enumerate() { - for (j, result_ij) in result_i.iter_mut().enumerate() { - for (k, result_ijk) in result_ij.iter_mut().enumerate() { - let conv_idx = i * n * n + j * n + k; - let src = convolution_grid[conv_idx]; - *result_ijk = self.kernel.kernel(&src[..], &min_target[..]); - } - } - } - result - } - - pub fn compute_signal(&self, expansion_order: usize, charges: &[f64]) -> Vec>> { - let n = 2 * expansion_order - 1; - let mut result = vec![vec![vec![0f64; n]; n]; n]; - - for (i, result_i) in result.iter_mut().enumerate() { - for (j, result_ij) in result_i.iter_mut().enumerate() { - for (k, result_ijk) in result_ij.iter_mut().enumerate() { - let conv_idx = i * n * n + j * n + k; - if self.conv_to_surf_map.contains_key(&conv_idx) { - let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); - *result_ijk = charges[*surf_idx] - } - } - } - } +// impl FftFieldTranslationNaiveKiFmm +// where +// T: Kernel + Default, +// { +// pub fn new(kernel: T, expansion_order: usize, domain: Domain, alpha: f64) -> Self { +// let mut result = FftFieldTranslationNaiveKiFmm::default(); + +// // Create maps between surface and convolution grids +// let (surf_to_conv, conv_to_surf) = +// FftFieldTranslationNaiveKiFmm::::compute_surf_to_conv_map(expansion_order); +// result.surf_to_conv_map = surf_to_conv; +// result.conv_to_surf_map = conv_to_surf; + +// result.kernel = kernel; + +// result.alpha = alpha; +// result.transfer_vectors = result.compute_transfer_vectors(); +// result.m2l = result.compute_m2l_operators(expansion_order, domain); + +// result +// } + +// pub fn compute_surf_to_conv_map( +// expansion_order: usize, +// ) -> (HashMap, HashMap) { +// let n = 2 * expansion_order - 1; + +// // Index maps between surface and convolution grids +// let mut surf_to_conv: HashMap = HashMap::new(); +// let mut conv_to_surf: HashMap = HashMap::new(); + +// // Initialise surface grid index +// let mut surf_index = 0; + +// // The boundaries of the surface grid +// let lower = expansion_order - 1; +// let upper = 2 * expansion_order - 2; + +// // Iterate through the entire convolution grid marking the boundaries +// // This makes the map much easier to understand and debug +// for i in 0..n { +// for j in 0..n { +// for k in 0..n { +// let conv_idx = i * n * n + j * n + k; +// if (i >= lower && j >= lower && (k == lower || k == upper)) +// || (j >= lower && k >= lower && (i == lower || i == upper)) +// || (k >= lower && i >= lower && (j == lower || j == upper)) +// { +// surf_to_conv.insert(surf_index, conv_idx); +// conv_to_surf.insert(conv_idx, surf_index); +// surf_index += 1; +// } +// } +// } +// } + +// (surf_to_conv, conv_to_surf) +// } + +// pub fn compute_kernel( +// &self, +// expansion_order: usize, +// convolution_grid: &[[f64; 3]], +// min_target: [f64; 3], +// ) -> Vec>> { +// let n = 2 * expansion_order - 1; +// let mut result = vec![vec![vec![0f64; n]; n]; n]; + +// for (i, result_i) in result.iter_mut().enumerate() { +// for (j, result_ij) in result_i.iter_mut().enumerate() { +// for (k, result_ijk) in result_ij.iter_mut().enumerate() { +// let conv_idx = i * n * n + j * n + k; +// let src = convolution_grid[conv_idx]; +// *result_ijk = self.kernel.kernel(&src[..], &min_target[..]); +// } +// } +// } +// result +// } + +// pub fn compute_signal(&self, expansion_order: usize, charges: &[f64]) -> Vec>> { +// let n = 2 * expansion_order - 1; +// let mut result = vec![vec![vec![0f64; n]; n]; n]; + +// for (i, result_i) in result.iter_mut().enumerate() { +// for (j, result_ij) in result_i.iter_mut().enumerate() { +// for (k, result_ijk) in result_ij.iter_mut().enumerate() { +// let conv_idx = i * n * n + j * n + k; +// if self.conv_to_surf_map.contains_key(&conv_idx) { +// let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); +// *result_ijk = charges[*surf_idx] +// } +// } +// } +// } + +// result +// } +// } + +use std::any::type_name; + +fn type_of(_: T) -> &'static str { + type_name::() +} - result +mod test { + + use super::*; + use bempp_kernel::laplace_3d::Laplace3dKernel; + + #[test] + fn test_svd() { + + let kernel = Laplace3dKernel::::default(); + let k = 100; + let order = 2; + let domain = Domain {origin: [0., 0., 0.], diameter: [1., 1., 1.]}; + let alpha_inner = 1.05; + + let m2l_data_svd = SvdFieldTranslationKiFmm::new( + kernel, + Some(k), + order, + domain, + alpha_inner, + ); } -} +} \ No newline at end of file diff --git a/kernel/src/helpers.rs b/kernel/src/helpers.rs index b28c02f8..9c36dc80 100644 --- a/kernel/src/helpers.rs +++ b/kernel/src/helpers.rs @@ -1,6 +1,7 @@ -use crate::traits::Kernel; -use crate::types::EvalType; -use bempp_traits::types::Scalar; +use bempp_traits::{ + types::Scalar, + kernel::{EvalType, Kernel} +}; pub(crate) fn check_dimensions_evaluate( kernel: &K, diff --git a/kernel/src/laplace_3d.rs b/kernel/src/laplace_3d.rs index a886a232..8feabbc5 100644 --- a/kernel/src/laplace_3d.rs +++ b/kernel/src/laplace_3d.rs @@ -1,11 +1,13 @@ //! Implementation of the Laplace kernel -use crate::traits::Kernel; -use crate::types::{EvalType, KernelType}; +use bempp_traits:: + { + kernel::{Kernel, EvalType, KernelType}, + types::{Scalar}, fmm::Fmm + }; use num; use std::marker::PhantomData; use crate::helpers::check_dimensions_evaluate; -use bempp_traits::types::Scalar; use num::traits::FloatConst; pub struct Laplace3dKernel { @@ -48,7 +50,7 @@ where fn evaluate_st( &self, - eval_type: crate::types::EvalType, + eval_type: EvalType, sources: &[::Real], targets: &[::Real], charges: &[Self::T], @@ -60,7 +62,7 @@ where fn evaluate_mt( &self, - eval_type: crate::types::EvalType, + eval_type: EvalType, sources: &[::Real], targets: &[::Real], charges: &[Self::T], @@ -87,9 +89,99 @@ where }) } - fn range_component_count(&self, eval_type: crate::types::EvalType) -> usize { + fn range_component_count(&self, eval_type: EvalType) -> usize { laplace_component_count(eval_type) } + + fn gram(&self, eval_type: EvalType, sources: &[::Real], targets: &[::Real], result: &mut [Self::T]) { + + let ntargets = targets.len() / self.space_dimension(); + let nsources= sources.len() / self.space_dimension(); + for i in 0..ntargets { + let target = [ + targets[i], + targets[ntargets + i], + targets[2 * ntargets + i], + ]; + for j in 0..nsources { + let source = [ + sources[j], + sources[nsources + j], + sources[2 * nsources + j], + ]; + let idx = i+ntargets*j; + + evaluate_laplace_one_target_one_source::(eval_type, &target, &source, &mut result[idx..idx+1]); + } + } + } + + fn scale(&self, level: u64) -> f64 { + 1. / (2f64.powf(level as f64)) + } +} + +pub fn evaluate_laplace_one_target_one_source ( + eval_type: EvalType, + target: &[::Real], + source: &[::Real], + result: &mut [T], +) { + + let m_inv_4pi = num::cast::(0.25 * f64::FRAC_1_PI()).unwrap(); + let zero_real = ::zero(); + let one_real = ::one(); + + match eval_type { + EvalType::Value => { + let mut my_result = T::zero(); + let diff_norm = ((target[0] - source[0]) * (target[0] - source[0]) + + (target[1] - source[1]) + * (target[1] - source[1]) + + (target[2] - source[2]) + * (target[2] - source[2])) + .sqrt(); + let inv_diff_norm = { + if diff_norm == zero_real { + zero_real + } else { + one_real / diff_norm + } + }; + + my_result += T::one().mul_real(inv_diff_norm); + result[0] = my_result.mul_real(m_inv_4pi) + } + EvalType::ValueDeriv => { + // Cannot simply use an array my_result as this is not + // correctly auto-vectorized. + + let mut my_result0 = T::zero(); + let mut my_result1 = T::zero(); + let mut my_result2 = T::zero(); + let mut my_result3 = T::zero(); + + let diff0 = source[0] - target[0]; + let diff1 = source[1] - target[1]; + let diff2 = source[2] - target[2]; + let diff_norm = (diff0 * diff0 + diff1 * diff1 + diff2 * diff2).sqrt(); + let inv_diff_norm = { + if diff_norm == zero_real { + zero_real + } else { + one_real / diff_norm + } + }; + let inv_diff_norm_cubed = inv_diff_norm * inv_diff_norm * inv_diff_norm; + + + result[0] = my_result0.mul_real(m_inv_4pi); + result[1] = my_result1.mul_real(m_inv_4pi); + result[2] = my_result2.mul_real(m_inv_4pi); + result[3] = my_result3.mul_real(m_inv_4pi); + } + } + } pub fn evaluate_laplace_one_target( @@ -180,7 +272,7 @@ mod test { use rlst; use rlst::common::tools::PrettyPrint; use rlst::common::traits::{Copy, Eval}; - use rlst::dense::traits::*; + use rlst::dense::{traits::*, rlst_pointer_mat}; #[test] fn test_laplace_3d() { @@ -296,4 +388,52 @@ mod test { green_value.pretty_print(); } + + #[test] + fn test_gram() { + let eps = 1E-12; + + let nsources = 5; + let ntargets = 3; + + let sources = rlst::dense::rlst_rand_mat![f64, (nsources, 3)]; + let targets = rlst::dense::rlst_rand_mat![f64, (ntargets, 3)]; + + let mut gram = rlst::dense::rlst_rand_mat![f64, (ntargets, nsources)]; + + let kernel = Laplace3dKernel::::default(); + + kernel.gram( + EvalType::Value, + sources.data(), + targets.data(), + gram.data_mut(), + ); + + for i in 0..ntargets { + let target = [ + targets.data()[i], + targets.data()[ntargets+i], + targets.data()[ntargets * 2 + i] + ]; + + for j in 0..nsources { + let source = [ + sources.data()[j], + sources.data()[nsources + j], + sources.data()[nsources * 2 + j] + ]; + + let result = gram[[i, j]]; + let mut expected = vec![0f64]; + evaluate_laplace_one_target_one_source::(EvalType::Value, &target, &source, &mut expected); + assert_relative_eq!( + expected[0], + result, + epsilon = eps + ); + + } + } + } } diff --git a/kernel/src/traits.rs b/kernel/src/traits.rs index 8e4943d2..67aa1cd0 100644 --- a/kernel/src/traits.rs +++ b/kernel/src/traits.rs @@ -1,65 +1,65 @@ -//! Trait for Green's function kernels -use crate::types::EvalType; -use crate::types::KernelType; -use bempp_traits::types::Scalar; +// //! Trait for Green's function kernels +// use crate::types::EvalType; +// use crate::types::KernelType; +// use bempp_traits::types::Scalar; -use rayon::ThreadPool; +// use rayon::ThreadPool; -/// Interface to evaluating Green's functions for given sources and targets. -pub trait Kernel { - type T: Scalar; +// /// Interface to evaluating Green's functions for given sources and targets. +// pub trait Kernel { +// type T: Scalar; - /// Single threaded evaluation of Green's functions. - /// - /// - `eval_type`: Either [EvalType::Value] to only return Green's function values - /// or [EvalType::ValueDeriv] to return values and derivatives. - /// - `sources`: A slice defining the source points. The points must be given in the form - /// `[x_1, x_2, ... x_N, y_1, y_2, ..., y_N, z_1, z_2, ..., z_N]`, that is - /// the value for each dimension must be continuously contained in the slice. - /// - `targets`: A slice defining the targets. The memory layout is the same as for sources. - /// - `charges`: A slice defining the charges. For each source point there needs to be one charge. - /// - `result`: The result array. If the kernel is scalar and `eval_type` has the value [EvalType::Value] - /// then `result` has the same number of elemens as there are targets. For a scalar kernel - /// in three dimensional space if [EvalType::ValueDeriv] was chosen then `result` contains - /// for each target in consecutive order the value of the kernel and the three components - /// of its derivative. - fn evaluate_st( - &self, - eval_type: EvalType, - sources: &[::Real], - targets: &[::Real], - charges: &[Self::T], - result: &mut [Self::T], - ); +// /// Single threaded evaluation of Green's functions. +// /// +// /// - `eval_type`: Either [EvalType::Value] to only return Green's function values +// /// or [EvalType::ValueDeriv] to return values and derivatives. +// /// - `sources`: A slice defining the source points. The points must be given in the form +// /// `[x_1, x_2, ... x_N, y_1, y_2, ..., y_N, z_1, z_2, ..., z_N]`, that is +// /// the value for each dimension must be continuously contained in the slice. +// /// - `targets`: A slice defining the targets. The memory layout is the same as for sources. +// /// - `charges`: A slice defining the charges. For each source point there needs to be one charge. +// /// - `result`: The result array. If the kernel is scalar and `eval_type` has the value [EvalType::Value] +// /// then `result` has the same number of elemens as there are targets. For a scalar kernel +// /// in three dimensional space if [EvalType::ValueDeriv] was chosen then `result` contains +// /// for each target in consecutive order the value of the kernel and the three components +// /// of its derivative. +// fn evaluate_st( +// &self, +// eval_type: EvalType, +// sources: &[::Real], +// targets: &[::Real], +// charges: &[Self::T], +// result: &mut [Self::T], +// ); - /// Multi-threaded evaluation of a Green's function kernel. - /// - /// The method parallelizes over the given targets. It expects a Rayon [ThreadPool] - /// in which the multi-threaded execution can be scheduled. - fn evaluate_mt( - &self, - eval_type: EvalType, - sources: &[::Real], - targets: &[::Real], - charges: &[Self::T], - result: &mut [Self::T], - thread_pool: &ThreadPool, - ); +// /// Multi-threaded evaluation of a Green's function kernel. +// /// +// /// The method parallelizes over the given targets. It expects a Rayon [ThreadPool] +// /// in which the multi-threaded execution can be scheduled. +// fn evaluate_mt( +// &self, +// eval_type: EvalType, +// sources: &[::Real], +// targets: &[::Real], +// charges: &[Self::T], +// result: &mut [Self::T], +// thread_pool: &ThreadPool, +// ); - /// Return the type of the kernel. - fn kernel_type(&self) -> &KernelType; +// /// Return the type of the kernel. +// fn kernel_type(&self) -> &KernelType; - /// Return the domain component count of the Green's fct. - /// - /// For a scalar kernel this is `1`. - fn domain_component_count(&self) -> usize; +// /// Return the domain component count of the Green's fct. +// /// +// /// For a scalar kernel this is `1`. +// fn domain_component_count(&self) -> usize; - /// Return the space dimension. - fn space_dimension(&self) -> usize; +// /// Return the space dimension. +// fn space_dimension(&self) -> usize; - /// Return the range component count of the Green's fct. - /// - /// For a scalar kernel this is `1` if [EvalType::Value] is - /// given, and `4` if [EvalType::ValueDeriv] is given. - fn range_component_count(&self, eval_type: EvalType) -> usize; -} +// /// Return the range component count of the Green's fct. +// /// +// /// For a scalar kernel this is `1` if [EvalType::Value] is +// /// given, and `4` if [EvalType::ValueDeriv] is given. +// fn range_component_count(&self, eval_type: EvalType) -> usize; +// } diff --git a/kernel/src/types.rs b/kernel/src/types.rs index ba32e1dd..2f7c9e41 100644 --- a/kernel/src/types.rs +++ b/kernel/src/types.rs @@ -1,23 +1,23 @@ -use bempp_traits::types::c64; +// use bempp_traits::types::c64; -/// Evaluation Mode. -/// -/// - `Value`: Declares that only values required. -/// - `Deriv`: Declare that only derivative required. -/// - `ValueDeriv` Both values and derivatives required. -#[derive(Clone, Copy)] -pub enum EvalType { - Value, - ValueDeriv, -} +// /// Evaluation Mode. +// /// +// /// - `Value`: Declares that only values required. +// /// - `Deriv`: Declare that only derivative required. +// /// - `ValueDeriv` Both values and derivatives required. +// #[derive(Clone, Copy)] +// pub enum EvalType { +// Value, +// ValueDeriv, +// } -/// This enum defines the type of the kernel. -#[derive(Clone, Copy)] -pub enum KernelType { - /// The Laplace kernel defined as g(x, y) = 1 / (4 pi | x- y| ) - Laplace, - /// The Helmholtz kernel defined as g(x, y) = exp( 1j * k * | x- y| ) / (4 pi | x- y| ) - Helmholtz(c64), - /// The modified Helmholtz kernel defined as g(x, y) = exp( -omega * | x- y| ) / (4 * pi * | x- y |) - ModifiedHelmholtz(f64), -} +// /// This enum defines the type of the kernel. +// #[derive(Clone, Copy)] +// pub enum KernelType { +// /// The Laplace kernel defined as g(x, y) = 1 / (4 pi | x- y| ) +// Laplace, +// /// The Helmholtz kernel defined as g(x, y) = exp( 1j * k * | x- y| ) / (4 pi | x- y| ) +// Helmholtz(c64), +// /// The modified Helmholtz kernel defined as g(x, y) = exp( -omega * | x- y| ) / (4 * pi * | x- y |) +// ModifiedHelmholtz(f64), +// } diff --git a/traits/Cargo.toml b/traits/Cargo.toml index 9b642c10..5cac16d0 100644 --- a/traits/Cargo.toml +++ b/traits/Cargo.toml @@ -22,3 +22,4 @@ crate-type = ["lib", "cdylib"] cauchy="0.4.*" thiserror="1.*" num = "0.4" +rayon = "1.7" \ No newline at end of file diff --git a/traits/src/field.rs b/traits/src/field.rs index 93a67b99..8b7b0f2c 100644 --- a/traits/src/field.rs +++ b/traits/src/field.rs @@ -19,6 +19,7 @@ where &self, expansion_order: usize, domain: Self::Domain, + // ); ) -> Self::M2LOperators; fn ncoeffs(&self, expansion_order: usize) -> usize; diff --git a/traits/src/kernel.rs b/traits/src/kernel.rs index 2ca72d13..69b7d749 100644 --- a/traits/src/kernel.rs +++ b/traits/src/kernel.rs @@ -1,27 +1,95 @@ -//! Traits for creating integral equation kernels. +// //! Traits for creating integral equation kernels. -/// Interface for FMM kernels. + +//! Trait for Green's function kernels + +use rayon::ThreadPool; + +use crate::types::{Scalar, c64}; + +/// Evaluation Mode. +/// +/// - `Value`: Declares that only values required. +/// - `Deriv`: Declare that only derivative required. +/// - `ValueDeriv` Both values and derivatives required. +#[derive(Clone, Copy)] +pub enum EvalType { + Value, + ValueDeriv, +} + +/// This enum defines the type of the kernel. +#[derive(Clone, Copy)] +pub enum KernelType { + /// The Laplace kernel defined as g(x, y) = 1 / (4 pi | x- y| ) + Laplace, + /// The Helmholtz kernel defined as g(x, y) = exp( 1j * k * | x- y| ) / (4 pi | x- y| ) + Helmholtz(c64), + /// The modified Helmholtz kernel defined as g(x, y) = exp( -omega * | x- y| ) / (4 * pi * | x- y |) + ModifiedHelmholtz(f64), +} + +/// Interface to evaluating Green's functions for given sources and targets. pub trait Kernel { - /// Space dimensions for the input of the kernel. - fn dim(&self) -> usize; + type T: Scalar; - /// Dimensionality of the output values. - fn value_dimension(&self) -> usize; + /// Single threaded evaluation of Green's functions. + /// + /// - `eval_type`: Either [EvalType::Value] to only return Green's function values + /// or [EvalType::ValueDeriv] to return values and derivatives. + /// - `sources`: A slice defining the source points. The points must be given in the form + /// `[x_1, x_2, ... x_N, y_1, y_2, ..., y_N, z_1, z_2, ..., z_N]`, that is + /// the value for each dimension must be continuously contained in the slice. + /// - `targets`: A slice defining the targets. The memory layout is the same as for sources. + /// - `charges`: A slice defining the charges. For each source point there needs to be one charge. + /// - `result`: The result array. If the kernel is scalar and `eval_type` has the value [EvalType::Value] + /// then `result` has the same number of elements as there are targets. For a scalar kernel + /// in three dimensional space if [EvalType::ValueDeriv] was chosen then `result` contains + /// for each target in consecutive order the value of the kernel and the three components + /// of its derivative. + fn evaluate_st( + &self, + eval_type: EvalType, + sources: &[::Real], + targets: &[::Real], + charges: &[Self::T], + result: &mut [Self::T], + ); - /// Return of the kernel is singular. + /// Multi-threaded evaluation of a Green's function kernel. /// - /// A singular kernel is not defined - /// when sources and charges are identical. - fn is_singular(&self) -> bool; + /// The method parallelizes over the given targets. It expects a Rayon [ThreadPool] + /// in which the multi-threaded execution can be scheduled. + fn evaluate_mt( + &self, + eval_type: EvalType, + sources: &[::Real], + targets: &[::Real], + charges: &[Self::T], + result: &mut [Self::T], + thread_pool: &ThreadPool, + ); - /// Evaluate the potential kernel. - fn potential(&self, sources: &[f64], charges: &[f64], targets: &[f64], potentials: &mut [f64]); + /// Return the type of the kernel. + fn kernel_type(&self) -> &KernelType; - /// Evaluate the Gram matrix. - fn gram(&self, sources: &[f64], targets: &[f64], result: &mut Vec); + /// Return the domain component count of the Green's fct. + /// + /// For a scalar kernel this is `1`. + fn domain_component_count(&self) -> usize; - /// Scale the kernel to a given level of an associated tree. - fn scale(&self, level: u64) -> f64; + /// Return the space dimension. + fn space_dimension(&self) -> usize; - fn kernel(&self, source: &[f64], target: &[f64]) -> f64; + /// Return the range component count of the Green's fct. + /// + /// For a scalar kernel this is `1` if [EvalType::Value] is + /// given, and `4` if [EvalType::ValueDeriv] is given. + fn range_component_count(&self, eval_type: EvalType) -> usize; + + // Return a Gram matrix between the sources and targets + fn gram(&self, eval_type: EvalType, sources: &[::Real], targets: &[::Real], result: &mut [Self::T]); + + // Scale the kernel to a given level of the associated tree, for the FMM. + fn scale(&self, level: u64) -> f64; } diff --git a/tree/src/implementations/impl_morton.rs b/tree/src/implementations/impl_morton.rs index f8b147c0..b9899c02 100644 --- a/tree/src/implementations/impl_morton.rs +++ b/tree/src/implementations/impl_morton.rs @@ -722,20 +722,23 @@ impl MortonKey { .unwrap(); let max_conv_point = grid[max_index]; - let sums: Vec = surface - .chunks(dim as usize) - .map(|point| point.iter().sum()) + + let ncoeffs = surface.len() /3; + let sums: Vec<_> = (0..ncoeffs) + .map(|i| surface[i] + surface[ncoeffs + i] + surface[2*ncoeffs + i]) .collect(); + let max_index = sums .iter() .enumerate() .max_by(|a, b| a.1.partial_cmp(b.1).unwrap()) .map(|(index, _)| index) .unwrap(); + let max_surface_point = [ - surface[max_index * dim as usize], - surface[max_index * (dim as usize) + 1], - surface[max_index * (dim as usize) + 2], + surface[max_index], + surface[max_index + ncoeffs], + surface[max_index + 2 * ncoeffs], ]; let diff = max_conv_point @@ -754,6 +757,9 @@ impl MortonKey { grid } + /// Compute surface grid for KiFMM at this Morton key. + /// + /// Returned in row major order, [x_1, x_2, ... x_N, y_1, y_2, ..., y_N, z_1, z_2, ..., z_N] pub fn surface_grid(&self, order: usize) -> (Vec, Vec) { let dim = 3; let n_coeffs = 6 * (order - 1).pow(2) + 2; @@ -772,9 +778,9 @@ impl MortonKey { || (j >= lower && k >= lower && (i == lower || i == upper)) || (k >= lower && i >= lower && (j == lower || j == upper)) { - surface[dim * idx] = i as f64; - surface[dim * idx + 1] = j as f64; - surface[dim * idx + 2] = k as f64; + surface[idx] = i as f64; + surface[ (dim-2) * n_coeffs + idx] = j as f64; + surface[ (dim-1) * n_coeffs + idx] = k as f64; idx += 1; } } @@ -804,13 +810,13 @@ impl MortonKey { let centre = self.centre(domain); - let n = surface.len() / 3; - for i in 0..n { - scaled_surface[i * dim] = (surface[i * dim] * (dilated_diameter[0] / 2.0)) + centre[0]; - scaled_surface[i * dim + 1] = - (surface[i * dim + 1] * (dilated_diameter[1] / 2.0)) + centre[1]; - scaled_surface[i * dim + 2] = - (surface[i * dim + 2] * (dilated_diameter[2] / 2.0)) + centre[2]; + let ncoeffs = surface.len() / 3; + for i in 0..ncoeffs { + scaled_surface[i] = (surface[i] * (dilated_diameter[0] / 2.0)) + centre[0]; + scaled_surface[ (dim-2) * ncoeffs + i] = + (surface[ (dim-2) * ncoeffs + i] * (dilated_diameter[1] / 2.0)) + centre[1]; + scaled_surface[ (dim-1) * ncoeffs + i] = + (surface[ (dim-1) * ncoeffs + i] * (dilated_diameter[2] / 2.0)) + centre[2]; } scaled_surface @@ -1713,9 +1719,9 @@ mod test { // Test ordering. for i in 0..ncoeffs { let point = vec![ - surface_idxs[i * dim], - surface_idxs[i * dim + 1], - surface_idxs[i * dim + 2], + surface_idxs[i], + surface_idxs[i + ncoeffs ], + surface_idxs[i + 2 * ncoeffs], ]; assert_eq!(point, expected[i]); } @@ -1727,10 +1733,13 @@ mod test { let min_x = surface .iter() - .step_by(3) + .take(ncoeffs) .fold(f64::INFINITY, |a, &b| a.min(b)); - let max_x = surface.iter().step_by(3).fold(0f64, |a, &b| a.max(b)); + let max_x = surface + .iter() + .take(ncoeffs) + .fold(0f64, |a, &b| a.max(b)); let diam_x = max_x - min_x; @@ -1742,12 +1751,22 @@ mod test { let level = 2; let key = MortonKey::from_point(&point, &domain, level); let surface = key.compute_surface(&domain, order, alpha); - let scaled_surface = key.scale_surface(surface.clone(), &domain, alpha); let expected = key.centre(&domain); - let c_x = surface.iter().step_by(3).fold(0f64, |a, &b| a + b) / (ncoeffs as f64); - let c_y = surface.iter().skip(1).step_by(3).fold(0f64, |a, &b| a + b) / (ncoeffs as f64); - let c_z = surface.iter().skip(2).step_by(3).fold(0f64, |a, &b| a + b) / (ncoeffs as f64); + let c_x = surface + .iter() + .take(ncoeffs) + .fold(0f64, |a, &b| a + b) / (ncoeffs as f64); + let c_y = surface + .iter() + .skip(ncoeffs) + .take(ncoeffs) + .fold(0f64, |a, &b| a + b) / (ncoeffs as f64); + let c_z = surface + .iter() + .skip(2*ncoeffs) + .take(ncoeffs) + .fold(0f64, |a, &b| a + b) / (ncoeffs as f64); let result = vec![c_x, c_y, c_z]; From c37e756f4a03d335303ded130feae4a3f2df24eb Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Mon, 3 Jul 2023 18:29:12 +0100 Subject: [PATCH 05/40] Run formatter --- field/src/lib.rs | 126 +++++++++++++----------- kernel/src/helpers.rs | 2 +- kernel/src/laplace_3d.rs | 83 ++++++++-------- traits/src/field.rs | 2 +- traits/src/kernel.rs | 11 ++- tree/src/implementations/impl_morton.rs | 41 ++++---- 6 files changed, 138 insertions(+), 127 deletions(-) diff --git a/field/src/lib.rs b/field/src/lib.rs index 7e235a20..ee709c23 100644 --- a/field/src/lib.rs +++ b/field/src/lib.rs @@ -7,19 +7,26 @@ use ndarray_ndimage::{pad, PadMode}; use ndrustfft::{ndfft, ndfft_r2c, Complex, FftHandler, R2cFftHandler}; use rlst; -use rlst::common::traits::{NewLikeSelf, Transpose, NewLikeTranspose}; -use rlst::common::{tools::PrettyPrint, traits::{Copy, Eval}}; -use rlst::dense::{traits::*, rlst_fixed_mat, rlst_mat, rlst_pointer_mat, Shape, Dot}; -use rlst::algorithms::traits::svd::{Svd, Mode}; use rlst::algorithms::linalg::LinAlg; -use rlst::dense::{matrix::{Matrix}, base_matrix::{BaseMatrix}, data_container::{VectorContainer}}; - -use bempp_traits::{field::FieldTranslationData, kernel::{Kernel, EvalType, KernelType}, types::{Scalar}}; +use rlst::algorithms::traits::svd::{Mode, Svd}; +use rlst::common::traits::{NewLikeSelf, NewLikeTranspose, Transpose}; +use rlst::common::{ + tools::PrettyPrint, + traits::{Copy, Eval}, +}; +use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix}; +use rlst::dense::{rlst_fixed_mat, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}; + +use bempp_traits::{ + field::FieldTranslationData, + kernel::{EvalType, Kernel, KernelType}, + types::Scalar, +}; use bempp_tree::types::{domain::Domain, morton::MortonKey}; type FftM2LEntry = ArrayBase>, Dim<[usize; 3]>>; -type SvdM2lEntry = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; - +type SvdM2lEntry = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; #[derive(Default)] pub struct FftFieldTranslationNaiveKiFmm @@ -214,20 +221,20 @@ pub fn compute_transfer_vectors() -> Vec { // let target_check_surface = t.target.compute_surface(&domain, expansion_order, self.alpha); // // TODO: Remove dim -// let dim = 3; +// let dim = 3; // // Find min target // let ncoeffs: usize = target_check_surface.len() / dim; // let sums: Vec<_> = (0..ncoeffs) // .map(|i| target_check_surface[i] + target_check_surface[ncoeffs + i] + target_check_surface[2*ncoeffs + i]) // .collect(); - + // let min_index = sums // .iter() // .enumerate() // .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) // .map(|(index, _)| index) // .unwrap(); - + // let min_target = [ // target_check_surface[min_index], // target_check_surface[min_index + ncoeffs], @@ -312,7 +319,7 @@ where expansion_order: usize, domain: Self::Domain, ) -> Self::M2LOperators { - // ){ + // ){ // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) // Compute interaction matrices between source and unique targets, defined by unique transfer vectors @@ -322,22 +329,29 @@ where // let mut se2tc_fat: SvdM2lEntry = // Array2::zeros((nrows, ncols * self.transfer_vectors.len())); let ntransfer_vectors = self.transfer_vectors.len(); - let mut se2tc_fat = rlst_mat![f64, (nrows, ncols*ntransfer_vectors)]; + let mut se2tc_fat = rlst_mat![f64, (nrows, ncols * ntransfer_vectors)]; // let mut se2tc_thin: SvdM2lEntry = // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); - let mut se2tc_thin= rlst_mat![f64, (nrows*ntransfer_vectors, ncols)]; - + let mut se2tc_thin = rlst_mat![f64, (nrows * ntransfer_vectors, ncols)]; for (i, t) in self.transfer_vectors.iter().enumerate() { - let source_equivalent_surface = t.source.compute_surface(&domain, expansion_order, self.alpha); + let source_equivalent_surface = + t.source + .compute_surface(&domain, expansion_order, self.alpha); let nsources = source_equivalent_surface.len() / self.kernel.space_dimension(); - let source_equivalent_surface = unsafe { rlst_pointer_mat!['a, f64, source_equivalent_surface.as_ptr(), (nsources, self.kernel.space_dimension()), (1, nsources)] }; - - let target_check_surface = t.target.compute_surface(&domain, expansion_order, self.alpha); + let source_equivalent_surface = unsafe { + rlst_pointer_mat!['a, f64, source_equivalent_surface.as_ptr(), (nsources, self.kernel.space_dimension()), (1, nsources)] + }; + + let target_check_surface = + t.target + .compute_surface(&domain, expansion_order, self.alpha); let ntargets = target_check_surface.len() / self.kernel.space_dimension(); - let target_check_surface = unsafe { rlst_pointer_mat!['a, f64, target_check_surface.as_ptr(), (ntargets, self.kernel.space_dimension()), (1, ntargets)] }; - + let target_check_surface = unsafe { + rlst_pointer_mat!['a, f64, target_check_surface.as_ptr(), (ntargets, self.kernel.space_dimension()), (1, ntargets)] + }; + let mut tmp_gram = rlst_mat![f64, (ntargets, nsources)]; self.kernel.gram( @@ -351,9 +365,9 @@ where let lidx_sources = i * ncols; let ridx_sources = lidx_sources + ncols; - let block_size = nrows*ncols; + let block_size = nrows * ncols; let start_idx = i * block_size; - let end_idx = start_idx+block_size; + let end_idx = start_idx + block_size; let mut block = se2tc_fat.get_slice_mut(start_idx, end_idx); block.copy_from_slice(tmp_gram.data_mut()); @@ -366,13 +380,11 @@ where let start_idx = j * ntransfer_vectors * nrows + i * nrows; let end_idx = start_idx + nrows; let mut block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); - let mut gram_column = tmp_gram.get_slice_mut(j*ncols, j*ncols+ncols); + let mut gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols); block_column.copy_from_slice(gram_column); } - } - let left: usize = 0; let right: usize = self.k; let (sigma, u, vt) = se2tc_fat.linalg().svd(Mode::All, Mode::Slim).unwrap(); @@ -392,7 +404,7 @@ where let (mvt, nvt) = vt.shape(); let vt = vt.block((0, 0), (right, nvt)).eval(); // println!("u {:?} {:?} {:?}", u.shape(), sigma_mat.shape(), vt.shape()); - + // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); // let u = u.unwrap().slice(s![.., left..right]).to_owned(); // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); @@ -401,16 +413,16 @@ where let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::All, Mode::All).unwrap(); let st = st.unwrap(); let (mst, nst) = st.shape(); - let st_block = st.block((0, 0), (self.k, nst)); + let st_block = st.block((0, 0), (self.k, nst)); let s = st_block.transpose().eval(); - let st = st.block((0, 0), (self.k, nst)).eval(); + let st = st.block((0, 0), (self.k, nst)).eval(); // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); // Store compressed M2L operators // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); - let mut c = rlst_mat![f64, (self.k, self.k*ntransfer_vectors)]; + let mut c = rlst_mat![f64, (self.k, self.k * ntransfer_vectors)]; for i in 0..self.transfer_vectors.len() { // let v_lidx = i * ncols; @@ -419,22 +431,24 @@ where // let block_size = right*ncols; // let start_idx = i * block_size; - // let end_idx = start_idx+block_size; - let top_left = (0, i*ncols); + // let end_idx = start_idx+block_size; + let top_left = (0, i * ncols); let dim = (self.k, ncols); let vt_block = vt.block(top_left, dim); let tmp = sigma_mat.dot(&vt_block.dot(&s)); - // let tmp = sigma.dot(&vt_sub.dot(&st.t())); - // let lidx = i * self.k; - // let ridx = lidx + self.k; + // let tmp = sigma.dot(&vt_sub.dot(&st.t())); + // let lidx = i * self.k; + // let ridx = lidx + self.k; - let top_left = (0, i*self.k); + let top_left = (0, i * self.k); let dim = (self.k, ncols); // let mut c_block =; - c.block_mut(top_left, dim).data_mut().copy_from_slice(tmp.data()); - - // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); + c.block_mut(top_left, dim) + .data_mut() + .copy_from_slice(tmp.data()); + + // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); } (u, st, c) @@ -569,7 +583,7 @@ where impl SvdFieldTranslationKiFmm where - T: Kernel + Default, + T: Kernel + Default, { pub fn new( kernel: T, @@ -578,16 +592,19 @@ where domain: Domain, alpha: f64, ) -> Self { + let dummy = rlst_mat![f64, (1, 1)]; - let dummy = rlst_mat![f64, (1,1)]; - // TODO: There should be a default for matrices to make code cleaner. let mut result = SvdFieldTranslationKiFmm { alpha, k: 100, kernel, - m2l: (dummy.new_like_self().eval(), dummy.new_like_self().eval(), dummy.new_like_self().eval()), - transfer_vectors: Vec::new() + m2l: ( + dummy.new_like_self().eval(), + dummy.new_like_self().eval(), + dummy.new_like_self().eval(), + ), + transfer_vectors: Vec::new(), }; if let Some(k) = k { @@ -723,19 +740,16 @@ mod test { #[test] fn test_svd() { - let kernel = Laplace3dKernel::::default(); let k = 100; let order = 2; - let domain = Domain {origin: [0., 0., 0.], diameter: [1., 1., 1.]}; + let domain = Domain { + origin: [0., 0., 0.], + diameter: [1., 1., 1.], + }; let alpha_inner = 1.05; - - let m2l_data_svd = SvdFieldTranslationKiFmm::new( - kernel, - Some(k), - order, - domain, - alpha_inner, - ); + + let m2l_data_svd = + SvdFieldTranslationKiFmm::new(kernel, Some(k), order, domain, alpha_inner); } -} \ No newline at end of file +} diff --git a/kernel/src/helpers.rs b/kernel/src/helpers.rs index 9c36dc80..86e793bd 100644 --- a/kernel/src/helpers.rs +++ b/kernel/src/helpers.rs @@ -1,6 +1,6 @@ use bempp_traits::{ + kernel::{EvalType, Kernel}, types::Scalar, - kernel::{EvalType, Kernel} }; pub(crate) fn check_dimensions_evaluate( diff --git a/kernel/src/laplace_3d.rs b/kernel/src/laplace_3d.rs index 8feabbc5..fd5c5db2 100644 --- a/kernel/src/laplace_3d.rs +++ b/kernel/src/laplace_3d.rs @@ -1,9 +1,9 @@ //! Implementation of the Laplace kernel -use bempp_traits:: - { - kernel::{Kernel, EvalType, KernelType}, - types::{Scalar}, fmm::Fmm - }; +use bempp_traits::{ + fmm::Fmm, + kernel::{EvalType, Kernel, KernelType}, + types::Scalar, +}; use num; use std::marker::PhantomData; @@ -93,25 +93,27 @@ where laplace_component_count(eval_type) } - fn gram(&self, eval_type: EvalType, sources: &[::Real], targets: &[::Real], result: &mut [Self::T]) { - + fn gram( + &self, + eval_type: EvalType, + sources: &[::Real], + targets: &[::Real], + result: &mut [Self::T], + ) { let ntargets = targets.len() / self.space_dimension(); - let nsources= sources.len() / self.space_dimension(); + let nsources = sources.len() / self.space_dimension(); for i in 0..ntargets { - let target = [ - targets[i], - targets[ntargets + i], - targets[2 * ntargets + i], - ]; + let target = [targets[i], targets[ntargets + i], targets[2 * ntargets + i]]; for j in 0..nsources { - let source = [ - sources[j], - sources[nsources + j], - sources[2 * nsources + j], - ]; - let idx = i+ntargets*j; - - evaluate_laplace_one_target_one_source::(eval_type, &target, &source, &mut result[idx..idx+1]); + let source = [sources[j], sources[nsources + j], sources[2 * nsources + j]]; + let idx = i + ntargets * j; + + evaluate_laplace_one_target_one_source::( + eval_type, + &target, + &source, + &mut result[idx..idx + 1], + ); } } } @@ -121,13 +123,12 @@ where } } -pub fn evaluate_laplace_one_target_one_source ( +pub fn evaluate_laplace_one_target_one_source( eval_type: EvalType, target: &[::Real], source: &[::Real], result: &mut [T], ) { - let m_inv_4pi = num::cast::(0.25 * f64::FRAC_1_PI()).unwrap(); let zero_real = ::zero(); let one_real = ::one(); @@ -136,11 +137,9 @@ pub fn evaluate_laplace_one_target_one_source ( EvalType::Value => { let mut my_result = T::zero(); let diff_norm = ((target[0] - source[0]) * (target[0] - source[0]) - + (target[1] - source[1]) - * (target[1] - source[1]) - + (target[2] - source[2]) - * (target[2] - source[2])) - .sqrt(); + + (target[1] - source[1]) * (target[1] - source[1]) + + (target[2] - source[2]) * (target[2] - source[2])) + .sqrt(); let inv_diff_norm = { if diff_norm == zero_real { zero_real @@ -148,7 +147,7 @@ pub fn evaluate_laplace_one_target_one_source ( one_real / diff_norm } }; - + my_result += T::one().mul_real(inv_diff_norm); result[0] = my_result.mul_real(m_inv_4pi) } @@ -174,14 +173,12 @@ pub fn evaluate_laplace_one_target_one_source ( }; let inv_diff_norm_cubed = inv_diff_norm * inv_diff_norm * inv_diff_norm; - result[0] = my_result0.mul_real(m_inv_4pi); result[1] = my_result1.mul_real(m_inv_4pi); result[2] = my_result2.mul_real(m_inv_4pi); result[3] = my_result3.mul_real(m_inv_4pi); } } - } pub fn evaluate_laplace_one_target( @@ -272,7 +269,7 @@ mod test { use rlst; use rlst::common::tools::PrettyPrint; use rlst::common::traits::{Copy, Eval}; - use rlst::dense::{traits::*, rlst_pointer_mat}; + use rlst::dense::{rlst_pointer_mat, traits::*}; #[test] fn test_laplace_3d() { @@ -400,9 +397,9 @@ mod test { let targets = rlst::dense::rlst_rand_mat![f64, (ntargets, 3)]; let mut gram = rlst::dense::rlst_rand_mat![f64, (ntargets, nsources)]; - + let kernel = Laplace3dKernel::::default(); - + kernel.gram( EvalType::Value, sources.data(), @@ -413,26 +410,26 @@ mod test { for i in 0..ntargets { let target = [ targets.data()[i], - targets.data()[ntargets+i], - targets.data()[ntargets * 2 + i] + targets.data()[ntargets + i], + targets.data()[ntargets * 2 + i], ]; for j in 0..nsources { let source = [ sources.data()[j], sources.data()[nsources + j], - sources.data()[nsources * 2 + j] + sources.data()[nsources * 2 + j], ]; let result = gram[[i, j]]; let mut expected = vec![0f64]; - evaluate_laplace_one_target_one_source::(EvalType::Value, &target, &source, &mut expected); - assert_relative_eq!( - expected[0], - result, - epsilon = eps + evaluate_laplace_one_target_one_source::( + EvalType::Value, + &target, + &source, + &mut expected, ); - + assert_relative_eq!(expected[0], result, epsilon = eps); } } } diff --git a/traits/src/field.rs b/traits/src/field.rs index 8b7b0f2c..1885574d 100644 --- a/traits/src/field.rs +++ b/traits/src/field.rs @@ -19,7 +19,7 @@ where &self, expansion_order: usize, domain: Self::Domain, - // ); + // ); ) -> Self::M2LOperators; fn ncoeffs(&self, expansion_order: usize) -> usize; diff --git a/traits/src/kernel.rs b/traits/src/kernel.rs index 69b7d749..ee904754 100644 --- a/traits/src/kernel.rs +++ b/traits/src/kernel.rs @@ -1,11 +1,10 @@ // //! Traits for creating integral equation kernels. - //! Trait for Green's function kernels use rayon::ThreadPool; -use crate::types::{Scalar, c64}; +use crate::types::{c64, Scalar}; /// Evaluation Mode. /// @@ -88,7 +87,13 @@ pub trait Kernel { fn range_component_count(&self, eval_type: EvalType) -> usize; // Return a Gram matrix between the sources and targets - fn gram(&self, eval_type: EvalType, sources: &[::Real], targets: &[::Real], result: &mut [Self::T]); + fn gram( + &self, + eval_type: EvalType, + sources: &[::Real], + targets: &[::Real], + result: &mut [Self::T], + ); // Scale the kernel to a given level of the associated tree, for the FMM. fn scale(&self, level: u64) -> f64; diff --git a/tree/src/implementations/impl_morton.rs b/tree/src/implementations/impl_morton.rs index b9899c02..6db2acbe 100644 --- a/tree/src/implementations/impl_morton.rs +++ b/tree/src/implementations/impl_morton.rs @@ -722,10 +722,9 @@ impl MortonKey { .unwrap(); let max_conv_point = grid[max_index]; - - let ncoeffs = surface.len() /3; + let ncoeffs = surface.len() / 3; let sums: Vec<_> = (0..ncoeffs) - .map(|i| surface[i] + surface[ncoeffs + i] + surface[2*ncoeffs + i]) + .map(|i| surface[i] + surface[ncoeffs + i] + surface[2 * ncoeffs + i]) .collect(); let max_index = sums @@ -734,7 +733,7 @@ impl MortonKey { .max_by(|a, b| a.1.partial_cmp(b.1).unwrap()) .map(|(index, _)| index) .unwrap(); - + let max_surface_point = [ surface[max_index], surface[max_index + ncoeffs], @@ -758,7 +757,7 @@ impl MortonKey { } /// Compute surface grid for KiFMM at this Morton key. - /// + /// /// Returned in row major order, [x_1, x_2, ... x_N, y_1, y_2, ..., y_N, z_1, z_2, ..., z_N] pub fn surface_grid(&self, order: usize) -> (Vec, Vec) { let dim = 3; @@ -779,8 +778,8 @@ impl MortonKey { || (k >= lower && i >= lower && (j == lower || j == upper)) { surface[idx] = i as f64; - surface[ (dim-2) * n_coeffs + idx] = j as f64; - surface[ (dim-1) * n_coeffs + idx] = k as f64; + surface[(dim - 2) * n_coeffs + idx] = j as f64; + surface[(dim - 1) * n_coeffs + idx] = k as f64; idx += 1; } } @@ -813,10 +812,10 @@ impl MortonKey { let ncoeffs = surface.len() / 3; for i in 0..ncoeffs { scaled_surface[i] = (surface[i] * (dilated_diameter[0] / 2.0)) + centre[0]; - scaled_surface[ (dim-2) * ncoeffs + i] = - (surface[ (dim-2) * ncoeffs + i] * (dilated_diameter[1] / 2.0)) + centre[1]; - scaled_surface[ (dim-1) * ncoeffs + i] = - (surface[ (dim-1) * ncoeffs + i] * (dilated_diameter[2] / 2.0)) + centre[2]; + scaled_surface[(dim - 2) * ncoeffs + i] = + (surface[(dim - 2) * ncoeffs + i] * (dilated_diameter[1] / 2.0)) + centre[1]; + scaled_surface[(dim - 1) * ncoeffs + i] = + (surface[(dim - 1) * ncoeffs + i] * (dilated_diameter[2] / 2.0)) + centre[2]; } scaled_surface @@ -1720,7 +1719,7 @@ mod test { for i in 0..ncoeffs { let point = vec![ surface_idxs[i], - surface_idxs[i + ncoeffs ], + surface_idxs[i + ncoeffs], surface_idxs[i + 2 * ncoeffs], ]; assert_eq!(point, expected[i]); @@ -1736,10 +1735,7 @@ mod test { .take(ncoeffs) .fold(f64::INFINITY, |a, &b| a.min(b)); - let max_x = surface - .iter() - .take(ncoeffs) - .fold(0f64, |a, &b| a.max(b)); + let max_x = surface.iter().take(ncoeffs).fold(0f64, |a, &b| a.max(b)); let diam_x = max_x - min_x; @@ -1753,20 +1749,19 @@ mod test { let surface = key.compute_surface(&domain, order, alpha); let expected = key.centre(&domain); - let c_x = surface - .iter() - .take(ncoeffs) - .fold(0f64, |a, &b| a + b) / (ncoeffs as f64); + let c_x = surface.iter().take(ncoeffs).fold(0f64, |a, &b| a + b) / (ncoeffs as f64); let c_y = surface .iter() .skip(ncoeffs) .take(ncoeffs) - .fold(0f64, |a, &b| a + b) / (ncoeffs as f64); + .fold(0f64, |a, &b| a + b) + / (ncoeffs as f64); let c_z = surface .iter() - .skip(2*ncoeffs) + .skip(2 * ncoeffs) .take(ncoeffs) - .fold(0f64, |a, &b| a + b) / (ncoeffs as f64); + .fold(0f64, |a, &b| a + b) + / (ncoeffs as f64); let result = vec![c_x, c_y, c_z]; From 4c7564638707778b348ed76d663af928168ff9a9 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Mon, 3 Jul 2023 18:45:07 +0100 Subject: [PATCH 06/40] Fix up both svd field calcs --- field/src/lib.rs | 280 +++++++++++++++++++++++++++++------------------ 1 file changed, 176 insertions(+), 104 deletions(-) diff --git a/field/src/lib.rs b/field/src/lib.rs index ee709c23..43405f5c 100644 --- a/field/src/lib.rs +++ b/field/src/lib.rs @@ -28,7 +28,7 @@ type FftM2LEntry = ArrayBase>, Dim<[usize; 3]>>; type SvdM2lEntry = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; -#[derive(Default)] +// #[derive(Default)] pub struct FftFieldTranslationNaiveKiFmm where T: Kernel + Default, @@ -375,7 +375,6 @@ where // // .slice_mut(s![.., lidx_sources..ridx_sources]) // // .assign(&tmp_gram); - // se2tc_fat.block_mut((0, lidx_sources), tmp_gram.shape()); for j in 0..ncols { let start_idx = j * ntransfer_vectors * nrows + i * nrows; let end_idx = start_idx + nrows; @@ -403,7 +402,6 @@ where let (mvt, nvt) = vt.shape(); let vt = vt.block((0, 0), (right, nvt)).eval(); - // println!("u {:?} {:?} {:?}", u.shape(), sigma_mat.shape(), vt.shape()); // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); // let u = u.unwrap().slice(s![.., left..right]).to_owned(); @@ -456,130 +454,204 @@ where } } -// impl FieldTranslationData for SvdFieldTranslationNaiveKiFmm -// where -// T: Kernel + Default, -// { -// type TransferVector = Vec; -// type M2LOperators = (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry); -// type Domain = Domain; +impl FieldTranslationData for SvdFieldTranslationNaiveKiFmm +where + T: Kernel + Default, +{ + type TransferVector = Vec; + type M2LOperators = (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry); + type Domain = Domain; -// fn compute_transfer_vectors(&self) -> Self::TransferVector { -// compute_transfer_vectors() -// } + fn compute_transfer_vectors(&self) -> Self::TransferVector { + compute_transfer_vectors() + } -// fn ncoeffs(&self, expansion_order: usize) -> usize { -// 6 * (expansion_order - 1).pow(2) + 2 -// } + fn ncoeffs(&self, expansion_order: usize) -> usize { + 6 * (expansion_order - 1).pow(2) + 2 + } -// fn compute_m2l_operators( -// &self, -// expansion_order: usize, -// domain: Self::Domain, -// ) -> Self::M2LOperators { -// // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) + fn compute_m2l_operators<'a>( + &self, + expansion_order: usize, + domain: Self::Domain, + ) -> Self::M2LOperators { + // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) -// // Compute interaction matrices between source and unique targets, defined by unique transfer vectors -// let nrows = self.ncoeffs(expansion_order); -// let ncols = self.ncoeffs(expansion_order); + // Compute interaction matrices between source and unique targets, defined by unique transfer vectors + let nrows = self.ncoeffs(expansion_order); + let ncols = self.ncoeffs(expansion_order); -// let mut se2tc_fat: SvdM2lEntry = -// Array2::zeros((nrows, ncols * self.transfer_vectors.len())); + // let mut se2tc_fat: SvdM2lEntry = + // Array2::zeros((nrows, ncols * self.transfer_vectors.len())); + let ntransfer_vectors = self.transfer_vectors.len(); + let mut se2tc_fat = rlst_mat![f64, (nrows, ncols * ntransfer_vectors)]; -// let mut se2tc_thin: SvdM2lEntry = -// Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); + // let mut se2tc_thin: SvdM2lEntry = + // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); + let mut se2tc_thin = rlst_mat![f64, (nrows * ntransfer_vectors, ncols)]; -// for (i, t) in self.transfer_vectors.iter().enumerate() { -// let source_equivalent_surface = -// t.source.compute_surface(&domain, expansion_order, self.alpha); + for (i, t) in self.transfer_vectors.iter().enumerate() { + let source_equivalent_surface = + t.source + .compute_surface(&domain, expansion_order, self.alpha); + let nsources = source_equivalent_surface.len() / self.kernel.space_dimension(); + let source_equivalent_surface = unsafe { + rlst_pointer_mat!['a, f64, source_equivalent_surface.as_ptr(), (nsources, self.kernel.space_dimension()), (1, nsources)] + }; -// let target_check_surface = -// t.target.compute_surface(&domain, expansion_order, self.alpha); + let target_check_surface = + t.target + .compute_surface(&domain, expansion_order, self.alpha); + let ntargets = target_check_surface.len() / self.kernel.space_dimension(); + let target_check_surface = unsafe { + rlst_pointer_mat!['a, f64, target_check_surface.as_ptr(), (ntargets, self.kernel.space_dimension()), (1, ntargets)] + }; -// let mut tmp_gram = Vec::new(); -// self.kernel.gram( -// &source_equivalent_surface[..], -// &target_check_surface[..], -// &mut tmp_gram, -// ); + let mut tmp_gram = rlst_mat![f64, (ntargets, nsources)]; -// let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); -// let lidx_sources = i * ncols; -// let ridx_sources = lidx_sources + ncols; + self.kernel.gram( + EvalType::Value, + source_equivalent_surface.data(), + target_check_surface.data(), + tmp_gram.data_mut(), + ); -// se2tc_fat -// .slice_mut(s![.., lidx_sources..ridx_sources]) -// .assign(&tmp_gram); + // // let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); + let lidx_sources = i * ncols; + let ridx_sources = lidx_sources + ncols; -// se2tc_thin -// .slice_mut(s![lidx_sources..ridx_sources, ..]) -// .assign(&tmp_gram); -// } + let block_size = nrows * ncols; + let start_idx = i * block_size; + let end_idx = start_idx + block_size; + let mut block = se2tc_fat.get_slice_mut(start_idx, end_idx); + block.copy_from_slice(tmp_gram.data_mut()); -// let left: usize = 0; -// let right: usize = std::cmp::min(self.k, nrows); + // // se2tc_fat + // // .slice_mut(s![.., lidx_sources..ridx_sources]) + // // .assign(&tmp_gram); -// let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + for j in 0..ncols { + let start_idx = j * ntransfer_vectors * nrows + i * nrows; + let end_idx = start_idx + nrows; + let mut block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); + let mut gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols); + block_column.copy_from_slice(gram_column); + } + } -// let u = u.unwrap().slice(s![.., left..right]).to_owned(); -// let sigma = Array2::from_diag(&sigma.slice(s![left..right])); -// let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); + let left: usize = 0; + let right: usize = self.k; + let (sigma, u, vt) = se2tc_fat.linalg().svd(Mode::All, Mode::Slim).unwrap(); -// let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + let u = u.unwrap(); + let vt = vt.unwrap(); -// let st = st.unwrap().slice(s![left..right, ..]).to_owned(); + // Keep 'k' singular values + let mut sigma_mat = rlst_mat![f64, (self.k, self.k)]; + for i in 0..self.k { + sigma_mat[[i, i]] = sigma[i] + } -// // Store compressed M2L operators -// let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); -// for i in 0..self.transfer_vectors.len() { -// let v_lidx = i * ncols; -// let v_ridx = v_lidx + ncols; -// let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); -// let tmp = sigma.dot(&vt_sub.dot(&st.t())); -// let lidx = i * self.k; -// let ridx = lidx + self.k; + let (mu, nu) = u.shape(); + let u = u.block((0, 0), (mu, self.k)).eval(); -// c.slice_mut(s![.., lidx..ridx]).assign(&tmp); -// } + let (mvt, nvt) = vt.shape(); + let vt = vt.block((0, 0), (right, nvt)).eval(); -// (u, st, c) -// } -// } + // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // let u = u.unwrap().slice(s![.., left..right]).to_owned(); + // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); + // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); -// impl SvdFieldTranslationNaiveKiFmm -// where -// T: Kernel + Default, -// { -// pub fn new( -// kernel: T, -// k: Option, -// expansion_order: usize, -// domain: Domain, -// alpha: f64, -// ) -> Self { -// let mut result = SvdFieldTranslationNaiveKiFmm::default(); - -// if let Some(k) = k { -// // Compression rank <= number of coefficients -// let ncoeffs = result.ncoeffs(expansion_order); -// if k <= ncoeffs { -// result.k = k -// } else { -// result.k = ncoeffs; -// } -// } else { -// // TODO: Should be data driven if nothing is provided by the user -// result.k = 50; -// } + let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::All, Mode::All).unwrap(); + let st = st.unwrap(); + let (mst, nst) = st.shape(); + let st_block = st.block((0, 0), (self.k, nst)); + let s = st_block.transpose().eval(); + let st = st.block((0, 0), (self.k, nst)).eval(); -// result.alpha = alpha; -// result.kernel = kernel; -// result.transfer_vectors = result.compute_transfer_vectors(); -// result.m2l = result.compute_m2l_operators(expansion_order, domain); + // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); -// result -// } -// } + // Store compressed M2L operators + // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); + let mut c = rlst_mat![f64, (self.k, self.k * ntransfer_vectors)]; + + for i in 0..self.transfer_vectors.len() { + // let v_lidx = i * ncols; + // let v_ridx = v_lidx + ncols; + // let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); + + // let block_size = right*ncols; + // let start_idx = i * block_size; + // let end_idx = start_idx+block_size; + let top_left = (0, i * ncols); + let dim = (self.k, ncols); + let vt_block = vt.block(top_left, dim); + + let tmp = sigma_mat.dot(&vt_block.dot(&s)); + // let tmp = sigma.dot(&vt_sub.dot(&st.t())); + // let lidx = i * self.k; + // let ridx = lidx + self.k; + + let top_left = (0, i * self.k); + let dim = (self.k, ncols); + // let mut c_block =; + c.block_mut(top_left, dim) + .data_mut() + .copy_from_slice(tmp.data()); + + // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); + } + (u, st, c) + } +} + +impl SvdFieldTranslationNaiveKiFmm +where + T: Kernel + Default, +{ + pub fn new( + kernel: T, + k: Option, + expansion_order: usize, + domain: Domain, + alpha: f64, + ) -> Self { + let dummy = rlst_mat![f64, (1, 1)]; + + // TODO: There should be a default for matrices to make code cleaner. + let mut result = SvdFieldTranslationNaiveKiFmm { + alpha, + k: 100, + kernel, + m2l: ( + dummy.new_like_self().eval(), + dummy.new_like_self().eval(), + dummy.new_like_self().eval(), + ), + transfer_vectors: Vec::new(), + }; + + if let Some(k) = k { + // Compression rank <= number of coefficients + let ncoeffs = result.ncoeffs(expansion_order); + if k <= ncoeffs { + result.k = k + } else { + result.k = ncoeffs; + } + } else { + // TODO: Should be data driven if nothing is provided by the user + result.k = 50; + } + + result.transfer_vectors = result.compute_transfer_vectors(); + result.m2l = result.compute_m2l_operators(expansion_order, domain); + + result + } +} impl SvdFieldTranslationKiFmm where From 115f7a5102c9777d47165cfa77da3f5f3ef1efd2 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Tue, 4 Jul 2023 15:44:58 +0100 Subject: [PATCH 07/40] Make trees work with rlst --- fmm/Cargo.toml | 1 + fmm/src/helmholtz.rs | 1 - fmm/src/laplace.rs | 380 +++++++++---------- tree/Cargo.toml | 3 + tree/src/implementations/impl_domain.rs | 100 +++-- tree/src/implementations/impl_single_node.rs | 264 ++++++++----- 6 files changed, 421 insertions(+), 328 deletions(-) delete mode 100644 fmm/src/helmholtz.rs diff --git a/fmm/Cargo.toml b/fmm/Cargo.toml index b984f239..b3c1aaa7 100644 --- a/fmm/Cargo.toml +++ b/fmm/Cargo.toml @@ -24,6 +24,7 @@ bempp-tools = { path = "../tools" } bempp-tree = { path = "../tree" } bempp-traits = { path = "../traits" } bempp-field = { path = "../field" } +bempp-kernel = { path = "../kernel" } approx = "0.5" cauchy = "0.4.*" itertools = "0.10" diff --git a/fmm/src/helmholtz.rs b/fmm/src/helmholtz.rs deleted file mode 100644 index b8a61e3b..00000000 --- a/fmm/src/helmholtz.rs +++ /dev/null @@ -1 +0,0 @@ -//! Helmholtz kernel implementation. diff --git a/fmm/src/laplace.rs b/fmm/src/laplace.rs index c0b32953..60fa2649 100644 --- a/fmm/src/laplace.rs +++ b/fmm/src/laplace.rs @@ -1,190 +1,190 @@ -//! Laplace kernel implementation. -use bempp_traits::kernel::Kernel; - -#[derive(Debug, Default, Clone)] -pub struct LaplaceKernel { - pub dim: usize, - pub is_singular: bool, - pub value_dimension: usize, -} - -impl LaplaceKernel { - pub fn new(dim: usize, is_singular: bool, value_dimension: usize) -> LaplaceKernel { - LaplaceKernel { - dim, - is_singular, - value_dimension, - } - } - - pub fn potential_kernel_3_d(&self, source: &[f64], target: &[f64]) -> f64 { - let mut tmp = source - .iter() - .zip(target.iter()) - .map(|(s, t)| (s - t).powf(2.0)) - .sum::() - .powf(0.5) - * std::f64::consts::PI - * 4.0; - - tmp = tmp.recip(); - - if tmp.is_finite() { - tmp - } else { - 0. - } - } -} - -impl Kernel for LaplaceKernel { - fn dim(&self) -> usize { - self.dim - } - - fn is_singular(&self) -> bool { - self.is_singular - } - - fn value_dimension(&self) -> usize { - self.value_dimension - } - - fn kernel(&self, source: &[f64], target: &[f64]) -> f64 { - let mut tmp = source - .iter() - .zip(target.iter()) - .map(|(s, t)| (s - t).powf(2.0)) - .sum::() - .powf(0.5) - * std::f64::consts::PI - * 4.0; - - tmp = tmp.recip(); - - if tmp.is_finite() { - tmp - } else { - 0. - } - } - - fn potential(&self, sources: &[f64], charges: &[f64], targets: &[f64], potentials: &mut [f64]) { - for (i, j) in (0..targets.len()).step_by(self.dim()).enumerate() { - let mut potential = 0.0; - let target = &targets[j..(j + self.dim())]; - - for (k, l) in (0..sources.len()).step_by(self.dim()).enumerate() { - let source = &sources[l..(l + self.dim())]; - let tmp; - if self.dim() == 3 { - tmp = self.kernel(source, target); - } else { - panic!("Kernel not implemented for dimension={:?}!", self.dim()) - } - - potential += charges[k] * tmp; - } - potentials[i] = potential - } - } - - fn gram(&self, sources: &[f64], targets: &[f64], result: &mut Vec) { - // let mut result: Vec = Vec::new(); - - for i in (0..targets.len()).step_by(self.dim()) { - let target = &targets[i..(i + self.dim())]; - let mut row: Vec = Vec::new(); - - for j in (0..sources.len()).step_by(self.dim()) { - let source = &sources[j..(j + self.dim())]; - let tmp; - if self.dim() == 3 { - tmp = self.potential_kernel_3_d(source, target); - } else { - panic!("Gram not implemented for dimension={:?}!", self.dim()) - } - - row.push(tmp); - } - result.append(&mut row); - } - // Result::Ok(result) - } - - fn scale(&self, level: u64) -> f64 { - 1. / (2f64.powf(level as f64)) - } -} - -#[allow(unused_imports)] -pub mod tests { - - use rand::prelude::*; - use rand::SeedableRng; - - use super::*; - - #[allow(dead_code)] - fn points_fixture(npoints: usize, dim: usize) -> Vec { - let mut range = StdRng::seed_from_u64(0); - let between = rand::distributions::Uniform::from(0.0..1.0); - let mut points = Vec::new(); - - for _ in 0..npoints { - for _ in 0..dim { - points.push(between.sample(&mut range)) - } - } - - points - } - - #[test] - #[should_panic(expected = "Kernel not implemented for dimension=2!")] - pub fn test_potential_panics() { - let dim = 2; - let npoints = 100; - let sources = points_fixture(npoints, dim); - let targets = points_fixture(npoints, dim); - let charges = vec![1.0; npoints]; - let mut potentials = vec![0.; npoints]; - - let kernel = LaplaceKernel::new(dim, false, dim); - kernel.potential( - &sources[..], - &charges[..], - &targets[..], - &mut potentials[..], - ); - } - - #[test] - #[should_panic(expected = "Gram not implemented for dimension=2!")] - pub fn test_gram_panics() { - let dim = 2; - let npoints = 100; - let sources = points_fixture(npoints, dim); - let targets = points_fixture(npoints, dim); - - let kernel = LaplaceKernel::new(dim, false, dim); - let mut gram = Vec::::new(); - kernel.gram(&sources[..], &targets[..], &mut gram); - } - - #[test] - pub fn test_gram() { - let dim = 3; - let nsources = 100; - let ntargets = 200; - let sources = points_fixture(nsources, dim); - let targets = points_fixture(ntargets, dim); - - let kernel = LaplaceKernel::new(dim, false, dim); - let mut gram = Vec::::new(); - kernel.gram(&sources[..], &targets[..], &mut gram); - - // Test dimension of output - assert_eq!(gram.len(), ntargets * nsources); - } -} +// //! Laplace kernel implementation. +// use bempp_traits::kernel::Kernel; + +// #[derive(Debug, Default, Clone)] +// pub struct LaplaceKernel { +// pub dim: usize, +// pub is_singular: bool, +// pub value_dimension: usize, +// } + +// impl LaplaceKernel { +// pub fn new(dim: usize, is_singular: bool, value_dimension: usize) -> LaplaceKernel { +// LaplaceKernel { +// dim, +// is_singular, +// value_dimension, +// } +// } + +// pub fn potential_kernel_3_d(&self, source: &[f64], target: &[f64]) -> f64 { +// let mut tmp = source +// .iter() +// .zip(target.iter()) +// .map(|(s, t)| (s - t).powf(2.0)) +// .sum::() +// .powf(0.5) +// * std::f64::consts::PI +// * 4.0; + +// tmp = tmp.recip(); + +// if tmp.is_finite() { +// tmp +// } else { +// 0. +// } +// } +// } + +// impl Kernel for LaplaceKernel { +// fn dim(&self) -> usize { +// self.dim +// } + +// fn is_singular(&self) -> bool { +// self.is_singular +// } + +// fn value_dimension(&self) -> usize { +// self.value_dimension +// } + +// fn kernel(&self, source: &[f64], target: &[f64]) -> f64 { +// let mut tmp = source +// .iter() +// .zip(target.iter()) +// .map(|(s, t)| (s - t).powf(2.0)) +// .sum::() +// .powf(0.5) +// * std::f64::consts::PI +// * 4.0; + +// tmp = tmp.recip(); + +// if tmp.is_finite() { +// tmp +// } else { +// 0. +// } +// } + +// fn potential(&self, sources: &[f64], charges: &[f64], targets: &[f64], potentials: &mut [f64]) { +// for (i, j) in (0..targets.len()).step_by(self.dim()).enumerate() { +// let mut potential = 0.0; +// let target = &targets[j..(j + self.dim())]; + +// for (k, l) in (0..sources.len()).step_by(self.dim()).enumerate() { +// let source = &sources[l..(l + self.dim())]; +// let tmp; +// if self.dim() == 3 { +// tmp = self.kernel(source, target); +// } else { +// panic!("Kernel not implemented for dimension={:?}!", self.dim()) +// } + +// potential += charges[k] * tmp; +// } +// potentials[i] = potential +// } +// } + +// fn gram(&self, sources: &[f64], targets: &[f64], result: &mut Vec) { +// // let mut result: Vec = Vec::new(); + +// for i in (0..targets.len()).step_by(self.dim()) { +// let target = &targets[i..(i + self.dim())]; +// let mut row: Vec = Vec::new(); + +// for j in (0..sources.len()).step_by(self.dim()) { +// let source = &sources[j..(j + self.dim())]; +// let tmp; +// if self.dim() == 3 { +// tmp = self.potential_kernel_3_d(source, target); +// } else { +// panic!("Gram not implemented for dimension={:?}!", self.dim()) +// } + +// row.push(tmp); +// } +// result.append(&mut row); +// } +// // Result::Ok(result) +// } + +// fn scale(&self, level: u64) -> f64 { +// 1. / (2f64.powf(level as f64)) +// } +// } + +// #[allow(unused_imports)] +// pub mod tests { + +// use rand::prelude::*; +// use rand::SeedableRng; + +// use super::*; + +// #[allow(dead_code)] +// fn points_fixture(npoints: usize, dim: usize) -> Vec { +// let mut range = StdRng::seed_from_u64(0); +// let between = rand::distributions::Uniform::from(0.0..1.0); +// let mut points = Vec::new(); + +// for _ in 0..npoints { +// for _ in 0..dim { +// points.push(between.sample(&mut range)) +// } +// } + +// points +// } + +// #[test] +// #[should_panic(expected = "Kernel not implemented for dimension=2!")] +// pub fn test_potential_panics() { +// let dim = 2; +// let npoints = 100; +// let sources = points_fixture(npoints, dim); +// let targets = points_fixture(npoints, dim); +// let charges = vec![1.0; npoints]; +// let mut potentials = vec![0.; npoints]; + +// let kernel = LaplaceKernel::new(dim, false, dim); +// kernel.potential( +// &sources[..], +// &charges[..], +// &targets[..], +// &mut potentials[..], +// ); +// } + +// #[test] +// #[should_panic(expected = "Gram not implemented for dimension=2!")] +// pub fn test_gram_panics() { +// let dim = 2; +// let npoints = 100; +// let sources = points_fixture(npoints, dim); +// let targets = points_fixture(npoints, dim); + +// let kernel = LaplaceKernel::new(dim, false, dim); +// let mut gram = Vec::::new(); +// kernel.gram(&sources[..], &targets[..], &mut gram); +// } + +// #[test] +// pub fn test_gram() { +// let dim = 3; +// let nsources = 100; +// let ntargets = 200; +// let sources = points_fixture(nsources, dim); +// let targets = points_fixture(ntargets, dim); + +// let kernel = LaplaceKernel::new(dim, false, dim); +// let mut gram = Vec::::new(); +// kernel.gram(&sources[..], &targets[..], &mut gram); + +// // Test dimension of output +// assert_eq!(gram.len(), ntargets * nsources); +// } +// } diff --git a/tree/Cargo.toml b/tree/Cargo.toml index 2c0dfd02..ea6d1120 100644 --- a/tree/Cargo.toml +++ b/tree/Cargo.toml @@ -25,3 +25,6 @@ bempp-traits = { path = "../traits" } [features] mpi = ["dep:mpi", "dep:hyksort"] strict = [] + +[dev-dependencies] +rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse"} \ No newline at end of file diff --git a/tree/src/implementations/impl_domain.rs b/tree/src/implementations/impl_domain.rs index 3a691f55..f2f88414 100644 --- a/tree/src/implementations/impl_domain.rs +++ b/tree/src/implementations/impl_domain.rs @@ -4,34 +4,49 @@ impl Domain { /// Compute the domain defined by a set of points on a local node. When defined by a set of points /// The domain adds a small threshold such that no points lie on the actual edge of the domain to /// ensure correct Morton Encoding. - pub fn from_local_points(points: &[[PointType; 3]]) -> Domain { + pub fn from_local_points(points: &[PointType]) -> Domain { // Increase size of bounding box to capture all points let err: f64 = 0.001; - let max_x = points - .iter() - .max_by(|a, b| a[0].partial_cmp(&b[0]).unwrap()) - .unwrap()[0]; - let max_y = points - .iter() - .max_by(|a, b| a[1].partial_cmp(&b[1]).unwrap()) - .unwrap()[1]; - let max_z = points - .iter() - .max_by(|a, b| a[2].partial_cmp(&b[2]).unwrap()) - .unwrap()[2]; + // TODO: Should be parametrised by dimension + let dim = 3; + let npoints = points.len() / dim; + let x = points[0..npoints].to_vec(); + let y = points[npoints..2 * npoints].to_vec(); + let z = points[2 * npoints..].to_vec(); - let min_x = points - .iter() - .min_by(|a, b| a[0].partial_cmp(&b[0]).unwrap()) - .unwrap()[0]; - let min_y = points - .iter() - .min_by(|a, b| a[1].partial_cmp(&b[1]).unwrap()) - .unwrap()[1]; - let min_z = points - .iter() - .min_by(|a, b| a[2].partial_cmp(&b[2]).unwrap()) - .unwrap()[2]; + let max_x = x.iter().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + let max_y = y.iter().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + let max_z = z.iter().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + + let min_x = x.iter().min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + let min_y = y.iter().min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + let min_z = z.iter().min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + + // let max_x = points + // .iter() + // .max_by(|a, b| a[0].partial_cmp(&b[0]).unwrap()) + // .unwrap()[0]; + // let max_y = points + // .iter() + // .max_by(|a, b| a[1].partial_cmp(&b[1]).unwrap()) + // .unwrap()[1]; + // let max_z = points + // .iter() + // .max_by(|a, b| a[2].partial_cmp(&b[2]).unwrap()) + // .unwrap()[2]; + + // let min_x = points + // .iter() + // .min_by(|a, b| a[0].partial_cmp(&b[0]).unwrap()) + // .unwrap()[0]; + // let min_y = points + // .iter() + // .min_by(|a, b| a[1].partial_cmp(&b[1]).unwrap()) + // .unwrap()[1]; + // let min_z = points + // .iter() + // .min_by(|a, b| a[2].partial_cmp(&b[2]).unwrap()) + // .unwrap()[2]; Domain { origin: [min_x - err, min_y - err, min_z - err], @@ -51,28 +66,37 @@ mod test { use rand::SeedableRng; use crate::types::domain::Domain; + use rlst::common::traits::ColumnMajorIterator; + use rlst::dense::{base_matrix::BaseMatrix, rlst_mat, Dynamic, Matrix, VectorContainer, RawAccess}; - const NPOINTS: u64 = 100000; - - #[test] - fn test_compute_bounds() { + fn points_fixture( + npoints: usize, + ) -> Matrix, Dynamic, Dynamic>, Dynamic, Dynamic> + { // Generate a set of randomly distributed points let mut range = StdRng::seed_from_u64(0); let between = rand::distributions::Uniform::from(0.0_f64..1.0_f64); - let mut points = Vec::new(); + let mut points = rlst_mat![f64, (npoints, 3)]; - for _ in 0..NPOINTS { - points.push([ - between.sample(&mut range), - between.sample(&mut range), - between.sample(&mut range), - ]) + for i in 0..npoints { + points[[i, 0]] = between.sample(&mut range); + points[[i, 1]] = between.sample(&mut range); + points[[i, 2]] = between.sample(&mut range); } - let domain = Domain::from_local_points(&points); + points + } + + #[test] + fn test_compute_bounds() { + let npoints = 10000; + let points = points_fixture(npoints); + let domain = Domain::from_local_points(&points.data()); // Test that all local points are contained within the local domain - for point in points { + for i in 0..npoints { + let point = [points[[i, 0]], points[[i, 1]], points[[i, 2]]]; + assert!( domain.origin[0] <= point[0] && point[0] <= domain.origin[0] + domain.diameter[0] ); diff --git a/tree/src/implementations/impl_single_node.rs b/tree/src/implementations/impl_single_node.rs index 77006b01..6d37be64 100644 --- a/tree/src/implementations/impl_single_node.rs +++ b/tree/src/implementations/impl_single_node.rs @@ -19,23 +19,42 @@ use crate::{ impl SingleNodeTree { /// Constructor for uniform trees - pub fn uniform_tree(points: &[[PointType; 3]], &domain: &Domain, depth: u64) -> SingleNodeTree { + pub fn uniform_tree(points: &[PointType], &domain: &Domain, depth: u64) -> SingleNodeTree { // Encode points at deepest level, and map to specified depth let start = Instant::now(); - let mut points: Points = points - .iter() - .enumerate() - .map(|(i, &p)| { - let base_key = MortonKey::from_point(&p, &domain, DEEPEST_LEVEL); - let encoded_key = MortonKey::from_point(&p, &domain, depth); - Point { - coordinate: p, - base_key, - encoded_key, - global_idx: i, - } + + // TODO: Automatically infer dimension + let dim = 3; + let npoints = points.len() / dim; + + let mut tmp = Points::default(); + for i in 0..npoints { + let point = [points[i], points[i + npoints], points[i + 2 * npoints]]; + let base_key = MortonKey::from_point(&point, &domain, DEEPEST_LEVEL); + let encoded_key = MortonKey::from_point(&point, &domain, depth); + tmp.points.push(Point { + coordinate: point, + base_key, + encoded_key, + global_idx: i, }) - .collect(); + } + let mut points = tmp; + + // let mut points: Points = points + // .iter() + // .enumerate() + // .map(|(i, &p)| { + // let base_key = MortonKey::from_point(&p, &domain, DEEPEST_LEVEL); + // let encoded_key = MortonKey::from_point(&p, &domain, depth); + // Point { + // coordinate: p, + // base_key, + // encoded_key, + // global_idx: i, + // } + // }) + // .collect(); points.sort(); // Generate complete tree at specified depth @@ -126,26 +145,39 @@ impl SingleNodeTree { } /// Constructor for adaptive trees - pub fn adaptive_tree( - points: &[[PointType; 3]], - &domain: &Domain, - n_crit: u64, - ) -> SingleNodeTree { + pub fn adaptive_tree(points: &[PointType], &domain: &Domain, n_crit: u64) -> SingleNodeTree { // Encode points at deepest level let start = Instant::now(); - let mut points: Points = points - .iter() - .enumerate() - .map(|(i, &p)| { - let key = MortonKey::from_point(&p, &domain, DEEPEST_LEVEL); - Point { - coordinate: p, - base_key: key, - encoded_key: key, - global_idx: i, - } + // let mut points: Points = points + // .iter() + // .enumerate() + // .map(|(i, &p)| { + // let key = MortonKey::from_point(&p, &domain, DEEPEST_LEVEL); + // Point { + // coordinate: p, + // base_key: key, + // encoded_key: key, + // global_idx: i, + // } + // }) + // .collect(); + // TODO: Automatically infer dimension + let dim = 3; + let npoints = points.len() / dim; + + let mut tmp = Points::default(); + for i in 0..npoints { + let point = [points[i], points[i + npoints], points[i + 2 * npoints]]; + let key = MortonKey::from_point(&point, &domain, DEEPEST_LEVEL); + tmp.points.push(Point { + coordinate: point, + base_key: key, + encoded_key: key, + global_idx: i, }) - .collect(); + } + let mut points = tmp; + points.sort(); println!("Tree - Encoding Time {:?}ms", start.elapsed().as_millis()); @@ -419,30 +451,30 @@ impl Tree for SingleNodeTree { type NodeIndices = MortonKeys; type Point = Point; type PointSlice<'a> = &'a [Point]; - type PointData = Vec; - type PointDataSlice<'a> = &'a [Vec]; + type PointData = f64; + type PointDataSlice<'a> = &'a [f64]; /// Create a new single-node tree. If non-adaptive (uniform) trees are created, they are specified /// by a user defined maximum depth, if an adaptive tree is created it is specified by only by the /// user defined maximum leaf maximum occupancy n_crit. fn new( - points: Self::PointSlice<'_>, + points: Self::PointDataSlice<'_>, adaptive: bool, n_crit: Option, depth: Option, ) -> SingleNodeTree { - // HACK: Come back and reconcile a runtime point dimension detector - let points = points.iter().map(|p| p.coordinate).collect_vec(); + // TODO: Come back and reconcile a runtime point dimension detector + // let points = points.iter().map(|p| p.coordinate).collect_vec(); - let domain = Domain::from_local_points(&points[..]); + let domain = Domain::from_local_points(points); let n_crit = n_crit.unwrap_or(NCRIT); let depth = depth.unwrap_or(DEEPEST_LEVEL); if adaptive { - SingleNodeTree::adaptive_tree(&points[..], &domain, n_crit) + SingleNodeTree::adaptive_tree(points, &domain, n_crit) } else { - SingleNodeTree::uniform_tree(&points[..], &domain, depth) + SingleNodeTree::uniform_tree(points, &domain, depth) } } @@ -500,40 +532,44 @@ mod test { use super::*; use rand::prelude::*; use rand::SeedableRng; - - pub fn points_fixture(npoints: i32) -> Vec { + use rlst::dense::RawAccess; + use rlst::dense::rlst_mat; + use rlst::dense::{Matrix, base_matrix::BaseMatrix, VectorContainer, Dynamic}; + + fn points_fixture( + npoints: usize, + min: Option, + max: Option + ) -> Matrix, Dynamic, Dynamic>, Dynamic, Dynamic> + { + // Generate a set of randomly distributed points let mut range = StdRng::seed_from_u64(0); - let between = rand::distributions::Uniform::from(0.0..1.0); - let mut points: Vec<[PointType; 3]> = Vec::new(); - - for _ in 0..npoints { - points.push([ - between.sample(&mut range), - between.sample(&mut range), - between.sample(&mut range), - ]) + + let between; + if let (Some(min),Some(max)) = (min, max) { + between = rand::distributions::Uniform::from(min..max); + } else { + between = rand::distributions::Uniform::from(0.0_f64..1.0_f64); + } + + let mut points = rlst_mat![f64, (npoints, 3)]; + + for i in 0..npoints { + points[[i, 0]] = between.sample(&mut range); + points[[i, 1]] = between.sample(&mut range); + points[[i, 2]] = between.sample(&mut range); } - let points = points - .iter() - .enumerate() - .map(|(i, p)| Point { - coordinate: *p, - global_idx: i, - base_key: MortonKey::default(), - encoded_key: MortonKey::default(), - }) - .collect_vec(); points } #[test] pub fn test_uniform_tree() { let npoints = 10000; - let points = points_fixture(npoints); + let points = points_fixture(npoints, None, None); let depth = 3; let n_crit = 150; - let tree = SingleNodeTree::new(&points, false, Some(n_crit), Some(depth)); + let tree = SingleNodeTree::new(points.data(), false, Some(n_crit), Some(depth)); // Test that the tree really is uniform let levels: Vec = tree @@ -552,11 +588,11 @@ mod test { #[test] pub fn test_adaptive_tree() { let npoints = 10000; - let points = points_fixture(npoints); + let points = points_fixture(npoints, None, None); let adaptive = true; let n_crit = 150; - let tree = SingleNodeTree::new(&points, adaptive, Some(n_crit), None); + let tree = SingleNodeTree::new(points.data(), adaptive, Some(n_crit), None); // Test that tree is not uniform let levels: Vec = tree @@ -596,9 +632,9 @@ mod test { #[test] pub fn test_no_overlaps() { let npoints = 10000; - let points = points_fixture(npoints); - let uniform = SingleNodeTree::new(&points, false, Some(150), Some(4)); - let adaptive = SingleNodeTree::new(&points, true, Some(150), None); + let points = points_fixture(npoints, None, None); + let uniform = SingleNodeTree::new(points.data(), false, Some(150), Some(4)); + let adaptive = SingleNodeTree::new(points.data(), true, Some(150), None); test_no_overlaps_helper(uniform.get_leaves().unwrap()); test_no_overlaps_helper(adaptive.get_leaves().unwrap()); } @@ -607,37 +643,52 @@ mod test { pub fn test_assign_nodes_to_points() { // Generate points in a single octant of the domain let npoints = 10; - let mut range = StdRng::seed_from_u64(0); - let between = rand::distributions::Uniform::from(0.0..0.5); - let mut points: Vec<[PointType; 3]> = Vec::new(); - - for _ in 0..npoints { - points.push([ - between.sample(&mut range), - between.sample(&mut range), - between.sample(&mut range), - ]) - } + // let mut range = StdRng::seed_from_u64(0); + // let between = rand::distributions::Uniform::from(0.0..0.5); + // let mut points: Vec<[PointType; 3]> = Vec::new(); + + // for _ in 0..npoints { + // points.push([ + // between.sample(&mut range), + // between.sample(&mut range), + // between.sample(&mut range), + // ]) + // } + let points = points_fixture(npoints, Some(0.), Some(0.5)); let domain = Domain { origin: [0.0, 0.0, 0.0], diameter: [1.0, 1.0, 1.0], }; let depth = 1; - - let mut points: Points = points - .iter() - .enumerate() - .map(|(i, p)| { - let key = MortonKey::from_point(p, &domain, depth); - Point { - coordinate: *p, - encoded_key: key, - base_key: key, - global_idx: i, - } + + let dim = 3; + + let mut tmp = Points::default(); + for i in 0..npoints { + let point = [points[[i, 0]], points[[i, 1]], points[[i, 2]]]; + let key = MortonKey::from_point(&point, &domain, DEEPEST_LEVEL); + tmp.points.push(Point { + coordinate: point, + base_key: key, + encoded_key: key, + global_idx: i, }) - .collect(); + } + let mut points = tmp; + // let mut points: Points = points + // .iter() + // .enumerate() + // .map(|(i, p)| { + // let key = MortonKey::from_point(p, &domain, depth); + // Point { + // coordinate: *p, + // encoded_key: key, + // base_key: key, + // global_idx: i, + // } + // }) + // .collect(); let keys = MortonKeys { keys: ROOT.children(), @@ -674,15 +725,30 @@ mod test { #[test] pub fn test_split_blocks() { - let _domain = Domain { + let domain = Domain { origin: [0., 0., 0.], diameter: [1.0, 1.0, 1.0], }; let _depth = 5; - let mut points = Points { - points: points_fixture(10000), - index: 0, - }; + // let mut points = Points { + // points: points_fixture(10000, None, None).data(), + // index: 0, + // }; + let dim = 3; + let npoints = 10000; + let points = points_fixture(npoints, None, None); + let mut tmp = Points::default(); + for i in 0..npoints { + let point = [points[[i, 0]], points[[i, 1]], points[[i, 2]]]; + let key = MortonKey::from_point(&point, &domain, DEEPEST_LEVEL); + tmp.points.push(Point { + coordinate: point, + base_key: key, + encoded_key: key, + global_idx: i, + }) + } + let mut points = tmp; let n_crit = 15; @@ -750,9 +816,9 @@ mod test { pub fn test_levels_to_keys() { // Uniform tree let npoints = 10000; - let points = points_fixture(npoints); + let points = points_fixture(npoints, None, None); let depth = 3; - let tree = SingleNodeTree::new(&points, false, None, Some(depth)); + let tree = SingleNodeTree::new(points.data(), false, None, Some(depth)); let keys = tree.get_all_keys().unwrap(); @@ -782,7 +848,7 @@ mod test { // Adaptive tree let ncrit = 150; - let tree = SingleNodeTree::new(&points, true, Some(ncrit), None); + let tree = SingleNodeTree::new(points.data(), true, Some(ncrit), None); let keys = tree.get_all_keys().unwrap(); let depth = tree.get_depth(); From 10bcf4b5048b990c0fdc2023a2f8ed165baf8900 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Tue, 4 Jul 2023 15:47:10 +0100 Subject: [PATCH 08/40] Remove timing from tree --- tree/src/implementations/impl_single_node.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tree/src/implementations/impl_single_node.rs b/tree/src/implementations/impl_single_node.rs index 6d37be64..a4016b74 100644 --- a/tree/src/implementations/impl_single_node.rs +++ b/tree/src/implementations/impl_single_node.rs @@ -1,8 +1,5 @@ use itertools::Itertools; -use std::{ - collections::{HashMap, HashSet}, - time::Instant, -}; +use std::collections::{HashMap, HashSet}; use bempp_traits::tree::Tree; @@ -21,7 +18,6 @@ impl SingleNodeTree { /// Constructor for uniform trees pub fn uniform_tree(points: &[PointType], &domain: &Domain, depth: u64) -> SingleNodeTree { // Encode points at deepest level, and map to specified depth - let start = Instant::now(); // TODO: Automatically infer dimension let dim = 3; @@ -72,7 +68,6 @@ impl SingleNodeTree { .collect(), index: 0, }; - println!("Tree - Encoding Time {:?}ms", start.elapsed().as_millis()); // Assign keys to points let unmapped = SingleNodeTree::assign_nodes_to_points(&leaves, &mut points); @@ -147,7 +142,6 @@ impl SingleNodeTree { /// Constructor for adaptive trees pub fn adaptive_tree(points: &[PointType], &domain: &Domain, n_crit: u64) -> SingleNodeTree { // Encode points at deepest level - let start = Instant::now(); // let mut points: Points = points // .iter() // .enumerate() @@ -180,7 +174,6 @@ impl SingleNodeTree { points.sort(); - println!("Tree - Encoding Time {:?}ms", start.elapsed().as_millis()); // Complete the region spanned by the points let mut complete = MortonKeys { keys: points.points.iter().map(|p| p.encoded_key).collect_vec(), From d73854e0cd72817c8b46a1777170111315c5cce6 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Tue, 4 Jul 2023 16:57:52 +0100 Subject: [PATCH 09/40] Precomputations working for svd cases --- field/Cargo.toml | 2 +- field/src/lib.rs | 118 +- fmm/Cargo.toml | 1 + fmm/src/fmm.rs | 2280 ++++++++++++++++++++------------------ fmm/src/lib.rs | 3 +- fmm/src/linalg.rs | 163 +-- kernel/Cargo.toml | 2 +- kernel/src/laplace_3d.rs | 1 + traits/src/tree.rs | 2 +- 9 files changed, 1355 insertions(+), 1217 deletions(-) diff --git a/field/Cargo.toml b/field/Cargo.toml index 8823352f..a8c70c98 100644 --- a/field/Cargo.toml +++ b/field/Cargo.toml @@ -29,4 +29,4 @@ ndarray-linalg = { version = "*", features = ["openblas-system"] } ndarray-ndimage = "0.3.0" ndrustfft = "0.4.0" num = "0.4" -rlst = {git = "https://github.com/linalg-rs/rlst.git" } \ No newline at end of file +rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse"} \ No newline at end of file diff --git a/field/src/lib.rs b/field/src/lib.rs index 43405f5c..3faf5ee7 100644 --- a/field/src/lib.rs +++ b/field/src/lib.rs @@ -328,11 +328,11 @@ where // let mut se2tc_fat: SvdM2lEntry = // Array2::zeros((nrows, ncols * self.transfer_vectors.len())); + // let mut se2tc_thin: SvdM2lEntry = + // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); let ntransfer_vectors = self.transfer_vectors.len(); let mut se2tc_fat = rlst_mat![f64, (nrows, ncols * ntransfer_vectors)]; - // let mut se2tc_thin: SvdM2lEntry = - // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); let mut se2tc_thin = rlst_mat![f64, (nrows * ntransfer_vectors, ncols)]; for (i, t) in self.transfer_vectors.iter().enumerate() { @@ -362,6 +362,9 @@ where ); // // let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); + // // se2tc_fat + // // .slice_mut(s![.., lidx_sources..ridx_sources]) + // // .assign(&tmp_gram); let lidx_sources = i * ncols; let ridx_sources = lidx_sources + ncols; @@ -371,9 +374,6 @@ where let mut block = se2tc_fat.get_slice_mut(start_idx, end_idx); block.copy_from_slice(tmp_gram.data_mut()); - // // se2tc_fat - // // .slice_mut(s![.., lidx_sources..ridx_sources]) - // // .assign(&tmp_gram); for j in 0..ncols { let start_idx = j * ntransfer_vectors * nrows + i * nrows; @@ -401,27 +401,29 @@ where let u = u.block((0, 0), (mu, self.k)).eval(); let (mvt, nvt) = vt.shape(); - let vt = vt.block((0, 0), (right, nvt)).eval(); + let vt = vt.block((0, 0), (self.k, nvt)).eval(); - // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // let u = u.unwrap().slice(s![.., left..right]).to_owned(); - // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); - // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); + // // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // // let u = u.unwrap().slice(s![.., left..right]).to_owned(); + // // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); + // // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); + // // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); - let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::All, Mode::All).unwrap(); + // // Store compressed M2L operators + // // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); + + let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::Slim, Mode::All).unwrap(); let st = st.unwrap(); let (mst, nst) = st.shape(); let st_block = st.block((0, 0), (self.k, nst)); - let s = st_block.transpose().eval(); - let st = st.block((0, 0), (self.k, nst)).eval(); - - // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); - - // Store compressed M2L operators - // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); + let s_block = st_block.transpose().eval(); + let mut c = rlst_mat![f64, (self.k, self.k * ntransfer_vectors)]; + // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), st.shape(), c.shape(), vt.shape()); + // let st = s_block.transpose().eval(); + // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), sigma_mat.shape(), vt.shape(), st.shape()); for i in 0..self.transfer_vectors.len() { // let v_lidx = i * ncols; // let v_ridx = v_lidx + ncols; @@ -430,26 +432,28 @@ where // let block_size = right*ncols; // let start_idx = i * block_size; // let end_idx = start_idx+block_size; - let top_left = (0, i * ncols); - let dim = (self.k, ncols); - let vt_block = vt.block(top_left, dim); - let tmp = sigma_mat.dot(&vt_block.dot(&s)); // let tmp = sigma.dot(&vt_sub.dot(&st.t())); // let lidx = i * self.k; // let ridx = lidx + self.k; + // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); + let top_left = (0, i * ncols); + let dim = (self.k, ncols); + let vt_block = vt.block(top_left, dim); + + let tmp = sigma_mat.dot(&vt_block.dot(&s_block)); let top_left = (0, i * self.k); - let dim = (self.k, ncols); - // let mut c_block =; + let dim = (self.k, self.k); + c.block_mut(top_left, dim) .data_mut() .copy_from_slice(tmp.data()); - - // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); } (u, st, c) + // let dummy = rlst_mat![f64, (1, 1)]; + // (dummy.new_like_self().eval(), dummy.new_like_self().eval(), dummy.new_like_self().eval()) // assert!(false) } } @@ -483,11 +487,11 @@ where // let mut se2tc_fat: SvdM2lEntry = // Array2::zeros((nrows, ncols * self.transfer_vectors.len())); + // let mut se2tc_thin: SvdM2lEntry = + // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); let ntransfer_vectors = self.transfer_vectors.len(); let mut se2tc_fat = rlst_mat![f64, (nrows, ncols * ntransfer_vectors)]; - // let mut se2tc_thin: SvdM2lEntry = - // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); let mut se2tc_thin = rlst_mat![f64, (nrows * ntransfer_vectors, ncols)]; for (i, t) in self.transfer_vectors.iter().enumerate() { @@ -517,6 +521,9 @@ where ); // // let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); + // // se2tc_fat + // // .slice_mut(s![.., lidx_sources..ridx_sources]) + // // .assign(&tmp_gram); let lidx_sources = i * ncols; let ridx_sources = lidx_sources + ncols; @@ -526,9 +533,6 @@ where let mut block = se2tc_fat.get_slice_mut(start_idx, end_idx); block.copy_from_slice(tmp_gram.data_mut()); - // // se2tc_fat - // // .slice_mut(s![.., lidx_sources..ridx_sources]) - // // .assign(&tmp_gram); for j in 0..ncols { let start_idx = j * ntransfer_vectors * nrows + i * nrows; @@ -556,27 +560,29 @@ where let u = u.block((0, 0), (mu, self.k)).eval(); let (mvt, nvt) = vt.shape(); - let vt = vt.block((0, 0), (right, nvt)).eval(); + let vt = vt.block((0, 0), (self.k, nvt)).eval(); + + // // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // // let u = u.unwrap().slice(s![.., left..right]).to_owned(); + // // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); + // // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); + // // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); - // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // let u = u.unwrap().slice(s![.., left..right]).to_owned(); - // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); - // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); + // // Store compressed M2L operators + // // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); - let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::All, Mode::All).unwrap(); + let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::Slim, Mode::All).unwrap(); let st = st.unwrap(); let (mst, nst) = st.shape(); let st_block = st.block((0, 0), (self.k, nst)); - let s = st_block.transpose().eval(); - let st = st.block((0, 0), (self.k, nst)).eval(); - - // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); - - // Store compressed M2L operators - // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); + let s_block = st_block.transpose().eval(); + let mut c = rlst_mat![f64, (self.k, self.k * ntransfer_vectors)]; + // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), st.shape(), c.shape(), vt.shape()); + // let st = s_block.transpose().eval(); + // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), sigma_mat.shape(), vt.shape(), st.shape()); for i in 0..self.transfer_vectors.len() { // let v_lidx = i * ncols; // let v_ridx = v_lidx + ncols; @@ -585,25 +591,29 @@ where // let block_size = right*ncols; // let start_idx = i * block_size; // let end_idx = start_idx+block_size; - let top_left = (0, i * ncols); - let dim = (self.k, ncols); - let vt_block = vt.block(top_left, dim); - let tmp = sigma_mat.dot(&vt_block.dot(&s)); // let tmp = sigma.dot(&vt_sub.dot(&st.t())); // let lidx = i * self.k; // let ridx = lidx + self.k; + // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); + let top_left = (0, i * ncols); + let dim = (self.k, ncols); + let vt_block = vt.block(top_left, dim); + + let tmp = sigma_mat.dot(&vt_block.dot(&s_block)); let top_left = (0, i * self.k); - let dim = (self.k, ncols); - // let mut c_block =; + let dim = (self.k, self.k); + c.block_mut(top_left, dim) .data_mut() .copy_from_slice(tmp.data()); - - // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); } + (u, st, c) + // let dummy = rlst_mat![f64, (1, 1)]; + // (dummy.new_like_self().eval(), dummy.new_like_self().eval(), dummy.new_like_self().eval()) + // assert!(false) } } diff --git a/fmm/Cargo.toml b/fmm/Cargo.toml index b3c1aaa7..3c3fc7e7 100644 --- a/fmm/Cargo.toml +++ b/fmm/Cargo.toml @@ -41,6 +41,7 @@ num_cpus = "1" ndrustfft = "0.4.0" num = "0.4" ndarray-ndimage = "0.3.0" +rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse" } [target.aarch64-apple-darwin] rustflags = [ "-C", "target-feature=+neon"] diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index f3c2c542..181c68ce 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,9 +1,9 @@ extern crate blas_src; use itertools::Itertools; -use ndarray::*; -use ndarray_ndimage::{pad, PadMode}; -use ndrustfft::{ndfft, ndfft_r2c, ndifft, ndifft_r2c, Complex, FftHandler, R2cFftHandler}; +// use ndarray::*; +// use ndarray_ndimage::{pad, PadMode}; +// use ndrustfft::{ndfft, ndfft_r2c, ndifft, ndifft_r2c, Complex, FftHandler, R2cFftHandler}; use rayon::prelude::*; use std::{ collections::HashMap, @@ -12,13 +12,25 @@ use std::{ time::Instant, }; +use rlst; +use rlst::algorithms::linalg::LinAlg; +use rlst::algorithms::traits::svd::{Mode, Svd}; +use rlst::algorithms::traits::pseudo_inverse::Pinv; +use rlst::common::traits::{NewLikeSelf, NewLikeTranspose, Transpose}; +use rlst::common::{ + tools::PrettyPrint, + traits::{Copy, Eval}, +}; +use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix}; +use rlst::dense::{rlst_fixed_mat, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}; + use bempp_field::{ FftFieldTranslationNaiveKiFmm, SvdFieldTranslationKiFmm, SvdFieldTranslationNaiveKiFmm, }; use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, fmm::{Fmm, FmmLoop, InteractionLists, SourceTranslation, TargetTranslation}, - kernel::Kernel, + kernel::{Kernel, EvalType}, tree::Tree, }; use bempp_tree::{ @@ -30,7 +42,7 @@ use bempp_tree::{ }, }; -use crate::{charge::Charges, linalg::pinv}; +use crate::charge::Charges; pub struct FmmData { fmm: Arc, multipoles: HashMap>>>, @@ -40,20 +52,20 @@ pub struct FmmData { charges: HashMap>>, } -type UC2Type = ArrayBase, Dim<[usize; 2]>>; +type C2EType = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; pub struct KiFmm> { order: usize, - uc2e_inv: (UC2Type, UC2Type), + uc2e_inv: C2EType, - dc2e_inv: (UC2Type, UC2Type), + dc2e_inv: C2EType, alpha_inner: f64, alpha_outer: f64, - m2m: Vec, ndarray::Dim<[usize; 2]>>>, - l2l: Vec, ndarray::Dim<[usize; 2]>>>, + m2m: Vec, + l2l: Vec, tree: T, kernel: U, m2l: V, @@ -62,90 +74,145 @@ pub struct KiFmm> { #[allow(dead_code)] impl KiFmm where - T: Kernel, + T: Kernel, U: FieldTranslationData, { - pub fn new( + pub fn new<'a>( order: usize, alpha_inner: f64, alpha_outer: f64, - // k: usize, kernel: T, tree: SingleNodeTree, m2l: U, ) -> Self { let upward_equivalent_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); - let upward_check_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_outer); + let downward_equivalent_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_outer); + let downward_check_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); - let downward_equivalent_surface = - ROOT.compute_surface(tree.get_domain(), order, alpha_outer); + let nequiv_surface = upward_equivalent_surface.len() / kernel.space_dimension(); + let ncheck_surface = upward_check_surface.len() / kernel.space_dimension(); - let downward_check_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); + // Store in RLST matrices + let upward_equivalent_surface = unsafe { + rlst_pointer_mat!['a, f64, upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] + }; + let upward_check_surface= unsafe { + rlst_pointer_mat!['a, f64, upward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] + }; + let downward_equivalent_surface = unsafe { + rlst_pointer_mat!['a, f64, downward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] + }; + let downward_check_surface = unsafe { + rlst_pointer_mat!['a, f64, downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] + }; // Compute upward check to equivalent, and downward check to equivalent Gram matrices // as well as their inverses using DGESVD. - let mut uc2e = Vec::::new(); - kernel.gram(&upward_equivalent_surface, &upward_check_surface, &mut uc2e); + // let mut uc2e = Vec::::new(); + // kernel.gram(&upward_equivalent_surface, &upward_check_surface, &mut uc2e); + let mut uc2e = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; + kernel.gram( + EvalType::Value, + upward_equivalent_surface.data(), + upward_check_surface.data(), + uc2e.data_mut() + ); - let mut dc2e = Vec::::new(); + // let mut dc2e = Vec::::new(); + // kernel.gram( + // &downward_equivalent_surface, + // &downward_check_surface, + // &mut dc2e, + // ); + + let mut dc2e = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; kernel.gram( - &downward_equivalent_surface, - &downward_check_surface, - &mut dc2e, + EvalType::Value, + downward_equivalent_surface.data(), + downward_check_surface.data(), + dc2e.data_mut() ); - let mut m2m: Vec = Vec::new(); - let mut l2l: Vec = Vec::new(); let nrows = m2l.ncoeffs(order); let ncols = m2l.ncoeffs(order); - let uc2e = Array1::from(uc2e) - .to_shape((nrows, ncols)) - .unwrap() - .to_owned(); - - let (a, b, c) = pinv(&uc2e); - let uc2e_inv = (a.to_owned(), b.dot(&c).to_owned()); - - let dc2e = Array1::from(dc2e) - .to_shape((nrows, ncols)) - .unwrap() - .to_owned(); - let (a, b, c) = pinv(&dc2e); - let dc2e_inv = (a.to_owned(), b.dot(&c).to_owned()); + // let uc2e = Array1::from(uc2e) + // .to_shape((nrows, ncols)) + // .unwrap() + // .to_owned(); + + // let (a, b, c) = pinv(&uc2e); + // let uc2e_inv = (a.to_owned(), b.dot(&c).to_owned()); + let (s, ut, v) = uc2e.linalg().pinv(None).unwrap(); + let s = s.unwrap(); + let ut = ut.unwrap(); + let v = v.unwrap(); + let mut mat_s = rlst_mat![f64, (s.len(), s.len())]; + for i in 0..s.len() { + mat_s[[i, i]] = s[i]; + } + let uc2e_inv = v.dot(&mat_s).dot(&ut); + + + // let dc2e = Array1::from(dc2e) + // .to_shape((nrows, ncols)) + // .unwrap() + // .to_owned(); + // let (a, b, c) = pinv(&dc2e); + // let dc2e_inv = (a.to_owned(), b.dot(&c).to_owned()); + let (s, ut, v) = dc2e.linalg().pinv(None).unwrap(); + let s = s.unwrap(); + let ut = ut.unwrap(); + let v = v.unwrap(); + let mut mat_s = rlst_mat![f64, (s.len(), s.len())]; + for i in 0..s.len() { + mat_s[[i, i]] = s[i]; + } + let dc2e_inv = v.dot(&mat_s).dot(&ut); // Calculate M2M/L2L matrices let children = ROOT.children(); + let mut m2m: Vec = Vec::new(); + let mut l2l: Vec = Vec::new(); for child in children.iter() { - let child_upward_equivalent_surface = - child.compute_surface(tree.get_domain(), order, alpha_inner); + let child_upward_equivalent_surface = child.compute_surface(tree.get_domain(), order, alpha_inner); + let child_downward_check_surface = child.compute_surface(tree.get_domain(), order, alpha_inner); + let child_upward_equivalent_surface = unsafe { + rlst_pointer_mat!['a, f64, child_upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] + }; + let child_downward_check_surface= unsafe { + rlst_pointer_mat!['a, f64, child_downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] + }; - let child_downward_check_surface = - child.compute_surface(tree.get_domain(), order, alpha_inner); + let mut pc2ce = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; - let mut pc2ce = Vec::new(); kernel.gram( - &child_upward_equivalent_surface, - &upward_check_surface, - &mut pc2ce, + EvalType::Value, + child_upward_equivalent_surface.data(), + upward_check_surface.data(), + pc2ce.data_mut(), ); - let pc2e = Array::from_shape_vec((nrows, ncols), pc2ce).unwrap(); - m2m.push(uc2e_inv.0.dot(&uc2e_inv.1.dot(&pc2e))); + // let pc2e = Array::from_shape_vec((nrows, ncols), pc2ce).unwrap(); + // m2m.push(uc2e_inv.0.dot(&uc2e_inv.1.dot(&pc2e))); + m2m.push(uc2e_inv.dot(&pc2ce).eval()); + + // let mut cc2pe = Vec::new(); + let mut cc2pe = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; - let mut cc2pe = Vec::new(); kernel.gram( - &downward_equivalent_surface, - &child_downward_check_surface, - &mut cc2pe, + EvalType::Value, + downward_equivalent_surface.data(), + &child_downward_check_surface.data(), + cc2pe.data_mut(), ); - let cc2pe = Array::from_shape_vec((ncols, nrows), cc2pe).unwrap(); - - l2l.push(kernel.scale(child.level()) * dc2e_inv.0.dot(&dc2e_inv.1.dot(&cc2pe))) + // let cc2pe = Array::from_shape_vec((ncols, nrows), cc2pe).unwrap(); + l2l.push((kernel.scale(child.level()) * dc2e_inv.dot(&cc2pe)).eval()); + // l2l.push(kernel.scale(child.level()) * dc2e_inv.0.dot(&dc2e_inv.1.dot(&cc2pe))) } Self { @@ -163,1075 +230,1110 @@ where } } -#[allow(dead_code)] -impl FmmData> -where - T: Kernel, - U: FieldTranslationData, -{ - pub fn new(fmm: KiFmm, _charges: Charges) -> Self { - let mut multipoles = HashMap::new(); - let mut locals = HashMap::new(); - let mut potentials = HashMap::new(); - let mut points = HashMap::new(); - let mut charges = HashMap::new(); - - if let Some(keys) = fmm.tree().get_all_keys() { - for key in keys.iter() { - multipoles.insert(*key, Arc::new(Mutex::new(Vec::new()))); - locals.insert(*key, Arc::new(Mutex::new(Vec::new()))); - potentials.insert(*key, Arc::new(Mutex::new(Vec::new()))); - if let Some(point_data) = fmm.tree().get_points(key) { - points.insert(*key, point_data.iter().cloned().collect_vec()); - - // TODO: Replace with a global index lookup at some point - charges.insert(*key, Arc::new(vec![1.0; point_data.len()])); - } - } - } - - let fmm = Arc::new(fmm); - - Self { - fmm, - multipoles, - locals, - potentials, - points, - charges, - } - } -} - -impl SourceTranslation for FmmData> -where - T: Kernel + std::marker::Send + std::marker::Sync, - U: FieldTranslationData + std::marker::Sync + std::marker::Send, -{ - fn p2m(&self) { - if let Some(leaves) = self.fmm.tree.get_leaves() { - leaves.par_iter().for_each(move |&leaf| { - let leaf_multipole_arc = Arc::clone(self.multipoles.get(&leaf).unwrap()); - let fmm_arc = Arc::clone(&self.fmm); - let leaf_charges_arc = Arc::clone(self.charges.get(&leaf).unwrap()); - - if let Some(leaf_points) = self.points.get(&leaf) { - // Lookup data - let leaf_coordinates = leaf_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let upward_check_surface = leaf.compute_surface( - &fmm_arc.tree().domain, - fmm_arc.order, - fmm_arc.alpha_outer, - ); - - let leaf_charges_view = ArrayView::from(leaf_charges_arc.deref()); - let leaf_charges_slice = leaf_charges_view.as_slice().unwrap(); - - // Calculate check potential - let mut check_potential = - vec![0.; upward_check_surface.len() / self.fmm.kernel.dim()]; - - fmm_arc.kernel.potential( - &leaf_coordinates[..], - leaf_charges_slice, - &upward_check_surface[..], - &mut check_potential[..], - ); - let check_potential = Array1::from_vec(check_potential); - - // Calculate multipole expansion - let leaf_multipole_owned = fmm_arc.kernel.scale(leaf.level()) - * fmm_arc - .uc2e_inv - .0 - .dot(&fmm_arc.uc2e_inv.1.dot(&check_potential)); - - let mut leaf_multipole_lock = leaf_multipole_arc.lock().unwrap(); - - if !leaf_multipole_lock.is_empty() { - leaf_multipole_lock - .iter_mut() - .zip(leaf_multipole_owned.iter()) - .for_each(|(c, m)| *c += *m); - } else { - leaf_multipole_lock.extend(leaf_multipole_owned); - } - } - }); - } - } - - fn m2m(&self, level: u64) { - // Parallelise over nodes at a given level - if let Some(sources) = self.fmm.tree.get_keys(level) { - sources.par_iter().for_each(move |&source| { - let source_multipole_arc = Arc::clone(self.multipoles.get(&source).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - if !source_multipole_lock.is_empty() { - let target_multipole_arc = - Arc::clone(self.multipoles.get(&source.parent()).unwrap()); - let fmm_arc = Arc::clone(&self.fmm); - - let operator_index = - source.siblings().iter().position(|&x| x == source).unwrap(); - - let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); - - let target_multipole_owned = - fmm_arc.m2m[operator_index].dot(&source_multipole_view); - let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); - - if !target_multipole_lock.is_empty() { - target_multipole_lock - .iter_mut() - .zip(target_multipole_owned.iter()) - .for_each(|(c, m)| *c += *m); - } else { - target_multipole_lock.extend(target_multipole_owned); - } - } - }) - } - } -} - -impl TargetTranslation for FmmData> -where - T: Kernel + std::marker::Sync + std::marker::Send, - U: FieldTranslationData + std::marker::Sync + std::marker::Send, -{ - fn l2l(&self, level: u64) { - if let Some(targets) = self.fmm.tree.get_keys(level) { - targets.par_iter().for_each(move |&target| { - let source_local_arc = Arc::clone(self.locals.get(&target.parent()).unwrap()); - let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); - let fmm = Arc::clone(&self.fmm); - - let operator_index = target.siblings().iter().position(|&x| x == target).unwrap(); - - let source_local_lock = source_local_arc.lock().unwrap(); - let source_local_view = ArrayView::from(source_local_lock.deref()); - - let target_local_owned = fmm.l2l[operator_index].dot(&source_local_view); - let mut target_local_lock = target_local_arc.lock().unwrap(); - - if !target_local_lock.is_empty() { - target_local_lock - .iter_mut() - .zip(target_local_owned.iter()) - .for_each(|(c, m)| *c += *m); - } else { - target_local_lock.extend(target_local_owned); - } - }) - } - } - - fn m2p(&self) { - if let Some(targets) = self.fmm.tree.get_leaves() { - targets.par_iter().for_each(move |&target| { - let fmm_arc = Arc::clone(&self.fmm); - let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); - - if let Some(points) = fmm_arc.tree().get_points(&target) { - if let Some(w_list) = fmm_arc.get_w_list(&target) { - for source in w_list.iter() { - let source_multipole_arc = - Arc::clone(self.multipoles.get(source).unwrap()); - - let upward_equivalent_surface = source.compute_surface( - fmm_arc.tree().get_domain(), - fmm_arc.order(), - fmm_arc.alpha_inner, - ); - - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - let source_multipole_view = - ArrayView::from(source_multipole_lock.deref()); - let source_multipole_slice = source_multipole_view.as_slice().unwrap(); - - let target_coordinates = points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let mut target_potential = - vec![0f64; target_coordinates.len() / self.fmm.kernel.dim()]; - - fmm_arc.kernel().potential( - &upward_equivalent_surface[..], - source_multipole_slice, - &target_coordinates[..], - &mut target_potential, - ); - - let mut target_potential_lock = target_potential_arc.lock().unwrap(); - - if !target_potential_lock.is_empty() { - target_potential_lock - .iter_mut() - .zip(target_potential.iter()) - .for_each(|(p, n)| *p += *n); - } else { - target_potential_lock.extend(target_potential); - } - } - } - } - }) - } - } - - fn l2p(&self) { - if let Some(targets) = self.fmm.tree().get_leaves() { - targets.par_iter().for_each(move |&leaf| { - let fmm_arc = Arc::clone(&self.fmm); - let target_potential_arc = Arc::clone(self.potentials.get(&leaf).unwrap()); - let source_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); - - if let Some(target_points) = fmm_arc.tree().get_points(&leaf) { - // Lookup data - let target_coordinates = target_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let downward_equivalent_surface = leaf.compute_surface( - &fmm_arc.tree().domain, - fmm_arc.order, - fmm_arc.alpha_outer, - ); - - let source_local_lock = source_local_arc.lock().unwrap(); - let source_local_ref = ArrayView::from(source_local_lock.deref()); - let source_local_slice = source_local_ref.as_slice().unwrap(); - - let mut target_potential = - vec![0f64; target_coordinates.len() / self.fmm.kernel.dim()]; - - fmm_arc.kernel().potential( - &downward_equivalent_surface[..], - source_local_slice, - &target_coordinates[..], - &mut target_potential, - ); - - let mut out_potential_lock = target_potential_arc.lock().unwrap(); - - if !out_potential_lock.is_empty() { - out_potential_lock - .iter_mut() - .zip(target_potential.iter()) - .for_each(|(p, n)| *p += *n); - } else { - out_potential_lock.extend(target_potential); - } - } - }) - } - } - - fn p2l(&self) { - if let Some(targets) = self.fmm.tree().get_leaves() { - targets.par_iter().for_each(move |&leaf| { - let fmm_arc = Arc::clone(&self.fmm); - let target_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); - - if let Some(x_list) = fmm_arc.get_x_list(&leaf) { - for source in x_list.iter() { - if let Some(source_points) = fmm_arc.tree().get_points(source) { - let source_coordinates = source_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let source_charges = self.charges.get(source).unwrap(); - let source_charges_view = ArrayView::from(source_charges.deref()); - let source_charges_slice = source_charges_view.as_slice().unwrap(); - - let downward_check_surface = leaf.compute_surface( - &fmm_arc.tree().domain, - fmm_arc.order, - fmm_arc.alpha_inner, - ); - - let mut downward_check_potential = - vec![0f64; downward_check_surface.len() / fmm_arc.kernel().dim()]; - - fmm_arc.kernel.potential( - &source_coordinates[..], - source_charges_slice, - &downward_check_surface[..], - &mut downward_check_potential[..], - ); - - let downward_check_potential = - ArrayView::from(&downward_check_potential); - - let mut target_local_lock = target_local_arc.lock().unwrap(); - - let target_local_owned = fmm_arc.kernel().scale(leaf.level()) - * &fmm_arc - .dc2e_inv - .0 - .dot(&fmm_arc.dc2e_inv.1.dot(&downward_check_potential)); - - if !target_local_lock.is_empty() { - target_local_lock - .iter_mut() - .zip(target_local_owned.iter()) - .for_each(|(o, l)| *o += *l); - } else { - target_local_lock.extend(target_local_owned); - } - } - } - } - }) - } - } - - fn p2p(&self) { - if let Some(targets) = self.fmm.tree.get_leaves() { - targets.par_iter().for_each(move |&target| { - let fmm_arc = Arc::clone(&self.fmm); - let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); - - if let Some(target_points) = fmm_arc.tree().get_points(&target) { - let target_coordinates = target_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - if let Some(u_list) = fmm_arc.get_u_list(&target) { - for source in u_list.iter() { - if let Some(source_points) = fmm_arc.tree().get_points(source) { - let source_coordinates = source_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let source_charges_arc = - Arc::clone(self.charges.get(source).unwrap()); - let source_charges_view = - ArrayView::from(source_charges_arc.deref()); - let source_charges_slice = source_charges_view.as_slice().unwrap(); - - let mut target_potential = - vec![0f64; target_coordinates.len() / self.fmm.kernel.dim()]; - - fmm_arc.kernel.potential( - &source_coordinates[..], - source_charges_slice, - &target_coordinates[..], - &mut target_potential, - ); - - let mut target_potential_lock = - target_potential_arc.lock().unwrap(); - - if !target_potential_lock.is_empty() { - target_potential_lock - .iter_mut() - .zip(target_potential.iter()) - .for_each(|(c, p)| *c += *p); - } else { - target_potential_lock.extend(target_potential) - } - } - } - } - } - }) - } - } -} - -impl FieldTranslation for FmmData>> -where - T: Kernel + std::marker::Sync + std::marker::Send + Default, -{ - fn m2l(&self, level: u64) { - if let Some(targets) = self.fmm.tree().get_keys(level) { - let mut transfer_vector_to_m2l = - HashMap::>>>::new(); - - for tv in self.fmm.m2l.transfer_vectors.iter() { - transfer_vector_to_m2l.insert(tv.vector, Arc::new(Mutex::new(Vec::new()))); - } - - targets.par_iter().enumerate().for_each(|(_i, &target)| { - if let Some(v_list) = self.fmm.get_v_list(&target) { - let calculated_transfer_vectors = v_list - .iter() - .map(|source| target.find_transfer_vector(source)) - .collect::>(); - for (transfer_vector, &source) in - calculated_transfer_vectors.iter().zip(v_list.iter()) - { - let m2l_arc = - Arc::clone(transfer_vector_to_m2l.get(transfer_vector).unwrap()); - let mut m2l_lock = m2l_arc.lock().unwrap(); - m2l_lock.push((source, target)); - } - } - }); - - let mut transfer_vector_to_m2l_rw_lock = - HashMap::>>>::new(); - - // Find all multipole expansions and allocate - for (&transfer_vector, m2l_arc) in transfer_vector_to_m2l.iter() { - transfer_vector_to_m2l_rw_lock.insert( - transfer_vector, - Arc::new(RwLock::new(m2l_arc.lock().unwrap().clone())), - ); - } - - transfer_vector_to_m2l_rw_lock - .par_iter() - .for_each(|(transfer_vector, m2l_arc)| { - let c_idx = self - .fmm - .m2l - .transfer_vectors - .iter() - .position(|x| x.vector == *transfer_vector) - .unwrap(); - - let c_lidx = c_idx * self.fmm.m2l.k; - let c_ridx = c_lidx + self.fmm.m2l.k; - let c_sub = self.fmm.m2l.m2l.2.slice(s![.., c_lidx..c_ridx]); - - let m2l_rw = m2l_arc.read().unwrap(); - let mut multipoles = Array2::zeros((self.fmm.m2l.k, m2l_rw.len())); - - for (i, (source, _)) in m2l_rw.iter().enumerate() { - let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); - - // Compressed multipole - let compressed_source_multipole_owned = - self.fmm.m2l.m2l.1.dot(&source_multipole_view); - - multipoles - .slice_mut(s![.., i]) - .assign(&compressed_source_multipole_owned); - } - - // // Compute convolution - let compressed_check_potential_owned = c_sub.dot(&multipoles); - - // Post process to find check potential - let check_potential_owned = - self.fmm.m2l.m2l.0.dot(&compressed_check_potential_owned); - - // Compute local - let locals_owned = self.m2l_scale(level) - * self.fmm.kernel.scale(level) - * self - .fmm - .dc2e_inv - .0 - .dot(&self.fmm.dc2e_inv.1.dot(&check_potential_owned)); - - // Assign locals - for (i, (_, target)) in m2l_rw.iter().enumerate() { - let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); - let mut target_local_lock = target_local_arc.lock().unwrap(); - let target_local_owned = locals_owned.slice(s![.., i]); - if !target_local_lock.is_empty() { - target_local_lock - .iter_mut() - .zip(target_local_owned.iter()) - .for_each(|(c, m)| *c += *m); - } else { - target_local_lock.extend(target_local_owned); - } - } - }); - } - } - - fn m2l_scale(&self, level: u64) -> f64 { - if level < 2 { - panic!("M2L only performed on level 2 and below") - } - - if level == 2 { - 1. / 2. - } else { - 2_f64.powf((level - 3) as f64) - } - } -} - -impl FieldTranslation for FmmData>> -where - T: Kernel + std::marker::Sync + std::marker::Send + Default, -{ - fn m2l(&self, level: u64) { - if let Some(targets) = self.fmm.tree().get_keys(level) { - // Find transfer vectors - targets.par_iter().for_each(move |&target| { - let fmm_arc: Arc>> = - Arc::clone(&self.fmm); - let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); - - if let Some(v_list) = fmm_arc.get_v_list(&target) { - for (_i, source) in v_list.iter().enumerate() { - // Locate correct components of compressed M2L matrix. - let transfer_vector = target.find_transfer_vector(source); - - let c_idx = fmm_arc - .m2l - .transfer_vectors - .iter() - .position(|x| x.vector == transfer_vector) - .unwrap(); - let c_lidx = c_idx * fmm_arc.m2l.k; - let c_ridx = c_lidx + fmm_arc.m2l.k; - let c_sub = fmm_arc.m2l.m2l.2.slice(s![.., c_lidx..c_ridx]); - - let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); - - // Compressed multipole - let compressed_source_multipole_owned = - fmm_arc.m2l.m2l.1.dot(&source_multipole_view); - - // Convolution to find compressed check potential - let compressed_check_potential_owned = - c_sub.dot(&compressed_source_multipole_owned); - - // Post process to find check potential - let check_potential_owned = - fmm_arc.m2l.m2l.0.dot(&compressed_check_potential_owned); - - // Compute local - let target_local_owned = self.m2l_scale(target.level()) - * fmm_arc.kernel.scale(target.level()) - * fmm_arc - .dc2e_inv - .0 - .dot(&self.fmm.dc2e_inv.1.dot(&check_potential_owned)); - - // Store computation - let mut target_local_lock = target_local_arc.lock().unwrap(); - - if !target_local_lock.is_empty() { - target_local_lock - .iter_mut() - .zip(target_local_owned.iter()) - .for_each(|(c, m)| *c += *m); - } else { - target_local_lock.extend(target_local_owned); - } - } - } - }) - } - } - - fn m2l_scale(&self, level: u64) -> f64 { - if level < 2 { - panic!("M2L only performed on level 2 and below") - } - - if level == 2 { - 1. / 2. - } else { - 2_f64.powf((level - 3) as f64) - } - } -} - -impl FieldTranslation for FmmData>> -where - T: Kernel + std::marker::Sync + std::marker::Send + Default, -{ - fn m2l(&self, level: u64) { - if let Some(targets) = self.fmm.tree().get_keys(level) { - targets.par_iter().for_each(move |&target| { - let fmm_arc = Arc::clone(&self.fmm); - let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); - - if let Some(v_list) = fmm_arc.get_v_list(&target) { - for (_, source) in v_list.iter().enumerate() { - let transfer_vector = target.find_transfer_vector(source); - - // Locate correct precomputed FFT of kernel interactions - let k_idx = fmm_arc - .m2l - .transfer_vectors - .iter() - .position(|x| x.vector == transfer_vector) - .unwrap(); - - // Compute FFT of signal - let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - let signal = fmm_arc - .m2l - .compute_signal(fmm_arc.order, source_multipole_lock.deref()); - - // 1. Pad the signal - let m = signal.len(); - let n = signal[0].len(); - let k = signal[0][0].len(); - - let p = 2 * m; - let q = 2 * n; - let r = 2 * k; - - let signal = Array3::from_shape_vec( - (m, n, k), - signal.into_iter().flatten().flatten().collect(), - ) - .unwrap(); - - let padding = [[p - m, 0], [q - n, 0], [r - k, 0]]; - let padded_signal = pad(&signal, &padding, PadMode::Constant(0.)); - - // 2. FFT of the padded signal - // 2.1 Init the handlers for FFTs along each axis - let mut handler_ax0 = FftHandler::::new(p); - let mut handler_ax1 = FftHandler::::new(q); - let mut handler_ax2 = R2cFftHandler::::new(r); - - // 2.2 Compute the transform along each axis - let mut padded_signal_hat: Array3> = - Array3::zeros((p, q, r / 2 + 1)); - let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); - ndfft_r2c(&padded_signal, &mut tmp1, &mut handler_ax2, 2); - let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); - ndfft(&tmp1, &mut tmp2, &mut handler_ax1, 1); - ndfft(&tmp2, &mut padded_signal_hat, &mut handler_ax0, 0); - - // 3.Compute convolution to find check potential - let padded_kernel_hat = &fmm_arc.m2l.m2l[k_idx]; - - // Hadamard product - let check_potential_hat = padded_kernel_hat * padded_signal_hat; - - // 3.1 Compute iFFT to find check potentials - let mut check_potential: Array3 = Array3::zeros((p, q, r)); - let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); - ndifft(&check_potential_hat, &mut tmp1, &mut handler_ax0, 0); - let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); - ndifft(&tmp1, &mut tmp2, &mut handler_ax1, 1); - ndifft_r2c(&tmp2, &mut check_potential, &mut handler_ax2, 2); - - // Filter check potentials - let check_potential = - check_potential.slice(s![p - m - 1..p, q - n - 1..q, r - k - 1..r]); - - let (_, target_surface_idxs) = target.surface_grid(fmm_arc.order); - - let mut tmp = Vec::new(); - for index in target_surface_idxs.chunks_exact(fmm_arc.kernel.dim()) { - let element = check_potential[[index[0], index[1], index[2]]]; - tmp.push(element); - } - - // Compute local coefficients from check potentials - let check_potential = Array::from_shape_vec( - target_surface_idxs.len() / fmm_arc.kernel.dim(), - tmp, - ) - .unwrap(); - - // Compute local - let target_local_owned = self.m2l_scale(target.level()) - * fmm_arc.kernel.scale(target.level()) - * fmm_arc - .dc2e_inv - .0 - .dot(&self.fmm.dc2e_inv.1.dot(&check_potential)); - - // Store computation - let mut target_local_lock = target_local_arc.lock().unwrap(); - - if !target_local_lock.is_empty() { - target_local_lock - .iter_mut() - .zip(target_local_owned.iter()) - .for_each(|(c, m)| *c += *m); - } else { - target_local_lock.extend(target_local_owned); - } - } - } - }) - } - } - - fn m2l_scale(&self, level: u64) -> f64 { - if level < 2 { - panic!("M2L only performed on level 2 and below") - } - - if level == 2 { - 1. / 2. - } else { - 2_f64.powf((level - 3) as f64) - } - } -} - -impl InteractionLists for KiFmm -where - T: Tree, - U: Kernel, - V: FieldTranslationData, -{ - type Tree = T; - - fn get_u_list( - &self, - key: &::NodeIndex, - ) -> Option<::NodeIndices> { - let mut u_list = Vec::::new(); - let neighbours = key.neighbors(); - - // Child level - let mut neighbors_children_adj: Vec = neighbours - .iter() - .flat_map(|n| n.children()) - .filter(|nc| self.tree().get_all_keys_set().contains(nc) && key.is_adjacent(nc)) - .collect(); - - // Key level - let mut neighbors_adj: Vec = neighbours - .iter() - .filter(|n| self.tree().get_all_keys_set().contains(n) && key.is_adjacent(n)) - .cloned() - .collect(); - - // Parent level - let mut parent_neighbours_adj: Vec = key - .parent() - .neighbors() - .into_iter() - .filter(|pn| self.tree().get_all_keys_set().contains(pn) && key.is_adjacent(pn)) - .collect(); - - u_list.append(&mut neighbors_children_adj); - u_list.append(&mut neighbors_adj); - u_list.append(&mut parent_neighbours_adj); - u_list.push(*key); - - if !u_list.is_empty() { - Some(MortonKeys { - keys: u_list, - index: 0, - }) - } else { - None - } - } - - fn get_v_list( - &self, - key: &::NodeIndex, - ) -> Option<::NodeIndices> { - if key.level() >= 2 { - let v_list = key - .parent() - .neighbors() - .iter() - .flat_map(|pn| pn.children()) - .filter(|pnc| self.tree().get_all_keys_set().contains(pnc) && !key.is_adjacent(pnc)) - .collect_vec(); - - if !v_list.is_empty() { - return Some(MortonKeys { - keys: v_list, - index: 0, - }); - } else { - return None; - } - } - None - } - - fn get_w_list( - &self, - key: &::NodeIndex, - ) -> Option<::NodeIndices> { - // Child level - let w_list = key - .neighbors() - .iter() - .flat_map(|n| n.children()) - .filter(|nc| self.tree().get_all_keys_set().contains(nc) && !key.is_adjacent(nc)) - .collect_vec(); - - if !w_list.is_empty() { - Some(MortonKeys { - keys: w_list, - index: 0, - }) - } else { - None - } - } - - fn get_x_list( - &self, - key: &::NodeIndex, - ) -> Option<::NodeIndices> { - let x_list = key - .parent() - .neighbors() - .into_iter() - .filter(|pn| self.tree.get_all_keys_set().contains(pn) && !key.is_adjacent(pn)) - .collect_vec(); - - if !x_list.is_empty() { - Some(MortonKeys { - keys: x_list, - index: 0, - }) - } else { - None - } - } -} - -impl Fmm for KiFmm -where - T: Tree, - U: Kernel, - V: FieldTranslationData, -{ - type Tree = T; - type Kernel = U; - - fn order(&self) -> usize { - self.order - } - - fn kernel(&self) -> &Self::Kernel { - &self.kernel - } - - fn tree(&self) -> &Self::Tree { - &self.tree - } -} - -impl FmmLoop for FmmData -where - T: Fmm, - FmmData: SourceTranslation + TargetTranslation + FieldTranslation, -{ - fn upward_pass(&self) { - // Particle to Multipole - let start = Instant::now(); - self.p2m(); - println!("P2M = {:?}ms", start.elapsed().as_millis()); - - // Multipole to Multipole - let depth = self.fmm.tree().get_depth(); - let start = Instant::now(); - for level in (1..=depth).rev() { - self.m2m(level) - } - println!("M2M = {:?}ms", start.elapsed().as_millis()); - } - - fn downward_pass(&self) { - let depth = self.fmm.tree().get_depth(); - let mut l2l_time = 0; - let mut m2l_time = 0; - for level in 2..=depth { - if level > 2 { - let start = Instant::now(); - self.l2l(level); - l2l_time += start.elapsed().as_millis(); - } - - let start = Instant::now(); - self.m2l(level); - m2l_time += start.elapsed().as_millis(); - } - println!("M2L = {:?}ms", m2l_time); - println!("L2L = {:?}ms", l2l_time); - - let start = Instant::now(); - // Leaf level computations - self.p2l(); - println!("P2L = {:?}ms", start.elapsed().as_millis()); - - // Sum all potential contributions - let start = Instant::now(); - self.m2p(); - println!("M2P = {:?}ms", start.elapsed().as_millis()); - let start = Instant::now(); - self.p2p(); - println!("P2P = {:?}ms", start.elapsed().as_millis()); - let start = Instant::now(); - self.l2p(); - println!("L2P = {:?}ms", start.elapsed().as_millis()); - } - - fn run(&self) { - self.upward_pass(); - self.downward_pass(); - } -} +// #[allow(dead_code)] +// impl FmmData> +// where +// T: Kernel, +// U: FieldTranslationData, +// { +// pub fn new(fmm: KiFmm, _charges: Charges) -> Self { +// let mut multipoles = HashMap::new(); +// let mut locals = HashMap::new(); +// let mut potentials = HashMap::new(); +// let mut points = HashMap::new(); +// let mut charges = HashMap::new(); + +// if let Some(keys) = fmm.tree().get_all_keys() { +// for key in keys.iter() { +// multipoles.insert(*key, Arc::new(Mutex::new(Vec::new()))); +// locals.insert(*key, Arc::new(Mutex::new(Vec::new()))); +// potentials.insert(*key, Arc::new(Mutex::new(Vec::new()))); +// if let Some(point_data) = fmm.tree().get_points(key) { +// points.insert(*key, point_data.iter().cloned().collect_vec()); + +// // TODO: Replace with a global index lookup at some point +// charges.insert(*key, Arc::new(vec![1.0; point_data.len()])); +// } +// } +// } + +// let fmm = Arc::new(fmm); + +// Self { +// fmm, +// multipoles, +// locals, +// potentials, +// points, +// charges, +// } +// } +// } + +// impl SourceTranslation for FmmData> +// where +// T: Kernel + std::marker::Send + std::marker::Sync, +// U: FieldTranslationData + std::marker::Sync + std::marker::Send, +// { +// fn p2m(&self) { +// if let Some(leaves) = self.fmm.tree.get_leaves() { +// leaves.par_iter().for_each(move |&leaf| { +// let leaf_multipole_arc = Arc::clone(self.multipoles.get(&leaf).unwrap()); +// let fmm_arc = Arc::clone(&self.fmm); +// let leaf_charges_arc = Arc::clone(self.charges.get(&leaf).unwrap()); + +// if let Some(leaf_points) = self.points.get(&leaf) { +// // Lookup data +// let leaf_coordinates = leaf_points +// .iter() +// .map(|p| p.coordinate) +// .flat_map(|[x, y, z]| vec![x, y, z]) +// .collect_vec(); + +// let upward_check_surface = leaf.compute_surface( +// &fmm_arc.tree().domain, +// fmm_arc.order, +// fmm_arc.alpha_outer, +// ); + +// let leaf_charges_view = ArrayView::from(leaf_charges_arc.deref()); +// let leaf_charges_slice = leaf_charges_view.as_slice().unwrap(); + +// // Calculate check potential +// let mut check_potential = +// vec![0.; upward_check_surface.len() / self.fmm.kernel.dim()]; + +// fmm_arc.kernel.potential( +// &leaf_coordinates[..], +// leaf_charges_slice, +// &upward_check_surface[..], +// &mut check_potential[..], +// ); +// let check_potential = Array1::from_vec(check_potential); + +// // Calculate multipole expansion +// let leaf_multipole_owned = fmm_arc.kernel.scale(leaf.level()) +// * fmm_arc +// .uc2e_inv +// .0 +// .dot(&fmm_arc.uc2e_inv.1.dot(&check_potential)); + +// let mut leaf_multipole_lock = leaf_multipole_arc.lock().unwrap(); + +// if !leaf_multipole_lock.is_empty() { +// leaf_multipole_lock +// .iter_mut() +// .zip(leaf_multipole_owned.iter()) +// .for_each(|(c, m)| *c += *m); +// } else { +// leaf_multipole_lock.extend(leaf_multipole_owned); +// } +// } +// }); +// } +// } + +// fn m2m(&self, level: u64) { +// // Parallelise over nodes at a given level +// if let Some(sources) = self.fmm.tree.get_keys(level) { +// sources.par_iter().for_each(move |&source| { +// let source_multipole_arc = Arc::clone(self.multipoles.get(&source).unwrap()); +// let source_multipole_lock = source_multipole_arc.lock().unwrap(); + +// if !source_multipole_lock.is_empty() { +// let target_multipole_arc = +// Arc::clone(self.multipoles.get(&source.parent()).unwrap()); +// let fmm_arc = Arc::clone(&self.fmm); + +// let operator_index = +// source.siblings().iter().position(|&x| x == source).unwrap(); + +// let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); + +// let target_multipole_owned = +// fmm_arc.m2m[operator_index].dot(&source_multipole_view); +// let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); + +// if !target_multipole_lock.is_empty() { +// target_multipole_lock +// .iter_mut() +// .zip(target_multipole_owned.iter()) +// .for_each(|(c, m)| *c += *m); +// } else { +// target_multipole_lock.extend(target_multipole_owned); +// } +// } +// }) +// } +// } +// } + +// impl TargetTranslation for FmmData> +// where +// T: Kernel + std::marker::Sync + std::marker::Send, +// U: FieldTranslationData + std::marker::Sync + std::marker::Send, +// { +// fn l2l(&self, level: u64) { +// if let Some(targets) = self.fmm.tree.get_keys(level) { +// targets.par_iter().for_each(move |&target| { +// let source_local_arc = Arc::clone(self.locals.get(&target.parent()).unwrap()); +// let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); +// let fmm = Arc::clone(&self.fmm); + +// let operator_index = target.siblings().iter().position(|&x| x == target).unwrap(); + +// let source_local_lock = source_local_arc.lock().unwrap(); +// let source_local_view = ArrayView::from(source_local_lock.deref()); + +// let target_local_owned = fmm.l2l[operator_index].dot(&source_local_view); +// let mut target_local_lock = target_local_arc.lock().unwrap(); + +// if !target_local_lock.is_empty() { +// target_local_lock +// .iter_mut() +// .zip(target_local_owned.iter()) +// .for_each(|(c, m)| *c += *m); +// } else { +// target_local_lock.extend(target_local_owned); +// } +// }) +// } +// } + +// fn m2p(&self) { +// if let Some(targets) = self.fmm.tree.get_leaves() { +// targets.par_iter().for_each(move |&target| { +// let fmm_arc = Arc::clone(&self.fmm); +// let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); + +// if let Some(points) = fmm_arc.tree().get_points(&target) { +// if let Some(w_list) = fmm_arc.get_w_list(&target) { +// for source in w_list.iter() { +// let source_multipole_arc = +// Arc::clone(self.multipoles.get(source).unwrap()); + +// let upward_equivalent_surface = source.compute_surface( +// fmm_arc.tree().get_domain(), +// fmm_arc.order(), +// fmm_arc.alpha_inner, +// ); + +// let source_multipole_lock = source_multipole_arc.lock().unwrap(); +// let source_multipole_view = +// ArrayView::from(source_multipole_lock.deref()); +// let source_multipole_slice = source_multipole_view.as_slice().unwrap(); + +// let target_coordinates = points +// .iter() +// .map(|p| p.coordinate) +// .flat_map(|[x, y, z]| vec![x, y, z]) +// .collect_vec(); + +// let mut target_potential = +// vec![0f64; target_coordinates.len() / self.fmm.kernel.dim()]; + +// fmm_arc.kernel().potential( +// &upward_equivalent_surface[..], +// source_multipole_slice, +// &target_coordinates[..], +// &mut target_potential, +// ); + +// let mut target_potential_lock = target_potential_arc.lock().unwrap(); + +// if !target_potential_lock.is_empty() { +// target_potential_lock +// .iter_mut() +// .zip(target_potential.iter()) +// .for_each(|(p, n)| *p += *n); +// } else { +// target_potential_lock.extend(target_potential); +// } +// } +// } +// } +// }) +// } +// } + +// fn l2p(&self) { +// if let Some(targets) = self.fmm.tree().get_leaves() { +// targets.par_iter().for_each(move |&leaf| { +// let fmm_arc = Arc::clone(&self.fmm); +// let target_potential_arc = Arc::clone(self.potentials.get(&leaf).unwrap()); +// let source_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); + +// if let Some(target_points) = fmm_arc.tree().get_points(&leaf) { +// // Lookup data +// let target_coordinates = target_points +// .iter() +// .map(|p| p.coordinate) +// .flat_map(|[x, y, z]| vec![x, y, z]) +// .collect_vec(); + +// let downward_equivalent_surface = leaf.compute_surface( +// &fmm_arc.tree().domain, +// fmm_arc.order, +// fmm_arc.alpha_outer, +// ); + +// let source_local_lock = source_local_arc.lock().unwrap(); +// let source_local_ref = ArrayView::from(source_local_lock.deref()); +// let source_local_slice = source_local_ref.as_slice().unwrap(); + +// let mut target_potential = +// vec![0f64; target_coordinates.len() / self.fmm.kernel.dim()]; + +// fmm_arc.kernel().potential( +// &downward_equivalent_surface[..], +// source_local_slice, +// &target_coordinates[..], +// &mut target_potential, +// ); + +// let mut out_potential_lock = target_potential_arc.lock().unwrap(); + +// if !out_potential_lock.is_empty() { +// out_potential_lock +// .iter_mut() +// .zip(target_potential.iter()) +// .for_each(|(p, n)| *p += *n); +// } else { +// out_potential_lock.extend(target_potential); +// } +// } +// }) +// } +// } + +// fn p2l(&self) { +// if let Some(targets) = self.fmm.tree().get_leaves() { +// targets.par_iter().for_each(move |&leaf| { +// let fmm_arc = Arc::clone(&self.fmm); +// let target_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); + +// if let Some(x_list) = fmm_arc.get_x_list(&leaf) { +// for source in x_list.iter() { +// if let Some(source_points) = fmm_arc.tree().get_points(source) { +// let source_coordinates = source_points +// .iter() +// .map(|p| p.coordinate) +// .flat_map(|[x, y, z]| vec![x, y, z]) +// .collect_vec(); + +// let source_charges = self.charges.get(source).unwrap(); +// let source_charges_view = ArrayView::from(source_charges.deref()); +// let source_charges_slice = source_charges_view.as_slice().unwrap(); + +// let downward_check_surface = leaf.compute_surface( +// &fmm_arc.tree().domain, +// fmm_arc.order, +// fmm_arc.alpha_inner, +// ); + +// let mut downward_check_potential = +// vec![0f64; downward_check_surface.len() / fmm_arc.kernel().dim()]; + +// fmm_arc.kernel.potential( +// &source_coordinates[..], +// source_charges_slice, +// &downward_check_surface[..], +// &mut downward_check_potential[..], +// ); + +// let downward_check_potential = +// ArrayView::from(&downward_check_potential); + +// let mut target_local_lock = target_local_arc.lock().unwrap(); + +// let target_local_owned = fmm_arc.kernel().scale(leaf.level()) +// * &fmm_arc +// .dc2e_inv +// .0 +// .dot(&fmm_arc.dc2e_inv.1.dot(&downward_check_potential)); + +// if !target_local_lock.is_empty() { +// target_local_lock +// .iter_mut() +// .zip(target_local_owned.iter()) +// .for_each(|(o, l)| *o += *l); +// } else { +// target_local_lock.extend(target_local_owned); +// } +// } +// } +// } +// }) +// } +// } + +// fn p2p(&self) { +// if let Some(targets) = self.fmm.tree.get_leaves() { +// targets.par_iter().for_each(move |&target| { +// let fmm_arc = Arc::clone(&self.fmm); +// let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); + +// if let Some(target_points) = fmm_arc.tree().get_points(&target) { +// let target_coordinates = target_points +// .iter() +// .map(|p| p.coordinate) +// .flat_map(|[x, y, z]| vec![x, y, z]) +// .collect_vec(); + +// if let Some(u_list) = fmm_arc.get_u_list(&target) { +// for source in u_list.iter() { +// if let Some(source_points) = fmm_arc.tree().get_points(source) { +// let source_coordinates = source_points +// .iter() +// .map(|p| p.coordinate) +// .flat_map(|[x, y, z]| vec![x, y, z]) +// .collect_vec(); + +// let source_charges_arc = +// Arc::clone(self.charges.get(source).unwrap()); +// let source_charges_view = +// ArrayView::from(source_charges_arc.deref()); +// let source_charges_slice = source_charges_view.as_slice().unwrap(); + +// let mut target_potential = +// vec![0f64; target_coordinates.len() / self.fmm.kernel.dim()]; + +// fmm_arc.kernel.potential( +// &source_coordinates[..], +// source_charges_slice, +// &target_coordinates[..], +// &mut target_potential, +// ); + +// let mut target_potential_lock = +// target_potential_arc.lock().unwrap(); + +// if !target_potential_lock.is_empty() { +// target_potential_lock +// .iter_mut() +// .zip(target_potential.iter()) +// .for_each(|(c, p)| *c += *p); +// } else { +// target_potential_lock.extend(target_potential) +// } +// } +// } +// } +// } +// }) +// } +// } +// } + +// impl FieldTranslation for FmmData>> +// where +// T: Kernel + std::marker::Sync + std::marker::Send + Default, +// { +// fn m2l(&self, level: u64) { +// if let Some(targets) = self.fmm.tree().get_keys(level) { +// let mut transfer_vector_to_m2l = +// HashMap::>>>::new(); + +// for tv in self.fmm.m2l.transfer_vectors.iter() { +// transfer_vector_to_m2l.insert(tv.vector, Arc::new(Mutex::new(Vec::new()))); +// } + +// targets.par_iter().enumerate().for_each(|(_i, &target)| { +// if let Some(v_list) = self.fmm.get_v_list(&target) { +// let calculated_transfer_vectors = v_list +// .iter() +// .map(|source| target.find_transfer_vector(source)) +// .collect::>(); +// for (transfer_vector, &source) in +// calculated_transfer_vectors.iter().zip(v_list.iter()) +// { +// let m2l_arc = +// Arc::clone(transfer_vector_to_m2l.get(transfer_vector).unwrap()); +// let mut m2l_lock = m2l_arc.lock().unwrap(); +// m2l_lock.push((source, target)); +// } +// } +// }); + +// let mut transfer_vector_to_m2l_rw_lock = +// HashMap::>>>::new(); + +// // Find all multipole expansions and allocate +// for (&transfer_vector, m2l_arc) in transfer_vector_to_m2l.iter() { +// transfer_vector_to_m2l_rw_lock.insert( +// transfer_vector, +// Arc::new(RwLock::new(m2l_arc.lock().unwrap().clone())), +// ); +// } + +// transfer_vector_to_m2l_rw_lock +// .par_iter() +// .for_each(|(transfer_vector, m2l_arc)| { +// let c_idx = self +// .fmm +// .m2l +// .transfer_vectors +// .iter() +// .position(|x| x.vector == *transfer_vector) +// .unwrap(); + +// let c_lidx = c_idx * self.fmm.m2l.k; +// let c_ridx = c_lidx + self.fmm.m2l.k; +// let c_sub = self.fmm.m2l.m2l.2.slice(s![.., c_lidx..c_ridx]); + +// let m2l_rw = m2l_arc.read().unwrap(); +// let mut multipoles = Array2::zeros((self.fmm.m2l.k, m2l_rw.len())); + +// for (i, (source, _)) in m2l_rw.iter().enumerate() { +// let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); +// let source_multipole_lock = source_multipole_arc.lock().unwrap(); +// let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); + +// // Compressed multipole +// let compressed_source_multipole_owned = +// self.fmm.m2l.m2l.1.dot(&source_multipole_view); + +// multipoles +// .slice_mut(s![.., i]) +// .assign(&compressed_source_multipole_owned); +// } + +// // // Compute convolution +// let compressed_check_potential_owned = c_sub.dot(&multipoles); + +// // Post process to find check potential +// let check_potential_owned = +// self.fmm.m2l.m2l.0.dot(&compressed_check_potential_owned); + +// // Compute local +// let locals_owned = self.m2l_scale(level) +// * self.fmm.kernel.scale(level) +// * self +// .fmm +// .dc2e_inv +// .0 +// .dot(&self.fmm.dc2e_inv.1.dot(&check_potential_owned)); + +// // Assign locals +// for (i, (_, target)) in m2l_rw.iter().enumerate() { +// let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); +// let mut target_local_lock = target_local_arc.lock().unwrap(); +// let target_local_owned = locals_owned.slice(s![.., i]); +// if !target_local_lock.is_empty() { +// target_local_lock +// .iter_mut() +// .zip(target_local_owned.iter()) +// .for_each(|(c, m)| *c += *m); +// } else { +// target_local_lock.extend(target_local_owned); +// } +// } +// }); +// } +// } + +// fn m2l_scale(&self, level: u64) -> f64 { +// if level < 2 { +// panic!("M2L only performed on level 2 and below") +// } + +// if level == 2 { +// 1. / 2. +// } else { +// 2_f64.powf((level - 3) as f64) +// } +// } +// } + +// impl FieldTranslation for FmmData>> +// where +// T: Kernel + std::marker::Sync + std::marker::Send + Default, +// { +// fn m2l(&self, level: u64) { +// if let Some(targets) = self.fmm.tree().get_keys(level) { +// // Find transfer vectors +// targets.par_iter().for_each(move |&target| { +// let fmm_arc: Arc>> = +// Arc::clone(&self.fmm); +// let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); + +// if let Some(v_list) = fmm_arc.get_v_list(&target) { +// for (_i, source) in v_list.iter().enumerate() { +// // Locate correct components of compressed M2L matrix. +// let transfer_vector = target.find_transfer_vector(source); + +// let c_idx = fmm_arc +// .m2l +// .transfer_vectors +// .iter() +// .position(|x| x.vector == transfer_vector) +// .unwrap(); +// let c_lidx = c_idx * fmm_arc.m2l.k; +// let c_ridx = c_lidx + fmm_arc.m2l.k; +// let c_sub = fmm_arc.m2l.m2l.2.slice(s![.., c_lidx..c_ridx]); + +// let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); +// let source_multipole_lock = source_multipole_arc.lock().unwrap(); +// let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); + +// // Compressed multipole +// let compressed_source_multipole_owned = +// fmm_arc.m2l.m2l.1.dot(&source_multipole_view); + +// // Convolution to find compressed check potential +// let compressed_check_potential_owned = +// c_sub.dot(&compressed_source_multipole_owned); + +// // Post process to find check potential +// let check_potential_owned = +// fmm_arc.m2l.m2l.0.dot(&compressed_check_potential_owned); + +// // Compute local +// let target_local_owned = self.m2l_scale(target.level()) +// * fmm_arc.kernel.scale(target.level()) +// * fmm_arc +// .dc2e_inv +// .0 +// .dot(&self.fmm.dc2e_inv.1.dot(&check_potential_owned)); + +// // Store computation +// let mut target_local_lock = target_local_arc.lock().unwrap(); + +// if !target_local_lock.is_empty() { +// target_local_lock +// .iter_mut() +// .zip(target_local_owned.iter()) +// .for_each(|(c, m)| *c += *m); +// } else { +// target_local_lock.extend(target_local_owned); +// } +// } +// } +// }) +// } +// } + +// fn m2l_scale(&self, level: u64) -> f64 { +// if level < 2 { +// panic!("M2L only performed on level 2 and below") +// } + +// if level == 2 { +// 1. / 2. +// } else { +// 2_f64.powf((level - 3) as f64) +// } +// } +// } + +// impl FieldTranslation for FmmData>> +// where +// T: Kernel + std::marker::Sync + std::marker::Send + Default, +// { +// fn m2l(&self, level: u64) { +// if let Some(targets) = self.fmm.tree().get_keys(level) { +// targets.par_iter().for_each(move |&target| { +// let fmm_arc = Arc::clone(&self.fmm); +// let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); + +// if let Some(v_list) = fmm_arc.get_v_list(&target) { +// for (_, source) in v_list.iter().enumerate() { +// let transfer_vector = target.find_transfer_vector(source); + +// // Locate correct precomputed FFT of kernel interactions +// let k_idx = fmm_arc +// .m2l +// .transfer_vectors +// .iter() +// .position(|x| x.vector == transfer_vector) +// .unwrap(); + +// // Compute FFT of signal +// let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); +// let source_multipole_lock = source_multipole_arc.lock().unwrap(); + +// let signal = fmm_arc +// .m2l +// .compute_signal(fmm_arc.order, source_multipole_lock.deref()); + +// // 1. Pad the signal +// let m = signal.len(); +// let n = signal[0].len(); +// let k = signal[0][0].len(); + +// let p = 2 * m; +// let q = 2 * n; +// let r = 2 * k; + +// let signal = Array3::from_shape_vec( +// (m, n, k), +// signal.into_iter().flatten().flatten().collect(), +// ) +// .unwrap(); + +// let padding = [[p - m, 0], [q - n, 0], [r - k, 0]]; +// let padded_signal = pad(&signal, &padding, PadMode::Constant(0.)); + +// // 2. FFT of the padded signal +// // 2.1 Init the handlers for FFTs along each axis +// let mut handler_ax0 = FftHandler::::new(p); +// let mut handler_ax1 = FftHandler::::new(q); +// let mut handler_ax2 = R2cFftHandler::::new(r); + +// // 2.2 Compute the transform along each axis +// let mut padded_signal_hat: Array3> = +// Array3::zeros((p, q, r / 2 + 1)); +// let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); +// ndfft_r2c(&padded_signal, &mut tmp1, &mut handler_ax2, 2); +// let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); +// ndfft(&tmp1, &mut tmp2, &mut handler_ax1, 1); +// ndfft(&tmp2, &mut padded_signal_hat, &mut handler_ax0, 0); + +// // 3.Compute convolution to find check potential +// let padded_kernel_hat = &fmm_arc.m2l.m2l[k_idx]; + +// // Hadamard product +// let check_potential_hat = padded_kernel_hat * padded_signal_hat; + +// // 3.1 Compute iFFT to find check potentials +// let mut check_potential: Array3 = Array3::zeros((p, q, r)); +// let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); +// ndifft(&check_potential_hat, &mut tmp1, &mut handler_ax0, 0); +// let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); +// ndifft(&tmp1, &mut tmp2, &mut handler_ax1, 1); +// ndifft_r2c(&tmp2, &mut check_potential, &mut handler_ax2, 2); + +// // Filter check potentials +// let check_potential = +// check_potential.slice(s![p - m - 1..p, q - n - 1..q, r - k - 1..r]); + +// let (_, target_surface_idxs) = target.surface_grid(fmm_arc.order); + +// let mut tmp = Vec::new(); +// for index in target_surface_idxs.chunks_exact(fmm_arc.kernel.dim()) { +// let element = check_potential[[index[0], index[1], index[2]]]; +// tmp.push(element); +// } + +// // Compute local coefficients from check potentials +// let check_potential = Array::from_shape_vec( +// target_surface_idxs.len() / fmm_arc.kernel.dim(), +// tmp, +// ) +// .unwrap(); + +// // Compute local +// let target_local_owned = self.m2l_scale(target.level()) +// * fmm_arc.kernel.scale(target.level()) +// * fmm_arc +// .dc2e_inv +// .0 +// .dot(&self.fmm.dc2e_inv.1.dot(&check_potential)); + +// // Store computation +// let mut target_local_lock = target_local_arc.lock().unwrap(); + +// if !target_local_lock.is_empty() { +// target_local_lock +// .iter_mut() +// .zip(target_local_owned.iter()) +// .for_each(|(c, m)| *c += *m); +// } else { +// target_local_lock.extend(target_local_owned); +// } +// } +// } +// }) +// } +// } + +// fn m2l_scale(&self, level: u64) -> f64 { +// if level < 2 { +// panic!("M2L only performed on level 2 and below") +// } + +// if level == 2 { +// 1. / 2. +// } else { +// 2_f64.powf((level - 3) as f64) +// } +// } +// } + +// impl InteractionLists for KiFmm +// where +// T: Tree, +// U: Kernel, +// V: FieldTranslationData, +// { +// type Tree = T; + +// fn get_u_list( +// &self, +// key: &::NodeIndex, +// ) -> Option<::NodeIndices> { +// let mut u_list = Vec::::new(); +// let neighbours = key.neighbors(); + +// // Child level +// let mut neighbors_children_adj: Vec = neighbours +// .iter() +// .flat_map(|n| n.children()) +// .filter(|nc| self.tree().get_all_keys_set().contains(nc) && key.is_adjacent(nc)) +// .collect(); + +// // Key level +// let mut neighbors_adj: Vec = neighbours +// .iter() +// .filter(|n| self.tree().get_all_keys_set().contains(n) && key.is_adjacent(n)) +// .cloned() +// .collect(); + +// // Parent level +// let mut parent_neighbours_adj: Vec = key +// .parent() +// .neighbors() +// .into_iter() +// .filter(|pn| self.tree().get_all_keys_set().contains(pn) && key.is_adjacent(pn)) +// .collect(); + +// u_list.append(&mut neighbors_children_adj); +// u_list.append(&mut neighbors_adj); +// u_list.append(&mut parent_neighbours_adj); +// u_list.push(*key); + +// if !u_list.is_empty() { +// Some(MortonKeys { +// keys: u_list, +// index: 0, +// }) +// } else { +// None +// } +// } + +// fn get_v_list( +// &self, +// key: &::NodeIndex, +// ) -> Option<::NodeIndices> { +// if key.level() >= 2 { +// let v_list = key +// .parent() +// .neighbors() +// .iter() +// .flat_map(|pn| pn.children()) +// .filter(|pnc| self.tree().get_all_keys_set().contains(pnc) && !key.is_adjacent(pnc)) +// .collect_vec(); + +// if !v_list.is_empty() { +// return Some(MortonKeys { +// keys: v_list, +// index: 0, +// }); +// } else { +// return None; +// } +// } +// None +// } + +// fn get_w_list( +// &self, +// key: &::NodeIndex, +// ) -> Option<::NodeIndices> { +// // Child level +// let w_list = key +// .neighbors() +// .iter() +// .flat_map(|n| n.children()) +// .filter(|nc| self.tree().get_all_keys_set().contains(nc) && !key.is_adjacent(nc)) +// .collect_vec(); + +// if !w_list.is_empty() { +// Some(MortonKeys { +// keys: w_list, +// index: 0, +// }) +// } else { +// None +// } +// } + +// fn get_x_list( +// &self, +// key: &::NodeIndex, +// ) -> Option<::NodeIndices> { +// let x_list = key +// .parent() +// .neighbors() +// .into_iter() +// .filter(|pn| self.tree.get_all_keys_set().contains(pn) && !key.is_adjacent(pn)) +// .collect_vec(); + +// if !x_list.is_empty() { +// Some(MortonKeys { +// keys: x_list, +// index: 0, +// }) +// } else { +// None +// } +// } +// } + +// impl Fmm for KiFmm +// where +// T: Tree, +// U: Kernel, +// V: FieldTranslationData, +// { +// type Tree = T; +// type Kernel = U; + +// fn order(&self) -> usize { +// self.order +// } + +// fn kernel(&self) -> &Self::Kernel { +// &self.kernel +// } + +// fn tree(&self) -> &Self::Tree { +// &self.tree +// } +// } + +// impl FmmLoop for FmmData +// where +// T: Fmm, +// FmmData: SourceTranslation + TargetTranslation + FieldTranslation, +// { +// fn upward_pass(&self) { +// // Particle to Multipole +// let start = Instant::now(); +// self.p2m(); +// println!("P2M = {:?}ms", start.elapsed().as_millis()); + +// // Multipole to Multipole +// let depth = self.fmm.tree().get_depth(); +// let start = Instant::now(); +// for level in (1..=depth).rev() { +// self.m2m(level) +// } +// println!("M2M = {:?}ms", start.elapsed().as_millis()); +// } + +// fn downward_pass(&self) { +// let depth = self.fmm.tree().get_depth(); +// let mut l2l_time = 0; +// let mut m2l_time = 0; +// for level in 2..=depth { +// if level > 2 { +// let start = Instant::now(); +// self.l2l(level); +// l2l_time += start.elapsed().as_millis(); +// } + +// let start = Instant::now(); +// self.m2l(level); +// m2l_time += start.elapsed().as_millis(); +// } +// println!("M2L = {:?}ms", m2l_time); +// println!("L2L = {:?}ms", l2l_time); + +// let start = Instant::now(); +// // Leaf level computations +// self.p2l(); +// println!("P2L = {:?}ms", start.elapsed().as_millis()); + +// // Sum all potential contributions +// let start = Instant::now(); +// self.m2p(); +// println!("M2P = {:?}ms", start.elapsed().as_millis()); +// let start = Instant::now(); +// self.p2p(); +// println!("P2P = {:?}ms", start.elapsed().as_millis()); +// let start = Instant::now(); +// self.l2p(); +// println!("L2P = {:?}ms", start.elapsed().as_millis()); +// } + +// fn run(&self) { +// self.upward_pass(); +// self.downward_pass(); +// } +// } #[allow(unused_imports)] mod test { - use approx::{assert_relative_eq, RelativeEq}; + // use approx::{assert_relative_eq, RelativeEq}; use rand::prelude::*; use rand::SeedableRng; - use bempp_tree::types::point::PointType; - use rayon::ThreadPool; + // use bempp_tree::types::point::PointType; + // use rayon::ThreadPool; - use crate::laplace::LaplaceKernel; + use bempp_kernel::laplace_3d::Laplace3dKernel; + // // use crate::laplace::LaplaceKernel; + + use rlst::{dense::rlst_rand_mat, common::traits::ColumnMajorIterator}; use super::*; - #[allow(dead_code)] - fn points_fixture(npoints: usize) -> Vec { + // #[allow(dead_code)] + // fn points_fixture(npoints: usize) -> Vec { + // let mut range = StdRng::seed_from_u64(0); + // let between = rand::distributions::Uniform::from(0.0..1.0); + // let mut points: Vec<[PointType; 3]> = Vec::new(); + + // for _ in 0..npoints { + // points.push([ + // between.sample(&mut range), + // between.sample(&mut range), + // between.sample(&mut range), + // ]) + // } + + // let points = points + // .iter() + // .enumerate() + // .map(|(i, p)| Point { + // coordinate: *p, + // global_idx: i, + // base_key: MortonKey::default(), + // encoded_key: MortonKey::default(), + // }) + // .collect_vec(); + // points + // } + fn points_fixture( + npoints: usize, + min: Option, + max: Option + ) -> Matrix, Dynamic, Dynamic>, Dynamic, Dynamic> + { + // Generate a set of randomly distributed points let mut range = StdRng::seed_from_u64(0); - let between = rand::distributions::Uniform::from(0.0..1.0); - let mut points: Vec<[PointType; 3]> = Vec::new(); - - for _ in 0..npoints { - points.push([ - between.sample(&mut range), - between.sample(&mut range), - between.sample(&mut range), - ]) + + let between; + if let (Some(min),Some(max)) = (min, max) { + between = rand::distributions::Uniform::from(min..max); + } else { + between = rand::distributions::Uniform::from(0.0_f64..1.0_f64); + } + + let mut points = rlst_mat![f64, (npoints, 3)]; + + for i in 0..npoints { + points[[i, 0]] = between.sample(&mut range); + points[[i, 1]] = between.sample(&mut range); + points[[i, 2]] = between.sample(&mut range); } - let points = points - .iter() - .enumerate() - .map(|(i, p)| Point { - coordinate: *p, - global_idx: i, - base_key: MortonKey::default(), - encoded_key: MortonKey::default(), - }) - .collect_vec(); points } #[test] fn test_fmm() { - let npoints = 100000; - let points = points_fixture(npoints); - let points_clone = points.clone(); - let depth = 4; - let n_crit = 150; - - let order = 5; + let npoints = 1000; + // let points = points_fixture(npoints); + // let points_clone = points.clone(); + // let depth = 4; + // let n_crit = 150; + let points = points_fixture(npoints, None, None); + + let order = 2; let alpha_inner = 1.05; let alpha_outer = 2.9; - let adaptive = true; - let k = 453; - - let kernel = LaplaceKernel::new(3, false, 3); + let adaptive = false; + let k = 50; + let ncrit = 100; + let depth = 3; + let kernel = Laplace3dKernel::::default(); let start = Instant::now(); - let tree = SingleNodeTree::new(&points, adaptive, Some(n_crit), Some(depth)); + let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); println!("Tree = {:?}ms", start.elapsed().as_millis()); let start = Instant::now(); - // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( - // kernel.clone(), - // Some(k), - // order, - // tree.get_domain().clone(), - // alpha_inner, - // ); - - // let m2l_data_svd = SvdFieldTranslationKiFmm::new( - // kernel.clone(), - // Some(k), - // order, - // tree.get_domain().clone(), - // alpha_inner, - // ); - println!("SVD operators = {:?}ms", start.elapsed().as_millis()); + // // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( + // // kernel.clone(), + // // Some(k), + // // order, + // // tree.get_domain().clone(), + // // alpha_inner, + // // ); - let start = Instant::now(); - let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( + let m2l_data_svd = SvdFieldTranslationKiFmm::new( kernel.clone(), + Some(k), order, tree.get_domain().clone(), alpha_inner, ); - println!("FFT operators = {:?}ms", start.elapsed().as_millis()); - - let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); - - let charges = Charges::new(); - - let datatree = FmmData::new(fmm, charges); - - datatree.run(); - - let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; - - let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); - let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + println!("SVD operators = {:?}ms", start.elapsed().as_millis()); - let mut direct = vec![0f64; pts.len()]; - let all_point_coordinates = points_clone - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + // let start = Instant::now(); + // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( + // kernel.clone(), + // order, + // tree.get_domain().clone(), + // alpha_inner, + // ); + // println!("FFT operators = {:?}ms", start.elapsed().as_millis()); - let leaf_coordinates = pts - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - let all_charges = vec![1f64; points_clone.len()]; + let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - let kernel = LaplaceKernel { - dim: 3, - is_singular: false, - value_dimension: 3, - }; - kernel.potential( - &all_point_coordinates[..], - &all_charges[..], - &leaf_coordinates[..], - &mut direct[..], - ); + fmm.m2m[0].pretty_print(); - let abs_error: f64 = potentials - .iter() - .zip(direct.iter()) - .map(|(a, b)| (a - b).abs()) - .sum(); - let rel_error: f64 = abs_error / (direct.iter().sum::()); - - println!("p={:?} rel_error={:?}\n", order, rel_error); assert!(false) + + // let charges = Charges::new(); + + // let datatree = FmmData::new(fmm, charges); + + // datatree.run(); + + // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + + // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); + // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + + // let mut direct = vec![0f64; pts.len()]; + // let all_point_coordinates = points_clone + // .iter() + // .map(|p| p.coordinate) + // .flat_map(|[x, y, z]| vec![x, y, z]) + // .collect_vec(); + + // let leaf_coordinates = pts + // .iter() + // .map(|p| p.coordinate) + // .flat_map(|[x, y, z]| vec![x, y, z]) + // .collect_vec(); + // let all_charges = vec![1f64; points_clone.len()]; + + // let kernel = LaplaceKernel { + // dim: 3, + // is_singular: false, + // value_dimension: 3, + // }; + // kernel.potential( + // &all_point_coordinates[..], + // &all_charges[..], + // &leaf_coordinates[..], + // &mut direct[..], + // ); + + // let abs_error: f64 = potentials + // .iter() + // .zip(direct.iter()) + // .map(|(a, b)| (a - b).abs()) + // .sum(); + // let rel_error: f64 = abs_error / (direct.iter().sum::()); + + // println!("p={:?} rel_error={:?}\n", order, rel_error); + // assert!(false) } } diff --git a/fmm/src/lib.rs b/fmm/src/lib.rs index 34d059cf..c2f54f23 100644 --- a/fmm/src/lib.rs +++ b/fmm/src/lib.rs @@ -2,7 +2,6 @@ #![cfg_attr(feature = "strict", deny(warnings))] pub mod charge; pub mod fmm; -pub mod helmholtz; pub mod impl_charge; -pub mod laplace; +// pub mod laplace; pub mod linalg; diff --git a/fmm/src/linalg.rs b/fmm/src/linalg.rs index bd3213ff..6bda3e4a 100644 --- a/fmm/src/linalg.rs +++ b/fmm/src/linalg.rs @@ -1,92 +1,117 @@ -//! Temporary home of linear algebra utilities. TODO: Replace with routines from Householder. -use ndarray::*; -use ndarray_linalg::*; +// //! Temporary home of linear algebra utilities. TODO: Replace with routines from Householder. +// use ndarray::*; +// // use ndarray_linalg::*; -const F64_EPSILON: f64 = 2.220_446_049_250_313E-16f64; -type D = Dim<[usize; 2]>; -type Type1 = ArrayBase, D>; -type Type2 = ArrayBase::Real>, D>; +// use bempp_traits::types::Scalar; -/// Calculate the Moore-Penrose pseudoinverse. -pub fn pinv(array: &Array2) -> (Type1, Type2, Type1) { - let (u, mut s, vt): (_, Array1<_>, _) = array.svd(true, true).unwrap(); +// use rlst; +// use rlst::dense::{base_matrix::BaseMatrix, VectorContainer}; - let u = u.unwrap(); - // Truncate u - let vt = vt.unwrap(); - let max_s = s[0]; +// // const F64_EPSILON: f64 = 2.220_446_049_250_313E-16f64; +// // type D = Dim<[usize; 2]>; +// // type Type1 = ArrayBase, D>; +// // type Type2 = ArrayBase::Real>, D>; - // Hacky, should really work with type check at runtime. - for s in s.iter_mut() { - if *s > T::real(4.) * max_s * T::real(F64_EPSILON) { - *s = T::real(1.) / *s; - } else { - *s = T::real(0); - } - } +// // /// Calculate the Moore-Penrose pseudoinverse. +// // pub fn pinv(array: &Array2) -> (Type1, Type2, Type1) { +// // let (u, mut s, vt): (_, Array1<_>, _) = array.svd(true, true).unwrap(); - let v = vt.t(); - let ut = u.t(); +// // let u = u.unwrap(); +// // // Truncate u +// // let vt = vt.unwrap(); - let s_inv_mat = Array2::from_diag(&s); +// // let max_s = s[0]; - // Return components - (v.to_owned(), s_inv_mat.to_owned(), ut.to_owned()) -} +// // // Hacky, should really work with type check at runtime. +// // for s in s.iter_mut() { +// // if *s > T::real(4.) * max_s * T::real(F64_EPSILON) { +// // *s = T::real(1.) / *s; +// // } else { +// // *s = T::real(0); +// // } +// // } -pub fn matrix_rank(array: &Array2) -> usize { - let (_, s, _): (_, Array1<_>, _) = array.svd(false, false).unwrap(); - let shape = array.shape(); - let max_dim = shape.iter().max().unwrap(); +// // let v = vt.t(); +// // let ut = u.t(); - let tol = s[0] * T::real(*max_dim as f64) * T::real(F64_EPSILON); +// // let s_inv_mat = Array2::from_diag(&s); - let significant: Vec = s.iter().map(|sv| sv > &tol).filter(|sv| *sv).collect(); - let rank = significant.iter().len(); +// // // Return components +// // (v.to_owned(), s_inv_mat.to_owned(), ut.to_owned()) +// // } - rank -} +// type Type1 = Matrix<::Real, BaseMatrix<::Real, VectorContainer<::Real>, Dynamic, Dynamic>, Dynamic, Dynamic>; -#[allow(unused_imports)] -mod test { +// pub fn pinv(array: Type1) { - use super::*; +// let (s, u, vt) = array.linalg.svd().unwrap(Mode::All, Mode::All); - use float_cmp::assert_approx_eq; - use rand::prelude::*; - use rand::SeedableRng; +// let max_s = s[0]; - #[test] - fn test_pinv() { - let mut range = StdRng::seed_from_u64(0); - let between = rand::distributions::Uniform::from(0.0..1.0); +// for s in s.iter_mut() { + +// } - // Setup a random square matrix, of dimension 'dim' - let mut data: Vec = Vec::new(); - let dim: usize = 5; - let nvals = dim.pow(2); - for _ in 0..nvals { - data.push(between.sample(&mut range)) - } +// } - let data = Array1::from_vec(data).into_shape((dim, dim)).unwrap(); +// // pub fn matrix_rank(array: &Array2) -> usize { +// // let (_, s, _): (_, Array1<_>, _) = array.svd(false, false).unwrap(); +// // let shape = array.shape(); +// // let max_dim = shape.iter().max().unwrap(); - let (a, b, c) = pinv(&data); +// // let tol = s[0] * T::real(*max_dim as f64) * T::real(F64_EPSILON); - // Test dimensions of computed inverse are correct - let inv = a.dot(&b).dot(&c); - assert_eq!(inv.ncols(), dim); - assert_eq!(inv.nrows(), dim); +// // let significant: Vec = s.iter().map(|sv| sv > &tol).filter(|sv| *sv).collect(); +// // let rank = significant.iter().len(); - // Test that the inverse is approximately correct - let res = inv.dot(&data); +// // rank +// // } - let ones = Array1::from_vec(vec![1.; dim]); - let id = Array2::from_diag(&ones); +// #[allow(unused_imports)] +// mod test { - for (a, b) in id.iter().zip(res.iter()) { - assert_approx_eq!(f64, *a, *b, epsilon = 1e-14); - } - } -} +// use super::*; + +// use rlst; +// use rlst::common::tools::PrettyPrint; +// use rlst::dense::rlst_rand_mat; + + +// #[test] +// fn test_pinv() { +// // let mut range = StdRng::seed_from_u64(0); +// // let between = rand::distributions::Uniform::from(0.0..1.0); + +// // // Setup a random square matrix, of dimension 'dim' +// // let mut data: Vec = Vec::new(); +// // let dim: usize = 5; +// // let nvals = dim.pow(2); +// // for _ in 0..nvals { +// // data.push(between.sample(&mut range)) +// // } + +// // let data = Array1::from_vec(data).into_shape((dim, dim)).unwrap(); + +// let dim = 5; +// let data = rlst_rand_mat![f64, (dim, dim)]; + +// pinv(&data); +// // let (a, b, c) = pinv(&data); + +// // // Test dimensions of computed inverse are correct +// // let inv = a.dot(&b).dot(&c); +// // assert_eq!(inv.ncols(), dim); +// // assert_eq!(inv.nrows(), dim); + +// // // Test that the inverse is approximately correct +// // let res = inv.dot(&data); + +// // let ones = Array1::from_vec(vec![1.; dim]); +// // let id = Array2::from_diag(&ones); + +// // for (a, b) in id.iter().zip(res.iter()) { +// // assert_approx_eq!(f64, *a, *b, epsilon = 1e-14); +// // } +// } +// } diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 4be75bf8..542e64d3 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -30,4 +30,4 @@ num = "0.4" num_cpus = "1" [dev-dependencies] -rlst = {git = "https://github.com/linalg-rs/rlst.git" } \ No newline at end of file +rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse"} \ No newline at end of file diff --git a/kernel/src/laplace_3d.rs b/kernel/src/laplace_3d.rs index fd5c5db2..0a1e2f3f 100644 --- a/kernel/src/laplace_3d.rs +++ b/kernel/src/laplace_3d.rs @@ -10,6 +10,7 @@ use std::marker::PhantomData; use crate::helpers::check_dimensions_evaluate; use num::traits::FloatConst; +#[derive(Clone)] pub struct Laplace3dKernel { kernel_type: KernelType, _phantom_t: std::marker::PhantomData, diff --git a/traits/src/tree.rs b/traits/src/tree.rs index 6132e5bf..e5fdafd2 100644 --- a/traits/src/tree.rs +++ b/traits/src/tree.rs @@ -37,7 +37,7 @@ pub trait Tree { type NodeIndices: IntoIterator; fn new( - points: Self::PointSlice<'_>, + points: Self::PointDataSlice<'_>, adaptive: bool, n_crit: Option, depth: Option, From 8a7709315e830a56e329bed2e7d033af5f1c58da Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Tue, 4 Jul 2023 19:20:25 +0100 Subject: [PATCH 10/40] Begin working on svd fmm --- fmm/src/fmm.rs | 759 ++++++++++++++++++++++++++----------------------- 1 file changed, 406 insertions(+), 353 deletions(-) diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 181c68ce..d56e49c1 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,6 +1,7 @@ extern crate blas_src; use itertools::Itertools; +use ndarray::AssignElem; // use ndarray::*; // use ndarray_ndimage::{pad, PadMode}; // use ndrustfft::{ndfft, ndfft_r2c, ndifft, ndifft_r2c, Complex, FftHandler, R2cFftHandler}; @@ -12,7 +13,7 @@ use std::{ time::Instant, }; -use rlst; +use rlst::{self, dense::rlst_mut_pointer_mat}; use rlst::algorithms::linalg::LinAlg; use rlst::algorithms::traits::svd::{Mode, Svd}; use rlst::algorithms::traits::pseudo_inverse::Pinv; @@ -43,13 +44,22 @@ use bempp_tree::{ }; use crate::charge::Charges; + +type Expansions = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; +type Potentials = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; + pub struct FmmData { fmm: Arc, - multipoles: HashMap>>>, - locals: HashMap>>>, - potentials: HashMap>>>, + multipoles: HashMap>>, + locals: HashMap>>, + potentials: HashMap>>, points: HashMap>, charges: HashMap>>, + // multipoles: HashMap>>>, + // locals: HashMap>>>, + // potentials: HashMap>>>, + // points: HashMap>, + // charges: HashMap>>, } type C2EType = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; @@ -230,45 +240,47 @@ where } } -// #[allow(dead_code)] -// impl FmmData> -// where -// T: Kernel, -// U: FieldTranslationData, -// { -// pub fn new(fmm: KiFmm, _charges: Charges) -> Self { -// let mut multipoles = HashMap::new(); -// let mut locals = HashMap::new(); -// let mut potentials = HashMap::new(); -// let mut points = HashMap::new(); -// let mut charges = HashMap::new(); - -// if let Some(keys) = fmm.tree().get_all_keys() { -// for key in keys.iter() { -// multipoles.insert(*key, Arc::new(Mutex::new(Vec::new()))); -// locals.insert(*key, Arc::new(Mutex::new(Vec::new()))); -// potentials.insert(*key, Arc::new(Mutex::new(Vec::new()))); -// if let Some(point_data) = fmm.tree().get_points(key) { -// points.insert(*key, point_data.iter().cloned().collect_vec()); - -// // TODO: Replace with a global index lookup at some point -// charges.insert(*key, Arc::new(vec![1.0; point_data.len()])); -// } -// } -// } +#[allow(dead_code)] +impl FmmData> +where + T: Kernel, + U: FieldTranslationData, +{ + pub fn new(fmm: KiFmm, _charges: Charges) -> Self { + let mut multipoles = HashMap::new(); + let mut locals = HashMap::new(); + let mut potentials = HashMap::new(); + let mut points = HashMap::new(); + let mut charges = HashMap::new(); + + let dummy = rlst_mat![f64, (1,1)]; + + if let Some(keys) = fmm.tree().get_all_keys() { + for key in keys.iter() { + multipoles.insert(*key, Arc::new(Mutex::new(dummy.new_like_self()))); + locals.insert(*key, Arc::new(Mutex::new(dummy.new_like_self()))); + potentials.insert(*key, Arc::new(Mutex::new(dummy.new_like_self()))); + if let Some(point_data) = fmm.tree().get_points(key) { + points.insert(*key, point_data.iter().cloned().collect_vec()); + + // TODO: Replace with a global index lookup at some point + charges.insert(*key, Arc::new(vec![1.0; point_data.len()])); + } + } + } -// let fmm = Arc::new(fmm); + let fmm = Arc::new(fmm); -// Self { -// fmm, -// multipoles, -// locals, -// potentials, -// points, -// charges, -// } -// } -// } + Self { + fmm, + multipoles, + locals, + potentials, + points, + charges, + } + } +} // impl SourceTranslation for FmmData> // where @@ -625,125 +637,166 @@ where // } // } -// impl FieldTranslation for FmmData>> -// where -// T: Kernel + std::marker::Sync + std::marker::Send + Default, -// { -// fn m2l(&self, level: u64) { -// if let Some(targets) = self.fmm.tree().get_keys(level) { -// let mut transfer_vector_to_m2l = -// HashMap::>>>::new(); - -// for tv in self.fmm.m2l.transfer_vectors.iter() { -// transfer_vector_to_m2l.insert(tv.vector, Arc::new(Mutex::new(Vec::new()))); -// } - -// targets.par_iter().enumerate().for_each(|(_i, &target)| { -// if let Some(v_list) = self.fmm.get_v_list(&target) { -// let calculated_transfer_vectors = v_list -// .iter() -// .map(|source| target.find_transfer_vector(source)) -// .collect::>(); -// for (transfer_vector, &source) in -// calculated_transfer_vectors.iter().zip(v_list.iter()) -// { -// let m2l_arc = -// Arc::clone(transfer_vector_to_m2l.get(transfer_vector).unwrap()); -// let mut m2l_lock = m2l_arc.lock().unwrap(); -// m2l_lock.push((source, target)); -// } -// } -// }); - -// let mut transfer_vector_to_m2l_rw_lock = -// HashMap::>>>::new(); - -// // Find all multipole expansions and allocate -// for (&transfer_vector, m2l_arc) in transfer_vector_to_m2l.iter() { -// transfer_vector_to_m2l_rw_lock.insert( -// transfer_vector, -// Arc::new(RwLock::new(m2l_arc.lock().unwrap().clone())), -// ); -// } - -// transfer_vector_to_m2l_rw_lock -// .par_iter() -// .for_each(|(transfer_vector, m2l_arc)| { -// let c_idx = self -// .fmm -// .m2l -// .transfer_vectors -// .iter() -// .position(|x| x.vector == *transfer_vector) -// .unwrap(); - -// let c_lidx = c_idx * self.fmm.m2l.k; -// let c_ridx = c_lidx + self.fmm.m2l.k; -// let c_sub = self.fmm.m2l.m2l.2.slice(s![.., c_lidx..c_ridx]); - -// let m2l_rw = m2l_arc.read().unwrap(); -// let mut multipoles = Array2::zeros((self.fmm.m2l.k, m2l_rw.len())); - -// for (i, (source, _)) in m2l_rw.iter().enumerate() { -// let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); -// let source_multipole_lock = source_multipole_arc.lock().unwrap(); -// let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); - -// // Compressed multipole -// let compressed_source_multipole_owned = -// self.fmm.m2l.m2l.1.dot(&source_multipole_view); - -// multipoles -// .slice_mut(s![.., i]) -// .assign(&compressed_source_multipole_owned); -// } - -// // // Compute convolution -// let compressed_check_potential_owned = c_sub.dot(&multipoles); - -// // Post process to find check potential -// let check_potential_owned = -// self.fmm.m2l.m2l.0.dot(&compressed_check_potential_owned); - -// // Compute local -// let locals_owned = self.m2l_scale(level) -// * self.fmm.kernel.scale(level) -// * self -// .fmm -// .dc2e_inv -// .0 -// .dot(&self.fmm.dc2e_inv.1.dot(&check_potential_owned)); - -// // Assign locals -// for (i, (_, target)) in m2l_rw.iter().enumerate() { -// let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); -// let mut target_local_lock = target_local_arc.lock().unwrap(); -// let target_local_owned = locals_owned.slice(s![.., i]); -// if !target_local_lock.is_empty() { -// target_local_lock -// .iter_mut() -// .zip(target_local_owned.iter()) -// .for_each(|(c, m)| *c += *m); -// } else { -// target_local_lock.extend(target_local_owned); -// } -// } -// }); -// } -// } +impl FieldTranslation for FmmData>> +where + T: Kernel + std::marker::Sync + std::marker::Send + Default, +{ + fn m2l<'a>(&self, level: u64) { + if let Some(targets) = self.fmm.tree().get_keys(level) { + let mut transfer_vector_to_m2l = + HashMap::>>>::new(); + + for tv in self.fmm.m2l.transfer_vectors.iter() { + transfer_vector_to_m2l.insert(tv.vector, Arc::new(Mutex::new(Vec::new()))); + } + + targets.par_iter().enumerate().for_each(|(_i, &target)| { + if let Some(v_list) = self.fmm.get_v_list(&target) { + let calculated_transfer_vectors = v_list + .iter() + .map(|source| target.find_transfer_vector(source)) + .collect::>(); + for (transfer_vector, &source) in + calculated_transfer_vectors.iter().zip(v_list.iter()) + { + let m2l_arc = + Arc::clone(transfer_vector_to_m2l.get(transfer_vector).unwrap()); + let mut m2l_lock = m2l_arc.lock().unwrap(); + m2l_lock.push((source, target)); + } + } + }); + + let mut transfer_vector_to_m2l_rw_lock = + HashMap::>>>::new(); + + // Find all multipole expansions and allocate + for (&transfer_vector, m2l_arc) in transfer_vector_to_m2l.iter() { + transfer_vector_to_m2l_rw_lock.insert( + transfer_vector, + Arc::new(RwLock::new(m2l_arc.lock().unwrap().clone())), + ); + } + + transfer_vector_to_m2l_rw_lock + .par_iter() + .for_each(|(transfer_vector, m2l_arc)| { + let c_idx = self + .fmm + .m2l + .transfer_vectors + .iter() + .position(|x| x.vector == *transfer_vector) + .unwrap(); + + let c_lidx = c_idx * self.fmm.m2l.k; + let c_ridx = c_lidx + self.fmm.m2l.k; + // let c_sub = self.fmm.m2l.m2l.2.slice(s![.., c_lidx..c_ridx]); + + let (nrows, _) = self.m2l.m2l.2.shape(); + let top_left = (0, c_lidx); + let dim = (nrows, c_ridx); + let c_sub = self.fmm.m2l.m2l.2.block(top_left, dim); + + let m2l_rw = m2l_arc.read().unwrap(); + // let mut multipoles = Array2::zeros((self.fmm.m2l.k, m2l_rw.len())); + let mut multipoles = rlst_mat![f64, (self.fmm.m2l.k, m2l_rw.len())]; + + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + for (i, (source, _)) in m2l_rw.iter().enumerate() { + let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + + // Column vector + let mut source_multipole_ptr = source_multipole_lock.deref().as_ptr(); + let source_multipole_view = unsafe { + rlst_mut_pointer_mat!['a, f64, source_multipole_ptr.as_mut_ptr(), (ncoeffs, 1), (1, ncoeffs)] + }; + + // let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); + + // Compressed multipole + let compressed_source_multipole_owned = + self.fmm.m2l.m2l.1.dot(&source_multipole_view); + + let first = i*self.fmm.m2l.k; + let last = firsr+self.fmm.m2l.k; + + let multipole_slice = multipoles.get_slice_mut(first, last); + multipole_slice.copy_from_slice(compressed_source_multipole_owned); + // multipoles + // .slice_mut(s![.., i]) + // .assign(&compressed_source_multipole_owned); + } + + // // Compute convolution + let compressed_check_potential_owned = c_sub.dot(&multipoles); + + // Post process to find check potential + let check_potential_owned = + self.fmm.m2l.m2l.0.dot(&compressed_check_potential_owned); + + // Compute local + // let locals_owned = self.m2l_scale(level) + // * self.fmm.kernel.scale(level) + // * self + // .fmm + // .dc2e_inv + // .0 + // .dot(&self.fmm.dc2e_inv.1.dot(&check_potential_owned)); + let mut locals_owned = self.fmm.dc2e_inv.dot(&check_potential_owned); + + for i in 0..nceoffs { + locals_owned[[i, 0]] *= self.fmm.kernel.scale(level)*self.m2l_scale(level); + } + + + // Assign locals + for (i, (_, target)) in m2l_rw.iter().enumerate() { + let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); + let mut target_local_lock = target_local_arc.lock().unwrap(); + + // Column vector + let mut target_local_ptr = target_local_lock.deref().as_mut_ptr(); + + // let target_local_view = unsafe { + // rlst_mut_pointer_mat!['a, f64, source_multipole_ptr.as_mut_ptr(), (ncoeffs, 1), (1, ncoeffs)] + // }; + let first = i*self.fmm.m2l.k; + let last = first+self.fmm.m2l.k; + let target_local_owned = locals_owned.get_slice_mut(first, last); + + // let target_local_owned = locals_owned.slice(s![.., i]); + + if !target_local_lock.is_empty() { + + target_local + // target_local_lock + // .iter_mut() + // .zip(target_local_owned.iter()) + // .for_each(|(c, m)| *c += *m); + + } else { + target_local_lock.extend(target_local_owned); + } + } + }); + } + } -// fn m2l_scale(&self, level: u64) -> f64 { -// if level < 2 { -// panic!("M2L only performed on level 2 and below") -// } + fn m2l_scale(&self, level: u64) -> f64 { + if level < 2 { + panic!("M2L only performed on level 2 and below") + } -// if level == 2 { -// 1. / 2. -// } else { -// 2_f64.powf((level - 3) as f64) -// } -// } -// } + if level == 2 { + 1. / 2. + } else { + 2_f64.powf((level - 3) as f64) + } + } +} // impl FieldTranslation for FmmData>> // where @@ -960,209 +1013,209 @@ where // } // } -// impl InteractionLists for KiFmm -// where -// T: Tree, -// U: Kernel, -// V: FieldTranslationData, -// { -// type Tree = T; - -// fn get_u_list( -// &self, -// key: &::NodeIndex, -// ) -> Option<::NodeIndices> { -// let mut u_list = Vec::::new(); -// let neighbours = key.neighbors(); - -// // Child level -// let mut neighbors_children_adj: Vec = neighbours -// .iter() -// .flat_map(|n| n.children()) -// .filter(|nc| self.tree().get_all_keys_set().contains(nc) && key.is_adjacent(nc)) -// .collect(); - -// // Key level -// let mut neighbors_adj: Vec = neighbours -// .iter() -// .filter(|n| self.tree().get_all_keys_set().contains(n) && key.is_adjacent(n)) -// .cloned() -// .collect(); - -// // Parent level -// let mut parent_neighbours_adj: Vec = key -// .parent() -// .neighbors() -// .into_iter() -// .filter(|pn| self.tree().get_all_keys_set().contains(pn) && key.is_adjacent(pn)) -// .collect(); - -// u_list.append(&mut neighbors_children_adj); -// u_list.append(&mut neighbors_adj); -// u_list.append(&mut parent_neighbours_adj); -// u_list.push(*key); - -// if !u_list.is_empty() { -// Some(MortonKeys { -// keys: u_list, -// index: 0, -// }) -// } else { -// None -// } -// } +impl InteractionLists for KiFmm +where + T: Tree, + U: Kernel, + V: FieldTranslationData, +{ + type Tree = T; + + fn get_u_list( + &self, + key: &::NodeIndex, + ) -> Option<::NodeIndices> { + let mut u_list = Vec::::new(); + let neighbours = key.neighbors(); + + // Child level + let mut neighbors_children_adj: Vec = neighbours + .iter() + .flat_map(|n| n.children()) + .filter(|nc| self.tree().get_all_keys_set().contains(nc) && key.is_adjacent(nc)) + .collect(); + + // Key level + let mut neighbors_adj: Vec = neighbours + .iter() + .filter(|n| self.tree().get_all_keys_set().contains(n) && key.is_adjacent(n)) + .cloned() + .collect(); + + // Parent level + let mut parent_neighbours_adj: Vec = key + .parent() + .neighbors() + .into_iter() + .filter(|pn| self.tree().get_all_keys_set().contains(pn) && key.is_adjacent(pn)) + .collect(); + + u_list.append(&mut neighbors_children_adj); + u_list.append(&mut neighbors_adj); + u_list.append(&mut parent_neighbours_adj); + u_list.push(*key); + + if !u_list.is_empty() { + Some(MortonKeys { + keys: u_list, + index: 0, + }) + } else { + None + } + } -// fn get_v_list( -// &self, -// key: &::NodeIndex, -// ) -> Option<::NodeIndices> { -// if key.level() >= 2 { -// let v_list = key -// .parent() -// .neighbors() -// .iter() -// .flat_map(|pn| pn.children()) -// .filter(|pnc| self.tree().get_all_keys_set().contains(pnc) && !key.is_adjacent(pnc)) -// .collect_vec(); - -// if !v_list.is_empty() { -// return Some(MortonKeys { -// keys: v_list, -// index: 0, -// }); -// } else { -// return None; -// } -// } -// None -// } + fn get_v_list( + &self, + key: &::NodeIndex, + ) -> Option<::NodeIndices> { + if key.level() >= 2 { + let v_list = key + .parent() + .neighbors() + .iter() + .flat_map(|pn| pn.children()) + .filter(|pnc| self.tree().get_all_keys_set().contains(pnc) && !key.is_adjacent(pnc)) + .collect_vec(); + + if !v_list.is_empty() { + return Some(MortonKeys { + keys: v_list, + index: 0, + }); + } else { + return None; + } + } + None + } -// fn get_w_list( -// &self, -// key: &::NodeIndex, -// ) -> Option<::NodeIndices> { -// // Child level -// let w_list = key -// .neighbors() -// .iter() -// .flat_map(|n| n.children()) -// .filter(|nc| self.tree().get_all_keys_set().contains(nc) && !key.is_adjacent(nc)) -// .collect_vec(); - -// if !w_list.is_empty() { -// Some(MortonKeys { -// keys: w_list, -// index: 0, -// }) -// } else { -// None -// } -// } + fn get_w_list( + &self, + key: &::NodeIndex, + ) -> Option<::NodeIndices> { + // Child level + let w_list = key + .neighbors() + .iter() + .flat_map(|n| n.children()) + .filter(|nc| self.tree().get_all_keys_set().contains(nc) && !key.is_adjacent(nc)) + .collect_vec(); + + if !w_list.is_empty() { + Some(MortonKeys { + keys: w_list, + index: 0, + }) + } else { + None + } + } -// fn get_x_list( -// &self, -// key: &::NodeIndex, -// ) -> Option<::NodeIndices> { -// let x_list = key -// .parent() -// .neighbors() -// .into_iter() -// .filter(|pn| self.tree.get_all_keys_set().contains(pn) && !key.is_adjacent(pn)) -// .collect_vec(); - -// if !x_list.is_empty() { -// Some(MortonKeys { -// keys: x_list, -// index: 0, -// }) -// } else { -// None -// } -// } -// } + fn get_x_list( + &self, + key: &::NodeIndex, + ) -> Option<::NodeIndices> { + let x_list = key + .parent() + .neighbors() + .into_iter() + .filter(|pn| self.tree.get_all_keys_set().contains(pn) && !key.is_adjacent(pn)) + .collect_vec(); + + if !x_list.is_empty() { + Some(MortonKeys { + keys: x_list, + index: 0, + }) + } else { + None + } + } +} -// impl Fmm for KiFmm -// where -// T: Tree, -// U: Kernel, -// V: FieldTranslationData, -// { -// type Tree = T; -// type Kernel = U; +impl Fmm for KiFmm +where + T: Tree, + U: Kernel, + V: FieldTranslationData, +{ + type Tree = T; + type Kernel = U; -// fn order(&self) -> usize { -// self.order -// } + fn order(&self) -> usize { + self.order + } -// fn kernel(&self) -> &Self::Kernel { -// &self.kernel -// } + fn kernel(&self) -> &Self::Kernel { + &self.kernel + } -// fn tree(&self) -> &Self::Tree { -// &self.tree -// } -// } + fn tree(&self) -> &Self::Tree { + &self.tree + } +} -// impl FmmLoop for FmmData -// where -// T: Fmm, -// FmmData: SourceTranslation + TargetTranslation + FieldTranslation, -// { -// fn upward_pass(&self) { -// // Particle to Multipole -// let start = Instant::now(); -// self.p2m(); -// println!("P2M = {:?}ms", start.elapsed().as_millis()); - -// // Multipole to Multipole -// let depth = self.fmm.tree().get_depth(); -// let start = Instant::now(); -// for level in (1..=depth).rev() { -// self.m2m(level) -// } -// println!("M2M = {:?}ms", start.elapsed().as_millis()); -// } +impl FmmLoop for FmmData +where + T: Fmm, + FmmData: SourceTranslation + TargetTranslation + FieldTranslation, +{ + fn upward_pass(&self) { + // Particle to Multipole + let start = Instant::now(); + self.p2m(); + println!("P2M = {:?}ms", start.elapsed().as_millis()); -// fn downward_pass(&self) { -// let depth = self.fmm.tree().get_depth(); -// let mut l2l_time = 0; -// let mut m2l_time = 0; -// for level in 2..=depth { -// if level > 2 { -// let start = Instant::now(); -// self.l2l(level); -// l2l_time += start.elapsed().as_millis(); -// } - -// let start = Instant::now(); -// self.m2l(level); -// m2l_time += start.elapsed().as_millis(); -// } -// println!("M2L = {:?}ms", m2l_time); -// println!("L2L = {:?}ms", l2l_time); - -// let start = Instant::now(); -// // Leaf level computations -// self.p2l(); -// println!("P2L = {:?}ms", start.elapsed().as_millis()); - -// // Sum all potential contributions -// let start = Instant::now(); -// self.m2p(); -// println!("M2P = {:?}ms", start.elapsed().as_millis()); -// let start = Instant::now(); -// self.p2p(); -// println!("P2P = {:?}ms", start.elapsed().as_millis()); -// let start = Instant::now(); -// self.l2p(); -// println!("L2P = {:?}ms", start.elapsed().as_millis()); -// } + // Multipole to Multipole + let depth = self.fmm.tree().get_depth(); + let start = Instant::now(); + for level in (1..=depth).rev() { + self.m2m(level) + } + println!("M2M = {:?}ms", start.elapsed().as_millis()); + } -// fn run(&self) { -// self.upward_pass(); -// self.downward_pass(); -// } -// } + fn downward_pass(&self) { + let depth = self.fmm.tree().get_depth(); + let mut l2l_time = 0; + let mut m2l_time = 0; + for level in 2..=depth { + if level > 2 { + let start = Instant::now(); + self.l2l(level); + l2l_time += start.elapsed().as_millis(); + } + + let start = Instant::now(); + self.m2l(level); + m2l_time += start.elapsed().as_millis(); + } + println!("M2L = {:?}ms", m2l_time); + println!("L2L = {:?}ms", l2l_time); + + let start = Instant::now(); + // Leaf level computations + self.p2l(); + println!("P2L = {:?}ms", start.elapsed().as_millis()); + + // Sum all potential contributions + let start = Instant::now(); + self.m2p(); + println!("M2P = {:?}ms", start.elapsed().as_millis()); + let start = Instant::now(); + self.p2p(); + println!("P2P = {:?}ms", start.elapsed().as_millis()); + let start = Instant::now(); + self.l2p(); + println!("L2P = {:?}ms", start.elapsed().as_millis()); + } + + fn run(&self) { + self.upward_pass(); + self.downward_pass(); + } +} #[allow(unused_imports)] mod test { @@ -1285,9 +1338,9 @@ mod test { let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - fmm.m2m[0].pretty_print(); + // fmm.m2m[0].pretty_print(); - assert!(false) + // assert!(false) // let charges = Charges::new(); From 1fd446fa8e018a822f9014b3926e9bc026f5cff7 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Wed, 5 Jul 2023 15:18:56 +0100 Subject: [PATCH 11/40] Begin debugging upward pass --- fmm/src/fmm.rs | 914 ++++++++++++++++++++++++++++------------------ fmm/src/linalg.rs | 4 +- 2 files changed, 568 insertions(+), 350 deletions(-) diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index d56e49c1..b5150c40 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,5 +1,6 @@ extern crate blas_src; +use cauchy::Scalar; use itertools::Itertools; use ndarray::AssignElem; // use ndarray::*; @@ -8,15 +9,14 @@ use ndarray::AssignElem; use rayon::prelude::*; use std::{ collections::HashMap, - ops::Deref, + ops::{Deref, DerefMut}, sync::{Arc, Mutex, RwLock}, time::Instant, }; -use rlst::{self, dense::rlst_mut_pointer_mat}; use rlst::algorithms::linalg::LinAlg; -use rlst::algorithms::traits::svd::{Mode, Svd}; use rlst::algorithms::traits::pseudo_inverse::Pinv; +use rlst::algorithms::traits::svd::{Mode, Svd}; use rlst::common::traits::{NewLikeSelf, NewLikeTranspose, Transpose}; use rlst::common::{ tools::PrettyPrint, @@ -24,6 +24,11 @@ use rlst::common::{ }; use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix}; use rlst::dense::{rlst_fixed_mat, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}; +use rlst::{ + self, + common::traits::ColumnMajorIterator, + dense::{rlst_col_vec, rlst_mut_pointer_mat}, +}; use bempp_field::{ FftFieldTranslationNaiveKiFmm, SvdFieldTranslationKiFmm, SvdFieldTranslationNaiveKiFmm, @@ -31,7 +36,7 @@ use bempp_field::{ use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, fmm::{Fmm, FmmLoop, InteractionLists, SourceTranslation, TargetTranslation}, - kernel::{Kernel, EvalType}, + kernel::{EvalType, Kernel}, tree::Tree, }; use bempp_tree::{ @@ -45,8 +50,10 @@ use bempp_tree::{ use crate::charge::Charges; -type Expansions = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; -type Potentials = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; +type Expansions = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; +type Potentials = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; pub struct FmmData { fmm: Arc, @@ -62,7 +69,8 @@ pub struct FmmData { // charges: HashMap>>, } -type C2EType = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; +type C2EType = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; pub struct KiFmm> { order: usize, @@ -97,7 +105,8 @@ where ) -> Self { let upward_equivalent_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); let upward_check_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_outer); - let downward_equivalent_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_outer); + let downward_equivalent_surface = + ROOT.compute_surface(tree.get_domain(), order, alpha_outer); let downward_check_surface = ROOT.compute_surface(tree.get_domain(), order, alpha_inner); let nequiv_surface = upward_equivalent_surface.len() / kernel.space_dimension(); @@ -107,7 +116,7 @@ where let upward_equivalent_surface = unsafe { rlst_pointer_mat!['a, f64, upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] }; - let upward_check_surface= unsafe { + let upward_check_surface = unsafe { rlst_pointer_mat!['a, f64, upward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] }; let downward_equivalent_surface = unsafe { @@ -124,10 +133,10 @@ where // kernel.gram(&upward_equivalent_surface, &upward_check_surface, &mut uc2e); let mut uc2e = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; kernel.gram( - EvalType::Value, - upward_equivalent_surface.data(), - upward_check_surface.data(), - uc2e.data_mut() + EvalType::Value, + upward_equivalent_surface.data(), + upward_check_surface.data(), + uc2e.data_mut(), ); // let mut dc2e = Vec::::new(); @@ -136,16 +145,15 @@ where // &downward_check_surface, // &mut dc2e, // ); - + let mut dc2e = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; kernel.gram( - EvalType::Value, - downward_equivalent_surface.data(), - downward_check_surface.data(), - dc2e.data_mut() + EvalType::Value, + downward_equivalent_surface.data(), + downward_check_surface.data(), + dc2e.data_mut(), ); - let nrows = m2l.ncoeffs(order); let ncols = m2l.ncoeffs(order); @@ -166,7 +174,6 @@ where } let uc2e_inv = v.dot(&mat_s).dot(&ut); - // let dc2e = Array1::from(dc2e) // .to_shape((nrows, ncols)) // .unwrap() @@ -189,16 +196,18 @@ where let mut l2l: Vec = Vec::new(); for child in children.iter() { - let child_upward_equivalent_surface = child.compute_surface(tree.get_domain(), order, alpha_inner); - let child_downward_check_surface = child.compute_surface(tree.get_domain(), order, alpha_inner); + let child_upward_equivalent_surface = + child.compute_surface(tree.get_domain(), order, alpha_inner); + let child_downward_check_surface = + child.compute_surface(tree.get_domain(), order, alpha_inner); let child_upward_equivalent_surface = unsafe { rlst_pointer_mat!['a, f64, child_upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)] }; - let child_downward_check_surface= unsafe { + let child_downward_check_surface = unsafe { rlst_pointer_mat!['a, f64, child_downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)] }; - let mut pc2ce = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; + let mut pc2ce = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; kernel.gram( EvalType::Value, @@ -212,7 +221,7 @@ where m2m.push(uc2e_inv.dot(&pc2ce).eval()); // let mut cc2pe = Vec::new(); - let mut cc2pe = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; + let mut cc2pe = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; kernel.gram( EvalType::Value, @@ -252,14 +261,15 @@ where let mut potentials = HashMap::new(); let mut points = HashMap::new(); let mut charges = HashMap::new(); + let ncoeffs = fmm.m2l.ncoeffs(fmm.order); - let dummy = rlst_mat![f64, (1,1)]; + let dummy = rlst_col_vec![f64, ncoeffs]; if let Some(keys) = fmm.tree().get_all_keys() { for key in keys.iter() { - multipoles.insert(*key, Arc::new(Mutex::new(dummy.new_like_self()))); - locals.insert(*key, Arc::new(Mutex::new(dummy.new_like_self()))); - potentials.insert(*key, Arc::new(Mutex::new(dummy.new_like_self()))); + multipoles.insert(*key, Arc::new(Mutex::new(dummy.new_like_self().eval()))); + locals.insert(*key, Arc::new(Mutex::new(dummy.new_like_self().eval()))); + potentials.insert(*key, Arc::new(Mutex::new(dummy.new_like_self().eval()))); if let Some(point_data) = fmm.tree().get_points(key) { points.insert(*key, point_data.iter().cloned().collect_vec()); @@ -282,103 +292,187 @@ where } } -// impl SourceTranslation for FmmData> -// where -// T: Kernel + std::marker::Send + std::marker::Sync, -// U: FieldTranslationData + std::marker::Sync + std::marker::Send, -// { -// fn p2m(&self) { -// if let Some(leaves) = self.fmm.tree.get_leaves() { -// leaves.par_iter().for_each(move |&leaf| { -// let leaf_multipole_arc = Arc::clone(self.multipoles.get(&leaf).unwrap()); -// let fmm_arc = Arc::clone(&self.fmm); -// let leaf_charges_arc = Arc::clone(self.charges.get(&leaf).unwrap()); +impl SourceTranslation for FmmData> +where + T: Kernel + std::marker::Send + std::marker::Sync, + U: FieldTranslationData + std::marker::Sync + std::marker::Send, +{ + fn p2m<'a>(&self) { + if let Some(leaves) = self.fmm.tree.get_leaves() { + leaves.par_iter().for_each(move |&leaf| { + let leaf_multipole_arc = Arc::clone(self.multipoles.get(&leaf).unwrap()); + let fmm_arc = Arc::clone(&self.fmm); -// if let Some(leaf_points) = self.points.get(&leaf) { -// // Lookup data -// let leaf_coordinates = leaf_points -// .iter() -// .map(|p| p.coordinate) -// .flat_map(|[x, y, z]| vec![x, y, z]) -// .collect_vec(); + if let Some(leaf_points) = self.points.get(&leaf) { + let leaf_charges_arc = Arc::clone(self.charges.get(&leaf).unwrap()); -// let upward_check_surface = leaf.compute_surface( -// &fmm_arc.tree().domain, -// fmm_arc.order, -// fmm_arc.alpha_outer, -// ); + // Lookup data + let leaf_coordinates = leaf_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let nsources = leaf_coordinates.len() / self.fmm.kernel.space_dimension(); + let leaf_coordinates = unsafe { + rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (nsources, 1)] + }; + + let upward_check_surface = leaf.compute_surface( + &fmm_arc.tree().domain, + fmm_arc.order, + fmm_arc.alpha_outer, + ); + + let ntargets = upward_check_surface.len() / fmm_arc.kernel.space_dimension(); + let upward_check_surface = unsafe { + rlst_pointer_mat!['a, f64, upward_check_surface.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (1, ntargets)] + }; + + // let leaf_charges_view = ArrayView::from(leaf_charges_arc.deref()); + // let leaf_charges_slice = leaf_charges_view.as_slice().unwrap(); + let n_leaf_pts = leaf_coordinates.shape().0; + let leaf_charges = unsafe { rlst_pointer_mat!['a, f64, leaf_charges_arc.deref().as_ptr(), (n_leaf_pts, 1), (n_leaf_pts, 1)] }; + + // Calculate check potential + // let mut check_potential = + // vec![0.; upward_check_surface.len() / self.fmm.kernel.dim()]; + + let mut check_potential = rlst_col_vec![f64, ntargets]; + + fmm_arc.kernel.evaluate_st( + EvalType::Value, + leaf_coordinates.data(), + upward_check_surface.data(), + leaf_charges.data(), + check_potential.data_mut() + ); + + // println!("{:?}", upward_check_surface.data()); + // println!("{:?}", leaf_coordinates.data()); + // println!("{:?}", leaf_charges.data()); + // println!("HERE {:?}", check_potential.data()); + // println!(); + + // fmm_arc.kernel.potential( + // &leaf_coordinates[..], + // leaf_charges_slice, + // &upward_check_surface[..], + // &mut check_potential[..], + // ); + // let check_potential = Array1::from_vec(check_potential); + + // Calculate multipole expansion + // let leaf_multipole_owned = fmm_arc.kernel.scale(leaf.level()) + // * fmm_arc + // .uc2e_inv + // .0 + // .dot(&fmm_arc.uc2e_inv.1.dot(&check_potential)); + let leaf_multipole_owned: Matrix, Dynamic, Dynamic>, Dynamic, Dynamic> = ( + fmm_arc.kernel.scale(leaf.level()) + * fmm_arc.uc2e_inv.dot(&check_potential) + ).eval(); -// let leaf_charges_view = ArrayView::from(leaf_charges_arc.deref()); -// let leaf_charges_slice = leaf_charges_view.as_slice().unwrap(); + let mut leaf_multipole_lock = leaf_multipole_arc.lock().unwrap(); -// // Calculate check potential -// let mut check_potential = -// vec![0.; upward_check_surface.len() / self.fmm.kernel.dim()]; + // println!("target lock {:?}", target_local_lock.shape()); + for i in 0..leaf_multipole_lock.shape().0 { + leaf_multipole_lock[[i, 0]] += leaf_multipole_owned[[i, 0]]; + } -// fmm_arc.kernel.potential( -// &leaf_coordinates[..], -// leaf_charges_slice, -// &upward_check_surface[..], -// &mut check_potential[..], -// ); -// let check_potential = Array1::from_vec(check_potential); + // if !leaf_multipole_lock.is_empty() { + // leaf_multipole_lock + // .iter_mut() + // .zip(leaf_multipole_owned.iter()) + // .for_each(|(c, m)| *c += *m); + // } else { + // leaf_multipole_lock.extend(leaf_multipole_owned); + // } + } + }); + } + } -// // Calculate multipole expansion -// let leaf_multipole_owned = fmm_arc.kernel.scale(leaf.level()) -// * fmm_arc -// .uc2e_inv -// .0 -// .dot(&fmm_arc.uc2e_inv.1.dot(&check_potential)); + fn m2m<'a>(&self, level: u64) { + // Parallelise over nodes at a given level + if let Some(sources) = self.fmm.tree.get_keys(level) { + sources.par_iter().for_each(move |&source| { + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); -// let mut leaf_multipole_lock = leaf_multipole_arc.lock().unwrap(); + let source_multipole_arc = Arc::clone(self.multipoles.get(&source).unwrap()); + let source_multipole_lock = source_multipole_arc.lock().unwrap(); -// if !leaf_multipole_lock.is_empty() { -// leaf_multipole_lock -// .iter_mut() -// .zip(leaf_multipole_owned.iter()) -// .for_each(|(c, m)| *c += *m); -// } else { -// leaf_multipole_lock.extend(leaf_multipole_owned); -// } -// } -// }); -// } -// } + let target_multipole_arc = + Arc::clone(self.multipoles.get(&source.parent()).unwrap()); + let fmm_arc = Arc::clone(&self.fmm); -// fn m2m(&self, level: u64) { -// // Parallelise over nodes at a given level -// if let Some(sources) = self.fmm.tree.get_keys(level) { -// sources.par_iter().for_each(move |&source| { -// let source_multipole_arc = Arc::clone(self.multipoles.get(&source).unwrap()); -// let source_multipole_lock = source_multipole_arc.lock().unwrap(); + let source_multipole_ptr = source_multipole_lock.deref().data().as_ptr(); + let source_multipole_view = unsafe { + rlst_pointer_mat!['a, f64, source_multipole_ptr, (ncoeffs, 1), (1, ncoeffs)] + }; -// if !source_multipole_lock.is_empty() { -// let target_multipole_arc = -// Arc::clone(self.multipoles.get(&source.parent()).unwrap()); -// let fmm_arc = Arc::clone(&self.fmm); + let operator_index = source.siblings().iter().position(|&x| x == source).unwrap(); -// let operator_index = -// source.siblings().iter().position(|&x| x == source).unwrap(); + // let source_multipole_view = rlst_col_vec![f64, ncoeffs]; + // let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); -// let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); + let target_multipole_owned = + fmm_arc.m2m[operator_index].dot(&source_multipole_view); -// let target_multipole_owned = -// fmm_arc.m2m[operator_index].dot(&source_multipole_view); -// let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); + let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); -// if !target_multipole_lock.is_empty() { -// target_multipole_lock -// .iter_mut() -// .zip(target_multipole_owned.iter()) -// .for_each(|(c, m)| *c += *m); -// } else { -// target_multipole_lock.extend(target_multipole_owned); -// } -// } -// }) -// } -// } -// } + // target_multipole_lock.pretty_print(); + // target_multipole_owned.pretty_print(); + // println!("HERE {:?} {:?}", target_multipole_lock.shape(), target_multipole_owned.shape()); + + // println!("Attempting to sum : {:?} \n {:?} using {:?} \n", target_multipole_lock.data(), target_multipole_owned.data(), source_multipole_view.data()); + + for i in 0..ncoeffs { + target_multipole_lock[[i, 0]] += target_multipole_owned[[i, 0]]; + } + // for i in 0..ncoeffs { + // // println!("{:?} {:?} \n {:?} \n {:?} \n \n", target_multipole_lock[[i, 0]], target_multipole_owned[[i, 0]], target_multipole_lock.data(), target_multipole_owned.data()); + // [[i, 0]] = target_multipole_lock[[i, 0]] + target_multipole_owned[[i, 0]]; + // } + // let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); + + // // fmm_arc.m2m[operator_index].pretty_print(); + // // // source_multipole_view.pretty_print(); + // // target_multipole_owned.pretty_print(); + // // println!("HERE"); + + // for i in 0..ncoeffs { + // target_multipole_lock[[i, 0]] += target_multipole_owned[[i, 0]]; + // } + // target_multipole_lock.pretty_print(); + // println!("{:?} ", source.parent().anchor()); + // if !source_multipole_lock.is_empty() { + // let target_multipole_arc = + // Arc::clone(self.multipoles.get(&source.parent()).unwrap()); + // let fmm_arc = Arc::clone(&self.fmm); + + // let operator_index = + // source.siblings().iter().position(|&x| x == source).unwrap(); + + // let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); + + // let target_multipole_owned = + // fmm_arc.m2m[operator_index].dot(&source_multipole_view); + // let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); + + // if !target_multipole_lock.is_empty() { + // target_multipole_lock + // .iter_mut() + // .zip(target_multipole_owned.iter()) + // .for_each(|(c, m)| *c += *m); + // } else { + // target_multipole_lock.extend(target_multipole_owned); + // } + // } + }) + } + } +} // impl TargetTranslation for FmmData> // where @@ -639,150 +733,167 @@ where impl FieldTranslation for FmmData>> where - T: Kernel + std::marker::Sync + std::marker::Send + Default, + T: Kernel + std::marker::Sync + std::marker::Send + Default, { fn m2l<'a>(&self, level: u64) { - if let Some(targets) = self.fmm.tree().get_keys(level) { - let mut transfer_vector_to_m2l = - HashMap::>>>::new(); + let Some(targets) = self.fmm.tree().get_keys(level) else { return }; + let mut transfer_vector_to_m2l = + HashMap::>>>::new(); - for tv in self.fmm.m2l.transfer_vectors.iter() { - transfer_vector_to_m2l.insert(tv.vector, Arc::new(Mutex::new(Vec::new()))); - } + for tv in self.fmm.m2l.transfer_vectors.iter() { + transfer_vector_to_m2l.insert(tv.vector, Arc::new(Mutex::new(Vec::new()))); + } - targets.par_iter().enumerate().for_each(|(_i, &target)| { - if let Some(v_list) = self.fmm.get_v_list(&target) { - let calculated_transfer_vectors = v_list - .iter() - .map(|source| target.find_transfer_vector(source)) - .collect::>(); - for (transfer_vector, &source) in - calculated_transfer_vectors.iter().zip(v_list.iter()) - { - let m2l_arc = - Arc::clone(transfer_vector_to_m2l.get(transfer_vector).unwrap()); - let mut m2l_lock = m2l_arc.lock().unwrap(); - m2l_lock.push((source, target)); - } + targets.par_iter().enumerate().for_each(|(_i, &target)| { + if let Some(v_list) = self.fmm.get_v_list(&target) { + let calculated_transfer_vectors = v_list + .iter() + .map(|source| target.find_transfer_vector(source)) + .collect::>(); + for (transfer_vector, &source) in + calculated_transfer_vectors.iter().zip(v_list.iter()) + { + let m2l_arc = Arc::clone(transfer_vector_to_m2l.get(transfer_vector).unwrap()); + let mut m2l_lock = m2l_arc.lock().unwrap(); + m2l_lock.push((source, target)); } - }); - - let mut transfer_vector_to_m2l_rw_lock = - HashMap::>>>::new(); - - // Find all multipole expansions and allocate - for (&transfer_vector, m2l_arc) in transfer_vector_to_m2l.iter() { - transfer_vector_to_m2l_rw_lock.insert( - transfer_vector, - Arc::new(RwLock::new(m2l_arc.lock().unwrap().clone())), - ); } + }); - transfer_vector_to_m2l_rw_lock - .par_iter() - .for_each(|(transfer_vector, m2l_arc)| { - let c_idx = self - .fmm - .m2l - .transfer_vectors - .iter() - .position(|x| x.vector == *transfer_vector) - .unwrap(); - - let c_lidx = c_idx * self.fmm.m2l.k; - let c_ridx = c_lidx + self.fmm.m2l.k; - // let c_sub = self.fmm.m2l.m2l.2.slice(s![.., c_lidx..c_ridx]); - - let (nrows, _) = self.m2l.m2l.2.shape(); - let top_left = (0, c_lidx); - let dim = (nrows, c_ridx); - let c_sub = self.fmm.m2l.m2l.2.block(top_left, dim); - - let m2l_rw = m2l_arc.read().unwrap(); - // let mut multipoles = Array2::zeros((self.fmm.m2l.k, m2l_rw.len())); - let mut multipoles = rlst_mat![f64, (self.fmm.m2l.k, m2l_rw.len())]; - - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - - for (i, (source, _)) in m2l_rw.iter().enumerate() { - let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - // Column vector - let mut source_multipole_ptr = source_multipole_lock.deref().as_ptr(); - let source_multipole_view = unsafe { - rlst_mut_pointer_mat!['a, f64, source_multipole_ptr.as_mut_ptr(), (ncoeffs, 1), (1, ncoeffs)] - }; - - // let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); - - // Compressed multipole - let compressed_source_multipole_owned = - self.fmm.m2l.m2l.1.dot(&source_multipole_view); - - let first = i*self.fmm.m2l.k; - let last = firsr+self.fmm.m2l.k; - - let multipole_slice = multipoles.get_slice_mut(first, last); - multipole_slice.copy_from_slice(compressed_source_multipole_owned); - // multipoles - // .slice_mut(s![.., i]) - // .assign(&compressed_source_multipole_owned); - } + let mut transfer_vector_to_m2l_rw_lock = + HashMap::>>>::new(); - // // Compute convolution - let compressed_check_potential_owned = c_sub.dot(&multipoles); + // Find all multipole expansions and allocate + for (&transfer_vector, m2l_arc) in transfer_vector_to_m2l.iter() { + transfer_vector_to_m2l_rw_lock.insert( + transfer_vector, + Arc::new(RwLock::new(m2l_arc.lock().unwrap().clone())), + ); + } - // Post process to find check potential - let check_potential_owned = - self.fmm.m2l.m2l.0.dot(&compressed_check_potential_owned); + transfer_vector_to_m2l_rw_lock + .par_iter() + .for_each(|(transfer_vector, m2l_arc)| { + let c_idx = self + .fmm + .m2l + .transfer_vectors + .iter() + .position(|x| x.vector == *transfer_vector) + .unwrap(); + + let c_lidx = c_idx * self.fmm.m2l.k; + let c_ridx = c_lidx + self.fmm.m2l.k; + // let c_sub = self.fmm.m2l.m2l.2.slice(s![.., c_lidx..c_ridx]); + + let (nrows, _) = self.fmm.m2l.m2l.2.shape(); + let top_left = (0, c_lidx); + let dim = (nrows, self.fmm.m2l.k); + + // println!("{:?} {:?} {:?}", top_left, dim, self.fmm.m2l.m2l.2.shape()); + let c_sub = self.fmm.m2l.m2l.2.block(top_left, dim); + + let m2l_rw = m2l_arc.read().unwrap(); + // let mut multipoles = Array2::zeros((self.fmm.m2l.k, m2l_rw.len())); + let mut multipoles = rlst_mat![f64, (self.fmm.m2l.k, m2l_rw.len())]; + + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + for (i, (source, _)) in m2l_rw.iter().enumerate() { + let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + + // Column vector + let source_multipole_ptr = source_multipole_lock.deref().data().as_ptr(); + let source_multipole_view = unsafe { + rlst_pointer_mat!['a, f64, source_multipole_ptr, (ncoeffs, 1), (1, ncoeffs)] + }; + + // // let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); + + // Compressed multipole + let compressed_source_multipole_owned = + self.fmm.m2l.m2l.1.dot(&source_multipole_view); + + let first = i * self.fmm.m2l.k; + let last = first + self.fmm.m2l.k; + + let multipole_slice = multipoles.get_slice_mut(first, last); + multipole_slice.copy_from_slice(compressed_source_multipole_owned.data()); + // multipoles + // .slice_mut(s![.., i]) + // .assign(&compressed_source_multipole_owned); + } - // Compute local - // let locals_owned = self.m2l_scale(level) - // * self.fmm.kernel.scale(level) - // * self - // .fmm - // .dc2e_inv - // .0 - // .dot(&self.fmm.dc2e_inv.1.dot(&check_potential_owned)); - let mut locals_owned = self.fmm.dc2e_inv.dot(&check_potential_owned); - - for i in 0..nceoffs { - locals_owned[[i, 0]] *= self.fmm.kernel.scale(level)*self.m2l_scale(level); + // // Compute convolution + let compressed_check_potential_owned = c_sub.dot(&multipoles); + + // Post process to find check potential + let check_potential_owned = + self.fmm.m2l.m2l.0.dot(&compressed_check_potential_owned); + + // Compute local + // // let locals_owned = self.m2l_scale(level) + // // * self.fmm.kernel.scale(level) + // // * self + // // .fmm + // // .dc2e_inv + // // .0 + // // .dot(&self.fmm.dc2e_inv.1.dot(&check_potential_owned)); + let locals_owned: Matrix< + f64, + BaseMatrix, Dynamic, Dynamic>, + Dynamic, + Dynamic, + > = (self.fmm.dc2e_inv.dot(&check_potential_owned) + * self.fmm.kernel.scale(level) + * self.m2l_scale(level)) + .eval(); + + // multipoles.pretty_print(); + // assert!(false); + + // Assign locals + for (i, (_, target)) in m2l_rw.iter().enumerate() { + let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); + let mut target_local_lock = target_local_arc.lock().unwrap(); + + // Column vector + let mut target_local_ptr = + target_local_lock.deref_mut().data_mut().as_mut_ptr(); + + let mut target_local_view = unsafe { + rlst_mut_pointer_mat!['a, f64, target_local_ptr, (ncoeffs, 1), (1, ncoeffs)] + }; + let first = i * self.fmm.m2l.k; + let last = first + self.fmm.m2l.k; + + let top_left = (0, i); + let dim = (self.fmm.m2l.k, 1); + let target_local_owned = locals_owned.block(top_left, dim); + + // let target_local_owned = locals_owned.slice(s![.., i]); + + // println!("target lock {:?}", target_local_lock.shape()); + for i in 0..target_local_lock.shape().0 { + target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; } - - // Assign locals - for (i, (_, target)) in m2l_rw.iter().enumerate() { - let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); - let mut target_local_lock = target_local_arc.lock().unwrap(); - - // Column vector - let mut target_local_ptr = target_local_lock.deref().as_mut_ptr(); - - // let target_local_view = unsafe { - // rlst_mut_pointer_mat!['a, f64, source_multipole_ptr.as_mut_ptr(), (ncoeffs, 1), (1, ncoeffs)] - // }; - let first = i*self.fmm.m2l.k; - let last = first+self.fmm.m2l.k; - let target_local_owned = locals_owned.get_slice_mut(first, last); - - // let target_local_owned = locals_owned.slice(s![.., i]); - - if !target_local_lock.is_empty() { - - target_local - // target_local_lock - // .iter_mut() - // .zip(target_local_owned.iter()) - // .for_each(|(c, m)| *c += *m); - - } else { - target_local_lock.extend(target_local_owned); - } - } - }); - } + // target_local_lock.pretty_print(); + // assert!(false); + // if !target_local_lock.is_empty() { + // // // target_local + // // // target_local_lock + // // // .iter_mut() + // // // .zip(target_local_owned.iter()) + // // // .for_each(|(c, m)| *c += *m); + + // // } else { + // // // target_local_lock.extend(target_local_owned); + // // } + } + }); + // assert!(false) } fn m2l_scale(&self, level: u64) -> f64 { @@ -797,7 +908,11 @@ where } } } +use std::any::type_name; +fn type_of(_: T) -> &'static str { + type_name::() +} // impl FieldTranslation for FmmData>> // where // T: Kernel + std::marker::Sync + std::marker::Send + Default, @@ -1159,7 +1274,7 @@ where impl FmmLoop for FmmData where T: Fmm, - FmmData: SourceTranslation + TargetTranslation + FieldTranslation, + FmmData: SourceTranslation + FieldTranslation, // + TargetTranslation, { fn upward_pass(&self) { // Particle to Multipole @@ -1176,49 +1291,50 @@ where println!("M2M = {:?}ms", start.elapsed().as_millis()); } - fn downward_pass(&self) { - let depth = self.fmm.tree().get_depth(); - let mut l2l_time = 0; - let mut m2l_time = 0; - for level in 2..=depth { - if level > 2 { - let start = Instant::now(); - self.l2l(level); - l2l_time += start.elapsed().as_millis(); - } - - let start = Instant::now(); - self.m2l(level); - m2l_time += start.elapsed().as_millis(); - } - println!("M2L = {:?}ms", m2l_time); - println!("L2L = {:?}ms", l2l_time); + // fn downward_pass(&self) { + // let depth = self.fmm.tree().get_depth(); + // let mut l2l_time = 0; + // let mut m2l_time = 0; + // for level in 2..=depth { + // if level > 2 { + // let start = Instant::now(); + // self.l2l(level); + // l2l_time += start.elapsed().as_millis(); + // } + + // let start = Instant::now(); + // self.m2l(level); + // m2l_time += start.elapsed().as_millis(); + // } + // println!("M2L = {:?}ms", m2l_time); + // println!("L2L = {:?}ms", l2l_time); - let start = Instant::now(); - // Leaf level computations - self.p2l(); - println!("P2L = {:?}ms", start.elapsed().as_millis()); + // let start = Instant::now(); + // // Leaf level computations + // self.p2l(); + // println!("P2L = {:?}ms", start.elapsed().as_millis()); - // Sum all potential contributions - let start = Instant::now(); - self.m2p(); - println!("M2P = {:?}ms", start.elapsed().as_millis()); - let start = Instant::now(); - self.p2p(); - println!("P2P = {:?}ms", start.elapsed().as_millis()); - let start = Instant::now(); - self.l2p(); - println!("L2P = {:?}ms", start.elapsed().as_millis()); - } + // // Sum all potential contributions + // let start = Instant::now(); + // self.m2p(); + // println!("M2P = {:?}ms", start.elapsed().as_millis()); + // let start = Instant::now(); + // self.p2p(); + // println!("P2P = {:?}ms", start.elapsed().as_millis()); + // let start = Instant::now(); + // self.l2p(); + // println!("L2P = {:?}ms", start.elapsed().as_millis()); + // } fn run(&self) { self.upward_pass(); - self.downward_pass(); + // self.downward_pass(); } } #[allow(unused_imports)] mod test { + use bempp_kernel::laplace_3d::evaluate_laplace_one_target; // use approx::{assert_relative_eq, RelativeEq}; use rand::prelude::*; use rand::SeedableRng; @@ -1229,7 +1345,7 @@ mod test { use bempp_kernel::laplace_3d::Laplace3dKernel; // // use crate::laplace::LaplaceKernel; - use rlst::{dense::rlst_rand_mat, common::traits::ColumnMajorIterator}; + use rlst::{common::traits::ColumnMajorIterator, dense::rlst_rand_mat}; use super::*; @@ -1262,19 +1378,19 @@ mod test { fn points_fixture( npoints: usize, min: Option, - max: Option + max: Option, ) -> Matrix, Dynamic, Dynamic>, Dynamic, Dynamic> { // Generate a set of randomly distributed points let mut range = StdRng::seed_from_u64(0); - + let between; - if let (Some(min),Some(max)) = (min, max) { + if let (Some(min), Some(max)) = (min, max) { between = rand::distributions::Uniform::from(min..max); } else { between = rand::distributions::Uniform::from(0.0_f64..1.0_f64); } - + let mut points = rlst_mat![f64, (npoints, 3)]; for i in 0..npoints { @@ -1287,21 +1403,17 @@ mod test { } #[test] - fn test_fmm() { + fn test_upward_pass() { let npoints = 1000; - // let points = points_fixture(npoints); - // let points_clone = points.clone(); - // let depth = 4; - // let n_crit = 150; let points = points_fixture(npoints, None, None); - - let order = 2; + + let order = 5; let alpha_inner = 1.05; let alpha_outer = 2.9; let adaptive = false; let k = 50; let ncrit = 100; - let depth = 3; + let depth = 2; let kernel = Laplace3dKernel::::default(); let start = Instant::now(); @@ -1310,13 +1422,13 @@ mod test { let start = Instant::now(); - // // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( - // // kernel.clone(), - // // Some(k), - // // order, - // // tree.get_domain().clone(), - // // alpha_inner, - // // ); + // // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( + // // kernel.clone(), + // // Some(k), + // // order, + // // tree.get_domain().clone(), + // // alpha_inner, + // // ); let m2l_data_svd = SvdFieldTranslationKiFmm::new( kernel.clone(), @@ -1327,66 +1439,174 @@ mod test { ); println!("SVD operators = {:?}ms", start.elapsed().as_millis()); - // let start = Instant::now(); - // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( - // kernel.clone(), - // order, - // tree.get_domain().clone(), - // alpha_inner, - // ); - // println!("FFT operators = {:?}ms", start.elapsed().as_millis()); + // let start = Instant::now(); + // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( + // kernel.clone(), + // order, + // tree.get_domain().clone(), + // alpha_inner, + // ); + // println!("FFT operators = {:?}ms", start.elapsed().as_millis()); let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - // fmm.m2m[0].pretty_print(); + let charges = Charges::new(); + let datatree = FmmData::new(fmm, charges); + datatree.upward_pass(); + + // let e = e.unwrap().lock().unwrap(); + // let e= datatree.multipoles.get(&ROOT).unwrap().lock().unwrap().deref(); + + let pt = vec![100., 0., 0.]; + let distant_point = unsafe { rlst_pointer_mat!['static, f64, pt.as_ptr(), (1, 3), (1, 1)] }; + + let charges = vec![1.0; npoints]; + let charges = + unsafe { rlst_pointer_mat!['static, f64, charges.as_ptr(), (1, npoints), (1, 1)] }; + let mut direct = rlst_col_vec![f64, 1]; + evaluate_laplace_one_target( + EvalType::Value, + distant_point.data(), + points.data(), + charges.data(), + direct.data_mut(), + ); - // assert!(false) + let mut result = rlst_col_vec![f64, 1]; - // let charges = Charges::new(); + let upward_equivalent_surface = ROOT.compute_surface( + datatree.fmm.tree().get_domain(), + datatree.fmm.order, + datatree.fmm.alpha_inner, + ); + let binding = datatree.multipoles.get(&ROOT).unwrap().lock().unwrap(); + let multipole_expansion = binding.deref(); + + evaluate_laplace_one_target( + EvalType::Value, + distant_point.data(), + &upward_equivalent_surface[..], + multipole_expansion.data(), + result.data_mut(), + ); - // let datatree = FmmData::new(fmm, charges); + result.pretty_print(); + direct.pretty_print(); + // kernel.evaluate_st(EvalType::Value, points.data(), , charges, result) + // println!("distant {:?}", distant_point) + assert!(false) + } - // datatree.run(); + #[test] + fn test_fmm() { + let npoints = 1000; + // let points = points_fixture(npoints); + // let points_clone = points.clone(); + // let depth = 4; + // let n_crit = 150; + let points = points_fixture(npoints, None, None); - // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + let order = 2; + let alpha_inner = 1.05; + let alpha_outer = 2.9; + let adaptive = false; + let k = 50; + let ncrit = 100; + let depth = 2; + let kernel = Laplace3dKernel::::default(); - // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); - // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + let start = Instant::now(); + let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); + println!("Tree = {:?}ms", start.elapsed().as_millis()); - // let mut direct = vec![0f64; pts.len()]; - // let all_point_coordinates = points_clone - // .iter() - // .map(|p| p.coordinate) - // .flat_map(|[x, y, z]| vec![x, y, z]) - // .collect_vec(); + let start = Instant::now(); - // let leaf_coordinates = pts - // .iter() - // .map(|p| p.coordinate) - // .flat_map(|[x, y, z]| vec![x, y, z]) - // .collect_vec(); - // let all_charges = vec![1f64; points_clone.len()]; - - // let kernel = LaplaceKernel { - // dim: 3, - // is_singular: false, - // value_dimension: 3, - // }; - // kernel.potential( - // &all_point_coordinates[..], - // &all_charges[..], - // &leaf_coordinates[..], - // &mut direct[..], - // ); - - // let abs_error: f64 = potentials - // .iter() - // .zip(direct.iter()) - // .map(|(a, b)| (a - b).abs()) - // .sum(); - // let rel_error: f64 = abs_error / (direct.iter().sum::()); + // // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( + // // kernel.clone(), + // // Some(k), + // // order, + // // tree.get_domain().clone(), + // // alpha_inner, + // // ); + + let m2l_data_svd = SvdFieldTranslationKiFmm::new( + kernel.clone(), + Some(k), + order, + tree.get_domain().clone(), + alpha_inner, + ); + println!("SVD operators = {:?}ms", start.elapsed().as_millis()); + + // let start = Instant::now(); + // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( + // kernel.clone(), + // order, + // tree.get_domain().clone(), + // alpha_inner, + // ); + // println!("FFT operators = {:?}ms", start.elapsed().as_millis()); + + let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); + + let charges = Charges::new(); + let datatree = FmmData::new(fmm, charges); + datatree.upward_pass(); + + // let e = datatree.multipoles.get(&ROOT); + + // println!("{:?}", ); + // println!("e {:?}", e); + // e.pretty_print(); + + assert!(false); + // fmm.m2m[0].pretty_print(); - // println!("p={:?} rel_error={:?}\n", order, rel_error); - // assert!(false) + // let charges = Charges::new(); + + // let datatree = FmmData::new(fmm, charges); + + // datatree.run(); + + // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + + // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); + // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + + // let mut direct = vec![0f64; pts.len()]; + // let all_point_coordinates = points_clone + // .iter() + // .map(|p| p.coordinate) + // .flat_map(|[x, y, z]| vec![x, y, z]) + // .collect_vec(); + + // let leaf_coordinates = pts + // .iter() + // .map(|p| p.coordinate) + // .flat_map(|[x, y, z]| vec![x, y, z]) + // .collect_vec(); + // let all_charges = vec![1f64; points_clone.len()]; + + // let kernel = LaplaceKernel { + // dim: 3, + // is_singular: false, + // value_dimension: 3, + // }; + // kernel.potential( + // &all_point_coordinates[..], + // &all_charges[..], + // &leaf_coordinates[..], + // &mut direct[..], + // ); + + // let abs_error: f64 = potentials + // .iter() + // .zip(direct.iter()) + // .map(|(a, b)| (a - b).abs()) + // .sum(); + // let rel_error: f64 = abs_error / (direct.iter().sum::()); + + // println!("p={:?} rel_error={:?}\n", order, rel_error); + // assert!(false) } } diff --git a/fmm/src/linalg.rs b/fmm/src/linalg.rs index 6bda3e4a..58300cec 100644 --- a/fmm/src/linalg.rs +++ b/fmm/src/linalg.rs @@ -7,7 +7,6 @@ // use rlst; // use rlst::dense::{base_matrix::BaseMatrix, VectorContainer}; - // // const F64_EPSILON: f64 = 2.220_446_049_250_313E-16f64; // // type D = Dim<[usize; 2]>; // // type Type1 = ArrayBase, D>; @@ -50,7 +49,7 @@ // let max_s = s[0]; // for s in s.iter_mut() { - + // } // } @@ -77,7 +76,6 @@ // use rlst::common::tools::PrettyPrint; // use rlst::dense::rlst_rand_mat; - // #[test] // fn test_pinv() { // // let mut range = StdRng::seed_from_u64(0); From 4562f6199dc1a623d85c5240c534ca0f2626e2fa Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Wed, 5 Jul 2023 18:59:31 +0100 Subject: [PATCH 12/40] Working upward pass with rlst --- field/src/lib.rs | 4 - fmm/src/fmm.rs | 358 +++++++++++++++++----------------------------- traits/src/fmm.rs | 2 +- 3 files changed, 136 insertions(+), 228 deletions(-) diff --git a/field/src/lib.rs b/field/src/lib.rs index 3faf5ee7..1fc0a318 100644 --- a/field/src/lib.rs +++ b/field/src/lib.rs @@ -809,11 +809,7 @@ where // } // } -use std::any::type_name; -fn type_of(_: T) -> &'static str { - type_name::() -} mod test { diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index b5150c40..41283e3b 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -2,7 +2,7 @@ extern crate blas_src; use cauchy::Scalar; use itertools::Itertools; -use ndarray::AssignElem; +// use ndarray::AssignElem; // use ndarray::*; // use ndarray_ndimage::{pad, PadMode}; // use ndrustfft::{ndfft, ndfft_r2c, ndifft, ndifft_r2c, Complex, FftHandler, R2cFftHandler}; @@ -312,82 +312,43 @@ where .map(|p| p.coordinate) .flat_map(|[x, y, z]| vec![x, y, z]) .collect_vec(); - let nsources = leaf_coordinates.len() / self.fmm.kernel.space_dimension(); + + // Get into row major order let leaf_coordinates = unsafe { - rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (nsources, 1)] - }; + rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); let upward_check_surface = leaf.compute_surface( &fmm_arc.tree().domain, fmm_arc.order, fmm_arc.alpha_outer, ); - let ntargets = upward_check_surface.len() / fmm_arc.kernel.space_dimension(); - let upward_check_surface = unsafe { - rlst_pointer_mat!['a, f64, upward_check_surface.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (1, ntargets)] - }; - // let leaf_charges_view = ArrayView::from(leaf_charges_arc.deref()); - // let leaf_charges_slice = leaf_charges_view.as_slice().unwrap(); - let n_leaf_pts = leaf_coordinates.shape().0; - let leaf_charges = unsafe { rlst_pointer_mat!['a, f64, leaf_charges_arc.deref().as_ptr(), (n_leaf_pts, 1), (n_leaf_pts, 1)] }; + let leaf_charges = leaf_charges_arc.deref(); // Calculate check potential - // let mut check_potential = - // vec![0.; upward_check_surface.len() / self.fmm.kernel.dim()]; - let mut check_potential = rlst_col_vec![f64, ntargets]; fmm_arc.kernel.evaluate_st( EvalType::Value, leaf_coordinates.data(), - upward_check_surface.data(), - leaf_charges.data(), + &upward_check_surface[..], + &leaf_charges[..], check_potential.data_mut() ); - // println!("{:?}", upward_check_surface.data()); - // println!("{:?}", leaf_coordinates.data()); - // println!("{:?}", leaf_charges.data()); - // println!("HERE {:?}", check_potential.data()); - // println!(); - - // fmm_arc.kernel.potential( - // &leaf_coordinates[..], - // leaf_charges_slice, - // &upward_check_surface[..], - // &mut check_potential[..], - // ); - // let check_potential = Array1::from_vec(check_potential); - - // Calculate multipole expansion - // let leaf_multipole_owned = fmm_arc.kernel.scale(leaf.level()) - // * fmm_arc - // .uc2e_inv - // .0 - // .dot(&fmm_arc.uc2e_inv.1.dot(&check_potential)); - let leaf_multipole_owned: Matrix, Dynamic, Dynamic>, Dynamic, Dynamic> = ( + let leaf_multipole_owned = ( fmm_arc.kernel.scale(leaf.level()) * fmm_arc.uc2e_inv.dot(&check_potential) ).eval(); let mut leaf_multipole_lock = leaf_multipole_arc.lock().unwrap(); - // println!("target lock {:?}", target_local_lock.shape()); for i in 0..leaf_multipole_lock.shape().0 { leaf_multipole_lock[[i, 0]] += leaf_multipole_owned[[i, 0]]; } - - // if !leaf_multipole_lock.is_empty() { - // leaf_multipole_lock - // .iter_mut() - // .zip(leaf_multipole_owned.iter()) - // .for_each(|(c, m)| *c += *m); - // } else { - // leaf_multipole_lock.extend(leaf_multipole_owned); - // } } }); } @@ -399,76 +360,22 @@ where sources.par_iter().for_each(move |&source| { let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + let operator_index = source.siblings().iter().position(|&x| x == source).unwrap(); let source_multipole_arc = Arc::clone(self.multipoles.get(&source).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - let target_multipole_arc = Arc::clone(self.multipoles.get(&source.parent()).unwrap()); let fmm_arc = Arc::clone(&self.fmm); - let source_multipole_ptr = source_multipole_lock.deref().data().as_ptr(); - let source_multipole_view = unsafe { - rlst_pointer_mat!['a, f64, source_multipole_ptr, (ncoeffs, 1), (1, ncoeffs)] - }; - - let operator_index = source.siblings().iter().position(|&x| x == source).unwrap(); - - // let source_multipole_view = rlst_col_vec![f64, ncoeffs]; - // let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); + let source_multipole_lock = source_multipole_arc.lock().unwrap(); let target_multipole_owned = - fmm_arc.m2m[operator_index].dot(&source_multipole_view); + fmm_arc.m2m[operator_index].dot(&source_multipole_lock); let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); - // target_multipole_lock.pretty_print(); - // target_multipole_owned.pretty_print(); - // println!("HERE {:?} {:?}", target_multipole_lock.shape(), target_multipole_owned.shape()); - - // println!("Attempting to sum : {:?} \n {:?} using {:?} \n", target_multipole_lock.data(), target_multipole_owned.data(), source_multipole_view.data()); - for i in 0..ncoeffs { target_multipole_lock[[i, 0]] += target_multipole_owned[[i, 0]]; } - // for i in 0..ncoeffs { - // // println!("{:?} {:?} \n {:?} \n {:?} \n \n", target_multipole_lock[[i, 0]], target_multipole_owned[[i, 0]], target_multipole_lock.data(), target_multipole_owned.data()); - // [[i, 0]] = target_multipole_lock[[i, 0]] + target_multipole_owned[[i, 0]]; - // } - // let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); - - // // fmm_arc.m2m[operator_index].pretty_print(); - // // // source_multipole_view.pretty_print(); - // // target_multipole_owned.pretty_print(); - // // println!("HERE"); - - // for i in 0..ncoeffs { - // target_multipole_lock[[i, 0]] += target_multipole_owned[[i, 0]]; - // } - // target_multipole_lock.pretty_print(); - // println!("{:?} ", source.parent().anchor()); - // if !source_multipole_lock.is_empty() { - // let target_multipole_arc = - // Arc::clone(self.multipoles.get(&source.parent()).unwrap()); - // let fmm_arc = Arc::clone(&self.fmm); - - // let operator_index = - // source.siblings().iter().position(|&x| x == source).unwrap(); - - // let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); - - // let target_multipole_owned = - // fmm_arc.m2m[operator_index].dot(&source_multipole_view); - // let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); - - // if !target_multipole_lock.is_empty() { - // target_multipole_lock - // .iter_mut() - // .zip(target_multipole_owned.iter()) - // .for_each(|(c, m)| *c += *m); - // } else { - // target_multipole_lock.extend(target_multipole_owned); - // } - // } }) } } @@ -489,19 +396,24 @@ where // let operator_index = target.siblings().iter().position(|&x| x == target).unwrap(); // let source_local_lock = source_local_arc.lock().unwrap(); -// let source_local_view = ArrayView::from(source_local_lock.deref()); +// // let source_local_view = ArrayView::from(source_local_lock.deref()); + +// let source_local_ptr = source_local_lock.deref().data().as_ptr(); +// let source_multipole_view = unsafe { +// rlst_pointer_mat!['a, f64, source_multipole_ptr, (ncoeffs, 1), (1, ncoeffs)] +// }; // let target_local_owned = fmm.l2l[operator_index].dot(&source_local_view); // let mut target_local_lock = target_local_arc.lock().unwrap(); -// if !target_local_lock.is_empty() { -// target_local_lock -// .iter_mut() -// .zip(target_local_owned.iter()) -// .for_each(|(c, m)| *c += *m); -// } else { -// target_local_lock.extend(target_local_owned); -// } +// // if !target_local_lock.is_empty() { +// // target_local_lock +// // .iter_mut() +// // .zip(target_local_owned.iter()) +// // .for_each(|(c, m)| *c += *m); +// // } else { +// // target_local_lock.extend(target_local_owned); +// // } // }) // } // } @@ -1497,116 +1409,116 @@ mod test { assert!(false) } - #[test] - fn test_fmm() { - let npoints = 1000; - // let points = points_fixture(npoints); - // let points_clone = points.clone(); - // let depth = 4; - // let n_crit = 150; - let points = points_fixture(npoints, None, None); - - let order = 2; - let alpha_inner = 1.05; - let alpha_outer = 2.9; - let adaptive = false; - let k = 50; - let ncrit = 100; - let depth = 2; - let kernel = Laplace3dKernel::::default(); + // #[test] + // fn test_fmm() { + // let npoints = 1000; + // // let points = points_fixture(npoints); + // // let points_clone = points.clone(); + // // let depth = 4; + // // let n_crit = 150; + // let points = points_fixture(npoints, None, None); + + // let order = 2; + // let alpha_inner = 1.05; + // let alpha_outer = 2.9; + // let adaptive = false; + // let k = 50; + // let ncrit = 100; + // let depth = 2; + // let kernel = Laplace3dKernel::::default(); - let start = Instant::now(); - let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); - println!("Tree = {:?}ms", start.elapsed().as_millis()); - - let start = Instant::now(); - - // // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( - // // kernel.clone(), - // // Some(k), - // // order, - // // tree.get_domain().clone(), - // // alpha_inner, - // // ); - - let m2l_data_svd = SvdFieldTranslationKiFmm::new( - kernel.clone(), - Some(k), - order, - tree.get_domain().clone(), - alpha_inner, - ); - println!("SVD operators = {:?}ms", start.elapsed().as_millis()); - - // let start = Instant::now(); - // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( - // kernel.clone(), - // order, - // tree.get_domain().clone(), - // alpha_inner, - // ); - // println!("FFT operators = {:?}ms", start.elapsed().as_millis()); - - let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - - let charges = Charges::new(); - let datatree = FmmData::new(fmm, charges); - datatree.upward_pass(); - - // let e = datatree.multipoles.get(&ROOT); - - // println!("{:?}", ); - // println!("e {:?}", e); - // e.pretty_print(); - - assert!(false); - // fmm.m2m[0].pretty_print(); - - // let charges = Charges::new(); - - // let datatree = FmmData::new(fmm, charges); - - // datatree.run(); - - // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; - - // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); - // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); - - // let mut direct = vec![0f64; pts.len()]; - // let all_point_coordinates = points_clone - // .iter() - // .map(|p| p.coordinate) - // .flat_map(|[x, y, z]| vec![x, y, z]) - // .collect_vec(); - - // let leaf_coordinates = pts - // .iter() - // .map(|p| p.coordinate) - // .flat_map(|[x, y, z]| vec![x, y, z]) - // .collect_vec(); - // let all_charges = vec![1f64; points_clone.len()]; - - // let kernel = LaplaceKernel { - // dim: 3, - // is_singular: false, - // value_dimension: 3, - // }; - // kernel.potential( - // &all_point_coordinates[..], - // &all_charges[..], - // &leaf_coordinates[..], - // &mut direct[..], - // ); + // let start = Instant::now(); + // let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); + // println!("Tree = {:?}ms", start.elapsed().as_millis()); - // let abs_error: f64 = potentials - // .iter() - // .zip(direct.iter()) - // .map(|(a, b)| (a - b).abs()) - // .sum(); - // let rel_error: f64 = abs_error / (direct.iter().sum::()); + // let start = Instant::now(); - // println!("p={:?} rel_error={:?}\n", order, rel_error); - // assert!(false) - } + // // // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( + // // // kernel.clone(), + // // // Some(k), + // // // order, + // // // tree.get_domain().clone(), + // // // alpha_inner, + // // // ); + + // let m2l_data_svd = SvdFieldTranslationKiFmm::new( + // kernel.clone(), + // Some(k), + // order, + // tree.get_domain().clone(), + // alpha_inner, + // ); + // println!("SVD operators = {:?}ms", start.elapsed().as_millis()); + + // // let start = Instant::now(); + // // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( + // // kernel.clone(), + // // order, + // // tree.get_domain().clone(), + // // alpha_inner, + // // ); + // // println!("FFT operators = {:?}ms", start.elapsed().as_millis()); + + // let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); + + // let charges = Charges::new(); + // let datatree = FmmData::new(fmm, charges); + // datatree.upward_pass(); + + // // let e = datatree.multipoles.get(&ROOT); + + // // println!("{:?}", ); + // // println!("e {:?}", e); + // // e.pretty_print(); + + // assert!(false); + // // fmm.m2m[0].pretty_print(); + + // // let charges = Charges::new(); + + // // let datatree = FmmData::new(fmm, charges); + + // // datatree.run(); + + // // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + + // // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); + // // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + + // // let mut direct = vec![0f64; pts.len()]; + // // let all_point_coordinates = points_clone + // // .iter() + // // .map(|p| p.coordinate) + // // .flat_map(|[x, y, z]| vec![x, y, z]) + // // .collect_vec(); + + // // let leaf_coordinates = pts + // // .iter() + // // .map(|p| p.coordinate) + // // .flat_map(|[x, y, z]| vec![x, y, z]) + // // .collect_vec(); + // // let all_charges = vec![1f64; points_clone.len()]; + + // // let kernel = LaplaceKernel { + // // dim: 3, + // // is_singular: false, + // // value_dimension: 3, + // // }; + // // kernel.potential( + // // &all_point_coordinates[..], + // // &all_charges[..], + // // &leaf_coordinates[..], + // // &mut direct[..], + // // ); + + // // let abs_error: f64 = potentials + // // .iter() + // // .zip(direct.iter()) + // // .map(|(a, b)| (a - b).abs()) + // // .sum(); + // // let rel_error: f64 = abs_error / (direct.iter().sum::()); + + // // println!("p={:?} rel_error={:?}\n", order, rel_error); + // // assert!(false) + // } } diff --git a/traits/src/fmm.rs b/traits/src/fmm.rs index 453d9956..b0cc516b 100644 --- a/traits/src/fmm.rs +++ b/traits/src/fmm.rs @@ -40,7 +40,7 @@ pub trait Fmm { pub trait FmmLoop { fn upward_pass(&self); - fn downward_pass(&self); + // fn downward_pass(&self); fn run(&self); } From 296fe010280ea302f43eeedc32ae227370e32973 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Wed, 5 Jul 2023 22:37:56 +0100 Subject: [PATCH 13/40] Add something that converges, with caveats --- fmm/src/fmm.rs | 941 +++++++++++++++++++++------------------------- traits/src/fmm.rs | 2 +- 2 files changed, 439 insertions(+), 504 deletions(-) diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 41283e3b..58acfa1b 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,4 +1,7 @@ extern crate blas_src; +// TODO Should be generic over kernel float type parmeter +// TODO SHould be generic over kernel evaluation type +// TODO should check what happens with rectangular distributions of points! use cauchy::Scalar; use itertools::Itertools; @@ -128,9 +131,6 @@ where // Compute upward check to equivalent, and downward check to equivalent Gram matrices // as well as their inverses using DGESVD. - - // let mut uc2e = Vec::::new(); - // kernel.gram(&upward_equivalent_surface, &upward_check_surface, &mut uc2e); let mut uc2e = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; kernel.gram( EvalType::Value, @@ -139,13 +139,6 @@ where uc2e.data_mut(), ); - // let mut dc2e = Vec::::new(); - // kernel.gram( - // &downward_equivalent_surface, - // &downward_check_surface, - // &mut dc2e, - // ); - let mut dc2e = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; kernel.gram( EvalType::Value, @@ -157,13 +150,6 @@ where let nrows = m2l.ncoeffs(order); let ncols = m2l.ncoeffs(order); - // let uc2e = Array1::from(uc2e) - // .to_shape((nrows, ncols)) - // .unwrap() - // .to_owned(); - - // let (a, b, c) = pinv(&uc2e); - // let uc2e_inv = (a.to_owned(), b.dot(&c).to_owned()); let (s, ut, v) = uc2e.linalg().pinv(None).unwrap(); let s = s.unwrap(); let ut = ut.unwrap(); @@ -174,12 +160,6 @@ where } let uc2e_inv = v.dot(&mat_s).dot(&ut); - // let dc2e = Array1::from(dc2e) - // .to_shape((nrows, ncols)) - // .unwrap() - // .to_owned(); - // let (a, b, c) = pinv(&dc2e); - // let dc2e_inv = (a.to_owned(), b.dot(&c).to_owned()); let (s, ut, v) = dc2e.linalg().pinv(None).unwrap(); let s = s.unwrap(); let ut = ut.unwrap(); @@ -216,11 +196,8 @@ where pc2ce.data_mut(), ); - // let pc2e = Array::from_shape_vec((nrows, ncols), pc2ce).unwrap(); - // m2m.push(uc2e_inv.0.dot(&uc2e_inv.1.dot(&pc2e))); m2m.push(uc2e_inv.dot(&pc2ce).eval()); - // let mut cc2pe = Vec::new(); let mut cc2pe = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; kernel.gram( @@ -229,9 +206,7 @@ where &child_downward_check_surface.data(), cc2pe.data_mut(), ); - // let cc2pe = Array::from_shape_vec((ncols, nrows), cc2pe).unwrap(); l2l.push((kernel.scale(child.level()) * dc2e_inv.dot(&cc2pe)).eval()); - // l2l.push(kernel.scale(child.level()) * dc2e_inv.0.dot(&dc2e_inv.1.dot(&cc2pe))) } Self { @@ -261,6 +236,7 @@ where let mut potentials = HashMap::new(); let mut points = HashMap::new(); let mut charges = HashMap::new(); + let ncoeffs = fmm.m2l.ncoeffs(fmm.order); let dummy = rlst_col_vec![f64, ncoeffs]; @@ -269,10 +245,12 @@ where for key in keys.iter() { multipoles.insert(*key, Arc::new(Mutex::new(dummy.new_like_self().eval()))); locals.insert(*key, Arc::new(Mutex::new(dummy.new_like_self().eval()))); - potentials.insert(*key, Arc::new(Mutex::new(dummy.new_like_self().eval()))); if let Some(point_data) = fmm.tree().get_points(key) { points.insert(*key, point_data.iter().cloned().collect_vec()); + // TODO: Fragile + let npoints = point_data.len(); + potentials.insert(*key, Arc::new(Mutex::new(rlst_col_vec![f64, npoints]))); // TODO: Replace with a global index lookup at some point charges.insert(*key, Arc::new(vec![1.0; point_data.len()])); } @@ -381,267 +359,265 @@ where } } -// impl TargetTranslation for FmmData> -// where -// T: Kernel + std::marker::Sync + std::marker::Send, -// U: FieldTranslationData + std::marker::Sync + std::marker::Send, -// { -// fn l2l(&self, level: u64) { -// if let Some(targets) = self.fmm.tree.get_keys(level) { -// targets.par_iter().for_each(move |&target| { -// let source_local_arc = Arc::clone(self.locals.get(&target.parent()).unwrap()); -// let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); -// let fmm = Arc::clone(&self.fmm); +impl TargetTranslation for FmmData> +where + T: Kernel + std::marker::Sync + std::marker::Send, + U: FieldTranslationData + std::marker::Sync + std::marker::Send, +{ + fn l2l(&self, level: u64) { + if let Some(targets) = self.fmm.tree.get_keys(level) { + targets.par_iter().for_each(move |&target| { + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + let source_local_arc = Arc::clone(self.locals.get(&target.parent()).unwrap()); + let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); + let fmm = Arc::clone(&self.fmm); -// let operator_index = target.siblings().iter().position(|&x| x == target).unwrap(); + let operator_index = target.siblings().iter().position(|&x| x == target).unwrap(); -// let source_local_lock = source_local_arc.lock().unwrap(); + let source_local_lock = source_local_arc.lock().unwrap(); -// // let source_local_view = ArrayView::from(source_local_lock.deref()); + let target_local_owned = fmm.l2l[operator_index].dot(&source_local_lock); + let mut target_local_lock = target_local_arc.lock().unwrap(); -// let source_local_ptr = source_local_lock.deref().data().as_ptr(); -// let source_multipole_view = unsafe { -// rlst_pointer_mat!['a, f64, source_multipole_ptr, (ncoeffs, 1), (1, ncoeffs)] -// }; -// let target_local_owned = fmm.l2l[operator_index].dot(&source_local_view); -// let mut target_local_lock = target_local_arc.lock().unwrap(); + for i in 0..ncoeffs { + target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; + } + }) + } + } -// // if !target_local_lock.is_empty() { -// // target_local_lock -// // .iter_mut() -// // .zip(target_local_owned.iter()) -// // .for_each(|(c, m)| *c += *m); -// // } else { -// // target_local_lock.extend(target_local_owned); -// // } -// }) -// } -// } + fn m2p<'a>(&self) { + if let Some(targets) = self.fmm.tree.get_leaves() { + targets.par_iter().for_each(move |&target| { + let fmm_arc = Arc::clone(&self.fmm); + let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); -// fn m2p(&self) { -// if let Some(targets) = self.fmm.tree.get_leaves() { -// targets.par_iter().for_each(move |&target| { -// let fmm_arc = Arc::clone(&self.fmm); -// let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); - -// if let Some(points) = fmm_arc.tree().get_points(&target) { -// if let Some(w_list) = fmm_arc.get_w_list(&target) { -// for source in w_list.iter() { -// let source_multipole_arc = -// Arc::clone(self.multipoles.get(source).unwrap()); - -// let upward_equivalent_surface = source.compute_surface( -// fmm_arc.tree().get_domain(), -// fmm_arc.order(), -// fmm_arc.alpha_inner, -// ); - -// let source_multipole_lock = source_multipole_arc.lock().unwrap(); -// let source_multipole_view = -// ArrayView::from(source_multipole_lock.deref()); -// let source_multipole_slice = source_multipole_view.as_slice().unwrap(); - -// let target_coordinates = points -// .iter() -// .map(|p| p.coordinate) -// .flat_map(|[x, y, z]| vec![x, y, z]) -// .collect_vec(); - -// let mut target_potential = -// vec![0f64; target_coordinates.len() / self.fmm.kernel.dim()]; - -// fmm_arc.kernel().potential( -// &upward_equivalent_surface[..], -// source_multipole_slice, -// &target_coordinates[..], -// &mut target_potential, -// ); - -// let mut target_potential_lock = target_potential_arc.lock().unwrap(); - -// if !target_potential_lock.is_empty() { -// target_potential_lock -// .iter_mut() -// .zip(target_potential.iter()) -// .for_each(|(p, n)| *p += *n); -// } else { -// target_potential_lock.extend(target_potential); -// } -// } -// } -// } -// }) -// } -// } + if let Some(points) = fmm_arc.tree().get_points(&target) { + if let Some(w_list) = fmm_arc.get_w_list(&target) { + for source in w_list.iter() { + let source_multipole_arc = + Arc::clone(self.multipoles.get(source).unwrap()); + + let upward_equivalent_surface = source.compute_surface( + fmm_arc.tree().get_domain(), + fmm_arc.order(), + fmm_arc.alpha_inner, + ); + + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + + let target_coordinates = points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); + + // Get into row major order + let target_coordinates = unsafe { + rlst_pointer_mat!['a, f64, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); + + let mut target_potential = rlst_col_vec![f64, ntargets]; + + fmm_arc.kernel.evaluate_st( + EvalType::Value, + &upward_equivalent_surface[..], + target_coordinates.data(), + source_multipole_lock.data(), + target_potential.data_mut(), + ); + + let mut target_potential_lock = target_potential_arc.lock().unwrap(); + + for i in 0..ntargets { + target_potential_lock[[i, 0]] += target_potential[[i, 0]]; + } + } + } + } + }) + } + } -// fn l2p(&self) { -// if let Some(targets) = self.fmm.tree().get_leaves() { -// targets.par_iter().for_each(move |&leaf| { -// let fmm_arc = Arc::clone(&self.fmm); -// let target_potential_arc = Arc::clone(self.potentials.get(&leaf).unwrap()); -// let source_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); - -// if let Some(target_points) = fmm_arc.tree().get_points(&leaf) { -// // Lookup data -// let target_coordinates = target_points -// .iter() -// .map(|p| p.coordinate) -// .flat_map(|[x, y, z]| vec![x, y, z]) -// .collect_vec(); - -// let downward_equivalent_surface = leaf.compute_surface( -// &fmm_arc.tree().domain, -// fmm_arc.order, -// fmm_arc.alpha_outer, -// ); - -// let source_local_lock = source_local_arc.lock().unwrap(); -// let source_local_ref = ArrayView::from(source_local_lock.deref()); -// let source_local_slice = source_local_ref.as_slice().unwrap(); - -// let mut target_potential = -// vec![0f64; target_coordinates.len() / self.fmm.kernel.dim()]; - -// fmm_arc.kernel().potential( -// &downward_equivalent_surface[..], -// source_local_slice, -// &target_coordinates[..], -// &mut target_potential, -// ); - -// let mut out_potential_lock = target_potential_arc.lock().unwrap(); - -// if !out_potential_lock.is_empty() { -// out_potential_lock -// .iter_mut() -// .zip(target_potential.iter()) -// .for_each(|(p, n)| *p += *n); -// } else { -// out_potential_lock.extend(target_potential); -// } -// } -// }) -// } -// } + fn l2p<'a>(&self) { + if let Some(targets) = self.fmm.tree().get_leaves() { + targets.par_iter().for_each(move |&leaf| { + let fmm_arc = Arc::clone(&self.fmm); + let target_potential_arc = Arc::clone(self.potentials.get(&leaf).unwrap()); + let source_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); -// fn p2l(&self) { -// if let Some(targets) = self.fmm.tree().get_leaves() { -// targets.par_iter().for_each(move |&leaf| { -// let fmm_arc = Arc::clone(&self.fmm); -// let target_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); - -// if let Some(x_list) = fmm_arc.get_x_list(&leaf) { -// for source in x_list.iter() { -// if let Some(source_points) = fmm_arc.tree().get_points(source) { -// let source_coordinates = source_points -// .iter() -// .map(|p| p.coordinate) -// .flat_map(|[x, y, z]| vec![x, y, z]) -// .collect_vec(); - -// let source_charges = self.charges.get(source).unwrap(); -// let source_charges_view = ArrayView::from(source_charges.deref()); -// let source_charges_slice = source_charges_view.as_slice().unwrap(); - -// let downward_check_surface = leaf.compute_surface( -// &fmm_arc.tree().domain, -// fmm_arc.order, -// fmm_arc.alpha_inner, -// ); - -// let mut downward_check_potential = -// vec![0f64; downward_check_surface.len() / fmm_arc.kernel().dim()]; - -// fmm_arc.kernel.potential( -// &source_coordinates[..], -// source_charges_slice, -// &downward_check_surface[..], -// &mut downward_check_potential[..], -// ); - -// let downward_check_potential = -// ArrayView::from(&downward_check_potential); - -// let mut target_local_lock = target_local_arc.lock().unwrap(); - -// let target_local_owned = fmm_arc.kernel().scale(leaf.level()) -// * &fmm_arc -// .dc2e_inv -// .0 -// .dot(&fmm_arc.dc2e_inv.1.dot(&downward_check_potential)); - -// if !target_local_lock.is_empty() { -// target_local_lock -// .iter_mut() -// .zip(target_local_owned.iter()) -// .for_each(|(o, l)| *o += *l); -// } else { -// target_local_lock.extend(target_local_owned); -// } -// } -// } -// } -// }) -// } -// } + if let Some(target_points) = fmm_arc.tree().get_points(&leaf) { + // Lookup data + let target_coordinates = target_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); -// fn p2p(&self) { -// if let Some(targets) = self.fmm.tree.get_leaves() { -// targets.par_iter().for_each(move |&target| { -// let fmm_arc = Arc::clone(&self.fmm); -// let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); - -// if let Some(target_points) = fmm_arc.tree().get_points(&target) { -// let target_coordinates = target_points -// .iter() -// .map(|p| p.coordinate) -// .flat_map(|[x, y, z]| vec![x, y, z]) -// .collect_vec(); - -// if let Some(u_list) = fmm_arc.get_u_list(&target) { -// for source in u_list.iter() { -// if let Some(source_points) = fmm_arc.tree().get_points(source) { -// let source_coordinates = source_points -// .iter() -// .map(|p| p.coordinate) -// .flat_map(|[x, y, z]| vec![x, y, z]) -// .collect_vec(); - -// let source_charges_arc = -// Arc::clone(self.charges.get(source).unwrap()); -// let source_charges_view = -// ArrayView::from(source_charges_arc.deref()); -// let source_charges_slice = source_charges_view.as_slice().unwrap(); - -// let mut target_potential = -// vec![0f64; target_coordinates.len() / self.fmm.kernel.dim()]; - -// fmm_arc.kernel.potential( -// &source_coordinates[..], -// source_charges_slice, -// &target_coordinates[..], -// &mut target_potential, -// ); - -// let mut target_potential_lock = -// target_potential_arc.lock().unwrap(); - -// if !target_potential_lock.is_empty() { -// target_potential_lock -// .iter_mut() -// .zip(target_potential.iter()) -// .for_each(|(c, p)| *c += *p); -// } else { -// target_potential_lock.extend(target_potential) -// } -// } -// } -// } -// } -// }) -// } -// } -// } + // Get into row major order + let target_coordinates = unsafe { + rlst_pointer_mat!['a, f64, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); + + let downward_equivalent_surface = leaf.compute_surface( + &fmm_arc.tree().domain, + fmm_arc.order, + fmm_arc.alpha_outer, + ); + + let source_local_lock = source_local_arc.lock().unwrap(); + + let mut target_potential = rlst_col_vec![f64, ntargets]; + + fmm_arc.kernel.evaluate_st( + EvalType::Value, + &downward_equivalent_surface[..], + target_coordinates.data(), + source_local_lock.data(), + target_potential.data_mut(), + ); + + let mut target_potential_lock = target_potential_arc.lock().unwrap(); + + for i in 0..ntargets { + target_potential_lock[[i, 0]] += target_potential[[i, 0]]; + } + } + }) + } + } + + fn p2l<'a>(&self) { + if let Some(targets) = self.fmm.tree().get_leaves() { + targets.par_iter().for_each(move |&leaf| { + let fmm_arc = Arc::clone(&self.fmm); + let target_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + if let Some(x_list) = fmm_arc.get_x_list(&leaf) { + for source in x_list.iter() { + if let Some(source_points) = fmm_arc.tree().get_points(source) { + let source_coordinates = source_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); + + // Get into row major order + let source_coordinates = unsafe { + rlst_pointer_mat!['a, f64, source_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); + + let source_charges = self.charges.get(source).unwrap(); + + let downward_check_surface = leaf.compute_surface( + &fmm_arc.tree().domain, + fmm_arc.order, + fmm_arc.alpha_inner, + ); + + let ntargets = downward_check_surface.len() / fmm_arc.kernel.space_dimension(); + let mut downward_check_potential = rlst_col_vec![f64, ntargets]; + + fmm_arc.kernel.evaluate_st( + EvalType::Value, + source_coordinates.data(), + &downward_check_surface[..], + &source_charges[..], + downward_check_potential.data_mut() + ); + + + let mut target_local_lock = target_local_arc.lock().unwrap(); + + let target_local_owned = (fmm_arc.kernel.scale(leaf.level()) * fmm_arc.dc2e_inv.dot(&downward_check_potential)).eval(); + + for i in 0..ncoeffs { + target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; + } + } + } + } + }) + } + } + + fn p2p<'a>(&self) { + if let Some(targets) = self.fmm.tree.get_leaves() { + targets.par_iter().for_each(move |&target| { + let fmm_arc = Arc::clone(&self.fmm); + let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); + + if let Some(target_points) = fmm_arc.tree().get_points(&target) { + let target_coordinates = target_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let ntargets= target_coordinates.len() / self.fmm.kernel.space_dimension(); + + // Get into row major order + let target_coordinates = unsafe { + rlst_pointer_mat!['a, f64, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); + + if let Some(u_list) = fmm_arc.get_u_list(&target) { + for source in u_list.iter() { + if let Some(source_points) = fmm_arc.tree().get_points(source) { + let source_coordinates = source_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); + + // Get into row major order + let source_coordinates = unsafe { + rlst_pointer_mat!['a, f64, source_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); + + let source_charges_arc = + Arc::clone(self.charges.get(source).unwrap()); + + // let source_charges_view = + // ArrayView::from(source_charges_arc.deref()); + // let source_charges_slice = source_charges_view.as_slice().unwrap(); + + let mut target_potential = rlst_col_vec![f64, ntargets]; + // let mut target_potential = + // vec![0f64; target_coordinates.len() / self.fmm.kernel.dim()]; + + fmm_arc.kernel.evaluate_st( + EvalType::Value, + source_coordinates.data(), + target_coordinates.data(), + &source_charges_arc[..], + target_potential.data_mut(), + ); + + let mut target_potential_lock = + target_potential_arc.lock().unwrap(); + + for i in 0..ntargets { + target_potential_lock[[i, 0]] += target_potential[[i, 0]]; + } + } + } + } + } + }) + } + } +} impl FieldTranslation for FmmData>> where @@ -715,17 +691,11 @@ where let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); let source_multipole_lock = source_multipole_arc.lock().unwrap(); - // Column vector - let source_multipole_ptr = source_multipole_lock.deref().data().as_ptr(); - let source_multipole_view = unsafe { - rlst_pointer_mat!['a, f64, source_multipole_ptr, (ncoeffs, 1), (1, ncoeffs)] - }; - // // let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); // Compressed multipole let compressed_source_multipole_owned = - self.fmm.m2l.m2l.1.dot(&source_multipole_view); + self.fmm.m2l.m2l.1.dot(&source_multipole_lock).eval(); let first = i * self.fmm.m2l.k; let last = first + self.fmm.m2l.k; @@ -741,8 +711,13 @@ where let compressed_check_potential_owned = c_sub.dot(&multipoles); // Post process to find check potential - let check_potential_owned = - self.fmm.m2l.m2l.0.dot(&compressed_check_potential_owned); + let check_potential_owned = self + .fmm + .m2l + .m2l + .0 + .dot(&compressed_check_potential_owned) + .eval(); // Compute local // // let locals_owned = self.m2l_scale(level) @@ -752,31 +727,16 @@ where // // .dc2e_inv // // .0 // // .dot(&self.fmm.dc2e_inv.1.dot(&check_potential_owned)); - let locals_owned: Matrix< - f64, - BaseMatrix, Dynamic, Dynamic>, - Dynamic, - Dynamic, - > = (self.fmm.dc2e_inv.dot(&check_potential_owned) + let locals_owned = (self.fmm.dc2e_inv.dot(&check_potential_owned) * self.fmm.kernel.scale(level) * self.m2l_scale(level)) .eval(); - // multipoles.pretty_print(); - // assert!(false); - // Assign locals for (i, (_, target)) in m2l_rw.iter().enumerate() { let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); let mut target_local_lock = target_local_arc.lock().unwrap(); - // Column vector - let mut target_local_ptr = - target_local_lock.deref_mut().data_mut().as_mut_ptr(); - - let mut target_local_view = unsafe { - rlst_mut_pointer_mat!['a, f64, target_local_ptr, (ncoeffs, 1), (1, ncoeffs)] - }; let first = i * self.fmm.m2l.k; let last = first + self.fmm.m2l.k; @@ -790,22 +750,8 @@ where for i in 0..target_local_lock.shape().0 { target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; } - - // target_local_lock.pretty_print(); - // assert!(false); - // if !target_local_lock.is_empty() { - // // // target_local - // // // target_local_lock - // // // .iter_mut() - // // // .zip(target_local_owned.iter()) - // // // .for_each(|(c, m)| *c += *m); - - // // } else { - // // // target_local_lock.extend(target_local_owned); - // // } } }); - // assert!(false) } fn m2l_scale(&self, level: u64) -> f64 { @@ -820,11 +766,7 @@ where } } } -use std::any::type_name; -fn type_of(_: T) -> &'static str { - type_name::() -} // impl FieldTranslation for FmmData>> // where // T: Kernel + std::marker::Sync + std::marker::Send + Default, @@ -1186,7 +1128,7 @@ where impl FmmLoop for FmmData where T: Fmm, - FmmData: SourceTranslation + FieldTranslation, // + TargetTranslation, + FmmData: SourceTranslation + FieldTranslation + TargetTranslation, { fn upward_pass(&self) { // Particle to Multipole @@ -1203,44 +1145,44 @@ where println!("M2M = {:?}ms", start.elapsed().as_millis()); } - // fn downward_pass(&self) { - // let depth = self.fmm.tree().get_depth(); - // let mut l2l_time = 0; - // let mut m2l_time = 0; - // for level in 2..=depth { - // if level > 2 { - // let start = Instant::now(); - // self.l2l(level); - // l2l_time += start.elapsed().as_millis(); - // } - - // let start = Instant::now(); - // self.m2l(level); - // m2l_time += start.elapsed().as_millis(); - // } - // println!("M2L = {:?}ms", m2l_time); - // println!("L2L = {:?}ms", l2l_time); + fn downward_pass(&self) { + let depth = self.fmm.tree().get_depth(); + let mut l2l_time = 0; + let mut m2l_time = 0; + for level in 2..=depth { + if level > 2 { + let start = Instant::now(); + self.l2l(level); + l2l_time += start.elapsed().as_millis(); + } - // let start = Instant::now(); - // // Leaf level computations - // self.p2l(); - // println!("P2L = {:?}ms", start.elapsed().as_millis()); + let start = Instant::now(); + self.m2l(level); + m2l_time += start.elapsed().as_millis(); + } + println!("M2L = {:?}ms", m2l_time); + println!("L2L = {:?}ms", l2l_time); - // // Sum all potential contributions - // let start = Instant::now(); - // self.m2p(); - // println!("M2P = {:?}ms", start.elapsed().as_millis()); - // let start = Instant::now(); - // self.p2p(); - // println!("P2P = {:?}ms", start.elapsed().as_millis()); - // let start = Instant::now(); - // self.l2p(); - // println!("L2P = {:?}ms", start.elapsed().as_millis()); - // } + let start = Instant::now(); + // Leaf level computations + self.p2l(); + println!("P2L = {:?}ms", start.elapsed().as_millis()); + + // // Sum all potential contributions + let start = Instant::now(); + self.m2p(); + println!("M2P = {:?}ms", start.elapsed().as_millis()); + let start = Instant::now(); + self.p2p(); + println!("P2P = {:?}ms", start.elapsed().as_millis()); + let start = Instant::now(); + self.l2p(); + println!("L2P = {:?}ms", start.elapsed().as_millis()); + } fn run(&self) { self.upward_pass(); - // self.downward_pass(); + self.downward_pass(); } } @@ -1314,111 +1256,12 @@ mod test { points } - #[test] - fn test_upward_pass() { - let npoints = 1000; - let points = points_fixture(npoints, None, None); - - let order = 5; - let alpha_inner = 1.05; - let alpha_outer = 2.9; - let adaptive = false; - let k = 50; - let ncrit = 100; - let depth = 2; - let kernel = Laplace3dKernel::::default(); - - let start = Instant::now(); - let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); - println!("Tree = {:?}ms", start.elapsed().as_millis()); - - let start = Instant::now(); - - // // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( - // // kernel.clone(), - // // Some(k), - // // order, - // // tree.get_domain().clone(), - // // alpha_inner, - // // ); - - let m2l_data_svd = SvdFieldTranslationKiFmm::new( - kernel.clone(), - Some(k), - order, - tree.get_domain().clone(), - alpha_inner, - ); - println!("SVD operators = {:?}ms", start.elapsed().as_millis()); - - // let start = Instant::now(); - // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( - // kernel.clone(), - // order, - // tree.get_domain().clone(), - // alpha_inner, - // ); - // println!("FFT operators = {:?}ms", start.elapsed().as_millis()); - - let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - - let charges = Charges::new(); - let datatree = FmmData::new(fmm, charges); - datatree.upward_pass(); - - // let e = e.unwrap().lock().unwrap(); - // let e= datatree.multipoles.get(&ROOT).unwrap().lock().unwrap().deref(); - - let pt = vec![100., 0., 0.]; - let distant_point = unsafe { rlst_pointer_mat!['static, f64, pt.as_ptr(), (1, 3), (1, 1)] }; - - let charges = vec![1.0; npoints]; - let charges = - unsafe { rlst_pointer_mat!['static, f64, charges.as_ptr(), (1, npoints), (1, 1)] }; - let mut direct = rlst_col_vec![f64, 1]; - evaluate_laplace_one_target( - EvalType::Value, - distant_point.data(), - points.data(), - charges.data(), - direct.data_mut(), - ); - - let mut result = rlst_col_vec![f64, 1]; - - let upward_equivalent_surface = ROOT.compute_surface( - datatree.fmm.tree().get_domain(), - datatree.fmm.order, - datatree.fmm.alpha_inner, - ); - let binding = datatree.multipoles.get(&ROOT).unwrap().lock().unwrap(); - let multipole_expansion = binding.deref(); - - evaluate_laplace_one_target( - EvalType::Value, - distant_point.data(), - &upward_equivalent_surface[..], - multipole_expansion.data(), - result.data_mut(), - ); - - result.pretty_print(); - direct.pretty_print(); - // kernel.evaluate_st(EvalType::Value, points.data(), , charges, result) - // println!("distant {:?}", distant_point) - assert!(false) - } - // #[test] - // fn test_fmm() { + // fn test_upward_pass() { // let npoints = 1000; - // // let points = points_fixture(npoints); - // // let points_clone = points.clone(); - // // let depth = 4; - // // let n_crit = 150; // let points = points_fixture(npoints, None, None); - // let order = 2; + // let order = 5; // let alpha_inner = 1.05; // let alpha_outer = 2.9; // let adaptive = false; @@ -1465,60 +1308,152 @@ mod test { // let datatree = FmmData::new(fmm, charges); // datatree.upward_pass(); - // // let e = datatree.multipoles.get(&ROOT); + // // let e = e.unwrap().lock().unwrap(); + // // let e= datatree.multipoles.get(&ROOT).unwrap().lock().unwrap().deref(); + + // let pt = vec![100., 0., 0.]; + // let distant_point = unsafe { rlst_pointer_mat!['static, f64, pt.as_ptr(), (1, 3), (1, 1)] }; + + // let charges = vec![1.0; npoints]; + // let charges = + // unsafe { rlst_pointer_mat!['static, f64, charges.as_ptr(), (1, npoints), (1, 1)] }; + // let mut direct = rlst_col_vec![f64, 1]; + // evaluate_laplace_one_target( + // EvalType::Value, + // distant_point.data(), + // points.data(), + // charges.data(), + // direct.data_mut(), + // ); + + // let mut result = rlst_col_vec![f64, 1]; + + // let upward_equivalent_surface = ROOT.compute_surface( + // datatree.fmm.tree().get_domain(), + // datatree.fmm.order, + // datatree.fmm.alpha_inner, + // ); + // let binding = datatree.multipoles.get(&ROOT).unwrap().lock().unwrap(); + // let multipole_expansion = binding.deref(); + + // evaluate_laplace_one_target( + // EvalType::Value, + // distant_point.data(), + // &upward_equivalent_surface[..], + // multipole_expansion.data(), + // result.data_mut(), + // ); + + // result.pretty_print(); + // direct.pretty_print(); + // // kernel.evaluate_st(EvalType::Value, points.data(), , charges, result) + // // println!("distant {:?}", distant_point) + // assert!(false) + // } + + #[test] + fn test_fmm<'a>() { + let npoints = 1000; + // let points = points_fixture(npoints); + // let points_clone = points.clone(); + // let depth = 4; + // let n_crit = 150; + let points = points_fixture(npoints, None, None); + + let order = 6; + let alpha_inner = 1.05; + let alpha_outer = 2.9; + let adaptive = false; + // TODO: Have to pass this information to data tree creation!!!! + let k = 1000; + let ncrit = 100; + let depth = 2; + let kernel = Laplace3dKernel::::default(); + + let start = Instant::now(); + let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); + println!("Tree = {:?}ms", start.elapsed().as_millis()); + + let start = Instant::now(); + + // // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( + // // kernel.clone(), + // // Some(k), + // // order, + // // tree.get_domain().clone(), + // // alpha_inner, + // // ); - // // println!("{:?}", ); - // // println!("e {:?}", e); - // // e.pretty_print(); + let m2l_data_svd = SvdFieldTranslationKiFmm::new( + kernel.clone(), + Some(k), + order, + tree.get_domain().clone(), + alpha_inner, + ); + println!("SVD operators = {:?}ms", start.elapsed().as_millis()); + + // let start = Instant::now(); + // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( + // kernel.clone(), + // order, + // tree.get_domain().clone(), + // alpha_inner, + // ); + // println!("FFT operators = {:?}ms", start.elapsed().as_millis()); + + let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - // assert!(false); - // // fmm.m2m[0].pretty_print(); + let charges = Charges::new(); + let datatree = FmmData::new(fmm, charges); + datatree.run(); - // // let charges = Charges::new(); + let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; - // // let datatree = FmmData::new(fmm, charges); + let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); + let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + + let leaf_coordinates = pts + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); - // // datatree.run(); + let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); - // // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + // Get into row major order + let leaf_coordinates = unsafe { + rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] + }.eval(); - // // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); - // // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); - // // let mut direct = vec![0f64; pts.len()]; - // // let all_point_coordinates = points_clone - // // .iter() - // // .map(|p| p.coordinate) - // // .flat_map(|[x, y, z]| vec![x, y, z]) - // // .collect_vec(); + let mut direct = vec![0f64; pts.len()]; + let all_point_coordinates = points_fixture(npoints, None, None); - // // let leaf_coordinates = pts - // // .iter() - // // .map(|p| p.coordinate) - // // .flat_map(|[x, y, z]| vec![x, y, z]) - // // .collect_vec(); - // // let all_charges = vec![1f64; points_clone.len()]; + let all_charges = vec![1f64; npoints]; - // // let kernel = LaplaceKernel { - // // dim: 3, - // // is_singular: false, - // // value_dimension: 3, - // // }; - // // kernel.potential( - // // &all_point_coordinates[..], - // // &all_charges[..], - // // &leaf_coordinates[..], - // // &mut direct[..], - // // ); + let kernel = Laplace3dKernel::::default(); + + kernel.evaluate_st( + EvalType::Value, + all_point_coordinates.data(), + leaf_coordinates.data(), + &all_charges[..], + &mut direct[..] + ); - // // let abs_error: f64 = potentials - // // .iter() - // // .zip(direct.iter()) - // // .map(|(a, b)| (a - b).abs()) - // // .sum(); - // // let rel_error: f64 = abs_error / (direct.iter().sum::()); + println!("potentials {:?}", potentials.data()); + println!("direct {:?}", direct); - // // println!("p={:?} rel_error={:?}\n", order, rel_error); - // // assert!(false) - // } + // let abs_error: f64 = potentials + // .iter() + // .zip(direct.iter()) + // .map(|(a, b)| (a - b).abs()) + // .sum(); + // let rel_error: f64 = abs_error / (direct.iter().sum::()); + + // println!("p={:?} rel_error={:?}\n", order, rel_error); + assert!(false) + + } } diff --git a/traits/src/fmm.rs b/traits/src/fmm.rs index b0cc516b..453d9956 100644 --- a/traits/src/fmm.rs +++ b/traits/src/fmm.rs @@ -40,7 +40,7 @@ pub trait Fmm { pub trait FmmLoop { fn upward_pass(&self); - // fn downward_pass(&self); + fn downward_pass(&self); fn run(&self); } From c7e4c431c0005fb2c5bfb5bb0486925f07a21d9c Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Thu, 6 Jul 2023 10:52:02 +0100 Subject: [PATCH 14/40] Begin reorganisation --- field/src/field.rs | 655 +++++++++++++++++++++++++++++++++ field/src/helpers.rs | 100 ++++++ field/src/lib.rs | 836 +------------------------------------------ field/src/types.rs | 76 ++++ fmm/src/fmm.rs | 45 +-- fmm/src/laplace.rs | 190 ---------- fmm/src/lib.rs | 2 - fmm/src/linalg.rs | 115 ------ 8 files changed, 849 insertions(+), 1170 deletions(-) create mode 100644 field/src/field.rs create mode 100644 field/src/helpers.rs create mode 100644 field/src/types.rs delete mode 100644 fmm/src/laplace.rs delete mode 100644 fmm/src/linalg.rs diff --git a/field/src/field.rs b/field/src/field.rs new file mode 100644 index 00000000..dc8253de --- /dev/null +++ b/field/src/field.rs @@ -0,0 +1,655 @@ +use std::collections::{HashMap, HashSet}; + +use itertools::Itertools; + +use rlst; +use rlst::algorithms::linalg::LinAlg; +use rlst::algorithms::traits::svd::{Mode, Svd}; +use rlst::common::traits::{NewLikeSelf, NewLikeTranspose, Transpose}; +use rlst::common::{ + tools::PrettyPrint, + traits::{Copy, Eval}, +}; +use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix}; +use rlst::dense::{rlst_fixed_mat, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}; + +use bempp_traits::{ + field::FieldTranslationData, + kernel::{EvalType, Kernel, KernelType}, + types::Scalar, +}; +use bempp_tree::types::{domain::Domain, morton::MortonKey}; + +use crate::{helpers::compute_transfer_vectors, types::{TransferVector, SvdM2lEntry, SvdFieldTranslationKiFmm, SvdFieldTranslationNaiveKiFmm}}; + + +// impl FieldTranslationData for FftFieldTranslationNaiveKiFmm +// where +// T: Kernel + Default, +// { +// type Domain = Domain; +// type M2LOperators = Vec>, Dim<[usize; 3]>>>; +// type TransferVector = Vec; + +// fn compute_m2l_operators( +// &self, +// expansion_order: usize, +// domain: Self::Domain, +// ) -> Self::M2LOperators { +// type TranslationType = ArrayBase>, Dim<[usize; 3]>>; +// let mut result: Vec = Vec::new(); + +// for t in self.transfer_vectors.iter() { +// let source_equivalent_surface = +// t.source +// .compute_surface(&domain, expansion_order, self.alpha); + +// let conv_grid_sources = t.source.convolution_grid( +// expansion_order, +// &domain, +// &source_equivalent_surface, +// self.alpha, +// ); + +// let target_check_surface = t.target.compute_surface(&domain, expansion_order, self.alpha); + +// // TODO: Remove dim +// let dim = 3; +// // Find min target +// let ncoeffs: usize = target_check_surface.len() / dim; +// let sums: Vec<_> = (0..ncoeffs) +// .map(|i| target_check_surface[i] + target_check_surface[ncoeffs + i] + target_check_surface[2*ncoeffs + i]) +// .collect(); + +// let min_index = sums +// .iter() +// .enumerate() +// .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) +// .map(|(index, _)| index) +// .unwrap(); + +// let min_target = [ +// target_check_surface[min_index], +// target_check_surface[min_index + ncoeffs], +// target_check_surface[min_index + 2 * ncoeffs], +// ]; + +// // TODO: Fix compute_kernel to work with new kernel +// let kernel = self.compute_kernel(expansion_order, &conv_grid_sources, min_target); +// let m = kernel.len(); +// let n = kernel[0].len(); +// let k = kernel[0][0].len(); + +// // Precompute and store the FFT of each unique kernel interaction +// let kernel = +// Array3::from_shape_vec((m, n, k), kernel.into_iter().flatten().flatten().collect()) +// .unwrap(); + +// // Begin by calculating pad lengths along each dimension +// let p = 2 * m; +// let q = 2 * n; +// let r = 2 * k; + +// let padding = [[0, p - m], [0, q - n], [0, r - k]]; + +// let padded_kernel = pad(&kernel, &padding, PadMode::Constant(0.)); + +// // Flip the kernel +// let padded_kernel = padded_kernel.slice(s![..;-1,..;-1,..;-1]).to_owned(); +// let mut padded_kernel_hat: Array3> = Array3::zeros((p, q, r / 2 + 1)); + +// // Compute FFT of kernel for this transfer vector +// { +// // 1. Init the handlers for FFTs along each axis +// let mut handler_ax0 = FftHandler::::new(p); +// let mut handler_ax1 = FftHandler::::new(q); +// let mut handler_ax2 = R2cFftHandler::::new(r); + +// // 2. Compute the transform along each axis +// let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); +// ndfft_r2c(&padded_kernel, &mut tmp1, &mut handler_ax2, 2); +// let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); +// ndfft(&tmp1, &mut tmp2, &mut handler_ax1, 1); +// ndfft(&tmp2, &mut padded_kernel_hat, &mut handler_ax0, 0); +// } + +// // Store FFT of kernel for this transfer vector +// { +// result.push(padded_kernel_hat); +// } +// } + +// result +// } + +// fn compute_transfer_vectors(&self) -> Self::TransferVector { +// compute_transfer_vectors() +// } + +// fn ncoeffs(&self, expansion_order: usize) -> usize { +// 6 * (expansion_order - 1).pow(2) + 2 +// } +// } + +impl FieldTranslationData for SvdFieldTranslationKiFmm +where + T: Kernel + Default, +{ + type TransferVector = Vec; + type M2LOperators = (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry); + type Domain = Domain; + + fn compute_transfer_vectors(&self) -> Self::TransferVector { + compute_transfer_vectors() + } + + fn ncoeffs(&self, expansion_order: usize) -> usize { + 6 * (expansion_order - 1).pow(2) + 2 + } + + fn compute_m2l_operators<'a>( + &self, + expansion_order: usize, + domain: Self::Domain, + ) -> Self::M2LOperators { + // ){ + // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) + + // Compute interaction matrices between source and unique targets, defined by unique transfer vectors + let nrows = self.ncoeffs(expansion_order); + let ncols = self.ncoeffs(expansion_order); + + // let mut se2tc_fat: SvdM2lEntry = + // Array2::zeros((nrows, ncols * self.transfer_vectors.len())); + // let mut se2tc_thin: SvdM2lEntry = + // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); + let ntransfer_vectors = self.transfer_vectors.len(); + let mut se2tc_fat = rlst_mat![f64, (nrows, ncols * ntransfer_vectors)]; + + let mut se2tc_thin = rlst_mat![f64, (nrows * ntransfer_vectors, ncols)]; + + for (i, t) in self.transfer_vectors.iter().enumerate() { + let source_equivalent_surface = + t.source + .compute_surface(&domain, expansion_order, self.alpha); + let nsources = source_equivalent_surface.len() / self.kernel.space_dimension(); + + let target_check_surface = + t.target + .compute_surface(&domain, expansion_order, self.alpha); + let ntargets = target_check_surface.len() / self.kernel.space_dimension(); + + let mut tmp_gram = rlst_mat![f64, (ntargets, nsources)]; + + self.kernel.gram( + EvalType::Value, + &source_equivalent_surface[..], + &target_check_surface[..], + tmp_gram.data_mut(), + ); + + let lidx_sources = i * ncols; + let ridx_sources = lidx_sources + ncols; + + let block_size = nrows * ncols; + let start_idx = i * block_size; + let end_idx = start_idx + block_size; + let mut block = se2tc_fat.get_slice_mut(start_idx, end_idx); + block.copy_from_slice(tmp_gram.data_mut()); + + for j in 0..ncols { + let start_idx = j * ntransfer_vectors * nrows + i * nrows; + let end_idx = start_idx + nrows; + let mut block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); + let mut gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols); + block_column.copy_from_slice(gram_column); + } + } + + let left: usize = 0; + let right: usize = self.k; + let (sigma, u, vt) = se2tc_fat.linalg().svd(Mode::All, Mode::Slim).unwrap(); + + let u = u.unwrap(); + let vt = vt.unwrap(); + + // Keep 'k' singular values + let mut sigma_mat = rlst_mat![f64, (self.k, self.k)]; + for i in 0..self.k { + sigma_mat[[i, i]] = sigma[i] + } + + let (mu, nu) = u.shape(); + let u = u.block((0, 0), (mu, self.k)).eval(); + + let (mvt, nvt) = vt.shape(); + let vt = vt.block((0, 0), (self.k, nvt)).eval(); + + // // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // // let u = u.unwrap().slice(s![.., left..right]).to_owned(); + // // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); + // // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); + // // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); + + // // Store compressed M2L operators + // // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); + + let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::Slim, Mode::All).unwrap(); + let st = st.unwrap(); + let (mst, nst) = st.shape(); + let st_block = st.block((0, 0), (self.k, nst)); + let s_block = st_block.transpose().eval(); + + let mut c = rlst_mat![f64, (self.k, self.k * ntransfer_vectors)]; + // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), st.shape(), c.shape(), vt.shape()); + + // let st = s_block.transpose().eval(); + // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), sigma_mat.shape(), vt.shape(), st.shape()); + for i in 0..self.transfer_vectors.len() { + // let v_lidx = i * ncols; + // let v_ridx = v_lidx + ncols; + // let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); + + // let block_size = right*ncols; + // let start_idx = i * block_size; + // let end_idx = start_idx+block_size; + + // let tmp = sigma.dot(&vt_sub.dot(&st.t())); + // let lidx = i * self.k; + // let ridx = lidx + self.k; + // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); + let top_left = (0, i * ncols); + let dim = (self.k, ncols); + let vt_block = vt.block(top_left, dim); + + let tmp = sigma_mat.dot(&vt_block.dot(&s_block)); + + let top_left = (0, i * self.k); + let dim = (self.k, self.k); + + c.block_mut(top_left, dim) + .data_mut() + .copy_from_slice(tmp.data()); + } + + (u, st, c) + // let dummy = rlst_mat![f64, (1, 1)]; + // (dummy.new_like_self().eval(), dummy.new_like_self().eval(), dummy.new_like_self().eval()) + // assert!(false) + } +} + +impl FieldTranslationData for SvdFieldTranslationNaiveKiFmm +where + T: Kernel + Default, +{ + type TransferVector = Vec; + type M2LOperators = (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry); + type Domain = Domain; + + fn compute_transfer_vectors(&self) -> Self::TransferVector { + compute_transfer_vectors() + } + + fn ncoeffs(&self, expansion_order: usize) -> usize { + 6 * (expansion_order - 1).pow(2) + 2 + } + + fn compute_m2l_operators<'a>( + &self, + expansion_order: usize, + domain: Self::Domain, + ) -> Self::M2LOperators { + // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) + + // Compute interaction matrices between source and unique targets, defined by unique transfer vectors + let nrows = self.ncoeffs(expansion_order); + let ncols = self.ncoeffs(expansion_order); + + // let mut se2tc_fat: SvdM2lEntry = + // Array2::zeros((nrows, ncols * self.transfer_vectors.len())); + // let mut se2tc_thin: SvdM2lEntry = + // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); + let ntransfer_vectors = self.transfer_vectors.len(); + let mut se2tc_fat = rlst_mat![f64, (nrows, ncols * ntransfer_vectors)]; + + let mut se2tc_thin = rlst_mat![f64, (nrows * ntransfer_vectors, ncols)]; + + for (i, t) in self.transfer_vectors.iter().enumerate() { + let source_equivalent_surface = + t.source + .compute_surface(&domain, expansion_order, self.alpha); + let nsources = source_equivalent_surface.len() / self.kernel.space_dimension(); + let source_equivalent_surface = unsafe { + rlst_pointer_mat!['a, f64, source_equivalent_surface.as_ptr(), (nsources, self.kernel.space_dimension()), (1, nsources)] + }; + + let target_check_surface = + t.target + .compute_surface(&domain, expansion_order, self.alpha); + let ntargets = target_check_surface.len() / self.kernel.space_dimension(); + let target_check_surface = unsafe { + rlst_pointer_mat!['a, f64, target_check_surface.as_ptr(), (ntargets, self.kernel.space_dimension()), (1, ntargets)] + }; + + let mut tmp_gram = rlst_mat![f64, (ntargets, nsources)]; + + self.kernel.gram( + EvalType::Value, + source_equivalent_surface.data(), + target_check_surface.data(), + tmp_gram.data_mut(), + ); + + // // let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); + // // se2tc_fat + // // .slice_mut(s![.., lidx_sources..ridx_sources]) + // // .assign(&tmp_gram); + let lidx_sources = i * ncols; + let ridx_sources = lidx_sources + ncols; + + let block_size = nrows * ncols; + let start_idx = i * block_size; + let end_idx = start_idx + block_size; + let mut block = se2tc_fat.get_slice_mut(start_idx, end_idx); + block.copy_from_slice(tmp_gram.data_mut()); + + + for j in 0..ncols { + let start_idx = j * ntransfer_vectors * nrows + i * nrows; + let end_idx = start_idx + nrows; + let mut block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); + let mut gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols); + block_column.copy_from_slice(gram_column); + } + } + + let left: usize = 0; + let right: usize = self.k; + let (sigma, u, vt) = se2tc_fat.linalg().svd(Mode::All, Mode::Slim).unwrap(); + + let u = u.unwrap(); + let vt = vt.unwrap(); + + // Keep 'k' singular values + let mut sigma_mat = rlst_mat![f64, (self.k, self.k)]; + for i in 0..self.k { + sigma_mat[[i, i]] = sigma[i] + } + + let (mu, nu) = u.shape(); + let u = u.block((0, 0), (mu, self.k)).eval(); + + let (mvt, nvt) = vt.shape(); + let vt = vt.block((0, 0), (self.k, nvt)).eval(); + + // // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // // let u = u.unwrap().slice(s![.., left..right]).to_owned(); + // // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); + // // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); + // // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); + // // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); + + // // Store compressed M2L operators + // // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); + + let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::Slim, Mode::All).unwrap(); + let st = st.unwrap(); + let (mst, nst) = st.shape(); + let st_block = st.block((0, 0), (self.k, nst)); + let s_block = st_block.transpose().eval(); + + let mut c = rlst_mat![f64, (self.k, self.k * ntransfer_vectors)]; + // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), st.shape(), c.shape(), vt.shape()); + + // let st = s_block.transpose().eval(); + // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), sigma_mat.shape(), vt.shape(), st.shape()); + for i in 0..self.transfer_vectors.len() { + // let v_lidx = i * ncols; + // let v_ridx = v_lidx + ncols; + // let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); + + // let block_size = right*ncols; + // let start_idx = i * block_size; + // let end_idx = start_idx+block_size; + + // let tmp = sigma.dot(&vt_sub.dot(&st.t())); + // let lidx = i * self.k; + // let ridx = lidx + self.k; + // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); + let top_left = (0, i * ncols); + let dim = (self.k, ncols); + let vt_block = vt.block(top_left, dim); + + let tmp = sigma_mat.dot(&vt_block.dot(&s_block)); + + let top_left = (0, i * self.k); + let dim = (self.k, self.k); + + c.block_mut(top_left, dim) + .data_mut() + .copy_from_slice(tmp.data()); + } + + (u, st, c) + // let dummy = rlst_mat![f64, (1, 1)]; + // (dummy.new_like_self().eval(), dummy.new_like_self().eval(), dummy.new_like_self().eval()) + // assert!(false) + } +} + +impl SvdFieldTranslationNaiveKiFmm +where + T: Kernel + Default, +{ + pub fn new( + kernel: T, + k: Option, + expansion_order: usize, + domain: Domain, + alpha: f64, + ) -> Self { + let dummy = rlst_mat![f64, (1, 1)]; + + // TODO: There should be a default for matrices to make code cleaner. + let mut result = SvdFieldTranslationNaiveKiFmm { + alpha, + k: 100, + kernel, + m2l: ( + dummy.new_like_self().eval(), + dummy.new_like_self().eval(), + dummy.new_like_self().eval(), + ), + transfer_vectors: Vec::new(), + }; + + if let Some(k) = k { + // Compression rank <= number of coefficients + let ncoeffs = result.ncoeffs(expansion_order); + if k <= ncoeffs { + result.k = k + } else { + result.k = ncoeffs; + } + } else { + // TODO: Should be data driven if nothing is provided by the user + result.k = 50; + } + + result.transfer_vectors = result.compute_transfer_vectors(); + result.m2l = result.compute_m2l_operators(expansion_order, domain); + + result + } +} + +impl SvdFieldTranslationKiFmm +where + T: Kernel + Default, +{ + pub fn new( + kernel: T, + k: Option, + expansion_order: usize, + domain: Domain, + alpha: f64, + ) -> Self { + let dummy = rlst_mat![f64, (1, 1)]; + + // TODO: There should be a default for matrices to make code cleaner. + let mut result = SvdFieldTranslationKiFmm { + alpha, + k: 100, + kernel, + m2l: ( + dummy.new_like_self().eval(), + dummy.new_like_self().eval(), + dummy.new_like_self().eval(), + ), + transfer_vectors: Vec::new(), + }; + + if let Some(k) = k { + // Compression rank <= number of coefficients + let ncoeffs = result.ncoeffs(expansion_order); + if k <= ncoeffs { + result.k = k + } else { + result.k = ncoeffs; + } + } else { + // TODO: Should be data driven if nothing is provided by the user + result.k = 50; + } + + result.transfer_vectors = result.compute_transfer_vectors(); + result.m2l = result.compute_m2l_operators(expansion_order, domain); + + result + } +} + +// impl FftFieldTranslationNaiveKiFmm +// where +// T: Kernel + Default, +// { +// pub fn new(kernel: T, expansion_order: usize, domain: Domain, alpha: f64) -> Self { +// let mut result = FftFieldTranslationNaiveKiFmm::default(); + +// // Create maps between surface and convolution grids +// let (surf_to_conv, conv_to_surf) = +// FftFieldTranslationNaiveKiFmm::::compute_surf_to_conv_map(expansion_order); +// result.surf_to_conv_map = surf_to_conv; +// result.conv_to_surf_map = conv_to_surf; + +// result.kernel = kernel; + +// result.alpha = alpha; +// result.transfer_vectors = result.compute_transfer_vectors(); +// result.m2l = result.compute_m2l_operators(expansion_order, domain); + +// result +// } + +// pub fn compute_surf_to_conv_map( +// expansion_order: usize, +// ) -> (HashMap, HashMap) { +// let n = 2 * expansion_order - 1; + +// // Index maps between surface and convolution grids +// let mut surf_to_conv: HashMap = HashMap::new(); +// let mut conv_to_surf: HashMap = HashMap::new(); + +// // Initialise surface grid index +// let mut surf_index = 0; + +// // The boundaries of the surface grid +// let lower = expansion_order - 1; +// let upper = 2 * expansion_order - 2; + +// // Iterate through the entire convolution grid marking the boundaries +// // This makes the map much easier to understand and debug +// for i in 0..n { +// for j in 0..n { +// for k in 0..n { +// let conv_idx = i * n * n + j * n + k; +// if (i >= lower && j >= lower && (k == lower || k == upper)) +// || (j >= lower && k >= lower && (i == lower || i == upper)) +// || (k >= lower && i >= lower && (j == lower || j == upper)) +// { +// surf_to_conv.insert(surf_index, conv_idx); +// conv_to_surf.insert(conv_idx, surf_index); +// surf_index += 1; +// } +// } +// } +// } + +// (surf_to_conv, conv_to_surf) +// } + +// pub fn compute_kernel( +// &self, +// expansion_order: usize, +// convolution_grid: &[[f64; 3]], +// min_target: [f64; 3], +// ) -> Vec>> { +// let n = 2 * expansion_order - 1; +// let mut result = vec![vec![vec![0f64; n]; n]; n]; + +// for (i, result_i) in result.iter_mut().enumerate() { +// for (j, result_ij) in result_i.iter_mut().enumerate() { +// for (k, result_ijk) in result_ij.iter_mut().enumerate() { +// let conv_idx = i * n * n + j * n + k; +// let src = convolution_grid[conv_idx]; +// *result_ijk = self.kernel.kernel(&src[..], &min_target[..]); +// } +// } +// } +// result +// } + +// pub fn compute_signal(&self, expansion_order: usize, charges: &[f64]) -> Vec>> { +// let n = 2 * expansion_order - 1; +// let mut result = vec![vec![vec![0f64; n]; n]; n]; + +// for (i, result_i) in result.iter_mut().enumerate() { +// for (j, result_ij) in result_i.iter_mut().enumerate() { +// for (k, result_ijk) in result_ij.iter_mut().enumerate() { +// let conv_idx = i * n * n + j * n + k; +// if self.conv_to_surf_map.contains_key(&conv_idx) { +// let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); +// *result_ijk = charges[*surf_idx] +// } +// } +// } +// } + +// result +// } +// } + + + +mod test { + + use super::*; + use bempp_kernel::laplace_3d::Laplace3dKernel; + + #[test] + fn test_svd() { + let kernel = Laplace3dKernel::::default(); + let k = 100; + let order = 2; + let domain = Domain { + origin: [0., 0., 0.], + diameter: [1., 1., 1.], + }; + let alpha_inner = 1.05; + + let m2l_data_svd = + SvdFieldTranslationKiFmm::new(kernel, Some(k), order, domain, alpha_inner); + } +} diff --git a/field/src/helpers.rs b/field/src/helpers.rs new file mode 100644 index 00000000..6fe6d514 --- /dev/null +++ b/field/src/helpers.rs @@ -0,0 +1,100 @@ +use std::collections::{HashMap, HashSet}; + +use itertools::Itertools; + +use bempp_tree::{types::{morton::MortonKey, domain::Domain}}; + +use crate::types::TransferVector; + +/// Algebraically defined list of unique M2L interactions, called 'transfer vectors', for 3D FMM. +pub fn compute_transfer_vectors() -> Vec { + let point = [0.5, 0.5, 0.5]; + let domain = Domain { + origin: [0., 0., 0.], + diameter: [1., 1., 1.], + }; + + // Encode point in centre of domain + let key = MortonKey::from_point(&point, &domain, 3); + + // Add neighbours, and their resp. siblings to v list. + let mut neighbours = key.neighbors(); + let mut keys: Vec = Vec::new(); + keys.push(key); + keys.append(&mut neighbours); + + for key in neighbours.iter() { + let mut siblings = key.siblings(); + keys.append(&mut siblings); + } + + // Keep only unique keys + let keys: Vec<&MortonKey> = keys.iter().unique().collect(); + + let mut transfer_vectors: Vec = Vec::new(); + let mut targets: Vec = Vec::new(); + let mut sources: Vec = Vec::new(); + + for key in keys.iter() { + // Dense v_list + let v_list = key + .parent() + .neighbors() + .iter() + .flat_map(|pn| pn.children()) + .filter(|pnc| !key.is_adjacent(pnc)) + .collect_vec(); + + // Find transfer vectors for everything in dense v list of each key + let tmp: Vec = v_list + .iter() + .map(|v| key.find_transfer_vector(v)) + .collect_vec(); + + transfer_vectors.extend(&mut tmp.iter().cloned()); + sources.extend(&mut v_list.iter().cloned()); + + let tmp_targets = vec![**key; tmp.len()]; + targets.extend(&mut tmp_targets.iter().cloned()); + } + + let mut unique_transfer_vectors = Vec::new(); + let mut unique_indices = HashSet::new(); + + for (i, vec) in transfer_vectors.iter().enumerate() { + if !unique_transfer_vectors.contains(vec) { + unique_transfer_vectors.push(*vec); + unique_indices.insert(i); + } + } + + let unique_sources: Vec = sources + .iter() + .enumerate() + .filter(|(i, _)| unique_indices.contains(i)) + .map(|(_, x)| *x) + .collect_vec(); + + let unique_targets: Vec = targets + .iter() + .enumerate() + .filter(|(i, _)| unique_indices.contains(i)) + .map(|(_, x)| *x) + .collect_vec(); + + let mut result = Vec::::new(); + + for ((t, s), v) in unique_targets + .into_iter() + .zip(unique_sources) + .zip(unique_transfer_vectors) + { + result.push(TransferVector { + vector: v, + target: t, + source: s, + }) + } + + result +} \ No newline at end of file diff --git a/field/src/lib.rs b/field/src/lib.rs index 1fc0a318..97b05ee2 100644 --- a/field/src/lib.rs +++ b/field/src/lib.rs @@ -1,833 +1,3 @@ -use std::collections::{HashMap, HashSet}; - -use itertools::Itertools; -use ndarray::*; -use ndarray_linalg::SVDDC; -use ndarray_ndimage::{pad, PadMode}; -use ndrustfft::{ndfft, ndfft_r2c, Complex, FftHandler, R2cFftHandler}; - -use rlst; -use rlst::algorithms::linalg::LinAlg; -use rlst::algorithms::traits::svd::{Mode, Svd}; -use rlst::common::traits::{NewLikeSelf, NewLikeTranspose, Transpose}; -use rlst::common::{ - tools::PrettyPrint, - traits::{Copy, Eval}, -}; -use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix}; -use rlst::dense::{rlst_fixed_mat, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}; - -use bempp_traits::{ - field::FieldTranslationData, - kernel::{EvalType, Kernel, KernelType}, - types::Scalar, -}; -use bempp_tree::types::{domain::Domain, morton::MortonKey}; - -type FftM2LEntry = ArrayBase>, Dim<[usize; 3]>>; -type SvdM2lEntry = - Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; - -// #[derive(Default)] -pub struct FftFieldTranslationNaiveKiFmm -where - T: Kernel + Default, -{ - // Amount to dilate inner check surface by - pub alpha: f64, - - // Maps between convolution and surface grids - pub surf_to_conv_map: HashMap, - pub conv_to_surf_map: HashMap, - - // Precomputed FFT of unique kernel interactions placed on - // convolution grid. - pub m2l: Vec, - - // Unique transfer vectors to lookup m2l unique kernel interactions - pub transfer_vectors: Vec, - - pub kernel: T, -} - -// #[derive(Default)] -pub struct SvdFieldTranslationNaiveKiFmm -where - T: Kernel + Default, -{ - // Amount to dilate inner check surface by - pub alpha: f64, - - // Compression rank, if unspecified estimated from data. - pub k: usize, - - // Precomputed SVD compressed m2l interaction - pub m2l: (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry), - - // Unique transfer vectors to lookup m2l unique kernel interactions - pub transfer_vectors: Vec, - - pub kernel: T, -} - -// #[derive(Default)] -pub struct SvdFieldTranslationKiFmm -where - T: Kernel + Default, -{ - // Amount to dilate inner check surface by - pub alpha: f64, - - // Compression rank, if unspecified estimated from data. - pub k: usize, - - // Precomputed SVD compressed m2l interaction - pub m2l: (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry), - - // Unique transfer vectors to lookup m2l unique kernel interactions - pub transfer_vectors: Vec, - - pub kernel: T, -} - -#[derive(Debug)] -pub struct TransferVector { - pub vector: usize, - pub source: MortonKey, - pub target: MortonKey, -} - -/// Algebraically defined list of unique M2L interactions, called 'transfer vectors', for 3D FMM. -pub fn compute_transfer_vectors() -> Vec { - let point = [0.5, 0.5, 0.5]; - let domain = Domain { - origin: [0., 0., 0.], - diameter: [1., 1., 1.], - }; - - // Encode point in centre of domain - let key = MortonKey::from_point(&point, &domain, 3); - - // Add neighbours, and their resp. siblings to v list. - let mut neighbours = key.neighbors(); - let mut keys: Vec = Vec::new(); - keys.push(key); - keys.append(&mut neighbours); - - for key in neighbours.iter() { - let mut siblings = key.siblings(); - keys.append(&mut siblings); - } - - // Keep only unique keys - let keys: Vec<&MortonKey> = keys.iter().unique().collect(); - - let mut transfer_vectors: Vec = Vec::new(); - let mut targets: Vec = Vec::new(); - let mut sources: Vec = Vec::new(); - - for key in keys.iter() { - // Dense v_list - let v_list = key - .parent() - .neighbors() - .iter() - .flat_map(|pn| pn.children()) - .filter(|pnc| !key.is_adjacent(pnc)) - .collect_vec(); - - // Find transfer vectors for everything in dense v list of each key - let tmp: Vec = v_list - .iter() - .map(|v| key.find_transfer_vector(v)) - .collect_vec(); - - transfer_vectors.extend(&mut tmp.iter().cloned()); - sources.extend(&mut v_list.iter().cloned()); - - let tmp_targets = vec![**key; tmp.len()]; - targets.extend(&mut tmp_targets.iter().cloned()); - } - - let mut unique_transfer_vectors = Vec::new(); - let mut unique_indices = HashSet::new(); - - for (i, vec) in transfer_vectors.iter().enumerate() { - if !unique_transfer_vectors.contains(vec) { - unique_transfer_vectors.push(*vec); - unique_indices.insert(i); - } - } - - let unique_sources: Vec = sources - .iter() - .enumerate() - .filter(|(i, _)| unique_indices.contains(i)) - .map(|(_, x)| *x) - .collect_vec(); - - let unique_targets: Vec = targets - .iter() - .enumerate() - .filter(|(i, _)| unique_indices.contains(i)) - .map(|(_, x)| *x) - .collect_vec(); - - let mut result = Vec::::new(); - - for ((t, s), v) in unique_targets - .into_iter() - .zip(unique_sources) - .zip(unique_transfer_vectors) - { - result.push(TransferVector { - vector: v, - target: t, - source: s, - }) - } - - result -} - -// impl FieldTranslationData for FftFieldTranslationNaiveKiFmm -// where -// T: Kernel + Default, -// { -// type Domain = Domain; -// type M2LOperators = Vec>, Dim<[usize; 3]>>>; -// type TransferVector = Vec; - -// fn compute_m2l_operators( -// &self, -// expansion_order: usize, -// domain: Self::Domain, -// ) -> Self::M2LOperators { -// type TranslationType = ArrayBase>, Dim<[usize; 3]>>; -// let mut result: Vec = Vec::new(); - -// for t in self.transfer_vectors.iter() { -// let source_equivalent_surface = -// t.source -// .compute_surface(&domain, expansion_order, self.alpha); - -// let conv_grid_sources = t.source.convolution_grid( -// expansion_order, -// &domain, -// &source_equivalent_surface, -// self.alpha, -// ); - -// let target_check_surface = t.target.compute_surface(&domain, expansion_order, self.alpha); - -// // TODO: Remove dim -// let dim = 3; -// // Find min target -// let ncoeffs: usize = target_check_surface.len() / dim; -// let sums: Vec<_> = (0..ncoeffs) -// .map(|i| target_check_surface[i] + target_check_surface[ncoeffs + i] + target_check_surface[2*ncoeffs + i]) -// .collect(); - -// let min_index = sums -// .iter() -// .enumerate() -// .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) -// .map(|(index, _)| index) -// .unwrap(); - -// let min_target = [ -// target_check_surface[min_index], -// target_check_surface[min_index + ncoeffs], -// target_check_surface[min_index + 2 * ncoeffs], -// ]; - -// // TODO: Fix compute_kernel to work with new kernel -// let kernel = self.compute_kernel(expansion_order, &conv_grid_sources, min_target); -// let m = kernel.len(); -// let n = kernel[0].len(); -// let k = kernel[0][0].len(); - -// // Precompute and store the FFT of each unique kernel interaction -// let kernel = -// Array3::from_shape_vec((m, n, k), kernel.into_iter().flatten().flatten().collect()) -// .unwrap(); - -// // Begin by calculating pad lengths along each dimension -// let p = 2 * m; -// let q = 2 * n; -// let r = 2 * k; - -// let padding = [[0, p - m], [0, q - n], [0, r - k]]; - -// let padded_kernel = pad(&kernel, &padding, PadMode::Constant(0.)); - -// // Flip the kernel -// let padded_kernel = padded_kernel.slice(s![..;-1,..;-1,..;-1]).to_owned(); -// let mut padded_kernel_hat: Array3> = Array3::zeros((p, q, r / 2 + 1)); - -// // Compute FFT of kernel for this transfer vector -// { -// // 1. Init the handlers for FFTs along each axis -// let mut handler_ax0 = FftHandler::::new(p); -// let mut handler_ax1 = FftHandler::::new(q); -// let mut handler_ax2 = R2cFftHandler::::new(r); - -// // 2. Compute the transform along each axis -// let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); -// ndfft_r2c(&padded_kernel, &mut tmp1, &mut handler_ax2, 2); -// let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); -// ndfft(&tmp1, &mut tmp2, &mut handler_ax1, 1); -// ndfft(&tmp2, &mut padded_kernel_hat, &mut handler_ax0, 0); -// } - -// // Store FFT of kernel for this transfer vector -// { -// result.push(padded_kernel_hat); -// } -// } - -// result -// } - -// fn compute_transfer_vectors(&self) -> Self::TransferVector { -// compute_transfer_vectors() -// } - -// fn ncoeffs(&self, expansion_order: usize) -> usize { -// 6 * (expansion_order - 1).pow(2) + 2 -// } -// } - -impl FieldTranslationData for SvdFieldTranslationKiFmm -where - T: Kernel + Default, -{ - type TransferVector = Vec; - type M2LOperators = (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry); - type Domain = Domain; - - fn compute_transfer_vectors(&self) -> Self::TransferVector { - compute_transfer_vectors() - } - - fn ncoeffs(&self, expansion_order: usize) -> usize { - 6 * (expansion_order - 1).pow(2) + 2 - } - - fn compute_m2l_operators<'a>( - &self, - expansion_order: usize, - domain: Self::Domain, - ) -> Self::M2LOperators { - // ){ - // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) - - // Compute interaction matrices between source and unique targets, defined by unique transfer vectors - let nrows = self.ncoeffs(expansion_order); - let ncols = self.ncoeffs(expansion_order); - - // let mut se2tc_fat: SvdM2lEntry = - // Array2::zeros((nrows, ncols * self.transfer_vectors.len())); - // let mut se2tc_thin: SvdM2lEntry = - // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); - let ntransfer_vectors = self.transfer_vectors.len(); - let mut se2tc_fat = rlst_mat![f64, (nrows, ncols * ntransfer_vectors)]; - - let mut se2tc_thin = rlst_mat![f64, (nrows * ntransfer_vectors, ncols)]; - - for (i, t) in self.transfer_vectors.iter().enumerate() { - let source_equivalent_surface = - t.source - .compute_surface(&domain, expansion_order, self.alpha); - let nsources = source_equivalent_surface.len() / self.kernel.space_dimension(); - let source_equivalent_surface = unsafe { - rlst_pointer_mat!['a, f64, source_equivalent_surface.as_ptr(), (nsources, self.kernel.space_dimension()), (1, nsources)] - }; - - let target_check_surface = - t.target - .compute_surface(&domain, expansion_order, self.alpha); - let ntargets = target_check_surface.len() / self.kernel.space_dimension(); - let target_check_surface = unsafe { - rlst_pointer_mat!['a, f64, target_check_surface.as_ptr(), (ntargets, self.kernel.space_dimension()), (1, ntargets)] - }; - - let mut tmp_gram = rlst_mat![f64, (ntargets, nsources)]; - - self.kernel.gram( - EvalType::Value, - source_equivalent_surface.data(), - target_check_surface.data(), - tmp_gram.data_mut(), - ); - - // // let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); - // // se2tc_fat - // // .slice_mut(s![.., lidx_sources..ridx_sources]) - // // .assign(&tmp_gram); - let lidx_sources = i * ncols; - let ridx_sources = lidx_sources + ncols; - - let block_size = nrows * ncols; - let start_idx = i * block_size; - let end_idx = start_idx + block_size; - let mut block = se2tc_fat.get_slice_mut(start_idx, end_idx); - block.copy_from_slice(tmp_gram.data_mut()); - - - for j in 0..ncols { - let start_idx = j * ntransfer_vectors * nrows + i * nrows; - let end_idx = start_idx + nrows; - let mut block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); - let mut gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols); - block_column.copy_from_slice(gram_column); - } - } - - let left: usize = 0; - let right: usize = self.k; - let (sigma, u, vt) = se2tc_fat.linalg().svd(Mode::All, Mode::Slim).unwrap(); - - let u = u.unwrap(); - let vt = vt.unwrap(); - - // Keep 'k' singular values - let mut sigma_mat = rlst_mat![f64, (self.k, self.k)]; - for i in 0..self.k { - sigma_mat[[i, i]] = sigma[i] - } - - let (mu, nu) = u.shape(); - let u = u.block((0, 0), (mu, self.k)).eval(); - - let (mvt, nvt) = vt.shape(); - let vt = vt.block((0, 0), (self.k, nvt)).eval(); - - // // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // // let u = u.unwrap().slice(s![.., left..right]).to_owned(); - // // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); - // // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); - // // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); - - // // Store compressed M2L operators - // // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); - - let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::Slim, Mode::All).unwrap(); - let st = st.unwrap(); - let (mst, nst) = st.shape(); - let st_block = st.block((0, 0), (self.k, nst)); - let s_block = st_block.transpose().eval(); - - let mut c = rlst_mat![f64, (self.k, self.k * ntransfer_vectors)]; - // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), st.shape(), c.shape(), vt.shape()); - - // let st = s_block.transpose().eval(); - // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), sigma_mat.shape(), vt.shape(), st.shape()); - for i in 0..self.transfer_vectors.len() { - // let v_lidx = i * ncols; - // let v_ridx = v_lidx + ncols; - // let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); - - // let block_size = right*ncols; - // let start_idx = i * block_size; - // let end_idx = start_idx+block_size; - - // let tmp = sigma.dot(&vt_sub.dot(&st.t())); - // let lidx = i * self.k; - // let ridx = lidx + self.k; - // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); - let top_left = (0, i * ncols); - let dim = (self.k, ncols); - let vt_block = vt.block(top_left, dim); - - let tmp = sigma_mat.dot(&vt_block.dot(&s_block)); - - let top_left = (0, i * self.k); - let dim = (self.k, self.k); - - c.block_mut(top_left, dim) - .data_mut() - .copy_from_slice(tmp.data()); - } - - (u, st, c) - // let dummy = rlst_mat![f64, (1, 1)]; - // (dummy.new_like_self().eval(), dummy.new_like_self().eval(), dummy.new_like_self().eval()) - // assert!(false) - } -} - -impl FieldTranslationData for SvdFieldTranslationNaiveKiFmm -where - T: Kernel + Default, -{ - type TransferVector = Vec; - type M2LOperators = (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry); - type Domain = Domain; - - fn compute_transfer_vectors(&self) -> Self::TransferVector { - compute_transfer_vectors() - } - - fn ncoeffs(&self, expansion_order: usize) -> usize { - 6 * (expansion_order - 1).pow(2) + 2 - } - - fn compute_m2l_operators<'a>( - &self, - expansion_order: usize, - domain: Self::Domain, - ) -> Self::M2LOperators { - // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) - - // Compute interaction matrices between source and unique targets, defined by unique transfer vectors - let nrows = self.ncoeffs(expansion_order); - let ncols = self.ncoeffs(expansion_order); - - // let mut se2tc_fat: SvdM2lEntry = - // Array2::zeros((nrows, ncols * self.transfer_vectors.len())); - // let mut se2tc_thin: SvdM2lEntry = - // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); - let ntransfer_vectors = self.transfer_vectors.len(); - let mut se2tc_fat = rlst_mat![f64, (nrows, ncols * ntransfer_vectors)]; - - let mut se2tc_thin = rlst_mat![f64, (nrows * ntransfer_vectors, ncols)]; - - for (i, t) in self.transfer_vectors.iter().enumerate() { - let source_equivalent_surface = - t.source - .compute_surface(&domain, expansion_order, self.alpha); - let nsources = source_equivalent_surface.len() / self.kernel.space_dimension(); - let source_equivalent_surface = unsafe { - rlst_pointer_mat!['a, f64, source_equivalent_surface.as_ptr(), (nsources, self.kernel.space_dimension()), (1, nsources)] - }; - - let target_check_surface = - t.target - .compute_surface(&domain, expansion_order, self.alpha); - let ntargets = target_check_surface.len() / self.kernel.space_dimension(); - let target_check_surface = unsafe { - rlst_pointer_mat!['a, f64, target_check_surface.as_ptr(), (ntargets, self.kernel.space_dimension()), (1, ntargets)] - }; - - let mut tmp_gram = rlst_mat![f64, (ntargets, nsources)]; - - self.kernel.gram( - EvalType::Value, - source_equivalent_surface.data(), - target_check_surface.data(), - tmp_gram.data_mut(), - ); - - // // let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); - // // se2tc_fat - // // .slice_mut(s![.., lidx_sources..ridx_sources]) - // // .assign(&tmp_gram); - let lidx_sources = i * ncols; - let ridx_sources = lidx_sources + ncols; - - let block_size = nrows * ncols; - let start_idx = i * block_size; - let end_idx = start_idx + block_size; - let mut block = se2tc_fat.get_slice_mut(start_idx, end_idx); - block.copy_from_slice(tmp_gram.data_mut()); - - - for j in 0..ncols { - let start_idx = j * ntransfer_vectors * nrows + i * nrows; - let end_idx = start_idx + nrows; - let mut block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); - let mut gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols); - block_column.copy_from_slice(gram_column); - } - } - - let left: usize = 0; - let right: usize = self.k; - let (sigma, u, vt) = se2tc_fat.linalg().svd(Mode::All, Mode::Slim).unwrap(); - - let u = u.unwrap(); - let vt = vt.unwrap(); - - // Keep 'k' singular values - let mut sigma_mat = rlst_mat![f64, (self.k, self.k)]; - for i in 0..self.k { - sigma_mat[[i, i]] = sigma[i] - } - - let (mu, nu) = u.shape(); - let u = u.block((0, 0), (mu, self.k)).eval(); - - let (mvt, nvt) = vt.shape(); - let vt = vt.block((0, 0), (self.k, nvt)).eval(); - - // // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // // let u = u.unwrap().slice(s![.., left..right]).to_owned(); - // // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); - // // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); - // // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); - - // // Store compressed M2L operators - // // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); - - let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::Slim, Mode::All).unwrap(); - let st = st.unwrap(); - let (mst, nst) = st.shape(); - let st_block = st.block((0, 0), (self.k, nst)); - let s_block = st_block.transpose().eval(); - - let mut c = rlst_mat![f64, (self.k, self.k * ntransfer_vectors)]; - // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), st.shape(), c.shape(), vt.shape()); - - // let st = s_block.transpose().eval(); - // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), sigma_mat.shape(), vt.shape(), st.shape()); - for i in 0..self.transfer_vectors.len() { - // let v_lidx = i * ncols; - // let v_ridx = v_lidx + ncols; - // let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); - - // let block_size = right*ncols; - // let start_idx = i * block_size; - // let end_idx = start_idx+block_size; - - // let tmp = sigma.dot(&vt_sub.dot(&st.t())); - // let lidx = i * self.k; - // let ridx = lidx + self.k; - // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); - let top_left = (0, i * ncols); - let dim = (self.k, ncols); - let vt_block = vt.block(top_left, dim); - - let tmp = sigma_mat.dot(&vt_block.dot(&s_block)); - - let top_left = (0, i * self.k); - let dim = (self.k, self.k); - - c.block_mut(top_left, dim) - .data_mut() - .copy_from_slice(tmp.data()); - } - - (u, st, c) - // let dummy = rlst_mat![f64, (1, 1)]; - // (dummy.new_like_self().eval(), dummy.new_like_self().eval(), dummy.new_like_self().eval()) - // assert!(false) - } -} - -impl SvdFieldTranslationNaiveKiFmm -where - T: Kernel + Default, -{ - pub fn new( - kernel: T, - k: Option, - expansion_order: usize, - domain: Domain, - alpha: f64, - ) -> Self { - let dummy = rlst_mat![f64, (1, 1)]; - - // TODO: There should be a default for matrices to make code cleaner. - let mut result = SvdFieldTranslationNaiveKiFmm { - alpha, - k: 100, - kernel, - m2l: ( - dummy.new_like_self().eval(), - dummy.new_like_self().eval(), - dummy.new_like_self().eval(), - ), - transfer_vectors: Vec::new(), - }; - - if let Some(k) = k { - // Compression rank <= number of coefficients - let ncoeffs = result.ncoeffs(expansion_order); - if k <= ncoeffs { - result.k = k - } else { - result.k = ncoeffs; - } - } else { - // TODO: Should be data driven if nothing is provided by the user - result.k = 50; - } - - result.transfer_vectors = result.compute_transfer_vectors(); - result.m2l = result.compute_m2l_operators(expansion_order, domain); - - result - } -} - -impl SvdFieldTranslationKiFmm -where - T: Kernel + Default, -{ - pub fn new( - kernel: T, - k: Option, - expansion_order: usize, - domain: Domain, - alpha: f64, - ) -> Self { - let dummy = rlst_mat![f64, (1, 1)]; - - // TODO: There should be a default for matrices to make code cleaner. - let mut result = SvdFieldTranslationKiFmm { - alpha, - k: 100, - kernel, - m2l: ( - dummy.new_like_self().eval(), - dummy.new_like_self().eval(), - dummy.new_like_self().eval(), - ), - transfer_vectors: Vec::new(), - }; - - if let Some(k) = k { - // Compression rank <= number of coefficients - let ncoeffs = result.ncoeffs(expansion_order); - if k <= ncoeffs { - result.k = k - } else { - result.k = ncoeffs; - } - } else { - // TODO: Should be data driven if nothing is provided by the user - result.k = 50; - } - - result.transfer_vectors = result.compute_transfer_vectors(); - result.m2l = result.compute_m2l_operators(expansion_order, domain); - - result - } -} - -// impl FftFieldTranslationNaiveKiFmm -// where -// T: Kernel + Default, -// { -// pub fn new(kernel: T, expansion_order: usize, domain: Domain, alpha: f64) -> Self { -// let mut result = FftFieldTranslationNaiveKiFmm::default(); - -// // Create maps between surface and convolution grids -// let (surf_to_conv, conv_to_surf) = -// FftFieldTranslationNaiveKiFmm::::compute_surf_to_conv_map(expansion_order); -// result.surf_to_conv_map = surf_to_conv; -// result.conv_to_surf_map = conv_to_surf; - -// result.kernel = kernel; - -// result.alpha = alpha; -// result.transfer_vectors = result.compute_transfer_vectors(); -// result.m2l = result.compute_m2l_operators(expansion_order, domain); - -// result -// } - -// pub fn compute_surf_to_conv_map( -// expansion_order: usize, -// ) -> (HashMap, HashMap) { -// let n = 2 * expansion_order - 1; - -// // Index maps between surface and convolution grids -// let mut surf_to_conv: HashMap = HashMap::new(); -// let mut conv_to_surf: HashMap = HashMap::new(); - -// // Initialise surface grid index -// let mut surf_index = 0; - -// // The boundaries of the surface grid -// let lower = expansion_order - 1; -// let upper = 2 * expansion_order - 2; - -// // Iterate through the entire convolution grid marking the boundaries -// // This makes the map much easier to understand and debug -// for i in 0..n { -// for j in 0..n { -// for k in 0..n { -// let conv_idx = i * n * n + j * n + k; -// if (i >= lower && j >= lower && (k == lower || k == upper)) -// || (j >= lower && k >= lower && (i == lower || i == upper)) -// || (k >= lower && i >= lower && (j == lower || j == upper)) -// { -// surf_to_conv.insert(surf_index, conv_idx); -// conv_to_surf.insert(conv_idx, surf_index); -// surf_index += 1; -// } -// } -// } -// } - -// (surf_to_conv, conv_to_surf) -// } - -// pub fn compute_kernel( -// &self, -// expansion_order: usize, -// convolution_grid: &[[f64; 3]], -// min_target: [f64; 3], -// ) -> Vec>> { -// let n = 2 * expansion_order - 1; -// let mut result = vec![vec![vec![0f64; n]; n]; n]; - -// for (i, result_i) in result.iter_mut().enumerate() { -// for (j, result_ij) in result_i.iter_mut().enumerate() { -// for (k, result_ijk) in result_ij.iter_mut().enumerate() { -// let conv_idx = i * n * n + j * n + k; -// let src = convolution_grid[conv_idx]; -// *result_ijk = self.kernel.kernel(&src[..], &min_target[..]); -// } -// } -// } -// result -// } - -// pub fn compute_signal(&self, expansion_order: usize, charges: &[f64]) -> Vec>> { -// let n = 2 * expansion_order - 1; -// let mut result = vec![vec![vec![0f64; n]; n]; n]; - -// for (i, result_i) in result.iter_mut().enumerate() { -// for (j, result_ij) in result_i.iter_mut().enumerate() { -// for (k, result_ijk) in result_ij.iter_mut().enumerate() { -// let conv_idx = i * n * n + j * n + k; -// if self.conv_to_surf_map.contains_key(&conv_idx) { -// let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); -// *result_ijk = charges[*surf_idx] -// } -// } -// } -// } - -// result -// } -// } - - - -mod test { - - use super::*; - use bempp_kernel::laplace_3d::Laplace3dKernel; - - #[test] - fn test_svd() { - let kernel = Laplace3dKernel::::default(); - let k = 100; - let order = 2; - let domain = Domain { - origin: [0., 0., 0.], - diameter: [1., 1., 1.], - }; - let alpha_inner = 1.05; - - let m2l_data_svd = - SvdFieldTranslationKiFmm::new(kernel, Some(k), order, domain, alpha_inner); - } -} +pub mod helpers; +pub mod types; +pub mod field; diff --git a/field/src/types.rs b/field/src/types.rs new file mode 100644 index 00000000..fd6150ee --- /dev/null +++ b/field/src/types.rs @@ -0,0 +1,76 @@ +use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix, Dynamic}; + +use bempp_tree::types::morton::MortonKey; +use bempp_traits::kernel::Kernel; + +// type FftM2LEntry = ArrayBase>, Dim<[usize; 3]>>; +pub type SvdM2lEntry = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; + +// // #[derive(Default)] +// pub struct FftFieldTranslationNaiveKiFmm +// where +// T: Kernel + Default, +// { +// // Amount to dilate inner check surface by +// pub alpha: f64, + +// // Maps between convolution and surface grids +// pub surf_to_conv_map: HashMap, +// pub conv_to_surf_map: HashMap, + +// // Precomputed FFT of unique kernel interactions placed on +// // convolution grid. +// pub m2l: Vec, + +// // Unique transfer vectors to lookup m2l unique kernel interactions +// pub transfer_vectors: Vec, + +// pub kernel: T, +// } + +// #[derive(Default)] +pub struct SvdFieldTranslationNaiveKiFmm +where + T: Kernel + Default, +{ + // Amount to dilate inner check surface by + pub alpha: f64, + + // Compression rank, if unspecified estimated from data. + pub k: usize, + + // Precomputed SVD compressed m2l interaction + pub m2l: (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry), + + // Unique transfer vectors to lookup m2l unique kernel interactions + pub transfer_vectors: Vec, + + pub kernel: T, +} + +// #[derive(Default)] +pub struct SvdFieldTranslationKiFmm +where + T: Kernel + Default, +{ + // Amount to dilate inner check surface by + pub alpha: f64, + + // Compression rank, if unspecified estimated from data. + pub k: usize, + + // Precomputed SVD compressed m2l interaction + pub m2l: (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry), + + // Unique transfer vectors to lookup m2l unique kernel interactions + pub transfer_vectors: Vec, + + pub kernel: T, +} + +#[derive(Debug)] +pub struct TransferVector { + pub vector: usize, + pub source: MortonKey, + pub target: MortonKey, +} \ No newline at end of file diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 58acfa1b..c8029b1e 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -33,9 +33,8 @@ use rlst::{ dense::{rlst_col_vec, rlst_mut_pointer_mat}, }; -use bempp_field::{ - FftFieldTranslationNaiveKiFmm, SvdFieldTranslationKiFmm, SvdFieldTranslationNaiveKiFmm, -}; +use bempp_field::types::{SvdFieldTranslationKiFmm, SvdFieldTranslationNaiveKiFmm}; + use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, fmm::{Fmm, FmmLoop, InteractionLists, SourceTranslation, TargetTranslation}, @@ -1354,10 +1353,6 @@ mod test { #[test] fn test_fmm<'a>() { let npoints = 1000; - // let points = points_fixture(npoints); - // let points_clone = points.clone(); - // let depth = 4; - // let n_crit = 150; let points = points_fixture(npoints, None, None); let order = 6; @@ -1376,14 +1371,6 @@ mod test { let start = Instant::now(); - // // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( - // // kernel.clone(), - // // Some(k), - // // order, - // // tree.get_domain().clone(), - // // alpha_inner, - // // ); - let m2l_data_svd = SvdFieldTranslationKiFmm::new( kernel.clone(), Some(k), @@ -1412,7 +1399,7 @@ mod test { let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); - + let leaf_coordinates = pts .iter() .map(|p| p.coordinate) @@ -1426,7 +1413,6 @@ mod test { rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] }.eval(); - let mut direct = vec![0f64; pts.len()]; let all_point_coordinates = points_fixture(npoints, None, None); @@ -1435,25 +1421,24 @@ mod test { let kernel = Laplace3dKernel::::default(); kernel.evaluate_st( - EvalType::Value, - all_point_coordinates.data(), - leaf_coordinates.data(), - &all_charges[..], - &mut direct[..] + EvalType::Value, + all_point_coordinates.data(), + leaf_coordinates.data(), + &all_charges[..], + &mut direct[..], ); println!("potentials {:?}", potentials.data()); println!("direct {:?}", direct); - // let abs_error: f64 = potentials - // .iter() - // .zip(direct.iter()) - // .map(|(a, b)| (a - b).abs()) - // .sum(); - // let rel_error: f64 = abs_error / (direct.iter().sum::()); + // let abs_error: f64 = potentials + // .iter() + // .zip(direct.iter()) + // .map(|(a, b)| (a - b).abs()) + // .sum(); + // let rel_error: f64 = abs_error / (direct.iter().sum::()); - // println!("p={:?} rel_error={:?}\n", order, rel_error); + // println!("p={:?} rel_error={:?}\n", order, rel_error); assert!(false) - } } diff --git a/fmm/src/laplace.rs b/fmm/src/laplace.rs deleted file mode 100644 index 60fa2649..00000000 --- a/fmm/src/laplace.rs +++ /dev/null @@ -1,190 +0,0 @@ -// //! Laplace kernel implementation. -// use bempp_traits::kernel::Kernel; - -// #[derive(Debug, Default, Clone)] -// pub struct LaplaceKernel { -// pub dim: usize, -// pub is_singular: bool, -// pub value_dimension: usize, -// } - -// impl LaplaceKernel { -// pub fn new(dim: usize, is_singular: bool, value_dimension: usize) -> LaplaceKernel { -// LaplaceKernel { -// dim, -// is_singular, -// value_dimension, -// } -// } - -// pub fn potential_kernel_3_d(&self, source: &[f64], target: &[f64]) -> f64 { -// let mut tmp = source -// .iter() -// .zip(target.iter()) -// .map(|(s, t)| (s - t).powf(2.0)) -// .sum::() -// .powf(0.5) -// * std::f64::consts::PI -// * 4.0; - -// tmp = tmp.recip(); - -// if tmp.is_finite() { -// tmp -// } else { -// 0. -// } -// } -// } - -// impl Kernel for LaplaceKernel { -// fn dim(&self) -> usize { -// self.dim -// } - -// fn is_singular(&self) -> bool { -// self.is_singular -// } - -// fn value_dimension(&self) -> usize { -// self.value_dimension -// } - -// fn kernel(&self, source: &[f64], target: &[f64]) -> f64 { -// let mut tmp = source -// .iter() -// .zip(target.iter()) -// .map(|(s, t)| (s - t).powf(2.0)) -// .sum::() -// .powf(0.5) -// * std::f64::consts::PI -// * 4.0; - -// tmp = tmp.recip(); - -// if tmp.is_finite() { -// tmp -// } else { -// 0. -// } -// } - -// fn potential(&self, sources: &[f64], charges: &[f64], targets: &[f64], potentials: &mut [f64]) { -// for (i, j) in (0..targets.len()).step_by(self.dim()).enumerate() { -// let mut potential = 0.0; -// let target = &targets[j..(j + self.dim())]; - -// for (k, l) in (0..sources.len()).step_by(self.dim()).enumerate() { -// let source = &sources[l..(l + self.dim())]; -// let tmp; -// if self.dim() == 3 { -// tmp = self.kernel(source, target); -// } else { -// panic!("Kernel not implemented for dimension={:?}!", self.dim()) -// } - -// potential += charges[k] * tmp; -// } -// potentials[i] = potential -// } -// } - -// fn gram(&self, sources: &[f64], targets: &[f64], result: &mut Vec) { -// // let mut result: Vec = Vec::new(); - -// for i in (0..targets.len()).step_by(self.dim()) { -// let target = &targets[i..(i + self.dim())]; -// let mut row: Vec = Vec::new(); - -// for j in (0..sources.len()).step_by(self.dim()) { -// let source = &sources[j..(j + self.dim())]; -// let tmp; -// if self.dim() == 3 { -// tmp = self.potential_kernel_3_d(source, target); -// } else { -// panic!("Gram not implemented for dimension={:?}!", self.dim()) -// } - -// row.push(tmp); -// } -// result.append(&mut row); -// } -// // Result::Ok(result) -// } - -// fn scale(&self, level: u64) -> f64 { -// 1. / (2f64.powf(level as f64)) -// } -// } - -// #[allow(unused_imports)] -// pub mod tests { - -// use rand::prelude::*; -// use rand::SeedableRng; - -// use super::*; - -// #[allow(dead_code)] -// fn points_fixture(npoints: usize, dim: usize) -> Vec { -// let mut range = StdRng::seed_from_u64(0); -// let between = rand::distributions::Uniform::from(0.0..1.0); -// let mut points = Vec::new(); - -// for _ in 0..npoints { -// for _ in 0..dim { -// points.push(between.sample(&mut range)) -// } -// } - -// points -// } - -// #[test] -// #[should_panic(expected = "Kernel not implemented for dimension=2!")] -// pub fn test_potential_panics() { -// let dim = 2; -// let npoints = 100; -// let sources = points_fixture(npoints, dim); -// let targets = points_fixture(npoints, dim); -// let charges = vec![1.0; npoints]; -// let mut potentials = vec![0.; npoints]; - -// let kernel = LaplaceKernel::new(dim, false, dim); -// kernel.potential( -// &sources[..], -// &charges[..], -// &targets[..], -// &mut potentials[..], -// ); -// } - -// #[test] -// #[should_panic(expected = "Gram not implemented for dimension=2!")] -// pub fn test_gram_panics() { -// let dim = 2; -// let npoints = 100; -// let sources = points_fixture(npoints, dim); -// let targets = points_fixture(npoints, dim); - -// let kernel = LaplaceKernel::new(dim, false, dim); -// let mut gram = Vec::::new(); -// kernel.gram(&sources[..], &targets[..], &mut gram); -// } - -// #[test] -// pub fn test_gram() { -// let dim = 3; -// let nsources = 100; -// let ntargets = 200; -// let sources = points_fixture(nsources, dim); -// let targets = points_fixture(ntargets, dim); - -// let kernel = LaplaceKernel::new(dim, false, dim); -// let mut gram = Vec::::new(); -// kernel.gram(&sources[..], &targets[..], &mut gram); - -// // Test dimension of output -// assert_eq!(gram.len(), ntargets * nsources); -// } -// } diff --git a/fmm/src/lib.rs b/fmm/src/lib.rs index c2f54f23..70355df6 100644 --- a/fmm/src/lib.rs +++ b/fmm/src/lib.rs @@ -3,5 +3,3 @@ pub mod charge; pub mod fmm; pub mod impl_charge; -// pub mod laplace; -pub mod linalg; diff --git a/fmm/src/linalg.rs b/fmm/src/linalg.rs deleted file mode 100644 index 58300cec..00000000 --- a/fmm/src/linalg.rs +++ /dev/null @@ -1,115 +0,0 @@ -// //! Temporary home of linear algebra utilities. TODO: Replace with routines from Householder. -// use ndarray::*; -// // use ndarray_linalg::*; - -// use bempp_traits::types::Scalar; - -// use rlst; -// use rlst::dense::{base_matrix::BaseMatrix, VectorContainer}; - -// // const F64_EPSILON: f64 = 2.220_446_049_250_313E-16f64; -// // type D = Dim<[usize; 2]>; -// // type Type1 = ArrayBase, D>; -// // type Type2 = ArrayBase::Real>, D>; - -// // /// Calculate the Moore-Penrose pseudoinverse. -// // pub fn pinv(array: &Array2) -> (Type1, Type2, Type1) { -// // let (u, mut s, vt): (_, Array1<_>, _) = array.svd(true, true).unwrap(); - -// // let u = u.unwrap(); -// // // Truncate u -// // let vt = vt.unwrap(); - -// // let max_s = s[0]; - -// // // Hacky, should really work with type check at runtime. -// // for s in s.iter_mut() { -// // if *s > T::real(4.) * max_s * T::real(F64_EPSILON) { -// // *s = T::real(1.) / *s; -// // } else { -// // *s = T::real(0); -// // } -// // } - -// // let v = vt.t(); -// // let ut = u.t(); - -// // let s_inv_mat = Array2::from_diag(&s); - -// // // Return components -// // (v.to_owned(), s_inv_mat.to_owned(), ut.to_owned()) -// // } - -// type Type1 = Matrix<::Real, BaseMatrix<::Real, VectorContainer<::Real>, Dynamic, Dynamic>, Dynamic, Dynamic>; - -// pub fn pinv(array: Type1) { - -// let (s, u, vt) = array.linalg.svd().unwrap(Mode::All, Mode::All); - -// let max_s = s[0]; - -// for s in s.iter_mut() { - -// } - -// } - -// // pub fn matrix_rank(array: &Array2) -> usize { -// // let (_, s, _): (_, Array1<_>, _) = array.svd(false, false).unwrap(); -// // let shape = array.shape(); -// // let max_dim = shape.iter().max().unwrap(); - -// // let tol = s[0] * T::real(*max_dim as f64) * T::real(F64_EPSILON); - -// // let significant: Vec = s.iter().map(|sv| sv > &tol).filter(|sv| *sv).collect(); -// // let rank = significant.iter().len(); - -// // rank -// // } - -// #[allow(unused_imports)] -// mod test { - -// use super::*; - -// use rlst; -// use rlst::common::tools::PrettyPrint; -// use rlst::dense::rlst_rand_mat; - -// #[test] -// fn test_pinv() { -// // let mut range = StdRng::seed_from_u64(0); -// // let between = rand::distributions::Uniform::from(0.0..1.0); - -// // // Setup a random square matrix, of dimension 'dim' -// // let mut data: Vec = Vec::new(); -// // let dim: usize = 5; -// // let nvals = dim.pow(2); -// // for _ in 0..nvals { -// // data.push(between.sample(&mut range)) -// // } - -// // let data = Array1::from_vec(data).into_shape((dim, dim)).unwrap(); - -// let dim = 5; -// let data = rlst_rand_mat![f64, (dim, dim)]; - -// pinv(&data); -// // let (a, b, c) = pinv(&data); - -// // // Test dimensions of computed inverse are correct -// // let inv = a.dot(&b).dot(&c); -// // assert_eq!(inv.ncols(), dim); -// // assert_eq!(inv.nrows(), dim); - -// // // Test that the inverse is approximately correct -// // let res = inv.dot(&data); - -// // let ones = Array1::from_vec(vec![1.; dim]); -// // let id = Array2::from_diag(&ones); - -// // for (a, b) in id.iter().zip(res.iter()) { -// // assert_approx_eq!(f64, *a, *b, epsilon = 1e-14); -// // } -// } -// } From 0665e3d3064de974f430c3c0eaa1fd26dc58bac0 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Thu, 6 Jul 2023 14:43:42 +0100 Subject: [PATCH 15/40] Reorganise fmm module --- field/src/field.rs | 78 +-- field/src/helpers.rs | 2 +- fmm/Cargo.toml | 7 - fmm/src/charge.rs | 82 +++- fmm/src/field_translation.rs | 487 +++++++++++++++++++ fmm/src/fmm.rs | 917 +---------------------------------- fmm/src/impl_charge.rs | 74 --- fmm/src/interaction_lists.rs | 128 +++++ fmm/src/lib.rs | 4 +- fmm/src/types.rs | 75 +++ kernel/src/laplace_3d.rs | 14 +- 11 files changed, 799 insertions(+), 1069 deletions(-) create mode 100644 fmm/src/field_translation.rs delete mode 100644 fmm/src/impl_charge.rs create mode 100644 fmm/src/interaction_lists.rs create mode 100644 fmm/src/types.rs diff --git a/field/src/field.rs b/field/src/field.rs index dc8253de..1119527f 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -1,24 +1,17 @@ -use std::collections::{HashMap, HashSet}; - -use itertools::Itertools; - use rlst; use rlst::algorithms::linalg::LinAlg; use rlst::algorithms::traits::svd::{Mode, Svd}; -use rlst::common::traits::{NewLikeSelf, NewLikeTranspose, Transpose}; +use rlst::common::traits::{NewLikeSelf, Transpose}; use rlst::common::{ - tools::PrettyPrint, - traits::{Copy, Eval}, + traits::Eval, }; -use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix}; -use rlst::dense::{rlst_fixed_mat, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}; +use rlst::dense::{rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}; use bempp_traits::{ field::FieldTranslationData, - kernel::{EvalType, Kernel, KernelType}, - types::Scalar, + kernel::{EvalType, Kernel}, }; -use bempp_tree::types::{domain::Domain, morton::MortonKey}; +use bempp_tree::types::domain::Domain; use crate::{helpers::compute_transfer_vectors, types::{TransferVector, SvdM2lEntry, SvdFieldTranslationKiFmm, SvdFieldTranslationNaiveKiFmm}}; @@ -188,26 +181,21 @@ where tmp_gram.data_mut(), ); - let lidx_sources = i * ncols; - let ridx_sources = lidx_sources + ncols; - let block_size = nrows * ncols; let start_idx = i * block_size; let end_idx = start_idx + block_size; - let mut block = se2tc_fat.get_slice_mut(start_idx, end_idx); + let block = se2tc_fat.get_slice_mut(start_idx, end_idx); block.copy_from_slice(tmp_gram.data_mut()); for j in 0..ncols { let start_idx = j * ntransfer_vectors * nrows + i * nrows; let end_idx = start_idx + nrows; - let mut block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); - let mut gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols); + let block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); + let gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols); block_column.copy_from_slice(gram_column); } } - let left: usize = 0; - let right: usize = self.k; let (sigma, u, vt) = se2tc_fat.linalg().svd(Mode::All, Mode::Slim).unwrap(); let u = u.unwrap(); @@ -219,10 +207,10 @@ where sigma_mat[[i, i]] = sigma[i] } - let (mu, nu) = u.shape(); + let (mu, _) = u.shape(); let u = u.block((0, 0), (mu, self.k)).eval(); - let (mvt, nvt) = vt.shape(); + let (_, nvt) = vt.shape(); let vt = vt.block((0, 0), (self.k, nvt)).eval(); // // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); @@ -237,7 +225,7 @@ where let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::Slim, Mode::All).unwrap(); let st = st.unwrap(); - let (mst, nst) = st.shape(); + let (_, nst) = st.shape(); let st_block = st.block((0, 0), (self.k, nst)); let s_block = st_block.transpose().eval(); @@ -342,31 +330,22 @@ where tmp_gram.data_mut(), ); - // // let tmp_gram = Array::from_shape_vec((nrows, ncols), tmp_gram).unwrap(); - // // se2tc_fat - // // .slice_mut(s![.., lidx_sources..ridx_sources]) - // // .assign(&tmp_gram); - let lidx_sources = i * ncols; - let ridx_sources = lidx_sources + ncols; - let block_size = nrows * ncols; let start_idx = i * block_size; let end_idx = start_idx + block_size; - let mut block = se2tc_fat.get_slice_mut(start_idx, end_idx); + let block = se2tc_fat.get_slice_mut(start_idx, end_idx); block.copy_from_slice(tmp_gram.data_mut()); for j in 0..ncols { let start_idx = j * ntransfer_vectors * nrows + i * nrows; let end_idx = start_idx + nrows; - let mut block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); - let mut gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols); + let block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); + let gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols); block_column.copy_from_slice(gram_column); } } - let left: usize = 0; - let right: usize = self.k; let (sigma, u, vt) = se2tc_fat.linalg().svd(Mode::All, Mode::Slim).unwrap(); let u = u.unwrap(); @@ -378,10 +357,10 @@ where sigma_mat[[i, i]] = sigma[i] } - let (mu, nu) = u.shape(); + let (mu, _) = u.shape(); let u = u.block((0, 0), (mu, self.k)).eval(); - let (mvt, nvt) = vt.shape(); + let (_, nvt) = vt.shape(); let vt = vt.block((0, 0), (self.k, nvt)).eval(); // // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); @@ -396,7 +375,7 @@ where let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::Slim, Mode::All).unwrap(); let st = st.unwrap(); - let (mst, nst) = st.shape(); + let (_, nst) = st.shape(); let st_block = st.block((0, 0), (self.k, nst)); let s_block = st_block.transpose().eval(); @@ -630,26 +609,3 @@ where // result // } // } - - - -mod test { - - use super::*; - use bempp_kernel::laplace_3d::Laplace3dKernel; - - #[test] - fn test_svd() { - let kernel = Laplace3dKernel::::default(); - let k = 100; - let order = 2; - let domain = Domain { - origin: [0., 0., 0.], - diameter: [1., 1., 1.], - }; - let alpha_inner = 1.05; - - let m2l_data_svd = - SvdFieldTranslationKiFmm::new(kernel, Some(k), order, domain, alpha_inner); - } -} diff --git a/field/src/helpers.rs b/field/src/helpers.rs index 6fe6d514..22536142 100644 --- a/field/src/helpers.rs +++ b/field/src/helpers.rs @@ -1,4 +1,4 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; use itertools::Itertools; diff --git a/fmm/Cargo.toml b/fmm/Cargo.toml index 3c3fc7e7..0c952c63 100644 --- a/fmm/Cargo.toml +++ b/fmm/Cargo.toml @@ -30,17 +30,10 @@ cauchy = "0.4.*" itertools = "0.10" mpi = { version = "0.6.*" } rand = "0.8.*" -ndarray = { version = "*", features = ["blas"]} -ndarray-rand = "0.14.0" -blas-src = { version = "*", features = ["openblas"] } -openblas-src = { version = "*", features = ["cblas", "system"] } -ndarray-linalg = { version = "*", features = ["openblas-system"] } float-cmp = "0.9.0" rayon = "1.7" num_cpus = "1" -ndrustfft = "0.4.0" num = "0.4" -ndarray-ndimage = "0.3.0" rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse" } [target.aarch64-apple-darwin] diff --git a/fmm/src/charge.rs b/fmm/src/charge.rs index f735e601..c90308c4 100644 --- a/fmm/src/charge.rs +++ b/fmm/src/charge.rs @@ -1,18 +1,74 @@ -#[derive(Clone, Debug, Default)] -pub struct Charge { - /// Charge data - pub data: f64, +use std::{ + cmp::{Eq, Ord, Ordering, PartialEq}, + hash::{Hash, Hasher}, +}; - /// Global unique index. - pub global_idx: usize, +use crate::types::{Charge, Charges}; + +impl Hash for Charge { + fn hash(&self, state: &mut H) { + self.global_idx.hash(state); + } +} + +impl PartialEq for Charge { + fn eq(&self, other: &Self) -> bool { + self.global_idx == other.global_idx + } +} + +impl Eq for Charge {} + +impl Ord for Charge { + fn cmp(&self, other: &Self) -> Ordering { + self.global_idx.cmp(&other.global_idx) + } +} + +impl PartialOrd for Charge { + fn partial_cmp(&self, other: &Self) -> Option { + // less_than(&self.morton, &other.morton) + Some(self.global_idx.cmp(&other.global_idx)) + } +} + +impl Charges { + pub fn new() -> Charges { + Charges { + charges: Vec::new(), + index: 0, + } + } + + pub fn add(&mut self, item: Charge) { + self.charges.push(item); + } + + pub fn sort(&mut self) { + self.charges.sort(); + } +} + +impl Iterator for Charges { + type Item = Charge; + + fn next(&mut self) -> Option { + if self.index >= self.charges.len() { + return None; + } + + self.index += 1; + self.charges.get(self.index).cloned() + } } -/// Container of **Points**. -#[derive(Clone, Debug, Default)] -pub struct Charges { - /// A vector of Charges - pub charges: Vec, +impl FromIterator for Charges { + fn from_iter>(iter: I) -> Self { + let mut c = Charges::new(); - /// index for implementing the Iterator trait. - pub index: usize, + for i in iter { + c.add(i); + } + c + } } diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs new file mode 100644 index 00000000..72fbcd8c --- /dev/null +++ b/fmm/src/field_translation.rs @@ -0,0 +1,487 @@ +// Implementation of field translations +use std::{ + collections::HashMap, + sync::{Arc, RwLock, Mutex}, + ops::Deref +}; + +use itertools::Itertools; +use rayon::prelude::*; + +use rlst::common::{ + traits::Eval, +}; +use rlst::dense::{rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}; +use rlst::{ + self, + dense::rlst_col_vec, +}; + +use bempp_traits::{field::{FieldTranslationData, FieldTranslation}, kernel::{Kernel, EvalType}, fmm::{TargetTranslation, SourceTranslation, InteractionLists, Fmm}, tree::Tree}; +use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; +use bempp_field::types::SvdFieldTranslationKiFmm; + +use crate::types::{FmmData, KiFmm}; + +impl SourceTranslation for FmmData> +where + T: Kernel + std::marker::Send + std::marker::Sync, + U: FieldTranslationData + std::marker::Sync + std::marker::Send, +{ + fn p2m<'a>(&self) { + if let Some(leaves) = self.fmm.tree().get_leaves() { + leaves.par_iter().for_each(move |&leaf| { + let leaf_multipole_arc = Arc::clone(self.multipoles.get(&leaf).unwrap()); + let fmm_arc = Arc::clone(&self.fmm); + + if let Some(leaf_points) = self.points.get(&leaf) { + let leaf_charges_arc = Arc::clone(self.charges.get(&leaf).unwrap()); + + // Lookup data + let leaf_coordinates = leaf_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + let nsources = leaf_coordinates.len() / self.fmm.kernel.space_dimension(); + + // Get into row major order + let leaf_coordinates = unsafe { + rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); + + let upward_check_surface = leaf.compute_surface( + &fmm_arc.tree().domain, + fmm_arc.order, + fmm_arc.alpha_outer, + ); + let ntargets = upward_check_surface.len() / fmm_arc.kernel.space_dimension(); + + let leaf_charges = leaf_charges_arc.deref(); + + // Calculate check potential + let mut check_potential = rlst_col_vec![f64, ntargets]; + + fmm_arc.kernel.evaluate_st( + EvalType::Value, + leaf_coordinates.data(), + &upward_check_surface[..], + &leaf_charges[..], + check_potential.data_mut() + ); + + let leaf_multipole_owned = ( + fmm_arc.kernel.scale(leaf.level()) + * fmm_arc.uc2e_inv.dot(&check_potential) + ).eval(); + + let mut leaf_multipole_lock = leaf_multipole_arc.lock().unwrap(); + + for i in 0..leaf_multipole_lock.shape().0 { + leaf_multipole_lock[[i, 0]] += leaf_multipole_owned[[i, 0]]; + } + } + }); + } + } + + fn m2m<'a>(&self, level: u64) { + // Parallelise over nodes at a given level + if let Some(sources) = self.fmm.tree().get_keys(level) { + sources.par_iter().for_each(move |&source| { + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + let operator_index = source.siblings().iter().position(|&x| x == source).unwrap(); + let source_multipole_arc = Arc::clone(self.multipoles.get(&source).unwrap()); + let target_multipole_arc = + Arc::clone(self.multipoles.get(&source.parent()).unwrap()); + let fmm_arc = Arc::clone(&self.fmm); + + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + + let target_multipole_owned = + fmm_arc.m2m[operator_index].dot(&source_multipole_lock); + + let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); + + for i in 0..ncoeffs { + target_multipole_lock[[i, 0]] += target_multipole_owned[[i, 0]]; + } + }) + } + } +} + +impl TargetTranslation for FmmData> +where + T: Kernel + std::marker::Sync + std::marker::Send, + U: FieldTranslationData + std::marker::Sync + std::marker::Send, +{ + fn l2l(&self, level: u64) { + if let Some(targets) = self.fmm.tree().get_keys(level) { + targets.par_iter().for_each(move |&target| { + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + let source_local_arc = Arc::clone(self.locals.get(&target.parent()).unwrap()); + let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); + let fmm = Arc::clone(&self.fmm); + + let operator_index = target.siblings().iter().position(|&x| x == target).unwrap(); + + let source_local_lock = source_local_arc.lock().unwrap(); + + let target_local_owned = fmm.l2l[operator_index].dot(&source_local_lock); + let mut target_local_lock = target_local_arc.lock().unwrap(); + + for i in 0..ncoeffs { + target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; + } + }) + } + } + + fn m2p<'a>(&self) { + if let Some(targets) = self.fmm.tree().get_leaves() { + targets.par_iter().for_each(move |&target| { + let fmm_arc = Arc::clone(&self.fmm); + let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); + + if let Some(points) = fmm_arc.tree().get_points(&target) { + if let Some(w_list) = fmm_arc.get_w_list(&target) { + for source in w_list.iter() { + let source_multipole_arc = + Arc::clone(self.multipoles.get(source).unwrap()); + + let upward_equivalent_surface = source.compute_surface( + fmm_arc.tree().get_domain(), + fmm_arc.order(), + fmm_arc.alpha_inner, + ); + + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + + let target_coordinates = points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); + + // Get into row major order + let target_coordinates = unsafe { + rlst_pointer_mat!['a, f64, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); + + let mut target_potential = rlst_col_vec![f64, ntargets]; + + fmm_arc.kernel.evaluate_st( + EvalType::Value, + &upward_equivalent_surface[..], + target_coordinates.data(), + source_multipole_lock.data(), + target_potential.data_mut(), + ); + + let mut target_potential_lock = target_potential_arc.lock().unwrap(); + + for i in 0..ntargets { + target_potential_lock[[i, 0]] += target_potential[[i, 0]]; + } + } + } + } + }) + } + } + + fn l2p<'a>(&self) { + if let Some(targets) = self.fmm.tree().get_leaves() { + targets.par_iter().for_each(move |&leaf| { + let fmm_arc = Arc::clone(&self.fmm); + let target_potential_arc = Arc::clone(self.potentials.get(&leaf).unwrap()); + let source_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); + + if let Some(target_points) = fmm_arc.tree().get_points(&leaf) { + // Lookup data + let target_coordinates = target_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); + + // Get into row major order + let target_coordinates = unsafe { + rlst_pointer_mat!['a, f64, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); + + let downward_equivalent_surface = leaf.compute_surface( + &fmm_arc.tree().domain, + fmm_arc.order, + fmm_arc.alpha_outer, + ); + + let source_local_lock = source_local_arc.lock().unwrap(); + + let mut target_potential = rlst_col_vec![f64, ntargets]; + + fmm_arc.kernel.evaluate_st( + EvalType::Value, + &downward_equivalent_surface[..], + target_coordinates.data(), + source_local_lock.data(), + target_potential.data_mut(), + ); + + let mut target_potential_lock = target_potential_arc.lock().unwrap(); + + for i in 0..ntargets { + target_potential_lock[[i, 0]] += target_potential[[i, 0]]; + } + } + }) + } + } + + fn p2l<'a>(&self) { + if let Some(targets) = self.fmm.tree().get_leaves() { + targets.par_iter().for_each(move |&leaf| { + let fmm_arc = Arc::clone(&self.fmm); + let target_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + + if let Some(x_list) = fmm_arc.get_x_list(&leaf) { + for source in x_list.iter() { + if let Some(source_points) = fmm_arc.tree().get_points(source) { + let source_coordinates = source_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); + + // Get into row major order + let source_coordinates = unsafe { + rlst_pointer_mat!['a, f64, source_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); + + let source_charges = self.charges.get(source).unwrap(); + + let downward_check_surface = leaf.compute_surface( + &fmm_arc.tree().domain, + fmm_arc.order, + fmm_arc.alpha_inner, + ); + + let ntargets = downward_check_surface.len() / fmm_arc.kernel.space_dimension(); + let mut downward_check_potential = rlst_col_vec![f64, ntargets]; + + fmm_arc.kernel.evaluate_st( + EvalType::Value, + source_coordinates.data(), + &downward_check_surface[..], + &source_charges[..], + downward_check_potential.data_mut() + ); + + + let mut target_local_lock = target_local_arc.lock().unwrap(); + + let target_local_owned = (fmm_arc.kernel.scale(leaf.level()) * fmm_arc.dc2e_inv.dot(&downward_check_potential)).eval(); + + for i in 0..ncoeffs { + target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; + } + } + } + } + }) + } + } + + fn p2p<'a>(&self) { + if let Some(targets) = self.fmm.tree().get_leaves() { + targets.par_iter().for_each(move |&target| { + let fmm_arc = Arc::clone(&self.fmm); + let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); + + if let Some(target_points) = fmm_arc.tree().get_points(&target) { + let target_coordinates = target_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let ntargets= target_coordinates.len() / self.fmm.kernel.space_dimension(); + + // Get into row major order + let target_coordinates = unsafe { + rlst_pointer_mat!['a, f64, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); + + if let Some(u_list) = fmm_arc.get_u_list(&target) { + for source in u_list.iter() { + if let Some(source_points) = fmm_arc.tree().get_points(source) { + let source_coordinates = source_points + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); + + // Get into row major order + let source_coordinates = unsafe { + rlst_pointer_mat!['a, f64, source_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] + }.eval(); + + let source_charges_arc = + Arc::clone(self.charges.get(source).unwrap()); + + let mut target_potential = rlst_col_vec![f64, ntargets]; + + fmm_arc.kernel.evaluate_st( + EvalType::Value, + source_coordinates.data(), + target_coordinates.data(), + &source_charges_arc[..], + target_potential.data_mut(), + ); + + let mut target_potential_lock = + target_potential_arc.lock().unwrap(); + + for i in 0..ntargets { + target_potential_lock[[i, 0]] += target_potential[[i, 0]]; + } + } + } + } + } + }) + } + } +} + +impl FieldTranslation for FmmData>> +where + T: Kernel + std::marker::Sync + std::marker::Send + Default, +{ + fn m2l<'a>(&self, level: u64) { + let Some(targets) = self.fmm.tree().get_keys(level) else { return }; + let mut transfer_vector_to_m2l = + HashMap::>>>::new(); + + for tv in self.fmm.m2l.transfer_vectors.iter() { + transfer_vector_to_m2l.insert(tv.vector, Arc::new(Mutex::new(Vec::new()))); + } + + targets.par_iter().enumerate().for_each(|(_i, &target)| { + if let Some(v_list) = self.fmm.get_v_list(&target) { + let calculated_transfer_vectors = v_list + .iter() + .map(|source| target.find_transfer_vector(source)) + .collect::>(); + for (transfer_vector, &source) in + calculated_transfer_vectors.iter().zip(v_list.iter()) + { + let m2l_arc = Arc::clone(transfer_vector_to_m2l.get(transfer_vector).unwrap()); + let mut m2l_lock = m2l_arc.lock().unwrap(); + m2l_lock.push((source, target)); + } + } + }); + + let mut transfer_vector_to_m2l_rw_lock = + HashMap::>>>::new(); + + // Find all multipole expansions and allocate + for (&transfer_vector, m2l_arc) in transfer_vector_to_m2l.iter() { + transfer_vector_to_m2l_rw_lock.insert( + transfer_vector, + Arc::new(RwLock::new(m2l_arc.lock().unwrap().clone())), + ); + } + + transfer_vector_to_m2l_rw_lock + .par_iter() + .for_each(|(transfer_vector, m2l_arc)| { + let c_idx = self + .fmm + .m2l + .transfer_vectors + .iter() + .position(|x| x.vector == *transfer_vector) + .unwrap(); + + let (nrows, _) = self.fmm.m2l.m2l.2.shape(); + let top_left = (0, c_idx * self.fmm.m2l.k); + let dim = (nrows, self.fmm.m2l.k); + + let c_sub = self.fmm.m2l.m2l.2.block(top_left, dim); + + let m2l_rw = m2l_arc.read().unwrap(); + let mut multipoles = rlst_mat![f64, (self.fmm.m2l.k, m2l_rw.len())]; + + for (i, (source, _)) in m2l_rw.iter().enumerate() { + let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + + // Compressed multipole + let compressed_source_multipole_owned = + self.fmm.m2l.m2l.1.dot(&source_multipole_lock).eval(); + + let first = i * self.fmm.m2l.k; + let last = first + self.fmm.m2l.k; + + let multipole_slice = multipoles.get_slice_mut(first, last); + multipole_slice.copy_from_slice(compressed_source_multipole_owned.data()); + } + + // // Compute convolution + let compressed_check_potential_owned = c_sub.dot(&multipoles); + + // Post process to find check potential + let check_potential_owned = self + .fmm + .m2l + .m2l + .0 + .dot(&compressed_check_potential_owned) + .eval(); + + // Compute local + let locals_owned = (self.fmm.dc2e_inv.dot(&check_potential_owned) + * self.fmm.kernel.scale(level) + * self.m2l_scale(level)) + .eval(); + + // Assign locals + for (i, (_, target)) in m2l_rw.iter().enumerate() { + let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); + let mut target_local_lock = target_local_arc.lock().unwrap(); + + let top_left = (0, i); + let dim = (self.fmm.m2l.k, 1); + let target_local_owned = locals_owned.block(top_left, dim); + + for i in 0..target_local_lock.shape().0 { + target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; + } + } + }); + } + + fn m2l_scale(&self, level: u64) -> f64 { + if level < 2 { + panic!("M2L only performed on level 2 and below") + } + + if level == 2 { + 1. / 2. + } else { + 2_f64.powf((level - 3) as f64) + } + } +} diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index c8029b1e..1f101cda 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,95 +1,38 @@ -extern crate blas_src; -// TODO Should be generic over kernel float type parmeter -// TODO SHould be generic over kernel evaluation type -// TODO should check what happens with rectangular distributions of points! +// TODO Should be generic over kernel float type parameter - this requires trees to be generic over float type +// TODO should check what happens with rectangular distributions of points would be easier to do as a part of the above todo. +// TODO: charge input should be utilized NOW! -use cauchy::Scalar; use itertools::Itertools; -// use ndarray::AssignElem; -// use ndarray::*; -// use ndarray_ndimage::{pad, PadMode}; -// use ndrustfft::{ndfft, ndfft_r2c, ndifft, ndifft_r2c, Complex, FftHandler, R2cFftHandler}; -use rayon::prelude::*; use std::{ collections::HashMap, - ops::{Deref, DerefMut}, - sync::{Arc, Mutex, RwLock}, + sync::{Arc, Mutex}, time::Instant, }; use rlst::algorithms::linalg::LinAlg; use rlst::algorithms::traits::pseudo_inverse::Pinv; -use rlst::algorithms::traits::svd::{Mode, Svd}; -use rlst::common::traits::{NewLikeSelf, NewLikeTranspose, Transpose}; -use rlst::common::{ - tools::PrettyPrint, - traits::{Copy, Eval}, -}; +use rlst::common::traits::NewLikeSelf; +use rlst::common::traits::Eval; use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix}; -use rlst::dense::{rlst_fixed_mat, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}; +use rlst::dense::{rlst_mat, rlst_pointer_mat, traits::*, Dot}; use rlst::{ self, - common::traits::ColumnMajorIterator, - dense::{rlst_col_vec, rlst_mut_pointer_mat}, + dense::rlst_col_vec, }; -use bempp_field::types::{SvdFieldTranslationKiFmm, SvdFieldTranslationNaiveKiFmm}; - use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, - fmm::{Fmm, FmmLoop, InteractionLists, SourceTranslation, TargetTranslation}, + fmm::{Fmm, FmmLoop,SourceTranslation, TargetTranslation}, kernel::{EvalType, Kernel}, tree::Tree, }; use bempp_tree::{ constants::ROOT, - types::{ - morton::{MortonKey, MortonKeys}, - point::Point, - single_node::SingleNodeTree, - }, + types::single_node::SingleNodeTree, }; -use crate::charge::Charges; - -type Expansions = - Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; -type Potentials = - Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; - -pub struct FmmData { - fmm: Arc, - multipoles: HashMap>>, - locals: HashMap>>, - potentials: HashMap>>, - points: HashMap>, - charges: HashMap>>, - // multipoles: HashMap>>>, - // locals: HashMap>>>, - // potentials: HashMap>>>, - // points: HashMap>, - // charges: HashMap>>, -} - -type C2EType = - Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; +use crate::types::{Charges, C2EType, FmmData, KiFmm}; -pub struct KiFmm> { - order: usize, - - uc2e_inv: C2EType, - - dc2e_inv: C2EType, - - alpha_inner: f64, - alpha_outer: f64, - - m2m: Vec, - l2l: Vec, - tree: T, - kernel: U, - m2l: V, -} #[allow(dead_code)] impl KiFmm @@ -146,9 +89,6 @@ where dc2e.data_mut(), ); - let nrows = m2l.ncoeffs(order); - let ncols = m2l.ncoeffs(order); - let (s, ut, v) = uc2e.linalg().pinv(None).unwrap(); let s = s.unwrap(); let ut = ut.unwrap(); @@ -269,839 +209,6 @@ where } } -impl SourceTranslation for FmmData> -where - T: Kernel + std::marker::Send + std::marker::Sync, - U: FieldTranslationData + std::marker::Sync + std::marker::Send, -{ - fn p2m<'a>(&self) { - if let Some(leaves) = self.fmm.tree.get_leaves() { - leaves.par_iter().for_each(move |&leaf| { - let leaf_multipole_arc = Arc::clone(self.multipoles.get(&leaf).unwrap()); - let fmm_arc = Arc::clone(&self.fmm); - - if let Some(leaf_points) = self.points.get(&leaf) { - let leaf_charges_arc = Arc::clone(self.charges.get(&leaf).unwrap()); - - // Lookup data - let leaf_coordinates = leaf_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - let nsources = leaf_coordinates.len() / self.fmm.kernel.space_dimension(); - - // Get into row major order - let leaf_coordinates = unsafe { - rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - let upward_check_surface = leaf.compute_surface( - &fmm_arc.tree().domain, - fmm_arc.order, - fmm_arc.alpha_outer, - ); - let ntargets = upward_check_surface.len() / fmm_arc.kernel.space_dimension(); - - let leaf_charges = leaf_charges_arc.deref(); - - // Calculate check potential - let mut check_potential = rlst_col_vec![f64, ntargets]; - - fmm_arc.kernel.evaluate_st( - EvalType::Value, - leaf_coordinates.data(), - &upward_check_surface[..], - &leaf_charges[..], - check_potential.data_mut() - ); - - let leaf_multipole_owned = ( - fmm_arc.kernel.scale(leaf.level()) - * fmm_arc.uc2e_inv.dot(&check_potential) - ).eval(); - - let mut leaf_multipole_lock = leaf_multipole_arc.lock().unwrap(); - - for i in 0..leaf_multipole_lock.shape().0 { - leaf_multipole_lock[[i, 0]] += leaf_multipole_owned[[i, 0]]; - } - } - }); - } - } - - fn m2m<'a>(&self, level: u64) { - // Parallelise over nodes at a given level - if let Some(sources) = self.fmm.tree.get_keys(level) { - sources.par_iter().for_each(move |&source| { - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - - let operator_index = source.siblings().iter().position(|&x| x == source).unwrap(); - let source_multipole_arc = Arc::clone(self.multipoles.get(&source).unwrap()); - let target_multipole_arc = - Arc::clone(self.multipoles.get(&source.parent()).unwrap()); - let fmm_arc = Arc::clone(&self.fmm); - - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - let target_multipole_owned = - fmm_arc.m2m[operator_index].dot(&source_multipole_lock); - - let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); - - for i in 0..ncoeffs { - target_multipole_lock[[i, 0]] += target_multipole_owned[[i, 0]]; - } - }) - } - } -} - -impl TargetTranslation for FmmData> -where - T: Kernel + std::marker::Sync + std::marker::Send, - U: FieldTranslationData + std::marker::Sync + std::marker::Send, -{ - fn l2l(&self, level: u64) { - if let Some(targets) = self.fmm.tree.get_keys(level) { - targets.par_iter().for_each(move |&target| { - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - let source_local_arc = Arc::clone(self.locals.get(&target.parent()).unwrap()); - let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); - let fmm = Arc::clone(&self.fmm); - - let operator_index = target.siblings().iter().position(|&x| x == target).unwrap(); - - let source_local_lock = source_local_arc.lock().unwrap(); - - let target_local_owned = fmm.l2l[operator_index].dot(&source_local_lock); - let mut target_local_lock = target_local_arc.lock().unwrap(); - - for i in 0..ncoeffs { - target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; - } - }) - } - } - - fn m2p<'a>(&self) { - if let Some(targets) = self.fmm.tree.get_leaves() { - targets.par_iter().for_each(move |&target| { - let fmm_arc = Arc::clone(&self.fmm); - let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - - if let Some(points) = fmm_arc.tree().get_points(&target) { - if let Some(w_list) = fmm_arc.get_w_list(&target) { - for source in w_list.iter() { - let source_multipole_arc = - Arc::clone(self.multipoles.get(source).unwrap()); - - let upward_equivalent_surface = source.compute_surface( - fmm_arc.tree().get_domain(), - fmm_arc.order(), - fmm_arc.alpha_inner, - ); - - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - let target_coordinates = points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); - - // Get into row major order - let target_coordinates = unsafe { - rlst_pointer_mat!['a, f64, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - let mut target_potential = rlst_col_vec![f64, ntargets]; - - fmm_arc.kernel.evaluate_st( - EvalType::Value, - &upward_equivalent_surface[..], - target_coordinates.data(), - source_multipole_lock.data(), - target_potential.data_mut(), - ); - - let mut target_potential_lock = target_potential_arc.lock().unwrap(); - - for i in 0..ntargets { - target_potential_lock[[i, 0]] += target_potential[[i, 0]]; - } - } - } - } - }) - } - } - - fn l2p<'a>(&self) { - if let Some(targets) = self.fmm.tree().get_leaves() { - targets.par_iter().for_each(move |&leaf| { - let fmm_arc = Arc::clone(&self.fmm); - let target_potential_arc = Arc::clone(self.potentials.get(&leaf).unwrap()); - let source_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - - if let Some(target_points) = fmm_arc.tree().get_points(&leaf) { - // Lookup data - let target_coordinates = target_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - let ntargets = target_coordinates.len() / self.fmm.kernel.space_dimension(); - - // Get into row major order - let target_coordinates = unsafe { - rlst_pointer_mat!['a, f64, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - let downward_equivalent_surface = leaf.compute_surface( - &fmm_arc.tree().domain, - fmm_arc.order, - fmm_arc.alpha_outer, - ); - - let source_local_lock = source_local_arc.lock().unwrap(); - - let mut target_potential = rlst_col_vec![f64, ntargets]; - - fmm_arc.kernel.evaluate_st( - EvalType::Value, - &downward_equivalent_surface[..], - target_coordinates.data(), - source_local_lock.data(), - target_potential.data_mut(), - ); - - let mut target_potential_lock = target_potential_arc.lock().unwrap(); - - for i in 0..ntargets { - target_potential_lock[[i, 0]] += target_potential[[i, 0]]; - } - } - }) - } - } - - fn p2l<'a>(&self) { - if let Some(targets) = self.fmm.tree().get_leaves() { - targets.par_iter().for_each(move |&leaf| { - let fmm_arc = Arc::clone(&self.fmm); - let target_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - - if let Some(x_list) = fmm_arc.get_x_list(&leaf) { - for source in x_list.iter() { - if let Some(source_points) = fmm_arc.tree().get_points(source) { - let source_coordinates = source_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); - - // Get into row major order - let source_coordinates = unsafe { - rlst_pointer_mat!['a, f64, source_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - let source_charges = self.charges.get(source).unwrap(); - - let downward_check_surface = leaf.compute_surface( - &fmm_arc.tree().domain, - fmm_arc.order, - fmm_arc.alpha_inner, - ); - - let ntargets = downward_check_surface.len() / fmm_arc.kernel.space_dimension(); - let mut downward_check_potential = rlst_col_vec![f64, ntargets]; - - fmm_arc.kernel.evaluate_st( - EvalType::Value, - source_coordinates.data(), - &downward_check_surface[..], - &source_charges[..], - downward_check_potential.data_mut() - ); - - - let mut target_local_lock = target_local_arc.lock().unwrap(); - - let target_local_owned = (fmm_arc.kernel.scale(leaf.level()) * fmm_arc.dc2e_inv.dot(&downward_check_potential)).eval(); - - for i in 0..ncoeffs { - target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; - } - } - } - } - }) - } - } - - fn p2p<'a>(&self) { - if let Some(targets) = self.fmm.tree.get_leaves() { - targets.par_iter().for_each(move |&target| { - let fmm_arc = Arc::clone(&self.fmm); - let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); - - if let Some(target_points) = fmm_arc.tree().get_points(&target) { - let target_coordinates = target_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let ntargets= target_coordinates.len() / self.fmm.kernel.space_dimension(); - - // Get into row major order - let target_coordinates = unsafe { - rlst_pointer_mat!['a, f64, target_coordinates.as_ptr(), (ntargets, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - if let Some(u_list) = fmm_arc.get_u_list(&target) { - for source in u_list.iter() { - if let Some(source_points) = fmm_arc.tree().get_points(source) { - let source_coordinates = source_points - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let nsources = source_coordinates.len() / self.fmm.kernel.space_dimension(); - - // Get into row major order - let source_coordinates = unsafe { - rlst_pointer_mat!['a, f64, source_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); - - let source_charges_arc = - Arc::clone(self.charges.get(source).unwrap()); - - // let source_charges_view = - // ArrayView::from(source_charges_arc.deref()); - // let source_charges_slice = source_charges_view.as_slice().unwrap(); - - let mut target_potential = rlst_col_vec![f64, ntargets]; - // let mut target_potential = - // vec![0f64; target_coordinates.len() / self.fmm.kernel.dim()]; - - fmm_arc.kernel.evaluate_st( - EvalType::Value, - source_coordinates.data(), - target_coordinates.data(), - &source_charges_arc[..], - target_potential.data_mut(), - ); - - let mut target_potential_lock = - target_potential_arc.lock().unwrap(); - - for i in 0..ntargets { - target_potential_lock[[i, 0]] += target_potential[[i, 0]]; - } - } - } - } - } - }) - } - } -} - -impl FieldTranslation for FmmData>> -where - T: Kernel + std::marker::Sync + std::marker::Send + Default, -{ - fn m2l<'a>(&self, level: u64) { - let Some(targets) = self.fmm.tree().get_keys(level) else { return }; - let mut transfer_vector_to_m2l = - HashMap::>>>::new(); - - for tv in self.fmm.m2l.transfer_vectors.iter() { - transfer_vector_to_m2l.insert(tv.vector, Arc::new(Mutex::new(Vec::new()))); - } - - targets.par_iter().enumerate().for_each(|(_i, &target)| { - if let Some(v_list) = self.fmm.get_v_list(&target) { - let calculated_transfer_vectors = v_list - .iter() - .map(|source| target.find_transfer_vector(source)) - .collect::>(); - for (transfer_vector, &source) in - calculated_transfer_vectors.iter().zip(v_list.iter()) - { - let m2l_arc = Arc::clone(transfer_vector_to_m2l.get(transfer_vector).unwrap()); - let mut m2l_lock = m2l_arc.lock().unwrap(); - m2l_lock.push((source, target)); - } - } - }); - - let mut transfer_vector_to_m2l_rw_lock = - HashMap::>>>::new(); - - // Find all multipole expansions and allocate - for (&transfer_vector, m2l_arc) in transfer_vector_to_m2l.iter() { - transfer_vector_to_m2l_rw_lock.insert( - transfer_vector, - Arc::new(RwLock::new(m2l_arc.lock().unwrap().clone())), - ); - } - - transfer_vector_to_m2l_rw_lock - .par_iter() - .for_each(|(transfer_vector, m2l_arc)| { - let c_idx = self - .fmm - .m2l - .transfer_vectors - .iter() - .position(|x| x.vector == *transfer_vector) - .unwrap(); - - let c_lidx = c_idx * self.fmm.m2l.k; - let c_ridx = c_lidx + self.fmm.m2l.k; - // let c_sub = self.fmm.m2l.m2l.2.slice(s![.., c_lidx..c_ridx]); - - let (nrows, _) = self.fmm.m2l.m2l.2.shape(); - let top_left = (0, c_lidx); - let dim = (nrows, self.fmm.m2l.k); - - // println!("{:?} {:?} {:?}", top_left, dim, self.fmm.m2l.m2l.2.shape()); - let c_sub = self.fmm.m2l.m2l.2.block(top_left, dim); - - let m2l_rw = m2l_arc.read().unwrap(); - // let mut multipoles = Array2::zeros((self.fmm.m2l.k, m2l_rw.len())); - let mut multipoles = rlst_mat![f64, (self.fmm.m2l.k, m2l_rw.len())]; - - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - - for (i, (source, _)) in m2l_rw.iter().enumerate() { - let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - - // // let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); - - // Compressed multipole - let compressed_source_multipole_owned = - self.fmm.m2l.m2l.1.dot(&source_multipole_lock).eval(); - - let first = i * self.fmm.m2l.k; - let last = first + self.fmm.m2l.k; - - let multipole_slice = multipoles.get_slice_mut(first, last); - multipole_slice.copy_from_slice(compressed_source_multipole_owned.data()); - // multipoles - // .slice_mut(s![.., i]) - // .assign(&compressed_source_multipole_owned); - } - - // // Compute convolution - let compressed_check_potential_owned = c_sub.dot(&multipoles); - - // Post process to find check potential - let check_potential_owned = self - .fmm - .m2l - .m2l - .0 - .dot(&compressed_check_potential_owned) - .eval(); - - // Compute local - // // let locals_owned = self.m2l_scale(level) - // // * self.fmm.kernel.scale(level) - // // * self - // // .fmm - // // .dc2e_inv - // // .0 - // // .dot(&self.fmm.dc2e_inv.1.dot(&check_potential_owned)); - let locals_owned = (self.fmm.dc2e_inv.dot(&check_potential_owned) - * self.fmm.kernel.scale(level) - * self.m2l_scale(level)) - .eval(); - - // Assign locals - for (i, (_, target)) in m2l_rw.iter().enumerate() { - let target_local_arc = Arc::clone(self.locals.get(target).unwrap()); - let mut target_local_lock = target_local_arc.lock().unwrap(); - - let first = i * self.fmm.m2l.k; - let last = first + self.fmm.m2l.k; - - let top_left = (0, i); - let dim = (self.fmm.m2l.k, 1); - let target_local_owned = locals_owned.block(top_left, dim); - - // let target_local_owned = locals_owned.slice(s![.., i]); - - // println!("target lock {:?}", target_local_lock.shape()); - for i in 0..target_local_lock.shape().0 { - target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; - } - } - }); - } - - fn m2l_scale(&self, level: u64) -> f64 { - if level < 2 { - panic!("M2L only performed on level 2 and below") - } - - if level == 2 { - 1. / 2. - } else { - 2_f64.powf((level - 3) as f64) - } - } -} - -// impl FieldTranslation for FmmData>> -// where -// T: Kernel + std::marker::Sync + std::marker::Send + Default, -// { -// fn m2l(&self, level: u64) { -// if let Some(targets) = self.fmm.tree().get_keys(level) { -// // Find transfer vectors -// targets.par_iter().for_each(move |&target| { -// let fmm_arc: Arc>> = -// Arc::clone(&self.fmm); -// let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); - -// if let Some(v_list) = fmm_arc.get_v_list(&target) { -// for (_i, source) in v_list.iter().enumerate() { -// // Locate correct components of compressed M2L matrix. -// let transfer_vector = target.find_transfer_vector(source); - -// let c_idx = fmm_arc -// .m2l -// .transfer_vectors -// .iter() -// .position(|x| x.vector == transfer_vector) -// .unwrap(); -// let c_lidx = c_idx * fmm_arc.m2l.k; -// let c_ridx = c_lidx + fmm_arc.m2l.k; -// let c_sub = fmm_arc.m2l.m2l.2.slice(s![.., c_lidx..c_ridx]); - -// let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); -// let source_multipole_lock = source_multipole_arc.lock().unwrap(); -// let source_multipole_view = ArrayView::from(source_multipole_lock.deref()); - -// // Compressed multipole -// let compressed_source_multipole_owned = -// fmm_arc.m2l.m2l.1.dot(&source_multipole_view); - -// // Convolution to find compressed check potential -// let compressed_check_potential_owned = -// c_sub.dot(&compressed_source_multipole_owned); - -// // Post process to find check potential -// let check_potential_owned = -// fmm_arc.m2l.m2l.0.dot(&compressed_check_potential_owned); - -// // Compute local -// let target_local_owned = self.m2l_scale(target.level()) -// * fmm_arc.kernel.scale(target.level()) -// * fmm_arc -// .dc2e_inv -// .0 -// .dot(&self.fmm.dc2e_inv.1.dot(&check_potential_owned)); - -// // Store computation -// let mut target_local_lock = target_local_arc.lock().unwrap(); - -// if !target_local_lock.is_empty() { -// target_local_lock -// .iter_mut() -// .zip(target_local_owned.iter()) -// .for_each(|(c, m)| *c += *m); -// } else { -// target_local_lock.extend(target_local_owned); -// } -// } -// } -// }) -// } -// } - -// fn m2l_scale(&self, level: u64) -> f64 { -// if level < 2 { -// panic!("M2L only performed on level 2 and below") -// } - -// if level == 2 { -// 1. / 2. -// } else { -// 2_f64.powf((level - 3) as f64) -// } -// } -// } - -// impl FieldTranslation for FmmData>> -// where -// T: Kernel + std::marker::Sync + std::marker::Send + Default, -// { -// fn m2l(&self, level: u64) { -// if let Some(targets) = self.fmm.tree().get_keys(level) { -// targets.par_iter().for_each(move |&target| { -// let fmm_arc = Arc::clone(&self.fmm); -// let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); - -// if let Some(v_list) = fmm_arc.get_v_list(&target) { -// for (_, source) in v_list.iter().enumerate() { -// let transfer_vector = target.find_transfer_vector(source); - -// // Locate correct precomputed FFT of kernel interactions -// let k_idx = fmm_arc -// .m2l -// .transfer_vectors -// .iter() -// .position(|x| x.vector == transfer_vector) -// .unwrap(); - -// // Compute FFT of signal -// let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); -// let source_multipole_lock = source_multipole_arc.lock().unwrap(); - -// let signal = fmm_arc -// .m2l -// .compute_signal(fmm_arc.order, source_multipole_lock.deref()); - -// // 1. Pad the signal -// let m = signal.len(); -// let n = signal[0].len(); -// let k = signal[0][0].len(); - -// let p = 2 * m; -// let q = 2 * n; -// let r = 2 * k; - -// let signal = Array3::from_shape_vec( -// (m, n, k), -// signal.into_iter().flatten().flatten().collect(), -// ) -// .unwrap(); - -// let padding = [[p - m, 0], [q - n, 0], [r - k, 0]]; -// let padded_signal = pad(&signal, &padding, PadMode::Constant(0.)); - -// // 2. FFT of the padded signal -// // 2.1 Init the handlers for FFTs along each axis -// let mut handler_ax0 = FftHandler::::new(p); -// let mut handler_ax1 = FftHandler::::new(q); -// let mut handler_ax2 = R2cFftHandler::::new(r); - -// // 2.2 Compute the transform along each axis -// let mut padded_signal_hat: Array3> = -// Array3::zeros((p, q, r / 2 + 1)); -// let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); -// ndfft_r2c(&padded_signal, &mut tmp1, &mut handler_ax2, 2); -// let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); -// ndfft(&tmp1, &mut tmp2, &mut handler_ax1, 1); -// ndfft(&tmp2, &mut padded_signal_hat, &mut handler_ax0, 0); - -// // 3.Compute convolution to find check potential -// let padded_kernel_hat = &fmm_arc.m2l.m2l[k_idx]; - -// // Hadamard product -// let check_potential_hat = padded_kernel_hat * padded_signal_hat; - -// // 3.1 Compute iFFT to find check potentials -// let mut check_potential: Array3 = Array3::zeros((p, q, r)); -// let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); -// ndifft(&check_potential_hat, &mut tmp1, &mut handler_ax0, 0); -// let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); -// ndifft(&tmp1, &mut tmp2, &mut handler_ax1, 1); -// ndifft_r2c(&tmp2, &mut check_potential, &mut handler_ax2, 2); - -// // Filter check potentials -// let check_potential = -// check_potential.slice(s![p - m - 1..p, q - n - 1..q, r - k - 1..r]); - -// let (_, target_surface_idxs) = target.surface_grid(fmm_arc.order); - -// let mut tmp = Vec::new(); -// for index in target_surface_idxs.chunks_exact(fmm_arc.kernel.dim()) { -// let element = check_potential[[index[0], index[1], index[2]]]; -// tmp.push(element); -// } - -// // Compute local coefficients from check potentials -// let check_potential = Array::from_shape_vec( -// target_surface_idxs.len() / fmm_arc.kernel.dim(), -// tmp, -// ) -// .unwrap(); - -// // Compute local -// let target_local_owned = self.m2l_scale(target.level()) -// * fmm_arc.kernel.scale(target.level()) -// * fmm_arc -// .dc2e_inv -// .0 -// .dot(&self.fmm.dc2e_inv.1.dot(&check_potential)); - -// // Store computation -// let mut target_local_lock = target_local_arc.lock().unwrap(); - -// if !target_local_lock.is_empty() { -// target_local_lock -// .iter_mut() -// .zip(target_local_owned.iter()) -// .for_each(|(c, m)| *c += *m); -// } else { -// target_local_lock.extend(target_local_owned); -// } -// } -// } -// }) -// } -// } - -// fn m2l_scale(&self, level: u64) -> f64 { -// if level < 2 { -// panic!("M2L only performed on level 2 and below") -// } - -// if level == 2 { -// 1. / 2. -// } else { -// 2_f64.powf((level - 3) as f64) -// } -// } -// } - -impl InteractionLists for KiFmm -where - T: Tree, - U: Kernel, - V: FieldTranslationData, -{ - type Tree = T; - - fn get_u_list( - &self, - key: &::NodeIndex, - ) -> Option<::NodeIndices> { - let mut u_list = Vec::::new(); - let neighbours = key.neighbors(); - - // Child level - let mut neighbors_children_adj: Vec = neighbours - .iter() - .flat_map(|n| n.children()) - .filter(|nc| self.tree().get_all_keys_set().contains(nc) && key.is_adjacent(nc)) - .collect(); - - // Key level - let mut neighbors_adj: Vec = neighbours - .iter() - .filter(|n| self.tree().get_all_keys_set().contains(n) && key.is_adjacent(n)) - .cloned() - .collect(); - - // Parent level - let mut parent_neighbours_adj: Vec = key - .parent() - .neighbors() - .into_iter() - .filter(|pn| self.tree().get_all_keys_set().contains(pn) && key.is_adjacent(pn)) - .collect(); - - u_list.append(&mut neighbors_children_adj); - u_list.append(&mut neighbors_adj); - u_list.append(&mut parent_neighbours_adj); - u_list.push(*key); - - if !u_list.is_empty() { - Some(MortonKeys { - keys: u_list, - index: 0, - }) - } else { - None - } - } - - fn get_v_list( - &self, - key: &::NodeIndex, - ) -> Option<::NodeIndices> { - if key.level() >= 2 { - let v_list = key - .parent() - .neighbors() - .iter() - .flat_map(|pn| pn.children()) - .filter(|pnc| self.tree().get_all_keys_set().contains(pnc) && !key.is_adjacent(pnc)) - .collect_vec(); - - if !v_list.is_empty() { - return Some(MortonKeys { - keys: v_list, - index: 0, - }); - } else { - return None; - } - } - None - } - - fn get_w_list( - &self, - key: &::NodeIndex, - ) -> Option<::NodeIndices> { - // Child level - let w_list = key - .neighbors() - .iter() - .flat_map(|n| n.children()) - .filter(|nc| self.tree().get_all_keys_set().contains(nc) && !key.is_adjacent(nc)) - .collect_vec(); - - if !w_list.is_empty() { - Some(MortonKeys { - keys: w_list, - index: 0, - }) - } else { - None - } - } - - fn get_x_list( - &self, - key: &::NodeIndex, - ) -> Option<::NodeIndices> { - let x_list = key - .parent() - .neighbors() - .into_iter() - .filter(|pn| self.tree.get_all_keys_set().contains(pn) && !key.is_adjacent(pn)) - .collect_vec(); - - if !x_list.is_empty() { - Some(MortonKeys { - keys: x_list, - index: 0, - }) - } else { - None - } - } -} - impl Fmm for KiFmm where T: Tree, @@ -1186,8 +293,10 @@ where } #[allow(unused_imports)] +#[allow(warnings)] mod test { use bempp_kernel::laplace_3d::evaluate_laplace_one_target; + use bempp_field::types::SvdFieldTranslationKiFmm; // use approx::{assert_relative_eq, RelativeEq}; use rand::prelude::*; use rand::SeedableRng; diff --git a/fmm/src/impl_charge.rs b/fmm/src/impl_charge.rs deleted file mode 100644 index ee05469a..00000000 --- a/fmm/src/impl_charge.rs +++ /dev/null @@ -1,74 +0,0 @@ -use std::{ - cmp::{Eq, Ord, Ordering, PartialEq}, - hash::{Hash, Hasher}, -}; - -use crate::charge::{Charge, Charges}; - -impl Hash for Charge { - fn hash(&self, state: &mut H) { - self.global_idx.hash(state); - } -} - -impl PartialEq for Charge { - fn eq(&self, other: &Self) -> bool { - self.global_idx == other.global_idx - } -} - -impl Eq for Charge {} - -impl Ord for Charge { - fn cmp(&self, other: &Self) -> Ordering { - self.global_idx.cmp(&other.global_idx) - } -} - -impl PartialOrd for Charge { - fn partial_cmp(&self, other: &Self) -> Option { - // less_than(&self.morton, &other.morton) - Some(self.global_idx.cmp(&other.global_idx)) - } -} - -impl Charges { - pub fn new() -> Charges { - Charges { - charges: Vec::new(), - index: 0, - } - } - - pub fn add(&mut self, item: Charge) { - self.charges.push(item); - } - - pub fn sort(&mut self) { - self.charges.sort(); - } -} - -impl Iterator for Charges { - type Item = Charge; - - fn next(&mut self) -> Option { - if self.index >= self.charges.len() { - return None; - } - - self.index += 1; - self.charges.get(self.index).cloned() - } -} - -impl FromIterator for Charges { - fn from_iter>(iter: I) -> Self { - let mut c = Charges::new(); - - for i in iter { - c.add(i); - } - c - } -} diff --git a/fmm/src/interaction_lists.rs b/fmm/src/interaction_lists.rs new file mode 100644 index 00000000..a5ef8faa --- /dev/null +++ b/fmm/src/interaction_lists.rs @@ -0,0 +1,128 @@ +// Implementation of interaction lists for FMMs (single and multinode) +use itertools::Itertools; + +use bempp_traits::{fmm::InteractionLists, field::FieldTranslationData, kernel::Kernel, tree::Tree}; +use bempp_tree::{types::morton::{MortonKey, MortonKeys}}; + +use crate::types::KiFmm; + +impl InteractionLists for KiFmm +where + T: Tree, + U: Kernel, + V: FieldTranslationData, +{ + type Tree = T; + + fn get_u_list( + &self, + key: &::NodeIndex, + ) -> Option<::NodeIndices> { + let mut u_list = Vec::::new(); + let neighbours = key.neighbors(); + + // Child level + let mut neighbors_children_adj: Vec = neighbours + .iter() + .flat_map(|n| n.children()) + .filter(|nc| self.tree.get_all_keys_set().contains(nc) && key.is_adjacent(nc)) + .collect(); + + // Key level + let mut neighbors_adj: Vec = neighbours + .iter() + .filter(|n| self.tree.get_all_keys_set().contains(n) && key.is_adjacent(n)) + .cloned() + .collect(); + + // Parent level + let mut parent_neighbours_adj: Vec = key + .parent() + .neighbors() + .into_iter() + .filter(|pn| self.tree.get_all_keys_set().contains(pn) && key.is_adjacent(pn)) + .collect(); + + u_list.append(&mut neighbors_children_adj); + u_list.append(&mut neighbors_adj); + u_list.append(&mut parent_neighbours_adj); + u_list.push(*key); + + if !u_list.is_empty() { + Some(MortonKeys { + keys: u_list, + index: 0, + }) + } else { + None + } + } + + fn get_v_list( + &self, + key: &::NodeIndex, + ) -> Option<::NodeIndices> { + if key.level() >= 2 { + let v_list = key + .parent() + .neighbors() + .iter() + .flat_map(|pn| pn.children()) + .filter(|pnc| self.tree.get_all_keys_set().contains(pnc) && !key.is_adjacent(pnc)) + .collect_vec(); + + if !v_list.is_empty() { + return Some(MortonKeys { + keys: v_list, + index: 0, + }); + } else { + return None; + } + } + None + } + + fn get_w_list( + &self, + key: &::NodeIndex, + ) -> Option<::NodeIndices> { + // Child level + let w_list = key + .neighbors() + .iter() + .flat_map(|n| n.children()) + .filter(|nc| self.tree.get_all_keys_set().contains(nc) && !key.is_adjacent(nc)) + .collect_vec(); + + if !w_list.is_empty() { + Some(MortonKeys { + keys: w_list, + index: 0, + }) + } else { + None + } + } + + fn get_x_list( + &self, + key: &::NodeIndex, + ) -> Option<::NodeIndices> { + let x_list = key + .parent() + .neighbors() + .into_iter() + .filter(|pn| self.tree.get_all_keys_set().contains(pn) && !key.is_adjacent(pn)) + .collect_vec(); + + if !x_list.is_empty() { + Some(MortonKeys { + keys: x_list, + index: 0, + }) + } else { + None + } + } +} \ No newline at end of file diff --git a/fmm/src/lib.rs b/fmm/src/lib.rs index 70355df6..e50bf590 100644 --- a/fmm/src/lib.rs +++ b/fmm/src/lib.rs @@ -2,4 +2,6 @@ #![cfg_attr(feature = "strict", deny(warnings))] pub mod charge; pub mod fmm; -pub mod impl_charge; +pub mod types; +pub mod field_translation; +pub mod interaction_lists; \ No newline at end of file diff --git a/fmm/src/types.rs b/fmm/src/types.rs new file mode 100644 index 00000000..24f89351 --- /dev/null +++ b/fmm/src/types.rs @@ -0,0 +1,75 @@ +use std::{ + sync::{Arc, Mutex}, + collections::HashMap +}; + +use bempp_traits::{ + field::FieldTranslationData, + kernel::Kernel, + tree::Tree, + fmm::{Fmm} +}; +use bempp_tree::{ + types::{morton::MortonKey, point::Point} +}; +use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix}; +use rlst::dense::{traits::*}; +use rlst::{ + self, +}; + + + +#[derive(Clone, Debug, Default)] +pub struct Charge { + /// Charge data + pub data: f64, + + /// Global unique index. + pub global_idx: usize, +} + +/// Container of **Points**. +#[derive(Clone, Debug, Default)] +pub struct Charges { + /// A vector of Charges + pub charges: Vec, + + /// index for implementing the Iterator trait. + pub index: usize, +} + +pub type Expansions = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; + +pub type Potentials = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; + +pub type C2EType = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; + +pub struct FmmData { + pub fmm: Arc, + pub multipoles: HashMap>>, + pub locals: HashMap>>, + pub potentials: HashMap>>, + pub points: HashMap>, + pub charges: HashMap>>, +} + +pub struct KiFmm> { + pub order: usize, + + pub uc2e_inv: C2EType, + + pub dc2e_inv: C2EType, + + pub alpha_inner: f64, + pub alpha_outer: f64, + + pub m2m: Vec, + pub l2l: Vec, + pub tree: T, + pub kernel: U, + pub m2l: V, +} \ No newline at end of file diff --git a/kernel/src/laplace_3d.rs b/kernel/src/laplace_3d.rs index 0a1e2f3f..3f81fc4f 100644 --- a/kernel/src/laplace_3d.rs +++ b/kernel/src/laplace_3d.rs @@ -1,6 +1,5 @@ //! Implementation of the Laplace kernel use bempp_traits::{ - fmm::Fmm, kernel::{EvalType, Kernel, KernelType}, types::Scalar, }; @@ -136,7 +135,6 @@ pub fn evaluate_laplace_one_target_one_source( match eval_type { EvalType::Value => { - let mut my_result = T::zero(); let diff_norm = ((target[0] - source[0]) * (target[0] - source[0]) + (target[1] - source[1]) * (target[1] - source[1]) + (target[2] - source[2]) * (target[2] - source[2])) @@ -149,18 +147,13 @@ pub fn evaluate_laplace_one_target_one_source( } }; - my_result += T::one().mul_real(inv_diff_norm); + let my_result = T::from(inv_diff_norm).unwrap(); result[0] = my_result.mul_real(m_inv_4pi) } EvalType::ValueDeriv => { // Cannot simply use an array my_result as this is not // correctly auto-vectorized. - let mut my_result0 = T::zero(); - let mut my_result1 = T::zero(); - let mut my_result2 = T::zero(); - let mut my_result3 = T::zero(); - let diff0 = source[0] - target[0]; let diff1 = source[1] - target[1]; let diff2 = source[2] - target[2]; @@ -174,6 +167,11 @@ pub fn evaluate_laplace_one_target_one_source( }; let inv_diff_norm_cubed = inv_diff_norm * inv_diff_norm * inv_diff_norm; + let my_result0 = T::from(inv_diff_norm).unwrap(); + let my_result1 = T::from(diff0 * inv_diff_norm_cubed).unwrap(); + let my_result2 = T::from(diff1 * inv_diff_norm_cubed).unwrap(); + let my_result3 = T::from(diff2 * inv_diff_norm_cubed).unwrap(); + result[0] = my_result0.mul_real(m_inv_4pi); result[1] = my_result1.mul_real(m_inv_4pi); result[2] = my_result2.mul_real(m_inv_4pi); From 00831d0bd15448ffcb94cfa1a6db65f1f7df28f0 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Thu, 6 Jul 2023 14:54:41 +0100 Subject: [PATCH 16/40] Tidy imports --- field/src/field.rs | 252 ++--------------------------------- field/src/helpers.rs | 4 +- field/src/lib.rs | 2 +- field/src/types.rs | 32 +---- fmm/src/field_translation.rs | 36 ++--- fmm/src/fmm.rs | 28 ++-- fmm/src/interaction_lists.rs | 8 +- fmm/src/lib.rs | 5 +- fmm/src/types.rs | 23 +--- 9 files changed, 65 insertions(+), 325 deletions(-) diff --git a/field/src/field.rs b/field/src/field.rs index 1119527f..0a906e6c 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -1,11 +1,11 @@ -use rlst; -use rlst::algorithms::linalg::LinAlg; -use rlst::algorithms::traits::svd::{Mode, Svd}; -use rlst::common::traits::{NewLikeSelf, Transpose}; -use rlst::common::{ - traits::Eval, +use rlst::{ + algorithms::{ + linalg::LinAlg, + traits::svd::{Mode, Svd}, + }, + common::traits::{Eval, NewLikeSelf, Transpose}, + dense::{rlst_mat, traits::*, Dot, Shape}, }; -use rlst::dense::{rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}; use bempp_traits::{ field::FieldTranslationData, @@ -13,8 +13,10 @@ use bempp_traits::{ }; use bempp_tree::types::domain::Domain; -use crate::{helpers::compute_transfer_vectors, types::{TransferVector, SvdM2lEntry, SvdFieldTranslationKiFmm, SvdFieldTranslationNaiveKiFmm}}; - +use crate::{ + helpers::compute_transfer_vectors, + types::{SvdFieldTranslationKiFmm, SvdM2lEntry, TransferVector}, +}; // impl FieldTranslationData for FftFieldTranslationNaiveKiFmm // where @@ -145,17 +147,12 @@ where expansion_order: usize, domain: Self::Domain, ) -> Self::M2LOperators { - // ){ // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) // Compute interaction matrices between source and unique targets, defined by unique transfer vectors let nrows = self.ncoeffs(expansion_order); let ncols = self.ncoeffs(expansion_order); - // let mut se2tc_fat: SvdM2lEntry = - // Array2::zeros((nrows, ncols * self.transfer_vectors.len())); - // let mut se2tc_thin: SvdM2lEntry = - // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); let ntransfer_vectors = self.transfer_vectors.len(); let mut se2tc_fat = rlst_mat![f64, (nrows, ncols * ntransfer_vectors)]; @@ -213,190 +210,16 @@ where let (_, nvt) = vt.shape(); let vt = vt.block((0, 0), (self.k, nvt)).eval(); - // // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // // let u = u.unwrap().slice(s![.., left..right]).to_owned(); - // // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); - // // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); - // // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); - - // // Store compressed M2L operators - // // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); - + // Store compressed M2L operators let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::Slim, Mode::All).unwrap(); let st = st.unwrap(); let (_, nst) = st.shape(); let st_block = st.block((0, 0), (self.k, nst)); let s_block = st_block.transpose().eval(); - - let mut c = rlst_mat![f64, (self.k, self.k * ntransfer_vectors)]; - // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), st.shape(), c.shape(), vt.shape()); - - // let st = s_block.transpose().eval(); - // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), sigma_mat.shape(), vt.shape(), st.shape()); - for i in 0..self.transfer_vectors.len() { - // let v_lidx = i * ncols; - // let v_ridx = v_lidx + ncols; - // let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); - - // let block_size = right*ncols; - // let start_idx = i * block_size; - // let end_idx = start_idx+block_size; - - // let tmp = sigma.dot(&vt_sub.dot(&st.t())); - // let lidx = i * self.k; - // let ridx = lidx + self.k; - // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); - let top_left = (0, i * ncols); - let dim = (self.k, ncols); - let vt_block = vt.block(top_left, dim); - - let tmp = sigma_mat.dot(&vt_block.dot(&s_block)); - - let top_left = (0, i * self.k); - let dim = (self.k, self.k); - - c.block_mut(top_left, dim) - .data_mut() - .copy_from_slice(tmp.data()); - } - - (u, st, c) - // let dummy = rlst_mat![f64, (1, 1)]; - // (dummy.new_like_self().eval(), dummy.new_like_self().eval(), dummy.new_like_self().eval()) - // assert!(false) - } -} - -impl FieldTranslationData for SvdFieldTranslationNaiveKiFmm -where - T: Kernel + Default, -{ - type TransferVector = Vec; - type M2LOperators = (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry); - type Domain = Domain; - - fn compute_transfer_vectors(&self) -> Self::TransferVector { - compute_transfer_vectors() - } - - fn ncoeffs(&self, expansion_order: usize) -> usize { - 6 * (expansion_order - 1).pow(2) + 2 - } - - fn compute_m2l_operators<'a>( - &self, - expansion_order: usize, - domain: Self::Domain, - ) -> Self::M2LOperators { - // Compute unique M2L interactions at Level 3 (smallest choice with all vectors) - - // Compute interaction matrices between source and unique targets, defined by unique transfer vectors - let nrows = self.ncoeffs(expansion_order); - let ncols = self.ncoeffs(expansion_order); - - // let mut se2tc_fat: SvdM2lEntry = - // Array2::zeros((nrows, ncols * self.transfer_vectors.len())); - // let mut se2tc_thin: SvdM2lEntry = - // Array2::zeros((ncols * self.transfer_vectors.len(), nrows)); - let ntransfer_vectors = self.transfer_vectors.len(); - let mut se2tc_fat = rlst_mat![f64, (nrows, ncols * ntransfer_vectors)]; - - let mut se2tc_thin = rlst_mat![f64, (nrows * ntransfer_vectors, ncols)]; - - for (i, t) in self.transfer_vectors.iter().enumerate() { - let source_equivalent_surface = - t.source - .compute_surface(&domain, expansion_order, self.alpha); - let nsources = source_equivalent_surface.len() / self.kernel.space_dimension(); - let source_equivalent_surface = unsafe { - rlst_pointer_mat!['a, f64, source_equivalent_surface.as_ptr(), (nsources, self.kernel.space_dimension()), (1, nsources)] - }; - - let target_check_surface = - t.target - .compute_surface(&domain, expansion_order, self.alpha); - let ntargets = target_check_surface.len() / self.kernel.space_dimension(); - let target_check_surface = unsafe { - rlst_pointer_mat!['a, f64, target_check_surface.as_ptr(), (ntargets, self.kernel.space_dimension()), (1, ntargets)] - }; - - let mut tmp_gram = rlst_mat![f64, (ntargets, nsources)]; - - self.kernel.gram( - EvalType::Value, - source_equivalent_surface.data(), - target_check_surface.data(), - tmp_gram.data_mut(), - ); - - let block_size = nrows * ncols; - let start_idx = i * block_size; - let end_idx = start_idx + block_size; - let block = se2tc_fat.get_slice_mut(start_idx, end_idx); - block.copy_from_slice(tmp_gram.data_mut()); - - for j in 0..ncols { - let start_idx = j * ntransfer_vectors * nrows + i * nrows; - let end_idx = start_idx + nrows; - let block_column = se2tc_thin.get_slice_mut(start_idx, end_idx); - let gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols); - block_column.copy_from_slice(gram_column); - } - } - - let (sigma, u, vt) = se2tc_fat.linalg().svd(Mode::All, Mode::Slim).unwrap(); - - let u = u.unwrap(); - let vt = vt.unwrap(); - - // Keep 'k' singular values - let mut sigma_mat = rlst_mat![f64, (self.k, self.k)]; - for i in 0..self.k { - sigma_mat[[i, i]] = sigma[i] - } - - let (mu, _) = u.shape(); - let u = u.block((0, 0), (mu, self.k)).eval(); - - let (_, nvt) = vt.shape(); - let vt = vt.block((0, 0), (self.k, nvt)).eval(); - - // // let (u, sigma, vt) = se2tc_fat.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // // let u = u.unwrap().slice(s![.., left..right]).to_owned(); - // // let sigma = Array2::from_diag(&sigma.slice(s![left..right])); - // // let vt = vt.unwrap().slice(s![left..right, ..]).to_owned(); - // // let (_r, _gamma, st) = se2tc_thin.svddc(ndarray_linalg::JobSvd::Some).unwrap(); - // // let st = st.unwrap().slice(s![left..right, ..]).to_owned(); - - // // Store compressed M2L operators - // // let mut c = Array2::zeros((self.k, self.k * self.transfer_vectors.len())); - - let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::Slim, Mode::All).unwrap(); - let st = st.unwrap(); - let (_, nst) = st.shape(); - let st_block = st.block((0, 0), (self.k, nst)); - let s_block = st_block.transpose().eval(); - let mut c = rlst_mat![f64, (self.k, self.k * ntransfer_vectors)]; - // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), st.shape(), c.shape(), vt.shape()); - // let st = s_block.transpose().eval(); - // println!("HERE {:?} {:?} {:?} {:?}", u.shape(), sigma_mat.shape(), vt.shape(), st.shape()); for i in 0..self.transfer_vectors.len() { - // let v_lidx = i * ncols; - // let v_ridx = v_lidx + ncols; - // let vt_sub = vt.slice(s![.., v_lidx..v_ridx]); - - // let block_size = right*ncols; - // let start_idx = i * block_size; - // let end_idx = start_idx+block_size; - - // let tmp = sigma.dot(&vt_sub.dot(&st.t())); - // let lidx = i * self.k; - // let ridx = lidx + self.k; - // c.slice_mut(s![.., lidx..ridx]).assign(&tmp); let top_left = (0, i * ncols); let dim = (self.k, ncols); let vt_block = vt.block(top_left, dim); @@ -405,62 +228,13 @@ where let top_left = (0, i * self.k); let dim = (self.k, self.k); - + c.block_mut(top_left, dim) .data_mut() .copy_from_slice(tmp.data()); } (u, st, c) - // let dummy = rlst_mat![f64, (1, 1)]; - // (dummy.new_like_self().eval(), dummy.new_like_self().eval(), dummy.new_like_self().eval()) - // assert!(false) - } -} - -impl SvdFieldTranslationNaiveKiFmm -where - T: Kernel + Default, -{ - pub fn new( - kernel: T, - k: Option, - expansion_order: usize, - domain: Domain, - alpha: f64, - ) -> Self { - let dummy = rlst_mat![f64, (1, 1)]; - - // TODO: There should be a default for matrices to make code cleaner. - let mut result = SvdFieldTranslationNaiveKiFmm { - alpha, - k: 100, - kernel, - m2l: ( - dummy.new_like_self().eval(), - dummy.new_like_self().eval(), - dummy.new_like_self().eval(), - ), - transfer_vectors: Vec::new(), - }; - - if let Some(k) = k { - // Compression rank <= number of coefficients - let ncoeffs = result.ncoeffs(expansion_order); - if k <= ncoeffs { - result.k = k - } else { - result.k = ncoeffs; - } - } else { - // TODO: Should be data driven if nothing is provided by the user - result.k = 50; - } - - result.transfer_vectors = result.compute_transfer_vectors(); - result.m2l = result.compute_m2l_operators(expansion_order, domain); - - result } } diff --git a/field/src/helpers.rs b/field/src/helpers.rs index 22536142..df4fe287 100644 --- a/field/src/helpers.rs +++ b/field/src/helpers.rs @@ -2,7 +2,7 @@ use std::collections::HashSet; use itertools::Itertools; -use bempp_tree::{types::{morton::MortonKey, domain::Domain}}; +use bempp_tree::types::{domain::Domain, morton::MortonKey}; use crate::types::TransferVector; @@ -97,4 +97,4 @@ pub fn compute_transfer_vectors() -> Vec { } result -} \ No newline at end of file +} diff --git a/field/src/lib.rs b/field/src/lib.rs index 97b05ee2..875db307 100644 --- a/field/src/lib.rs +++ b/field/src/lib.rs @@ -1,3 +1,3 @@ +pub mod field; pub mod helpers; pub mod types; -pub mod field; diff --git a/field/src/types.rs b/field/src/types.rs index fd6150ee..2d36d5eb 100644 --- a/field/src/types.rs +++ b/field/src/types.rs @@ -1,10 +1,13 @@ -use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix, Dynamic}; +use rlst::dense::{ + base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix, Dynamic, +}; -use bempp_tree::types::morton::MortonKey; use bempp_traits::kernel::Kernel; +use bempp_tree::types::morton::MortonKey; // type FftM2LEntry = ArrayBase>, Dim<[usize; 3]>>; -pub type SvdM2lEntry = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; +pub type SvdM2lEntry = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; // // #[derive(Default)] // pub struct FftFieldTranslationNaiveKiFmm @@ -28,27 +31,6 @@ pub type SvdM2lEntry = Matrix, Dynamic // pub kernel: T, // } -// #[derive(Default)] -pub struct SvdFieldTranslationNaiveKiFmm -where - T: Kernel + Default, -{ - // Amount to dilate inner check surface by - pub alpha: f64, - - // Compression rank, if unspecified estimated from data. - pub k: usize, - - // Precomputed SVD compressed m2l interaction - pub m2l: (SvdM2lEntry, SvdM2lEntry, SvdM2lEntry), - - // Unique transfer vectors to lookup m2l unique kernel interactions - pub transfer_vectors: Vec, - - pub kernel: T, -} - -// #[derive(Default)] pub struct SvdFieldTranslationKiFmm where T: Kernel + Default, @@ -73,4 +55,4 @@ pub struct TransferVector { pub vector: usize, pub source: MortonKey, pub target: MortonKey, -} \ No newline at end of file +} diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 72fbcd8c..824cff18 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -1,26 +1,26 @@ // Implementation of field translations use std::{ collections::HashMap, - sync::{Arc, RwLock, Mutex}, - ops::Deref + ops::Deref, + sync::{Arc, Mutex, RwLock}, }; use itertools::Itertools; use rayon::prelude::*; -use rlst::common::{ - traits::Eval, +use bempp_field::types::SvdFieldTranslationKiFmm; +use bempp_traits::{ + field::{FieldTranslation, FieldTranslationData}, + fmm::{Fmm, InteractionLists, SourceTranslation, TargetTranslation}, + kernel::{EvalType, Kernel}, + tree::Tree, }; -use rlst::dense::{rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}; +use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; use rlst::{ - self, - dense::rlst_col_vec, + common::traits::Eval, + dense::{rlst_col_vec, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}, }; -use bempp_traits::{field::{FieldTranslationData, FieldTranslation}, kernel::{Kernel, EvalType}, fmm::{TargetTranslation, SourceTranslation, InteractionLists, Fmm}, tree::Tree}; -use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; -use bempp_field::types::SvdFieldTranslationKiFmm; - use crate::types::{FmmData, KiFmm}; impl SourceTranslation for FmmData> @@ -273,7 +273,7 @@ where fmm_arc.order, fmm_arc.alpha_inner, ); - + let ntargets = downward_check_surface.len() / fmm_arc.kernel.space_dimension(); let mut downward_check_potential = rlst_col_vec![f64, ntargets]; @@ -281,8 +281,8 @@ where EvalType::Value, source_coordinates.data(), &downward_check_surface[..], - &source_charges[..], - downward_check_potential.data_mut() + &source_charges[..], + downward_check_potential.data_mut() ); @@ -312,7 +312,7 @@ where .map(|p| p.coordinate) .flat_map(|[x, y, z]| vec![x, y, z]) .collect_vec(); - + let ntargets= target_coordinates.len() / self.fmm.kernel.space_dimension(); // Get into row major order @@ -334,7 +334,7 @@ where // Get into row major order let source_coordinates = unsafe { rlst_pointer_mat!['a, f64, source_coordinates.as_ptr(), (nsources, fmm_arc.kernel.space_dimension()), (fmm_arc.kernel.space_dimension(), 1)] - }.eval(); + }.eval(); let source_charges_arc = Arc::clone(self.charges.get(source).unwrap()); @@ -344,14 +344,14 @@ where fmm_arc.kernel.evaluate_st( EvalType::Value, source_coordinates.data(), - target_coordinates.data(), + target_coordinates.data(), &source_charges_arc[..], target_potential.data_mut(), ); let mut target_potential_lock = target_potential_arc.lock().unwrap(); - + for i in 0..ntargets { target_potential_lock[[i, 0]] += target_potential[[i, 0]]; } diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 1f101cda..82edde77 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,4 +1,4 @@ -// TODO Should be generic over kernel float type parameter - this requires trees to be generic over float type +// TODO Should be generic over kernel float type parameter - this requires trees to be generic over float type // TODO should check what happens with rectangular distributions of points would be easier to do as a part of the above todo. // TODO: charge input should be utilized NOW! @@ -9,30 +9,24 @@ use std::{ time::Instant, }; -use rlst::algorithms::linalg::LinAlg; -use rlst::algorithms::traits::pseudo_inverse::Pinv; -use rlst::common::traits::NewLikeSelf; -use rlst::common::traits::Eval; -use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix}; -use rlst::dense::{rlst_mat, rlst_pointer_mat, traits::*, Dot}; use rlst::{ - self, - dense::rlst_col_vec, + algorithms::{linalg::LinAlg, traits::pseudo_inverse::Pinv}, + common::traits::{Eval, NewLikeSelf}, + dense::{ + base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix, rlst_col_vec, + rlst_mat, rlst_pointer_mat, traits::*, Dot, + }, }; use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, - fmm::{Fmm, FmmLoop,SourceTranslation, TargetTranslation}, + fmm::{Fmm, FmmLoop, SourceTranslation, TargetTranslation}, kernel::{EvalType, Kernel}, tree::Tree, }; -use bempp_tree::{ - constants::ROOT, - types::single_node::SingleNodeTree, -}; - -use crate::types::{Charges, C2EType, FmmData, KiFmm}; +use bempp_tree::{constants::ROOT, types::single_node::SingleNodeTree}; +use crate::types::{C2EType, Charges, FmmData, KiFmm}; #[allow(dead_code)] impl KiFmm @@ -295,8 +289,8 @@ where #[allow(unused_imports)] #[allow(warnings)] mod test { - use bempp_kernel::laplace_3d::evaluate_laplace_one_target; use bempp_field::types::SvdFieldTranslationKiFmm; + use bempp_kernel::laplace_3d::evaluate_laplace_one_target; // use approx::{assert_relative_eq, RelativeEq}; use rand::prelude::*; use rand::SeedableRng; diff --git a/fmm/src/interaction_lists.rs b/fmm/src/interaction_lists.rs index a5ef8faa..7c5f8dc4 100644 --- a/fmm/src/interaction_lists.rs +++ b/fmm/src/interaction_lists.rs @@ -1,8 +1,10 @@ // Implementation of interaction lists for FMMs (single and multinode) use itertools::Itertools; -use bempp_traits::{fmm::InteractionLists, field::FieldTranslationData, kernel::Kernel, tree::Tree}; -use bempp_tree::{types::morton::{MortonKey, MortonKeys}}; +use bempp_traits::{ + field::FieldTranslationData, fmm::InteractionLists, kernel::Kernel, tree::Tree, +}; +use bempp_tree::types::morton::{MortonKey, MortonKeys}; use crate::types::KiFmm; @@ -125,4 +127,4 @@ where None } } -} \ No newline at end of file +} diff --git a/fmm/src/lib.rs b/fmm/src/lib.rs index e50bf590..0d1a52bc 100644 --- a/fmm/src/lib.rs +++ b/fmm/src/lib.rs @@ -1,7 +1,6 @@ //! Fast Solver FMM library -#![cfg_attr(feature = "strict", deny(warnings))] pub mod charge; +pub mod field_translation; pub mod fmm; +pub mod interaction_lists; pub mod types; -pub mod field_translation; -pub mod interaction_lists; \ No newline at end of file diff --git a/fmm/src/types.rs b/fmm/src/types.rs index 24f89351..e95c184f 100644 --- a/fmm/src/types.rs +++ b/fmm/src/types.rs @@ -1,24 +1,13 @@ use std::{ + collections::HashMap, sync::{Arc, Mutex}, - collections::HashMap }; -use bempp_traits::{ - field::FieldTranslationData, - kernel::Kernel, - tree::Tree, - fmm::{Fmm} -}; -use bempp_tree::{ - types::{morton::MortonKey, point::Point} -}; +use bempp_traits::{field::FieldTranslationData, fmm::Fmm, kernel::Kernel, tree::Tree}; +use bempp_tree::types::{morton::MortonKey, point::Point}; +use rlst::dense::traits::*; use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix}; -use rlst::dense::{traits::*}; -use rlst::{ - self, -}; - - +use rlst::{self}; #[derive(Clone, Debug, Default)] pub struct Charge { @@ -72,4 +61,4 @@ pub struct KiFmm> { pub tree: T, pub kernel: U, pub m2l: V, -} \ No newline at end of file +} From 5ea4de556e87bd29a376894a911d03d5977139be Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Thu, 6 Jul 2023 15:58:16 +0100 Subject: [PATCH 17/40] Add some fmm test, so can begin working on todos; --- field/src/field.rs | 8 +- fmm/src/fmm.rs | 191 +++++++++++-------- traits/src/field.rs | 1 + traits/src/fmm.rs | 11 +- tree/src/implementations/impl_domain.rs | 4 +- tree/src/implementations/impl_single_node.rs | 32 +--- 6 files changed, 137 insertions(+), 110 deletions(-) diff --git a/field/src/field.rs b/field/src/field.rs index 0a906e6c..ac2f03c5 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -254,7 +254,7 @@ where // TODO: There should be a default for matrices to make code cleaner. let mut result = SvdFieldTranslationKiFmm { alpha, - k: 100, + k: 0, kernel, m2l: ( dummy.new_like_self().eval(), @@ -264,13 +264,13 @@ where transfer_vectors: Vec::new(), }; + let ncoeffs = result.ncoeffs(expansion_order); if let Some(k) = k { // Compression rank <= number of coefficients - let ncoeffs = result.ncoeffs(expansion_order); if k <= ncoeffs { - result.k = k + result.k = k; } else { - result.k = ncoeffs; + result.k = ncoeffs } } else { // TODO: Should be data driven if nothing is provided by the user diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 82edde77..ed8d75a1 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,6 +1,8 @@ -// TODO Should be generic over kernel float type parameter - this requires trees to be generic over float type +// TODO: Fix datatree creation, it currently instantiates number of potentials using ncoeffs, fix compression. // TODO should check what happens with rectangular distributions of points would be easier to do as a part of the above todo. // TODO: charge input should be utilized NOW! +// TODO: Fix the componentwise storage of pinv of dc2e/uc2e as this is losing accuracy. +// TODO Should be generic over kernel float type parameter - this requires trees to be generic over float type use itertools::Itertools; use std::{ @@ -20,7 +22,7 @@ use rlst::{ use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, - fmm::{Fmm, FmmLoop, SourceTranslation, TargetTranslation}, + fmm::{Fmm, FmmLoop, SourceTranslation, TargetTranslation, TimeDict}, kernel::{EvalType, Kernel}, tree::Tree, }; @@ -230,81 +232,129 @@ where T: Fmm, FmmData: SourceTranslation + FieldTranslation + TargetTranslation, { - fn upward_pass(&self) { - // Particle to Multipole - let start = Instant::now(); - self.p2m(); - println!("P2M = {:?}ms", start.elapsed().as_millis()); + fn upward_pass(&self, time: Option) -> Option { + match time { + Some(true) => { + let mut times = TimeDict::default(); + // Particle to Multipole + let start = Instant::now(); + self.p2m(); + times.insert("p2m".to_string(), start.elapsed().as_millis()); - // Multipole to Multipole - let depth = self.fmm.tree().get_depth(); - let start = Instant::now(); - for level in (1..=depth).rev() { - self.m2m(level) + // Multipole to Multipole + let depth = self.fmm.tree().get_depth(); + let start = Instant::now(); + for level in (1..=depth).rev() { + self.m2m(level) + } + times.insert("m2m".to_string(), start.elapsed().as_millis()); + Some(times) + } + Some(false) | None => { + // Particle to Multipole + self.p2m(); + + // Multipole to Multipole + let depth = self.fmm.tree().get_depth(); + for level in (1..=depth).rev() { + self.m2m(level) + } + None + } } - println!("M2M = {:?}ms", start.elapsed().as_millis()); } - fn downward_pass(&self) { + fn downward_pass(&self, time: Option) -> Option { let depth = self.fmm.tree().get_depth(); - let mut l2l_time = 0; - let mut m2l_time = 0; - for level in 2..=depth { - if level > 2 { + + match time { + Some(true) => { + let mut times = TimeDict::default(); + let mut l2l_time = 0; + let mut m2l_time = 0; + + for level in 2..=depth { + if level > 2 { + let start = Instant::now(); + self.l2l(level); + l2l_time += start.elapsed().as_millis(); + } + + let start = Instant::now(); + self.m2l(level); + m2l_time += start.elapsed().as_millis(); + } + + times.insert("l2l".to_string(), l2l_time); + times.insert("m2l".to_string(), m2l_time); + + // Leaf level computations + let start = Instant::now(); + self.p2l(); + times.insert("p2l".to_string(), start.elapsed().as_millis()); + + // Sum all potential contributions + let start = Instant::now(); + self.m2p(); + times.insert("m2p".to_string(), start.elapsed().as_millis()); + + let start = Instant::now(); + self.p2p(); + times.insert("p2p".to_string(), start.elapsed().as_millis()); + let start = Instant::now(); - self.l2l(level); - l2l_time += start.elapsed().as_millis(); + self.l2p(); + times.insert("l2p".to_string(), start.elapsed().as_millis()); + + Some(times) } + Some(false) | None => { + for level in 2..=depth { + if level > 2 { + self.l2l(level); + } + self.m2l(level); + } + // Leaf level computations + self.p2l(); - let start = Instant::now(); - self.m2l(level); - m2l_time += start.elapsed().as_millis(); + // Sum all potential contributions + self.m2p(); + self.p2p(); + self.l2p(); + + None + } } - println!("M2L = {:?}ms", m2l_time); - println!("L2L = {:?}ms", l2l_time); - - let start = Instant::now(); - // Leaf level computations - self.p2l(); - println!("P2L = {:?}ms", start.elapsed().as_millis()); - - // // Sum all potential contributions - let start = Instant::now(); - self.m2p(); - println!("M2P = {:?}ms", start.elapsed().as_millis()); - let start = Instant::now(); - self.p2p(); - println!("P2P = {:?}ms", start.elapsed().as_millis()); - let start = Instant::now(); - self.l2p(); - println!("L2P = {:?}ms", start.elapsed().as_millis()); } - fn run(&self) { - self.upward_pass(); - self.downward_pass(); + fn run(&self, time: Option) -> Option { + let t1 = self.upward_pass(time); + let t2 = self.downward_pass(time); + + if let (Some(mut t1), Some(t2)) = (t1, t2) { + t1.extend(t2); + Some(t1) + } else { + None + } } } #[allow(unused_imports)] #[allow(warnings)] mod test { + use super::*; + use bempp_field::types::SvdFieldTranslationKiFmm; use bempp_kernel::laplace_3d::evaluate_laplace_one_target; // use approx::{assert_relative_eq, RelativeEq}; use rand::prelude::*; use rand::SeedableRng; - // use bempp_tree::types::point::PointType; - // use rayon::ThreadPool; - use bempp_kernel::laplace_3d::Laplace3dKernel; - // // use crate::laplace::LaplaceKernel; - use rlst::{common::traits::ColumnMajorIterator, dense::rlst_rand_mat}; - use super::*; - // #[allow(dead_code)] // fn points_fixture(npoints: usize) -> Vec { // let mut range = StdRng::seed_from_u64(0); @@ -458,21 +508,16 @@ mod test { let npoints = 1000; let points = points_fixture(npoints, None, None); - let order = 6; + let order = 5; let alpha_inner = 1.05; let alpha_outer = 2.9; let adaptive = false; - // TODO: Have to pass this information to data tree creation!!!! let k = 1000; let ncrit = 100; let depth = 2; let kernel = Laplace3dKernel::::default(); - let start = Instant::now(); let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); - println!("Tree = {:?}ms", start.elapsed().as_millis()); - - let start = Instant::now(); let m2l_data_svd = SvdFieldTranslationKiFmm::new( kernel.clone(), @@ -481,22 +526,12 @@ mod test { tree.get_domain().clone(), alpha_inner, ); - println!("SVD operators = {:?}ms", start.elapsed().as_millis()); - - // let start = Instant::now(); - // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( - // kernel.clone(), - // order, - // tree.get_domain().clone(), - // alpha_inner, - // ); - // println!("FFT operators = {:?}ms", start.elapsed().as_millis()); let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); let charges = Charges::new(); let datatree = FmmData::new(fmm, charges); - datatree.run(); + datatree.run(None); let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; @@ -531,17 +566,15 @@ mod test { &mut direct[..], ); - println!("potentials {:?}", potentials.data()); - println!("direct {:?}", direct); - - // let abs_error: f64 = potentials - // .iter() - // .zip(direct.iter()) - // .map(|(a, b)| (a - b).abs()) - // .sum(); - // let rel_error: f64 = abs_error / (direct.iter().sum::()); + let abs_error: f64 = potentials + .data() + .iter() + .zip(direct.iter()) + .map(|(a, b)| (a - b).abs()) + .sum(); + let rel_error: f64 = abs_error / (direct.iter().sum::()); - // println!("p={:?} rel_error={:?}\n", order, rel_error); - assert!(false) + println!("{:?}", rel_error); + assert!(rel_error <= 1e-6); } } diff --git a/traits/src/field.rs b/traits/src/field.rs index 1885574d..c94cb010 100644 --- a/traits/src/field.rs +++ b/traits/src/field.rs @@ -22,6 +22,7 @@ where // ); ) -> Self::M2LOperators; + // Number of coefficients for a given expansion order fn ncoeffs(&self, expansion_order: usize) -> usize; } diff --git a/traits/src/fmm.rs b/traits/src/fmm.rs index 453d9956..50966267 100644 --- a/traits/src/fmm.rs +++ b/traits/src/fmm.rs @@ -1,4 +1,6 @@ //! FMM traits +use std::collections::HashMap; + use crate::kernel::Kernel; use crate::tree::Tree; @@ -37,12 +39,15 @@ pub trait Fmm { fn tree(&self) -> &Self::Tree; } +/// Dictionary containing timings +pub type TimeDict = HashMap; + pub trait FmmLoop { - fn upward_pass(&self); + fn upward_pass(&self, time: Option) -> Option; - fn downward_pass(&self); + fn downward_pass(&self, time: Option) -> Option; - fn run(&self); + fn run(&self, time: Option) -> Option; } pub trait InteractionLists { diff --git a/tree/src/implementations/impl_domain.rs b/tree/src/implementations/impl_domain.rs index f2f88414..4d62a8cc 100644 --- a/tree/src/implementations/impl_domain.rs +++ b/tree/src/implementations/impl_domain.rs @@ -67,7 +67,9 @@ mod test { use crate::types::domain::Domain; use rlst::common::traits::ColumnMajorIterator; - use rlst::dense::{base_matrix::BaseMatrix, rlst_mat, Dynamic, Matrix, VectorContainer, RawAccess}; + use rlst::dense::{ + base_matrix::BaseMatrix, rlst_mat, Dynamic, Matrix, RawAccess, VectorContainer, + }; fn points_fixture( npoints: usize, diff --git a/tree/src/implementations/impl_single_node.rs b/tree/src/implementations/impl_single_node.rs index a4016b74..39632e89 100644 --- a/tree/src/implementations/impl_single_node.rs +++ b/tree/src/implementations/impl_single_node.rs @@ -457,7 +457,6 @@ impl Tree for SingleNodeTree { depth: Option, ) -> SingleNodeTree { // TODO: Come back and reconcile a runtime point dimension detector - // let points = points.iter().map(|p| p.coordinate).collect_vec(); let domain = Domain::from_local_points(points); @@ -525,26 +524,26 @@ mod test { use super::*; use rand::prelude::*; use rand::SeedableRng; - use rlst::dense::RawAccess; use rlst::dense::rlst_mat; - use rlst::dense::{Matrix, base_matrix::BaseMatrix, VectorContainer, Dynamic}; - + use rlst::dense::RawAccess; + use rlst::dense::{base_matrix::BaseMatrix, Dynamic, Matrix, VectorContainer}; + fn points_fixture( npoints: usize, min: Option, - max: Option + max: Option, ) -> Matrix, Dynamic, Dynamic>, Dynamic, Dynamic> { // Generate a set of randomly distributed points let mut range = StdRng::seed_from_u64(0); - + let between; - if let (Some(min),Some(max)) = (min, max) { + if let (Some(min), Some(max)) = (min, max) { between = rand::distributions::Uniform::from(min..max); } else { between = rand::distributions::Uniform::from(0.0_f64..1.0_f64); } - + let mut points = rlst_mat![f64, (npoints, 3)]; for i in 0..npoints { @@ -654,7 +653,7 @@ mod test { diameter: [1.0, 1.0, 1.0], }; let depth = 1; - + let dim = 3; let mut tmp = Points::default(); @@ -669,19 +668,6 @@ mod test { }) } let mut points = tmp; - // let mut points: Points = points - // .iter() - // .enumerate() - // .map(|(i, p)| { - // let key = MortonKey::from_point(p, &domain, depth); - // Point { - // coordinate: *p, - // encoded_key: key, - // base_key: key, - // global_idx: i, - // } - // }) - // .collect(); let keys = MortonKeys { keys: ROOT.children(), @@ -729,7 +715,7 @@ mod test { // }; let dim = 3; let npoints = 10000; - let points = points_fixture(npoints, None, None); + let points = points_fixture(npoints, None, None); let mut tmp = Points::default(); for i in 0..npoints { let point = [points[[i, 0]], points[[i, 1]], points[[i, 2]]]; From fc87b3a6e6a8925aa8bf9507e40948e3f7291148 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Thu, 6 Jul 2023 20:33:17 +0100 Subject: [PATCH 18/40] Add new kernel implementation fixes --- field/src/field.rs | 10 +-- fmm/src/field_translation.rs | 9 +- fmm/src/fmm.rs | 58 +++++-------- kernel/Cargo.toml | 3 +- kernel/src/helpers.rs | 4 +- kernel/src/laplace_3d.rs | 162 ++++------------------------------- kernel/src/lib.rs | 2 - kernel/src/traits.rs | 138 ----------------------------- kernel/src/types.rs | 23 ----- traits/src/kernel.rs | 120 ++++++++++++++++++-------- traits/src/types.rs | 25 +++++- 11 files changed, 160 insertions(+), 394 deletions(-) diff --git a/field/src/field.rs b/field/src/field.rs index ac2f03c5..22d64c95 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -7,10 +7,7 @@ use rlst::{ dense::{rlst_mat, traits::*, Dot, Shape}, }; -use bempp_traits::{ - field::FieldTranslationData, - kernel::{EvalType, Kernel}, -}; +use bempp_traits::{field::FieldTranslationData, kernel::Kernel, types::EvalType}; use bempp_tree::types::domain::Domain; use crate::{ @@ -171,13 +168,16 @@ where let mut tmp_gram = rlst_mat![f64, (ntargets, nsources)]; - self.kernel.gram( + self.kernel.assemble_st( EvalType::Value, &source_equivalent_surface[..], &target_check_surface[..], tmp_gram.data_mut(), ); + // Need to transpose so that rows correspond to targets, and columns to sources + let mut tmp_gram = tmp_gram.transpose().eval(); + let block_size = nrows * ncols; let start_idx = i * block_size; let end_idx = start_idx + block_size; diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 824cff18..2f0af23b 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -12,8 +12,9 @@ use bempp_field::types::SvdFieldTranslationKiFmm; use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, fmm::{Fmm, InteractionLists, SourceTranslation, TargetTranslation}, - kernel::{EvalType, Kernel}, + kernel::{Kernel, KernelScale}, tree::Tree, + types::EvalType, }; use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; use rlst::{ @@ -25,7 +26,7 @@ use crate::types::{FmmData, KiFmm}; impl SourceTranslation for FmmData> where - T: Kernel + std::marker::Send + std::marker::Sync, + T: Kernel + KernelScale + std::marker::Send + std::marker::Sync, U: FieldTranslationData + std::marker::Sync + std::marker::Send, { fn p2m<'a>(&self) { @@ -114,7 +115,7 @@ where impl TargetTranslation for FmmData> where - T: Kernel + std::marker::Sync + std::marker::Send, + T: Kernel + KernelScale + std::marker::Sync + std::marker::Send, U: FieldTranslationData + std::marker::Sync + std::marker::Send, { fn l2l(&self, level: u64) { @@ -366,7 +367,7 @@ where impl FieldTranslation for FmmData>> where - T: Kernel + std::marker::Sync + std::marker::Send + Default, + T: Kernel + KernelScale + std::marker::Sync + std::marker::Send + Default, { fn m2l<'a>(&self, level: u64) { let Some(targets) = self.fmm.tree().get_keys(level) else { return }; diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index ed8d75a1..3c0910c2 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -2,7 +2,7 @@ // TODO should check what happens with rectangular distributions of points would be easier to do as a part of the above todo. // TODO: charge input should be utilized NOW! // TODO: Fix the componentwise storage of pinv of dc2e/uc2e as this is losing accuracy. -// TODO Should be generic over kernel float type parameter - this requires trees to be generic over float type +// TODO Should be generic over kernel/kernel scale float type parameter - this requires trees to be generic over float type use itertools::Itertools; use std::{ @@ -13,7 +13,7 @@ use std::{ use rlst::{ algorithms::{linalg::LinAlg, traits::pseudo_inverse::Pinv}, - common::traits::{Eval, NewLikeSelf}, + common::traits::{Eval, NewLikeSelf, Transpose}, dense::{ base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix, rlst_col_vec, rlst_mat, rlst_pointer_mat, traits::*, Dot, @@ -23,8 +23,9 @@ use rlst::{ use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, fmm::{Fmm, FmmLoop, SourceTranslation, TargetTranslation, TimeDict}, - kernel::{EvalType, Kernel}, + kernel::{Kernel, KernelScale}, tree::Tree, + types::EvalType, }; use bempp_tree::{constants::ROOT, types::single_node::SingleNodeTree}; @@ -33,7 +34,7 @@ use crate::types::{C2EType, Charges, FmmData, KiFmm}; #[allow(dead_code)] impl KiFmm where - T: Kernel, + T: Kernel + KernelScale, U: FieldTranslationData, { pub fn new<'a>( @@ -70,21 +71,27 @@ where // Compute upward check to equivalent, and downward check to equivalent Gram matrices // as well as their inverses using DGESVD. let mut uc2e = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; - kernel.gram( + kernel.assemble_st( EvalType::Value, upward_equivalent_surface.data(), upward_check_surface.data(), uc2e.data_mut(), ); + // Need to tranapose so that rows correspond to targets and columns to sources + let uc2e = uc2e.transpose().eval(); + let mut dc2e = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; - kernel.gram( + kernel.assemble_st( EvalType::Value, downward_equivalent_surface.data(), downward_check_surface.data(), dc2e.data_mut(), ); + // Need to tranapose so that rows correspond to targets and columns to sources + let dc2e = dc2e.transpose().eval(); + let (s, ut, v) = uc2e.linalg().pinv(None).unwrap(); let s = s.unwrap(); let ut = ut.unwrap(); @@ -124,23 +131,29 @@ where let mut pc2ce = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; - kernel.gram( + kernel.assemble_st( EvalType::Value, child_upward_equivalent_surface.data(), upward_check_surface.data(), pc2ce.data_mut(), ); + // Need to transpose so that rows correspond to targets, and columns to sources + let pc2ce = pc2ce.transpose().eval(); + m2m.push(uc2e_inv.dot(&pc2ce).eval()); let mut cc2pe = rlst_mat![f64, (ncheck_surface, nequiv_surface)]; - kernel.gram( + kernel.assemble_st( EvalType::Value, downward_equivalent_surface.data(), &child_downward_check_surface.data(), cc2pe.data_mut(), ); + + // Need to transpose so that rows correspond to targets, and columns to sources + let cc2pe = cc2pe.transpose().eval(); l2l.push((kernel.scale(child.level()) * dc2e_inv.dot(&cc2pe)).eval()); } @@ -348,39 +361,12 @@ mod test { use bempp_field::types::SvdFieldTranslationKiFmm; use bempp_kernel::laplace_3d::evaluate_laplace_one_target; - // use approx::{assert_relative_eq, RelativeEq}; use rand::prelude::*; use rand::SeedableRng; use bempp_kernel::laplace_3d::Laplace3dKernel; use rlst::{common::traits::ColumnMajorIterator, dense::rlst_rand_mat}; - // #[allow(dead_code)] - // fn points_fixture(npoints: usize) -> Vec { - // let mut range = StdRng::seed_from_u64(0); - // let between = rand::distributions::Uniform::from(0.0..1.0); - // let mut points: Vec<[PointType; 3]> = Vec::new(); - - // for _ in 0..npoints { - // points.push([ - // between.sample(&mut range), - // between.sample(&mut range), - // between.sample(&mut range), - // ]) - // } - - // let points = points - // .iter() - // .enumerate() - // .map(|(i, p)| Point { - // coordinate: *p, - // global_idx: i, - // base_key: MortonKey::default(), - // encoded_key: MortonKey::default(), - // }) - // .collect_vec(); - // points - // } fn points_fixture( npoints: usize, min: Option, @@ -575,6 +561,6 @@ mod test { let rel_error: f64 = abs_error / (direct.iter().sum::()); println!("{:?}", rel_error); - assert!(rel_error <= 1e-6); + assert!(rel_error <= 1e-5); } } diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 2301dfbd..cdabfa2b 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -28,7 +28,8 @@ approx = "0.5" rayon = "1.7" num = "0.4" num_cpus = "1" -rlst = {git = "https://github.com/linalg-rs/rlst.git" } +# rlst = {git = "https://github.com/skailasa/rlst.git" } +rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse"} [dev-dependencies] rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse"} diff --git a/kernel/src/helpers.rs b/kernel/src/helpers.rs index 4ad6ad97..927c8681 100644 --- a/kernel/src/helpers.rs +++ b/kernel/src/helpers.rs @@ -1,6 +1,6 @@ use bempp_traits::{ - kernel::{EvalType, Kernel}, - types::Scalar, + kernel::Kernel, + types::{EvalType, Scalar}, }; pub(crate) fn check_dimensions_evaluate( diff --git a/kernel/src/laplace_3d.rs b/kernel/src/laplace_3d.rs index 32f1e583..5433497d 100644 --- a/kernel/src/laplace_3d.rs +++ b/kernel/src/laplace_3d.rs @@ -1,13 +1,12 @@ //! Implementation of the Laplace kernel -use bempp_traits::{ - kernel::{EvalType, Kernel, KernelType}, - types::Scalar, -}; use num; use std::marker::PhantomData; use crate::helpers::{check_dimensions_assemble, check_dimensions_evaluate}; -use bempp_traits::types::Scalar; +use bempp_traits::{ + kernel::{Kernel, KernelScale}, + types::{EvalType, KernelType, Scalar}, +}; use num::traits::FloatConst; use rayon::prelude::*; @@ -17,6 +16,18 @@ pub struct Laplace3dKernel { _phantom_t: std::marker::PhantomData, } +impl> KernelScale for Laplace3dKernel { + type T = T; + + fn scale(&self, level: u64) -> Self::T { + let numerator = T::from(1).unwrap(); + let denominator = T::from(2.).unwrap(); + let power = T::from(level).unwrap(); + let denominator = denominator.powf(power); + numerator / denominator + } +} + impl Laplace3dKernel { pub fn new() -> Self { Self { @@ -92,7 +103,7 @@ where fn assemble_st( &self, - eval_type: crate::types::EvalType, + eval_type: EvalType, sources: &[::Real], targets: &[::Real], result: &mut [Self::T], @@ -103,7 +114,7 @@ where fn assemble_mt( &self, - eval_type: crate::types::EvalType, + eval_type: EvalType, sources: &[::Real], targets: &[::Real], result: &mut [Self::T], @@ -133,92 +144,6 @@ where fn range_component_count(&self, eval_type: EvalType) -> usize { laplace_component_count(eval_type) } - - fn gram( - &self, - eval_type: EvalType, - sources: &[::Real], - targets: &[::Real], - result: &mut [Self::T], - ) { - let ntargets = targets.len() / self.space_dimension(); - let nsources = sources.len() / self.space_dimension(); - for i in 0..ntargets { - let target = [targets[i], targets[ntargets + i], targets[2 * ntargets + i]]; - for j in 0..nsources { - let source = [sources[j], sources[nsources + j], sources[2 * nsources + j]]; - let idx = i + ntargets * j; - - evaluate_laplace_one_target_one_source::( - eval_type, - &target, - &source, - &mut result[idx..idx + 1], - ); - } - } - } - - fn scale(&self, level: u64) -> f64 { - 1. / (2f64.powf(level as f64)) - } -} - -pub fn evaluate_laplace_one_target_one_source( - eval_type: EvalType, - target: &[::Real], - source: &[::Real], - result: &mut [T], -) { - let m_inv_4pi = num::cast::(0.25 * f64::FRAC_1_PI()).unwrap(); - let zero_real = ::zero(); - let one_real = ::one(); - - match eval_type { - EvalType::Value => { - let diff_norm = ((target[0] - source[0]) * (target[0] - source[0]) - + (target[1] - source[1]) * (target[1] - source[1]) - + (target[2] - source[2]) * (target[2] - source[2])) - .sqrt(); - let inv_diff_norm = { - if diff_norm == zero_real { - zero_real - } else { - one_real / diff_norm - } - }; - - let my_result = T::from(inv_diff_norm).unwrap(); - result[0] = my_result.mul_real(m_inv_4pi) - } - EvalType::ValueDeriv => { - // Cannot simply use an array my_result as this is not - // correctly auto-vectorized. - - let diff0 = source[0] - target[0]; - let diff1 = source[1] - target[1]; - let diff2 = source[2] - target[2]; - let diff_norm = (diff0 * diff0 + diff1 * diff1 + diff2 * diff2).sqrt(); - let inv_diff_norm = { - if diff_norm == zero_real { - zero_real - } else { - one_real / diff_norm - } - }; - let inv_diff_norm_cubed = inv_diff_norm * inv_diff_norm * inv_diff_norm; - - let my_result0 = T::from(inv_diff_norm).unwrap(); - let my_result1 = T::from(diff0 * inv_diff_norm_cubed).unwrap(); - let my_result2 = T::from(diff1 * inv_diff_norm_cubed).unwrap(); - let my_result3 = T::from(diff2 * inv_diff_norm_cubed).unwrap(); - - result[0] = my_result0.mul_real(m_inv_4pi); - result[1] = my_result1.mul_real(m_inv_4pi); - result[2] = my_result2.mul_real(m_inv_4pi); - result[3] = my_result3.mul_real(m_inv_4pi); - } - } } pub fn evaluate_laplace_one_target( @@ -418,10 +343,9 @@ mod test { use approx::assert_relative_eq; use bempp_traits::types::Scalar; use rlst; - use rlst::common::traits::{Copy, Eval, Transpose}; use rlst::dense::traits::*; - + #[test] fn test_laplace_3d() { let eps = 1E-8; @@ -633,52 +557,4 @@ mod test { green_value_deriv.data_mut(), ); } - - #[test] - fn test_gram() { - let eps = 1E-12; - - let nsources = 5; - let ntargets = 3; - - let sources = rlst::dense::rlst_rand_mat![f64, (nsources, 3)]; - let targets = rlst::dense::rlst_rand_mat![f64, (ntargets, 3)]; - - let mut gram = rlst::dense::rlst_rand_mat![f64, (ntargets, nsources)]; - - let kernel = Laplace3dKernel::::default(); - - kernel.gram( - EvalType::Value, - sources.data(), - targets.data(), - gram.data_mut(), - ); - - for i in 0..ntargets { - let target = [ - targets.data()[i], - targets.data()[ntargets + i], - targets.data()[ntargets * 2 + i], - ]; - - for j in 0..nsources { - let source = [ - sources.data()[j], - sources.data()[nsources + j], - sources.data()[nsources * 2 + j], - ]; - - let result = gram[[i, j]]; - let mut expected = vec![0f64]; - evaluate_laplace_one_target_one_source::( - EvalType::Value, - &target, - &source, - &mut expected, - ); - assert_relative_eq!(expected[0], result, epsilon = eps); - } - } - } } diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 0c92d896..c012280c 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -1,4 +1,2 @@ pub mod helpers; pub mod laplace_3d; -pub mod traits; -pub mod types; diff --git a/kernel/src/traits.rs b/kernel/src/traits.rs index b330b4f6..e69de29b 100644 --- a/kernel/src/traits.rs +++ b/kernel/src/traits.rs @@ -1,138 +0,0 @@ -// //! Trait for Green's function kernels -// use crate::types::EvalType; -// use crate::types::KernelType; -// use bempp_traits::types::Scalar; - -// use rayon::ThreadPool; - -// /// Interface to evaluating Green's functions for given sources and targets. -// pub trait Kernel { -// type T: Scalar; - /// Single threaded evaluation of Green's functions. - /// - /// - `eval_type`: Either [EvalType::Value] to only return Green's function values - /// or [EvalType::ValueDeriv] to return values and derivatives. - /// - `sources`: A slice defining the source points. The points must be given in the form - /// `[x_1, x_2, ... x_N, y_1, y_2, ..., y_N, z_1, z_2, ..., z_N]`, that is - /// the value for each dimension must be continuously contained in the slice. - /// - `targets`: A slice defining the targets. The memory layout is the same as for sources. - /// - `charges`: A slice defining the charges. For each source point there needs to be one charge. - /// - `result`: The result array. If the kernel is scalar and `eval_type` has the value [EvalType::Value] - /// then `result` has the same number of elemens as there are targets. For a scalar kernel - /// in three dimensional space if [EvalType::ValueDeriv] was chosen then `result` contains - /// for each target in consecutive order the value of the kernel and the three components - /// of its derivative. - /// - /// The following code gives an example of how to use it together with the [rlst] dense matrix type. - /// ``` - /// use rlst::dense::*; - /// use bempp_kernel::traits::*; - /// use bempp_kernel::laplace_3d::Laplace3dKernel; - /// use bempp_kernel::types::*; - /// let nsources = 5; - /// let ntargets = 10; - /// - /// let sources = rlst::dense::rlst_rand_mat![f64, (nsources, 3)]; - /// let targets = rlst::dense::rlst_rand_mat![f64, (ntargets, 3)]; - /// let charges = rlst::dense::rlst_col_vec![f64, nsources]; - /// let mut interactions = rlst::dense::rlst_mat![f64, (4, ntargets)]; - /// - /// Laplace3dKernel::::new().evaluate_st(EvalType::ValueDeriv, sources.data(), targets.data(), charges.data(), interactions.data_mut()); - /// - /// println!("The value of the potential at the second target is {}", interactions[[0, 1]]); - /// println!("The target derivative of the potential at the second target is ({}, {}, {})", interactions[[1, 1]], interactions[[2, 1]], interactions[[3, 1]]); - ///``` - fn evaluate_st( - &self, - eval_type: EvalType, - sources: &[::Real], - targets: &[::Real], - charges: &[Self::T], - result: &mut [Self::T], - ); - -// /// Multi-threaded evaluation of a Green's function kernel. -// /// -// /// The method parallelizes over the given targets. It expects a Rayon [ThreadPool] -// /// in which the multi-threaded execution can be scheduled. -// fn evaluate_mt( -// &self, -// eval_type: EvalType, -// sources: &[::Real], -// targets: &[::Real], -// charges: &[Self::T], -// result: &mut [Self::T], -// thread_pool: &ThreadPool, -// ); - /// Single threaded assembly of a kernel matrix. - /// - /// - `eval_type`: Either [EvalType::Value] to only return Green's function values - /// or [EvalType::ValueDeriv] to return values and derivatives. - /// - `sources`: A slice defining the source points. The points must be given in the form - /// `[x_1, x_2, ... x_N, y_1, y_2, ..., y_N, z_1, z_2, ..., z_N]`, that is - /// the value for each dimension must be continuously contained in the slice. - /// - `targets`: A slice defining the targets. The memory layout is the same as for sources. - /// - `result`: The result array. If the kernel is scalar and `eval_type` has the value [EvalType::Value] - /// then `result` has MxN elements with M the number of targets and N the number of targets. - /// For a scalar kernel in three dimensional space if [EvalType::ValueDeriv] was chosen then `result` contains - /// in consecutive order the interaction of all sources with the first target and then the corresponding derivatives, - /// followed by the interactions with the second target, and so on. See the example for illustration. - /// - /// The following code gives an example of how to use it together with the [rlst] dense matrix type. - /// ``` - /// use rlst::dense::*; - /// use bempp_kernel::traits::*; - /// use bempp_kernel::laplace_3d::Laplace3dKernel; - /// use bempp_kernel::types::*; - /// let nsources = 5; - /// let ntargets = 10; - /// - /// let sources = rlst::dense::rlst_rand_mat![f64, (nsources, 3)]; - /// let targets = rlst::dense::rlst_rand_mat![f64, (ntargets, 3)]; - /// let mut interactions = rlst::dense::rlst_mat![f64, (nsources, 4 * ntargets)]; - /// - /// Laplace3dKernel::::new().assemble_st(EvalType::ValueDeriv, sources.data(), targets.data(), interactions.data_mut()); - /// - /// // The column index of the third target interaction is 8 = 2 * 4, since each - /// // target is associated with its interaction value plus 3 derivatives, i.e. 4 values. - /// // The derivatives correspondingly have the column indices 9, 10, 11. - /// // If EvalType::Value is chosen then the column index would be 2 as then each target - /// // is only associated with 1 value. - /// println!("The interaction of the second source with the third target is {}", interactions[[1, 8]]); - /// println!("The target derivative of the potential at the second target is ({}, {}, {})", interactions[[1, 9]], interactions[[1, 10]], interactions[[1, 11]]); - ///``` - fn assemble_st( - &self, - eval_type: EvalType, - sources: &[::Real], - targets: &[::Real], - result: &mut [Self::T], - ); - - /// Multi-threaded version of kernel matrix assembly. - fn assemble_mt( - &self, - eval_type: EvalType, - sources: &[::Real], - targets: &[::Real], - result: &mut [Self::T], - thread_pool: &ThreadPool, - ); - - /// Return the type of the kernel. - fn kernel_type(&self) -> &KernelType; - -// /// Return the domain component count of the Green's fct. -// /// -// /// For a scalar kernel this is `1`. -// fn domain_component_count(&self) -> usize; - -// /// Return the space dimension. -// fn space_dimension(&self) -> usize; - -// /// Return the range component count of the Green's fct. -// /// -// /// For a scalar kernel this is `1` if [EvalType::Value] is -// /// given, and `4` if [EvalType::ValueDeriv] is given. -// fn range_component_count(&self, eval_type: EvalType) -> usize; -// } diff --git a/kernel/src/types.rs b/kernel/src/types.rs index 2f7c9e41..e69de29b 100644 --- a/kernel/src/types.rs +++ b/kernel/src/types.rs @@ -1,23 +0,0 @@ -// use bempp_traits::types::c64; - -// /// Evaluation Mode. -// /// -// /// - `Value`: Declares that only values required. -// /// - `Deriv`: Declare that only derivative required. -// /// - `ValueDeriv` Both values and derivatives required. -// #[derive(Clone, Copy)] -// pub enum EvalType { -// Value, -// ValueDeriv, -// } - -// /// This enum defines the type of the kernel. -// #[derive(Clone, Copy)] -// pub enum KernelType { -// /// The Laplace kernel defined as g(x, y) = 1 / (4 pi | x- y| ) -// Laplace, -// /// The Helmholtz kernel defined as g(x, y) = exp( 1j * k * | x- y| ) / (4 pi | x- y| ) -// Helmholtz(c64), -// /// The modified Helmholtz kernel defined as g(x, y) = exp( -omega * | x- y| ) / (4 * pi * | x- y |) -// ModifiedHelmholtz(f64), -// } diff --git a/traits/src/kernel.rs b/traits/src/kernel.rs index ee904754..0ab9211e 100644 --- a/traits/src/kernel.rs +++ b/traits/src/kernel.rs @@ -1,33 +1,7 @@ -// //! Traits for creating integral equation kernels. - //! Trait for Green's function kernels - +use crate::types::{EvalType, KernelType, Scalar}; use rayon::ThreadPool; -use crate::types::{c64, Scalar}; - -/// Evaluation Mode. -/// -/// - `Value`: Declares that only values required. -/// - `Deriv`: Declare that only derivative required. -/// - `ValueDeriv` Both values and derivatives required. -#[derive(Clone, Copy)] -pub enum EvalType { - Value, - ValueDeriv, -} - -/// This enum defines the type of the kernel. -#[derive(Clone, Copy)] -pub enum KernelType { - /// The Laplace kernel defined as g(x, y) = 1 / (4 pi | x- y| ) - Laplace, - /// The Helmholtz kernel defined as g(x, y) = exp( 1j * k * | x- y| ) / (4 pi | x- y| ) - Helmholtz(c64), - /// The modified Helmholtz kernel defined as g(x, y) = exp( -omega * | x- y| ) / (4 * pi * | x- y |) - ModifiedHelmholtz(f64), -} - /// Interface to evaluating Green's functions for given sources and targets. pub trait Kernel { type T: Scalar; @@ -42,10 +16,30 @@ pub trait Kernel { /// - `targets`: A slice defining the targets. The memory layout is the same as for sources. /// - `charges`: A slice defining the charges. For each source point there needs to be one charge. /// - `result`: The result array. If the kernel is scalar and `eval_type` has the value [EvalType::Value] - /// then `result` has the same number of elements as there are targets. For a scalar kernel + /// then `result` has the same number of elemens as there are targets. For a scalar kernel /// in three dimensional space if [EvalType::ValueDeriv] was chosen then `result` contains /// for each target in consecutive order the value of the kernel and the three components /// of its derivative. + /// + /// The following code gives an example of how to use it together with the [rlst] dense matrix type. + /// ``` + /// use rlst::dense::*; + /// use bempp_kernel::traits::*; + /// use bempp_kernel::laplace_3d::Laplace3dKernel; + /// use bempp_kernel::types::*; + /// let nsources = 5; + /// let ntargets = 10; + /// + /// let sources = rlst::dense::rlst_rand_mat![f64, (nsources, 3)]; + /// let targets = rlst::dense::rlst_rand_mat![f64, (ntargets, 3)]; + /// let charges = rlst::dense::rlst_col_vec![f64, nsources]; + /// let mut interactions = rlst::dense::rlst_mat![f64, (4, ntargets)]; + /// + /// Laplace3dKernel::::new().evaluate_st(EvalType::ValueDeriv, sources.data(), targets.data(), charges.data(), interactions.data_mut()); + /// + /// println!("The value of the potential at the second target is {}", interactions[[0, 1]]); + /// println!("The target derivative of the potential at the second target is ({}, {}, {})", interactions[[1, 1]], interactions[[2, 1]], interactions[[3, 1]]); + ///``` fn evaluate_st( &self, eval_type: EvalType, @@ -69,6 +63,61 @@ pub trait Kernel { thread_pool: &ThreadPool, ); + /// Single threaded assembly of a kernel matrix. + /// + /// - `eval_type`: Either [EvalType::Value] to only return Green's function values + /// or [EvalType::ValueDeriv] to return values and derivatives. + /// - `sources`: A slice defining the source points. The points must be given in the form + /// `[x_1, x_2, ... x_N, y_1, y_2, ..., y_N, z_1, z_2, ..., z_N]`, that is + /// the value for each dimension must be continuously contained in the slice. + /// - `targets`: A slice defining the targets. The memory layout is the same as for sources. + /// - `result`: The result array. If the kernel is scalar and `eval_type` has the value [EvalType::Value] + /// then `result` has MxN elements with M the number of targets and N the number of targets. + /// For a scalar kernel in three dimensional space if [EvalType::ValueDeriv] was chosen then `result` contains + /// in consecutive order the interaction of all sources with the first target and then the corresponding derivatives, + /// followed by the interactions with the second target, and so on. See the example for illustration. + /// + /// The following code gives an example of how to use it together with the [rlst] dense matrix type. + /// ``` + /// use rlst::dense::*; + /// use bempp_kernel::traits::*; + /// use bempp_kernel::laplace_3d::Laplace3dKernel; + /// use bempp_kernel::types::*; + /// let nsources = 5; + /// let ntargets = 10; + /// + /// let sources = rlst::dense::rlst_rand_mat![f64, (nsources, 3)]; + /// let targets = rlst::dense::rlst_rand_mat![f64, (ntargets, 3)]; + /// let mut interactions = rlst::dense::rlst_mat![f64, (nsources, 4 * ntargets)]; + /// + /// Laplace3dKernel::::new().assemble_st(EvalType::ValueDeriv, sources.data(), targets.data(), interactions.data_mut()); + /// + /// // The column index of the third target interaction is 8 = 2 * 4, since each + /// // target is associated with its interaction value plus 3 derivatives, i.e. 4 values. + /// // The derivatives correspondingly have the column indices 9, 10, 11. + /// // If EvalType::Value is chosen then the column index would be 2 as then each target + /// // is only associated with 1 value. + /// println!("The interaction of the second source with the third target is {}", interactions[[1, 8]]); + /// println!("The target derivative of the potential at the second target is ({}, {}, {})", interactions[[1, 9]], interactions[[1, 10]], interactions[[1, 11]]); + ///``` + fn assemble_st( + &self, + eval_type: EvalType, + sources: &[::Real], + targets: &[::Real], + result: &mut [Self::T], + ); + + /// Multi-threaded version of kernel matrix assembly. + fn assemble_mt( + &self, + eval_type: EvalType, + sources: &[::Real], + targets: &[::Real], + result: &mut [Self::T], + thread_pool: &ThreadPool, + ); + /// Return the type of the kernel. fn kernel_type(&self) -> &KernelType; @@ -85,16 +134,11 @@ pub trait Kernel { /// For a scalar kernel this is `1` if [EvalType::Value] is /// given, and `4` if [EvalType::ValueDeriv] is given. fn range_component_count(&self, eval_type: EvalType) -> usize; +} - // Return a Gram matrix between the sources and targets - fn gram( - &self, - eval_type: EvalType, - sources: &[::Real], - targets: &[::Real], - result: &mut [Self::T], - ); +// Scaling required by the FMM to apply kernel to each octree level. +pub trait KernelScale { + type T: Scalar; - // Scale the kernel to a given level of the associated tree, for the FMM. - fn scale(&self, level: u64) -> f64; + fn scale(&self, level: u64) -> Self::T; } diff --git a/traits/src/types.rs b/traits/src/types.rs index 8fa93327..726529c9 100644 --- a/traits/src/types.rs +++ b/traits/src/types.rs @@ -2,8 +2,7 @@ // Definition of scalar types. // For now we simply derive from the `caucy::Scalar` type. -pub use cauchy::Scalar; -pub use cauchy::{c32, c64}; +pub use cauchy::{c32, c64, Scalar}; // Declare if entity is local, a ghost, or remote. pub enum Locality { @@ -21,3 +20,25 @@ pub enum Error { // Result Type pub type Result = std::result::Result; + +/// Evaluation Mode. +/// +/// - `Value`: Declares that only values required. +/// - `Deriv`: Declare that only derivative required. +/// - `ValueDeriv` Both values and derivatives required. +#[derive(Clone, Copy)] +pub enum EvalType { + Value, + ValueDeriv, +} + +/// This enum defines the type of the kernel. +#[derive(Clone, Copy)] +pub enum KernelType { + /// The Laplace kernel defined as g(x, y) = 1 / (4 pi | x- y| ) + Laplace, + /// The Helmholtz kernel defined as g(x, y) = exp( 1j * k * | x- y| ) / (4 pi | x- y| ) + Helmholtz(c64), + /// The modified Helmholtz kernel defined as g(x, y) = exp( -omega * | x- y| ) / (4 * pi * | x- y |) + ModifiedHelmholtz(f64), +} From 90d4c7371c631b0fd30d0fb71edc5cc2fc675c4b Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Thu, 6 Jul 2023 20:35:06 +0100 Subject: [PATCH 19/40] Remove redundant files from kernel --- kernel/out.txt | 473 ------------------------------------------- kernel/src/traits.rs | 0 kernel/src/types.rs | 0 3 files changed, 473 deletions(-) delete mode 100644 kernel/out.txt delete mode 100644 kernel/src/traits.rs delete mode 100644 kernel/src/types.rs diff --git a/kernel/out.txt b/kernel/out.txt deleted file mode 100644 index 82d09ac8..00000000 --- a/kernel/out.txt +++ /dev/null @@ -1,473 +0,0 @@ -; bempp_kernel::laplace_3d::simd_wrapper_evaluate -; Function Attrs: uwtable -define void @bempp_kernel::laplace_3d::simd_wrapper_evaluate(i1 noundef zeroext %eval_type, ptr noalias nocapture noundef nonnull readonly align 4 %target.0, i64 noundef %target.1, ptr noalias nocapture noundef nonnull readonly align 4 %sources.0, i64 noundef %sources.1, ptr noalias nocapture noundef nonnull readonly align 4 %charges.0, i64 noundef %charges.1, ptr noalias nocapture noundef nonnull writeonly align 4 %result.0, i64 noundef %result.1) unnamed_addr #0 personality ptr @rust_eh_personality { -start: - tail call void @llvm.experimental.noalias.scope.decl(metadata !6) - tail call void @llvm.experimental.noalias.scope.decl(metadata !9) - tail call void @llvm.experimental.noalias.scope.decl(metadata !11) - tail call void @llvm.experimental.noalias.scope.decl(metadata !13) - %_9.i.i = icmp ugt i64 %charges.1, %sources.1 - br i1 %_9.i.i, label %bb3.i.i, label %" as core::slice::index::SliceIndex<[T]>>::index.exit.i" - -bb3.i.i: ; preds = %start -; call core::slice::index::slice_end_index_len_fail - tail call void @core::slice::index::slice_end_index_len_fail(i64 noundef %charges.1, i64 noundef %sources.1, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc178) #10, !noalias !15 - unreachable - -" as core::slice::index::SliceIndex<[T]>>::index.exit.i": ; preds = %start - %_24.i = shl i64 %charges.1, 1 - %_3.i.i = icmp ult i64 %_24.i, %charges.1 - br i1 %_3.i.i, label %bb1.i.i, label %bb2.i.i - -bb2.i.i: ; preds = %" as core::slice::index::SliceIndex<[T]>>::index.exit.i" - %_9.i38.i = icmp ugt i64 %_24.i, %sources.1 - br i1 %_9.i38.i, label %bb3.i39.i, label %" as core::slice::index::SliceIndex<[T]>>::index.exit40.i" - -bb1.i.i: ; preds = %" as core::slice::index::SliceIndex<[T]>>::index.exit.i" -; call core::slice::index::slice_index_order_fail - tail call void @core::slice::index::slice_index_order_fail(i64 noundef %charges.1, i64 noundef %_24.i, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc180) #10, !noalias !18 - unreachable - -bb3.i39.i: ; preds = %bb2.i.i -; call core::slice::index::slice_end_index_len_fail - tail call void @core::slice::index::slice_end_index_len_fail(i64 noundef %_24.i, i64 noundef %sources.1, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc180) #10, !noalias !18 - unreachable - -" as core::slice::index::SliceIndex<[T]>>::index.exit40.i": ; preds = %bb2.i.i - %0 = getelementptr inbounds float, ptr %sources.0, i64 %charges.1 - %_32.i = mul i64 %charges.1, 3 - %_3.i41.i = icmp ult i64 %_32.i, %_24.i - br i1 %_3.i41.i, label %bb1.i44.i, label %bb2.i43.i - -bb2.i43.i: ; preds = %" as core::slice::index::SliceIndex<[T]>>::index.exit40.i" - %_9.i42.i = icmp ugt i64 %_32.i, %sources.1 - br i1 %_9.i42.i, label %bb3.i46.i, label %" as core::slice::index::SliceIndex<[T]>>::index.exit47.i" - -bb1.i44.i: ; preds = %" as core::slice::index::SliceIndex<[T]>>::index.exit40.i" -; call core::slice::index::slice_index_order_fail - tail call void @core::slice::index::slice_index_order_fail(i64 noundef %_24.i, i64 noundef %_32.i, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc182) #10, !noalias !21 - unreachable - -bb3.i46.i: ; preds = %bb2.i43.i -; call core::slice::index::slice_end_index_len_fail - tail call void @core::slice::index::slice_end_index_len_fail(i64 noundef %_32.i, i64 noundef %sources.1, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc182) #10, !noalias !21 - unreachable - -" as core::slice::index::SliceIndex<[T]>>::index.exit47.i": ; preds = %bb2.i43.i - %1 = getelementptr inbounds float, ptr %sources.0, i64 %_24.i - %.not125.i = icmp eq i64 %charges.1, 0 - br i1 %eval_type, label %bb40.preheader.i, label %bb7.preheader.i - -bb7.preheader.i: ; preds = %" as core::slice::index::SliceIndex<[T]>>::index.exit47.i" - br i1 %.not125.i, label %bb10.i, label %bb11.lr.ph.i - -bb11.lr.ph.i: ; preds = %bb7.preheader.i - %_56.not.i = icmp eq i64 %target.1, 0 - %2 = getelementptr inbounds [0 x float], ptr %target.0, i64 0, i64 1 - %_74.i = icmp ugt i64 %target.1, 2 - %3 = getelementptr inbounds [0 x float], ptr %target.0, i64 0, i64 2 - br i1 %_56.not.i, label %panic24.i, label %bb11.lr.ph.split.i, !prof !24 - -bb11.lr.ph.split.i: ; preds = %bb11.lr.ph.i - %_65.not.i = icmp eq i64 %target.1, 1 - %_53.i = load float, ptr %target.0, align 4, !alias.scope !6, !noalias !25, !noundef !1 - br i1 %_65.not.i, label %panic26.i, label %bb11.lr.ph.split.split.us.i, !prof !24 - -bb11.lr.ph.split.split.us.i: ; preds = %bb11.lr.ph.split.i - %_62.us.i = load float, ptr %2, align 4, !alias.scope !6, !noalias !25, !noundef !1 - br i1 %_74.i, label %bb11.lr.ph.split.split.us.split.us.i, label %bb14.us.i, !prof !26 - -bb11.lr.ph.split.split.us.split.us.i: ; preds = %bb11.lr.ph.split.split.us.i - %_71.us.us.i = load float, ptr %3, align 4, !alias.scope !6, !noalias !25, !noundef !1 - %min.iters.check = icmp ult i64 %charges.1, 8 - br i1 %min.iters.check, label %bb14.us.us.i.preheader, label %vector.ph - -vector.ph: ; preds = %bb11.lr.ph.split.split.us.split.us.i - %n.vec = and i64 %charges.1, -8 - %broadcast.splatinsert = insertelement <4 x float> poison, float %_53.i, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert18 = insertelement <4 x float> poison, float %_53.i, i64 0 - %broadcast.splat19 = shufflevector <4 x float> %broadcast.splatinsert18, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert22 = insertelement <4 x float> poison, float %_62.us.i, i64 0 - %broadcast.splat23 = shufflevector <4 x float> %broadcast.splatinsert22, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert24 = insertelement <4 x float> poison, float %_62.us.i, i64 0 - %broadcast.splat25 = shufflevector <4 x float> %broadcast.splatinsert24, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert28 = insertelement <4 x float> poison, float %_71.us.us.i, i64 0 - %broadcast.splat29 = shufflevector <4 x float> %broadcast.splatinsert28, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert30 = insertelement <4 x float> poison, float %_71.us.us.i, i64 0 - %broadcast.splat31 = shufflevector <4 x float> %broadcast.splatinsert30, <4 x float> poison, <4 x i32> zeroinitializer - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %vec.phi = phi float [ 0.000000e+00, %vector.ph ], [ %39, %vector.body ] - %4 = getelementptr inbounds [0 x float], ptr %sources.0, i64 0, i64 %index - %wide.load = load <4 x float>, ptr %4, align 4, !alias.scope !9, !noalias !27 - %5 = getelementptr inbounds float, ptr %4, i64 4 - %wide.load17 = load <4 x float>, ptr %5, align 4, !alias.scope !9, !noalias !27 - %6 = fsub <4 x float> %wide.load, %broadcast.splat - %7 = fsub <4 x float> %wide.load17, %broadcast.splat19 - %8 = getelementptr inbounds [0 x float], ptr %0, i64 0, i64 %index - %wide.load20 = load <4 x float>, ptr %8, align 4, !alias.scope !9, !noalias !27 - %9 = getelementptr inbounds float, ptr %8, i64 4 - %wide.load21 = load <4 x float>, ptr %9, align 4, !alias.scope !9, !noalias !27 - %10 = fsub <4 x float> %wide.load20, %broadcast.splat23 - %11 = fsub <4 x float> %wide.load21, %broadcast.splat25 - %12 = getelementptr inbounds [0 x float], ptr %1, i64 0, i64 %index - %wide.load26 = load <4 x float>, ptr %12, align 4, !alias.scope !9, !noalias !27 - %13 = getelementptr inbounds float, ptr %12, i64 4 - %wide.load27 = load <4 x float>, ptr %13, align 4, !alias.scope !9, !noalias !27 - %14 = fsub <4 x float> %wide.load26, %broadcast.splat29 - %15 = fsub <4 x float> %wide.load27, %broadcast.splat31 - %16 = fmul <4 x float> %6, %6 - %17 = fmul <4 x float> %7, %7 - %18 = fmul <4 x float> %10, %10 - %19 = fmul <4 x float> %11, %11 - %20 = fadd <4 x float> %16, %18 - %21 = fadd <4 x float> %17, %19 - %22 = fmul <4 x float> %14, %14 - %23 = fmul <4 x float> %15, %15 - %24 = fadd <4 x float> %20, %22 - %25 = fadd <4 x float> %21, %23 - %26 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %24) - %27 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %25) - %28 = fcmp oeq <4 x float> %26, zeroinitializer - %29 = fcmp oeq <4 x float> %27, zeroinitializer - %30 = fdiv <4 x float> , %26 - %31 = fdiv <4 x float> , %27 - %32 = select <4 x i1> %28, <4 x float> zeroinitializer, <4 x float> %30 - %33 = select <4 x i1> %29, <4 x float> zeroinitializer, <4 x float> %31 - %34 = getelementptr inbounds [0 x float], ptr %charges.0, i64 0, i64 %index - %wide.load32 = load <4 x float>, ptr %34, align 4, !alias.scope !11, !noalias !28 - %35 = getelementptr inbounds float, ptr %34, i64 4 - %wide.load33 = load <4 x float>, ptr %35, align 4, !alias.scope !11, !noalias !28 - %36 = fmul <4 x float> %wide.load32, %32 - %37 = fmul <4 x float> %wide.load33, %33 - %38 = tail call float @llvm.vector.reduce.fadd.v4f32(float %vec.phi, <4 x float> %36) - %39 = tail call float @llvm.vector.reduce.fadd.v4f32(float %38, <4 x float> %37) - %index.next = add nuw i64 %index, 8 - %40 = icmp eq i64 %index.next, %n.vec - br i1 %40, label %middle.block, label %vector.body, !llvm.loop !29 - -middle.block: ; preds = %vector.body - %cmp.n = icmp eq i64 %n.vec, %charges.1 - br i1 %cmp.n, label %bb10.loopexit.i, label %bb14.us.us.i.preheader - -bb14.us.us.i.preheader: ; preds = %bb11.lr.ph.split.split.us.split.us.i, %middle.block - %my_result.098.us.us.i.ph = phi float [ 0.000000e+00, %bb11.lr.ph.split.split.us.split.us.i ], [ %39, %middle.block ] - %iter.sroa.0.097.us.us.i.ph = phi i64 [ 0, %bb11.lr.ph.split.split.us.split.us.i ], [ %n.vec, %middle.block ] - br label %bb14.us.us.i - -bb14.us.us.i: ; preds = %bb14.us.us.i.preheader, %bb14.us.us.i - %my_result.098.us.us.i = phi float [ %58, %bb14.us.us.i ], [ %my_result.098.us.us.i.ph, %bb14.us.us.i.preheader ] - %iter.sroa.0.097.us.us.i = phi i64 [ %43, %bb14.us.us.i ], [ %iter.sroa.0.097.us.us.i.ph, %bb14.us.us.i.preheader ] - %41 = getelementptr inbounds [0 x float], ptr %sources.0, i64 0, i64 %iter.sroa.0.097.us.us.i - %_49.us.us.i = load float, ptr %41, align 4, !alias.scope !9, !noalias !27, !noundef !1 - %42 = fsub float %_49.us.us.i, %_53.i - %43 = add nuw i64 %iter.sroa.0.097.us.us.i, 1 - %44 = getelementptr inbounds [0 x float], ptr %0, i64 0, i64 %iter.sroa.0.097.us.us.i - %_58.us.us.i = load float, ptr %44, align 4, !alias.scope !9, !noalias !27, !noundef !1 - %45 = fsub float %_58.us.us.i, %_62.us.i - %46 = getelementptr inbounds [0 x float], ptr %1, i64 0, i64 %iter.sroa.0.097.us.us.i - %_67.us.us.i = load float, ptr %46, align 4, !alias.scope !9, !noalias !27, !noundef !1 - %47 = fsub float %_67.us.us.i, %_71.us.us.i - %48 = fmul float %42, %42 - %49 = fmul float %45, %45 - %50 = fadd float %48, %49 - %51 = fmul float %47, %47 - %52 = fadd float %50, %51 - %53 = tail call float @llvm.sqrt.f32(float %52) - %54 = fcmp oeq float %53, 0.000000e+00 - %55 = fdiv float 1.000000e+00, %53 - %inv_diff_norm.0.us.us.i = select i1 %54, float 0.000000e+00, float %55 - %56 = getelementptr inbounds [0 x float], ptr %charges.0, i64 0, i64 %iter.sroa.0.097.us.us.i - %_96.us.us.i = load float, ptr %56, align 4, !alias.scope !11, !noalias !28, !noundef !1 - %57 = fmul float %_96.us.us.i, %inv_diff_norm.0.us.us.i - %58 = fadd float %my_result.098.us.us.i, %57 - %exitcond.not.i = icmp eq i64 %43, %charges.1 - br i1 %exitcond.not.i, label %bb10.loopexit.i, label %bb14.us.us.i, !llvm.loop !31 - -bb14.us.i: ; preds = %bb11.lr.ph.split.split.us.i -; call core::panicking::panic_bounds_check - tail call void @core::panicking::panic_bounds_check(i64 noundef 2, i64 noundef 2, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc222) #10, !noalias !33 - unreachable - -bb40.preheader.i: ; preds = %" as core::slice::index::SliceIndex<[T]>>::index.exit47.i" - br i1 %.not125.i, label %bb43.i, label %bb44.lr.ph.i - -bb44.lr.ph.i: ; preds = %bb40.preheader.i - %_128.not.i = icmp eq i64 %target.1, 0 - %59 = getelementptr inbounds [0 x float], ptr %target.0, i64 0, i64 1 - %_146.i = icmp ugt i64 %target.1, 2 - %60 = getelementptr inbounds [0 x float], ptr %target.0, i64 0, i64 2 - br i1 %_128.not.i, label %panic9.i, label %bb44.lr.ph.split.i, !prof !24 - -bb44.lr.ph.split.i: ; preds = %bb44.lr.ph.i - %_137.not.i = icmp eq i64 %target.1, 1 - %_125.i = load float, ptr %target.0, align 4, !alias.scope !6, !noalias !25, !noundef !1 - br i1 %_137.not.i, label %panic11.i, label %bb44.lr.ph.split.split.us.i, !prof !24 - -bb44.lr.ph.split.split.us.i: ; preds = %bb44.lr.ph.split.i - %_134.us.i = load float, ptr %59, align 4, !alias.scope !6, !noalias !25, !noundef !1 - br i1 %_146.i, label %bb44.lr.ph.split.split.us.split.us.i, label %bb47.us.i, !prof !26 - -bb44.lr.ph.split.split.us.split.us.i: ; preds = %bb44.lr.ph.split.split.us.i - %_143.us.us.i = load float, ptr %60, align 4, !alias.scope !6, !noalias !25, !noundef !1 - %min.iters.check36 = icmp ult i64 %charges.1, 8 - br i1 %min.iters.check36, label %bb47.us.us.i.preheader, label %vector.ph37 - -vector.ph37: ; preds = %bb44.lr.ph.split.split.us.split.us.i - %n.vec39 = and i64 %charges.1, -8 - %broadcast.splatinsert50 = insertelement <4 x float> poison, float %_125.i, i64 0 - %broadcast.splat51 = shufflevector <4 x float> %broadcast.splatinsert50, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert52 = insertelement <4 x float> poison, float %_125.i, i64 0 - %broadcast.splat53 = shufflevector <4 x float> %broadcast.splatinsert52, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert56 = insertelement <4 x float> poison, float %_134.us.i, i64 0 - %broadcast.splat57 = shufflevector <4 x float> %broadcast.splatinsert56, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert58 = insertelement <4 x float> poison, float %_134.us.i, i64 0 - %broadcast.splat59 = shufflevector <4 x float> %broadcast.splatinsert58, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert62 = insertelement <4 x float> poison, float %_143.us.us.i, i64 0 - %broadcast.splat63 = shufflevector <4 x float> %broadcast.splatinsert62, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert64 = insertelement <4 x float> poison, float %_143.us.us.i, i64 0 - %broadcast.splat65 = shufflevector <4 x float> %broadcast.splatinsert64, <4 x float> poison, <4 x i32> zeroinitializer - br label %vector.body42 - -vector.body42: ; preds = %vector.body42, %vector.ph37 - %index43 = phi i64 [ 0, %vector.ph37 ], [ %index.next68, %vector.body42 ] - %vec.phi44 = phi float [ 0.000000e+00, %vector.ph37 ], [ %112, %vector.body42 ] - %vec.phi45 = phi float [ 0.000000e+00, %vector.ph37 ], [ %114, %vector.body42 ] - %vec.phi46 = phi float [ 0.000000e+00, %vector.ph37 ], [ %116, %vector.body42 ] - %vec.phi47 = phi float [ 0.000000e+00, %vector.ph37 ], [ %118, %vector.body42 ] - %61 = getelementptr inbounds [0 x float], ptr %sources.0, i64 0, i64 %index43 - %wide.load48 = load <4 x float>, ptr %61, align 4, !alias.scope !9, !noalias !27 - %62 = getelementptr inbounds float, ptr %61, i64 4 - %wide.load49 = load <4 x float>, ptr %62, align 4, !alias.scope !9, !noalias !27 - %63 = fsub <4 x float> %wide.load48, %broadcast.splat51 - %64 = fsub <4 x float> %wide.load49, %broadcast.splat53 - %65 = getelementptr inbounds [0 x float], ptr %0, i64 0, i64 %index43 - %wide.load54 = load <4 x float>, ptr %65, align 4, !alias.scope !9, !noalias !27 - %66 = getelementptr inbounds float, ptr %65, i64 4 - %wide.load55 = load <4 x float>, ptr %66, align 4, !alias.scope !9, !noalias !27 - %67 = fsub <4 x float> %wide.load54, %broadcast.splat57 - %68 = fsub <4 x float> %wide.load55, %broadcast.splat59 - %69 = getelementptr inbounds [0 x float], ptr %1, i64 0, i64 %index43 - %wide.load60 = load <4 x float>, ptr %69, align 4, !alias.scope !9, !noalias !27 - %70 = getelementptr inbounds float, ptr %69, i64 4 - %wide.load61 = load <4 x float>, ptr %70, align 4, !alias.scope !9, !noalias !27 - %71 = fsub <4 x float> %wide.load60, %broadcast.splat63 - %72 = fsub <4 x float> %wide.load61, %broadcast.splat65 - %73 = fmul <4 x float> %63, %63 - %74 = fmul <4 x float> %64, %64 - %75 = fmul <4 x float> %67, %67 - %76 = fmul <4 x float> %68, %68 - %77 = fadd <4 x float> %73, %75 - %78 = fadd <4 x float> %74, %76 - %79 = fmul <4 x float> %71, %71 - %80 = fmul <4 x float> %72, %72 - %81 = fadd <4 x float> %77, %79 - %82 = fadd <4 x float> %78, %80 - %83 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %81) - %84 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %82) - %85 = fcmp oeq <4 x float> %83, zeroinitializer - %86 = fcmp oeq <4 x float> %84, zeroinitializer - %87 = fdiv <4 x float> , %83 - %88 = fdiv <4 x float> , %84 - %89 = select <4 x i1> %85, <4 x float> zeroinitializer, <4 x float> %87 - %90 = select <4 x i1> %86, <4 x float> zeroinitializer, <4 x float> %88 - %91 = fmul <4 x float> %89, %89 - %92 = fmul <4 x float> %90, %90 - %93 = fmul <4 x float> %89, %91 - %94 = fmul <4 x float> %90, %92 - %95 = getelementptr inbounds [0 x float], ptr %charges.0, i64 0, i64 %index43 - %wide.load66 = load <4 x float>, ptr %95, align 4, !alias.scope !11, !noalias !28 - %96 = getelementptr inbounds float, ptr %95, i64 4 - %wide.load67 = load <4 x float>, ptr %96, align 4, !alias.scope !11, !noalias !28 - %97 = fmul <4 x float> %wide.load66, %89 - %98 = fmul <4 x float> %wide.load67, %90 - %99 = fmul <4 x float> %63, %93 - %100 = fmul <4 x float> %64, %94 - %101 = fmul <4 x float> %wide.load66, %99 - %102 = fmul <4 x float> %wide.load67, %100 - %103 = fmul <4 x float> %67, %93 - %104 = fmul <4 x float> %68, %94 - %105 = fmul <4 x float> %wide.load66, %103 - %106 = fmul <4 x float> %wide.load67, %104 - %107 = fmul <4 x float> %71, %93 - %108 = fmul <4 x float> %72, %94 - %109 = fmul <4 x float> %wide.load66, %107 - %110 = fmul <4 x float> %wide.load67, %108 - %111 = tail call float @llvm.vector.reduce.fadd.v4f32(float %vec.phi44, <4 x float> %109) - %112 = tail call float @llvm.vector.reduce.fadd.v4f32(float %111, <4 x float> %110) - %113 = tail call float @llvm.vector.reduce.fadd.v4f32(float %vec.phi45, <4 x float> %105) - %114 = tail call float @llvm.vector.reduce.fadd.v4f32(float %113, <4 x float> %106) - %115 = tail call float @llvm.vector.reduce.fadd.v4f32(float %vec.phi46, <4 x float> %97) - %116 = tail call float @llvm.vector.reduce.fadd.v4f32(float %115, <4 x float> %98) - %117 = tail call float @llvm.vector.reduce.fadd.v4f32(float %vec.phi47, <4 x float> %101) - %118 = tail call float @llvm.vector.reduce.fadd.v4f32(float %117, <4 x float> %102) - %index.next68 = add nuw i64 %index43, 8 - %119 = icmp eq i64 %index.next68, %n.vec39 - br i1 %119, label %middle.block34, label %vector.body42, !llvm.loop !34 - -middle.block34: ; preds = %vector.body42 - %cmp.n41 = icmp eq i64 %n.vec39, %charges.1 - br i1 %cmp.n41, label %bb43.loopexit.i, label %bb47.us.us.i.preheader - -bb47.us.us.i.preheader: ; preds = %bb44.lr.ph.split.split.us.split.us.i, %middle.block34 - %iter5.sroa.0.0108.us.us.i.ph = phi i64 [ 0, %bb44.lr.ph.split.split.us.split.us.i ], [ %n.vec39, %middle.block34 ] - %my_result3.0107.us.us.i.ph = phi float [ 0.000000e+00, %bb44.lr.ph.split.split.us.split.us.i ], [ %112, %middle.block34 ] - %my_result2.0106.us.us.i.ph = phi float [ 0.000000e+00, %bb44.lr.ph.split.split.us.split.us.i ], [ %114, %middle.block34 ] - %my_result0.0105.us.us.i.ph = phi float [ 0.000000e+00, %bb44.lr.ph.split.split.us.split.us.i ], [ %116, %middle.block34 ] - %my_result1.0104.us.us.i.ph = phi float [ 0.000000e+00, %bb44.lr.ph.split.split.us.split.us.i ], [ %118, %middle.block34 ] - br label %bb47.us.us.i - -bb47.us.us.i: ; preds = %bb47.us.us.i.preheader, %bb47.us.us.i - %iter5.sroa.0.0108.us.us.i = phi i64 [ %122, %bb47.us.us.i ], [ %iter5.sroa.0.0108.us.us.i.ph, %bb47.us.us.i.preheader ] - %my_result3.0107.us.us.i = phi float [ %148, %bb47.us.us.i ], [ %my_result3.0107.us.us.i.ph, %bb47.us.us.i.preheader ] - %my_result2.0106.us.us.i = phi float [ %145, %bb47.us.us.i ], [ %my_result2.0106.us.us.i.ph, %bb47.us.us.i.preheader ] - %my_result0.0105.us.us.i = phi float [ %139, %bb47.us.us.i ], [ %my_result0.0105.us.us.i.ph, %bb47.us.us.i.preheader ] - %my_result1.0104.us.us.i = phi float [ %142, %bb47.us.us.i ], [ %my_result1.0104.us.us.i.ph, %bb47.us.us.i.preheader ] - %120 = getelementptr inbounds [0 x float], ptr %sources.0, i64 0, i64 %iter5.sroa.0.0108.us.us.i - %_121.us.us.i = load float, ptr %120, align 4, !alias.scope !9, !noalias !27, !noundef !1 - %121 = fsub float %_121.us.us.i, %_125.i - %122 = add nuw i64 %iter5.sroa.0.0108.us.us.i, 1 - %123 = getelementptr inbounds [0 x float], ptr %0, i64 0, i64 %iter5.sroa.0.0108.us.us.i - %_130.us.us.i = load float, ptr %123, align 4, !alias.scope !9, !noalias !27, !noundef !1 - %124 = fsub float %_130.us.us.i, %_134.us.i - %125 = getelementptr inbounds [0 x float], ptr %1, i64 0, i64 %iter5.sroa.0.0108.us.us.i - %_139.us.us.i = load float, ptr %125, align 4, !alias.scope !9, !noalias !27, !noundef !1 - %126 = fsub float %_139.us.us.i, %_143.us.us.i - %127 = fmul float %121, %121 - %128 = fmul float %124, %124 - %129 = fadd float %127, %128 - %130 = fmul float %126, %126 - %131 = fadd float %129, %130 - %132 = tail call float @llvm.sqrt.f32(float %131) - %133 = fcmp oeq float %132, 0.000000e+00 - %134 = fdiv float 1.000000e+00, %132 - %inv_diff_norm7.0.us.us.i = select i1 %133, float 0.000000e+00, float %134 - %135 = fmul float %inv_diff_norm7.0.us.us.i, %inv_diff_norm7.0.us.us.i - %136 = fmul float %inv_diff_norm7.0.us.us.i, %135 - %137 = getelementptr inbounds [0 x float], ptr %charges.0, i64 0, i64 %iter5.sroa.0.0108.us.us.i - %_173.us.us.i = load float, ptr %137, align 4, !alias.scope !11, !noalias !28 - %138 = fmul float %_173.us.us.i, %inv_diff_norm7.0.us.us.i - %139 = fadd float %my_result0.0105.us.us.i, %138 - %140 = fmul float %121, %136 - %141 = fmul float %_173.us.us.i, %140 - %142 = fadd float %my_result1.0104.us.us.i, %141 - %143 = fmul float %124, %136 - %144 = fmul float %_173.us.us.i, %143 - %145 = fadd float %my_result2.0106.us.us.i, %144 - %146 = fmul float %126, %136 - %147 = fmul float %_173.us.us.i, %146 - %148 = fadd float %my_result3.0107.us.us.i, %147 - %exitcond133.not.i = icmp eq i64 %122, %charges.1 - br i1 %exitcond133.not.i, label %bb43.loopexit.i, label %bb47.us.us.i, !llvm.loop !35 - -bb47.us.i: ; preds = %bb44.lr.ph.split.split.us.i -; call core::panicking::panic_bounds_check - tail call void @core::panicking::panic_bounds_check(i64 noundef 2, i64 noundef 2, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc194) #10, !noalias !33 - unreachable - -bb43.loopexit.i: ; preds = %bb47.us.us.i, %middle.block34 - %.lcssa15 = phi float [ %116, %middle.block34 ], [ %139, %bb47.us.us.i ] - %.lcssa14 = phi float [ %118, %middle.block34 ], [ %142, %bb47.us.us.i ] - %.lcssa13 = phi float [ %114, %middle.block34 ], [ %145, %bb47.us.us.i ] - %.lcssa = phi float [ %112, %middle.block34 ], [ %148, %bb47.us.us.i ] - %phi.bo134.i = fmul float %.lcssa15, 0x3FB45F3060000000 - %phi.bo135.i = fmul float %.lcssa14, 0x3FB45F3060000000 - %phi.bo136.i = fmul float %.lcssa13, 0x3FB45F3060000000 - %phi.bo137.i = fmul float %.lcssa, 0x3FB45F3060000000 - br label %bb43.i - -bb43.i: ; preds = %bb43.loopexit.i, %bb40.preheader.i - %my_result1.0.lcssa.i = phi float [ 0.000000e+00, %bb40.preheader.i ], [ %phi.bo135.i, %bb43.loopexit.i ] - %my_result0.0.lcssa.i = phi float [ 0.000000e+00, %bb40.preheader.i ], [ %phi.bo134.i, %bb43.loopexit.i ] - %my_result2.0.lcssa.i = phi float [ 0.000000e+00, %bb40.preheader.i ], [ %phi.bo136.i, %bb43.loopexit.i ] - %my_result3.0.lcssa.i = phi float [ 0.000000e+00, %bb40.preheader.i ], [ %phi.bo137.i, %bb43.loopexit.i ] - %_213.not.i = icmp eq i64 %result.1, 0 - br i1 %_213.not.i, label %panic18.i, label %bb82.i, !prof !24 - -panic9.i: ; preds = %bb44.lr.ph.i -; call core::panicking::panic_bounds_check - tail call void @core::panicking::panic_bounds_check(i64 noundef 0, i64 noundef 0, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc186) #10, !noalias !33 - unreachable - -panic11.i: ; preds = %bb44.lr.ph.split.i -; call core::panicking::panic_bounds_check - tail call void @core::panicking::panic_bounds_check(i64 noundef 1, i64 noundef 1, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc190) #10, !noalias !33 - unreachable - -bb82.i: ; preds = %bb43.i - store float %my_result0.0.lcssa.i, ptr %result.0, align 4, !alias.scope !13, !noalias !36 - %_219.not.i = icmp eq i64 %result.1, 1 - br i1 %_219.not.i, label %panic19.i, label %bb84.i, !prof !24 - -panic18.i: ; preds = %bb43.i -; call core::panicking::panic_bounds_check - tail call void @core::panicking::panic_bounds_check(i64 noundef 0, i64 noundef 0, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc204) #10, !noalias !33 - unreachable - -bb84.i: ; preds = %bb82.i - %149 = getelementptr inbounds [0 x float], ptr %result.0, i64 0, i64 1 - store float %my_result1.0.lcssa.i, ptr %149, align 4, !alias.scope !13, !noalias !36 - %_225.i = icmp ugt i64 %result.1, 2 - br i1 %_225.i, label %bb86.i, label %panic20.i, !prof !26 - -panic19.i: ; preds = %bb82.i -; call core::panicking::panic_bounds_check - tail call void @core::panicking::panic_bounds_check(i64 noundef 1, i64 noundef 1, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc206) #10, !noalias !33 - unreachable - -bb86.i: ; preds = %bb84.i - %150 = getelementptr inbounds [0 x float], ptr %result.0, i64 0, i64 2 - store float %my_result2.0.lcssa.i, ptr %150, align 4, !alias.scope !13, !noalias !36 - %_231.not.i = icmp eq i64 %result.1, 3 - br i1 %_231.not.i, label %panic21.i, label %bb88.i, !prof !24 - -panic20.i: ; preds = %bb84.i -; call core::panicking::panic_bounds_check - tail call void @core::panicking::panic_bounds_check(i64 noundef 2, i64 noundef 2, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc208) #10, !noalias !33 - unreachable - -bb88.i: ; preds = %bb86.i - %151 = getelementptr inbounds [0 x float], ptr %result.0, i64 0, i64 3 - store float %my_result3.0.lcssa.i, ptr %151, align 4, !alias.scope !13, !noalias !36 - br label %bempp_kernel::laplace_3d::evaluate_laplace_one_target.exit - -panic21.i: ; preds = %bb86.i -; call core::panicking::panic_bounds_check - tail call void @core::panicking::panic_bounds_check(i64 noundef 3, i64 noundef 3, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc210) #10, !noalias !33 - unreachable - -bb10.loopexit.i: ; preds = %bb14.us.us.i, %middle.block - %.lcssa16 = phi float [ %39, %middle.block ], [ %58, %bb14.us.us.i ] - %phi.bo.i = fmul float %.lcssa16, 0x3FB45F3060000000 - br label %bb10.i - -bb10.i: ; preds = %bb10.loopexit.i, %bb7.preheader.i - %my_result.0.lcssa.i = phi float [ 0.000000e+00, %bb7.preheader.i ], [ %phi.bo.i, %bb10.loopexit.i ] - %_106.not.i = icmp eq i64 %result.1, 0 - br i1 %_106.not.i, label %panic30.i, label %bb35.i, !prof !24 - -panic24.i: ; preds = %bb11.lr.ph.i -; call core::panicking::panic_bounds_check - tail call void @core::panicking::panic_bounds_check(i64 noundef 0, i64 noundef 0, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc214) #10, !noalias !33 - unreachable - -panic26.i: ; preds = %bb11.lr.ph.split.i -; call core::panicking::panic_bounds_check - tail call void @core::panicking::panic_bounds_check(i64 noundef 1, i64 noundef 1, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc218) #10, !noalias !33 - unreachable - -bb35.i: ; preds = %bb10.i - store float %my_result.0.lcssa.i, ptr %result.0, align 4, !alias.scope !13, !noalias !36 - br label %bempp_kernel::laplace_3d::evaluate_laplace_one_target.exit - -panic30.i: ; preds = %bb10.i -; call core::panicking::panic_bounds_check - tail call void @core::panicking::panic_bounds_check(i64 noundef 0, i64 noundef 0, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc226) #10, !noalias !33 - unreachable - -bempp_kernel::laplace_3d::evaluate_laplace_one_target.exit: ; preds = %bb88.i, %bb35.i - ret void -} diff --git a/kernel/src/traits.rs b/kernel/src/traits.rs deleted file mode 100644 index e69de29b..00000000 diff --git a/kernel/src/types.rs b/kernel/src/types.rs deleted file mode 100644 index e69de29b..00000000 From fa46c145d6334683361d48080f272c2a31c76a83 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Thu, 6 Jul 2023 21:33:11 +0100 Subject: [PATCH 20/40] Fix compression bug, find another tree one! --- field/src/field.rs | 218 ++++++++++++++++++----------------- fmm/src/field_translation.rs | 6 +- fmm/src/fmm.rs | 106 +++++++++-------- 3 files changed, 169 insertions(+), 161 deletions(-) diff --git a/field/src/field.rs b/field/src/field.rs index 22d64c95..89dbeb05 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -15,113 +15,6 @@ use crate::{ types::{SvdFieldTranslationKiFmm, SvdM2lEntry, TransferVector}, }; -// impl FieldTranslationData for FftFieldTranslationNaiveKiFmm -// where -// T: Kernel + Default, -// { -// type Domain = Domain; -// type M2LOperators = Vec>, Dim<[usize; 3]>>>; -// type TransferVector = Vec; - -// fn compute_m2l_operators( -// &self, -// expansion_order: usize, -// domain: Self::Domain, -// ) -> Self::M2LOperators { -// type TranslationType = ArrayBase>, Dim<[usize; 3]>>; -// let mut result: Vec = Vec::new(); - -// for t in self.transfer_vectors.iter() { -// let source_equivalent_surface = -// t.source -// .compute_surface(&domain, expansion_order, self.alpha); - -// let conv_grid_sources = t.source.convolution_grid( -// expansion_order, -// &domain, -// &source_equivalent_surface, -// self.alpha, -// ); - -// let target_check_surface = t.target.compute_surface(&domain, expansion_order, self.alpha); - -// // TODO: Remove dim -// let dim = 3; -// // Find min target -// let ncoeffs: usize = target_check_surface.len() / dim; -// let sums: Vec<_> = (0..ncoeffs) -// .map(|i| target_check_surface[i] + target_check_surface[ncoeffs + i] + target_check_surface[2*ncoeffs + i]) -// .collect(); - -// let min_index = sums -// .iter() -// .enumerate() -// .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) -// .map(|(index, _)| index) -// .unwrap(); - -// let min_target = [ -// target_check_surface[min_index], -// target_check_surface[min_index + ncoeffs], -// target_check_surface[min_index + 2 * ncoeffs], -// ]; - -// // TODO: Fix compute_kernel to work with new kernel -// let kernel = self.compute_kernel(expansion_order, &conv_grid_sources, min_target); -// let m = kernel.len(); -// let n = kernel[0].len(); -// let k = kernel[0][0].len(); - -// // Precompute and store the FFT of each unique kernel interaction -// let kernel = -// Array3::from_shape_vec((m, n, k), kernel.into_iter().flatten().flatten().collect()) -// .unwrap(); - -// // Begin by calculating pad lengths along each dimension -// let p = 2 * m; -// let q = 2 * n; -// let r = 2 * k; - -// let padding = [[0, p - m], [0, q - n], [0, r - k]]; - -// let padded_kernel = pad(&kernel, &padding, PadMode::Constant(0.)); - -// // Flip the kernel -// let padded_kernel = padded_kernel.slice(s![..;-1,..;-1,..;-1]).to_owned(); -// let mut padded_kernel_hat: Array3> = Array3::zeros((p, q, r / 2 + 1)); - -// // Compute FFT of kernel for this transfer vector -// { -// // 1. Init the handlers for FFTs along each axis -// let mut handler_ax0 = FftHandler::::new(p); -// let mut handler_ax1 = FftHandler::::new(q); -// let mut handler_ax2 = R2cFftHandler::::new(r); - -// // 2. Compute the transform along each axis -// let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); -// ndfft_r2c(&padded_kernel, &mut tmp1, &mut handler_ax2, 2); -// let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); -// ndfft(&tmp1, &mut tmp2, &mut handler_ax1, 1); -// ndfft(&tmp2, &mut padded_kernel_hat, &mut handler_ax0, 0); -// } - -// // Store FFT of kernel for this transfer vector -// { -// result.push(padded_kernel_hat); -// } -// } - -// result -// } - -// fn compute_transfer_vectors(&self) -> Self::TransferVector { -// compute_transfer_vectors() -// } - -// fn ncoeffs(&self, expansion_order: usize) -> usize { -// 6 * (expansion_order - 1).pow(2) + 2 -// } -// } impl FieldTranslationData for SvdFieldTranslationKiFmm where @@ -234,7 +127,8 @@ where .copy_from_slice(tmp.data()); } - (u, st, c) + let st_block = s_block.transpose().eval(); + (u, st_block, c) } } @@ -284,6 +178,114 @@ where } } +// impl FieldTranslationData for FftFieldTranslationNaiveKiFmm +// where +// T: Kernel + Default, +// { +// type Domain = Domain; +// type M2LOperators = Vec>, Dim<[usize; 3]>>>; +// type TransferVector = Vec; + +// fn compute_m2l_operators( +// &self, +// expansion_order: usize, +// domain: Self::Domain, +// ) -> Self::M2LOperators { +// type TranslationType = ArrayBase>, Dim<[usize; 3]>>; +// let mut result: Vec = Vec::new(); + +// for t in self.transfer_vectors.iter() { +// let source_equivalent_surface = +// t.source +// .compute_surface(&domain, expansion_order, self.alpha); + +// let conv_grid_sources = t.source.convolution_grid( +// expansion_order, +// &domain, +// &source_equivalent_surface, +// self.alpha, +// ); + +// let target_check_surface = t.target.compute_surface(&domain, expansion_order, self.alpha); + +// // TODO: Remove dim +// let dim = 3; +// // Find min target +// let ncoeffs: usize = target_check_surface.len() / dim; +// let sums: Vec<_> = (0..ncoeffs) +// .map(|i| target_check_surface[i] + target_check_surface[ncoeffs + i] + target_check_surface[2*ncoeffs + i]) +// .collect(); + +// let min_index = sums +// .iter() +// .enumerate() +// .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) +// .map(|(index, _)| index) +// .unwrap(); + +// let min_target = [ +// target_check_surface[min_index], +// target_check_surface[min_index + ncoeffs], +// target_check_surface[min_index + 2 * ncoeffs], +// ]; + +// // TODO: Fix compute_kernel to work with new kernel +// let kernel = self.compute_kernel(expansion_order, &conv_grid_sources, min_target); +// let m = kernel.len(); +// let n = kernel[0].len(); +// let k = kernel[0][0].len(); + +// // Precompute and store the FFT of each unique kernel interaction +// let kernel = +// Array3::from_shape_vec((m, n, k), kernel.into_iter().flatten().flatten().collect()) +// .unwrap(); + +// // Begin by calculating pad lengths along each dimension +// let p = 2 * m; +// let q = 2 * n; +// let r = 2 * k; + +// let padding = [[0, p - m], [0, q - n], [0, r - k]]; + +// let padded_kernel = pad(&kernel, &padding, PadMode::Constant(0.)); + +// // Flip the kernel +// let padded_kernel = padded_kernel.slice(s![..;-1,..;-1,..;-1]).to_owned(); +// let mut padded_kernel_hat: Array3> = Array3::zeros((p, q, r / 2 + 1)); + +// // Compute FFT of kernel for this transfer vector +// { +// // 1. Init the handlers for FFTs along each axis +// let mut handler_ax0 = FftHandler::::new(p); +// let mut handler_ax1 = FftHandler::::new(q); +// let mut handler_ax2 = R2cFftHandler::::new(r); + +// // 2. Compute the transform along each axis +// let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); +// ndfft_r2c(&padded_kernel, &mut tmp1, &mut handler_ax2, 2); +// let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); +// ndfft(&tmp1, &mut tmp2, &mut handler_ax1, 1); +// ndfft(&tmp2, &mut padded_kernel_hat, &mut handler_ax0, 0); +// } + +// // Store FFT of kernel for this transfer vector +// { +// result.push(padded_kernel_hat); +// } +// } + +// result +// } + +// fn compute_transfer_vectors(&self) -> Self::TransferVector { +// compute_transfer_vectors() +// } + +// fn ncoeffs(&self, expansion_order: usize) -> usize { +// 6 * (expansion_order - 1).pow(2) + 2 +// } +// } + // impl FftFieldTranslationNaiveKiFmm // where // T: Kernel + Default, diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 2f0af23b..79d0c235 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -378,6 +378,8 @@ where transfer_vector_to_m2l.insert(tv.vector, Arc::new(Mutex::new(Vec::new()))); } + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + targets.par_iter().enumerate().for_each(|(_i, &target)| { if let Some(v_list) = self.fmm.get_v_list(&target) { let calculated_transfer_vectors = v_list @@ -440,7 +442,7 @@ where multipole_slice.copy_from_slice(compressed_source_multipole_owned.data()); } - // // Compute convolution + // Compute convolution let compressed_check_potential_owned = c_sub.dot(&multipoles); // Post process to find check potential @@ -464,7 +466,7 @@ where let mut target_local_lock = target_local_arc.lock().unwrap(); let top_left = (0, i); - let dim = (self.fmm.m2l.k, 1); + let dim = (ncoeffs, 1); let target_local_owned = locals_owned.block(top_left, dim); for i in 0..target_local_lock.shape().0 { diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 3c0910c2..3c110b27 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,8 +1,8 @@ -// TODO: Fix datatree creation, it currently instantiates number of potentials using ncoeffs, fix compression. -// TODO should check what happens with rectangular distributions of points would be easier to do as a part of the above todo. +// TODO: Non Adaptive trees are failing somewhere if there are empty leaves!!! +// TODO: should check what happens with rectangular distributions of points would be easier to do as a part of the above todo. // TODO: charge input should be utilized NOW! // TODO: Fix the componentwise storage of pinv of dc2e/uc2e as this is losing accuracy. -// TODO Should be generic over kernel/kernel scale float type parameter - this requires trees to be generic over float type +// TODO: Should be generic over kernel/kernel scale float type parameter - this requires trees to be generic over float type use itertools::Itertools; use std::{ @@ -491,76 +491,80 @@ mod test { #[test] fn test_fmm<'a>() { - let npoints = 1000; + let npoints = 1000000; let points = points_fixture(npoints, None, None); let order = 5; let alpha_inner = 1.05; let alpha_outer = 2.9; let adaptive = false; - let k = 1000; - let ncrit = 100; - let depth = 2; + let k = 40; + let ncrit = 150; + let depth = 5; let kernel = Laplace3dKernel::::default(); let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); - let m2l_data_svd = SvdFieldTranslationKiFmm::new( - kernel.clone(), - Some(k), - order, - tree.get_domain().clone(), - alpha_inner, - ); + println!("{:?}", tree.get_all_leaves_set().len()); + // let m2l_data_svd = SvdFieldTranslationKiFmm::new( + // kernel.clone(), + // Some(k), + // order, + // tree.get_domain().clone(), + // alpha_inner, + // ); - let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); + // let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - let charges = Charges::new(); - let datatree = FmmData::new(fmm, charges); - datatree.run(None); + // let charges = Charges::new(); + // let datatree = FmmData::new(fmm, charges); - let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + // let times = datatree.run(Some(true)).unwrap(); - let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); - let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; - let leaf_coordinates = pts - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); + // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); - let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); + // let leaf_coordinates = pts + // .iter() + // .map(|p| p.coordinate) + // .flat_map(|[x, y, z]| vec![x, y, z]) + // .collect_vec(); - // Get into row major order - let leaf_coordinates = unsafe { - rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] - }.eval(); + // let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); - let mut direct = vec![0f64; pts.len()]; - let all_point_coordinates = points_fixture(npoints, None, None); + // // Get into row major order + // let leaf_coordinates = unsafe { + // rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] + // }.eval(); - let all_charges = vec![1f64; npoints]; + // let mut direct = vec![0f64; pts.len()]; + // let all_point_coordinates = points_fixture(npoints, None, None); - let kernel = Laplace3dKernel::::default(); + // let all_charges = vec![1f64; npoints]; - kernel.evaluate_st( - EvalType::Value, - all_point_coordinates.data(), - leaf_coordinates.data(), - &all_charges[..], - &mut direct[..], - ); + // let kernel = Laplace3dKernel::::default(); + + // kernel.evaluate_st( + // EvalType::Value, + // all_point_coordinates.data(), + // leaf_coordinates.data(), + // &all_charges[..], + // &mut direct[..], + // ); - let abs_error: f64 = potentials - .data() - .iter() - .zip(direct.iter()) - .map(|(a, b)| (a - b).abs()) - .sum(); - let rel_error: f64 = abs_error / (direct.iter().sum::()); + // let abs_error: f64 = potentials + // .data() + // .iter() + // .zip(direct.iter()) + // .map(|(a, b)| (a - b).abs()) + // .sum(); + // let rel_error: f64 = abs_error / (direct.iter().sum::()); - println!("{:?}", rel_error); - assert!(rel_error <= 1e-5); + // println!("{:?}", times); + // println!("{:?}", rel_error); + // assert!(rel_error <= 1e-5); + assert!(false) } } From 1b2e216c2bc06b70ed28fe7d9c8920582ceadcaa Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Thu, 6 Jul 2023 22:10:26 +0100 Subject: [PATCH 21/40] Identify bugs for tomorrow --- fmm/src/field_translation.rs | 18 ++++++++++++------ fmm/src/fmm.rs | 37 ++++++++++++++++++------------------ 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 79d0c235..716e4add 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -29,6 +29,7 @@ where T: Kernel + KernelScale + std::marker::Send + std::marker::Sync, U: FieldTranslationData + std::marker::Sync + std::marker::Send, { + // TODO: Change back to multithreading over the leaves once Timo has merged trait changes to fn p2m<'a>(&self) { if let Some(leaves) = self.fmm.tree().get_leaves() { leaves.par_iter().for_each(move |&leaf| { @@ -63,12 +64,14 @@ where // Calculate check potential let mut check_potential = rlst_col_vec![f64, ntargets]; - fmm_arc.kernel.evaluate_st( + let thread_pool = bempp_tools::threads::create_pool(8); + fmm_arc.kernel.evaluate_mt( EvalType::Value, leaf_coordinates.data(), &upward_check_surface[..], &leaf_charges[..], - check_potential.data_mut() + check_potential.data_mut(), + &thread_pool ); let leaf_multipole_owned = ( @@ -143,10 +146,12 @@ where fn m2p<'a>(&self) { if let Some(targets) = self.fmm.tree().get_leaves() { targets.par_iter().for_each(move |&target| { + let fmm_arc = Arc::clone(&self.fmm); - let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); if let Some(points) = fmm_arc.tree().get_points(&target) { + let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); + if let Some(w_list) = fmm_arc.get_w_list(&target) { for source in w_list.iter() { let source_multipole_arc = @@ -191,7 +196,8 @@ where } } } - }) + } +) } } @@ -199,10 +205,10 @@ where if let Some(targets) = self.fmm.tree().get_leaves() { targets.par_iter().for_each(move |&leaf| { let fmm_arc = Arc::clone(&self.fmm); - let target_potential_arc = Arc::clone(self.potentials.get(&leaf).unwrap()); let source_local_arc = Arc::clone(self.locals.get(&leaf).unwrap()); if let Some(target_points) = fmm_arc.tree().get_points(&leaf) { + let target_potential_arc = Arc::clone(self.potentials.get(&leaf).unwrap()); // Lookup data let target_coordinates = target_points .iter() @@ -305,9 +311,9 @@ where if let Some(targets) = self.fmm.tree().get_leaves() { targets.par_iter().for_each(move |&target| { let fmm_arc = Arc::clone(&self.fmm); - let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); if let Some(target_points) = fmm_arc.tree().get_points(&target) { + let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); let target_coordinates = target_points .iter() .map(|p| p.coordinate) diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 3c110b27..a38478be 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,6 +1,6 @@ -// TODO: Non Adaptive trees are failing somewhere if there are empty leaves!!! -// TODO: should check what happens with rectangular distributions of points would be easier to do as a part of the above todo. -// TODO: charge input should be utilized NOW! +// TODO: Some kind of threading error when I go up to 1e6 points, something to do with Rayon - create a minimal example +// TODO: Should check what happens with rectangular distributions of points would be easier to do as a part of the above todo. +// TODO: Charge input should be utilized NOW! // TODO: Fix the componentwise storage of pinv of dc2e/uc2e as this is losing accuracy. // TODO: Should be generic over kernel/kernel scale float type parameter - this requires trees to be generic over float type @@ -491,35 +491,34 @@ mod test { #[test] fn test_fmm<'a>() { - let npoints = 1000000; + let npoints = 1000; let points = points_fixture(npoints, None, None); let order = 5; let alpha_inner = 1.05; let alpha_outer = 2.9; let adaptive = false; - let k = 40; + let k = 50; let ncrit = 150; - let depth = 5; + let depth = 3; let kernel = Laplace3dKernel::::default(); let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); - println!("{:?}", tree.get_all_leaves_set().len()); - // let m2l_data_svd = SvdFieldTranslationKiFmm::new( - // kernel.clone(), - // Some(k), - // order, - // tree.get_domain().clone(), - // alpha_inner, - // ); + let m2l_data_svd = SvdFieldTranslationKiFmm::new( + kernel.clone(), + Some(k), + order, + tree.get_domain().clone(), + alpha_inner, + ); - // let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); + let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - // let charges = Charges::new(); - // let datatree = FmmData::new(fmm, charges); + let charges = Charges::new(); + let datatree = FmmData::new(fmm, charges); - // let times = datatree.run(Some(true)).unwrap(); + let times = datatree.run(None); // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; @@ -565,6 +564,6 @@ mod test { // println!("{:?}", times); // println!("{:?}", rel_error); // assert!(rel_error <= 1e-5); - assert!(false) + // assert!(false) } } From fda7c8777cb2f75e1bd3e196b7fea9f239f4b562 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Fri, 7 Jul 2023 13:41:09 +0100 Subject: [PATCH 22/40] Add kernel fixes --- fmm/src/field_translation.rs | 4 +- fmm/src/fmm.rs | 81 +++++++++++++++--------------- kernel/src/laplace_3d.rs | 97 ++++++++++++++++++++++-------------- traits/src/kernel.rs | 7 ++- 4 files changed, 103 insertions(+), 86 deletions(-) diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 716e4add..f2c3632e 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -64,14 +64,12 @@ where // Calculate check potential let mut check_potential = rlst_col_vec![f64, ntargets]; - let thread_pool = bempp_tools::threads::create_pool(8); - fmm_arc.kernel.evaluate_mt( + fmm_arc.kernel.evaluate_st( EvalType::Value, leaf_coordinates.data(), &upward_check_surface[..], &leaf_charges[..], check_potential.data_mut(), - &thread_pool ); let leaf_multipole_owned = ( diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index a38478be..b9349e4d 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,8 +1,8 @@ -// TODO: Some kind of threading error when I go up to 1e6 points, something to do with Rayon - create a minimal example // TODO: Should check what happens with rectangular distributions of points would be easier to do as a part of the above todo. // TODO: Charge input should be utilized NOW! // TODO: Fix the componentwise storage of pinv of dc2e/uc2e as this is losing accuracy. // TODO: Should be generic over kernel/kernel scale float type parameter - this requires trees to be generic over float type +// TODO: FFT convolutions implemented in rlst use itertools::Itertools; use std::{ @@ -491,16 +491,16 @@ mod test { #[test] fn test_fmm<'a>() { - let npoints = 1000; + let npoints = 1000000; let points = points_fixture(npoints, None, None); - let order = 5; + let order = 9; let alpha_inner = 1.05; let alpha_outer = 2.9; let adaptive = false; let k = 50; let ncrit = 150; - let depth = 3; + let depth = 5; let kernel = Laplace3dKernel::::default(); let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); @@ -518,52 +518,51 @@ mod test { let charges = Charges::new(); let datatree = FmmData::new(fmm, charges); - let times = datatree.run(None); + let times = datatree.run(Some(true)); - // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; - // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); - // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); + let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); - // let leaf_coordinates = pts - // .iter() - // .map(|p| p.coordinate) - // .flat_map(|[x, y, z]| vec![x, y, z]) - // .collect_vec(); + let leaf_coordinates = pts + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); - // let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); + let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); - // // Get into row major order - // let leaf_coordinates = unsafe { - // rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] - // }.eval(); + // Get into row major order + let leaf_coordinates = unsafe { + rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] + }.eval(); - // let mut direct = vec![0f64; pts.len()]; - // let all_point_coordinates = points_fixture(npoints, None, None); + let mut direct = vec![0f64; pts.len()]; + let all_point_coordinates = points_fixture(npoints, None, None); - // let all_charges = vec![1f64; npoints]; + let all_charges = vec![1f64; npoints]; - // let kernel = Laplace3dKernel::::default(); - - // kernel.evaluate_st( - // EvalType::Value, - // all_point_coordinates.data(), - // leaf_coordinates.data(), - // &all_charges[..], - // &mut direct[..], - // ); + let kernel = Laplace3dKernel::::default(); - // let abs_error: f64 = potentials - // .data() - // .iter() - // .zip(direct.iter()) - // .map(|(a, b)| (a - b).abs()) - // .sum(); - // let rel_error: f64 = abs_error / (direct.iter().sum::()); + kernel.evaluate_st( + EvalType::Value, + all_point_coordinates.data(), + leaf_coordinates.data(), + &all_charges[..], + &mut direct[..], + ); - // println!("{:?}", times); - // println!("{:?}", rel_error); - // assert!(rel_error <= 1e-5); - // assert!(false) + let abs_error: f64 = potentials + .data() + .iter() + .zip(direct.iter()) + .map(|(a, b)| (a - b).abs()) + .sum(); + let rel_error: f64 = abs_error / (direct.iter().sum::()); + + println!("{:?}", times); + println!("{:?}", rel_error); + assert!(rel_error <= 1e-5); } } diff --git a/kernel/src/laplace_3d.rs b/kernel/src/laplace_3d.rs index 5433497d..f6b6d0d3 100644 --- a/kernel/src/laplace_3d.rs +++ b/kernel/src/laplace_3d.rs @@ -3,10 +3,7 @@ use num; use std::marker::PhantomData; use crate::helpers::{check_dimensions_assemble, check_dimensions_evaluate}; -use bempp_traits::{ - kernel::{Kernel, KernelScale}, - types::{EvalType, KernelType, Scalar}, -}; +use bempp_traits::{types::{Scalar, EvalType, KernelType}, kernel::{Kernel, KernelScale}}; use num::traits::FloatConst; use rayon::prelude::*; @@ -69,8 +66,22 @@ where charges: &[Self::T], result: &mut [Self::T], ) { - let thread_pool = bempp_tools::threads::create_pool(1); - self.evaluate_mt(eval_type, sources, targets, charges, result, &thread_pool); + check_dimensions_evaluate(self, eval_type, sources, targets, charges, result); + let ntargets = targets.len() / self.space_dimension(); + let range_dim = self.range_component_count(eval_type); + + result + .chunks_exact_mut(range_dim) + .enumerate() + .for_each(|(target_index, my_chunk)| { + let target = [ + targets[target_index], + targets[ntargets + target_index], + targets[2 * ntargets + target_index], + ]; + + evaluate_laplace_one_target(eval_type, &target, sources, charges, my_chunk) + }); } fn evaluate_mt( @@ -80,25 +91,23 @@ where targets: &[::Real], charges: &[Self::T], result: &mut [Self::T], - thread_pool: &rayon::ThreadPool, ) { check_dimensions_evaluate(self, eval_type, sources, targets, charges, result); let ntargets = targets.len() / self.space_dimension(); let range_dim = self.range_component_count(eval_type); - thread_pool.install(|| { - result.par_chunks_exact_mut(range_dim).enumerate().for_each( - |(target_index, my_chunk)| { - let target = [ - targets[target_index], - targets[ntargets + target_index], - targets[2 * ntargets + target_index], - ]; - - evaluate_laplace_one_target(eval_type, &target, sources, charges, my_chunk) - }, - ); - }) + result + .par_chunks_exact_mut(range_dim) + .enumerate() + .for_each(|(target_index, my_chunk)| { + let target = [ + targets[target_index], + targets[ntargets + target_index], + targets[2 * ntargets + target_index], + ]; + + evaluate_laplace_one_target(eval_type, &target, sources, charges, my_chunk) + }); } fn assemble_st( @@ -108,8 +117,23 @@ where targets: &[::Real], result: &mut [Self::T], ) { - let thread_pool = bempp_tools::threads::create_pool(1); - self.assemble_mt(eval_type, sources, targets, result, &thread_pool); + check_dimensions_assemble(self, eval_type, sources, targets, result); + let ntargets = targets.len() / self.space_dimension(); + let nsources = sources.len() / self.space_dimension(); + let range_dim = self.range_component_count(eval_type); + + result + .chunks_exact_mut(range_dim * nsources) + .enumerate() + .for_each(|(target_index, my_chunk)| { + let target = [ + targets[target_index], + targets[ntargets + target_index], + targets[2 * ntargets + target_index], + ]; + + assemble_laplace_one_target(eval_type, &target, sources, my_chunk) + }); } fn assemble_mt( @@ -118,27 +142,24 @@ where sources: &[::Real], targets: &[::Real], result: &mut [Self::T], - thread_pool: &rayon::ThreadPool, ) { check_dimensions_assemble(self, eval_type, sources, targets, result); let ntargets = targets.len() / self.space_dimension(); let nsources = sources.len() / self.space_dimension(); let range_dim = self.range_component_count(eval_type); - thread_pool.install(|| { - result - .par_chunks_exact_mut(range_dim * nsources) - .enumerate() - .for_each(|(target_index, my_chunk)| { - let target = [ - targets[target_index], - targets[ntargets + target_index], - targets[2 * ntargets + target_index], - ]; - - assemble_laplace_one_target(eval_type, &target, sources, my_chunk) - }); - }) + result + .par_chunks_exact_mut(range_dim * nsources) + .enumerate() + .for_each(|(target_index, my_chunk)| { + let target = [ + targets[target_index], + targets[ntargets + target_index], + targets[2 * ntargets + target_index], + ]; + + assemble_laplace_one_target(eval_type, &target, sources, my_chunk) + }); } fn range_component_count(&self, eval_type: EvalType) -> usize { @@ -557,4 +578,4 @@ mod test { green_value_deriv.data_mut(), ); } -} +} \ No newline at end of file diff --git a/traits/src/kernel.rs b/traits/src/kernel.rs index 0ab9211e..d5a7c6ca 100644 --- a/traits/src/kernel.rs +++ b/traits/src/kernel.rs @@ -1,6 +1,7 @@ //! Trait for Green's function kernels -use crate::types::{EvalType, KernelType, Scalar}; -use rayon::ThreadPool; +use crate::types::EvalType; +use crate::types::KernelType; +use crate::types::Scalar; /// Interface to evaluating Green's functions for given sources and targets. pub trait Kernel { @@ -60,7 +61,6 @@ pub trait Kernel { targets: &[::Real], charges: &[Self::T], result: &mut [Self::T], - thread_pool: &ThreadPool, ); /// Single threaded assembly of a kernel matrix. @@ -115,7 +115,6 @@ pub trait Kernel { sources: &[::Real], targets: &[::Real], result: &mut [Self::T], - thread_pool: &ThreadPool, ); /// Return the type of the kernel. From 9c105ec2122d3ba599ca3906e4ff4a690f0736c2 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Fri, 7 Jul 2023 13:41:30 +0100 Subject: [PATCH 23/40] Format --- field/src/field.rs | 1 - fmm/src/field_translation.rs | 1 - kernel/src/laplace_3d.rs | 7 +++++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/field/src/field.rs b/field/src/field.rs index 89dbeb05..fceaf53c 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -15,7 +15,6 @@ use crate::{ types::{SvdFieldTranslationKiFmm, SvdM2lEntry, TransferVector}, }; - impl FieldTranslationData for SvdFieldTranslationKiFmm where T: Kernel + Default, diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index f2c3632e..c96a82ca 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -149,7 +149,6 @@ where if let Some(points) = fmm_arc.tree().get_points(&target) { let target_potential_arc = Arc::clone(self.potentials.get(&target).unwrap()); - if let Some(w_list) = fmm_arc.get_w_list(&target) { for source in w_list.iter() { let source_multipole_arc = diff --git a/kernel/src/laplace_3d.rs b/kernel/src/laplace_3d.rs index f6b6d0d3..b98f6dc0 100644 --- a/kernel/src/laplace_3d.rs +++ b/kernel/src/laplace_3d.rs @@ -3,7 +3,10 @@ use num; use std::marker::PhantomData; use crate::helpers::{check_dimensions_assemble, check_dimensions_evaluate}; -use bempp_traits::{types::{Scalar, EvalType, KernelType}, kernel::{Kernel, KernelScale}}; +use bempp_traits::{ + kernel::{Kernel, KernelScale}, + types::{EvalType, KernelType, Scalar}, +}; use num::traits::FloatConst; use rayon::prelude::*; @@ -578,4 +581,4 @@ mod test { green_value_deriv.data_mut(), ); } -} \ No newline at end of file +} From 74dd11d38619d30219ee427a5fa88007bd0fcf5f Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Fri, 7 Jul 2023 16:45:58 +0100 Subject: [PATCH 24/40] Add tests for irregular data distributions, and update domain to accomadate these explicitly --- tree/Cargo.toml | 4 +- tree/src/implementations/helpers.rs | 50 +++++++++ tree/src/implementations/impl_domain.rs | 106 ++++++++----------- tree/src/implementations/impl_morton.rs | 77 ++++++-------- tree/src/implementations/impl_single_node.rs | 95 +++++++---------- tree/src/implementations/mod.rs | 1 + tree/src/types/domain.rs | 3 +- 7 files changed, 170 insertions(+), 166 deletions(-) create mode 100644 tree/src/implementations/helpers.rs diff --git a/tree/Cargo.toml b/tree/Cargo.toml index ea6d1120..d5b2bf63 100644 --- a/tree/Cargo.toml +++ b/tree/Cargo.toml @@ -21,10 +21,8 @@ memoffset = "0.6" rand = "0.8.*" hyksort = { path = "../hyksort", optional = true } bempp-traits = { path = "../traits" } +rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse"} [features] mpi = ["dep:mpi", "dep:hyksort"] strict = [] - -[dev-dependencies] -rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse"} \ No newline at end of file diff --git a/tree/src/implementations/helpers.rs b/tree/src/implementations/helpers.rs new file mode 100644 index 00000000..9f79431b --- /dev/null +++ b/tree/src/implementations/helpers.rs @@ -0,0 +1,50 @@ +use rand::prelude::*; +use rand::SeedableRng; +use rlst::dense::{base_matrix::BaseMatrix, rlst_mat, Dynamic, Matrix, VectorContainer}; + +pub type PointsMat = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; + +// Points fixture for testing, uniformly samples in each axis from min to max. +pub fn points_fixture(npoints: usize, min: Option, max: Option) -> PointsMat { + // Generate a set of randomly distributed points + let mut range = StdRng::seed_from_u64(0); + + let between; + if let (Some(min), Some(max)) = (min, max) { + between = rand::distributions::Uniform::from(min..max); + } else { + between = rand::distributions::Uniform::from(0.0_f64..1.0_f64); + } + + let mut points = rlst_mat![f64, (npoints, 3)]; + + for i in 0..npoints { + points[[i, 0]] = between.sample(&mut range); + points[[i, 1]] = between.sample(&mut range); + points[[i, 2]] = between.sample(&mut range); + } + + points +} + +// Points fixture for testing, uniformly samples in the bounds [[0, 1), [0, 1), [0, 500)] for the x, y, and z +// axes respectively. +pub fn points_fixture_col(npoints: usize) -> PointsMat { + // Generate a set of randomly distributed points + let mut range = StdRng::seed_from_u64(0); + + let between1 = rand::distributions::Uniform::from(0f64..0.1f64); + let between2 = rand::distributions::Uniform::from(0f64..500f64); + + let mut points = rlst_mat![f64, (npoints, 3)]; + + for i in 0..npoints { + // One axis has a different sampling + points[[i, 0]] = between1.sample(&mut range); + points[[i, 1]] = between1.sample(&mut range); + points[[i, 2]] = between2.sample(&mut range); + } + + points +} diff --git a/tree/src/implementations/impl_domain.rs b/tree/src/implementations/impl_domain.rs index 4d62a8cc..2f3ae140 100644 --- a/tree/src/implementations/impl_domain.rs +++ b/tree/src/implementations/impl_domain.rs @@ -6,7 +6,7 @@ impl Domain { /// ensure correct Morton Encoding. pub fn from_local_points(points: &[PointType]) -> Domain { // Increase size of bounding box to capture all points - let err: f64 = 0.001; + let err: f64 = 1e-5; // TODO: Should be parametrised by dimension let dim = 3; let npoints = points.len() / dim; @@ -22,80 +22,41 @@ impl Domain { let min_y = y.iter().min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); let min_z = z.iter().min_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); - // let max_x = points - // .iter() - // .max_by(|a, b| a[0].partial_cmp(&b[0]).unwrap()) - // .unwrap()[0]; - // let max_y = points - // .iter() - // .max_by(|a, b| a[1].partial_cmp(&b[1]).unwrap()) - // .unwrap()[1]; - // let max_z = points - // .iter() - // .max_by(|a, b| a[2].partial_cmp(&b[2]).unwrap()) - // .unwrap()[2]; - - // let min_x = points - // .iter() - // .min_by(|a, b| a[0].partial_cmp(&b[0]).unwrap()) - // .unwrap()[0]; - // let min_y = points - // .iter() - // .min_by(|a, b| a[1].partial_cmp(&b[1]).unwrap()) - // .unwrap()[1]; - // let min_z = points - // .iter() - // .min_by(|a, b| a[2].partial_cmp(&b[2]).unwrap()) - // .unwrap()[2]; - - Domain { - origin: [min_x - err, min_y - err, min_z - err], - diameter: [ - (max_x - min_x) + 2. * err, - (max_y - min_y) + 2. * err, - (max_z - min_z) + 2. * err, - ], - } + // Find maximum dimension, this will define the size of the boxes in the domain + let diameter_x = (max_x - min_x).abs(); + let diameter_y = (max_y - min_y).abs(); + let diameter_z = (max_z - min_z).abs(); + + let diameter = diameter_x.max(diameter_y).max(diameter_z); + let diameter = [ + diameter + 2. * err, + diameter + 2. * err, + diameter + 2. * err, + ]; + + // The origin is defined by the minimum point + let origin = [min_x - err, min_y - err, min_z - err]; + + Domain { origin, diameter } } } #[cfg(test)] mod test { + use rlst::dense::{RawAccess, Shape}; - use rand::prelude::*; - use rand::SeedableRng; - - use crate::types::domain::Domain; - use rlst::common::traits::ColumnMajorIterator; - use rlst::dense::{ - base_matrix::BaseMatrix, rlst_mat, Dynamic, Matrix, RawAccess, VectorContainer, - }; + use crate::implementations::helpers::{points_fixture, points_fixture_col, PointsMat}; - fn points_fixture( - npoints: usize, - ) -> Matrix, Dynamic, Dynamic>, Dynamic, Dynamic> - { - // Generate a set of randomly distributed points - let mut range = StdRng::seed_from_u64(0); - let between = rand::distributions::Uniform::from(0.0_f64..1.0_f64); - let mut points = rlst_mat![f64, (npoints, 3)]; - - for i in 0..npoints { - points[[i, 0]] = between.sample(&mut range); - points[[i, 1]] = between.sample(&mut range); - points[[i, 2]] = between.sample(&mut range); - } - - points - } + use super::*; - #[test] - fn test_compute_bounds() { - let npoints = 10000; - let points = points_fixture(npoints); + fn test_compute_bounds(points: PointsMat) { let domain = Domain::from_local_points(&points.data()); + // Test that the domain remains cubic + assert!(domain.diameter.iter().all(|&x| x == domain.diameter[0])); + // Test that all local points are contained within the local domain + let npoints = points.shape().0; for i in 0..npoints { let point = [points[[i, 0]], points[[i, 1]], points[[i, 2]]]; @@ -110,4 +71,21 @@ mod test { ); } } + + #[test] + fn test_bounds() { + let npoints = 10000; + + // Test points in positive octant only + let points = points_fixture(npoints, None, None); + test_compute_bounds(points); + + // Test points in positive and negative octants + let points = points_fixture(npoints, Some(-1.), Some(1.)); + test_compute_bounds(points); + + // Test rectangular distributions of points + let points = points_fixture_col(npoints); + test_compute_bounds(points); + } } diff --git a/tree/src/implementations/impl_morton.rs b/tree/src/implementations/impl_morton.rs index 6db2acbe..d7988692 100644 --- a/tree/src/implementations/impl_morton.rs +++ b/tree/src/implementations/impl_morton.rs @@ -227,10 +227,12 @@ pub fn point_to_anchor( domain: &Domain, ) -> Result<[KeyType; 3], Box> { // Check if point is in the domain - let mut contained = true; + + let mut contained = Vec::new(); for (&p, d, o) in izip!(point, domain.diameter, domain.origin) { - contained = (o < p) && (p < o + d); + contained.push((o <= p) && (p <= o + d)); } + let contained = contained.iter().all(|&x| x == true); match contained { true => { @@ -244,7 +246,7 @@ pub fn point_to_anchor( let scaling_factor = 1 << (DEEPEST_LEVEL - level); - for (a, p, o, s) in izip!(&mut anchor, point, &domain.origin, side_length) { + for (a, p, o, s) in izip!(&mut anchor, point, &domain.origin, &side_length) { *a = (((p - o) / s).floor()) as KeyType * scaling_factor; } Ok(anchor) @@ -882,11 +884,10 @@ impl MortonKeyInterface for MortonKey { #[cfg(test)] mod test { use itertools::Itertools; + use rlst::dense::{RawAccess, Shape}; use std::vec; - use rand::prelude::*; - use rand::Rng; - use rand::SeedableRng; + use crate::implementations::helpers::points_fixture; use super::*; @@ -1059,22 +1060,17 @@ mod test { #[test] fn test_sorting() { let npoints = 1000; - let mut range = rand::thread_rng(); - let mut points: Vec<[PointType; 3]> = Vec::new(); + let points = points_fixture(npoints, Some(-1.), Some(1.0)); - for _ in 0..npoints { - points.push([range.gen(), range.gen(), range.gen()]); - } + let domain = Domain::from_local_points(&points.data()); - let domain = Domain { - origin: [0., 0., 0.], - diameter: [1., 1., 1.], - }; + let mut keys: Vec = Vec::new(); - let mut keys: Vec = points - .iter() - .map(|p| MortonKey::from_point(p, &domain, DEEPEST_LEVEL)) - .collect(); + for i in 0..points.shape().0 { + let point = [points[[i, 0]], points[[i, 1]], points[[i, 2]]]; + + keys.push(MortonKey::from_point(&point, &domain, DEEPEST_LEVEL)); + } // Add duplicates to keys, to test ordering in terms of equality let mut cpy: Vec = keys.to_vec(); @@ -1331,27 +1327,22 @@ mod test { #[test] pub fn test_morton_keys_iterator() { - let mut range = StdRng::seed_from_u64(0); - let between = rand::distributions::Uniform::from(0.0..1.0); - let mut points: Vec<[PointType; 3]> = Vec::new(); - let npoints = 1000; - for _ in 0..npoints { - points.push([ - between.sample(&mut range), - between.sample(&mut range), - between.sample(&mut range), - ]) - } let domain = Domain { - origin: [0.0, 0.0, 0.0], - diameter: [1.0, 1.0, 1.0], + origin: [-1.01, -1.01, -1.01], + diameter: [2.0, 2.0, 2.0], }; + let min = Some(-1.01); + let max = Some(0.99); - let keys = points - .iter() - .map(|p| MortonKey::from_point(p, &domain, DEEPEST_LEVEL)) - .collect(); + let points = points_fixture(npoints, min, max); + + let mut keys = Vec::new(); + + for i in 0..points.shape().0 { + let point = [points[[i, 0]], points[[i, 1]], points[[i, 2]]]; + keys.push(MortonKey::from_point(&point, &domain, DEEPEST_LEVEL)) + } let keys = MortonKeys { keys, index: 0 }; @@ -1394,7 +1385,7 @@ mod test { } let domain = Domain { - origin: [-0.5, -0.5, -0.5], + origin: [-0.7, -0.6, -0.5], diameter: [1., 1., 1.], }; @@ -1417,7 +1408,7 @@ mod test { }; // Test a point not in the domain - let point = [0.9, 0.9, 1.9]; + let point = [1.9, 0.9, 0.9]; let level = 2; let _anchor = point_to_anchor(&point, level, &domain); } @@ -1431,7 +1422,7 @@ mod test { }; // Test a point not in the domain - let point = [-0.5, -0.5, -0.5]; + let point = [-0.51, -0.5, -0.5]; let level = 2; let _anchor = point_to_anchor(&point, level, &domain); } @@ -1580,7 +1571,7 @@ mod test { fn test_is_adjacent() { let point = [0.5, 0.5, 0.5]; let domain = Domain { - origin: [0., 0., 0.], + origin: [-0.1, -0.1, 0.1], diameter: [1., 1., 1.], }; @@ -1618,9 +1609,9 @@ mod test { #[test] fn test_encoding_is_always_absolute() { - let point = [0.5, 0.5, 0.5]; - let domain = Domain { - origin: [0., 0., 0.], + let point = [-0.099999, -0.099999, -0.099999]; + let domain: Domain = Domain { + origin: [-0.1, -0.1, -0.1], diameter: [1., 1., 1.], }; diff --git a/tree/src/implementations/impl_single_node.rs b/tree/src/implementations/impl_single_node.rs index 39632e89..bebde236 100644 --- a/tree/src/implementations/impl_single_node.rs +++ b/tree/src/implementations/impl_single_node.rs @@ -521,47 +521,38 @@ impl Tree for SingleNodeTree { #[cfg(test)] mod test { - use super::*; - use rand::prelude::*; - use rand::SeedableRng; - use rlst::dense::rlst_mat; - use rlst::dense::RawAccess; - use rlst::dense::{base_matrix::BaseMatrix, Dynamic, Matrix, VectorContainer}; - - fn points_fixture( - npoints: usize, - min: Option, - max: Option, - ) -> Matrix, Dynamic, Dynamic>, Dynamic, Dynamic> - { - // Generate a set of randomly distributed points - let mut range = StdRng::seed_from_u64(0); - - let between; - if let (Some(min), Some(max)) = (min, max) { - between = rand::distributions::Uniform::from(min..max); - } else { - between = rand::distributions::Uniform::from(0.0_f64..1.0_f64); - } - let mut points = rlst_mat![f64, (npoints, 3)]; + use rlst::dense::RawAccess; - for i in 0..npoints { - points[[i, 0]] = between.sample(&mut range); - points[[i, 1]] = between.sample(&mut range); - points[[i, 2]] = between.sample(&mut range); - } + use crate::implementations::helpers::{points_fixture, points_fixture_col}; - points - } + use super::*; #[test] pub fn test_uniform_tree() { - let npoints = 10000; - let points = points_fixture(npoints, None, None); - let depth = 3; - let n_crit = 150; - let tree = SingleNodeTree::new(points.data(), false, Some(n_crit), Some(depth)); + let npoints = 100; + let depth = 2; + + // Test uniformly distributed data + let points = points_fixture(npoints, Some(-1.0), Some(1.0)); + let tree = SingleNodeTree::new(points.data(), false, None, Some(depth)); + + // Test that the tree really is uniform + let levels: Vec = tree + .get_leaves() + .unwrap() + .iter() + .map(|node| node.level()) + .collect(); + let first = levels[0]; + assert!(levels.iter().all(|key| *key == first)); + + // Test that max level constraint is satisfied + assert!(first == depth); + + // Test a column distribution of data + let points = points_fixture_col(npoints); + let tree = SingleNodeTree::new(points.data(), false, None, Some(depth)); // Test that the tree really is uniform let levels: Vec = tree @@ -575,6 +566,18 @@ mod test { // Test that max level constraint is satisfied assert!(first == depth); + + let mut unique_leaves = HashSet::new(); + + // Test that only a subset of the leaves contain any points + for leaf in tree.get_all_leaves_set().iter() { + if let Some(points) = tree.get_points(&leaf) { + unique_leaves.insert(leaf.morton); + } + } + + let expected = 2u64.pow(depth.try_into().unwrap()) as usize; // Number of octants at encoding level that should be filled + assert_eq!(unique_leaves.len(), expected); } #[test] @@ -635,26 +638,12 @@ mod test { pub fn test_assign_nodes_to_points() { // Generate points in a single octant of the domain let npoints = 10; - // let mut range = StdRng::seed_from_u64(0); - // let between = rand::distributions::Uniform::from(0.0..0.5); - // let mut points: Vec<[PointType; 3]> = Vec::new(); - - // for _ in 0..npoints { - // points.push([ - // between.sample(&mut range), - // between.sample(&mut range), - // between.sample(&mut range), - // ]) - // } let points = points_fixture(npoints, Some(0.), Some(0.5)); let domain = Domain { origin: [0.0, 0.0, 0.0], diameter: [1.0, 1.0, 1.0], }; - let depth = 1; - - let dim = 3; let mut tmp = Points::default(); for i in 0..npoints { @@ -708,15 +697,11 @@ mod test { origin: [0., 0., 0.], diameter: [1.0, 1.0, 1.0], }; - let _depth = 5; - // let mut points = Points { - // points: points_fixture(10000, None, None).data(), - // index: 0, - // }; - let dim = 3; let npoints = 10000; let points = points_fixture(npoints, None, None); + let mut tmp = Points::default(); + for i in 0..npoints { let point = [points[[i, 0]], points[[i, 1]], points[[i, 2]]]; let key = MortonKey::from_point(&point, &domain, DEEPEST_LEVEL); diff --git a/tree/src/implementations/mod.rs b/tree/src/implementations/mod.rs index f9a670ca..a8d39d88 100644 --- a/tree/src/implementations/mod.rs +++ b/tree/src/implementations/mod.rs @@ -1,3 +1,4 @@ +pub mod helpers; pub mod impl_domain; pub mod impl_morton; pub mod impl_point; diff --git a/tree/src/types/domain.rs b/tree/src/types/domain.rs index e0619246..9631fbc7 100644 --- a/tree/src/types/domain.rs +++ b/tree/src/types/domain.rs @@ -9,6 +9,7 @@ pub struct Domain { /// The lower left corner of the domain, defined by the point distribution. pub origin: [PointType; 3], - /// The diameter of the domain along the [x, y, z] axes respectively. + /// The diameter of the domain along the [x, y, z] axes respectively, defined + /// by the maximum width of the point distribution along a given axis. pub diameter: [PointType; 3], } From 19c7f54cfce7a3937b78f2c5222eddc3adcdf844 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Sat, 8 Jul 2023 10:56:20 +0100 Subject: [PATCH 25/40] Some form of charge instantiation --- fmm/src/charge.rs | 74 ------------------------------------ fmm/src/field_translation.rs | 6 +++ fmm/src/fmm.rs | 52 +++++++++++++++++-------- fmm/src/types.rs | 34 +++++++++-------- 4 files changed, 61 insertions(+), 105 deletions(-) delete mode 100644 fmm/src/charge.rs diff --git a/fmm/src/charge.rs b/fmm/src/charge.rs deleted file mode 100644 index c90308c4..00000000 --- a/fmm/src/charge.rs +++ /dev/null @@ -1,74 +0,0 @@ -use std::{ - cmp::{Eq, Ord, Ordering, PartialEq}, - hash::{Hash, Hasher}, -}; - -use crate::types::{Charge, Charges}; - -impl Hash for Charge { - fn hash(&self, state: &mut H) { - self.global_idx.hash(state); - } -} - -impl PartialEq for Charge { - fn eq(&self, other: &Self) -> bool { - self.global_idx == other.global_idx - } -} - -impl Eq for Charge {} - -impl Ord for Charge { - fn cmp(&self, other: &Self) -> Ordering { - self.global_idx.cmp(&other.global_idx) - } -} - -impl PartialOrd for Charge { - fn partial_cmp(&self, other: &Self) -> Option { - // less_than(&self.morton, &other.morton) - Some(self.global_idx.cmp(&other.global_idx)) - } -} - -impl Charges { - pub fn new() -> Charges { - Charges { - charges: Vec::new(), - index: 0, - } - } - - pub fn add(&mut self, item: Charge) { - self.charges.push(item); - } - - pub fn sort(&mut self) { - self.charges.sort(); - } -} - -impl Iterator for Charges { - type Item = Charge; - - fn next(&mut self) -> Option { - if self.index >= self.charges.len() { - return None; - } - - self.index += 1; - self.charges.get(self.index).cloned() - } -} - -impl FromIterator for Charges { - fn from_iter>(iter: I) -> Self { - let mut c = Charges::new(); - - for i in iter { - c.add(i); - } - c - } -} diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index c96a82ca..8f5fb24a 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -45,6 +45,12 @@ where .map(|p| p.coordinate) .flat_map(|[x, y, z]| vec![x, y, z]) .collect_vec(); + + let global_idxs = leaf_points + .iter() + .map(|p| p.global_idx) + .collect_vec(); + let nsources = leaf_coordinates.len() / self.fmm.kernel.space_dimension(); // Get into row major order diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index b9349e4d..5f777404 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,5 +1,3 @@ -// TODO: Should check what happens with rectangular distributions of points would be easier to do as a part of the above todo. -// TODO: Charge input should be utilized NOW! // TODO: Fix the componentwise storage of pinv of dc2e/uc2e as this is losing accuracy. // TODO: Should be generic over kernel/kernel scale float type parameter - this requires trees to be generic over float type // TODO: FFT convolutions implemented in rlst @@ -16,7 +14,7 @@ use rlst::{ common::traits::{Eval, NewLikeSelf, Transpose}, dense::{ base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix, rlst_col_vec, - rlst_mat, rlst_pointer_mat, traits::*, Dot, + rlst_mat, rlst_pointer_mat, traits::*, Dot, global, }, }; @@ -29,7 +27,7 @@ use bempp_traits::{ }; use bempp_tree::{constants::ROOT, types::single_node::SingleNodeTree}; -use crate::types::{C2EType, Charges, FmmData, KiFmm}; +use crate::types::{C2EType,ChargeDict, FmmData, KiFmm}; #[allow(dead_code)] impl KiFmm @@ -178,7 +176,7 @@ where T: Kernel, U: FieldTranslationData, { - pub fn new(fmm: KiFmm, _charges: Charges) -> Self { + pub fn new(fmm: KiFmm, global_charges: &ChargeDict) -> Self { let mut multipoles = HashMap::new(); let mut locals = HashMap::new(); let mut potentials = HashMap::new(); @@ -196,11 +194,20 @@ where if let Some(point_data) = fmm.tree().get_points(key) { points.insert(*key, point_data.iter().cloned().collect_vec()); - // TODO: Fragile let npoints = point_data.len(); potentials.insert(*key, Arc::new(Mutex::new(rlst_col_vec![f64, npoints]))); - // TODO: Replace with a global index lookup at some point - charges.insert(*key, Arc::new(vec![1.0; point_data.len()])); + + // Lookup indices and store with charges + let mut tmp_idx = Vec::new(); + for point in point_data.iter() { + tmp_idx.push(point.global_idx) + } + let mut tmp_charges = vec![0.; point_data.len()]; + for i in 0..tmp_idx.len() { + tmp_charges[i] = *global_charges.get(&tmp_idx[i]).unwrap(); + } + + charges.insert(*key, Arc::new(tmp_charges)); } } } @@ -490,8 +497,8 @@ mod test { // } #[test] - fn test_fmm<'a>() { - let npoints = 1000000; + fn test_fmm_svd<'a>() { + let npoints = 1000; let points = points_fixture(npoints, None, None); let order = 9; @@ -515,8 +522,23 @@ mod test { let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - let charges = Charges::new(); - let datatree = FmmData::new(fmm, charges); + // Form charge dict using tree. + let mut charge_dict: ChargeDict = HashMap::new(); + + let mut global_idxs = Vec::new(); + + for leaf in fmm.tree().get_all_leaves_set().iter() { + if let Some(points) = fmm.tree().get_points(&leaf) { + // Find global indices + global_idxs.extend(points.iter().map(|p| p.global_idx).collect_vec()); + } + } + + for &global_idx in global_idxs.iter() { + charge_dict.insert(global_idx, 1.0); + } + + let datatree = FmmData::new(fmm, &charge_dict); let times = datatree.run(Some(true)); @@ -541,7 +563,7 @@ mod test { let mut direct = vec![0f64; pts.len()]; let all_point_coordinates = points_fixture(npoints, None, None); - let all_charges = vec![1f64; npoints]; + let all_charges = charge_dict.into_values().collect_vec(); let kernel = Laplace3dKernel::::default(); @@ -561,8 +583,6 @@ mod test { .sum(); let rel_error: f64 = abs_error / (direct.iter().sum::()); - println!("{:?}", times); - println!("{:?}", rel_error); - assert!(rel_error <= 1e-5); + assert!(rel_error <= 1e-7); } } diff --git a/fmm/src/types.rs b/fmm/src/types.rs index e95c184f..4e122ffa 100644 --- a/fmm/src/types.rs +++ b/fmm/src/types.rs @@ -9,24 +9,28 @@ use rlst::dense::traits::*; use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix}; use rlst::{self}; -#[derive(Clone, Debug, Default)] -pub struct Charge { - /// Charge data - pub data: f64, +// #[derive(Clone, Debug, Default)] +// pub struct Charge { +// /// Charge data +// pub data: f64, - /// Global unique index. - pub global_idx: usize, -} +// /// Global unique index. +// pub global_idx: usize, +// } -/// Container of **Points**. -#[derive(Clone, Debug, Default)] -pub struct Charges { - /// A vector of Charges - pub charges: Vec, +pub type Charge = f64; +pub type GlobalIdx = usize; +pub type ChargeDict = HashMap; - /// index for implementing the Iterator trait. - pub index: usize, -} +// /// Container of **Points**. +// #[derive(Clone, Debug, Default)] +// pub struct Charges { +// /// A vector of Charges +// pub charges: Vec, + +// /// index for implementing the Iterator trait. +// pub index: usize, +// } pub type Expansions = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; From f39acad1eca8919a523be6164e629fb2b1486ea4 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Sat, 8 Jul 2023 10:56:44 +0100 Subject: [PATCH 26/40] rm charge from lib --- fmm/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/fmm/src/lib.rs b/fmm/src/lib.rs index 0d1a52bc..80dbfeb5 100644 --- a/fmm/src/lib.rs +++ b/fmm/src/lib.rs @@ -1,5 +1,4 @@ //! Fast Solver FMM library -pub mod charge; pub mod field_translation; pub mod fmm; pub mod interaction_lists; From 3e5975d114524f57dd8d91bc12a187d8ea875cd0 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Sat, 8 Jul 2023 20:56:48 +0100 Subject: [PATCH 27/40] Add a way of feeding charges specified by their global index for single node trees --- fmm/src/charge.rs | 15 +++++ fmm/src/fmm.rs | 43 ++++++------- fmm/src/lib.rs | 1 + traits/src/tree.rs | 9 +++ tree/src/implementations/impl_single_node.rs | 66 +++++++------------- 5 files changed, 66 insertions(+), 68 deletions(-) create mode 100644 fmm/src/charge.rs diff --git a/fmm/src/charge.rs b/fmm/src/charge.rs new file mode 100644 index 00000000..f467f82b --- /dev/null +++ b/fmm/src/charge.rs @@ -0,0 +1,15 @@ +use std::collections::HashMap; + +use crate::types::{ChargeDict, Charge, GlobalIdx}; + + +pub fn build_charge_dict(global_idxs: &[GlobalIdx], charges: &[Charge]) +-> ChargeDict + { + + let mut res: ChargeDict = HashMap::new(); + for (&global_idx, &charge) in global_idxs.iter().zip(charges.iter()) { + res.insert(global_idx, charge); + } + res +} \ No newline at end of file diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 5f777404..7aa2ba33 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,6 +1,6 @@ -// TODO: Fix the componentwise storage of pinv of dc2e/uc2e as this is losing accuracy. -// TODO: Should be generic over kernel/kernel scale float type parameter - this requires trees to be generic over float type // TODO: FFT convolutions implemented in rlst +// TODO: Should be generic over kernel/kernel scale float type parameter - this requires trees to be generic over float type +// TODO: Tree should infer dimension from the data (stride). use itertools::Itertools; use std::{ @@ -366,13 +366,16 @@ where mod test { use super::*; - use bempp_field::types::SvdFieldTranslationKiFmm; - use bempp_kernel::laplace_3d::evaluate_laplace_one_target; use rand::prelude::*; use rand::SeedableRng; + + use rlst::{common::traits::ColumnMajorIterator, dense::rlst_rand_mat}; + use bempp_field::types::SvdFieldTranslationKiFmm; + use bempp_kernel::laplace_3d::evaluate_laplace_one_target; use bempp_kernel::laplace_3d::Laplace3dKernel; - use rlst::{common::traits::ColumnMajorIterator, dense::rlst_rand_mat}; + + use crate::charge::build_charge_dict; fn points_fixture( npoints: usize, @@ -500,17 +503,19 @@ mod test { fn test_fmm_svd<'a>() { let npoints = 1000; let points = points_fixture(npoints, None, None); + let global_idxs = (0..npoints).collect_vec(); + let charges = vec![1.0; npoints]; - let order = 9; + let order = 7; let alpha_inner = 1.05; let alpha_outer = 2.9; let adaptive = false; let k = 50; let ncrit = 150; - let depth = 5; + let depth = 2; let kernel = Laplace3dKernel::::default(); - let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); + let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); let m2l_data_svd = SvdFieldTranslationKiFmm::new( kernel.clone(), @@ -522,22 +527,9 @@ mod test { let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - // Form charge dict using tree. - let mut charge_dict: ChargeDict = HashMap::new(); - - let mut global_idxs = Vec::new(); - - for leaf in fmm.tree().get_all_leaves_set().iter() { - if let Some(points) = fmm.tree().get_points(&leaf) { - // Find global indices - global_idxs.extend(points.iter().map(|p| p.global_idx).collect_vec()); - } - } - - for &global_idx in global_idxs.iter() { - charge_dict.insert(global_idx, 1.0); - } - + // Form charge dict, matching charges with their associated global indices + let mut charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); + let datatree = FmmData::new(fmm, &charge_dict); let times = datatree.run(Some(true)); @@ -583,6 +575,7 @@ mod test { .sum(); let rel_error: f64 = abs_error / (direct.iter().sum::()); - assert!(rel_error <= 1e-7); + println!("rel error {:?}", rel_error); + assert!(rel_error <= 1e-5); } } diff --git a/fmm/src/lib.rs b/fmm/src/lib.rs index 80dbfeb5..5b0db82c 100644 --- a/fmm/src/lib.rs +++ b/fmm/src/lib.rs @@ -3,3 +3,4 @@ pub mod field_translation; pub mod fmm; pub mod interaction_lists; pub mod types; +pub mod charge; \ No newline at end of file diff --git a/traits/src/tree.rs b/traits/src/tree.rs index e5fdafd2..ec6e180e 100644 --- a/traits/src/tree.rs +++ b/traits/src/tree.rs @@ -36,11 +36,20 @@ pub trait Tree { // Copy of nodes type NodeIndices: IntoIterator; + // Global indices of points + type GlobalIndex; + + // Slice of global indices + type GlobalIndexSlice<'a>: IntoIterator + where + Self: 'a; + fn new( points: Self::PointDataSlice<'_>, adaptive: bool, n_crit: Option, depth: Option, + global_idxs: Self::GlobalIndexSlice<'_> ) -> Self; // Get depth of tree. diff --git a/tree/src/implementations/impl_single_node.rs b/tree/src/implementations/impl_single_node.rs index bebde236..7993c99b 100644 --- a/tree/src/implementations/impl_single_node.rs +++ b/tree/src/implementations/impl_single_node.rs @@ -16,7 +16,7 @@ use crate::{ impl SingleNodeTree { /// Constructor for uniform trees - pub fn uniform_tree(points: &[PointType], &domain: &Domain, depth: u64) -> SingleNodeTree { + pub fn uniform_tree(points: &[PointType], &domain: &Domain, depth: u64, global_idxs: &[usize]) -> SingleNodeTree { // Encode points at deepest level, and map to specified depth // TODO: Automatically infer dimension @@ -32,25 +32,10 @@ impl SingleNodeTree { coordinate: point, base_key, encoded_key, - global_idx: i, + global_idx: global_idxs[i], }) } let mut points = tmp; - - // let mut points: Points = points - // .iter() - // .enumerate() - // .map(|(i, &p)| { - // let base_key = MortonKey::from_point(&p, &domain, DEEPEST_LEVEL); - // let encoded_key = MortonKey::from_point(&p, &domain, depth); - // Point { - // coordinate: p, - // base_key, - // encoded_key, - // global_idx: i, - // } - // }) - // .collect(); points.sort(); // Generate complete tree at specified depth @@ -140,22 +125,8 @@ impl SingleNodeTree { } /// Constructor for adaptive trees - pub fn adaptive_tree(points: &[PointType], &domain: &Domain, n_crit: u64) -> SingleNodeTree { + pub fn adaptive_tree(points: &[PointType], &domain: &Domain, n_crit: u64, global_idxs: &[usize]) -> SingleNodeTree { // Encode points at deepest level - // let mut points: Points = points - // .iter() - // .enumerate() - // .map(|(i, &p)| { - // let key = MortonKey::from_point(&p, &domain, DEEPEST_LEVEL); - // Point { - // coordinate: p, - // base_key: key, - // encoded_key: key, - // global_idx: i, - // } - // }) - // .collect(); - // TODO: Automatically infer dimension let dim = 3; let npoints = points.len() / dim; @@ -167,7 +138,7 @@ impl SingleNodeTree { coordinate: point, base_key: key, encoded_key: key, - global_idx: i, + global_idx: global_idxs[i], }) } let mut points = tmp; @@ -446,6 +417,8 @@ impl Tree for SingleNodeTree { type PointSlice<'a> = &'a [Point]; type PointData = f64; type PointDataSlice<'a> = &'a [f64]; + type GlobalIndex = usize; + type GlobalIndexSlice<'a> = &'a [usize]; /// Create a new single-node tree. If non-adaptive (uniform) trees are created, they are specified /// by a user defined maximum depth, if an adaptive tree is created it is specified by only by the @@ -455,6 +428,7 @@ impl Tree for SingleNodeTree { adaptive: bool, n_crit: Option, depth: Option, + global_idxs: Self::GlobalIndexSlice<'_> ) -> SingleNodeTree { // TODO: Come back and reconcile a runtime point dimension detector @@ -464,9 +438,9 @@ impl Tree for SingleNodeTree { let depth = depth.unwrap_or(DEEPEST_LEVEL); if adaptive { - SingleNodeTree::adaptive_tree(points, &domain, n_crit) + SingleNodeTree::adaptive_tree(points, &domain, n_crit, &global_idxs) } else { - SingleNodeTree::uniform_tree(points, &domain, depth) + SingleNodeTree::uniform_tree(points, &domain, depth, &global_idxs) } } @@ -522,7 +496,7 @@ impl Tree for SingleNodeTree { #[cfg(test)] mod test { - use rlst::dense::RawAccess; + use rlst::dense::{RawAccess, global}; use crate::implementations::helpers::{points_fixture, points_fixture_col}; @@ -535,7 +509,8 @@ mod test { // Test uniformly distributed data let points = points_fixture(npoints, Some(-1.0), Some(1.0)); - let tree = SingleNodeTree::new(points.data(), false, None, Some(depth)); + let global_idxs = (0..npoints).collect_vec(); + let tree = SingleNodeTree::new(points.data(), false, None, Some(depth), &global_idxs); // Test that the tree really is uniform let levels: Vec = tree @@ -552,7 +527,8 @@ mod test { // Test a column distribution of data let points = points_fixture_col(npoints); - let tree = SingleNodeTree::new(points.data(), false, None, Some(depth)); + let global_idxs = (0..npoints).collect_vec(); + let tree = SingleNodeTree::new(points.data(), false, None, Some(depth), &global_idxs); // Test that the tree really is uniform let levels: Vec = tree @@ -584,10 +560,11 @@ mod test { pub fn test_adaptive_tree() { let npoints = 10000; let points = points_fixture(npoints, None, None); + let global_idxs = (0..npoints).collect_vec(); let adaptive = true; let n_crit = 150; - let tree = SingleNodeTree::new(points.data(), adaptive, Some(n_crit), None); + let tree = SingleNodeTree::new(points.data(), adaptive, Some(n_crit), None, &global_idxs); // Test that tree is not uniform let levels: Vec = tree @@ -628,8 +605,9 @@ mod test { pub fn test_no_overlaps() { let npoints = 10000; let points = points_fixture(npoints, None, None); - let uniform = SingleNodeTree::new(points.data(), false, Some(150), Some(4)); - let adaptive = SingleNodeTree::new(points.data(), true, Some(150), None); + let global_idxs = (0..npoints).collect_vec(); + let uniform = SingleNodeTree::new(points.data(), false, Some(150), Some(4),&global_idxs); + let adaptive = SingleNodeTree::new(points.data(), true, Some(150), None, &global_idxs); test_no_overlaps_helper(uniform.get_leaves().unwrap()); test_no_overlaps_helper(adaptive.get_leaves().unwrap()); } @@ -781,8 +759,9 @@ mod test { // Uniform tree let npoints = 10000; let points = points_fixture(npoints, None, None); + let global_idxs = (0..npoints).collect_vec(); let depth = 3; - let tree = SingleNodeTree::new(points.data(), false, None, Some(depth)); + let tree = SingleNodeTree::new(points.data(), false, None, Some(depth), &global_idxs); let keys = tree.get_all_keys().unwrap(); @@ -812,7 +791,8 @@ mod test { // Adaptive tree let ncrit = 150; - let tree = SingleNodeTree::new(points.data(), true, Some(ncrit), None); + + let tree = SingleNodeTree::new(points.data(), true, Some(ncrit), None, &global_idxs); let keys = tree.get_all_keys().unwrap(); let depth = tree.get_depth(); From 4ab6196b9b8a0708c31a79deb11c8ea2e750829f Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Mon, 10 Jul 2023 11:19:57 +0100 Subject: [PATCH 28/40] Add convolution grid function that is in column major order --- tree/src/implementations/impl_morton.rs | 185 ++++++++++++++---------- 1 file changed, 105 insertions(+), 80 deletions(-) diff --git a/tree/src/implementations/impl_morton.rs b/tree/src/implementations/impl_morton.rs index d7988692..5b9ab008 100644 --- a/tree/src/implementations/impl_morton.rs +++ b/tree/src/implementations/impl_morton.rs @@ -683,79 +683,86 @@ impl MortonKey { domain: &Domain, surface: &[f64], alpha: f64, - ) -> Vec<[f64; 3]> { - // Number of convolution points along each axis - let n = 2 * order - 1; - let dim = 3; - let mut grid = vec![[0f64; 3]; n.pow(dim)]; - - for i in 0..n { - for j in 0..n { - for k in 0..n { - grid[i * n * n + j * n + k] = [i as f64, j as f64, k as f64] - } + ) -> Vec { + // Number of convolution points along each axis + let n = 2 * order - 1; + + let dim: usize = 3; + let ncoeffs = n.pow(dim as u32); + let mut grid = vec![0f64; dim*ncoeffs]; + let mut idx = 0; + + for i in 0..n { + for j in 0..n { + for k in 0..n { + grid[idx] = i as f64; + grid[(dim - 2) * ncoeffs + idx] = j as f64; + grid[(dim - 1) * ncoeffs + idx] = k as f64; + idx += 1; } } + } - // Dilate convolution grid - let diameter = self - .diameter(domain) - .iter() - .map(|x| x * alpha) - .collect_vec(); - - // Shift and scale to embed surface grid inside convolution grid - // Scale - grid.iter_mut().for_each(|point| { - point.iter_mut().enumerate().for_each(|(i, value)| { - *value *= 1.0 / ((n - 1) as f64); // normalize - *value *= diameter[i]; // find diameter - *value *= 2.0; // convolution grid is 2x as large - }); - }); - - // Shift - let sums: Vec = grid.iter().map(|point| point.iter().sum()).collect(); - let max_index = sums - .iter() - .enumerate() - .max_by(|a, b| a.1.partial_cmp(b.1).unwrap()) - .map(|(index, _)| index) - .unwrap(); - let max_conv_point = grid[max_index]; - - let ncoeffs = surface.len() / 3; - let sums: Vec<_> = (0..ncoeffs) - .map(|i| surface[i] + surface[ncoeffs + i] + surface[2 * ncoeffs + i]) - .collect(); - - let max_index = sums - .iter() - .enumerate() - .max_by(|a, b| a.1.partial_cmp(b.1).unwrap()) - .map(|(index, _)| index) - .unwrap(); - - let max_surface_point = [ - surface[max_index], - surface[max_index + ncoeffs], - surface[max_index + 2 * ncoeffs], - ]; - - let diff = max_conv_point - .iter() - .zip(max_surface_point) - .map(|(a, b)| b - a) - .collect_vec(); - - grid.iter_mut().for_each(|point| { - point - .iter_mut() - .enumerate() - .for_each(|(i, value)| *value += diff[i]) - }); - - grid + let diameter = self + .diameter(domain) + .iter() + .map(|x| x * alpha) + .collect_vec(); + + // Shift and scale to embed surface grid inside convolution grid + // Scale + grid.iter_mut().for_each(|point| { + *point *= 1.0 / ((n - 1) as f64); // normalize + *point *= diameter[0]; // find diameter + *point *= 2.0; // convolution grid is 2x as large + }); + + // Shift + let sums: Vec<_> = (0..ncoeffs) + .map(|i| grid[i] + grid[ncoeffs + i] + grid[2*ncoeffs + i]) + .collect(); + let max_index = sums + .iter() + .enumerate() + .max_by(|a, b| a.1.partial_cmp(b.1).unwrap()) + .map(|(index, _)| index) + .unwrap(); + let max_conv_point = [ + grid[max_index], + grid[max_index + ncoeffs], + grid[max_index + 2 * ncoeffs] + ]; + + // + let nsurf = surface.len() / dim; + let sums: Vec<_> = (0..nsurf) + .map(|i| surface[i] + surface[nsurf + i] + surface[2*nsurf + i]) + .collect(); + let max_index = sums + .iter() + .enumerate() + .max_by(|a, b| a.1.partial_cmp(b.1).unwrap()) + .map(|(index, _)| index) + .unwrap(); + let max_surface_point = [ + surface[max_index], + surface[max_index + nsurf], + surface[max_index + 2 * nsurf] + ]; + + let diff = max_conv_point + .iter() + .zip(max_surface_point) + .map(|(a, b)| b - a) + .collect_vec(); + + for i in 0..dim { + grid[i*ncoeffs..(i+1)*ncoeffs] + .iter_mut() + .for_each(|value| *value += diff[i]); + } + + grid } /// Compute surface grid for KiFMM at this Morton key. @@ -884,7 +891,7 @@ impl MortonKeyInterface for MortonKey { #[cfg(test)] mod test { use itertools::Itertools; - use rlst::dense::{RawAccess, Shape}; + use rlst::{dense::{RawAccess, Shape, rlst_pointer_mat, LayoutType, Dynamic}, common::tools::PrettyPrint}; use std::vec; use crate::implementations::helpers::points_fixture; @@ -1767,20 +1774,38 @@ mod test { diameter: [1., 1., 1.], }; - let order = 3; + let order = 2; let alpha = 1.0; let key = MortonKey::from_point(&point, &domain, 0); - let surface = key.compute_surface(&domain, order, alpha); - let conv_grid = key.convolution_grid(order, &domain, &surface, alpha); + let surface_grid = key.compute_surface(&domain, order, alpha); + let conv_grid = key.convolution_grid(order, &domain, &surface_grid, alpha); + + let mut surface = Vec::new(); + let nsurf = surface_grid.len() /3 ; + for i in 0..nsurf { + surface.push( + [ + surface_grid[i], + surface_grid[i+nsurf], + surface_grid[i+2*nsurf] + ] + ) + } + let mut convolution = Vec::new(); + let nconv = conv_grid.len() / 3; + for i in 0..nconv { + convolution.push( + [ + conv_grid[i], + conv_grid[i+nconv], + conv_grid[i+2*nconv], + ] + ) + } - // Test that surface grid is embedded in convolution grid - let surf_grid: Vec<[f64; 3]> = surface - .chunks_exact(3) - .map(|chunk| [chunk[0], chunk[1], chunk[2]]) - .collect(); + assert!(surface.iter().all(|point| convolution.contains(point))); - assert!(surf_grid.iter().all(|point| conv_grid.contains(point))); } } From 1765b5ed024805e3b5f20faa14895225141d95b6 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Tue, 11 Jul 2023 12:50:18 +0100 Subject: [PATCH 29/40] Add new helper functions --- field/Cargo.toml | 16 +- field/src/field.rs | 441 ++++++++++++++++++++++--------------------- field/src/helpers.rs | 225 +++++++++++++++++++++- field/src/types.rs | 38 ++-- fmm/src/fmm.rs | 1 + tools/src/arrays.rs | 4 + traits/src/arrays.rs | 3 + 7 files changed, 493 insertions(+), 235 deletions(-) diff --git a/field/Cargo.toml b/field/Cargo.toml index a8c70c98..bf9352b2 100644 --- a/field/Cargo.toml +++ b/field/Cargo.toml @@ -23,10 +23,16 @@ crate-type = ["lib", "cdylib"] bempp-traits = { path = "../traits" } bempp-tree = { path = "../tree" } bempp-kernel = { path = "../kernel" } +bempp-tools = { path = "../tools" } itertools = "0.10" -ndarray = { version = "*", features = ["blas"]} -ndarray-linalg = { version = "*", features = ["openblas-system"] } -ndarray-ndimage = "0.3.0" -ndrustfft = "0.4.0" +# ndarray = { version = "*", features = ["blas"]} +# ndarray-linalg = { version = "*", features = ["openblas-system"] } +# ndarray-ndimage = "0.3.0" +# ndrustfft = "0.4.0" num = "0.4" -rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse"} \ No newline at end of file +rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse"} +realfft = "3.3.0" +rustfft = "6.1.0" + +[dev-dependencies] +approx_eq = "0.1.8" diff --git a/field/src/field.rs b/field/src/field.rs index fceaf53c..cbe7e779 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -1,18 +1,22 @@ +use std::collections::HashMap; + +use num::Complex; use rlst::{ algorithms::{ linalg::LinAlg, traits::svd::{Mode, Svd}, }, common::traits::{Eval, NewLikeSelf, Transpose}, - dense::{rlst_mat, traits::*, Dot, Shape}, + dense::{rlst_mat, traits::*, Dot, Shape, rlst_pointer_mat}, }; -use bempp_traits::{field::FieldTranslationData, kernel::Kernel, types::EvalType}; +use bempp_traits::{field::FieldTranslationData, kernel::Kernel, types::EvalType, arrays::Array3DAccess, fmm::Fmm}; use bempp_tree::types::domain::Domain; +use bempp_tools::Array3D; use crate::{ - helpers::compute_transfer_vectors, - types::{SvdFieldTranslationKiFmm, SvdM2lEntry, TransferVector}, + helpers::{compute_transfer_vectors, pad3, flip3, rfft3, irfft3}, + types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, SvdM2lEntry, FftM2lEntry, TransferVector}, }; impl FieldTranslationData for SvdFieldTranslationKiFmm @@ -166,7 +170,6 @@ where result.k = ncoeffs } } else { - // TODO: Should be data driven if nothing is provided by the user result.k = 50; } @@ -177,210 +180,224 @@ where } } -// impl FieldTranslationData for FftFieldTranslationNaiveKiFmm -// where -// T: Kernel + Default, -// { -// type Domain = Domain; -// type M2LOperators = Vec>, Dim<[usize; 3]>>>; -// type TransferVector = Vec; - -// fn compute_m2l_operators( -// &self, -// expansion_order: usize, -// domain: Self::Domain, -// ) -> Self::M2LOperators { -// type TranslationType = ArrayBase>, Dim<[usize; 3]>>; -// let mut result: Vec = Vec::new(); - -// for t in self.transfer_vectors.iter() { -// let source_equivalent_surface = -// t.source -// .compute_surface(&domain, expansion_order, self.alpha); - -// let conv_grid_sources = t.source.convolution_grid( -// expansion_order, -// &domain, -// &source_equivalent_surface, -// self.alpha, -// ); - -// let target_check_surface = t.target.compute_surface(&domain, expansion_order, self.alpha); - -// // TODO: Remove dim -// let dim = 3; -// // Find min target -// let ncoeffs: usize = target_check_surface.len() / dim; -// let sums: Vec<_> = (0..ncoeffs) -// .map(|i| target_check_surface[i] + target_check_surface[ncoeffs + i] + target_check_surface[2*ncoeffs + i]) -// .collect(); - -// let min_index = sums -// .iter() -// .enumerate() -// .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) -// .map(|(index, _)| index) -// .unwrap(); - -// let min_target = [ -// target_check_surface[min_index], -// target_check_surface[min_index + ncoeffs], -// target_check_surface[min_index + 2 * ncoeffs], -// ]; - -// // TODO: Fix compute_kernel to work with new kernel -// let kernel = self.compute_kernel(expansion_order, &conv_grid_sources, min_target); -// let m = kernel.len(); -// let n = kernel[0].len(); -// let k = kernel[0][0].len(); - -// // Precompute and store the FFT of each unique kernel interaction -// let kernel = -// Array3::from_shape_vec((m, n, k), kernel.into_iter().flatten().flatten().collect()) -// .unwrap(); - -// // Begin by calculating pad lengths along each dimension -// let p = 2 * m; -// let q = 2 * n; -// let r = 2 * k; - -// let padding = [[0, p - m], [0, q - n], [0, r - k]]; - -// let padded_kernel = pad(&kernel, &padding, PadMode::Constant(0.)); - -// // Flip the kernel -// let padded_kernel = padded_kernel.slice(s![..;-1,..;-1,..;-1]).to_owned(); -// let mut padded_kernel_hat: Array3> = Array3::zeros((p, q, r / 2 + 1)); - -// // Compute FFT of kernel for this transfer vector -// { -// // 1. Init the handlers for FFTs along each axis -// let mut handler_ax0 = FftHandler::::new(p); -// let mut handler_ax1 = FftHandler::::new(q); -// let mut handler_ax2 = R2cFftHandler::::new(r); - -// // 2. Compute the transform along each axis -// let mut tmp1: Array3> = Array3::zeros((p, q, r / 2 + 1)); -// ndfft_r2c(&padded_kernel, &mut tmp1, &mut handler_ax2, 2); -// let mut tmp2: Array3> = Array3::zeros((p, q, r / 2 + 1)); -// ndfft(&tmp1, &mut tmp2, &mut handler_ax1, 1); -// ndfft(&tmp2, &mut padded_kernel_hat, &mut handler_ax0, 0); -// } - -// // Store FFT of kernel for this transfer vector -// { -// result.push(padded_kernel_hat); -// } -// } - -// result -// } - -// fn compute_transfer_vectors(&self) -> Self::TransferVector { -// compute_transfer_vectors() -// } - -// fn ncoeffs(&self, expansion_order: usize) -> usize { -// 6 * (expansion_order - 1).pow(2) + 2 -// } -// } - -// impl FftFieldTranslationNaiveKiFmm -// where -// T: Kernel + Default, -// { -// pub fn new(kernel: T, expansion_order: usize, domain: Domain, alpha: f64) -> Self { -// let mut result = FftFieldTranslationNaiveKiFmm::default(); - -// // Create maps between surface and convolution grids -// let (surf_to_conv, conv_to_surf) = -// FftFieldTranslationNaiveKiFmm::::compute_surf_to_conv_map(expansion_order); -// result.surf_to_conv_map = surf_to_conv; -// result.conv_to_surf_map = conv_to_surf; - -// result.kernel = kernel; - -// result.alpha = alpha; -// result.transfer_vectors = result.compute_transfer_vectors(); -// result.m2l = result.compute_m2l_operators(expansion_order, domain); - -// result -// } - -// pub fn compute_surf_to_conv_map( -// expansion_order: usize, -// ) -> (HashMap, HashMap) { -// let n = 2 * expansion_order - 1; - -// // Index maps between surface and convolution grids -// let mut surf_to_conv: HashMap = HashMap::new(); -// let mut conv_to_surf: HashMap = HashMap::new(); - -// // Initialise surface grid index -// let mut surf_index = 0; - -// // The boundaries of the surface grid -// let lower = expansion_order - 1; -// let upper = 2 * expansion_order - 2; - -// // Iterate through the entire convolution grid marking the boundaries -// // This makes the map much easier to understand and debug -// for i in 0..n { -// for j in 0..n { -// for k in 0..n { -// let conv_idx = i * n * n + j * n + k; -// if (i >= lower && j >= lower && (k == lower || k == upper)) -// || (j >= lower && k >= lower && (i == lower || i == upper)) -// || (k >= lower && i >= lower && (j == lower || j == upper)) -// { -// surf_to_conv.insert(surf_index, conv_idx); -// conv_to_surf.insert(conv_idx, surf_index); -// surf_index += 1; -// } -// } -// } -// } - -// (surf_to_conv, conv_to_surf) -// } - -// pub fn compute_kernel( -// &self, -// expansion_order: usize, -// convolution_grid: &[[f64; 3]], -// min_target: [f64; 3], -// ) -> Vec>> { -// let n = 2 * expansion_order - 1; -// let mut result = vec![vec![vec![0f64; n]; n]; n]; - -// for (i, result_i) in result.iter_mut().enumerate() { -// for (j, result_ij) in result_i.iter_mut().enumerate() { -// for (k, result_ijk) in result_ij.iter_mut().enumerate() { -// let conv_idx = i * n * n + j * n + k; -// let src = convolution_grid[conv_idx]; -// *result_ijk = self.kernel.kernel(&src[..], &min_target[..]); -// } -// } -// } -// result -// } - -// pub fn compute_signal(&self, expansion_order: usize, charges: &[f64]) -> Vec>> { -// let n = 2 * expansion_order - 1; -// let mut result = vec![vec![vec![0f64; n]; n]; n]; - -// for (i, result_i) in result.iter_mut().enumerate() { -// for (j, result_ij) in result_i.iter_mut().enumerate() { -// for (k, result_ijk) in result_ij.iter_mut().enumerate() { -// let conv_idx = i * n * n + j * n + k; -// if self.conv_to_surf_map.contains_key(&conv_idx) { -// let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); -// *result_ijk = charges[*surf_idx] -// } -// } -// } -// } - -// result -// } -// } +impl FieldTranslationData for FftFieldTranslationNaiveKiFmm +where + T: Kernel + Default, +{ + type Domain = Domain; + type M2LOperators = Vec>>; + type TransferVector = Vec; + + fn compute_m2l_operators( + &self, + expansion_order: usize, + domain: Self::Domain, + ) -> Self::M2LOperators { + let mut result: Vec>> = Vec::new(); + + for t in self.transfer_vectors.iter() { + let source_equivalent_surface = + t.source + .compute_surface(&domain, expansion_order, self.alpha); + + let conv_grid_sources = t.source.convolution_grid( + expansion_order, + &domain, + &source_equivalent_surface, + self.alpha, + ); + + let target_check_surface = t.target.compute_surface(&domain, expansion_order, self.alpha); + + // TODO: Remove dim + let dim = 3; + // Find min target + let ncoeffs: usize = target_check_surface.len() / dim; + let sums: Vec<_> = (0..ncoeffs) + .map(|i| target_check_surface[i] + target_check_surface[ncoeffs + i] + target_check_surface[2*ncoeffs + i]) + .collect(); + + let min_index = sums + .iter() + .enumerate() + .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) + .map(|(index, _)| index) + .unwrap(); + + let min_target = [ + target_check_surface[min_index], + target_check_surface[min_index + ncoeffs], + target_check_surface[min_index + 2 * ncoeffs], + ]; + + let kernel = self.compute_kernel(expansion_order, &conv_grid_sources, min_target); + + let &(m, n, o) = kernel.shape(); + + // Precompute and store the FFT of each unique kernel interaction + + // Begin by calculating pad lengths along each dimension + let p = 2 * m; + let q = 2 * n; + let r = 2 * o; + + let padded_kernel = pad3(&kernel, (p-m, q-n, r-o), (0, 0, 0)); + + // Flip the kernel + let padded_kernel = flip3(&padded_kernel); + + // Compute FFT of kernel for this transfer vector + let padded_kernel_hat = rfft3(&padded_kernel); + + // Store FFT of kernel for this transfer vector + result.push(padded_kernel_hat) + } + + result + } + + fn compute_transfer_vectors(&self) -> Self::TransferVector { + compute_transfer_vectors() + } + + fn ncoeffs(&self, expansion_order: usize) -> usize { + 6 * (expansion_order - 1).pow(2) + 2 + } +} + +impl FftFieldTranslationNaiveKiFmm +where + T: Kernel + Default, +{ + // pub fn new(kernel: T, expansion_order: usize, domain: Domain, alpha: f64) -> Self { + // let mut result = FftFieldTranslationNaiveKiFmm::default(); + + // // Create maps between surface and convolution grids + // let (surf_to_conv, conv_to_surf) = + // FftFieldTranslationNaiveKiFmm::::compute_surf_to_conv_map(expansion_order); + // result.surf_to_conv_map = surf_to_conv; + // result.conv_to_surf_map = conv_to_surf; + + // result.kernel = kernel; + + // result.alpha = alpha; + // result.transfer_vectors = result.compute_transfer_vectors(); + // result.m2l = result.compute_m2l_operators(expansion_order, domain); + + // result + // } + + // pub fn compute_surf_to_conv_map( + // expansion_order: usize, + // ) -> (HashMap, HashMap) { + // let n = 2 * expansion_order - 1; + + // // Index maps between surface and convolution grids + // let mut surf_to_conv: HashMap = HashMap::new(); + // let mut conv_to_surf: HashMap = HashMap::new(); + + // // Initialise surface grid index + // let mut surf_index = 0; + + // // The boundaries of the surface grid + // let lower = expansion_order - 1; + // let upper = 2 * expansion_order - 2; + + // // Iterate through the entire convolution grid marking the boundaries + // // This makes the map much easier to understand and debug + // for i in 0..n { + // for j in 0..n { + // for k in 0..n { + // let conv_idx = i * n * n + j * n + k; + // if (i >= lower && j >= lower && (k == lower || k == upper)) + // || (j >= lower && k >= lower && (i == lower || i == upper)) + // || (k >= lower && i >= lower && (j == lower || j == upper)) + // { + // surf_to_conv.insert(surf_index, conv_idx); + // conv_to_surf.insert(conv_idx, surf_index); + // surf_index += 1; + // } + // } + // } + // } + + // (surf_to_conv, conv_to_surf) + // } + + pub fn compute_kernel( + &self, + expansion_order: usize, + convolution_grid: &[f64], + min_target: [f64; 3], + ) -> Array3D + // -> Vec>> + { + let n = 2 * expansion_order - 1; + // let mut result = vec![vec![vec![0f64; n]; n]; n]; + let mut result = Array3D::::new((n, n, n)); + let nconv = n.pow(3); + + let mut kernel_evals = vec![0f64; nconv]; + + self.kernel.assemble_st( + EvalType::Value, + convolution_grid, + &min_target[..], + &mut kernel_evals[..] + ); + + for i in 0..n { + for j in 0..n { + for k in 0..n { + let conv_idx = i * n * n + j * n + k; + *result.get_mut(i, j, k).unwrap() = kernel_evals[conv_idx]; + } + } + } + + result + } + + // pub fn compute_signal(&self, expansion_order: usize, charges: &[f64]) -> Vec>> { + // let n = 2 * expansion_order - 1; + // let mut result = vec![vec![vec![0f64; n]; n]; n]; + + // for (i, result_i) in result.iter_mut().enumerate() { + // for (j, result_ij) in result_i.iter_mut().enumerate() { + // for (k, result_ijk) in result_ij.iter_mut().enumerate() { + // let conv_idx = i * n * n + j * n + k; + // if self.conv_to_surf_map.contains_key(&conv_idx) { + // let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); + // *result_ijk = charges[*surf_idx] + // } + // } + // } + // } + + // result + // } +} + +#[cfg(test)] +mod test { + + use bempp_kernel::laplace_3d::Laplace3dKernel; + + use super::*; + + #[test] + fn test_fft() { + + let kernel = Laplace3dKernel::::new(); + let transfer_vectors = compute_transfer_vectors(); + + let fft = FftFieldTranslationNaiveKiFmm { + alpha: 1.05, + kernel, + transfer_vectors + }; + + let domain = Domain { origin: [0., 0., 0.], diameter: [1.0, 1.0, 1.0] }; + fft.compute_m2l_operators(2, domain); + } +} \ No newline at end of file diff --git a/field/src/helpers.rs b/field/src/helpers.rs index df4fe287..7df8904a 100644 --- a/field/src/helpers.rs +++ b/field/src/helpers.rs @@ -1,7 +1,11 @@ -use std::collections::HashSet; +use std::{collections::HashSet, usize}; use itertools::Itertools; +use realfft::{num_complex::Complex, RealFftPlanner, num_traits::Zero}; +use rustfft::{FftNum, FftPlanner}; +use bempp_tools::Array3D; +use bempp_traits::arrays::Array3DAccess; use bempp_tree::types::{domain::Domain, morton::MortonKey}; use crate::types::TransferVector; @@ -98,3 +102,222 @@ pub fn compute_transfer_vectors() -> Vec { result } + +pub fn pad3(arr: &Array3D, pad_size: (usize, usize, usize), pad_index: (usize, usize, usize)) -> Array3D +where + T: Clone+FftNum +{ + let &(m, n, o) = arr.shape(); + + let (x, y, z) = pad_index; + let (p, q, r) = pad_size; + + + // Check that there is enough space for pad + assert!(x+p <= m+p && y+q <= n+q && z+r <= o+r); + + let mut padded = Array3D::new((p+m, q+n, r+o)); + + for i in 0..m { + for j in 0..n { + for k in 0..o { + *padded.get_mut(x+i, y+j, z+k).unwrap() = *arr.get(i, j, k).unwrap(); + } + } + } + + padded +} + +pub fn flip3(arr: &Array3D) -> Array3D +where + T: Clone+FftNum +{ + let mut flipped = Array3D::new(*arr.shape()); + + let &(m, n, o) = arr.shape(); + + for i in 0..m { + for j in 0..n { + for k in 0..o { + *flipped.get_mut(i, j, k).unwrap() = *arr.get(m-i-1, n-j-1, o-k-1).unwrap(); + } + } + } + + flipped +} + +pub fn rfft3(input_arr: &Array3D) -> Array3D> +where + T: Clone + FftNum, +{ + let &(m, n, o) = input_arr.shape(); + + let m_ = m / 2 + 1; + let mut transformed = Array3D::>::new((m_, n, o)); + + let mut real_planner = RealFftPlanner::::new(); + let real_fft = real_planner.plan_fft_forward(m); + let mut planner = FftPlanner::::new(); + let fftn = planner.plan_fft_forward(n); + let ffto = planner.plan_fft_forward(o); + let mut scratch = vec![Complex::zero(); m]; + + // X dimension + for j in 0..n { + for k in 0..o { + // Form slices + let mut input = Vec::new(); + for i in 0..m { + input.push(*input_arr.get(i, j, k).unwrap()) + } + + let mut output = vec![Complex::zero(); m_]; + let _ = real_fft.process_with_scratch(&mut input, &mut output, &mut scratch); + + for i in 0..m_ { + *transformed.get_mut(i, j, k).unwrap() = output[i]; + } + } + } + + // Y dimension + for i in 0..m_ { + for k in 0..o { + // Form slices + let mut data = Vec::new(); + for j in 0..n { + data.push(*transformed.get(i, j, k).unwrap()) + } + let _ = fftn.process_with_scratch(&mut data, &mut scratch); + for j in 0..n { + *transformed.get_mut(i, j, k).unwrap() = data[j]; + } + } + } + + // Z dimension + for i in 0..m_ { + for j in 0..n { + let mut data = Vec::new(); + for k in 0..o { + data.push(*transformed.get(i, j, k).unwrap()) + } + let _ = ffto.process_with_scratch(&mut data, &mut scratch); + for k in 0..o { + *transformed.get_mut(i, j, k).unwrap() = data[k]; + } + } + } + + transformed +} + +pub fn irfft3(input_arr: &Array3D>, real_dim: usize) -> Array3D +where + T: FftNum + Clone, +{ + let &(m, n, o) = input_arr.shape(); + + let mut transformed = Array3D::>::new((real_dim, n, o)); + let mut result = Array3D::new((real_dim, n, o)); + let mut scratch = vec![Complex::zero(); o]; + + let mut real_planner = RealFftPlanner::::new(); + let real_fft = real_planner.plan_fft_inverse(real_dim); + let mut planner = FftPlanner::::new(); + let fftn = planner.plan_fft_inverse(n); + let ffto = planner.plan_fft_inverse(o); + + // Z axis + for i in 0..m { + for j in 0..n { + let mut data = Vec::new(); + for k in 0..o { + data.push(*input_arr.get(i, j, k).unwrap()) + } + let _ = ffto.process_with_scratch(&mut data, &mut scratch); + let norm = T::one() / T::from_usize(data.len()).unwrap(); + for k in 0..o { + *transformed.get_mut(i, j, k).unwrap() = data[k] * norm; + } + } + } + + // Y axis + for i in 0..m { + for k in 0..o { + let mut data = Vec::new(); + for j in 0..n { + data.push(*transformed.get_mut(i, j, k).unwrap()); + } + let _ = fftn.process_with_scratch(&mut data, &mut scratch); + let norm = T::one() / T::from_usize(data.len()).unwrap(); + for j in 0..n { + *transformed.get_mut(i, j, k).unwrap() = data[j] * norm; + } + } + } + + // X axis + for j in 0..n { + for k in 0..o { + let mut input = Vec::new(); + for i in 0..m { + input.push(*transformed.get_mut(i, j, k).unwrap()); + } + let mut output = vec![T::zero(); real_dim]; + + let _ = real_fft.process_with_scratch(&mut input, &mut output, &mut scratch); + let norm = T::one() / T::from_usize(real_dim).unwrap(); + + for i in 0..real_dim { + *transformed.get_mut(i, j, k).unwrap() = Complex::from(output[i] * norm); + } + } + } + + for i in 0..real_dim { + for j in 0..n { + for k in 0..o { + *result.get_mut(i, j, k).unwrap() = T::from(transformed.get(i, j, k).unwrap().re); + } + } + } + + result +} + +#[cfg(test)] +mod test { + + use super::*; + + use approx_eq::assert_approx_eq; + + #[test] + fn test_rfft3() { + let mut input = Array3D::new((3, 3, 3)); + + for i in 0..3 { + for j in 0..3 { + for k in 0..3 { + *input.get_mut(i, j, k).unwrap() = (i + j * 3 + k * 3 * 3 + 1) as f64 + } + } + } + + let transformed = rfft3(&input); + + let result = irfft3(&transformed, 3); + + for i in 0..3 { + for j in 0..3 { + for k in 0..3 { + assert_approx_eq!(*result.get(i, j, k).unwrap(), *input.get(i, j, k).unwrap()); + } + } + } + } +} \ No newline at end of file diff --git a/field/src/types.rs b/field/src/types.rs index 2d36d5eb..0e4aef4d 100644 --- a/field/src/types.rs +++ b/field/src/types.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use rlst::dense::{ base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix, Dynamic, }; @@ -9,27 +11,29 @@ use bempp_tree::types::morton::MortonKey; pub type SvdM2lEntry = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; -// // #[derive(Default)] -// pub struct FftFieldTranslationNaiveKiFmm -// where -// T: Kernel + Default, -// { -// // Amount to dilate inner check surface by -// pub alpha: f64, +pub type FftM2lEntry = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; + +pub struct FftFieldTranslationNaiveKiFmm +where + T: Kernel + Default, +{ + // Amount to dilate inner check surface by + pub alpha: f64, -// // Maps between convolution and surface grids -// pub surf_to_conv_map: HashMap, -// pub conv_to_surf_map: HashMap, + // // Maps between convolution and surface grids + // pub surf_to_conv_map: HashMap, + // pub conv_to_surf_map: HashMap, -// // Precomputed FFT of unique kernel interactions placed on -// // convolution grid. -// pub m2l: Vec, + // // Precomputed FFT of unique kernel interactions placed on + // // convolution grid. + // pub m2l: Vec, -// // Unique transfer vectors to lookup m2l unique kernel interactions -// pub transfer_vectors: Vec, + // Unique transfer vectors to lookup m2l unique kernel interactions + pub transfer_vectors: Vec, -// pub kernel: T, -// } + pub kernel: T, +} pub struct SvdFieldTranslationKiFmm where diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 7aa2ba33..26609749 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,6 +1,7 @@ // TODO: FFT convolutions implemented in rlst // TODO: Should be generic over kernel/kernel scale float type parameter - this requires trees to be generic over float type // TODO: Tree should infer dimension from the data (stride). +// TODO: Data driven SVD compression in the case the user specified no compression parameter. use itertools::Itertools; use std::{ diff --git a/tools/src/arrays.rs b/tools/src/arrays.rs index f2bb5ad4..bef69ca7 100644 --- a/tools/src/arrays.rs +++ b/tools/src/arrays.rs @@ -138,6 +138,10 @@ impl Array3DAccess for Array3D { fn shape(&self) -> &(usize, usize, usize) { &self.shape } + + fn get_data(&self) -> &[T] { + &self.data + } } /// A four-dimensional rectangular array diff --git a/traits/src/arrays.rs b/traits/src/arrays.rs index a9625cbb..6d4c684d 100644 --- a/traits/src/arrays.rs +++ b/traits/src/arrays.rs @@ -87,6 +87,9 @@ pub trait Array3DAccess { /// Get the shape of the array fn shape(&self) -> &(usize, usize, usize); + + /// Get a pointer to the raw data in the array + fn get_data(&self) -> &[T]; } pub trait Array4DAccess { From c270d3bcf434212dab7012816fa39efc035aa90f Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Tue, 11 Jul 2023 15:29:20 +0100 Subject: [PATCH 30/40] Start working on translation --- field/src/field.rs | 174 ++++++++++++++++++----------------- field/src/types.rs | 17 ++-- fmm/src/field_translation.rs | 95 ++++++++++++++++++- 3 files changed, 190 insertions(+), 96 deletions(-) diff --git a/field/src/field.rs b/field/src/field.rs index cbe7e779..db0e2419 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::{collections::HashMap, hash::Hash}; use num::Complex; use rlst::{ @@ -7,7 +7,7 @@ use rlst::{ traits::svd::{Mode, Svd}, }, common::traits::{Eval, NewLikeSelf, Transpose}, - dense::{rlst_mat, traits::*, Dot, Shape, rlst_pointer_mat}, + dense::{rlst_mat, traits::*, Dot, Shape}, }; use bempp_traits::{field::FieldTranslationData, kernel::Kernel, types::EvalType, arrays::Array3DAccess, fmm::Fmm}; @@ -15,7 +15,7 @@ use bempp_tree::types::domain::Domain; use bempp_tools::Array3D; use crate::{ - helpers::{compute_transfer_vectors, pad3, flip3, rfft3, irfft3}, + helpers::{compute_transfer_vectors, pad3, flip3, rfft3}, types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, SvdM2lEntry, FftM2lEntry, TransferVector}, }; @@ -185,7 +185,7 @@ where T: Kernel + Default, { type Domain = Domain; - type M2LOperators = Vec>>; + type M2LOperators = Vec; type TransferVector = Vec; fn compute_m2l_operators( @@ -193,7 +193,7 @@ where expansion_order: usize, domain: Self::Domain, ) -> Self::M2LOperators { - let mut result: Vec>> = Vec::new(); + let mut result = Vec::new(); for t in self.transfer_vectors.iter() { let source_equivalent_surface = @@ -269,60 +269,65 @@ impl FftFieldTranslationNaiveKiFmm where T: Kernel + Default, { - // pub fn new(kernel: T, expansion_order: usize, domain: Domain, alpha: f64) -> Self { - // let mut result = FftFieldTranslationNaiveKiFmm::default(); - - // // Create maps between surface and convolution grids - // let (surf_to_conv, conv_to_surf) = - // FftFieldTranslationNaiveKiFmm::::compute_surf_to_conv_map(expansion_order); - // result.surf_to_conv_map = surf_to_conv; - // result.conv_to_surf_map = conv_to_surf; - - // result.kernel = kernel; - - // result.alpha = alpha; - // result.transfer_vectors = result.compute_transfer_vectors(); - // result.m2l = result.compute_m2l_operators(expansion_order, domain); - - // result - // } - - // pub fn compute_surf_to_conv_map( - // expansion_order: usize, - // ) -> (HashMap, HashMap) { - // let n = 2 * expansion_order - 1; - - // // Index maps between surface and convolution grids - // let mut surf_to_conv: HashMap = HashMap::new(); - // let mut conv_to_surf: HashMap = HashMap::new(); - - // // Initialise surface grid index - // let mut surf_index = 0; - - // // The boundaries of the surface grid - // let lower = expansion_order - 1; - // let upper = 2 * expansion_order - 2; - - // // Iterate through the entire convolution grid marking the boundaries - // // This makes the map much easier to understand and debug - // for i in 0..n { - // for j in 0..n { - // for k in 0..n { - // let conv_idx = i * n * n + j * n + k; - // if (i >= lower && j >= lower && (k == lower || k == upper)) - // || (j >= lower && k >= lower && (i == lower || i == upper)) - // || (k >= lower && i >= lower && (j == lower || j == upper)) - // { - // surf_to_conv.insert(surf_index, conv_idx); - // conv_to_surf.insert(conv_idx, surf_index); - // surf_index += 1; - // } - // } - // } - // } - - // (surf_to_conv, conv_to_surf) - // } + pub fn new(kernel: T, expansion_order: usize, domain: Domain, alpha: f64) -> Self { + + let mut result = FftFieldTranslationNaiveKiFmm { + alpha, + kernel, + surf_to_conv_map: HashMap::default(), + conv_to_surf_map: HashMap::default(), + m2l: Vec::default(), + transfer_vectors: Vec::default(), + }; + + // Create maps between surface and convolution grids + let (surf_to_conv, conv_to_surf) = + FftFieldTranslationNaiveKiFmm::::compute_surf_to_conv_map(expansion_order); + + result.surf_to_conv_map = surf_to_conv; + result.conv_to_surf_map = conv_to_surf; + result.transfer_vectors = result.compute_transfer_vectors(); + result.m2l = result.compute_m2l_operators(expansion_order, domain); + + result + } + + pub fn compute_surf_to_conv_map( + expansion_order: usize, + ) -> (HashMap, HashMap) { + let n = 2 * expansion_order - 1; + + // Index maps between surface and convolution grids + let mut surf_to_conv: HashMap = HashMap::new(); + let mut conv_to_surf: HashMap = HashMap::new(); + + // Initialise surface grid index + let mut surf_index = 0; + + // The boundaries of the surface grid + let lower = expansion_order - 1; + let upper = 2 * expansion_order - 2; + + // Iterate through the entire convolution grid marking the boundaries + // This makes the map much easier to understand and debug + for i in 0..n { + for j in 0..n { + for k in 0..n { + let conv_idx = i * n * n + j * n + k; + if (i >= lower && j >= lower && (k == lower || k == upper)) + || (j >= lower && k >= lower && (i == lower || i == upper)) + || (k >= lower && i >= lower && (j == lower || j == upper)) + { + surf_to_conv.insert(surf_index, conv_idx); + conv_to_surf.insert(conv_idx, surf_index); + surf_index += 1; + } + } + } + } + + (surf_to_conv, conv_to_surf) + } pub fn compute_kernel( &self, @@ -330,10 +335,8 @@ where convolution_grid: &[f64], min_target: [f64; 3], ) -> Array3D - // -> Vec>> { let n = 2 * expansion_order - 1; - // let mut result = vec![vec![vec![0f64; n]; n]; n]; let mut result = Array3D::::new((n, n, n)); let nconv = n.pow(3); @@ -358,24 +361,28 @@ where result } - // pub fn compute_signal(&self, expansion_order: usize, charges: &[f64]) -> Vec>> { - // let n = 2 * expansion_order - 1; - // let mut result = vec![vec![vec![0f64; n]; n]; n]; - - // for (i, result_i) in result.iter_mut().enumerate() { - // for (j, result_ij) in result_i.iter_mut().enumerate() { - // for (k, result_ijk) in result_ij.iter_mut().enumerate() { - // let conv_idx = i * n * n + j * n + k; - // if self.conv_to_surf_map.contains_key(&conv_idx) { - // let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); - // *result_ijk = charges[*surf_idx] - // } - // } - // } - // } - - // result - // } + pub fn compute_signal( + &self, + expansion_order: usize, + charges: &[f64] + ) + -> Array3D + { + let n = 2 * expansion_order - 1; + let mut result = Array3D::new((n,n,n)); + + for i in 0..n { + for j in 0..n { + for k in 0..n { + let conv_idx = i*n*n+j*n+k; + let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); + *result.get_mut(i, j, k).unwrap() = charges[*surf_idx]; + } + } + } + + result + } } #[cfg(test)] @@ -391,13 +398,8 @@ mod test { let kernel = Laplace3dKernel::::new(); let transfer_vectors = compute_transfer_vectors(); - let fft = FftFieldTranslationNaiveKiFmm { - alpha: 1.05, - kernel, - transfer_vectors - }; - let domain = Domain { origin: [0., 0., 0.], diameter: [1.0, 1.0, 1.0] }; - fft.compute_m2l_operators(2, domain); + let fft = FftFieldTranslationNaiveKiFmm::new(kernel, 2, domain, 1.05); + } } \ No newline at end of file diff --git a/field/src/types.rs b/field/src/types.rs index 0e4aef4d..94c6a349 100644 --- a/field/src/types.rs +++ b/field/src/types.rs @@ -1,5 +1,7 @@ use std::collections::HashMap; +use num::Complex; +use bempp_tools::Array3D; use rlst::dense::{ base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix, Dynamic, }; @@ -11,8 +13,7 @@ use bempp_tree::types::morton::MortonKey; pub type SvdM2lEntry = Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; -pub type FftM2lEntry = - Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; +pub type FftM2lEntry = Array3D>; pub struct FftFieldTranslationNaiveKiFmm where @@ -21,13 +22,13 @@ where // Amount to dilate inner check surface by pub alpha: f64, - // // Maps between convolution and surface grids - // pub surf_to_conv_map: HashMap, - // pub conv_to_surf_map: HashMap, + // Maps between convolution and surface grids + pub surf_to_conv_map: HashMap, + pub conv_to_surf_map: HashMap, - // // Precomputed FFT of unique kernel interactions placed on - // // convolution grid. - // pub m2l: Vec, + // Precomputed FFT of unique kernel interactions placed on + // convolution grid. + pub m2l: Vec, // Unique transfer vectors to lookup m2l unique kernel interactions pub transfer_vectors: Vec, diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 8f5fb24a..bb38eb58 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -5,20 +5,23 @@ use std::{ sync::{Arc, Mutex, RwLock}, }; +use bempp_tools::Array3D; +use num::Complex; use itertools::Itertools; use rayon::prelude::*; -use bempp_field::types::SvdFieldTranslationKiFmm; +use bempp_field::{types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm}, helpers::{pad3, rfft3, irfft3}}; use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, fmm::{Fmm, InteractionLists, SourceTranslation, TargetTranslation}, kernel::{Kernel, KernelScale}, tree::Tree, types::EvalType, + arrays::Array3DAccess }; use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; use rlst::{ - common::traits::Eval, + common::traits::{Eval, CmpWiseProduct}, dense::{rlst_col_vec, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}, }; @@ -497,3 +500,91 @@ where } } } + +impl FieldTranslation for FmmData>> +where + T: Kernel + KernelScale + std::marker::Sync + std::marker::Send + Default +{ + + fn m2l<'a>(&self, level: u64) { + let Some(targets) = self.fmm.tree().get_keys(level) else { return }; + + targets.par_iter().for_each(move |&target| { + if let Some(v_list) = self.fmm.get_v_list(&target) { + let fmm_arc = Arc::clone(&self.fmm); + let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); + + for source in v_list.iter() { + + let transfer_vector = target.find_transfer_vector(source); + + // Locate correct precomputed FFT of kernel + let k_idx = fmm_arc + .m2l + .transfer_vectors + .iter() + .position(|x| x.vector == transfer_vector) + .unwrap(); + + // Compute FFT of signal + let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + + let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); + + // 1. Pad the signal + let &(m, n, o) = signal.shape(); + + let p = 2*m; + let q = 2*n; + let r = 2*o; + + // TODO: Look carefully how to pad upper left + let padded_signal = pad3(&signal, (p-m, q-n, r-o), (p, m, o)); + + let padded_signal_hat = rfft3(&padded_signal); + let &(m, n, o) = padded_signal_hat.shape(); + let len_padded_signal_hat = m*n*o; + + // 2. Compute the convolution to find the check potential + let padded_kernel_hat = &fmm_arc.m2l.m2l[k_idx]; + let &(m, n, o) = padded_kernel_hat.shape(); + let len_padded_kernel_hat= m*n*o; + + // Compute Hadamard product + let padded_signal_hat = unsafe { + rlst_pointer_mat!['a, Complex, padded_signal_hat.get_data().as_ptr(), (1, len_padded_signal_hat), (1,1)] + }; + + let padded_kernel_hat= unsafe { + rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (1, len_padded_kernel_hat), (1,1)] + }; + + let check_potential_hat = padded_kernel_hat.cmp_wise_product(padded_signal_hat).eval(); + + // 3.1 Compute iFFT to find check potentials + let check_potential_hat = Array3D::from_data(check_potential_hat.data().to_vec(), (m, n, o)); + + let check_potential = irfft3(&check_potential_hat, m); + + + + + } + } + }) + } + + fn m2l_scale(&self, level: u64) -> f64 { + if level < 2 { + panic!("M2L only performed on level 2 and below") + } + if level == 2 { + 1. / 2. + } else { + 2_f64.powf((level - 3) as f64) + } + + } + +} \ No newline at end of file From 33e9bc45e7a99049198d205b893ab7dbb88e7fd7 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Tue, 11 Jul 2023 18:22:55 +0100 Subject: [PATCH 31/40] Add something compiling, but not convergin --- field/src/field.rs | 26 +++++++++-- field/src/helpers.rs | 65 ++++++++++++++++++++++++++++ fmm/src/field_translation.rs | 63 ++++++++++++++++++++++----- fmm/src/fmm.rs | 83 +++++++++++++++++++++++++++++++++++- 4 files changed, 220 insertions(+), 17 deletions(-) diff --git a/field/src/field.rs b/field/src/field.rs index db0e2419..2018d89d 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -375,8 +375,10 @@ where for j in 0..n { for k in 0..n { let conv_idx = i*n*n+j*n+k; - let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); - *result.get_mut(i, j, k).unwrap() = charges[*surf_idx]; + if self.conv_to_surf_map.contains_key(&conv_idx) { + let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); + *result.get_mut(i, j, k).unwrap() = charges[*surf_idx]; + } } } } @@ -389,17 +391,33 @@ where mod test { use bempp_kernel::laplace_3d::Laplace3dKernel; + use bempp_tree::types::morton::MortonKey; use super::*; #[test] fn test_fft() { + let order = 2; + let alpha = 1.05; + let level = 2; let kernel = Laplace3dKernel::::new(); - let transfer_vectors = compute_transfer_vectors(); let domain = Domain { origin: [0., 0., 0.], diameter: [1.0, 1.0, 1.0] }; - let fft = FftFieldTranslationNaiveKiFmm::new(kernel, 2, domain, 1.05); + let fft = FftFieldTranslationNaiveKiFmm::new(kernel, order, domain, alpha); + let domain = Domain { origin: [0., 0., 0.], diameter: [1., 1., 1.] }; + let key = MortonKey::from_point(&[0.5, 0.5, 0.5], &domain, level); + let surface_grid = key.compute_surface(&domain, order, alpha); + let convolution_grid = key.convolution_grid(order, &domain, &surface_grid, alpha); + let min_target = [0.8, 0.8, 0.8]; + + let k = fft.compute_kernel(order, &convolution_grid, min_target); + + let &(m, n, o) = k.shape(); + + for i in 0..m { + println!("{:?}", k.get(i, 1, 1)); + } } } \ No newline at end of file diff --git a/field/src/helpers.rs b/field/src/helpers.rs index 7df8904a..1682eb64 100644 --- a/field/src/helpers.rs +++ b/field/src/helpers.rs @@ -320,4 +320,69 @@ mod test { } } } + + #[test] + fn test_pad3() { + let dim = 3; + // Initialise input data + let mut input = Array3D::new((dim, dim, dim)); + for i in 0..dim { + for j in 0..dim { + for k in 0..dim { + *input.get_mut(i, j, k).unwrap() = (i + j * dim + k * dim * dim + 1) as f64 + } + } + } + + // Test padding at edge of each axis + let pad_size = (2,3,4); + let pad_index = (0, 0, 0); + let padded = pad3(&input, pad_size, pad_index); + + let &(m, n, o) = padded.shape(); + + // Check dimension + assert_eq!(m, dim+pad_size.0); + assert_eq!(n, dim+pad_size.1); + assert_eq!(o, dim+pad_size.2); + + // Check that padding has been correctly applied + for i in dim..m { + for j in dim..n { + for k in dim.. o { + assert_eq!(*padded.get(i, j, k).unwrap(), 0f64) + } + } + } + + for i in 0..dim { + for j in 0..dim { + for k in 0..dim { + assert_eq!(*padded.get(i, j, k).unwrap(), *input.get(i, j, k).unwrap()) + } + } + } + + // Test padding at the start of each axis + let pad_index = (2,2,2); + + let padded = pad3(&input, pad_size, pad_index); + + // Check that padding has been correctly applied + for i in 0..pad_index.0 { + for j in 0..pad_index.1 { + for k in 0.. pad_index.2 { + assert_eq!(*padded.get(i, j, k).unwrap(), 0f64) + } + } + } + + for i in 0..dim { + for j in 0..dim { + for k in 0..dim { + assert_eq!(*padded.get(i+pad_index.0, j+pad_index.1, k+pad_index.2).unwrap(), *input.get(i, j, k).unwrap()); + } + } + } + } } \ No newline at end of file diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index bb38eb58..9e1b07fc 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -1,8 +1,8 @@ // Implementation of field translations use std::{ collections::HashMap, - ops::Deref, - sync::{Arc, Mutex, RwLock}, + ops::{Deref, Mul, DerefMut}, + sync::{Arc, Mutex, RwLock, MutexGuard}, }; use bempp_tools::Array3D; @@ -21,7 +21,8 @@ use bempp_traits::{ }; use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; use rlst::{ - common::traits::{Eval, CmpWiseProduct}, + common::traits::*, + common::tools::PrettyPrint, dense::{rlst_col_vec, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}, }; @@ -528,6 +529,7 @@ where // Compute FFT of signal let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); + let source_multipole_lock = source_multipole_arc.lock().unwrap(); let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); @@ -540,16 +542,19 @@ where let r = 2*o; // TODO: Look carefully how to pad upper left - let padded_signal = pad3(&signal, (p-m, q-n, r-o), (p, m, o)); + let pad_size = (p-m, q-n, r-o); + let pad_index = (p-m, q-n, r-o); + let real_dim = p; + let padded_signal = pad3(&signal, pad_size, pad_index); let padded_signal_hat = rfft3(&padded_signal); - let &(m, n, o) = padded_signal_hat.shape(); - let len_padded_signal_hat = m*n*o; + let &(m_, n_, o_) = padded_signal_hat.shape(); + let len_padded_signal_hat = m_*n_*o_; // 2. Compute the convolution to find the check potential let padded_kernel_hat = &fmm_arc.m2l.m2l[k_idx]; - let &(m, n, o) = padded_kernel_hat.shape(); - let len_padded_kernel_hat= m*n*o; + let &(m_, n_, o_) = padded_kernel_hat.shape(); + let len_padded_kernel_hat= m_*n_*o_; // Compute Hadamard product let padded_signal_hat = unsafe { @@ -560,16 +565,52 @@ where rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (1, len_padded_kernel_hat), (1,1)] }; + assert_eq!(len_padded_kernel_hat, len_padded_signal_hat); + let check_potential_hat = padded_kernel_hat.cmp_wise_product(padded_signal_hat).eval(); // 3.1 Compute iFFT to find check potentials - let check_potential_hat = Array3D::from_data(check_potential_hat.data().to_vec(), (m, n, o)); + let check_potential_hat = Array3D::from_data(check_potential_hat.data().to_vec(), (m_, n_, o_)); + + let check_potential = irfft3(&check_potential_hat, real_dim); + + // Filter check potentials + let mut filtered_check_potentials: Array3D = Array3D::new((m+1, n+1, o+1)); + for i in (p-m-1)..p { + for j in (q-n-1)..q { + for k in (r-o-1)..o { + let i_= i - (p-m-1); + let j_ = j - (q-n-1); + let k_ = k - (r-o-1); + *filtered_check_potentials.get_mut(i_, j_, k_).unwrap() = *check_potential.get(i, j, k).unwrap(); + } + } + } - let check_potential = irfft3(&check_potential_hat, m); + let (_, target_surface_idxs) = target.surface_grid(fmm_arc.order); + let mut tmp = Vec::new(); + let ntargets = target_surface_idxs.len() / fmm_arc.kernel.space_dimension(); + let xs = &target_surface_idxs[0..ntargets]; + let ys = &target_surface_idxs[ntargets..2*ntargets]; + let zs = &target_surface_idxs[2*ntargets..]; - + for i in 0..ntargets { + let val = filtered_check_potentials.get(xs[i], ys[i], zs[i]).unwrap(); + tmp.push(*val); + } + + let check_potential = unsafe { + rlst_pointer_mat!['a, f64, tmp.as_ptr(), (ntargets, 1), (1,1)] + }; + + // Finally, compute local coefficients from check potential + let target_local_owned = (self.m2l_scale(target.level()) + * fmm_arc.kernel.scale(target.level()) + * fmm_arc.dc2e_inv.dot(&check_potential)).eval(); + let mut target_local_lock = target_local_arc.lock().unwrap(); + *target_local_lock.deref_mut() = (target_local_lock.deref() + target_local_owned).eval(); } } }) diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 26609749..f85993cf 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -367,6 +367,7 @@ where mod test { use super::*; + use bempp_field::types::FftFieldTranslationNaiveKiFmm; use rand::prelude::*; use rand::SeedableRng; @@ -502,7 +503,7 @@ mod test { #[test] fn test_fmm_svd<'a>() { - let npoints = 1000; + let npoints = 10000; let points = points_fixture(npoints, None, None); let global_idxs = (0..npoints).collect_vec(); let charges = vec![1.0; npoints]; @@ -513,7 +514,7 @@ mod test { let adaptive = false; let k = 50; let ncrit = 150; - let depth = 2; + let depth = 3; let kernel = Laplace3dKernel::::default(); let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); @@ -579,4 +580,82 @@ mod test { println!("rel error {:?}", rel_error); assert!(rel_error <= 1e-5); } + + // #[test] + // fn test_fmm_fft<'a>() { + // let npoints = 10000; + // let points = points_fixture(npoints, None, None); + // let global_idxs = (0..npoints).collect_vec(); + // let charges = vec![1.0; npoints]; + + // let order = 7; + // let alpha_inner = 1.05; + // let alpha_outer = 2.9; + // let adaptive = false; + // let ncrit = 150; + // let depth = 3; + // let kernel = Laplace3dKernel::::default(); + + // let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); + + // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( + // kernel.clone(), + // order, + // tree.get_domain().clone(), + // alpha_inner + // ); + + // let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); + + // // Form charge dict, matching charges with their associated global indices + // let mut charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); + + // let datatree = FmmData::new(fmm, &charge_dict); + + // let times = datatree.run(Some(true)); + + // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + + // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); + // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + + // let leaf_coordinates = pts + // .iter() + // .map(|p| p.coordinate) + // .flat_map(|[x, y, z]| vec![x, y, z]) + // .collect_vec(); + + // let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); + + // // Get into row major order + // let leaf_coordinates = unsafe { + // rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] + // }.eval(); + + // let mut direct = vec![0f64; pts.len()]; + // let all_point_coordinates = points_fixture(npoints, None, None); + + // let all_charges = charge_dict.into_values().collect_vec(); + + // let kernel = Laplace3dKernel::::default(); + + // kernel.evaluate_st( + // EvalType::Value, + // all_point_coordinates.data(), + // leaf_coordinates.data(), + // &all_charges[..], + // &mut direct[..], + // ); + + // let abs_error: f64 = potentials + // .data() + // .iter() + // .zip(direct.iter()) + // .map(|(a, b)| (a - b).abs()) + // .sum(); + // let rel_error: f64 = abs_error / (direct.iter().sum::()); + + // println!("rel error {:?}", rel_error); + // assert!(rel_error <= 1e-5); + // } } From fe0b76912cad3a826e9192da843cc4ba854f6d64 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Tue, 11 Jul 2023 20:24:34 +0100 Subject: [PATCH 32/40] Converging fft, but slow --- field/src/field.rs | 5 +- fmm/src/field_translation.rs | 13 ++- fmm/src/fmm.rs | 172 +++++++++++++++++------------------ 3 files changed, 96 insertions(+), 94 deletions(-) diff --git a/field/src/field.rs b/field/src/field.rs index 2018d89d..cc7f8e29 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -265,6 +265,7 @@ where } } + impl FftFieldTranslationNaiveKiFmm where T: Kernel + Default, @@ -416,8 +417,6 @@ mod test { let &(m, n, o) = k.shape(); - for i in 0..m { - println!("{:?}", k.get(i, 1, 1)); - } + assert!(false); } } \ No newline at end of file diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 9e1b07fc..bb1df45f 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -502,6 +502,7 @@ where } } + impl FieldTranslation for FmmData>> where T: Kernel + KernelScale + std::marker::Sync + std::marker::Send + Default @@ -544,7 +545,9 @@ where // TODO: Look carefully how to pad upper left let pad_size = (p-m, q-n, r-o); let pad_index = (p-m, q-n, r-o); - let real_dim = p; + let real_dim = q; + + let padded_signal = pad3(&signal, pad_size, pad_index); let padded_signal_hat = rfft3(&padded_signal); @@ -555,14 +558,14 @@ where let padded_kernel_hat = &fmm_arc.m2l.m2l[k_idx]; let &(m_, n_, o_) = padded_kernel_hat.shape(); let len_padded_kernel_hat= m_*n_*o_; - + // Compute Hadamard product let padded_signal_hat = unsafe { - rlst_pointer_mat!['a, Complex, padded_signal_hat.get_data().as_ptr(), (1, len_padded_signal_hat), (1,1)] + rlst_pointer_mat!['a, Complex, padded_signal_hat.get_data().as_ptr(), (len_padded_signal_hat, 1), (1,1)] }; let padded_kernel_hat= unsafe { - rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (1, len_padded_kernel_hat), (1,1)] + rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (len_padded_kernel_hat, 1), (1,1)] }; assert_eq!(len_padded_kernel_hat, len_padded_signal_hat); @@ -578,7 +581,7 @@ where let mut filtered_check_potentials: Array3D = Array3D::new((m+1, n+1, o+1)); for i in (p-m-1)..p { for j in (q-n-1)..q { - for k in (r-o-1)..o { + for k in (r-o-1)..r { let i_= i - (p-m-1); let j_ = j - (q-n-1); let k_ = k - (r-o-1); diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index f85993cf..975b9571 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -501,89 +501,9 @@ mod test { // assert!(false) // } - #[test] - fn test_fmm_svd<'a>() { - let npoints = 10000; - let points = points_fixture(npoints, None, None); - let global_idxs = (0..npoints).collect_vec(); - let charges = vec![1.0; npoints]; - - let order = 7; - let alpha_inner = 1.05; - let alpha_outer = 2.9; - let adaptive = false; - let k = 50; - let ncrit = 150; - let depth = 3; - let kernel = Laplace3dKernel::::default(); - - let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); - - let m2l_data_svd = SvdFieldTranslationKiFmm::new( - kernel.clone(), - Some(k), - order, - tree.get_domain().clone(), - alpha_inner, - ); - - let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - - // Form charge dict, matching charges with their associated global indices - let mut charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); - - let datatree = FmmData::new(fmm, &charge_dict); - - let times = datatree.run(Some(true)); - - let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; - - let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); - let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); - - let leaf_coordinates = pts - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); - - let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); - - // Get into row major order - let leaf_coordinates = unsafe { - rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] - }.eval(); - - let mut direct = vec![0f64; pts.len()]; - let all_point_coordinates = points_fixture(npoints, None, None); - - let all_charges = charge_dict.into_values().collect_vec(); - - let kernel = Laplace3dKernel::::default(); - - kernel.evaluate_st( - EvalType::Value, - all_point_coordinates.data(), - leaf_coordinates.data(), - &all_charges[..], - &mut direct[..], - ); - - let abs_error: f64 = potentials - .data() - .iter() - .zip(direct.iter()) - .map(|(a, b)| (a - b).abs()) - .sum(); - let rel_error: f64 = abs_error / (direct.iter().sum::()); - - println!("rel error {:?}", rel_error); - assert!(rel_error <= 1e-5); - } - // #[test] - // fn test_fmm_fft<'a>() { - // let npoints = 10000; + // fn test_fmm_svd<'a>() { + // let npoints = 1000; // let points = points_fixture(npoints, None, None); // let global_idxs = (0..npoints).collect_vec(); // let charges = vec![1.0; npoints]; @@ -592,20 +512,22 @@ mod test { // let alpha_inner = 1.05; // let alpha_outer = 2.9; // let adaptive = false; + // let k = 50; // let ncrit = 150; - // let depth = 3; + // let depth = 2; // let kernel = Laplace3dKernel::::default(); // let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); - // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( + // let m2l_data_svd = SvdFieldTranslationKiFmm::new( // kernel.clone(), + // Some(k), // order, // tree.get_domain().clone(), - // alpha_inner + // alpha_inner, // ); - // let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); + // let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); // // Form charge dict, matching charges with their associated global indices // let mut charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); @@ -658,4 +580,82 @@ mod test { // println!("rel error {:?}", rel_error); // assert!(rel_error <= 1e-5); // } + + #[test] + fn test_fmm_fft<'a>() { + let npoints = 1000; + let points = points_fixture(npoints, None, None); + let global_idxs = (0..npoints).collect_vec(); + let charges = vec![1.0; npoints]; + + let order = 5; + let alpha_inner = 1.05; + let alpha_outer = 2.9; + let adaptive = false; + let ncrit = 150; + let depth = 2; + let kernel = Laplace3dKernel::::default(); + + let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); + + let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( + kernel.clone(), + order, + tree.get_domain().clone(), + alpha_inner + ); + + let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_fft); + + // Form charge dict, matching charges with their associated global indices + let mut charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); + + let datatree = FmmData::new(fmm, &charge_dict); + + let times = datatree.run(Some(true)); + + let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + + let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); + let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + + let leaf_coordinates = pts + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); + + let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); + + // Get into row major order + let leaf_coordinates = unsafe { + rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] + }.eval(); + + let mut direct = vec![0f64; pts.len()]; + let all_point_coordinates = points_fixture(npoints, None, None); + + let all_charges = charge_dict.into_values().collect_vec(); + + let kernel = Laplace3dKernel::::default(); + + kernel.evaluate_st( + EvalType::Value, + all_point_coordinates.data(), + leaf_coordinates.data(), + &all_charges[..], + &mut direct[..], + ); + + let abs_error: f64 = potentials + .data() + .iter() + .zip(direct.iter()) + .map(|(a, b)| (a - b).abs()) + .sum(); + let rel_error: f64 = abs_error / (direct.iter().sum::()); + + println!("rel error {:?}", rel_error); + assert!(rel_error <= 1e-5); + } } From 885bfa6b35ce64259f0bacb319e234d19750dab3 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Tue, 11 Jul 2023 20:29:06 +0100 Subject: [PATCH 33/40] Add todos --- fmm/src/field_translation.rs | 3 - fmm/src/fmm.rs | 230 +++++++++++------------------------ 2 files changed, 68 insertions(+), 165 deletions(-) diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index bb1df45f..a4724c37 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -546,7 +546,6 @@ where let pad_size = (p-m, q-n, r-o); let pad_index = (p-m, q-n, r-o); let real_dim = q; - let padded_signal = pad3(&signal, pad_size, pad_index); @@ -568,8 +567,6 @@ where rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (len_padded_kernel_hat, 1), (1,1)] }; - assert_eq!(len_padded_kernel_hat, len_padded_signal_hat); - let check_potential_hat = padded_kernel_hat.cmp_wise_product(padded_signal_hat).eval(); // 3.1 Compute iFFT to find check potentials diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 975b9571..a0f110d3 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,4 +1,5 @@ -// TODO: FFT convolutions implemented in rlst +// TODO: Figure out why fft is slow, probably due to a billion allocations being done +// TODO: Change all potential assignments to be direct rather than over a loop as currently as there is a trait for this. // TODO: Should be generic over kernel/kernel scale float type parameter - this requires trees to be generic over float type // TODO: Tree should infer dimension from the data (stride). // TODO: Data driven SVD compression in the case the user specified no compression parameter. @@ -406,180 +407,85 @@ mod test { points } - // #[test] - // fn test_upward_pass() { - // let npoints = 1000; - // let points = points_fixture(npoints, None, None); - - // let order = 5; - // let alpha_inner = 1.05; - // let alpha_outer = 2.9; - // let adaptive = false; - // let k = 50; - // let ncrit = 100; - // let depth = 2; - // let kernel = Laplace3dKernel::::default(); - - // let start = Instant::now(); - // let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth)); - // println!("Tree = {:?}ms", start.elapsed().as_millis()); - - // let start = Instant::now(); - - // // // let m2l_data_svd_naive = SvdFieldTranslationNaiveKiFmm::new( - // // // kernel.clone(), - // // // Some(k), - // // // order, - // // // tree.get_domain().clone(), - // // // alpha_inner, - // // // ); - - // let m2l_data_svd = SvdFieldTranslationKiFmm::new( - // kernel.clone(), - // Some(k), - // order, - // tree.get_domain().clone(), - // alpha_inner, - // ); - // println!("SVD operators = {:?}ms", start.elapsed().as_millis()); - - // // let start = Instant::now(); - // // let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( - // // kernel.clone(), - // // order, - // // tree.get_domain().clone(), - // // alpha_inner, - // // ); - // // println!("FFT operators = {:?}ms", start.elapsed().as_millis()); - - // let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - - // let charges = Charges::new(); - // let datatree = FmmData::new(fmm, charges); - // datatree.upward_pass(); - - // // let e = e.unwrap().lock().unwrap(); - // // let e= datatree.multipoles.get(&ROOT).unwrap().lock().unwrap().deref(); - - // let pt = vec![100., 0., 0.]; - // let distant_point = unsafe { rlst_pointer_mat!['static, f64, pt.as_ptr(), (1, 3), (1, 1)] }; - - // let charges = vec![1.0; npoints]; - // let charges = - // unsafe { rlst_pointer_mat!['static, f64, charges.as_ptr(), (1, npoints), (1, 1)] }; - // let mut direct = rlst_col_vec![f64, 1]; - // evaluate_laplace_one_target( - // EvalType::Value, - // distant_point.data(), - // points.data(), - // charges.data(), - // direct.data_mut(), - // ); - - // let mut result = rlst_col_vec![f64, 1]; - - // let upward_equivalent_surface = ROOT.compute_surface( - // datatree.fmm.tree().get_domain(), - // datatree.fmm.order, - // datatree.fmm.alpha_inner, - // ); - // let binding = datatree.multipoles.get(&ROOT).unwrap().lock().unwrap(); - // let multipole_expansion = binding.deref(); - - // evaluate_laplace_one_target( - // EvalType::Value, - // distant_point.data(), - // &upward_equivalent_surface[..], - // multipole_expansion.data(), - // result.data_mut(), - // ); - - // result.pretty_print(); - // direct.pretty_print(); - // // kernel.evaluate_st(EvalType::Value, points.data(), , charges, result) - // // println!("distant {:?}", distant_point) - // assert!(false) - // } - - // #[test] - // fn test_fmm_svd<'a>() { - // let npoints = 1000; - // let points = points_fixture(npoints, None, None); - // let global_idxs = (0..npoints).collect_vec(); - // let charges = vec![1.0; npoints]; - - // let order = 7; - // let alpha_inner = 1.05; - // let alpha_outer = 2.9; - // let adaptive = false; - // let k = 50; - // let ncrit = 150; - // let depth = 2; - // let kernel = Laplace3dKernel::::default(); - - // let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); - - // let m2l_data_svd = SvdFieldTranslationKiFmm::new( - // kernel.clone(), - // Some(k), - // order, - // tree.get_domain().clone(), - // alpha_inner, - // ); - - // let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - - // // Form charge dict, matching charges with their associated global indices - // let mut charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); + #[test] + fn test_fmm_svd<'a>() { + let npoints = 1000; + let points = points_fixture(npoints, None, None); + let global_idxs = (0..npoints).collect_vec(); + let charges = vec![1.0; npoints]; + + let order = 7; + let alpha_inner = 1.05; + let alpha_outer = 2.9; + let adaptive = false; + let k = 50; + let ncrit = 150; + let depth = 2; + let kernel = Laplace3dKernel::::default(); + + let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); + + let m2l_data_svd = SvdFieldTranslationKiFmm::new( + kernel.clone(), + Some(k), + order, + tree.get_domain().clone(), + alpha_inner, + ); + + let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); + + // Form charge dict, matching charges with their associated global indices + let mut charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); - // let datatree = FmmData::new(fmm, &charge_dict); + let datatree = FmmData::new(fmm, &charge_dict); - // let times = datatree.run(Some(true)); + let times = datatree.run(Some(true)); - // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; - // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); - // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); + let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); - // let leaf_coordinates = pts - // .iter() - // .map(|p| p.coordinate) - // .flat_map(|[x, y, z]| vec![x, y, z]) - // .collect_vec(); + let leaf_coordinates = pts + .iter() + .map(|p| p.coordinate) + .flat_map(|[x, y, z]| vec![x, y, z]) + .collect_vec(); - // let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); + let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); - // // Get into row major order - // let leaf_coordinates = unsafe { - // rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] - // }.eval(); + // Get into row major order + let leaf_coordinates = unsafe { + rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] + }.eval(); - // let mut direct = vec![0f64; pts.len()]; - // let all_point_coordinates = points_fixture(npoints, None, None); + let mut direct = vec![0f64; pts.len()]; + let all_point_coordinates = points_fixture(npoints, None, None); - // let all_charges = charge_dict.into_values().collect_vec(); + let all_charges = charge_dict.into_values().collect_vec(); - // let kernel = Laplace3dKernel::::default(); + let kernel = Laplace3dKernel::::default(); - // kernel.evaluate_st( - // EvalType::Value, - // all_point_coordinates.data(), - // leaf_coordinates.data(), - // &all_charges[..], - // &mut direct[..], - // ); + kernel.evaluate_st( + EvalType::Value, + all_point_coordinates.data(), + leaf_coordinates.data(), + &all_charges[..], + &mut direct[..], + ); - // let abs_error: f64 = potentials - // .data() - // .iter() - // .zip(direct.iter()) - // .map(|(a, b)| (a - b).abs()) - // .sum(); - // let rel_error: f64 = abs_error / (direct.iter().sum::()); + let abs_error: f64 = potentials + .data() + .iter() + .zip(direct.iter()) + .map(|(a, b)| (a - b).abs()) + .sum(); + let rel_error: f64 = abs_error / (direct.iter().sum::()); - // println!("rel error {:?}", rel_error); - // assert!(rel_error <= 1e-5); - // } + println!("rel error {:?}", rel_error); + assert!(rel_error <= 1e-5); + } #[test] fn test_fmm_fft<'a>() { From 3cd7d0b3924fbc4a90bbfa49f9dfb5d6a995640b Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Wed, 12 Jul 2023 11:13:56 +0100 Subject: [PATCH 34/40] Fix assignment --- field/src/helpers.rs | 2 +- fmm/src/field_translation.rs | 44 ++++++++++++------------------------ fmm/src/fmm.rs | 13 ++++++++--- 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/field/src/helpers.rs b/field/src/helpers.rs index 1682eb64..002901f8 100644 --- a/field/src/helpers.rs +++ b/field/src/helpers.rs @@ -162,7 +162,7 @@ where let mut planner = FftPlanner::::new(); let fftn = planner.plan_fft_forward(n); let ffto = planner.plan_fft_forward(o); - let mut scratch = vec![Complex::zero(); m]; + let mut scratch: Vec> = vec![Complex::zero(); m]; // X dimension for j in 0..n { diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index a4724c37..3796db36 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -88,10 +88,8 @@ where ).eval(); let mut leaf_multipole_lock = leaf_multipole_arc.lock().unwrap(); - - for i in 0..leaf_multipole_lock.shape().0 { - leaf_multipole_lock[[i, 0]] += leaf_multipole_owned[[i, 0]]; - } + + *leaf_multipole_lock.deref_mut() = (leaf_multipole_lock.deref() + leaf_multipole_owned).eval(); } }); } @@ -116,9 +114,7 @@ where let mut target_multipole_lock = target_multipole_arc.lock().unwrap(); - for i in 0..ncoeffs { - target_multipole_lock[[i, 0]] += target_multipole_owned[[i, 0]]; - } + *target_multipole_lock.deref_mut() = (target_multipole_lock.deref() + target_multipole_owned).eval(); }) } } @@ -143,10 +139,8 @@ where let target_local_owned = fmm.l2l[operator_index].dot(&source_local_lock); let mut target_local_lock = target_local_arc.lock().unwrap(); - - for i in 0..ncoeffs { - target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; - } + + *target_local_lock.deref_mut() = (target_local_lock.deref() + target_local_owned).eval(); }) } } @@ -196,10 +190,8 @@ where ); let mut target_potential_lock = target_potential_arc.lock().unwrap(); - - for i in 0..ntargets { - target_potential_lock[[i, 0]] += target_potential[[i, 0]]; - } + + *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); } } } @@ -248,10 +240,8 @@ where ); let mut target_potential_lock = target_potential_arc.lock().unwrap(); - - for i in 0..ntargets { - target_potential_lock[[i, 0]] += target_potential[[i, 0]]; - } + + *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); } }) } @@ -304,9 +294,7 @@ where let target_local_owned = (fmm_arc.kernel.scale(leaf.level()) * fmm_arc.dc2e_inv.dot(&downward_check_potential)).eval(); - for i in 0..ncoeffs { - target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; - } + *target_local_lock.deref_mut() = (target_local_lock.deref() + target_local_owned).eval(); } } } @@ -366,9 +354,7 @@ where let mut target_potential_lock = target_potential_arc.lock().unwrap(); - for i in 0..ntargets { - target_potential_lock[[i, 0]] += target_potential[[i, 0]]; - } + *target_potential_lock.deref_mut() = (target_potential_lock.deref() + target_potential).eval(); } } } @@ -482,9 +468,7 @@ where let dim = (ncoeffs, 1); let target_local_owned = locals_owned.block(top_left, dim); - for i in 0..target_local_lock.shape().0 { - target_local_lock[[i, 0]] += target_local_owned[[i, 0]]; - } + *target_local_lock.deref_mut() = (target_local_lock.deref() + target_local_owned).eval(); } }); } @@ -533,6 +517,7 @@ where let source_multipole_lock = source_multipole_arc.lock().unwrap(); + // TODO: SLOW ~ 1.5s let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); // 1. Pad the signal @@ -542,13 +527,14 @@ where let q = 2*n; let r = 2*o; - // TODO: Look carefully how to pad upper left let pad_size = (p-m, q-n, r-o); let pad_index = (p-m, q-n, r-o); let real_dim = q; + // Also slow but not as slow as compute signal ~100ms let padded_signal = pad3(&signal, pad_size, pad_index); + // TODO: Very SLOW ~21s let padded_signal_hat = rfft3(&padded_signal); let &(m_, n_, o_) = padded_signal_hat.shape(); let len_padded_signal_hat = m_*n_*o_; diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index a0f110d3..385f2bf0 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -1,5 +1,4 @@ // TODO: Figure out why fft is slow, probably due to a billion allocations being done -// TODO: Change all potential assignments to be direct rather than over a loop as currently as there is a trait for this. // TODO: Should be generic over kernel/kernel scale float type parameter - this requires trees to be generic over float type // TODO: Tree should infer dimension from the data (stride). // TODO: Data driven SVD compression in the case the user specified no compression parameter. @@ -414,11 +413,11 @@ mod test { let global_idxs = (0..npoints).collect_vec(); let charges = vec![1.0; npoints]; - let order = 7; + let order = 5; let alpha_inner = 1.05; let alpha_outer = 2.9; let adaptive = false; - let k = 50; + let k = 1000; let ncrit = 150; let depth = 2; let kernel = Laplace3dKernel::::default(); @@ -442,6 +441,10 @@ mod test { let times = datatree.run(Some(true)); + // println!("SVD times {:?}", times); + + // assert!(false); + let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); @@ -520,6 +523,10 @@ mod test { let times = datatree.run(Some(true)); + // println!("FFT times {:?}", times); + + // assert!(false); + let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); From 26097ac9a7a338f8fb762fe5e8e555140b56c641 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Fri, 14 Jul 2023 10:37:16 +0100 Subject: [PATCH 35/40] Optimise input size --- field/src/field.rs | 225 ++++++++++++++++++++++++++++++++++- field/src/types.rs | 21 ++++ fmm/src/field_translation.rs | 143 +++++++++++++++++++++- fmm/src/fmm.rs | 11 +- 4 files changed, 386 insertions(+), 14 deletions(-) diff --git a/field/src/field.rs b/field/src/field.rs index cc7f8e29..d45d5fb9 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -16,7 +16,7 @@ use bempp_tools::Array3D; use crate::{ helpers::{compute_transfer_vectors, pad3, flip3, rfft3}, - types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, SvdM2lEntry, FftM2lEntry, TransferVector}, + types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, FftFieldTranslationKiFmm, SvdM2lEntry, FftM2lEntry, TransferVector}, }; impl FieldTranslationData for SvdFieldTranslationKiFmm @@ -237,9 +237,13 @@ where // Precompute and store the FFT of each unique kernel interaction // Begin by calculating pad lengths along each dimension - let p = 2 * m; - let q = 2 * n; - let r = 2 * o; + let p = 2_f64.powf((m as f64).log2().ceil()) as usize; + let q = 2_f64.powf((n as f64).log2().ceil()) as usize; + let r = 2_f64.powf((o as f64).log2().ceil()) as usize; + + let p = p.max(4); + let q = q.max(4); + let r = r.max(4); let padded_kernel = pad3(&kernel, (p-m, q-n, r-o), (0, 0, 0)); @@ -388,6 +392,219 @@ where } } +impl FieldTranslationData for FftFieldTranslationKiFmm +where + T: Kernel + Default, +{ + type Domain = Domain; + type M2LOperators = Vec; + type TransferVector = Vec; + + fn compute_m2l_operators( + &self, + expansion_order: usize, + domain: Self::Domain, + ) -> Self::M2LOperators { + let mut result = Vec::new(); + + for t in self.transfer_vectors.iter() { + let source_equivalent_surface = + t.source + .compute_surface(&domain, expansion_order, self.alpha); + + let conv_grid_sources = t.source.convolution_grid( + expansion_order, + &domain, + &source_equivalent_surface, + self.alpha, + ); + + let target_check_surface = t.target.compute_surface(&domain, expansion_order, self.alpha); + + // TODO: Remove dim + let dim = 3; + // Find min target + let ncoeffs: usize = target_check_surface.len() / dim; + let sums: Vec<_> = (0..ncoeffs) + .map(|i| target_check_surface[i] + target_check_surface[ncoeffs + i] + target_check_surface[2*ncoeffs + i]) + .collect(); + + let min_index = sums + .iter() + .enumerate() + .min_by(|a, b| a.1.partial_cmp(b.1).unwrap()) + .map(|(index, _)| index) + .unwrap(); + + let min_target = [ + target_check_surface[min_index], + target_check_surface[min_index + ncoeffs], + target_check_surface[min_index + 2 * ncoeffs], + ]; + + let kernel = self.compute_kernel(expansion_order, &conv_grid_sources, min_target); + + let &(m, n, o) = kernel.shape(); + + // Precompute and store the FFT of each unique kernel interaction + + // Begin by calculating pad lengths along each dimension + let p = 2_f64.powf((m as f64).log2().ceil()) as usize; + let q = 2_f64.powf((n as f64).log2().ceil()) as usize; + let r = 2_f64.powf((o as f64).log2().ceil()) as usize; + + let p = p.max(4); + let q = q.max(4); + let r = r.max(4); + + let padded_kernel = pad3(&kernel, (p-m, q-n, r-o), (0, 0, 0)); + + // Flip the kernel + let padded_kernel = flip3(&padded_kernel); + + // Compute FFT of kernel for this transfer vector + let padded_kernel_hat = rfft3(&padded_kernel); + + // Store FFT of kernel for this transfer vector + result.push(padded_kernel_hat) + } + + result + } + + fn compute_transfer_vectors(&self) -> Self::TransferVector { + compute_transfer_vectors() + } + + fn ncoeffs(&self, expansion_order: usize) -> usize { + 6 * (expansion_order - 1).pow(2) + 2 + } +} + + +impl FftFieldTranslationKiFmm +where + T: Kernel + Default, +{ + pub fn new(kernel: T, expansion_order: usize, domain: Domain, alpha: f64) -> Self { + + let mut result = FftFieldTranslationKiFmm { + alpha, + kernel, + surf_to_conv_map: HashMap::default(), + conv_to_surf_map: HashMap::default(), + m2l: Vec::default(), + transfer_vectors: Vec::default(), + }; + + // Create maps between surface and convolution grids + let (surf_to_conv, conv_to_surf) = + FftFieldTranslationKiFmm::::compute_surf_to_conv_map(expansion_order); + + result.surf_to_conv_map = surf_to_conv; + result.conv_to_surf_map = conv_to_surf; + result.transfer_vectors = result.compute_transfer_vectors(); + result.m2l = result.compute_m2l_operators(expansion_order, domain); + + result + } + + pub fn compute_surf_to_conv_map( + expansion_order: usize, + ) -> (HashMap, HashMap) { + let n = 2 * expansion_order - 1; + + // Index maps between surface and convolution grids + let mut surf_to_conv: HashMap = HashMap::new(); + let mut conv_to_surf: HashMap = HashMap::new(); + + // Initialise surface grid index + let mut surf_index = 0; + + // The boundaries of the surface grid + let lower = expansion_order - 1; + let upper = 2 * expansion_order - 2; + + // Iterate through the entire convolution grid marking the boundaries + // This makes the map much easier to understand and debug + for i in 0..n { + for j in 0..n { + for k in 0..n { + let conv_idx = i * n * n + j * n + k; + if (i >= lower && j >= lower && (k == lower || k == upper)) + || (j >= lower && k >= lower && (i == lower || i == upper)) + || (k >= lower && i >= lower && (j == lower || j == upper)) + { + surf_to_conv.insert(surf_index, conv_idx); + conv_to_surf.insert(conv_idx, surf_index); + surf_index += 1; + } + } + } + } + + (surf_to_conv, conv_to_surf) + } + + pub fn compute_kernel( + &self, + expansion_order: usize, + convolution_grid: &[f64], + min_target: [f64; 3], + ) -> Array3D + { + let n = 2 * expansion_order - 1; + let mut result = Array3D::::new((n, n, n)); + let nconv = n.pow(3); + + let mut kernel_evals = vec![0f64; nconv]; + + self.kernel.assemble_st( + EvalType::Value, + convolution_grid, + &min_target[..], + &mut kernel_evals[..] + ); + + for i in 0..n { + for j in 0..n { + for k in 0..n { + let conv_idx = i * n * n + j * n + k; + *result.get_mut(i, j, k).unwrap() = kernel_evals[conv_idx]; + } + } + } + + result + } + + pub fn compute_signal( + &self, + expansion_order: usize, + charges: &[f64] + ) + -> Array3D + { + let n = 2 * expansion_order - 1; + let mut result = Array3D::new((n,n,n)); + + for i in 0..n { + for j in 0..n { + for k in 0..n { + let conv_idx = i*n*n+j*n+k; + if self.conv_to_surf_map.contains_key(&conv_idx) { + let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); + *result.get_mut(i, j, k).unwrap() = charges[*surf_idx]; + } + } + } + } + + result + } +} + + #[cfg(test)] mod test { diff --git a/field/src/types.rs b/field/src/types.rs index 94c6a349..262e60e6 100644 --- a/field/src/types.rs +++ b/field/src/types.rs @@ -36,6 +36,27 @@ where pub kernel: T, } +pub struct FftFieldTranslationKiFmm +where + T: Kernel + Default, +{ + // Amount to dilate inner check surface by + pub alpha: f64, + + // Maps between convolution and surface grids + pub surf_to_conv_map: HashMap, + pub conv_to_surf_map: HashMap, + + // Precomputed FFT of unique kernel interactions placed on + // convolution grid. + pub m2l: Vec, + + // Unique transfer vectors to lookup m2l unique kernel interactions + pub transfer_vectors: Vec, + + pub kernel: T, +} + pub struct SvdFieldTranslationKiFmm where T: Kernel + Default, diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 3796db36..9ae9db49 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -10,7 +10,7 @@ use num::Complex; use itertools::Itertools; use rayon::prelude::*; -use bempp_field::{types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm}, helpers::{pad3, rfft3, irfft3}}; +use bempp_field::{types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, FftFieldTranslationKiFmm}, helpers::{pad3, rfft3, irfft3}}; use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, fmm::{Fmm, InteractionLists, SourceTranslation, TargetTranslation}, @@ -523,10 +523,145 @@ where // 1. Pad the signal let &(m, n, o) = signal.shape(); - let p = 2*m; - let q = 2*n; - let r = 2*o; + let p = 2_f64.powf((m as f64).log2().ceil()) as usize; + let q = 2_f64.powf((n as f64).log2().ceil()) as usize; + let r = 2_f64.powf((o as f64).log2().ceil()) as usize; + let p = p.max(4); + let q = q.max(4); + let r = r.max(4); + + let pad_size = (p-m, q-n, r-o); + let pad_index = (p-m, q-n, r-o); + let real_dim = q; + + // Also slow but not as slow as compute signal ~100ms + let padded_signal = pad3(&signal, pad_size, pad_index); + + // TODO: Very SLOW ~21s + let padded_signal_hat = rfft3(&padded_signal); + let &(m_, n_, o_) = padded_signal_hat.shape(); + let len_padded_signal_hat = m_*n_*o_; + + // 2. Compute the convolution to find the check potential + let padded_kernel_hat = &fmm_arc.m2l.m2l[k_idx]; + let &(m_, n_, o_) = padded_kernel_hat.shape(); + let len_padded_kernel_hat= m_*n_*o_; + + // Compute Hadamard product + let padded_signal_hat = unsafe { + rlst_pointer_mat!['a, Complex, padded_signal_hat.get_data().as_ptr(), (len_padded_signal_hat, 1), (1,1)] + }; + + let padded_kernel_hat= unsafe { + rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (len_padded_kernel_hat, 1), (1,1)] + }; + + let check_potential_hat = padded_kernel_hat.cmp_wise_product(padded_signal_hat).eval(); + + // 3.1 Compute iFFT to find check potentials + let check_potential_hat = Array3D::from_data(check_potential_hat.data().to_vec(), (m_, n_, o_)); + + let check_potential = irfft3(&check_potential_hat, real_dim); + + // Filter check potentials + let mut filtered_check_potentials: Array3D = Array3D::new((m+1, n+1, o+1)); + for i in (p-m-1)..p { + for j in (q-n-1)..q { + for k in (r-o-1)..r { + let i_= i - (p-m-1); + let j_ = j - (q-n-1); + let k_ = k - (r-o-1); + *filtered_check_potentials.get_mut(i_, j_, k_).unwrap() = *check_potential.get(i, j, k).unwrap(); + } + } + } + + let (_, target_surface_idxs) = target.surface_grid(fmm_arc.order); + let mut tmp = Vec::new(); + let ntargets = target_surface_idxs.len() / fmm_arc.kernel.space_dimension(); + let xs = &target_surface_idxs[0..ntargets]; + let ys = &target_surface_idxs[ntargets..2*ntargets]; + let zs = &target_surface_idxs[2*ntargets..]; + + for i in 0..ntargets { + let val = filtered_check_potentials.get(xs[i], ys[i], zs[i]).unwrap(); + tmp.push(*val); + } + + let check_potential = unsafe { + rlst_pointer_mat!['a, f64, tmp.as_ptr(), (ntargets, 1), (1,1)] + }; + + // Finally, compute local coefficients from check potential + let target_local_owned = (self.m2l_scale(target.level()) + * fmm_arc.kernel.scale(target.level()) + * fmm_arc.dc2e_inv.dot(&check_potential)).eval(); + + + let mut target_local_lock = target_local_arc.lock().unwrap(); + *target_local_lock.deref_mut() = (target_local_lock.deref() + target_local_owned).eval(); + } + } + }) + } + + fn m2l_scale(&self, level: u64) -> f64 { + if level < 2 { + panic!("M2L only performed on level 2 and below") + } + if level == 2 { + 1. / 2. + } else { + 2_f64.powf((level - 3) as f64) + } + + } + +} + +impl FieldTranslation for FmmData>> +where + T: Kernel + KernelScale + std::marker::Sync + std::marker::Send + Default +{ + + fn m2l<'a>(&self, level: u64) { + let Some(targets) = self.fmm.tree().get_keys(level) else { return }; + + targets.par_iter().for_each(move |&target| { + if let Some(v_list) = self.fmm.get_v_list(&target) { + let fmm_arc = Arc::clone(&self.fmm); + let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); + + for source in v_list.iter() { + + let transfer_vector = target.find_transfer_vector(source); + + // Locate correct precomputed FFT of kernel + let k_idx = fmm_arc + .m2l + .transfer_vectors + .iter() + .position(|x| x.vector == transfer_vector) + .unwrap(); + + // Compute FFT of signal + let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); + + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + + // TODO: SLOW ~ 1.5s + let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); + + // 1. Pad the signal + let &(m, n, o) = signal.shape(); + let p = 2_f64.powf((m as f64).log2().ceil()) as usize; + let q = 2_f64.powf((n as f64).log2().ceil()) as usize; + let r = 2_f64.powf((o as f64).log2().ceil()) as usize; + let p = p.max(4); + let q = q.max(4); + let r = r.max(4); + let pad_size = (p-m, q-n, r-o); let pad_index = (p-m, q-n, r-o); let real_dim = q; diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 385f2bf0..6e014a77 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -367,7 +367,7 @@ where mod test { use super::*; - use bempp_field::types::FftFieldTranslationNaiveKiFmm; + use bempp_field::types::{FftFieldTranslationNaiveKiFmm, FftFieldTranslationKiFmm}; use rand::prelude::*; use rand::SeedableRng; @@ -492,7 +492,7 @@ mod test { #[test] fn test_fmm_fft<'a>() { - let npoints = 1000; + let npoints = 10000; let points = points_fixture(npoints, None, None); let global_idxs = (0..npoints).collect_vec(); let charges = vec![1.0; npoints]; @@ -502,12 +502,12 @@ mod test { let alpha_outer = 2.9; let adaptive = false; let ncrit = 150; - let depth = 2; + let depth = 3; let kernel = Laplace3dKernel::::default(); let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); - let m2l_data_fft = FftFieldTranslationNaiveKiFmm::new( + let m2l_data_fft = FftFieldTranslationKiFmm::new( kernel.clone(), order, tree.get_domain().clone(), @@ -523,9 +523,8 @@ mod test { let times = datatree.run(Some(true)); - // println!("FFT times {:?}", times); + println!("FFT times {:?}", times); - // assert!(false); let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; From d8e56f1982f90b840571ddc1bf355b4e8cd9915d Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Tue, 18 Jul 2023 14:06:50 +0100 Subject: [PATCH 36/40] Add fftw convolutions --- field/Cargo.toml | 4 + field/src/field.rs | 98 +++++++++++++----------- field/src/helpers.rs | 42 +++++++++++ fmm/Cargo.toml | 2 + fmm/src/field_translation.rs | 100 +++++++++++++++++++------ fmm/src/fmm.rs | 140 +++++++++++++++++------------------ tools/src/arrays.rs | 5 ++ traits/src/arrays.rs | 3 + 8 files changed, 258 insertions(+), 136 deletions(-) diff --git a/field/Cargo.toml b/field/Cargo.toml index bf9352b2..90dccafb 100644 --- a/field/Cargo.toml +++ b/field/Cargo.toml @@ -33,6 +33,10 @@ num = "0.4" rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse"} realfft = "3.3.0" rustfft = "6.1.0" +fftw = {git = "https://github.com/skailasa/fftw.git" } +cauchy = "0.4.0" +dashmap = "5.5.0" +rayon = "1.7.0" [dev-dependencies] approx_eq = "0.1.8" diff --git a/field/src/field.rs b/field/src/field.rs index d45d5fb9..10b36634 100644 --- a/field/src/field.rs +++ b/field/src/field.rs @@ -1,6 +1,7 @@ use std::{collections::HashMap, hash::Hash}; use num::Complex; +use fftw::types::*; use rlst::{ algorithms::{ linalg::LinAlg, @@ -15,7 +16,7 @@ use bempp_tree::types::domain::Domain; use bempp_tools::Array3D; use crate::{ - helpers::{compute_transfer_vectors, pad3, flip3, rfft3}, + helpers::{compute_transfer_vectors, pad3, flip3, rfft3, rfft3_fftw}, types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, FftFieldTranslationKiFmm, SvdM2lEntry, FftM2lEntry, TransferVector}, }; @@ -237,13 +238,16 @@ where // Precompute and store the FFT of each unique kernel interaction // Begin by calculating pad lengths along each dimension - let p = 2_f64.powf((m as f64).log2().ceil()) as usize; - let q = 2_f64.powf((n as f64).log2().ceil()) as usize; - let r = 2_f64.powf((o as f64).log2().ceil()) as usize; + // let p = 2_f64.powf((m as f64).log2().ceil()) as usize; + // let q = 2_f64.powf((n as f64).log2().ceil()) as usize; + // let r = 2_f64.powf((o as f64).log2().ceil()) as usize; - let p = p.max(4); - let q = q.max(4); - let r = r.max(4); + // let p = p.max(4); + // let q = q.max(4); + // let r = r.max(4); + let p = m + 1; + let q = n + 1; + let r = o + 1; let padded_kernel = pad3(&kernel, (p-m, q-n, r-o), (0, 0, 0)); @@ -354,14 +358,7 @@ where &mut kernel_evals[..] ); - for i in 0..n { - for j in 0..n { - for k in 0..n { - let conv_idx = i * n * n + j * n + k; - *result.get_mut(i, j, k).unwrap() = kernel_evals[conv_idx]; - } - } - } + result.get_data_mut().copy_from_slice(&kernel_evals[..]); result } @@ -376,18 +373,23 @@ where let n = 2 * expansion_order - 1; let mut result = Array3D::new((n,n,n)); + let mut tmp = vec![0f64; n*n*n]; + for i in 0..n { for j in 0..n { for k in 0..n { - let conv_idx = i*n*n+j*n+k; - if self.conv_to_surf_map.contains_key(&conv_idx) { - let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); - *result.get_mut(i, j, k).unwrap() = charges[*surf_idx]; - } + let conv_idx = i * n * n + j * n + k; + if let Some(surf_idx) = self.conv_to_surf_map.get(&conv_idx) { + tmp[conv_idx] = charges[*surf_idx]; + } else { + tmp[conv_idx] = 0f64; + } } } } - + + result.get_data_mut().copy_from_slice(&tmp[..]); + result } } @@ -449,21 +451,31 @@ where // Precompute and store the FFT of each unique kernel interaction // Begin by calculating pad lengths along each dimension - let p = 2_f64.powf((m as f64).log2().ceil()) as usize; - let q = 2_f64.powf((n as f64).log2().ceil()) as usize; - let r = 2_f64.powf((o as f64).log2().ceil()) as usize; + // let p = 2_f64.powf((m as f64).log2().ceil()) as usize; + // let q = 2_f64.powf((n as f64).log2().ceil()) as usize; + // let r = 2_f64.powf((o as f64).log2().ceil()) as usize; - let p = p.max(4); - let q = q.max(4); - let r = r.max(4); + // let p = p.max(4); + // let q = q.max(4); + // let r = r.max(4); + let p = m + 1; + let q = n + 1; + let r = o + 1; let padded_kernel = pad3(&kernel, (p-m, q-n, r-o), (0, 0, 0)); // Flip the kernel - let padded_kernel = flip3(&padded_kernel); + let mut padded_kernel = flip3(&padded_kernel); // Compute FFT of kernel for this transfer vector - let padded_kernel_hat = rfft3(&padded_kernel); + // let padded_kernel_hat = rfft3(&padded_kernel); + let mut padded_kernel_hat = Array3D::::new((p, q, r/2 + 1)); + // println!("HERE {:?}", padded_kernel_hat.shape()); + rfft3_fftw(padded_kernel.get_data_mut(), padded_kernel_hat.get_data_mut(), &[p, q, r]); + + // println!("padded_kernel {:?} {:?}", padded_kernel.get_data(), padded_kernel.shape()); + // println!("padded_kernel_hat {:?} {:?}", padded_kernel_hat.get_data(), padded_kernel_hat.shape()); + // break; // Store FFT of kernel for this transfer vector result.push(padded_kernel_hat) @@ -566,14 +578,7 @@ where &mut kernel_evals[..] ); - for i in 0..n { - for j in 0..n { - for k in 0..n { - let conv_idx = i * n * n + j * n + k; - *result.get_mut(i, j, k).unwrap() = kernel_evals[conv_idx]; - } - } - } + result.get_data_mut().copy_from_slice(&kernel_evals[..]); result } @@ -588,18 +593,23 @@ where let n = 2 * expansion_order - 1; let mut result = Array3D::new((n,n,n)); + let mut tmp = vec![0f64; n*n*n]; + for i in 0..n { for j in 0..n { for k in 0..n { - let conv_idx = i*n*n+j*n+k; - if self.conv_to_surf_map.contains_key(&conv_idx) { - let surf_idx = self.conv_to_surf_map.get(&conv_idx).unwrap(); - *result.get_mut(i, j, k).unwrap() = charges[*surf_idx]; - } + let conv_idx = i * n * n + j * n + k; + if let Some(surf_idx) = self.conv_to_surf_map.get(&conv_idx) { + tmp[conv_idx] = charges[*surf_idx]; + } else { + tmp[conv_idx] = 0f64; + } } } } - + + result.get_data_mut().copy_from_slice(&tmp[..]); + result } } @@ -622,7 +632,7 @@ mod test { let kernel = Laplace3dKernel::::new(); let domain = Domain { origin: [0., 0., 0.], diameter: [1.0, 1.0, 1.0] }; - let fft = FftFieldTranslationNaiveKiFmm::new(kernel, order, domain, alpha); + let fft = FftFieldTranslationKiFmm::new(kernel, order, domain, alpha); let domain = Domain { origin: [0., 0., 0.], diameter: [1., 1., 1.] }; let key = MortonKey::from_point(&[0.5, 0.5, 0.5], &domain, level); diff --git a/field/src/helpers.rs b/field/src/helpers.rs index 002901f8..e6b19790 100644 --- a/field/src/helpers.rs +++ b/field/src/helpers.rs @@ -1,8 +1,12 @@ use std::{collections::HashSet, usize}; +use dashmap::DashMap; use itertools::Itertools; +use num::traits::real::Real; use realfft::{num_complex::Complex, RealFftPlanner, num_traits::Zero}; use rustfft::{FftNum, FftPlanner}; +use fftw::{plan::*, array::*, types::*}; +use rayon::prelude::*; use bempp_tools::Array3D; use bempp_traits::arrays::Array3DAccess; @@ -148,6 +152,44 @@ where flipped } +pub fn rfft3_fftw(mut input: &mut [f64], mut output: &mut[c64], shape: &[usize]) { + + let mut plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); + + plan.r2c(input, output); +} + +pub fn rfft3_fftw_par_dm( + mut input: &DashMap>, + mut output: &DashMap>, + shape: &[usize], + targets: &[MortonKey] +) { + let size: usize = shape.iter().product(); + let size_d = shape.last().unwrap(); + let size_real = (size / size_d) * (size_d / 2 + 1); + + let mut plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); + + targets.into_par_iter().for_each(|key| { + plan.r2c( + input.get_mut(key).unwrap().get_data_mut(), + output.get_mut(key).unwrap().get_data_mut() + ); + }); +} + +pub fn irfft3_fftw(mut input: &mut [c64], mut output: &mut[f64], shape: &[usize]) { + let size: usize = shape.iter().product(); + let mut plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap(); + plan.c2r(input, output); + // Normalise + output + .iter_mut() + .for_each(|value| *value *= 1.0 / (size as f64)); +} + + pub fn rfft3(input_arr: &Array3D) -> Array3D> where T: Clone + FftNum, diff --git a/fmm/Cargo.toml b/fmm/Cargo.toml index 0c952c63..976e23ab 100644 --- a/fmm/Cargo.toml +++ b/fmm/Cargo.toml @@ -35,6 +35,8 @@ rayon = "1.7" num_cpus = "1" num = "0.4" rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse" } +fftw = {git = "https://github.com/skailasa/fftw.git" } +dashmap = "5.5.0" [target.aarch64-apple-darwin] rustflags = [ "-C", "target-feature=+neon"] diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 9ae9db49..4761d0a0 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -9,8 +9,9 @@ use bempp_tools::Array3D; use num::Complex; use itertools::Itertools; use rayon::prelude::*; +use fftw::types::*; -use bempp_field::{types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, FftFieldTranslationKiFmm}, helpers::{pad3, rfft3, irfft3}}; +use bempp_field::{types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, FftFieldTranslationKiFmm}, helpers::{pad3, rfft3, irfft3, rfft3_fftw, irfft3_fftw, rfft3_fftw_par_dm}}; use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, fmm::{Fmm, InteractionLists, SourceTranslation, TargetTranslation}, @@ -523,12 +524,16 @@ where // 1. Pad the signal let &(m, n, o) = signal.shape(); - let p = 2_f64.powf((m as f64).log2().ceil()) as usize; - let q = 2_f64.powf((n as f64).log2().ceil()) as usize; - let r = 2_f64.powf((o as f64).log2().ceil()) as usize; - let p = p.max(4); - let q = q.max(4); - let r = r.max(4); + // let p = 2_f64.powf((m as f64).log2().ceil()) as usize; + // let q = 2_f64.powf((n as f64).log2().ceil()) as usize; + // let r = 2_f64.powf((o as f64).log2().ceil()) as usize; + // let p = p.max(4); + // let q = q.max(4); + // let r = r.max(4); + + let p = m + 1; + let q = n + 1; + let r = o + 1; let pad_size = (p-m, q-n, r-o); let pad_index = (p-m, q-n, r-o); @@ -619,6 +624,11 @@ where } +use dashmap::DashMap; + + + + impl FieldTranslation for FmmData>> where T: Kernel + KernelScale + std::marker::Sync + std::marker::Send + Default @@ -626,10 +636,51 @@ where fn m2l<'a>(&self, level: u64) { let Some(targets) = self.fmm.tree().get_keys(level) else { return }; + + // // Form signals to use for convolution first + // let n = 2*self.fmm.order -1; + // let mut padded_signals: DashMap> = targets.iter().map(|target| (*target, Array3D::::new((n, n, n)))).collect(); + // let mut padded_signals_hat: DashMap> = targets.iter().map(|target| (*target, Array3D::::new((n, n, n/2 + 1)))).collect(); + + // targets.par_iter().for_each(|target| { + // let fmm_arc = Arc::clone(&self.fmm); + // let source_multipole_arc = Arc::clone(self.multipoles.get(target).unwrap()); + // let source_multipole_lock = source_multipole_arc.lock().unwrap(); + // let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); + + // // Pad the signal + // let &(m, n, o) = signal.shape(); + + // let p = m + 1; + // let q = n + 1; + // let r = o + 1; + + // let pad_size = (p-m, q-n, r-o); + // let pad_index = (p-m, q-n, r-o); + // let real_dim = q; + + // let mut padded_signal = pad3(&signal, pad_size, pad_index); + // padded_signals_hat.insert(*target, Array3D::::new((p, q, r/2 + 1))); + // padded_signals.insert(*target, padded_signal); + // }); + + // // Compute FFT of signals for use in convolution + // let ntargets = targets.len(); + // let key = targets[0]; + // let shape = padded_signals.get(&key).unwrap().shape().clone(); + // let shape = [shape.0, shape.1, shape.2]; + + // rfft3_fftw_par_dm(&padded_signals, &padded_signals_hat, &shape, targets); + + // // Loop through padded signals and apply convolutions in all directions of transfer vector, even if there are zeros. + + - targets.par_iter().for_each(move |&target| { + + targets.iter().for_each(move |&target| { if let Some(v_list) = self.fmm.get_v_list(&target) { let fmm_arc = Arc::clone(&self.fmm); + let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); for source in v_list.iter() { @@ -655,22 +706,28 @@ where // 1. Pad the signal let &(m, n, o) = signal.shape(); - let p = 2_f64.powf((m as f64).log2().ceil()) as usize; - let q = 2_f64.powf((n as f64).log2().ceil()) as usize; - let r = 2_f64.powf((o as f64).log2().ceil()) as usize; - let p = p.max(4); - let q = q.max(4); - let r = r.max(4); + // let p = 2_f64.powf((m as f64).log2().ceil()) as usize; + // let q = 2_f64.powf((n as f64).log2().ceil()) as usize; + // let r = 2_f64.powf((o as f64).log2().ceil()) as usize; + // let p = p.max(4); + // let q = q.max(4); + // let r = r.max(4); + + let p = m + 1; + let q = n + 1; + let r = o + 1; let pad_size = (p-m, q-n, r-o); let pad_index = (p-m, q-n, r-o); let real_dim = q; // Also slow but not as slow as compute signal ~100ms - let padded_signal = pad3(&signal, pad_size, pad_index); + let mut padded_signal = pad3(&signal, pad_size, pad_index); // TODO: Very SLOW ~21s - let padded_signal_hat = rfft3(&padded_signal); + // let padded_signal_hat = rfft3(&padded_signal); + let mut padded_signal_hat = Array3D::::new((p, q, r/2 + 1)); + rfft3_fftw(padded_signal.get_data_mut(), padded_signal_hat.get_data_mut(), &[p, q, r]); let &(m_, n_, o_) = padded_signal_hat.shape(); let len_padded_signal_hat = m_*n_*o_; @@ -688,12 +745,11 @@ where rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (len_padded_kernel_hat, 1), (1,1)] }; - let check_potential_hat = padded_kernel_hat.cmp_wise_product(padded_signal_hat).eval(); - - // 3.1 Compute iFFT to find check potentials - let check_potential_hat = Array3D::from_data(check_potential_hat.data().to_vec(), (m_, n_, o_)); + let mut check_potential_hat = padded_kernel_hat.cmp_wise_product(padded_signal_hat).eval(); - let check_potential = irfft3(&check_potential_hat, real_dim); + // 3.1 Compute iFFT to find check potentials + let mut check_potential = Array3D::::new((p, q, r)); + irfft3_fftw(check_potential_hat.data_mut(), check_potential.get_data_mut(), &[p, q, r]); // Filter check potentials let mut filtered_check_potentials: Array3D = Array3D::new((m+1, n+1, o+1)); @@ -703,7 +759,7 @@ where let i_= i - (p-m-1); let j_ = j - (q-n-1); let k_ = k - (r-o-1); - *filtered_check_potentials.get_mut(i_, j_, k_).unwrap() = *check_potential.get(i, j, k).unwrap(); + *filtered_check_potentials.get_mut(i_, j_, k_).unwrap()= *check_potential.get(i, j, k).unwrap(); } } } diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 6e014a77..3ce3a160 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -406,93 +406,93 @@ mod test { points } - #[test] - fn test_fmm_svd<'a>() { - let npoints = 1000; - let points = points_fixture(npoints, None, None); - let global_idxs = (0..npoints).collect_vec(); - let charges = vec![1.0; npoints]; - - let order = 5; - let alpha_inner = 1.05; - let alpha_outer = 2.9; - let adaptive = false; - let k = 1000; - let ncrit = 150; - let depth = 2; - let kernel = Laplace3dKernel::::default(); - - let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); - - let m2l_data_svd = SvdFieldTranslationKiFmm::new( - kernel.clone(), - Some(k), - order, - tree.get_domain().clone(), - alpha_inner, - ); - - let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); - - // Form charge dict, matching charges with their associated global indices - let mut charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); + // #[test] + // fn test_fmm_svd<'a>() { + // let npoints = 1000; + // let points = points_fixture(npoints, None, None); + // let global_idxs = (0..npoints).collect_vec(); + // let charges = vec![1.0; npoints]; + + // let order = 2; + // let alpha_inner = 1.05; + // let alpha_outer = 2.9; + // let adaptive = false; + // let k = 1000; + // let ncrit = 150; + // let depth = 2; + // let kernel = Laplace3dKernel::::default(); + + // let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); + + // let m2l_data_svd = SvdFieldTranslationKiFmm::new( + // kernel.clone(), + // Some(k), + // order, + // tree.get_domain().clone(), + // alpha_inner, + // ); + + // let fmm = KiFmm::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data_svd); + + // // Form charge dict, matching charges with their associated global indices + // let mut charge_dict = build_charge_dict(&global_idxs[..], &charges[..]); - let datatree = FmmData::new(fmm, &charge_dict); + // let datatree = FmmData::new(fmm, &charge_dict); - let times = datatree.run(Some(true)); + // let times = datatree.run(Some(true)); - // println!("SVD times {:?}", times); + // // println!("SVD times {:?}", times); - // assert!(false); + // // assert!(false); - let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; + // let leaf = &datatree.fmm.tree.get_leaves().unwrap()[0]; - let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); - let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); + // let potentials = datatree.potentials.get(&leaf).unwrap().lock().unwrap(); + // let pts = datatree.fmm.tree().get_points(&leaf).unwrap(); - let leaf_coordinates = pts - .iter() - .map(|p| p.coordinate) - .flat_map(|[x, y, z]| vec![x, y, z]) - .collect_vec(); + // let leaf_coordinates = pts + // .iter() + // .map(|p| p.coordinate) + // .flat_map(|[x, y, z]| vec![x, y, z]) + // .collect_vec(); - let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); + // let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension(); - // Get into row major order - let leaf_coordinates = unsafe { - rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] - }.eval(); + // // Get into row major order + // let leaf_coordinates = unsafe { + // rlst_pointer_mat!['a, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)] + // }.eval(); - let mut direct = vec![0f64; pts.len()]; - let all_point_coordinates = points_fixture(npoints, None, None); + // let mut direct = vec![0f64; pts.len()]; + // let all_point_coordinates = points_fixture(npoints, None, None); - let all_charges = charge_dict.into_values().collect_vec(); + // let all_charges = charge_dict.into_values().collect_vec(); - let kernel = Laplace3dKernel::::default(); + // let kernel = Laplace3dKernel::::default(); - kernel.evaluate_st( - EvalType::Value, - all_point_coordinates.data(), - leaf_coordinates.data(), - &all_charges[..], - &mut direct[..], - ); + // kernel.evaluate_st( + // EvalType::Value, + // all_point_coordinates.data(), + // leaf_coordinates.data(), + // &all_charges[..], + // &mut direct[..], + // ); - let abs_error: f64 = potentials - .data() - .iter() - .zip(direct.iter()) - .map(|(a, b)| (a - b).abs()) - .sum(); - let rel_error: f64 = abs_error / (direct.iter().sum::()); + // let abs_error: f64 = potentials + // .data() + // .iter() + // .zip(direct.iter()) + // .map(|(a, b)| (a - b).abs()) + // .sum(); + // let rel_error: f64 = abs_error / (direct.iter().sum::()); - println!("rel error {:?}", rel_error); - assert!(rel_error <= 1e-5); - } + // println!("rel error {:?}", rel_error); + // assert!(rel_error <= 1e-5); + // } #[test] fn test_fmm_fft<'a>() { - let npoints = 10000; + let npoints = 1000; let points = points_fixture(npoints, None, None); let global_idxs = (0..npoints).collect_vec(); let charges = vec![1.0; npoints]; @@ -568,6 +568,6 @@ mod test { let rel_error: f64 = abs_error / (direct.iter().sum::()); println!("rel error {:?}", rel_error); - assert!(rel_error <= 1e-5); + assert!(rel_error <= 1e-10); } } diff --git a/tools/src/arrays.rs b/tools/src/arrays.rs index bef69ca7..9a7464be 100644 --- a/tools/src/arrays.rs +++ b/tools/src/arrays.rs @@ -91,6 +91,7 @@ impl<'a, T: Num> Iterator for Array2DRowIterator<'a, T> { } /// A three-dimensional rectangular array +#[derive(Clone)] pub struct Array3D { /// The data in the array, in row-major order data: Vec, @@ -142,6 +143,10 @@ impl Array3DAccess for Array3D { fn get_data(&self) -> &[T] { &self.data } + + fn get_data_mut(&mut self) -> &mut [T] { + &mut self.data + } } /// A four-dimensional rectangular array diff --git a/traits/src/arrays.rs b/traits/src/arrays.rs index 6d4c684d..9ffd5ceb 100644 --- a/traits/src/arrays.rs +++ b/traits/src/arrays.rs @@ -90,6 +90,9 @@ pub trait Array3DAccess { /// Get a pointer to the raw data in the array fn get_data(&self) -> &[T]; + + /// Get a mut pointer to the raw data in the array + fn get_data_mut(&mut self) -> &mut [T]; } pub trait Array4DAccess { From 3e90f7576bbae01f6763cc9c88d9928119ff1478 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Wed, 19 Jul 2023 11:42:34 +0100 Subject: [PATCH 37/40] Add dashmap experiments --- field/Cargo.toml | 2 +- field/src/helpers.rs | 2 +- fmm/Cargo.toml | 4 +- fmm/src/field_translation.rs | 283 ++++++++++++++++++++++------------- fmm/src/fmm.rs | 6 +- 5 files changed, 187 insertions(+), 110 deletions(-) diff --git a/field/Cargo.toml b/field/Cargo.toml index 90dccafb..ac583e65 100644 --- a/field/Cargo.toml +++ b/field/Cargo.toml @@ -35,7 +35,7 @@ realfft = "3.3.0" rustfft = "6.1.0" fftw = {git = "https://github.com/skailasa/fftw.git" } cauchy = "0.4.0" -dashmap = "5.5.0" +dashmap = {version = "5.5.0", features=["rayon"]} rayon = "1.7.0" [dev-dependencies] diff --git a/field/src/helpers.rs b/field/src/helpers.rs index e6b19790..a6c57b93 100644 --- a/field/src/helpers.rs +++ b/field/src/helpers.rs @@ -1,4 +1,4 @@ -use std::{collections::HashSet, usize}; +use std::{collections::HashSet, usize, sync::{Arc, RwLock}}; use dashmap::DashMap; use itertools::Itertools; diff --git a/fmm/Cargo.toml b/fmm/Cargo.toml index 976e23ab..c7ead786 100644 --- a/fmm/Cargo.toml +++ b/fmm/Cargo.toml @@ -31,12 +31,12 @@ itertools = "0.10" mpi = { version = "0.6.*" } rand = "0.8.*" float-cmp = "0.9.0" -rayon = "1.7" num_cpus = "1" num = "0.4" rlst = {git = "https://github.com/skailasa/rlst.git", branch = "enh/moore-penrose-pseudo-inverse" } fftw = {git = "https://github.com/skailasa/fftw.git" } -dashmap = "5.5.0" +dashmap = {version = "5.5.0", features=["rayon"]} +rayon = "1.7" [target.aarch64-apple-darwin] rustflags = [ "-C", "target-feature=+neon"] diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 4761d0a0..e6b5ca1a 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -2,7 +2,7 @@ use std::{ collections::HashMap, ops::{Deref, Mul, DerefMut}, - sync::{Arc, Mutex, RwLock, MutexGuard}, + sync::{Arc, Mutex, RwLock, MutexGuard}, time::Instant, }; use bempp_tools::Array3D; @@ -637,7 +637,10 @@ where fn m2l<'a>(&self, level: u64) { let Some(targets) = self.fmm.tree().get_keys(level) else { return }; - // // Form signals to use for convolution first + // Form signals to use for convolution first + + let start = Instant::now(); + // let n = 2*self.fmm.order -1; // let mut padded_signals: DashMap> = targets.iter().map(|target| (*target, Array3D::::new((n, n, n)))).collect(); // let mut padded_signals_hat: DashMap> = targets.iter().map(|target| (*target, Array3D::::new((n, n, n/2 + 1)))).collect(); @@ -664,133 +667,207 @@ where // padded_signals.insert(*target, padded_signal); // }); - // // Compute FFT of signals for use in convolution - // let ntargets = targets.len(); - // let key = targets[0]; - // let shape = padded_signals.get(&key).unwrap().shape().clone(); - // let shape = [shape.0, shape.1, shape.2]; + let n = 2*self.fmm.order - 1; + let ntargets = targets.len(); - // rfft3_fftw_par_dm(&padded_signals, &padded_signals_hat, &shape, targets); + // Pad the signal + let &(m, n, o) = &(n, n, n); - // // Loop through padded signals and apply convolutions in all directions of transfer vector, even if there are zeros. + let p = m + 1; + let q = n + 1; + let r = o + 1; + let pad_size = (p-m, q-n, r-o); + let pad_index = (p-m, q-n, r-o); + let real_dim = q; + let mut padded_signals = Arc::new(RwLock::new( + rlst_mat![f64, (p*q*r, ntargets)] + )); - targets.iter().for_each(move |&target| { - if let Some(v_list) = self.fmm.get_v_list(&target) { - let fmm_arc = Arc::clone(&self.fmm); + targets.par_iter().enumerate().for_each(|(i, target)| { + let fmm_arc = Arc::clone(&self.fmm); + let source_multipole_arc = Arc::clone(self.multipoles.get(target).unwrap()); + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); + + let mut padded_signal = pad3(&signal, pad_size, pad_index); + + let first = i * p*q*r; + let last = first + p*q*r; - let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); + let mut padded_signals_lock = padded_signals.write().unwrap(); + padded_signals_lock.get_slice_mut(first, last).copy_from_slice(padded_signal.get_data()); + }); - for source in v_list.iter() { + println!("data organisation time {:?}", start.elapsed().as_millis()); - let transfer_vector = target.find_transfer_vector(source); + // Compute FFT of signals for use in convolution + // let ntargets = targets.len(); + // let key = targets[0]; + // let shape = padded_signals.get(&key).unwrap().shape().clone(); + // let shape = [shape.0, shape.1, shape.2]; - // Locate correct precomputed FFT of kernel - let k_idx = fmm_arc - .m2l - .transfer_vectors - .iter() - .position(|x| x.vector == transfer_vector) - .unwrap(); + // let start = Instant::now(); + // rfft3_fftw_par_dm(&padded_signals, &padded_signals_hat, &shape, targets); + // println!("FFT time {:?}", start.elapsed().as_millis()); + // Loop through padded signals and apply convolutions in all directions of transfer vector, even if there are zeros. - // Compute FFT of signal - let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); - - let source_multipole_lock = source_multipole_arc.lock().unwrap(); + // (0..self.fmm.m2l.transfer_vectors.len()).into_par_iter().for_each(|k_idx| { + // let fmm_arc = Arc::clone(&self.fmm); + // // let padded_kernel_hat = &fmm_arc.m2l.m2l[k_idx]; + // // let &(m_, n_, o_) = padded_kernel_hat.shape(); + // // let len_padded_kernel_hat= m_*n_*o_; + + // // let padded_kernel_hat= unsafe { + // // rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (len_padded_kernel_hat, 1), (1,1)] + // // }; - // TODO: SLOW ~ 1.5s - let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); + // }); - // 1. Pad the signal - let &(m, n, o) = signal.shape(); + // padded_signals_hat.par_iter().for_each(|pair| { + // let source = pair.key(); + // let padded_signal_hat = pair.value(); + // let fmm_arc = Arc::clone(&self.fmm); + + // // Compute Hadamard product + // let &(m_, n_, o_) = padded_signal_hat.shape(); + // let len_padded_signal_hat= m_*n_*o_; + // let padded_signal_hat = unsafe { + // rlst_pointer_mat!['a, Complex, padded_signal_hat.get_data().as_ptr(), (len_padded_signal_hat, 1), (1,1)] + // }; + + // for (k_idx, tv) in self.fmm.m2l.transfer_vectors.iter().enumerate() { + // let padded_kernel_hat = &fmm_arc.m2l.m2l[k_idx]; + // let &(m_, n_, o_) = padded_kernel_hat.shape(); + // let len_padded_kernel_hat= m_*n_*o_; + + // let padded_kernel_hat= unsafe { + // rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (len_padded_kernel_hat, 1), (1,1)] + // }; - // let p = 2_f64.powf((m as f64).log2().ceil()) as usize; - // let q = 2_f64.powf((n as f64).log2().ceil()) as usize; - // let r = 2_f64.powf((o as f64).log2().ceil()) as usize; - // let p = p.max(4); - // let q = q.max(4); - // let r = r.max(4); + // let mut check_potential_hat = padded_kernel_hat.cmp_wise_product(&padded_signal_hat).eval(); + + // } + // }) - let p = m + 1; - let q = n + 1; - let r = o + 1; - - let pad_size = (p-m, q-n, r-o); - let pad_index = (p-m, q-n, r-o); - let real_dim = q; + - // Also slow but not as slow as compute signal ~100ms - let mut padded_signal = pad3(&signal, pad_size, pad_index); + // targets.iter().for_each(move |&target| { + // if let Some(v_list) = self.fmm.get_v_list(&target) { + // let fmm_arc = Arc::clone(&self.fmm); - // TODO: Very SLOW ~21s - // let padded_signal_hat = rfft3(&padded_signal); - let mut padded_signal_hat = Array3D::::new((p, q, r/2 + 1)); - rfft3_fftw(padded_signal.get_data_mut(), padded_signal_hat.get_data_mut(), &[p, q, r]); - let &(m_, n_, o_) = padded_signal_hat.shape(); - let len_padded_signal_hat = m_*n_*o_; + // let target_local_arc = Arc::clone(self.locals.get(&target).unwrap()); - // 2. Compute the convolution to find the check potential - let padded_kernel_hat = &fmm_arc.m2l.m2l[k_idx]; - let &(m_, n_, o_) = padded_kernel_hat.shape(); - let len_padded_kernel_hat= m_*n_*o_; - - // Compute Hadamard product - let padded_signal_hat = unsafe { - rlst_pointer_mat!['a, Complex, padded_signal_hat.get_data().as_ptr(), (len_padded_signal_hat, 1), (1,1)] - }; - - let padded_kernel_hat= unsafe { - rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (len_padded_kernel_hat, 1), (1,1)] - }; + // for source in v_list.iter() { - let mut check_potential_hat = padded_kernel_hat.cmp_wise_product(padded_signal_hat).eval(); - - // 3.1 Compute iFFT to find check potentials - let mut check_potential = Array3D::::new((p, q, r)); - irfft3_fftw(check_potential_hat.data_mut(), check_potential.get_data_mut(), &[p, q, r]); + // let transfer_vector = target.find_transfer_vector(source); - // Filter check potentials - let mut filtered_check_potentials: Array3D = Array3D::new((m+1, n+1, o+1)); - for i in (p-m-1)..p { - for j in (q-n-1)..q { - for k in (r-o-1)..r { - let i_= i - (p-m-1); - let j_ = j - (q-n-1); - let k_ = k - (r-o-1); - *filtered_check_potentials.get_mut(i_, j_, k_).unwrap()= *check_potential.get(i, j, k).unwrap(); - } - } - } + // // Locate correct precomputed FFT of kernel + // let k_idx = fmm_arc + // .m2l + // .transfer_vectors + // .iter() + // .position(|x| x.vector == transfer_vector) + // .unwrap(); - let (_, target_surface_idxs) = target.surface_grid(fmm_arc.order); - let mut tmp = Vec::new(); - let ntargets = target_surface_idxs.len() / fmm_arc.kernel.space_dimension(); - let xs = &target_surface_idxs[0..ntargets]; - let ys = &target_surface_idxs[ntargets..2*ntargets]; - let zs = &target_surface_idxs[2*ntargets..]; + // // Compute FFT of signal + // let source_multipole_arc = Arc::clone(self.multipoles.get(source).unwrap()); + + // let source_multipole_lock = source_multipole_arc.lock().unwrap(); - for i in 0..ntargets { - let val = filtered_check_potentials.get(xs[i], ys[i], zs[i]).unwrap(); - tmp.push(*val); - } + // // TODO: SLOW ~ 1.5s + // let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); - let check_potential = unsafe { - rlst_pointer_mat!['a, f64, tmp.as_ptr(), (ntargets, 1), (1,1)] - }; + // // 1. Pad the signal + // let &(m, n, o) = signal.shape(); - // Finally, compute local coefficients from check potential - let target_local_owned = (self.m2l_scale(target.level()) - * fmm_arc.kernel.scale(target.level()) - * fmm_arc.dc2e_inv.dot(&check_potential)).eval(); + // // let p = 2_f64.powf((m as f64).log2().ceil()) as usize; + // // let q = 2_f64.powf((n as f64).log2().ceil()) as usize; + // // let r = 2_f64.powf((o as f64).log2().ceil()) as usize; + // // let p = p.max(4); + // // let q = q.max(4); + // // let r = r.max(4); + // let p = m + 1; + // let q = n + 1; + // let r = o + 1; + + // let pad_size = (p-m, q-n, r-o); + // let pad_index = (p-m, q-n, r-o); + // let real_dim = q; + + // // Also slow but not as slow as compute signal ~100ms + // let mut padded_signal = pad3(&signal, pad_size, pad_index); + + // // TODO: Very SLOW ~21s + // // let padded_signal_hat = rfft3(&padded_signal); + // let mut padded_signal_hat = Array3D::::new((p, q, r/2 + 1)); + // rfft3_fftw(padded_signal.get_data_mut(), padded_signal_hat.get_data_mut(), &[p, q, r]); + // let &(m_, n_, o_) = padded_signal_hat.shape(); + // let len_padded_signal_hat = m_*n_*o_; + + // // 2. Compute the convolution to find the check potential + // let padded_kernel_hat = &fmm_arc.m2l.m2l[k_idx]; + // let &(m_, n_, o_) = padded_kernel_hat.shape(); + // let len_padded_kernel_hat= m_*n_*o_; + + // // Compute Hadamard product + // let padded_signal_hat = unsafe { + // rlst_pointer_mat!['a, Complex, padded_signal_hat.get_data().as_ptr(), (len_padded_signal_hat, 1), (1,1)] + // }; + + // let padded_kernel_hat= unsafe { + // rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (len_padded_kernel_hat, 1), (1,1)] + // }; - let mut target_local_lock = target_local_arc.lock().unwrap(); - *target_local_lock.deref_mut() = (target_local_lock.deref() + target_local_owned).eval(); - } - } - }) + // let mut check_potential_hat = padded_kernel_hat.cmp_wise_product(padded_signal_hat).eval(); + + // // 3.1 Compute iFFT to find check potentials + // let mut check_potential = Array3D::::new((p, q, r)); + // irfft3_fftw(check_potential_hat.data_mut(), check_potential.get_data_mut(), &[p, q, r]); + + // // Filter check potentials + // let mut filtered_check_potentials: Array3D = Array3D::new((m+1, n+1, o+1)); + // for i in (p-m-1)..p { + // for j in (q-n-1)..q { + // for k in (r-o-1)..r { + // let i_= i - (p-m-1); + // let j_ = j - (q-n-1); + // let k_ = k - (r-o-1); + // *filtered_check_potentials.get_mut(i_, j_, k_).unwrap()= *check_potential.get(i, j, k).unwrap(); + // } + // } + // } + + // let (_, target_surface_idxs) = target.surface_grid(fmm_arc.order); + // let mut tmp = Vec::new(); + // let ntargets = target_surface_idxs.len() / fmm_arc.kernel.space_dimension(); + // let xs = &target_surface_idxs[0..ntargets]; + // let ys = &target_surface_idxs[ntargets..2*ntargets]; + // let zs = &target_surface_idxs[2*ntargets..]; + + // for i in 0..ntargets { + // let val = filtered_check_potentials.get(xs[i], ys[i], zs[i]).unwrap(); + // tmp.push(*val); + // } + + // let check_potential = unsafe { + // rlst_pointer_mat!['a, f64, tmp.as_ptr(), (ntargets, 1), (1,1)] + // }; + + // // Finally, compute local coefficients from check potential + // let target_local_owned = (self.m2l_scale(target.level()) + // * fmm_arc.kernel.scale(target.level()) + // * fmm_arc.dc2e_inv.dot(&check_potential)).eval(); + + + // let mut target_local_lock = target_local_arc.lock().unwrap(); + // *target_local_lock.deref_mut() = (target_local_lock.deref() + target_local_owned).eval(); + // } + // } + // }) } fn m2l_scale(&self, level: u64) -> f64 { diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 3ce3a160..7b4d1f3a 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -492,17 +492,17 @@ mod test { #[test] fn test_fmm_fft<'a>() { - let npoints = 1000; + let npoints = 1000000; let points = points_fixture(npoints, None, None); let global_idxs = (0..npoints).collect_vec(); let charges = vec![1.0; npoints]; - let order = 5; + let order = 9; let alpha_inner = 1.05; let alpha_outer = 2.9; let adaptive = false; let ncrit = 150; - let depth = 3; + let depth = 5; let kernel = Laplace3dKernel::::default(); let tree = SingleNodeTree::new(points.data(), adaptive, Some(ncrit), Some(depth), &global_idxs[..]); From 9be8e55dfb3f9f4d02e7844f8dd7e9082fd32a6a Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Thu, 20 Jul 2023 11:26:19 +0100 Subject: [PATCH 38/40] Add some more work on parallel vector chunks approach/ --- field/src/helpers.rs | 30 +++++++++++ fmm/src/field_translation.rs | 102 +++++++++++++++++------------------ fmm/src/fmm.rs | 2 +- 3 files changed, 82 insertions(+), 52 deletions(-) diff --git a/field/src/helpers.rs b/field/src/helpers.rs index a6c57b93..3b0b5897 100644 --- a/field/src/helpers.rs +++ b/field/src/helpers.rs @@ -179,6 +179,36 @@ pub fn rfft3_fftw_par_dm( }); } +use rlst::dense::{ + base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix, traits::*, Dynamic, +}; + +pub type FftMatrixf64 = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; + +pub type FftMatrixc64 = + Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; + +pub fn rfft3_fftw_par_vec( + mut input: &mut FftMatrixf64, + mut output: &mut FftMatrixc64, + shape: &[usize], +) { + assert!(shape.len() == 3); + + let size: usize = shape.iter().product(); + let size_d = shape.last().unwrap(); + let size_real = (size / size_d) * (size_d / 2 + 1); + + let mut plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); + let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter(); + let it_out = output.data_mut().par_chunks_exact_mut(size_real).into_par_iter(); + + it_inp.zip(it_out).for_each(|(inp, out)| { + plan.r2c(inp, out); + }); +} + pub fn irfft3_fftw(mut input: &mut [c64], mut output: &mut[f64], shape: &[usize]) { let size: usize = shape.iter().product(); let mut plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap(); diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index e6b5ca1a..4e3a7085 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -11,7 +11,7 @@ use itertools::Itertools; use rayon::prelude::*; use fftw::types::*; -use bempp_field::{types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, FftFieldTranslationKiFmm}, helpers::{pad3, rfft3, irfft3, rfft3_fftw, irfft3_fftw, rfft3_fftw_par_dm}}; +use bempp_field::{types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, FftFieldTranslationKiFmm}, helpers::{pad3, rfft3, irfft3, rfft3_fftw, irfft3_fftw, rfft3_fftw_par_dm, rfft3_fftw_par_vec}}; use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, fmm::{Fmm, InteractionLists, SourceTranslation, TargetTranslation}, @@ -645,65 +645,28 @@ where // let mut padded_signals: DashMap> = targets.iter().map(|target| (*target, Array3D::::new((n, n, n)))).collect(); // let mut padded_signals_hat: DashMap> = targets.iter().map(|target| (*target, Array3D::::new((n, n, n/2 + 1)))).collect(); + // // Pad the signal + // let &(m, n, o) = &(n, n, n); + + // let p = m + 1; + // let q = n + 1; + // let r = o + 1; + + // let pad_size = (p-m, q-n, r-o); + // let pad_index = (p-m, q-n, r-o); + // let real_dim = q; + // targets.par_iter().for_each(|target| { // let fmm_arc = Arc::clone(&self.fmm); // let source_multipole_arc = Arc::clone(self.multipoles.get(target).unwrap()); // let source_multipole_lock = source_multipole_arc.lock().unwrap(); // let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); - - // // Pad the signal - // let &(m, n, o) = signal.shape(); - - // let p = m + 1; - // let q = n + 1; - // let r = o + 1; - - // let pad_size = (p-m, q-n, r-o); - // let pad_index = (p-m, q-n, r-o); - // let real_dim = q; // let mut padded_signal = pad3(&signal, pad_size, pad_index); // padded_signals_hat.insert(*target, Array3D::::new((p, q, r/2 + 1))); // padded_signals.insert(*target, padded_signal); // }); - - let n = 2*self.fmm.order - 1; - let ntargets = targets.len(); - - // Pad the signal - let &(m, n, o) = &(n, n, n); - - let p = m + 1; - let q = n + 1; - let r = o + 1; - - let pad_size = (p-m, q-n, r-o); - let pad_index = (p-m, q-n, r-o); - let real_dim = q; - - let mut padded_signals = Arc::new(RwLock::new( - rlst_mat![f64, (p*q*r, ntargets)] - )); - - - targets.par_iter().enumerate().for_each(|(i, target)| { - let fmm_arc = Arc::clone(&self.fmm); - let source_multipole_arc = Arc::clone(self.multipoles.get(target).unwrap()); - let source_multipole_lock = source_multipole_arc.lock().unwrap(); - let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); - - let mut padded_signal = pad3(&signal, pad_size, pad_index); - - let first = i * p*q*r; - let last = first + p*q*r; - - let mut padded_signals_lock = padded_signals.write().unwrap(); - padded_signals_lock.get_slice_mut(first, last).copy_from_slice(padded_signal.get_data()); - }); - - println!("data organisation time {:?}", start.elapsed().as_millis()); - - // Compute FFT of signals for use in convolution + // Compute FFT of signals for use in convolution // let ntargets = targets.len(); // let key = targets[0]; // let shape = padded_signals.get(&key).unwrap().shape().clone(); @@ -752,8 +715,45 @@ where // } // }) - + ////////////////////////////////// + let n = 2*self.fmm.order - 1; + let ntargets = targets.len(); + // Pad the signal + let &(m, n, o) = &(n, n, n); + + let p = m + 1; + let q = n + 1; + let r = o + 1; + let size = p*q*r; + let pad_size = (p-m, q-n, r-o); + let pad_index = (p-m, q-n, r-o); + let real_dim = q; + + let mut padded_signals = rlst_col_vec![f64, (size*ntargets)]; + + let mut chunks = padded_signals.data_mut().par_chunks_exact_mut(size); + let range = (0..chunks.len()).into_par_iter(); + range.zip(chunks).for_each(|(i, chunk)| { + let fmm_arc = Arc::clone(&self.fmm); + let target = targets[i]; + let source_multipole_arc = Arc::clone(self.multipoles.get(&target).unwrap()); + let source_multipole_lock = source_multipole_arc.lock().unwrap(); + let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); + + let mut padded_signal = pad3(&signal, pad_size, pad_index); + + chunk.copy_from_slice(padded_signal.get_data()); + }); + println!("data organisation time {:?}", start.elapsed().as_millis()); + + let size_real = p*q*(r/2+1); + let mut padded_signals_hat = rlst_col_vec![c64, (size_real*ntargets)]; + let start = Instant::now(); + rfft3_fftw_par_vec(&mut padded_signals, &mut padded_signals_hat, &[p, q, r]); + println!("fft time {:?}", start.elapsed().as_millis()); + + ////////////////////////// // targets.iter().for_each(move |&target| { // if let Some(v_list) = self.fmm.get_v_list(&target) { // let fmm_arc = Arc::clone(&self.fmm); diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 7b4d1f3a..215044a3 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -497,7 +497,7 @@ mod test { let global_idxs = (0..npoints).collect_vec(); let charges = vec![1.0; npoints]; - let order = 9; + let order = 10; let alpha_inner = 1.05; let alpha_outer = 2.9; let adaptive = false; From c00c627289f94b10537d2c980077d6d3c49bbd60 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Mon, 24 Jul 2023 14:59:19 +0100 Subject: [PATCH 39/40] Temp --- field/src/helpers.rs | 4 ++-- fmm/Cargo.toml | 3 +++ fmm/src/field_translation.rs | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/field/src/helpers.rs b/field/src/helpers.rs index 3b0b5897..d132f4be 100644 --- a/field/src/helpers.rs +++ b/field/src/helpers.rs @@ -201,8 +201,8 @@ pub fn rfft3_fftw_par_vec( let size_real = (size / size_d) * (size_d / 2 + 1); let mut plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); - let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter(); - let it_out = output.data_mut().par_chunks_exact_mut(size_real).into_par_iter(); + let it_inp = input.data_mut().par_rchunks_exact_mut(size).into_par_iter(); + let it_out = output.data_mut().par_rchunks_exact_mut(size_real).into_par_iter(); it_inp.zip(it_out).for_each(|(inp, out)| { plan.r2c(inp, out); diff --git a/fmm/Cargo.toml b/fmm/Cargo.toml index c7ead786..d5a798eb 100644 --- a/fmm/Cargo.toml +++ b/fmm/Cargo.toml @@ -40,3 +40,6 @@ rayon = "1.7" [target.aarch64-apple-darwin] rustflags = [ "-C", "target-feature=+neon"] + +[profile.release] +lto = true \ No newline at end of file diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 4e3a7085..25bd2654 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -24,7 +24,7 @@ use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; use rlst::{ common::traits::*, common::tools::PrettyPrint, - dense::{rlst_col_vec, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape}, + dense::{rlst_col_vec, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape, rlst_rand_col_vec}, }; use crate::types::{FmmData, KiFmm}; @@ -752,6 +752,39 @@ where let start = Instant::now(); rfft3_fftw_par_vec(&mut padded_signals, &mut padded_signals_hat, &[p, q, r]); println!("fft time {:?}", start.elapsed().as_millis()); + println!("size real {:?} size {:?}", size_real, size); + + let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + // Compute hadamard product with kernels + let range = (0..self.fmm.m2l.transfer_vectors.len()).into_par_iter(); + self.fmm.m2l.transfer_vectors.iter().take(16).par_bridge().for_each(|tv| { + // Locate correct precomputed FFT of kernel + let k_idx = self.fmm + .m2l + .transfer_vectors + .iter() + .position(|x| x.vector == tv.vector) + .unwrap(); + let padded_kernel_hat = &self.fmm.m2l.m2l[k_idx]; + let &(m_, n_, o_) = padded_kernel_hat.shape(); + let len_padded_kernel_hat= m_*n_*o_; + let padded_kernel_hat= unsafe { + rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (len_padded_kernel_hat, 1), (1,1)] + }; + + let padded_kernel_hat_arc = Arc::new(padded_kernel_hat); + + padded_signals_hat.data().chunks_exact(len_padded_kernel_hat).enumerate().for_each(|(i, padded_signal_hat)| { + let padded_signal_hat = unsafe { + rlst_pointer_mat!['a, Complex, padded_signal_hat.as_ptr(), (len_padded_kernel_hat, 1), (1,1)] + }; + + let padded_kernel_hat_ref = Arc::clone(&padded_kernel_hat_arc); + + let check_potential = padded_signal_hat.cmp_wise_product(padded_kernel_hat_ref.deref()).eval(); + }); + }); + ////////////////////////// // targets.iter().for_each(move |&target| { From 236892c0290a45f9542dae296112a446221768d2 Mon Sep 17 00:00:00 2001 From: Srinath Kailasa Date: Fri, 11 Aug 2023 12:43:17 +0100 Subject: [PATCH 40/40] Add work on using index pointers to avoid re-allocations --- field/src/helpers.rs | 31 ++- fmm/src/field_translation.rs | 245 ++++++++++++++++++++---- fmm/src/fmm.rs | 2 +- tree/src/implementations/impl_morton.rs | 26 +++ 4 files changed, 261 insertions(+), 43 deletions(-) diff --git a/field/src/helpers.rs b/field/src/helpers.rs index d132f4be..e349f10b 100644 --- a/field/src/helpers.rs +++ b/field/src/helpers.rs @@ -1,4 +1,4 @@ -use std::{collections::HashSet, usize, sync::{Arc, RwLock}}; +use std::{collections::HashSet, usize, sync::{Arc, RwLock, Mutex}, ops::{Deref, DerefMut}}; use dashmap::DashMap; use itertools::Itertools; @@ -209,6 +209,35 @@ pub fn rfft3_fftw_par_vec( }); } + +pub fn rfft3_fftw_par_vec_arc_mutex( + mut input: &mut Vec>>>, + mut output: &mut Vec>>>, + shape: &[usize], +) { + assert!(shape.len() == 3); + + let size: usize = shape.iter().product(); + let size_d = shape.last().unwrap(); + let size_real = (size / size_d) * (size_d / 2 + 1); + + let mut plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap(); + + let n = input.len(); + + (0..n).into_par_iter().for_each(|i| { + let input_arc = Arc::clone(&input[i]); + let output_arc = Arc::clone(&output[i]); + + let mut input_data = input_arc.lock().unwrap(); + let mut input_data_slice = input_data.as_mut_slice(); + let mut output_data = output_arc.lock().unwrap(); + let mut output_data_slice = output_data.as_mut_slice(); + + plan.r2c(input_data_slice, output_data_slice); + }); +} + pub fn irfft3_fftw(mut input: &mut [c64], mut output: &mut[f64], shape: &[usize]) { let size: usize = shape.iter().product(); let mut plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap(); diff --git a/fmm/src/field_translation.rs b/fmm/src/field_translation.rs index 25bd2654..9e3298cb 100644 --- a/fmm/src/field_translation.rs +++ b/fmm/src/field_translation.rs @@ -6,12 +6,13 @@ use std::{ }; use bempp_tools::Array3D; -use num::Complex; +use num::{Complex, FromPrimitive}; use itertools::Itertools; use rayon::prelude::*; use fftw::types::*; +use num::Zero; -use bempp_field::{types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, FftFieldTranslationKiFmm}, helpers::{pad3, rfft3, irfft3, rfft3_fftw, irfft3_fftw, rfft3_fftw_par_dm, rfft3_fftw_par_vec}}; +use bempp_field::{types::{SvdFieldTranslationKiFmm, FftFieldTranslationNaiveKiFmm, FftFieldTranslationKiFmm}, helpers::{pad3, rfft3, irfft3, rfft3_fftw, irfft3_fftw, rfft3_fftw_par_dm, rfft3_fftw_par_vec, rfft3_fftw_par_vec_arc_mutex}}; use bempp_traits::{ field::{FieldTranslation, FieldTranslationData}, fmm::{Fmm, InteractionLists, SourceTranslation, TargetTranslation}, @@ -24,7 +25,7 @@ use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree}; use rlst::{ common::traits::*, common::tools::PrettyPrint, - dense::{rlst_col_vec, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape, rlst_rand_col_vec}, + dense::{rlst_col_vec, rlst_mat, rlst_pointer_mat, traits::*, Dot, Shape, rlst_rand_col_vec, global}, }; use crate::types::{FmmData, KiFmm}; @@ -627,6 +628,7 @@ where use dashmap::DashMap; +type FftMatrixc64 = rlst::dense::Matrix, Dynamic, Dynamic>, Dynamic, Dynamic>; impl FieldTranslation for FmmData>> @@ -726,64 +728,225 @@ where let q = n + 1; let r = o + 1; let size = p*q*r; + let size_real = p*q*(r/2+1); let pad_size = (p-m, q-n, r-o); let pad_index = (p-m, q-n, r-o); let real_dim = q; - let mut padded_signals = rlst_col_vec![f64, (size*ntargets)]; + let mut padded_signals = vec![Arc::new(Mutex::new(vec![0f64; size])); ntargets]; - let mut chunks = padded_signals.data_mut().par_chunks_exact_mut(size); - let range = (0..chunks.len()).into_par_iter(); - range.zip(chunks).for_each(|(i, chunk)| { + (0..ntargets).into_par_iter().for_each(|i| { let fmm_arc = Arc::clone(&self.fmm); - let target = targets[i]; + let target = &targets[i]; let source_multipole_arc = Arc::clone(self.multipoles.get(&target).unwrap()); let source_multipole_lock = source_multipole_arc.lock().unwrap(); let signal = fmm_arc.m2l.compute_signal(fmm_arc.order, source_multipole_lock.data()); let mut padded_signal = pad3(&signal, pad_size, pad_index); - chunk.copy_from_slice(padded_signal.get_data()); + let mut padded_signal_arc = Arc::clone(&padded_signals[i]); + + padded_signal_arc.lock().unwrap().deref_mut().copy_from_slice(padded_signal.get_data()); }); println!("data organisation time {:?}", start.elapsed().as_millis()); - let size_real = p*q*(r/2+1); - let mut padded_signals_hat = rlst_col_vec![c64, (size_real*ntargets)]; + // Each index maps to a target (sorted) from targets + let mut padded_signals_hat = vec![Arc::new(Mutex::new(vec![Complex::::zero(); size_real])); ntargets]; + let start = Instant::now(); - rfft3_fftw_par_vec(&mut padded_signals, &mut padded_signals_hat, &[p, q, r]); + rfft3_fftw_par_vec_arc_mutex(&mut padded_signals, &mut padded_signals_hat, &[p, q, r]); + println!("fft time {:?}", start.elapsed().as_millis()); - println!("size real {:?} size {:?}", size_real, size); - let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); - // Compute hadamard product with kernels - let range = (0..self.fmm.m2l.transfer_vectors.len()).into_par_iter(); - self.fmm.m2l.transfer_vectors.iter().take(16).par_bridge().for_each(|tv| { - // Locate correct precomputed FFT of kernel - let k_idx = self.fmm - .m2l - .transfer_vectors - .iter() - .position(|x| x.vector == tv.vector) - .unwrap(); - let padded_kernel_hat = &self.fmm.m2l.m2l[k_idx]; - let &(m_, n_, o_) = padded_kernel_hat.shape(); - let len_padded_kernel_hat= m_*n_*o_; - let padded_kernel_hat= unsafe { - rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (len_padded_kernel_hat, 1), (1,1)] - }; - - let padded_kernel_hat_arc = Arc::new(padded_kernel_hat); - - padded_signals_hat.data().chunks_exact(len_padded_kernel_hat).enumerate().for_each(|(i, padded_signal_hat)| { - let padded_signal_hat = unsafe { - rlst_pointer_mat!['a, Complex, padded_signal_hat.as_ptr(), (len_padded_kernel_hat, 1), (1,1)] - }; - - let padded_kernel_hat_ref = Arc::clone(&padded_kernel_hat_arc); + let start = Instant::now(); + + // Map between keys and index locations in targets at this level + let mut target_index_map = Arc::new(RwLock::new(HashMap::new())); + + for (i, target) in targets.iter().enumerate() { + + let mut map = target_index_map.write().unwrap(); + map.insert(*target, i); + } + + // Each index corresponds to a target, and contains a vector of pointers to the padded signals in the targets interactions list + let mut source_index_pointer: Vec>>>>>>> = + (0..ntargets).map(|_| Arc::new(Mutex::new(Vec::>>>>::new()))).collect(); + + targets + .into_par_iter() + .zip(source_index_pointer.par_iter_mut()) + .enumerate() + .for_each(|(i, (target, arc_mutex_vec))| { + + let fmm_arc = Arc::clone(&self.fmm); + let v_list = target + .parent() + .neighbors() + .iter() + .flat_map(|pn| pn.children()) + .filter(|pnc| !target.is_adjacent_same_level(pnc)) + .collect_vec(); + + // Lookup indices for each element of v_list and add the pointers to the underlying data to the index pointer + let mut indices = Vec::new(); + let target_index_map_arc = Arc::clone(&target_index_map); + let map = target_index_map.read().unwrap(); + for source in v_list.iter() { + let idx = map.get(source).unwrap(); + indices.push(*idx); + } + + let mut outer_vec: MutexGuard<'_, Vec>>>>> = arc_mutex_vec.lock().unwrap(); + for &idx in indices.iter() { + let tmp: Arc>>> = Arc::clone(&padded_signals_hat[idx]); + outer_vec.push(tmp); + } + }); + + println!("index pointer time {:?}", start.elapsed().as_millis()); + + + // Compute Hadamard product with elements of V List, now stored in source_index_pointer + + let start = Instant::now(); + // let mut global_check_potentials_hat = vec![Arc::new(Mutex::new(vec![Complex::::zero(); size_real])); ntargets]; + + let mut global_check_potentials_hat = (0..ntargets) + .map(|_| Arc::new(Mutex::new(vec![Complex::::zero(); size_real]))).collect_vec(); + // let mut global_check_potentials_hat = (0..ntargets) + // .map(|_| Arc::new(Mutex::new(vec![0f64; size_real]))).collect_vec(); + + global_check_potentials_hat + .par_iter_mut() + .zip( + source_index_pointer + .into_par_iter() + ) + .zip( + targets.into_par_iter() + ).for_each(|((check_potential_hat, sources), target)| { + + // Find the corresponding Kernel matrices for each signal + let fmm_arc = Arc::clone(&self.fmm); + let v_list = target + .parent() + .neighbors() + .iter() + .flat_map(|pn| pn.children()) + .filter(|pnc| !target.is_adjacent_same_level(pnc)) + .collect_vec(); + + + let k_idxs = v_list + .iter() + .map(|source| target.find_transfer_vector(source)) + .map(|tv| { + fmm_arc + .m2l + .transfer_vectors + .iter() + .position(|x| x.vector == tv) + .unwrap() + }).collect_vec(); + + + // Compute convolutions + let check_potential_hat_arc = Arc::clone(check_potential_hat); + let mut check_potential_hat_data = check_potential_hat_arc.lock().unwrap(); + + let tmp = sources.lock().unwrap(); + let mut result = vec![Complex::::zero(); size_real]; - let check_potential = padded_signal_hat.cmp_wise_product(padded_kernel_hat_ref.deref()).eval(); + // for i in 0..result.len() { + // for _ in 0..189 { + // result[i] += Complex::::from(1.0); + // } + // } + + for i in 0..1 { + + let psh = tmp[i].lock().unwrap(); + let pkh = &fmm_arc.m2l.m2l[k_idxs[i]].get_data(); + + let hadamard: Vec = psh.iter().zip(pkh.iter()).map(|(s, k)| {*s * *k}).collect_vec(); + for j in 0..result.len() { + result[j] += Complex::::from(1.0); + } + } + + + // for ((i, source), &k_idx) in tmp.iter().enumerate().zip(k_idxs.iter()) { + + // // let psh = source.lock().unwrap(); + // // let pkh = &fmm_arc.m2l.m2l[k_idx]; + + // // let psh = unsafe { + // // rlst_pointer_mat!['a, c64, psh.as_ptr(), (size_real, 1), (1,1)] + // // }; + + // // let pkh = unsafe { + // // rlst_pointer_mat!['a, c64, pkh.get_data().as_ptr(), (size_real, 1), (1,1)] + // // }; + + // // let hadamard = psh.cmp_wise_product(&pkh).eval(); + // // result.iter_mut().zip(hadamard.data().iter()).for_each(|(r, h)| *r += h); + + // let psh = source.lock().unwrap(); + // let pkh = &fmm_arc.m2l.m2l[k_idx].get_data(); + + // let hadamard: Vec = psh.iter().zip(pkh.iter()).map(|(s, k)| {*s * *k}).collect_vec(); + + // for j in 0..result.len() { + // result[j] += Complex::::from(1.0); + // } + + // // result.iter_mut().zip(hadamard.iter()).for_each(|(r, h)| *r += Complex::::zero()) + // // result.iter_mut().for_each(|(r)| *r += Complex::::zero()) + // // check_potential_hat_data.deref_mut().iter_mut() + // // .zip(hadamard.iter()) + // // .for_each(|(r, h)| *r += h); + // // check_potential_hat_data.deref_mut().iter_mut() + // // // .zip(hadamard.iter()) + // // .for_each(|(r)| *r += Complex::::from(1.0)); + + // } + + // check_potential_hat_data.deref_mut().iter_mut().for_each(|x| *x += Complex::zero()); + }); - }); + + println!("Hadamard time {:?}", start.elapsed().as_millis()); + // let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order); + // // Compute hadamard product with kernels + // let range = (0..self.fmm.m2l.transfer_vectors.len()).into_par_iter(); + // self.fmm.m2l.transfer_vectors.iter().take(16).par_bridge().for_each(|tv| { + // // Locate correct precomputed FFT of kernel + // let k_idx = self.fmm + // .m2l + // .transfer_vectors + // .iter() + // .position(|x| x.vector == tv.vector) + // .unwrap(); + // let padded_kernel_hat = &self.fmm.m2l.m2l[k_idx]; + // let &(m_, n_, o_) = padded_kernel_hat.shape(); + // let len_padded_kernel_hat= m_*n_*o_; + // let padded_kernel_hat= unsafe { + // rlst_pointer_mat!['a, Complex, padded_kernel_hat.get_data().as_ptr(), (len_padded_kernel_hat, 1), (1,1)] + // }; + + // let padded_kernel_hat_arc = Arc::new(padded_kernel_hat); + + // padded_signals_hat.data().chunks_exact(len_padded_kernel_hat).enumerate().for_each(|(i, padded_signal_hat)| { + // let padded_signal_hat = unsafe { + // rlst_pointer_mat!['a, Complex, padded_signal_hat.as_ptr(), (len_padded_kernel_hat, 1), (1,1)] + // }; + + // let padded_kernel_hat_ref = Arc::clone(&padded_kernel_hat_arc); + + // let check_potential = padded_signal_hat.cmp_wise_product(padded_kernel_hat_ref.deref()).eval(); + // }); + // }); ////////////////////////// diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs index 215044a3..7b4d1f3a 100644 --- a/fmm/src/fmm.rs +++ b/fmm/src/fmm.rs @@ -497,7 +497,7 @@ mod test { let global_idxs = (0..npoints).collect_vec(); let charges = vec![1.0; npoints]; - let order = 10; + let order = 9; let alpha_inner = 1.05; let alpha_outer = 2.9; let adaptive = false; diff --git a/tree/src/implementations/impl_morton.rs b/tree/src/implementations/impl_morton.rs index 5b9ab008..724699bc 100644 --- a/tree/src/implementations/impl_morton.rs +++ b/tree/src/implementations/impl_morton.rs @@ -643,6 +643,32 @@ impl MortonKey { .collect() } + pub fn is_adjacent_same_level(&self, other: &MortonKey) -> bool { + // Calculate distance between centres of each node + let da = 1 << (DEEPEST_LEVEL - self.level()); + let db = 1 << (DEEPEST_LEVEL - other.level()); + let ra = (da as f64) * 0.5; + let rb = (db as f64) * 0.5; + + let ca: Vec = self.anchor.iter().map(|&x| (x as f64) + ra).collect(); + let cb: Vec = other.anchor.iter().map(|&x| (x as f64) + rb).collect(); + + let distance: Vec = ca.iter().zip(cb.iter()).map(|(a, b)| b - a).collect(); + + let min = -ra - rb; + let max = ra + rb; + let mut result = true; + + for &d in distance.iter() { + if d > max || d < min { + result = false + } + } + + result + + } + /// Check if two keys are adjacent with respect to each other pub fn is_adjacent(&self, other: &MortonKey) -> bool { let ancestors = self.ancestors();