ENH: Add an FMM which uses linear data structures. (#134)

* Nits * Remove arc around fmm * Start work on linear data structure * Remove redundant ass type in trees * Remove point data * Improve point data access * Improve trait name for sc.inv. kernels * Some re-organisation * Add a chunked p2m, weird hanging * Fix linear p2m * Fix p2m * Fix p2m * Add working m2m, and start work on adaptive chunking' * Some kind of bug with empty boxes * Fix bug in p2m with empty boxes, start re-organising * Add blocking * Add blocking * wip * Tmp commit * Tmp commit * Cleaner organisation * Add a separate test for upward pass * Completely unoptimised linear implementation with too many allocs * Run formatter * Remove extra local alloc * Respond to clippy * Format * Comment out parallel tree tests for now
bempp · Dec 3, 2023 · 8de7258 · 8de7258
1 parent d08e676
commit 8de7258
Show file tree

Hide file tree

Showing 27 changed files with 3,992 additions and 1,483 deletions.
diff --git a/field/src/fft.rs b/field/src/fft.rs
@@ -4,7 +4,6 @@ use num::Complex;
 use rayon::prelude::*;
 
 use crate::types::{FftMatrixc32, FftMatrixc64, FftMatrixf32, FftMatrixf64};
-use rlst::dense::RawAccessMut;
 
 pub trait Fft<DtypeReal, DtypeCplx>
 where
@@ -46,18 +45,127 @@ where
     fn irfft3_fftw(input: &mut [Complex<Self>], output: &mut [Self], shape: &[usize]);
 }
 
+// impl Fft<FftMatrixf32, FftMatrixc32> for f32 {
+//     fn rfft3_fftw_par_vec(input: &mut FftMatrixf32, output: &mut FftMatrixc32, shape: &[usize]) {
+//         let size: usize = shape.iter().product();
+//         let size_d = shape.last().unwrap();
+//         let size_real = (size / size_d) * (size_d / 2 + 1);
+//         let plan: R2CPlan32 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();
+
+//         let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter();
+//         let it_out = output
+//             .data_mut()
+//             .par_chunks_exact_mut(size_real)
+//             .into_par_iter();
+
+//         it_inp.zip(it_out).for_each(|(inp, out)| {
+//             let _ = plan.r2c(inp, out);
+//         });
+//     }
+
+//     fn irfft_fftw_par_vec(input: &mut FftMatrixc32, output: &mut FftMatrixf32, shape: &[usize]) {
+//         let size: usize = shape.iter().product();
+//         let size_d = shape.last().unwrap();
+//         let size_real = (size / size_d) * (size_d / 2 + 1);
+//         let plan: C2RPlan32 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();
+
+//         let it_inp = input
+//             .data_mut()
+//             .par_chunks_exact_mut(size_real)
+//             .into_par_iter();
+//         let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter();
+
+//         it_inp.zip(it_out).for_each(|(inp, out)| {
+//             let _ = plan.c2r(inp, out);
+//             // Normalise output
+//             out.iter_mut()
+//                 .for_each(|value| *value *= 1.0 / (size as f32));
+//         })
+//     }
+
+//     fn rfft3_fftw(input: &mut [Self], output: &mut [Complex<Self>], shape: &[usize]) {
+//         assert!(shape.len() == 3);
+//         let plan: R2CPlan32 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();
+//         let _ = plan.r2c(input, output);
+//     }
+
+//     fn irfft3_fftw(input: &mut [Complex<Self>], output: &mut [Self], shape: &[usize]) {
+//         assert!(shape.len() == 3);
+//         let size: usize = shape.iter().product();
+//         let plan: C2RPlan32 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();
+//         let _ = plan.c2r(input, output);
+//         // Normalise
+//         output
+//             .iter_mut()
+//             .for_each(|value| *value *= 1.0 / (size as f32));
+//     }
+// }
+
+// impl Fft<FftMatrixf64, FftMatrixc64> for f64 {
+//     fn rfft3_fftw_par_vec(input: &mut FftMatrixf64, output: &mut FftMatrixc64, shape: &[usize]) {
+//         let size: usize = shape.iter().product();
+//         let size_d = shape.last().unwrap();
+//         let size_real = (size / size_d) * (size_d / 2 + 1);
+//         let plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();
+
+//         let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter();
+//         let it_out = output
+//             .data_mut()
+//             .par_chunks_exact_mut(size_real)
+//             .into_par_iter();
+
+//         it_inp.zip(it_out).for_each(|(inp, out)| {
+//             let _ = plan.r2c(inp, out);
+//         });
+//     }
+
+//     fn irfft_fftw_par_vec(input: &mut FftMatrixc64, output: &mut FftMatrixf64, shape: &[usize]) {
+//         let size: usize = shape.iter().product();
+//         let size_d = shape.last().unwrap();
+//         let size_real = (size / size_d) * (size_d / 2 + 1);
+//         let plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();
+
+//         let it_inp = input
+//             .data_mut()
+//             .par_chunks_exact_mut(size_real)
+//             .into_par_iter();
+//         let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter();
+
+//         it_inp.zip(it_out).for_each(|(inp, out)| {
+//             let _ = plan.c2r(inp, out);
+//             // Normalise output
+//             out.iter_mut()
+//                 .for_each(|value| *value *= 1.0 / (size as f64));
+//         })
+//     }
+
+//     fn rfft3_fftw(input: &mut [Self], output: &mut [Complex<Self>], shape: &[usize]) {
+//         assert!(shape.len() == 3);
+//         let plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();
+//         let _ = plan.r2c(input, output);
+//     }
+
+//     fn irfft3_fftw(input: &mut [Complex<Self>], output: &mut [Self], shape: &[usize]) {
+//         assert!(shape.len() == 3);
+//         let size: usize = shape.iter().product();
+//         let plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();
+//         let _ = plan.c2r(input, output);
+//         // Normalise
+//         output
+//             .iter_mut()
+//             .for_each(|value| *value *= 1.0 / (size as f64));
+//     }
+// }
+
 impl Fft<FftMatrixf32, FftMatrixc32> for f32 {
     fn rfft3_fftw_par_vec(input: &mut FftMatrixf32, output: &mut FftMatrixc32, shape: &[usize]) {
         let size: usize = shape.iter().product();
         let size_d = shape.last().unwrap();
         let size_real = (size / size_d) * (size_d / 2 + 1);
         let plan: R2CPlan32 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();
 
-        let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter();
-        let it_out = output
-            .data_mut()
-            .par_chunks_exact_mut(size_real)
-            .into_par_iter();
+        let it_inp = input.par_chunks_exact_mut(size).into_par_iter();
+        let it_out = output.par_chunks_exact_mut(size_real).into_par_iter();
 
         it_inp.zip(it_out).for_each(|(inp, out)| {
             let _ = plan.r2c(inp, out);
@@ -70,11 +178,8 @@ impl Fft<FftMatrixf32, FftMatrixc32> for f32 {
         let size_real = (size / size_d) * (size_d / 2 + 1);
         let plan: C2RPlan32 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();
 
-        let it_inp = input
-            .data_mut()
-            .par_chunks_exact_mut(size_real)
-            .into_par_iter();
-        let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter();
+        let it_inp = input.par_chunks_exact_mut(size_real).into_par_iter();
+        let it_out = output.par_chunks_exact_mut(size).into_par_iter();
 
         it_inp.zip(it_out).for_each(|(inp, out)| {
             let _ = plan.c2r(inp, out);
@@ -109,27 +214,22 @@ impl Fft<FftMatrixf64, FftMatrixc64> for f64 {
         let size_real = (size / size_d) * (size_d / 2 + 1);
         let plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();
 
-        let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter();
-        let it_out = output
-            .data_mut()
-            .par_chunks_exact_mut(size_real)
-            .into_par_iter();
+        let it_inp = input.par_chunks_exact_mut(size).into_par_iter();
+        let it_out = output.par_chunks_exact_mut(size_real).into_par_iter();
 
         it_inp.zip(it_out).for_each(|(inp, out)| {
             let _ = plan.r2c(inp, out);
         });
     }
+
     fn irfft_fftw_par_vec(input: &mut FftMatrixc64, output: &mut FftMatrixf64, shape: &[usize]) {
         let size: usize = shape.iter().product();
         let size_d = shape.last().unwrap();
         let size_real = (size / size_d) * (size_d / 2 + 1);
         let plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();
 
-        let it_inp = input
-            .data_mut()
-            .par_chunks_exact_mut(size_real)
-            .into_par_iter();
-        let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter();
+        let it_inp = input.par_chunks_exact_mut(size_real).into_par_iter();
+        let it_out = output.par_chunks_exact_mut(size).into_par_iter();
 
         it_inp.zip(it_out).for_each(|(inp, out)| {
             let _ = plan.c2r(inp, out);

diff --git a/field/src/types.rs b/field/src/types.rs
@@ -128,16 +128,21 @@ where
 }
 
 /// Type alias for real coefficients for into FFTW wrappers
-pub type FftMatrixf64 = Matrix<f64, BaseMatrix<f64, VectorContainer<f64>, Dynamic>, Dynamic>;
+// pub type FftMatrixf64 = Matrix<f64, BaseMatrix<f64, VectorContainer<f64>, Dynamic>, Dynamic>;
+pub type FftMatrixf64 = Vec<f64>;
 
 /// Type alias for real coefficients for into FFTW wrappers
-pub type FftMatrixf32 = Matrix<f32, BaseMatrix<f32, VectorContainer<f32>, Dynamic>, Dynamic>;
+// pub type FftMatrixf32 = Matrix<f32, BaseMatrix<f32, VectorContainer<f32>, Dynamic>, Dynamic>;
+pub type FftMatrixf32 = Vec<f32>;
 
 /// Type alias for complex coefficients for FFTW wrappers
-pub type FftMatrixc64 = Matrix<c64, BaseMatrix<c64, VectorContainer<c64>, Dynamic>, Dynamic>;
+// pub type FftMatrixc64 = Matrix<c64, BaseMatrix<c64, VectorContainer<c64>, Dynamic>, Dynamic>;
+pub type FftMatrixc64 = Vec<c64>;
 
 /// Type alias for complex coefficients for FFTW wrappers
-pub type FftMatrixc32 = Matrix<c32, BaseMatrix<c32, VectorContainer<c32>, Dynamic>, Dynamic>;
+// pub type FftMatrixc32 = Matrix<c32, BaseMatrix<c32, VectorContainer<c32>, Dynamic>, Dynamic>;
+pub type FftMatrixc32 = Vec<c32>;
 
 /// Type alias for real coefficients for into FFTW wrappers
-pub type FftMatrix<T> = Matrix<T, BaseMatrix<T, VectorContainer<T>, Dynamic>, Dynamic>;
+// pub type FftMatrix<T> = Matrix<T, BaseMatrix<T, VectorContainer<T>, Dynamic>, Dynamic>;
+pub type FftMatrix<T> = Vec<T>;
diff --git a/fmm/src/constants.rs b/fmm/src/constants.rs
@@ -1,4 +1,3 @@
 //! Crate wide constants
 
-/// Size of cache in bytes to use for blocking purposes during the M2L sparsification via an FFT
-pub const CACHE_SIZE: usize = 512;
+pub const P2M_MAX_CHUNK_SIZE: usize = 256;