Skip to content

Commit

Permalink
ENH: Add an FMM which uses linear data structures. (#134)
Browse files Browse the repository at this point in the history
* Nits

* Remove arc around fmm

* Start work on linear data structure

* Remove redundant ass type in trees

* Remove point data

* Improve point data access

* Improve trait name for sc.inv. kernels

* Some re-organisation

* Add a chunked p2m, weird hanging

* Fix linear p2m

* Fix p2m

* Fix p2m

* Add working m2m, and start work on adaptive chunking'

* Some kind of bug with empty boxes

* Fix bug in p2m with empty boxes, start re-organising

* Add blocking

* Add blocking

* wip

* Tmp commit

* Tmp commit

* Cleaner organisation

* Add a separate test for upward pass

* Completely unoptimised linear implementation with too many allocs

* Run formatter

* Remove extra local alloc

* Respond to clippy

* Format

* Comment out parallel tree tests for now
  • Loading branch information
skailasa authored Dec 3, 2023
1 parent d08e676 commit 8de7258
Show file tree
Hide file tree
Showing 27 changed files with 3,992 additions and 1,483 deletions.
142 changes: 121 additions & 21 deletions field/src/fft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use num::Complex;
use rayon::prelude::*;

use crate::types::{FftMatrixc32, FftMatrixc64, FftMatrixf32, FftMatrixf64};
use rlst::dense::RawAccessMut;

pub trait Fft<DtypeReal, DtypeCplx>
where
Expand Down Expand Up @@ -46,18 +45,127 @@ where
fn irfft3_fftw(input: &mut [Complex<Self>], output: &mut [Self], shape: &[usize]);
}

// impl Fft<FftMatrixf32, FftMatrixc32> for f32 {
// fn rfft3_fftw_par_vec(input: &mut FftMatrixf32, output: &mut FftMatrixc32, shape: &[usize]) {
// let size: usize = shape.iter().product();
// let size_d = shape.last().unwrap();
// let size_real = (size / size_d) * (size_d / 2 + 1);
// let plan: R2CPlan32 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();

// let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter();
// let it_out = output
// .data_mut()
// .par_chunks_exact_mut(size_real)
// .into_par_iter();

// it_inp.zip(it_out).for_each(|(inp, out)| {
// let _ = plan.r2c(inp, out);
// });
// }

// fn irfft_fftw_par_vec(input: &mut FftMatrixc32, output: &mut FftMatrixf32, shape: &[usize]) {
// let size: usize = shape.iter().product();
// let size_d = shape.last().unwrap();
// let size_real = (size / size_d) * (size_d / 2 + 1);
// let plan: C2RPlan32 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();

// let it_inp = input
// .data_mut()
// .par_chunks_exact_mut(size_real)
// .into_par_iter();
// let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter();

// it_inp.zip(it_out).for_each(|(inp, out)| {
// let _ = plan.c2r(inp, out);
// // Normalise output
// out.iter_mut()
// .for_each(|value| *value *= 1.0 / (size as f32));
// })
// }

// fn rfft3_fftw(input: &mut [Self], output: &mut [Complex<Self>], shape: &[usize]) {
// assert!(shape.len() == 3);
// let plan: R2CPlan32 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();
// let _ = plan.r2c(input, output);
// }

// fn irfft3_fftw(input: &mut [Complex<Self>], output: &mut [Self], shape: &[usize]) {
// assert!(shape.len() == 3);
// let size: usize = shape.iter().product();
// let plan: C2RPlan32 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();
// let _ = plan.c2r(input, output);
// // Normalise
// output
// .iter_mut()
// .for_each(|value| *value *= 1.0 / (size as f32));
// }
// }

// impl Fft<FftMatrixf64, FftMatrixc64> for f64 {
// fn rfft3_fftw_par_vec(input: &mut FftMatrixf64, output: &mut FftMatrixc64, shape: &[usize]) {
// let size: usize = shape.iter().product();
// let size_d = shape.last().unwrap();
// let size_real = (size / size_d) * (size_d / 2 + 1);
// let plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();

// let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter();
// let it_out = output
// .data_mut()
// .par_chunks_exact_mut(size_real)
// .into_par_iter();

// it_inp.zip(it_out).for_each(|(inp, out)| {
// let _ = plan.r2c(inp, out);
// });
// }

// fn irfft_fftw_par_vec(input: &mut FftMatrixc64, output: &mut FftMatrixf64, shape: &[usize]) {
// let size: usize = shape.iter().product();
// let size_d = shape.last().unwrap();
// let size_real = (size / size_d) * (size_d / 2 + 1);
// let plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();

// let it_inp = input
// .data_mut()
// .par_chunks_exact_mut(size_real)
// .into_par_iter();
// let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter();

// it_inp.zip(it_out).for_each(|(inp, out)| {
// let _ = plan.c2r(inp, out);
// // Normalise output
// out.iter_mut()
// .for_each(|value| *value *= 1.0 / (size as f64));
// })
// }

// fn rfft3_fftw(input: &mut [Self], output: &mut [Complex<Self>], shape: &[usize]) {
// assert!(shape.len() == 3);
// let plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();
// let _ = plan.r2c(input, output);
// }

// fn irfft3_fftw(input: &mut [Complex<Self>], output: &mut [Self], shape: &[usize]) {
// assert!(shape.len() == 3);
// let size: usize = shape.iter().product();
// let plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();
// let _ = plan.c2r(input, output);
// // Normalise
// output
// .iter_mut()
// .for_each(|value| *value *= 1.0 / (size as f64));
// }
// }

impl Fft<FftMatrixf32, FftMatrixc32> for f32 {
fn rfft3_fftw_par_vec(input: &mut FftMatrixf32, output: &mut FftMatrixc32, shape: &[usize]) {
let size: usize = shape.iter().product();
let size_d = shape.last().unwrap();
let size_real = (size / size_d) * (size_d / 2 + 1);
let plan: R2CPlan32 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();

let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter();
let it_out = output
.data_mut()
.par_chunks_exact_mut(size_real)
.into_par_iter();
let it_inp = input.par_chunks_exact_mut(size).into_par_iter();
let it_out = output.par_chunks_exact_mut(size_real).into_par_iter();

it_inp.zip(it_out).for_each(|(inp, out)| {
let _ = plan.r2c(inp, out);
Expand All @@ -70,11 +178,8 @@ impl Fft<FftMatrixf32, FftMatrixc32> for f32 {
let size_real = (size / size_d) * (size_d / 2 + 1);
let plan: C2RPlan32 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();

let it_inp = input
.data_mut()
.par_chunks_exact_mut(size_real)
.into_par_iter();
let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter();
let it_inp = input.par_chunks_exact_mut(size_real).into_par_iter();
let it_out = output.par_chunks_exact_mut(size).into_par_iter();

it_inp.zip(it_out).for_each(|(inp, out)| {
let _ = plan.c2r(inp, out);
Expand Down Expand Up @@ -109,27 +214,22 @@ impl Fft<FftMatrixf64, FftMatrixc64> for f64 {
let size_real = (size / size_d) * (size_d / 2 + 1);
let plan: R2CPlan64 = R2CPlan::aligned(shape, Flag::MEASURE).unwrap();

let it_inp = input.data_mut().par_chunks_exact_mut(size).into_par_iter();
let it_out = output
.data_mut()
.par_chunks_exact_mut(size_real)
.into_par_iter();
let it_inp = input.par_chunks_exact_mut(size).into_par_iter();
let it_out = output.par_chunks_exact_mut(size_real).into_par_iter();

it_inp.zip(it_out).for_each(|(inp, out)| {
let _ = plan.r2c(inp, out);
});
}

fn irfft_fftw_par_vec(input: &mut FftMatrixc64, output: &mut FftMatrixf64, shape: &[usize]) {
let size: usize = shape.iter().product();
let size_d = shape.last().unwrap();
let size_real = (size / size_d) * (size_d / 2 + 1);
let plan: C2RPlan64 = C2RPlan::aligned(shape, Flag::MEASURE).unwrap();

let it_inp = input
.data_mut()
.par_chunks_exact_mut(size_real)
.into_par_iter();
let it_out = output.data_mut().par_chunks_exact_mut(size).into_par_iter();
let it_inp = input.par_chunks_exact_mut(size_real).into_par_iter();
let it_out = output.par_chunks_exact_mut(size).into_par_iter();

it_inp.zip(it_out).for_each(|(inp, out)| {
let _ = plan.c2r(inp, out);
Expand Down
15 changes: 10 additions & 5 deletions field/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,16 +128,21 @@ where
}

/// Type alias for real coefficients for into FFTW wrappers
pub type FftMatrixf64 = Matrix<f64, BaseMatrix<f64, VectorContainer<f64>, Dynamic>, Dynamic>;
// pub type FftMatrixf64 = Matrix<f64, BaseMatrix<f64, VectorContainer<f64>, Dynamic>, Dynamic>;
pub type FftMatrixf64 = Vec<f64>;

/// Type alias for real coefficients for into FFTW wrappers
pub type FftMatrixf32 = Matrix<f32, BaseMatrix<f32, VectorContainer<f32>, Dynamic>, Dynamic>;
// pub type FftMatrixf32 = Matrix<f32, BaseMatrix<f32, VectorContainer<f32>, Dynamic>, Dynamic>;
pub type FftMatrixf32 = Vec<f32>;

/// Type alias for complex coefficients for FFTW wrappers
pub type FftMatrixc64 = Matrix<c64, BaseMatrix<c64, VectorContainer<c64>, Dynamic>, Dynamic>;
// pub type FftMatrixc64 = Matrix<c64, BaseMatrix<c64, VectorContainer<c64>, Dynamic>, Dynamic>;
pub type FftMatrixc64 = Vec<c64>;

/// Type alias for complex coefficients for FFTW wrappers
pub type FftMatrixc32 = Matrix<c32, BaseMatrix<c32, VectorContainer<c32>, Dynamic>, Dynamic>;
// pub type FftMatrixc32 = Matrix<c32, BaseMatrix<c32, VectorContainer<c32>, Dynamic>, Dynamic>;
pub type FftMatrixc32 = Vec<c32>;

/// Type alias for real coefficients for into FFTW wrappers
pub type FftMatrix<T> = Matrix<T, BaseMatrix<T, VectorContainer<T>, Dynamic>, Dynamic>;
// pub type FftMatrix<T> = Matrix<T, BaseMatrix<T, VectorContainer<T>, Dynamic>, Dynamic>;
pub type FftMatrix<T> = Vec<T>;
3 changes: 1 addition & 2 deletions fmm/src/constants.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
//! Crate wide constants

/// Size of cache in bytes to use for blocking purposes during the M2L sparsification via an FFT
pub const CACHE_SIZE: usize = 512;
pub const P2M_MAX_CHUNK_SIZE: usize = 256;
Loading

0 comments on commit 8de7258

Please sign in to comment.