Skip to content

Commit

Permalink
Reimplement parallel assembly (#277)
Browse files Browse the repository at this point in the history
* restructure into folders

* kifmm version

* remove unused tests

* reimplement parallel assembly

* use ids in parallel test to account for rearranged cell indices

* ndgrid branch

* clippy

* ndgrid main

* update examples, tests, benches

* clippy
  • Loading branch information
mscroggs authored Aug 19, 2024
1 parent 3e437e5 commit fead46f
Show file tree
Hide file tree
Showing 24 changed files with 431 additions and 140 deletions.
20 changes: 5 additions & 15 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[features]
mpi = ["dep:mpi"]
mpi = ["dep:mpi", "ndelement/mpi", "ndgrid/mpi"]
strict = []

[package]
Expand All @@ -20,28 +20,22 @@ name = "bempp"
crate-type = ["lib", "cdylib"]

[dependencies]
approx = "0.5"
cauchy = "0.4.*"
itertools = "0.13.*"
mpi = { version = "0.8.*", optional = true }
num = "0.4"
paste = "1.*"
lazy_static = "1.4"
libc = "0.2"
log = "0.4"
ndelement = { git = "https://github.com/bempp/ndelement.git" }
ndgrid = { git = "https://github.com/bempp/ndgrid.git" }
rayon = "1.9"
rand = "0.8.5"
rlst = { version = "0.2" }
green-kernels = { git = "https://github.com/bempp/green-kernels.git" }
thiserror="1.*"

[dev-dependencies]
approx = "0.5"
paste = "1.*"
cauchy = "0.4.*"
criterion = { version = "0.5.*", features = ["html_reports"]}
kifmm = { version = "0.1" }
blas-src = { version = "0.10", features = ["blis"]}
lapack-src = { version = "0.10", features = ["netlib"]}
# kifmm = { version = "1.0" }

[[bench]]
name = "assembly_benchmark"
Expand All @@ -52,7 +46,3 @@ cargo-args = ["-Zunstable-options", "-Zrustdoc-scrape-examples"]

[lints.clippy]
wildcard_imports = "forbid"

[target.aarch64-apple-darwin.dev-dependencies]
blas-src = { version = "0.10", features = ["accelerate"]}
lapack-src = { version = "0.10", features = ["accelerate"]}
5 changes: 1 addition & 4 deletions benches/assembly_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
use bempp::assembly::{batched, batched::BatchedAssembler};
use bempp::function::SerialFunctionSpace;
use bempp::traits::function::FunctionSpace;
use bempp::traits::FunctionSpace;
use criterion::{criterion_group, criterion_main, Criterion};
use ndelement::ciarlet::LagrangeElementFamily;
use ndelement::types::{Continuity, ReferenceCellType};
use ndgrid::shapes::regular_sphere;
use rlst::rlst_dynamic_array2;

extern crate blas_src;
extern crate lapack_src;

pub fn assembly_parts_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("assembly");
group.sample_size(20);
Expand Down
5 changes: 1 addition & 4 deletions examples/assembly.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
use bempp::assembly::{batched, batched::BatchedAssembler};
use bempp::function::SerialFunctionSpace;
use bempp::traits::function::FunctionSpace;
use bempp::traits::FunctionSpace;
use ndelement::ciarlet::LagrangeElementFamily;
use ndelement::types::{Continuity, ReferenceCellType};
use ndgrid::shapes::regular_sphere;
use rlst::{rlst_dynamic_array2, RandomAccessByRef};

extern crate blas_src;
extern crate lapack_src;

fn main() {
// Create a grid, family of elements, and function space
let grid = regular_sphere(0);
Expand Down
253 changes: 253 additions & 0 deletions examples/test_parallel_assembly.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
//? mpirun -n {{NPROCESSES}} --features "mpi"

#[cfg(feature = "mpi")]
use approx::assert_relative_eq;
#[cfg(feature = "mpi")]
use bempp::{
assembly::batched,
assembly::batched::BatchedAssembler,
function::{ParallelFunctionSpace, SerialFunctionSpace},
traits::FunctionSpace,
};
#[cfg(feature = "mpi")]
use itertools::izip;
#[cfg(feature = "mpi")]
use mpi::{
environment::Universe,
request::WaitGuard,
traits::{Communicator, Destination, Source},
};
#[cfg(feature = "mpi")]
use ndelement::{
ciarlet::{CiarletElement, LagrangeElementFamily},
types::{Continuity, ReferenceCellType},
};
#[cfg(feature = "mpi")]
use ndgrid::{
grid::parallel::ParallelGrid,
traits::{Builder, Entity, Grid, ParallelBuilder},
SingleElementGrid, SingleElementGridBuilder,
};
#[cfg(feature = "mpi")]
use rlst::{CsrMatrix, Shape};
#[cfg(feature = "mpi")]
use std::collections::{hash_map::Entry, HashMap};

/// Add the points and cells of a regular grid with `n` points per side to the builder `b`.
///
/// The points cover the unit square in the plane z = 0: the point in column `x` and row `y`
/// is inserted with id `y * n + x` and coordinates `(x / (n - 1), y / (n - 1), 0)`.
/// `(n - 1) * (n - 1)` four-vertex cells are then added, the cell with indices `(i, j)`
/// getting id `i * (n - 1) + j`.
///
/// # Panics
/// Panics if `n < 2`: fewer than two points per side cannot form a cell, and `n - 1` would
/// either underflow (`n == 0`) or give a zero denominator for the coordinates (`n == 1`).
#[cfg(feature = "mpi")]
fn create_single_element_grid_data(b: &mut SingleElementGridBuilder<f64>, n: usize) {
    assert!(
        n >= 2,
        "at least 2 points per side are needed to create a cell (got n = {n})"
    );

    // Number of intervals per side; dividing by it makes the coordinates run from 0.0 to 1.0
    let intervals = (n - 1) as f64;
    for y in 0..n {
        for x in 0..n {
            b.add_point(
                y * n + x,
                &[x as f64 / intervals, y as f64 / intervals, 0.0],
            );
        }
    }

    for i in 0..n - 1 {
        for j in 0..n - 1 {
            // Id of the point in column i, row j; the other three corners are its
            // neighbours in the next column and the next row
            let corner = j * n + i;
            b.add_cell(
                i * (n - 1) + j,
                &[corner, corner + 1, corner + n, corner + n + 1],
            );
        }
    }
}

/// Create the example grid distributed over the processes of `comm`.
///
/// Rank 0 fills the builder with the points and cells of the grid with `n` points per side
/// and calls `create_parallel_grid`; every other rank calls `receive_parallel_grid` with
/// root rank 0.
#[cfg(feature = "mpi")]
fn example_single_element_grid<C: Communicator>(
    comm: &C,
    n: usize,
) -> ParallelGrid<'_, C, SingleElementGrid<f64, CiarletElement<f64>>> {
    let mut builder =
        SingleElementGridBuilder::<f64>::new(3, (ReferenceCellType::Quadrilateral, 1));

    // Non-root ranks have no grid data of their own to add
    if comm.rank() != 0 {
        return builder.receive_parallel_grid(comm, 0);
    }

    create_single_element_grid_data(&mut builder, n);
    builder.create_parallel_grid(comm)
}

/// Create the example grid with `n` points per side on a single process.
///
/// Uses the same builder settings and the same point and cell data as
/// `example_single_element_grid`, but finishes with `create_grid`.
#[cfg(feature = "mpi")]
fn example_single_element_grid_serial(n: usize) -> SingleElementGrid<f64, CiarletElement<f64>> {
    let cell_type = (ReferenceCellType::Quadrilateral, 1);
    let mut builder = SingleElementGridBuilder::<f64>::new(3, cell_type);
    create_single_element_grid_data(&mut builder, n);
    builder.create_grid()
}

/// Check that parallel assembly gives the same matrix as serial assembly.
///
/// Every rank builds a Lagrange space of the given `degree` and continuity `cont` on the
/// distributed example grid and calls `parallel_assemble_singular_into_csr`. Rank 0 then
///
/// 1. assembles the same operator on a serial copy of the grid,
/// 2. collects the global DOF numbers of every cell (keyed by cell id) from all ranks and
///    uses them to map the parallel DOF numbering onto the serial one,
/// 3. gathers the CSR matrices of all ranks, renumbers their entries and combines them
///    into one matrix, and
/// 4. asserts that this matrix matches the serially assembled one.
///
/// All other ranks only send their cell ids, cell DOFs and matrix to rank 0.
///
/// # Panics
/// Panics (on rank 0) if the DOFs reported for a shared cell differ between ranks, or if
/// the gathered matrix differs from the serial matrix.
#[cfg(feature = "mpi")]
fn test_parallel_assembly_single_element_grid<C: Communicator>(
    comm: &C,
    degree: usize,
    cont: Continuity,
) {
    let rank = comm.rank();
    let size = comm.size();

    let n = 10;
    let grid = example_single_element_grid(comm, n);
    let element = LagrangeElementFamily::<f64>::new(degree, cont);
    let space = ParallelFunctionSpace::new(&grid, &element);

    let a = batched::LaplaceSingleLayerAssembler::<f64>::default();

    let matrix = a.parallel_assemble_singular_into_csr(&space, &space);

    if rank == 0 {
        // Compute the same matrix on a single process
        let serial_grid = example_single_element_grid_serial(n);
        let serial_space = SerialFunctionSpace::new(&serial_grid, &element);
        let serial_matrix = a.assemble_singular_into_csr(&serial_space, &serial_space);

        // Dofs associated with each cell (by cell id)
        let mut serial_dofmap = HashMap::new();
        for cell in serial_grid.entity_iter(2) {
            serial_dofmap.insert(
                cell.id().unwrap(),
                serial_space
                    .cell_dofs(cell.local_index())
                    .unwrap()
                    .iter()
                    .map(|i| serial_space.global_dof_index(*i))
                    .collect::<Vec<_>>(),
            );
        }
        let mut parallel_dofmap = HashMap::new();
        for cell in grid.entity_iter(2) {
            parallel_dofmap.insert(
                cell.id().unwrap(),
                space
                    .cell_dofs(cell.local_index())
                    .unwrap()
                    .iter()
                    .map(|i| space.global_dof_index(*i))
                    .collect::<Vec<_>>(),
            );
        }

        // Add the cells of the other processes. The three messages are received in the
        // order in which the other ranks send them: cell ids, flattened DOFs, DOF counts.
        for p in 1..size {
            let process = comm.process_at_rank(p);
            let (cell_ids, _status) = process.receive_vec::<usize>();
            let (dofs, _status) = process.receive_vec::<usize>();
            let (dofs_len, _status) = process.receive_vec::<usize>();
            let mut start = 0;
            for (id, len) in izip!(cell_ids, dofs_len) {
                if let Entry::Vacant(e) = parallel_dofmap.entry(id) {
                    e.insert(dofs[start..start + len].to_vec());
                } else {
                    // A cell seen on more than one process must have the same global DOFs
                    assert_eq!(parallel_dofmap[&id], dofs[start..start + len]);
                }
                start += len;
            }
        }

        // index_map[parallel global DOF] = serial global DOF, found by matching the DOFs
        // of cells with the same id
        let mut index_map = vec![0; serial_space.global_size()];

        for (id, dofs) in parallel_dofmap {
            for (i, j) in izip!(&serial_dofmap[&id], dofs) {
                index_map[j] = *i;
            }
        }

        // Gather sparse matrices onto process 0
        let mut rows: Vec<usize> = vec![];
        let mut cols: Vec<usize> = vec![];
        let mut data: Vec<f64> = vec![];

        // Append the entries of one CSR matrix (given by its three arrays) as
        // (row, column, value) triplets in the serial numbering
        let mut append_entries = |indices: &[usize], indptr: &[usize], values: &[f64]| {
            let mut r = 0;
            for (i, index) in indices.iter().enumerate() {
                // Advance to the row that contains the i-th stored entry
                while i >= indptr[r + 1] {
                    r += 1;
                }
                rows.push(index_map[r]);
                cols.push(index_map[*index]);
                data.push(values[i]);
            }
        };

        append_entries(matrix.indices(), matrix.indptr(), matrix.data());

        for p in 1..size {
            let process = comm.process_at_rank(p);
            let (indices, _status) = process.receive_vec::<usize>();
            let (indptr, _status) = process.receive_vec::<usize>();
            let (subdata, _status) = process.receive_vec::<f64>();
            let mat = CsrMatrix::new(matrix.shape(), indices, indptr, subdata);

            append_entries(mat.indices(), mat.indptr(), mat.data());
        }
        let full_matrix = CsrMatrix::from_aij(
            [space.global_size(), space.global_size()],
            &rows,
            &cols,
            &data,
        )
        .unwrap();

        // Compare to matrix assembled on just this process. The lengths are checked
        // first because `zip` stops at the shorter input and would hide a size mismatch.
        assert_eq!(full_matrix.indices().len(), serial_matrix.indices().len());
        assert_eq!(full_matrix.indptr().len(), serial_matrix.indptr().len());
        assert_eq!(full_matrix.data().len(), serial_matrix.data().len());
        for (i, j) in full_matrix.indices().iter().zip(serial_matrix.indices()) {
            assert_eq!(i, j);
        }
        for (i, j) in full_matrix.indptr().iter().zip(serial_matrix.indptr()) {
            assert_eq!(i, j);
        }
        for (i, j) in full_matrix.data().iter().zip(serial_matrix.data()) {
            assert_relative_eq!(i, j, epsilon = 1e-10);
        }
    } else {
        // Flatten the DOFs of every local cell into one vector; `dofs_len` holds the
        // number of DOFs of each cell so that rank 0 can split the vector again
        let mut cell_ids = vec![];
        let mut dofs = vec![];
        let mut dofs_len = vec![];
        for cell in grid.entity_iter(2) {
            cell_ids.push(cell.id().unwrap());
            let cell_dofs = space
                .cell_dofs(cell.local_index())
                .unwrap()
                .iter()
                .map(|i| space.global_dof_index(*i))
                .collect::<Vec<_>>();
            dofs.extend_from_slice(&cell_dofs);
            dofs_len.push(cell_dofs.len());
        }

        mpi::request::scope(|scope| {
            let root = comm.process_at_rank(0);
            // Send the DOF data and then the matrix, in the order rank 0 receives them.
            // Binding a guard to `_` drops it at the end of its statement, so each send
            // is waited on before the next one is started.
            let _ = WaitGuard::from(root.immediate_send(scope, &cell_ids));
            let _ = WaitGuard::from(root.immediate_send(scope, &dofs));
            let _ = WaitGuard::from(root.immediate_send(scope, &dofs_len));
            let _ = WaitGuard::from(root.immediate_send(scope, matrix.indices()));
            let _ = WaitGuard::from(root.immediate_send(scope, matrix.indptr()));
            let _ = WaitGuard::from(root.immediate_send(scope, matrix.data()));
        });
    }
}

/// Run the parallel assembly check for discontinuous Lagrange spaces of degree 0 to 3 and
/// for continuous Lagrange spaces of degree 1 to 3. Only rank 0 prints progress messages.
#[cfg(feature = "mpi")]
fn main() {
    let universe: Universe = mpi::initialize().unwrap();
    let comm = universe.world();
    let is_root = comm.rank() == 0;

    // Discontinuous spaces (DP0 to DP3)
    (0..4).for_each(|degree| {
        if is_root {
            println!("Testing assembly with DP{degree} using SingleElementGrid in parallel.");
        }
        test_parallel_assembly_single_element_grid(&comm, degree, Continuity::Discontinuous);
    });

    // Continuous spaces (P1 to P3)
    (1..4).for_each(|degree| {
        if is_root {
            println!("Testing assembly with P{degree} using SingleElementGrid in parallel.");
        }
        test_parallel_assembly_single_element_grid(&comm, degree, Continuity::Standard);
    });
}
// Fallback entry point used when the "mpi" feature is disabled: every other item in this
// example is gated on that feature, so there is nothing to run.
#[cfg(not(feature = "mpi"))]
fn main() {}
2 changes: 1 addition & 1 deletion src/assembly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ mod test {
use super::batched::BatchedAssembler;
use super::*;
use crate::function::SerialFunctionSpace;
use crate::traits::function::FunctionSpace;
use crate::traits::FunctionSpace;
use cauchy::{c32, c64};
use ndelement::ciarlet::CiarletElement;
use ndelement::ciarlet::LagrangeElementFamily;
Expand Down
27 changes: 10 additions & 17 deletions src/assembly/batched.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,30 @@ use green_kernels::types::GreenKernelEvalType;
use rlst::{Array, BaseArray, VectorContainer};

mod boundary;
pub use boundary::{BatchedAssembler, BatchedAssemblerOptions};
mod potential;
pub use potential::{BatchedPotentialAssembler, BatchedPotentialAssemblerOptions};

mod adjoint_double_layer;
mod double_layer;
mod hypersingular;
mod single_layer;
pub use adjoint_double_layer::{
pub use boundary::adjoint_double_layer::{
HelmholtzAdjointDoubleLayerAssembler, LaplaceAdjointDoubleLayerAssembler,
};
pub use double_layer::{HelmholtzDoubleLayerAssembler, LaplaceDoubleLayerAssembler};
pub use hypersingular::{HelmholtzHypersingularAssembler, LaplaceHypersingularAssembler};
pub use single_layer::{HelmholtzSingleLayerAssembler, LaplaceSingleLayerAssembler};
pub use boundary::double_layer::{HelmholtzDoubleLayerAssembler, LaplaceDoubleLayerAssembler};
pub use boundary::hypersingular::{HelmholtzHypersingularAssembler, LaplaceHypersingularAssembler};
pub use boundary::single_layer::{HelmholtzSingleLayerAssembler, LaplaceSingleLayerAssembler};
pub use boundary::{BatchedAssembler, BatchedAssemblerOptions};

mod double_layer_potential;
mod single_layer_potential;
pub use double_layer_potential::{
mod potential;
pub use potential::double_layer::{
HelmholtzDoubleLayerPotentialAssembler, LaplaceDoubleLayerPotentialAssembler,
};
pub use single_layer_potential::{
pub use potential::single_layer::{
HelmholtzSingleLayerPotentialAssembler, LaplaceSingleLayerPotentialAssembler,
};
pub use potential::{BatchedPotentialAssembler, BatchedPotentialAssemblerOptions};

type RlstArray<T, const DIM: usize> = Array<T, BaseArray<T, VectorContainer<T>, DIM>, DIM>;

#[cfg(test)]
mod test {
use super::*;
use crate::function::SerialFunctionSpace;
use crate::traits::function::FunctionSpace;
use crate::traits::FunctionSpace;
use approx::*;
use ndelement::ciarlet::LagrangeElementFamily;
use ndelement::types::Continuity;
Expand Down
Loading

0 comments on commit fead46f

Please sign in to comment.