Skip to content

Commit

Permalink
improve geometry speed
Browse files Browse the repository at this point in the history
  • Loading branch information
mscroggs committed Nov 24, 2023
1 parent 3d62a53 commit 0ce4051
Show file tree
Hide file tree
Showing 8 changed files with 310 additions and 394 deletions.
2 changes: 0 additions & 2 deletions bem/benches/assembly_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,6 @@ pub fn assembly_parts_benchmark(c: &mut Criterion) {
batched::assemble_nonsingular::<16, 16>(
&mut matrix,
&laplace_3d::Laplace3dKernel::new(),
false,
false,
&space,
&space,
&colouring,
Expand Down
71 changes: 29 additions & 42 deletions bem/src/assembly/batched.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ use bempp_traits::kernel::Kernel;
use bempp_traits::types::EvalType;
use bempp_traits::types::Scalar;
use rayon::prelude::*;
use rlst_common::traits::{RandomAccessByRef, RawAccess, RawAccessMut, Shape, UnsafeRandomAccessByRef};
use rlst_common::traits::{
RandomAccessByRef, RawAccess, RawAccessMut, Shape, UnsafeRandomAccessByRef,
};
use rlst_dense::rlst_dynamic_array4;

fn get_quadrature_rule(
Expand Down Expand Up @@ -184,8 +186,6 @@ fn assemble_batch_singular<'a>(
fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usize>(
output: &RawData2D<f64>,
kernel: &impl Kernel<T = f64>,
needs_trial_normal: bool,
needs_test_normal: bool,
trial_space: &SerialFunctionSpace<'a>,
trial_cells: &[usize],
test_space: &SerialFunctionSpace<'a>,
Expand Down Expand Up @@ -223,23 +223,6 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
.geometry()
.get_evaluator(trial_element, trial_points);

#[allow(clippy::type_complexity)]
let test_compute_normals: Box<dyn Fn(usize, &mut Mat<f64>)> = if needs_test_normal {
Box::new(|index: usize, normals: &mut Mat<f64>| {
test_evaluator.compute_normals(index, normals)
})
} else {
Box::new(|_: usize, _: &mut Mat<f64>| ())
};
#[allow(clippy::type_complexity)]
let trial_compute_normals: Box<dyn Fn(usize, &mut Mat<f64>)> = if needs_trial_normal {
Box::new(|index: usize, normals: &mut Mat<f64>| {
trial_evaluator.compute_normals(index, normals)
})
} else {
Box::new(|_: usize, _: &mut Mat<f64>| ())
};

let mut trial_jdet = vec![[0.0; NPTS_TRIAL]; trial_cells.len()];
let mut trial_mapped_pts = vec![];
let mut trial_normals = vec![];
Expand All @@ -251,10 +234,12 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
for (trial_cell_i, trial_cell) in trial_cells.iter().enumerate() {
let trial_cell_gindex = trial_grid.geometry().index_map()[*trial_cell];

trial_evaluator
.compute_jacobian_determinants(trial_cell_gindex, &mut trial_jdet[trial_cell_i]);
trial_evaluator.compute_normals_and_jacobian_determinants(
trial_cell_gindex,
&mut trial_normals[trial_cell_i],
&mut trial_jdet[trial_cell_i],
);
trial_evaluator.compute_points(trial_cell_gindex, &mut trial_mapped_pts[trial_cell_i]);
trial_compute_normals(trial_cell_gindex, &mut trial_normals[trial_cell_i]);
}

let mut sum: f64;
Expand All @@ -265,9 +250,12 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
let test_cell_gindex = test_grid.geometry().index_map()[*test_cell];
let test_vertices = unsafe { test_c20.row_unchecked(test_cell_tindex) };

test_evaluator.compute_jacobian_determinants(test_cell_gindex, &mut test_jdet);
test_evaluator.compute_normals_and_jacobian_determinants(
test_cell_gindex,
&mut test_normals,
&mut test_jdet,
);
test_evaluator.compute_points(test_cell_gindex, &mut test_mapped_pts);
test_compute_normals(test_cell_gindex, &mut test_normals);

for (trial_cell_i, trial_cell) in trial_cells.iter().enumerate() {
let trial_cell_tindex = trial_grid.topology().index_map()[*trial_cell];
Expand All @@ -281,7 +269,9 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
}
}

if neighbour { continue; }
if neighbour {
continue;
}

kernel.assemble_st(
EvalType::Value,
Expand All @@ -306,11 +296,10 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
{
for (trial_index, trial_wt) in trial_weights.iter().enumerate() {
unsafe {
trial_integrands[trial_index] =
trial_wt
* trial_jdet[trial_cell_i][trial_index]
* trial_table.get_unchecked([0, trial_index, trial_i, 0]);
}
trial_integrands[trial_index] = trial_wt
* trial_jdet[trial_cell_i][trial_index]
* trial_table.get_unchecked([0, trial_index, trial_i, 0]);
}
}
sum = 0.0;
for (test_index, test_wt) in test_weights.iter().enumerate() {
Expand All @@ -331,7 +320,7 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
.data
.offset((*test_dof + output.shape[0] * *trial_dof) as isize) += sum;
}
}
}
}
}
}
Expand All @@ -354,8 +343,6 @@ pub fn assemble<'a>(
assemble_nonsingular::<16, 16>(
output,
kernel,
needs_trial_normal,
needs_test_normal,
trial_space,
test_space,
&trial_colouring,
Expand All @@ -379,8 +366,6 @@ pub fn assemble<'a>(
pub fn assemble_nonsingular<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usize>(
output: &mut Mat<f64>,
kernel: &impl Kernel<T = f64>,
needs_trial_normal: bool,
needs_test_normal: bool,
trial_space: &SerialFunctionSpace<'a>,
test_space: &SerialFunctionSpace<'a>,
trial_colouring: &Vec<Vec<usize>>,
Expand Down Expand Up @@ -414,8 +399,10 @@ pub fn assemble_nonsingular<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usize>
.element()
.tabulate(&qpoints_test, 0, &mut test_table);

let mut trial_table =
rlst_dynamic_array4!(f64, trial_space.element().tabulate_array_shape(0, NPTS_TRIAL));
let mut trial_table = rlst_dynamic_array4!(
f64,
trial_space.element().tabulate_array_shape(0, NPTS_TRIAL)
);
trial_space
.element()
.tabulate(&qpoints_test, 0, &mut trial_table);
Expand Down Expand Up @@ -459,8 +446,6 @@ pub fn assemble_nonsingular<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usize>
assemble_batch_nonadjacent::<NPTS_TEST, NPTS_TRIAL>(
&output_raw,
kernel,
needs_trial_normal,
needs_test_normal,
trial_space,
trial_cells[t],
test_space,
Expand Down Expand Up @@ -556,7 +541,8 @@ pub fn assemble_singular<'a>(
);

let points = transpose_to_matrix(&qrule.trial_points, [qrule.npoints, 2]);
let mut table = rlst_dynamic_array4!(f64,
let mut table = rlst_dynamic_array4!(
f64,
trial_space
.element()
.tabulate_array_shape(0, points.shape()[0])
Expand All @@ -566,7 +552,8 @@ pub fn assemble_singular<'a>(
trial_tables.push(table);

let points = transpose_to_matrix(&qrule.test_points, [qrule.npoints, 2]);
let mut table = rlst_dynamic_array4!(f64,
let mut table = rlst_dynamic_array4!(
f64,
test_space
.element()
.tabulate_array_shape(0, points.shape()[0])
Expand Down
27 changes: 14 additions & 13 deletions element/src/element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

use crate::cell::create_cell;
use crate::polynomials::{legendre_shape, polynomial_count, tabulate_legendre_polynomials};
use bempp_tools::arrays::{AdjacencyList, Mat, Array3D};
use bempp_tools::arrays::{AdjacencyList, Array3D, Mat};
use bempp_traits::arrays::AdjacencyListAccess;
use bempp_traits::cell::ReferenceCellType;
use bempp_traits::element::{Continuity, ElementFamily, FiniteElement, MapType};
use rlst_common::traits::{RandomAccessByRef, RandomAccessMut, Shape, UnsafeRandomAccessMut};
use rlst_dense::linalg::Trans;
use rlst_dense::{rlst_dynamic_array2,rlst_dynamic_array3};
use rlst_dense::{rlst_dynamic_array2, rlst_dynamic_array3};
pub mod lagrange;
pub mod raviart_thomas;

Expand Down Expand Up @@ -139,8 +139,8 @@ impl CiarletElement {
let value = d_matrix.get_mut([j, l, dof + i]).unwrap();
*value = 0.0;
for k in 0..pts.shape()[0] {
*value +=
*mat.get([i, j, k]).unwrap() * *table.get([0, l, k]).unwrap();
*value += *mat.get([i, j, k]).unwrap()
* *table.get([0, l, k]).unwrap();
}
}
}
Expand Down Expand Up @@ -179,8 +179,8 @@ impl CiarletElement {
for l in 0..pdim {
for j in 0..value_size {
for k in 0..pdim {
*coefficients.get_mut([i, j, k]).unwrap() +=
*inverse.get([i, l]).unwrap() * *polynomial_coeffs.get([l, j, k]).unwrap()
*coefficients.get_mut([i, j, k]).unwrap() += *inverse.get([i, l]).unwrap()
* *polynomial_coeffs.get([l, j, k]).unwrap()
}
}
}
Expand Down Expand Up @@ -247,18 +247,19 @@ impl FiniteElement for CiarletElement {
fn dim(&self) -> usize {
self.dim
}
fn tabulate<T: RandomAccessByRef<2, Item = f64> + Shape<2>, T4Mut: RandomAccessMut<4, Item = f64>>(
fn tabulate<
T: RandomAccessByRef<2, Item = f64> + Shape<2>,
T4Mut: RandomAccessMut<4, Item = f64>,
>(
&self,
points: &T,
nderivs: usize,
data: &mut T4Mut,
) {
let mut table = rlst_dynamic_array3!(f64, legendre_shape(
self.cell_type,
points,
self.highest_degree,
nderivs,
));
let mut table = rlst_dynamic_array3!(
f64,
legendre_shape(self.cell_type, points, self.highest_degree, nderivs,)
);
tabulate_legendre_polynomials(
self.cell_type,
points,
Expand Down
8 changes: 4 additions & 4 deletions element/src/element/lagrange.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ pub fn create(
}
x[tdim].push(to_matrix(&cell.midpoint(), [1, tdim]));
let mut mentry = rlst_dynamic_array3!(f64, [1, 1, 1]);
*mentry.get_mut([0,0,0]).unwrap() = 1.0;
*mentry.get_mut([0, 0, 0]).unwrap() = 1.0;
m[tdim].push(mentry);
} else {
// TODO: GLL points
Expand All @@ -47,7 +47,7 @@ pub fn create(
}
x[0].push(pts);
let mut mentry = rlst_dynamic_array3!(f64, [1, 1, 1]);
*mentry.get_mut([0,0,0]).unwrap() = 1.0;
*mentry.get_mut([0, 0, 0]).unwrap() = 1.0;
m[0].push(mentry);
}
for e in 0..cell.entity_count(1) {
Expand All @@ -59,7 +59,7 @@ pub fn create(
let mut ident = rlst_dynamic_array3!(f64, [degree - 1, 1, degree - 1]);

for i in 1..degree {
*ident.get_mut([i-1,0,i-1]).unwrap() = 1.0;
*ident.get_mut([i - 1, 0, i - 1]).unwrap() = 1.0;
for j in 0..tdim {
*pts.get_mut([i - 1, j]).unwrap() =
v0[j] + i as f64 / degree as f64 * (v1[j] - v0[j]);
Expand Down Expand Up @@ -123,7 +123,7 @@ pub fn create(

let mut ident = rlst_dynamic_array3!(f64, [npts, 1, npts]);
for i in 0..npts {
*ident.get_mut([i,0,i]).unwrap() = 1.0;
*ident.get_mut([i, 0, i]).unwrap() = 1.0;
}
x[2].push(pts);
m[2].push(ident);
Expand Down
Loading

0 comments on commit 0ce4051

Please sign in to comment.