Skip to content

Commit

Permalink
speed
Browse files Browse the repository at this point in the history
  • Loading branch information
mscroggs committed Nov 22, 2023
1 parent fc63907 commit 2335735
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 62 deletions.
7 changes: 5 additions & 2 deletions bem/src/assembly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,11 @@ mod test {

for i in 0..5 {
for j in 0..5 {
println!("{} {}", *matrix.get([i, j]).unwrap(),
*matrix2.get([i, j]).unwrap());
println!(
"{} {}",
*matrix.get([i, j]).unwrap(),
*matrix2.get([i, j]).unwrap()
);
}
println!();
}
Expand Down
62 changes: 41 additions & 21 deletions bem/src/assembly/batched.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,12 +203,9 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz

let mut k = vec![0.0; NPTS_TEST * NPTS_TRIAL];
let mut test_jdet = [0.0; NPTS_TEST];
let mut trial_jdet = [0.0; NPTS_TRIAL];
let mut test_normals = zero_matrix([NPTS_TEST, 3]);
let mut trial_normals = zero_matrix([NPTS_TRIAL, 3]);

let mut test_mapped_pts = rlst_dense::rlst_dynamic_array2![f64, [NPTS_TEST, 3]];
let mut trial_mapped_pts = rlst_dense::rlst_dynamic_array2![f64, [NPTS_TRIAL, 3]];

let test_element = test_grid.geometry().element(test_cells[0]);
let trial_element = trial_grid.geometry().element(trial_cells[0]);
Expand Down Expand Up @@ -237,6 +234,25 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
Box::new(|_: usize, _: &mut Mat<f64>| ())
};

let mut trial_jdet_all = vec![[0.0; NPTS_TRIAL]; trial_cells.len()];
let mut trial_mapped_pts_all = vec![];
let mut trial_normals_all = vec![];
for i in 0..trial_cells.len() {
trial_mapped_pts_all.push(zero_matrix([NPTS_TRIAL, 3]));
trial_normals_all.push(zero_matrix([NPTS_TRIAL, 3]));
}

for (trial_cell_i, trial_cell) in trial_cells.iter().enumerate() {
let trial_cell_tindex = trial_grid.topology().index_map()[*trial_cell];
let trial_cell_gindex = trial_grid.geometry().index_map()[*trial_cell];
let trial_vertices = unsafe { trial_c20.row_unchecked(trial_cell_tindex) };

trial_evaluator
.compute_jacobian_determinants(trial_cell_gindex, &mut trial_jdet_all[trial_cell_i]);
trial_evaluator.compute_points(trial_cell_gindex, &mut trial_mapped_pts_all[trial_cell_i]);
trial_compute_normals(trial_cell_gindex, &mut trial_normals_all[trial_cell_i]);
}

for test_cell in test_cells {
let test_cell_tindex = test_grid.topology().index_map()[*test_cell];
let test_cell_gindex = test_grid.geometry().index_map()[*test_cell];
Expand All @@ -246,19 +262,15 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
test_evaluator.compute_points(test_cell_gindex, &mut test_mapped_pts);
test_compute_normals(test_cell_gindex, &mut test_normals);

for trial_cell in trial_cells {
for (trial_cell_i, trial_cell) in trial_cells.iter().enumerate() {
let trial_cell_tindex = trial_grid.topology().index_map()[*trial_cell];
let trial_cell_gindex = trial_grid.geometry().index_map()[*trial_cell];
let trial_vertices = unsafe { trial_c20.row_unchecked(trial_cell_tindex) };

trial_evaluator.compute_jacobian_determinants(trial_cell_gindex, &mut trial_jdet);
trial_evaluator.compute_points(trial_cell_gindex, &mut trial_mapped_pts);
trial_compute_normals(trial_cell_gindex, &mut trial_normals);

kernel.assemble_st(
EvalType::Value,
test_mapped_pts.data(),
trial_mapped_pts.data(),
trial_mapped_pts_all[trial_cell_i].data(),
&mut k,
);

Expand All @@ -279,15 +291,23 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
let mut sum = 0.0;

for (test_index, test_wt) in test_weights.iter().enumerate() {
for (trial_index, trial_wt) in trial_weights.iter().enumerate() { unsafe {
sum += k[test_index * trial_weights.len() + trial_index]
* (test_wt
* trial_wt
* test_table.get_unchecked(0, test_index, test_i, 0)
* test_jdet[test_index]
* trial_table.get_unchecked(0, trial_index, trial_i, 0)
* trial_jdet[test_index]);
}}
let test_integrand = unsafe {
test_wt
* test_jdet[test_index]
* test_table.get_unchecked(0, test_index, test_i, 0)
};
for (trial_index, trial_wt) in trial_weights.iter().enumerate() {
unsafe {
let trial_integrand = {
trial_wt
* trial_jdet_all[trial_cell_i][trial_index]
* trial_table.get_unchecked(0, trial_index, trial_i, 0)
};
sum += k[test_index * trial_weights.len() + trial_index]
* test_integrand
* trial_integrand;
}
}
}
// TODO: should we write into a result array, then copy into output after this loop?
let mut neighbour = false;
Expand All @@ -299,9 +319,9 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
}
if !neighbour {
unsafe {
*output.data.offset(
(*test_dof + output.shape[0] * *trial_dof) as isize,
) += sum;
*output
.data
.offset((*test_dof + output.shape[0] * *trial_dof) as isize) += sum;
}
}
}
Expand Down
15 changes: 9 additions & 6 deletions bem/src/assembly/cl_kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,20 @@ fn get_integration_element_vec(jac: &Vec<Vec<f64>>, int_elem: &mut [f64]) {

fn get_global_point(corners: &Vec<Vec<f64>>, point: &[f64], result: &mut [f64]) {
for i in 0..3 {
result[i] = corners[0][i] + point[0] * (corners[1][i] - corners[0][i]) + point[1] * (corners[2][i] - corners[0][i]);
result[i] = corners[0][i]
+ point[0] * (corners[1][i] - corners[0][i])
+ point[1] * (corners[2][i] - corners[0][i]);
}
}
fn get_global_point_vec(corners: &Vec<Vec<f64>>, point: &[f64], result: &mut [f64]) {
return;
for index in 0.. corners[0].len() / 3 {
for index in 0..corners[0].len() / 3 {
for i in 0..3 {
result[index * 3 + i] = corners[0][3 * index + i] + point[0] * (corners[1][3 * index + i] - corners[0][3 * index + i]) + point[1] * (corners[2][3 * index + i] - corners[2][3 * index + i]);
result[index * 3 + i] = corners[0][3 * index + i]
+ point[0] * (corners[1][3 * index + i] - corners[0][3 * index + i])
+ point[1] * (corners[2][3 * index + i] - corners[2][3 * index + i]);
}
}

}

fn elements_are_adjacent<'a>(e0: &[usize], e1: &[usize]) -> bool {
Expand Down Expand Up @@ -206,7 +209,8 @@ fn lagrange_kernel<'a>(
if !elements_are_adjacent(&test_element, &trial_element[vec_index]) {
global_row_index = my_test_local2global[0];
global_col_index = my_trial_local2global[vec_index][0];
global_result[global_row_index * n_trial + global_col_index] += shape_integral[0][0][vec_index];
global_result[global_row_index * n_trial + global_col_index] +=
shape_integral[0][0][vec_index];
}
}
}
Expand All @@ -228,7 +232,6 @@ pub fn assemble<'a>(
let test_colouring = test_space.compute_cell_colouring();
let trial_colouring = trial_space.compute_cell_colouring();


let mut test_local2global = vec![0];
for trial_c in &trial_colouring {
let mut trial_local2global = vec![];
Expand Down
10 changes: 6 additions & 4 deletions element/src/element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ use bempp_tools::arrays::{AdjacencyList, Array3D, Mat};
use bempp_traits::arrays::{AdjacencyListAccess, Array3DAccess, Array4DAccess};
use bempp_traits::cell::ReferenceCellType;
use bempp_traits::element::{Continuity, ElementFamily, FiniteElement, MapType};
use rlst_common::traits::{RandomAccessByRef, RandomAccessMut, Shape, UnsafeRandomAccessMut};
use rlst_dense::linalg::Trans;
use rlst_dense::rlst_dynamic_array2;
use rlst_common::traits::{ RandomAccessByRef, RandomAccessMut, Shape, UnsafeRandomAccessMut};
pub mod lagrange;
pub mod raviart_thomas;

Expand Down Expand Up @@ -166,9 +166,11 @@ impl CiarletElement {
}

let mut ident = rlst_dense::rlst_dynamic_array2!(f64, [dim, dim]);
for i in 0..dim { unsafe {
*ident.get_unchecked_mut([i, i]) = 1.0;
}}
for i in 0..dim {
unsafe {
*ident.get_unchecked_mut([i, i]) = 1.0;
}
}
let lu = dual_matrix.into_lu().unwrap();
let inverse = lu.solve(Trans::NoTrans, ident).unwrap();

Expand Down
15 changes: 12 additions & 3 deletions element/src/element/lagrange.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,10 @@ mod test {
*data.get(0, pt, 0, 0).unwrap(),
1.0 - *points.get([pt, 0]).unwrap()
);
assert_relative_eq!(*data.get(0, pt, 1, 0).unwrap(), *points.get([pt, 0]).unwrap());
assert_relative_eq!(
*data.get(0, pt, 1, 0).unwrap(),
*points.get([pt, 0]).unwrap()
);
}
check_dofs(e);
}
Expand Down Expand Up @@ -247,8 +250,14 @@ mod test {
*data.get(0, pt, 0, 0).unwrap(),
1.0 - *points.get([pt, 0]).unwrap() - *points.get([pt, 1]).unwrap()
);
assert_relative_eq!(*data.get(0, pt, 1, 0).unwrap(), *points.get([pt, 0]).unwrap());
assert_relative_eq!(*data.get(0, pt, 2, 0).unwrap(), *points.get([pt, 1]).unwrap());
assert_relative_eq!(
*data.get(0, pt, 1, 0).unwrap(),
*points.get([pt, 0]).unwrap()
);
assert_relative_eq!(
*data.get(0, pt, 2, 0).unwrap(),
*points.get([pt, 1]).unwrap()
);
}
check_dofs(e);
}
Expand Down
5 changes: 4 additions & 1 deletion element/src/element/raviart_thomas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,10 @@ mod test {
*data.get(0, pt, 1, 0).unwrap(),
*points.get([pt, 0]).unwrap() - 1.0
);
assert_relative_eq!(*data.get(0, pt, 1, 1).unwrap(), *points.get([pt, 1]).unwrap());
assert_relative_eq!(
*data.get(0, pt, 1, 1).unwrap(),
*points.get([pt, 1]).unwrap()
);
assert_relative_eq!(
*data.get(0, pt, 2, 0).unwrap(),
-*points.get([pt, 0]).unwrap()
Expand Down
42 changes: 25 additions & 17 deletions grid/src/grid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use bempp_traits::cell::{ReferenceCell, ReferenceCellType};
use bempp_traits::element::{Continuity, ElementFamily, FiniteElement};
use bempp_traits::grid::{Geometry, GeometryEvaluator, Grid, Ownership, Topology};
use itertools::izip;
use rlst_common::traits::{RandomAccessByRef, RandomAccessMut, Shape,
UnsafeRandomAccessByRef, UnsafeRandomAccessMut,
use rlst_common::traits::{
RandomAccessByRef, RandomAccessMut, Shape, UnsafeRandomAccessByRef, UnsafeRandomAccessMut,
};
use rlst_proc_macro::rlst_static_array;
use std::cell::RefCell;
Expand Down Expand Up @@ -89,10 +89,12 @@ impl<'a> GeometryEvaluator<Mat<f64>, Mat<f64>> for EvaluatorTdim2Gdim3<'a> {
let v = unsafe { *self.geometry.cells.get_unchecked(cell_index, i) };
for j in 0..3 {
unsafe {
*axes.get_unchecked_mut([0, j]) += *self.geometry.coordinate_unchecked(v, j)
* self.table.get(1, p, i, 0).unwrap();
*axes.get_unchecked_mut([1, j]) += *self.geometry.coordinate_unchecked(v, j)
* self.table.get(2, p, i, 0).unwrap();
*axes.get_unchecked_mut([0, j]) +=
*self.geometry.coordinate_unchecked(v, j)
* self.table.get(1, p, i, 0).unwrap();
*axes.get_unchecked_mut([1, j]) +=
*self.geometry.coordinate_unchecked(v, j)
* self.table.get(2, p, i, 0).unwrap();
}
}
}
Expand Down Expand Up @@ -263,8 +265,9 @@ impl<'a> GeometryEvaluator<Mat<f64>, Mat<f64>> for LinearSimplexEvaluatorTdim2Gd
for j in 0..3 {
for k in 0..2 {
unsafe {
*axes.get_unchecked_mut([k, j]) += *self.geometry.coordinate_unchecked(v, j)
* self.table.get(k + 1, 0, i, 0).unwrap();
*axes.get_unchecked_mut([k, j]) +=
*self.geometry.coordinate_unchecked(v, j)
* self.table.get(k + 1, 0, i, 0).unwrap();
}
}
}
Expand Down Expand Up @@ -535,11 +538,14 @@ impl Geometry for SerialGeometry {
*self.coordinate(v, j).unwrap() * data.get(2, p, i, 0).unwrap();
}
}
*normals.get_mut([p, 0]).unwrap() = *axes.get([0, 1]).unwrap() * *axes.get([1, 2]).unwrap()
*normals.get_mut([p, 0]).unwrap() = *axes.get([0, 1]).unwrap()
* *axes.get([1, 2]).unwrap()
- *axes.get([0, 2]).unwrap() * *axes.get([1, 1]).unwrap();
*normals.get_mut([p, 1]).unwrap() = *axes.get([0, 2]).unwrap() * *axes.get([1, 0]).unwrap()
*normals.get_mut([p, 1]).unwrap() = *axes.get([0, 2]).unwrap()
* *axes.get([1, 0]).unwrap()
- *axes.get([0, 0]).unwrap() * *axes.get([1, 2]).unwrap();
*normals.get_mut([p, 2]).unwrap() = *axes.get([0, 0]).unwrap() * *axes.get([1, 1]).unwrap()
*normals.get_mut([p, 2]).unwrap() = *axes.get([0, 0]).unwrap()
* *axes.get([1, 1]).unwrap()
- *axes.get([0, 1]).unwrap() * *axes.get([1, 0]).unwrap();
let size = (*normals.get([p, 0]).unwrap() * *normals.get([p, 0]).unwrap()
+ *normals.get([p, 1]).unwrap() * *normals.get([p, 1]).unwrap()
Expand Down Expand Up @@ -608,9 +614,8 @@ impl Geometry for SerialGeometry {
*jdet = match tdim {
1 => match gdim {
1 => *js.get([p, 0]).unwrap(),
2 => {
((*js.get([p, 0]).unwrap()).powi(2) + (*js.get([p, 1]).unwrap()).powi(2)).sqrt()
}
2 => ((*js.get([p, 0]).unwrap()).powi(2) + (*js.get([p, 1]).unwrap()).powi(2))
.sqrt(),
3 => ((*js.get([p, 0]).unwrap()).powi(2)
+ (*js.get([p, 1]).unwrap()).powi(2)
+ (*js.get([p, 2]).unwrap()).powi(2))
Expand Down Expand Up @@ -691,7 +696,8 @@ impl Geometry for SerialGeometry {
for p in 0..npts {
if tdim == 1 {
if gdim == 1 {
*jacobian_inverses.get_mut([p, 0]).unwrap() = 1.0 / *js.get([p, 0]).unwrap();
*jacobian_inverses.get_mut([p, 0]).unwrap() =
1.0 / *js.get([p, 0]).unwrap();
} else if gdim == 2 {
unimplemented!("Inverse jacobian for this dimension not implemented yet.");
} else if gdim == 3 {
Expand All @@ -704,8 +710,10 @@ impl Geometry for SerialGeometry {
let det = *js.get([p, 0]).unwrap() * *js.get([p, 3]).unwrap()
- *js.get([p, 1]).unwrap() * *js.get([p, 2]).unwrap();
*jacobian_inverses.get_mut([p, 0]).unwrap() = js.get([p, 3]).unwrap() / det;
*jacobian_inverses.get_mut([p, 1]).unwrap() = -js.get([p, 1]).unwrap() / det;
*jacobian_inverses.get_mut([p, 2]).unwrap() = -js.get([p, 2]).unwrap() / det;
*jacobian_inverses.get_mut([p, 1]).unwrap() =
-js.get([p, 1]).unwrap() / det;
*jacobian_inverses.get_mut([p, 2]).unwrap() =
-js.get([p, 2]).unwrap() / det;
*jacobian_inverses.get_mut([p, 3]).unwrap() = js.get([p, 0]).unwrap() / det;
} else if gdim == 3 {
let c = (*js.get([p, 3]).unwrap() * *js.get([p, 4]).unwrap()
Expand Down
2 changes: 1 addition & 1 deletion grid/src/shapes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use crate::grid::SerialGrid;
use bempp_element::cell::Triangle;
use bempp_tools::arrays::{zero_matrix, to_matrix, AdjacencyList};
use bempp_tools::arrays::{to_matrix, zero_matrix, AdjacencyList};
use bempp_traits::arrays::AdjacencyListAccess;
use bempp_traits::cell::{ReferenceCell, ReferenceCellType};
use bempp_traits::grid::{Geometry, Grid, Topology};
Expand Down
8 changes: 5 additions & 3 deletions kernel/src/laplace_3d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,12 +364,14 @@ fn laplace_component_count(eval_type: EvalType) -> usize {
mod test {

use super::*;
use rand::prelude::*;
use approx::assert_relative_eq;
use bempp_tools::arrays::Mat;
use bempp_traits::types::Scalar;
use rlst_common::traits::{RawAccess, RawAccessMut, RandomAccessByRef, RandomAccessMut, Shape, FillFrom};
use rand::prelude::*;
use rlst_common::traits::{
FillFrom, RandomAccessByRef, RandomAccessMut, RawAccess, RawAccessMut, Shape,
};
use rlst_dense::rlst_dynamic_array2;
use bempp_tools::arrays::Mat;

fn copy(m_in: &Mat<f64>) -> Mat<f64> {
let mut m = rlst_dynamic_array2!(f64, m_in.shape());
Expand Down
8 changes: 5 additions & 3 deletions tools/src/arrays.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
//! Containers to store multi-dimensional data
use bempp_traits::arrays::{AdjacencyListAccess, Array3DAccess, Array4DAccess};
use num::Num;
use rlst_common::{types::Scalar, traits::{UnsafeRandomAccessMut}};
use rlst_dense::{rlst_dynamic_array2, array::Array, base_array::BaseArray, data_container::VectorContainer};
use rlst_common::{traits::UnsafeRandomAccessMut, types::Scalar};
use rlst_dense::{
array::Array, base_array::BaseArray, data_container::VectorContainer, rlst_dynamic_array2,
};
use std::clone::Clone;

pub type Mat<T> = Array<T, BaseArray<T, VectorContainer<T>, 2>, 2>;
Expand Down Expand Up @@ -44,7 +46,7 @@ impl<T: Num + Clone> Array3D<T> {
/// Create an array from a data vector
pub fn new(shape: (usize, usize, usize)) -> Self {
Self {
data: vec![T::zero(); shape.0 * shape. 1 * shape.2],
data: vec![T::zero(); shape.0 * shape.1 * shape.2],
shape,
}
}
Expand Down
4 changes: 3 additions & 1 deletion tree/src/implementations/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ use num::Float;
use rand::prelude::*;
use rand::SeedableRng;

use rlst_dense::{base_array::BaseArray, array::Array, rlst_dynamic_array2, data_container::VectorContainer};
use rlst_dense::{
array::Array, base_array::BaseArray, data_container::VectorContainer, rlst_dynamic_array2,
};

use crate::types::morton::MortonKey;

Expand Down

0 comments on commit 2335735

Please sign in to comment.