From 81e4418759b32706fbcf0d6a80bc52ef92083629 Mon Sep 17 00:00:00 2001 From: Matthew Scroggs Date: Tue, 10 Oct 2023 14:09:35 +0100 Subject: [PATCH] directly transpose points --- bem/benches/assembly_benchmark.rs | 2 +- bem/src/assembly/batched.rs | 29 +++++++---------------- grid/src/grid.rs | 38 +++++++++++++++++++++++++++++++ grid/src/parallel_grid.rs | 9 ++++++++ traits/src/grid.rs | 9 ++++++++ 5 files changed, 65 insertions(+), 22 deletions(-) diff --git a/bem/benches/assembly_benchmark.rs b/bem/benches/assembly_benchmark.rs index 4f88af51..dfbaa323 100644 --- a/bem/benches/assembly_benchmark.rs +++ b/bem/benches/assembly_benchmark.rs @@ -11,7 +11,7 @@ use criterion::{criterion_group, criterion_main, Criterion}; pub fn assembly_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("assembly"); - group.sample_size(10); + group.sample_size(20); for i in 3..5 { let grid = regular_sphere(i); diff --git a/bem/src/assembly/batched.rs b/bem/src/assembly/batched.rs index ff83d3bc..c5a43f74 100644 --- a/bem/src/assembly/batched.rs +++ b/bem/src/assembly/batched.rs @@ -149,11 +149,10 @@ fn assemble_batch_singular<'a>( let mut sum = 0.0; for (index, wt) in weights.iter().enumerate() { - kernel.evaluate_st( + kernel.assemble_st( EvalType::Value, test_mapped_pts.row(index).unwrap(), trial_mapped_pts.row(index).unwrap(), - &[1.0], &mut k, ); sum += k[0] @@ -202,14 +201,12 @@ fn assemble_batch_nonadjacent<'a>( // Memory assignment to be moved elsewhere as passed into here mutable? let mut test_jdet = vec![0.0; test_points.shape().0]; let mut trial_jdet = vec![0.0; trial_points.shape().0]; - let mut test_mapped_pts = Array2D::::new((test_points.shape().0, 3)); - let mut trial_mapped_pts = Array2D::::new((trial_points.shape().0, 3)); let mut test_normals = Array2D::::new((test_points.shape().0, 3)); let mut trial_normals = Array2D::::new((trial_points.shape().0, 3)); // TODO: remove transposing, and put points in this shape to start with - let mut test_mapped_pts_t = Array2D::::new((3, test_points.shape().0)); - let mut trial_mapped_pts_t = Array2D::::new((3, trial_points.shape().0)); + let mut test_mapped_pts = Array2D::::new((3, test_points.shape().0)); + let mut trial_mapped_pts = Array2D::::new((3, trial_points.shape().0)); for test_cell in test_cells { let test_cell_tindex = test_grid.topology().index_map()[*test_cell]; @@ -223,12 +220,8 @@ fn assemble_batch_nonadjacent<'a>( ); test_grid .geometry() - .compute_points(test_points, test_cell_gindex, &mut test_mapped_pts); - for i in 0..3 { - for j in 0..test_points.shape().0 { - *test_mapped_pts_t.get_mut(i, j).unwrap() = *test_mapped_pts.get(j, i).unwrap(); - } - } + .compute_points_transpose(test_points, test_cell_gindex, &mut test_mapped_pts); + if needs_test_normal { test_grid .geometry() @@ -245,17 +238,11 @@ fn assemble_batch_nonadjacent<'a>( trial_cell_gindex, &mut trial_jdet, ); - trial_grid.geometry().compute_points( + trial_grid.geometry().compute_points_transpose( trial_points, trial_cell_gindex, &mut trial_mapped_pts, ); - for i in 0..3 { - for j in 0..trial_points.shape().0 { - *trial_mapped_pts_t.get_mut(i, j).unwrap() = - *trial_mapped_pts.get(j, i).unwrap(); - } - } if needs_trial_normal { trial_grid.geometry().compute_normals( trial_points, @@ -266,8 +253,8 @@ fn assemble_batch_nonadjacent<'a>( kernel.assemble_st( EvalType::Value, - &test_mapped_pts_t.data, - &trial_mapped_pts_t.data, + &test_mapped_pts.data, + &trial_mapped_pts.data, &mut k, ); diff --git a/grid/src/grid.rs b/grid/src/grid.rs index 19eca8c0..c8556763 100644 --- a/grid/src/grid.rs +++ b/grid/src/grid.rs @@ -149,6 +149,44 @@ impl Geometry for SerialGeometry { } } } + fn compute_points_transpose<'a>( + &self, + points: &impl Array2DAccess<'a, f64>, + cell: usize, + physical_points: &mut impl Array2DAccess<'a, f64>, + ) { + let gdim = self.dim(); + if gdim != physical_points.shape().0 { + panic!("physical_points has wrong number of rows."); + } + if points.shape().0 != physical_points.shape().1 { + panic!("physical_points has wrong number of columns."); + } + let element = self.element(cell); + let mut data = Array4D::::new(element.tabulate_array_shape(0, points.shape().0)); // TODO: Memory is assigned here. Can we avoid this? + element.tabulate(points, 0, &mut data); + for i in 0..physical_points.shape().0 { + for p in 0..points.shape().0 { + unsafe { + *physical_points.get_unchecked_mut(i, p) = 0.0; + } + } + } + for i in 0..data.shape().2 { + let pt = unsafe { + self.coordinates + .row_unchecked(*self.cells.get_unchecked(cell, i)) + }; + for p in 0..points.shape().0 { + for (j, pt_j) in pt.iter().enumerate() { + unsafe { + *physical_points.get_unchecked_mut(j, p) += + *pt_j * data.get_unchecked(0, p, i, 0); + } + } + } + } + } fn compute_normals<'a>( &self, points: &impl Array2DAccess<'a, f64>, diff --git a/grid/src/parallel_grid.rs b/grid/src/parallel_grid.rs index fd7f318f..f580c9e6 100644 --- a/grid/src/parallel_grid.rs +++ b/grid/src/parallel_grid.rs @@ -70,6 +70,15 @@ impl<'a, C: Communicator> Geometry for ParallelGeometry<'a, C> { self.serial_geometry .compute_points(points, cell, physical_points) } + fn compute_points_transpose<'b>( + &self, + points: &impl Array2DAccess<'b, f64>, + cell: usize, + physical_points_transpose: &mut impl Array2DAccess<'b, f64>, + ) { + self.serial_geometry + .compute_points(points, cell, physical_points) + } fn compute_normals<'b>( &self, points: &impl Array2DAccess<'b, f64>, diff --git a/traits/src/grid.rs b/traits/src/grid.rs index d3c02315..dacb72ff 100644 --- a/traits/src/grid.rs +++ b/traits/src/grid.rs @@ -41,6 +41,15 @@ pub trait Geometry { physical_points: &mut impl Array2DAccess<'a, f64>, ); + /// Compute the physical coordinates of a set of points in a given cell, + /// with return in transposed arrangement + fn compute_points_transpose<'a>( + &self, + points: &impl Array2DAccess<'a, f64>, + cell: usize, + physical_points: &mut impl Array2DAccess<'a, f64>, + ); + /// Compute the normals to a set of points in a given cell fn compute_normals<'a>( &self,