From d5de9671589542c6dc3b737416716c2a7610d71d Mon Sep 17 00:00:00 2001 From: Matthew Scroggs Date: Thu, 16 Nov 2023 11:11:49 +0000 Subject: [PATCH] Start translating opencl kernel to Rust --- bem/benches/assembly_benchmark.rs | 68 +++++++++- bem/src/assembly.rs | 30 +++++ bem/src/assembly/cl_kernel.rs | 205 ++++++++++++++++++++++++++++++ 3 files changed, 301 insertions(+), 2 deletions(-) create mode 100644 bem/src/assembly/cl_kernel.rs diff --git a/bem/benches/assembly_benchmark.rs b/bem/benches/assembly_benchmark.rs index 38bf1986..9f2c9de3 100644 --- a/bem/benches/assembly_benchmark.rs +++ b/bem/benches/assembly_benchmark.rs @@ -1,4 +1,4 @@ -use bempp_bem::assembly::{assemble_batched, batched, BoundaryOperator, PDEType}; +use bempp_bem::assembly::{assemble_batched, batched, cl_kernel, BoundaryOperator, PDEType}; use bempp_bem::function_space::SerialFunctionSpace; use bempp_element::element::create_element; use bempp_grid::shapes::regular_sphere; @@ -116,6 +116,70 @@ pub fn assembly_parts_benchmark(c: &mut Criterion) { group.finish(); } +pub fn cl_benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("cl_assembly"); + group.sample_size(20); + + for i in 3..5 { + let grid = regular_sphere(i); + let element = create_element( + ElementFamily::Lagrange, + ReferenceCellType::Triangle, + 0, + Continuity::Discontinuous, + ); + + let space = SerialFunctionSpace::new(&grid, &element); + let mut matrix = zero_matrix((space.dofmap().global_size(), space.dofmap().global_size())); + + /* + group.bench_function( + &format!( + "Assembly of singular terms of {}x{} matrix", + space.dofmap().global_size(), + space.dofmap().global_size() + ), + |b| { + b.iter(|| { + batched::assemble_singular( + &mut matrix, + &laplace_3d::Laplace3dKernel::new(), + false, + false, + &space, + &space, + &colouring, + &colouring, + 128, + ) + }) + }, + ); + */ + group.bench_function( + &format!( + "Bempp-cl style assembly of non-singular terms of {}x{} matrix", + space.dofmap().global_size(), + space.dofmap().global_size() + ), + |b| { + b.iter(|| { + cl_kernel::assemble( + &mut matrix, + &laplace_3d::Laplace3dKernel::new(), + false, + false, + &space, + &space, + ) + }) + }, + ); + } + group.finish(); +} + // criterion_group!(benches, full_assembly_benchmark, assembly_parts_benchmark); criterion_group!(benches, assembly_parts_benchmark); -criterion_main!(benches); +criterion_group!(cl_benches, cl_benchmark); +criterion_main!(benches, cl_benches); diff --git a/bem/src/assembly.rs b/bem/src/assembly.rs index 9ca2f193..dd8c9b77 100644 --- a/bem/src/assembly.rs +++ b/bem/src/assembly.rs @@ -1,4 +1,5 @@ pub mod batched; +pub mod cl_kernel; use crate::function_space::SerialFunctionSpace; use bempp_kernel::laplace_3d; use bempp_tools::arrays::Mat; @@ -52,6 +53,35 @@ pub fn assemble_batched<'a>( }; } +pub fn assemble_cl<'a>( + output: &mut Mat, + operator: BoundaryOperator, + pde: PDEType, + trial_space: &SerialFunctionSpace<'a>, + test_space: &SerialFunctionSpace<'a>, +) { + match pde { + PDEType::Laplace => match operator { + BoundaryOperator::SingleLayer => { + cl_kernel::assemble( + output, + &laplace_3d::Laplace3dKernel::new(), + false, + false, + trial_space, + test_space, + ); + } + _ => { + panic!("Invalid operator"); + } + }, + _ => { + panic!("Invalid PDE"); + } + }; +} + #[cfg(test)] mod test { use crate::assembly::batched; diff --git a/bem/src/assembly/cl_kernel.rs b/bem/src/assembly/cl_kernel.rs new file mode 100644 index 00000000..74f3d857 --- /dev/null +++ b/bem/src/assembly/cl_kernel.rs @@ -0,0 +1,205 @@ +use crate::function_space::SerialFunctionSpace; +use bempp_kernel::laplace_3d::Laplace3dKernel; +use bempp_quadrature::duffy::quadrilateral::quadrilateral_duffy; +use bempp_quadrature::duffy::triangle::triangle_duffy; +use bempp_quadrature::simplex_rules::simplex_rule; +use bempp_quadrature::types::{CellToCellConnectivity, TestTrialNumericalQuadratureDefinition}; +use bempp_tools::arrays::{transpose_to_matrix, zero_matrix, Array4D, Mat}; +use bempp_traits::arrays::{AdjacencyListAccess, Array4DAccess}; +use bempp_traits::bem::{DofMap, FunctionSpace}; +use bempp_traits::cell::ReferenceCellType; +use bempp_traits::element::FiniteElement; +use bempp_traits::grid::{Geometry, Grid, Topology}; +use bempp_traits::kernel::Kernel; +use bempp_traits::types::EvalType; +use bempp_traits::types::Scalar; +use rayon::prelude::*; +use rlst_dense::{RandomAccessByRef, RandomAccessMut, RawAccess, RawAccessMut, Shape}; + +fn get_corners<'a>(grid: &impl Grid<'a>, index: &usize, corners: &mut Vec>) {} + +fn get_corners_vec<'a>(grid: &impl Grid<'a>, indices: &[usize], corners: &mut Vec>) {} + +fn get_element(connectivity: &[usize], index: &usize, element: &mut Vec) {} +fn get_element_vec(connectivity: &[usize], indices: &[usize], element: &mut Vec>) {} + +fn get_local2global(local2global: &[usize], index: &usize, result: &mut Vec) {} +fn get_local2global_vec(local2global: &[usize], indices: &[usize], result: &mut Vec>) {} + +fn get_jacobian(corners: &Vec>, jac: &mut [f64]) {} +fn get_jacobian_vec(corners: &Vec>, jac: &mut Vec>) {} + +fn get_integration_element(jac: &Vec, int_elem: &f64) {} +fn get_integration_element_vec(jac: &Vec>, int_elem: &[f64]) {} + +fn get_global_point(corners: &Vec>, point: &Vec, result: &mut [f64]) {} +fn get_global_point_vec(corners: &Vec>, point: &Vec, result: &mut [f64]) {} + +fn lagrange_kernel<'a>( + test_indices: &[usize], + trial_indices: &[usize], + test_grid: &impl Grid<'a>, + trial_grid: &impl Grid<'a>, + test_connectivity: &[usize], + trial_connectivity: &[usize], + test_local2global: &[usize], + trial_local2global: &[usize], + quad_points: &[f64], + quad_weights: &[f64], + global_result: &Mat, +) { + let kernel = Laplace3dKernel::new(); + + let NUMBER_OF_QUAD_POINTS = quad_weights.len(); + let VEC_LENGTH = 10; + let TODO = 10; + + let test_index = 0; + let trial_index = trial_indices; + + let mut test_quad_index = 0; + let mut trial_quad_index = 0; + let mut i = 0; + let mut j = 0; + let mut global_row_index = 0; + let mut global_col_index = 0; + + let mut test_global_point = vec![0.0; 3]; + let mut trial_global_point = vec![0.0; TODO * 3]; + let mut test_corners = vec![vec![0.0; 3]; 3]; + let mut trial_corners = vec![vec![0.0; TODO]; 3]; + let mut test_element = vec![0; 3]; + let mut trial_element = vec![vec![0; 3]; VEC_LENGTH]; + + let mut my_test_local2global = vec![0; 1]; + let mut my_trial_local2global = vec![vec![0; 1]; VEC_LENGTH]; + + let mut test_jac = vec![0.0; 2]; + let mut trial_jac = vec![vec![0.0; 3]; 2]; + + let mut test_point = vec![0.0; 2]; + let mut trial_point = vec![0.0; 2]; + + let mut test_int_elem = 0.0; + let mut trial_int_elem = vec![0.0; TODO]; + + let mut test_value = vec![0.0]; + let mut trial_value = vec![0.0]; + + let mut kernel_value = vec![0.0; TODO]; + let mut temp_result = vec![vec![0.0]; TODO]; + let mut temp_factor = vec![0.0; TODO]; + let mut shape_integral = vec![vec![vec![0.0; TODO]]]; + + get_corners(test_grid, &test_index, &mut test_corners); + get_corners_vec(trial_grid, &trial_index, &mut trial_corners); + + get_element(test_connectivity, &test_index, &mut test_element); + get_element_vec(trial_connectivity, &trial_index, &mut trial_element); + + get_local2global(test_local2global, &test_index, &mut my_test_local2global); + get_local2global_vec(trial_local2global, &trial_index, &mut my_trial_local2global); + + get_jacobian(&test_corners, &mut test_jac); + get_jacobian_vec(&trial_corners, &mut trial_jac); + + get_integration_element(&test_jac, &mut test_int_elem); + get_integration_element_vec(&trial_jac, &mut trial_int_elem); + + for test_quad_index in 0..NUMBER_OF_QUAD_POINTS { + test_point[0] = quad_points[2 * test_quad_index]; + test_point[1] = quad_points[2 * test_quad_index + 1]; + get_global_point(&test_corners, &test_point, &mut test_global_point); + test_value[0] = 1.0; + + temp_result[0][0] = 0.0; + for trial_quad_index in 0..NUMBER_OF_QUAD_POINTS { + trial_point[0] = quad_points[2 * trial_quad_index]; + trial_point[1] = quad_points[2 * trial_quad_index + 1]; + get_global_point_vec(&trial_corners, &trial_point, &mut trial_global_point); + trial_value[0] = 1.0; + kernel.assemble_st( + EvalType::Value, + &test_global_point, + &trial_global_point, + &mut kernel_value, + ); + } + } + /* + + for (testQuadIndex = 0; testQuadIndex < NUMBER_OF_QUAD_POINTS; + ++testQuadIndex) { + testPoint = (REALTYPE2)(quadPoints[2 * testQuadIndex], quadPoints[2 * testQuadIndex + 1]); + testGlobalPoint = getGlobalPoint(testCorners, &testPoint); + BASIS(TEST, evaluate)(&testPoint, &testValue[0]); + + for (j = 0; j < NUMBER_OF_TRIAL_SHAPE_FUNCTIONS; ++j) { + tempResult[j] = M_ZERO; + } + + for (trialQuadIndex = 0; trialQuadIndex < NUMBER_OF_QUAD_POINTS; + ++trialQuadIndex) { + trialPoint = (REALTYPE2)(quadPoints[2 * trialQuadIndex], quadPoints[2 * trialQuadIndex + 1]); + getGlobalPointVec(trialCorners, &trialPoint, trialGlobalPoint); + BASIS(TRIAL, evaluate)(&trialPoint, &trialValue[0]); + KERNEL(VEC_STRING) + (testGlobalPoint, trialGlobalPoint, testNormal, trialNormal, kernel_parameters, + &kernelValue); + tempFactor = quadWeights[trialQuadIndex] * kernelValue; + for (j = 0; j < NUMBER_OF_TRIAL_SHAPE_FUNCTIONS; ++j) + tempResult[j] += trialValue[j] * tempFactor; + } + + for (i = 0; i < NUMBER_OF_TEST_SHAPE_FUNCTIONS; ++i) + for (j = 0; j < NUMBER_OF_TRIAL_SHAPE_FUNCTIONS; ++j) { + shapeIntegral[i][j] += + tempResult[j] * quadWeights[testQuadIndex] * testValue[i]; + } + } + + for (i = 0; i < NUMBER_OF_TEST_SHAPE_FUNCTIONS; ++i) + for (j = 0; j < NUMBER_OF_TRIAL_SHAPE_FUNCTIONS; ++j) { + shapeIntegral[i][j] *= + (testIntElem * myTestLocalMultipliers[i]) * trialIntElem; + } + + for (int vecIndex = 0; vecIndex < VEC_LENGTH; ++vecIndex) + if (!elementsAreAdjacent(testElement, trialElement[vecIndex], + gridsAreDisjoint)) { + for (i = 0; i < NUMBER_OF_TEST_SHAPE_FUNCTIONS; ++i) + for (j = 0; j < NUMBER_OF_TRIAL_SHAPE_FUNCTIONS; ++j) { + globalRowIndex = myTestLocal2Global[i]; + globalColIndex = myTrialLocal2Global[vecIndex][j]; + globalResult[globalRowIndex * nTrial + globalColIndex] += + ((REALTYPE*)(&shapeIntegral[i][j]))[vecIndex] * + myTrialLocalMultipliers[vecIndex][j]; + } + } + } + */ +} + +pub fn assemble<'a>( + output: &mut Mat, + kernel: &impl Kernel, + needs_trial_normal: bool, + needs_test_normal: bool, + trial_space: &SerialFunctionSpace<'a>, + test_space: &SerialFunctionSpace<'a>, +) { + *output.get_mut(0, 0).unwrap() = 0.5; + lagrange_kernel( + &[], + &[], + test_space.grid(), + trial_space.grid(), + &[], + &[], + &[], + &[], + &[], + &[], + output, + ); +}