diff --git a/bem/Cargo.toml b/bem/Cargo.toml
index 127fff7d..0e65539c 100644
--- a/bem/Cargo.toml
+++ b/bem/Cargo.toml
@@ -31,10 +31,10 @@ itertools = "0.10"
 mpi = { version = "0.6.*", optional = true }
 num = "0.4"
 rayon = "1.7"
-rlst = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
-rlst-blis-src = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy" }
-rlst-dense = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy" }
-rlst-algorithms = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy" }
+rlst = { git = "https://github.com/linalg-rs/rlst.git" } # NOTE(review): the rlst crates now have no branch/rev, so they follow the default branch; consider pinning `rev`
+rlst-blis-src = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-dense = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-common = { git = "https://github.com/linalg-rs/rlst.git" }
 
 [dev-dependencies]
 criterion = { version = "0.3", features = ["html_reports"]}
diff --git a/bem/benches/assembly_benchmark.rs b/bem/benches/assembly_benchmark.rs
index 38bf1986..6bedeaad 100644
--- a/bem/benches/assembly_benchmark.rs
+++ b/bem/benches/assembly_benchmark.rs
@@ -24,7 +24,7 @@ pub fn full_assembly_benchmark(c: &mut Criterion) {
         );
 
         let space = SerialFunctionSpace::new(&grid, &element);
-        let mut matrix = zero_matrix((space.dofmap().global_size(), space.dofmap().global_size()));
+        let mut matrix = zero_matrix([space.dofmap().global_size(), space.dofmap().global_size()]);
 
         group.bench_function(
             &format!(
@@ -62,7 +62,7 @@ pub fn assembly_parts_benchmark(c: &mut Criterion) {
         );
 
         let space = SerialFunctionSpace::new(&grid, &element);
-        let mut matrix = zero_matrix((space.dofmap().global_size(), space.dofmap().global_size()));
+        let mut matrix = zero_matrix([space.dofmap().global_size(), space.dofmap().global_size()]);
 
         let colouring = space.compute_cell_colouring();
 
@@ -101,8 +101,6 @@ pub fn assembly_parts_benchmark(c: &mut Criterion) {
                     batched::assemble_nonsingular::<16, 16>(
                         &mut matrix,
                         &laplace_3d::Laplace3dKernel::new(),
-                        false,
-                        false,
                         &space,
                         &space,
                         &colouring,
diff --git a/bem/examples/assemble.rs b/bem/examples/assemble.rs
index f6c27ba2..3af85f69 100644
--- a/bem/examples/assemble.rs
+++ b/bem/examples/assemble.rs
@@ -34,7 +34,7 @@ fn main() {
         space0.dofmap().global_size()
     );
     let mut matrix =
-        zero_matrix::<f64>((space1.dofmap().global_size(), space0.dofmap().global_size()));
+        zero_matrix::<f64>([space1.dofmap().global_size(), space0.dofmap().global_size()]);
 
     println!("Assembling dense matrix (complex)");
     assemble_batched(
diff --git a/bem/src/assembly.rs b/bem/src/assembly.rs
index 9ca2f193..a5e3bae5 100644
--- a/bem/src/assembly.rs
+++ b/bem/src/assembly.rs
@@ -67,7 +67,7 @@ mod test {
     use bempp_traits::element::{Continuity, ElementFamily};
     // use num::complex::Complex;
     use bempp_traits::bem::FunctionSpace;
-    use rlst_dense::RandomAccessByRef;
+    use rlst_dense::traits::RandomAccessByRef;
 
     #[test]
     fn test_laplace_single_layer() {
@@ -88,7 +88,7 @@ mod test {
         let space1 = SerialFunctionSpace::new(&grid, &element1);
 
         let mut matrix =
-            zero_matrix::<f64>((space1.dofmap().global_size(), space0.dofmap().global_size()));
+            zero_matrix::<f64>([space1.dofmap().global_size(), space0.dofmap().global_size()]);
         batched::assemble(
             &mut matrix,
             &Laplace3dKernel::new(),
@@ -99,7 +99,7 @@ mod test {
         );
 
         let mut matrix2 =
-            zero_matrix::<f64>((space1.dofmap().global_size(), space0.dofmap().global_size()));
+            zero_matrix::<f64>([space1.dofmap().global_size(), space0.dofmap().global_size()]);
 
         assemble_batched(
             &mut matrix2,
@@ -112,8 +112,8 @@ mod test {
         for i in 0..space1.dofmap().global_size() {
             for j in 0..space0.dofmap().global_size() {
                 assert_relative_eq!(
-                    *matrix.get(i, j).unwrap(),
-                    *matrix2.get(i, j).unwrap(),
+                    *matrix.get([i, j]).unwrap(),
+                    *matrix2.get([i, j]).unwrap(),
                     epsilon = 0.0001
                 );
             }
diff --git a/bem/src/assembly/batched.rs b/bem/src/assembly/batched.rs
index 4ad480ce..f584eb84 100644
--- a/bem/src/assembly/batched.rs
+++ b/bem/src/assembly/batched.rs
@@ -4,7 +4,7 @@ use bempp_quadrature::duffy::triangle::triangle_duffy;
 use bempp_quadrature::simplex_rules::simplex_rule;
 use bempp_quadrature::types::{CellToCellConnectivity, TestTrialNumericalQuadratureDefinition};
 use bempp_tools::arrays::{transpose_to_matrix, zero_matrix, Array4D, Mat};
-use bempp_traits::arrays::{AdjacencyListAccess, Array4DAccess};
+use bempp_traits::arrays::AdjacencyListAccess;
 use bempp_traits::bem::{DofMap, FunctionSpace};
 use bempp_traits::cell::ReferenceCellType;
 use bempp_traits::element::FiniteElement;
@@ -13,7 +13,10 @@ use bempp_traits::kernel::Kernel;
 use bempp_traits::types::EvalType;
 use bempp_traits::types::Scalar;
 use rayon::prelude::*;
-use rlst_dense::{RandomAccessByRef, RawAccess, RawAccessMut, Shape};
+use rlst_dense::rlst_dynamic_array4;
+use rlst_dense::traits::{
+    RandomAccessByRef, RawAccess, RawAccessMut, Shape, UnsafeRandomAccessByRef,
+};
 
 fn get_quadrature_rule(
     test_celltype: ReferenceCellType,
@@ -70,7 +73,7 @@ fn get_quadrature_rule(
 
 pub struct RawData2D<T: Scalar> {
     pub data: *mut T,
-    pub shape: (usize, usize),
+    pub shape: [usize; 2], // batch assemblers address entry (i, j) at data[i + shape[0] * j]
 }
 
 unsafe impl<T: Scalar> Sync for RawData2D<T> {}
@@ -95,12 +98,12 @@ fn assemble_batch_singular<'a>(
     let mut k = vec![0.0];
 
     // Memory assignment to be moved elsewhere as passed into here mutable?
-    let mut test_jdet = vec![0.0; test_points.shape().0];
-    let mut trial_jdet = vec![0.0; trial_points.shape().0];
-    let mut test_mapped_pts = zero_matrix((test_points.shape().0, 3));
-    let mut trial_mapped_pts = zero_matrix((trial_points.shape().0, 3));
-    let mut test_normals = zero_matrix((test_points.shape().0, 3));
-    let mut trial_normals = zero_matrix((trial_points.shape().0, 3));
+    let mut test_jdet = vec![0.0; test_points.shape()[0]];
+    let mut trial_jdet = vec![0.0; trial_points.shape()[0]];
+    let mut test_mapped_pts = zero_matrix([test_points.shape()[0], 3]);
+    let mut trial_mapped_pts = zero_matrix([trial_points.shape()[0], 3]);
+    let mut test_normals = zero_matrix([test_points.shape()[0], 3]);
+    let mut trial_normals = zero_matrix([trial_points.shape()[0], 3]);
 
     for (test_cell, trial_cell) in cell_pairs {
         let test_cell_tindex = grid.topology().index_map()[*test_cell];
@@ -149,29 +152,25 @@ fn assemble_batch_singular<'a>(
                 let mut sum = 0.0;
 
                 for (index, wt) in weights.iter().enumerate() {
-                    let mut test_row = vec![0.0; test_mapped_pts.shape().1];
+                    let mut test_row = vec![0.0; test_mapped_pts.shape()[1]];
                     for (i, ti) in test_row.iter_mut().enumerate() {
-                        *ti = *test_mapped_pts.get(index, i).unwrap();
+                        *ti = *test_mapped_pts.get([index, i]).unwrap();
                     }
-                    let mut trial_row = vec![0.0; trial_mapped_pts.shape().1];
+                    let mut trial_row = vec![0.0; trial_mapped_pts.shape()[1]];
                     for (i, ti) in trial_row.iter_mut().enumerate() {
-                        *ti = *trial_mapped_pts.get(index, i).unwrap();
+                        *ti = *trial_mapped_pts.get([index, i]).unwrap();
                     }
 
                     kernel.assemble_st(EvalType::Value, &test_row, &trial_row, &mut k);
                     sum += k[0]
                         * (wt
-                            * test_table.get(0, index, test_i, 0).unwrap()
+                            * test_table.get([0, index, test_i, 0]).unwrap()
                             * test_jdet[index]
-                            * trial_table.get(0, index, trial_i, 0).unwrap()
+                            * trial_table.get([0, index, trial_i, 0]).unwrap()
                             * trial_jdet[index]);
                 }
                 unsafe {
-                    *output.data.offset(
-                        (*test_dof + output.shape.0 * *trial_dof)
-                            .try_into()
-                            .unwrap(),
-                    ) += sum;
+                    *output.data.add(*test_dof + output.shape[0] * *trial_dof) += sum;
                 }
             }
         }
@@ -183,8 +182,6 @@ fn assemble_batch_singular<'a>(
 fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usize>(
     output: &RawData2D<f64>,
     kernel: &impl Kernel<T = f64>,
-    needs_trial_normal: bool,
-    needs_test_normal: bool,
     trial_space: &SerialFunctionSpace<'a>,
     trial_cells: &[usize],
     test_space: &SerialFunctionSpace<'a>,
@@ -196,19 +193,21 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
     trial_table: &Array4D<f64>,
     test_table: &Array4D<f64>,
 ) -> usize {
+    debug_assert!(test_weights.len() == NPTS_TEST);
+    debug_assert!(test_points.shape()[0] == NPTS_TEST);
+    debug_assert!(trial_weights.len() == NPTS_TRIAL);
+    debug_assert!(trial_points.shape()[0] == NPTS_TRIAL);
+
     let test_grid = test_space.grid();
     let test_c20 = test_grid.topology().connectivity(2, 0);
     let trial_grid = trial_space.grid();
     let trial_c20 = trial_grid.topology().connectivity(2, 0);
 
     let mut k = vec![0.0; NPTS_TEST * NPTS_TRIAL];
-    let mut test_jdet = vec![0.0; NPTS_TEST];
-    let mut trial_jdet = vec![0.0; NPTS_TRIAL];
-    let mut test_normals = zero_matrix((NPTS_TEST, 3));
-    let mut trial_normals = zero_matrix((NPTS_TRIAL, 3));
+    let mut test_jdet = [0.0; NPTS_TEST];
+    let mut test_normals = zero_matrix([NPTS_TEST, 3]);
 
-    let mut test_mapped_pts = rlst_dense::rlst_dynamic_mat![f64, (NPTS_TEST, 3)];
-    let mut trial_mapped_pts = rlst_dense::rlst_dynamic_mat![f64, (NPTS_TRIAL, 3)];
+    let mut test_mapped_pts = rlst_dense::rlst_dynamic_array2![f64, [NPTS_TEST, 3]];
 
     let test_element = test_grid.geometry().element(test_cells[0]);
     let trial_element = trial_grid.geometry().element(trial_cells[0]);
@@ -220,45 +219,60 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
         .geometry()
         .get_evaluator(trial_element, trial_points);
 
-    #[allow(clippy::type_complexity)]
-    let test_compute_normals: Box<dyn Fn(usize, &mut Mat<f64>)> = if needs_test_normal {
-        Box::new(|index: usize, normals: &mut Mat<f64>| {
-            test_evaluator.compute_normals(index, normals)
-        })
-    } else {
-        Box::new(|_: usize, _: &mut Mat<f64>| ())
-    };
-    #[allow(clippy::type_complexity)]
-    let trial_compute_normals: Box<dyn Fn(usize, &mut Mat<f64>)> = if needs_trial_normal {
-        Box::new(|index: usize, normals: &mut Mat<f64>| {
-            trial_evaluator.compute_normals(index, normals)
-        })
-    } else {
-        Box::new(|_: usize, _: &mut Mat<f64>| ())
-    };
+    let mut trial_jdet = vec![[0.0; NPTS_TRIAL]; trial_cells.len()];
+    let mut trial_mapped_pts = vec![];
+    let mut trial_normals = vec![];
+    for _i in 0..trial_cells.len() {
+        trial_mapped_pts.push(zero_matrix([NPTS_TRIAL, 3]));
+        trial_normals.push(zero_matrix([NPTS_TRIAL, 3]));
+    }
+
+    for (trial_cell_i, trial_cell) in trial_cells.iter().enumerate() {
+        let trial_cell_gindex = trial_grid.geometry().index_map()[*trial_cell];
+
+        trial_evaluator.compute_normals_and_jacobian_determinants(
+            trial_cell_gindex,
+            &mut trial_normals[trial_cell_i],
+            &mut trial_jdet[trial_cell_i],
+        );
+        trial_evaluator.compute_points(trial_cell_gindex, &mut trial_mapped_pts[trial_cell_i]);
+    }
+
+    let mut sum: f64;
+    let mut trial_integrands = [0.0; NPTS_TRIAL];
 
     for test_cell in test_cells {
         let test_cell_tindex = test_grid.topology().index_map()[*test_cell];
         let test_cell_gindex = test_grid.geometry().index_map()[*test_cell];
         let test_vertices = unsafe { test_c20.row_unchecked(test_cell_tindex) };
 
-        test_evaluator.compute_jacobian_determinants(test_cell_gindex, &mut test_jdet);
+        test_evaluator.compute_normals_and_jacobian_determinants(
+            test_cell_gindex,
+            &mut test_normals,
+            &mut test_jdet,
+        );
         test_evaluator.compute_points(test_cell_gindex, &mut test_mapped_pts);
-        test_compute_normals(test_cell_gindex, &mut test_normals);
 
-        for trial_cell in trial_cells {
+        for (trial_cell_i, trial_cell) in trial_cells.iter().enumerate() {
             let trial_cell_tindex = trial_grid.topology().index_map()[*trial_cell];
-            let trial_cell_gindex = trial_grid.geometry().index_map()[*trial_cell];
             let trial_vertices = unsafe { trial_c20.row_unchecked(trial_cell_tindex) };
 
-            trial_evaluator.compute_jacobian_determinants(trial_cell_gindex, &mut trial_jdet);
-            trial_evaluator.compute_points(trial_cell_gindex, &mut trial_mapped_pts);
-            trial_compute_normals(trial_cell_gindex, &mut trial_normals);
+            let mut neighbour = false;
+            for v in test_vertices {
+                if trial_vertices.contains(v) {
+                    neighbour = true;
+                    break;
+                }
+            }
+
+            if neighbour {
+                continue;
+            }
 
             kernel.assemble_st(
                 EvalType::Value,
                 test_mapped_pts.data(),
-                trial_mapped_pts.data(),
+                trial_mapped_pts[trial_cell_i].data(),
                 &mut k,
             );
 
@@ -276,35 +290,29 @@ fn assemble_batch_nonadjacent<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usiz
                     .iter()
                     .enumerate()
                 {
-                    let mut sum = 0.0;
-
+                    for (trial_index, trial_wt) in trial_weights.iter().enumerate() {
+                        unsafe {
+                            trial_integrands[trial_index] = trial_wt
+                                * trial_jdet[trial_cell_i][trial_index]
+                                * trial_table.get_unchecked([0, trial_index, trial_i, 0]);
+                        }
+                    }
+                    sum = 0.0;
                     for (test_index, test_wt) in test_weights.iter().enumerate() {
-                        for (trial_index, trial_wt) in trial_weights.iter().enumerate() {
+                        let test_integrand = unsafe {
+                            test_wt
+                                * test_jdet[test_index]
+                                * test_table.get_unchecked([0, test_index, test_i, 0])
+                        };
+                        for trial_index in 0..NPTS_TRIAL {
                             sum += k[test_index * trial_weights.len() + trial_index]
-                                * (test_wt
-                                    * trial_wt
-                                    * test_table.get(0, test_index, test_i, 0).unwrap()
-                                    * test_jdet[test_index]
-                                    * trial_table.get(0, trial_index, trial_i, 0).unwrap()
-                                    * trial_jdet[test_index]);
+                                * test_integrand
+                                * trial_integrands[trial_index];
                         }
                     }
                     // TODO: should we write into a result array, then copy into output after this loop?
-                    let mut neighbour = false;
-                    for v in test_vertices {
-                        if trial_vertices.contains(v) {
-                            neighbour = true;
-                            break;
-                        }
-                    }
-                    if !neighbour {
-                        unsafe {
-                            *output.data.offset(
-                                (*test_dof + output.shape.0 * *trial_dof)
-                                    .try_into()
-                                    .unwrap(),
-                            ) += sum;
-                        }
+                    unsafe {
+                        *output.data.add(*test_dof + output.shape[0] * *trial_dof) += sum;
                     }
                 }
             }
@@ -329,8 +337,6 @@ pub fn assemble<'a>(
     assemble_nonsingular::<16, 16>(
         output,
         kernel,
-        needs_trial_normal,
-        needs_test_normal,
         trial_space,
         test_space,
         &trial_colouring,
@@ -354,8 +360,6 @@ pub fn assemble<'a>(
 pub fn assemble_nonsingular<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usize>(
     output: &mut Mat<f64>,
     kernel: &impl Kernel<T = f64>,
-    needs_trial_normal: bool,
-    needs_test_normal: bool,
     trial_space: &SerialFunctionSpace<'a>,
     test_space: &SerialFunctionSpace<'a>,
     trial_colouring: &Vec<Vec<usize>>,
@@ -365,8 +369,8 @@ pub fn assemble_nonsingular<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usize>
     if !trial_space.is_serial() || !test_space.is_serial() {
         panic!("Dense assemble can only be used for function spaces stored in serial");
     }
-    if output.shape().0 != test_space.dofmap().global_size()
-        || output.shape().1 != trial_space.dofmap().global_size()
+    if output.shape()[0] != test_space.dofmap().global_size()
+        || output.shape()[1] != trial_space.dofmap().global_size()
     {
         panic!("Matrix has wrong shape");
     }
@@ -377,20 +381,22 @@ pub fn assemble_nonsingular<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usize>
 
     // TODO: pass cell types into this function
     let qrule_test = simplex_rule(ReferenceCellType::Triangle, NPTS_TEST).unwrap();
-    let qpoints_test = transpose_to_matrix(&qrule_test.points, (NPTS_TEST, 2));
+    let qpoints_test = transpose_to_matrix(&qrule_test.points, [NPTS_TEST, 2]);
     let qweights_test = qrule_test.weights;
     let qrule_trial = simplex_rule(ReferenceCellType::Triangle, NPTS_TRIAL).unwrap();
-    let qpoints_trial = transpose_to_matrix(&qrule_trial.points, (NPTS_TRIAL, 2));
+    let qpoints_trial = transpose_to_matrix(&qrule_trial.points, [NPTS_TRIAL, 2]);
     let qweights_trial = qrule_trial.weights;
 
     let mut test_table =
-        Array4D::<f64>::new(test_space.element().tabulate_array_shape(0, NPTS_TEST));
+        rlst_dynamic_array4!(f64, test_space.element().tabulate_array_shape(0, NPTS_TEST));
     test_space
         .element()
         .tabulate(&qpoints_test, 0, &mut test_table);
 
-    let mut trial_table =
-        Array4D::<f64>::new(trial_space.element().tabulate_array_shape(0, NPTS_TRIAL));
+    let mut trial_table = rlst_dynamic_array4!(
+        f64,
+        trial_space.element().tabulate_array_shape(0, NPTS_TRIAL)
+    );
     trial_space
         .element()
-        .tabulate(&qpoints_test, 0, &mut trial_table);
+        .tabulate(&qpoints_trial, 0, &mut trial_table); // tabulate at the trial rule's points
@@ -434,8 +440,6 @@ pub fn assemble_nonsingular<'a, const NPTS_TEST: usize, const NPTS_TRIAL: usize>
                     assemble_batch_nonadjacent::<NPTS_TEST, NPTS_TRIAL>(
                         &output_raw,
                         kernel,
-                        needs_trial_normal,
-                        needs_test_normal,
                         trial_space,
                         trial_cells[t],
                         test_space,
@@ -474,8 +478,8 @@ pub fn assemble_singular<'a>(
     if !trial_space.is_serial() || !test_space.is_serial() {
         panic!("Dense assemble can only be used for function spaces stored in serial");
     }
-    if output.shape().0 != test_space.dofmap().global_size()
-        || output.shape().1 != trial_space.dofmap().global_size()
+    if output.shape()[0] != test_space.dofmap().global_size()
+        || output.shape()[1] != trial_space.dofmap().global_size()
     {
         panic!("Matrix has wrong shape");
     }
@@ -530,21 +534,23 @@ pub fn assemble_singular<'a>(
             npoints,
         );
 
-        let points = transpose_to_matrix(&qrule.trial_points, (qrule.npoints, 2));
-        let mut table = Array4D::<f64>::new(
+        let points = transpose_to_matrix(&qrule.trial_points, [qrule.npoints, 2]);
+        let mut table = rlst_dynamic_array4!(
+            f64,
             trial_space
                 .element()
-                .tabulate_array_shape(0, points.shape().0),
+                .tabulate_array_shape(0, points.shape()[0])
         );
         trial_space.element().tabulate(&points, 0, &mut table);
         trial_points.push(points);
         trial_tables.push(table);
 
-        let points = transpose_to_matrix(&qrule.test_points, (qrule.npoints, 2));
-        let mut table = Array4D::<f64>::new(
+        let points = transpose_to_matrix(&qrule.test_points, [qrule.npoints, 2]);
+        let mut table = rlst_dynamic_array4!(
+            f64,
             test_space
                 .element()
-                .tabulate_array_shape(0, points.shape().0),
+                .tabulate_array_shape(0, points.shape()[0])
         );
         test_space.element().tabulate(&points, 0, &mut table);
         test_points.push(points);
@@ -638,7 +644,7 @@ mod test {
 
         let ndofs = space.dofmap().global_size();
 
-        let mut matrix = zero_matrix::<f64>((ndofs, ndofs));
+        let mut matrix = zero_matrix::<f64>([ndofs, ndofs]);
         assemble(
             &mut matrix,
             &Laplace3dKernel::new(),
@@ -654,7 +660,7 @@ mod test {
 
         for (i, row) in from_cl.iter().enumerate() {
             for (j, entry) in row.iter().enumerate() {
-                assert_relative_eq!(*matrix.get(i, j).unwrap(), entry, epsilon = 1e-3);
+                assert_relative_eq!(*matrix.get([i, j]).unwrap(), entry, epsilon = 1e-3);
             }
         }
     }
@@ -673,7 +679,7 @@ mod test {
 
         let ndofs = space.dofmap().global_size();
 
-        let mut matrix = Array2D::<f64>::new((ndofs, ndofs));
+        let mut matrix = Array2D::<f64>::new([ndofs, ndofs]);
         assemble(
             &mut matrix,
             &green::LaplaceGreenDyKernel {},
@@ -707,7 +713,7 @@ mod test {
 
         let ndofs = space.dofmap().global_size();
 
-        let mut matrix = Array2D::<f64>::new((ndofs, ndofs));
+        let mut matrix = Array2D::<f64>::new([ndofs, ndofs]);
         assemble(
             &mut matrix,
             &green::LaplaceGreenDxKernel {},
@@ -741,7 +747,7 @@ mod test {
 
         let ndofs = space.dofmap().global_size();
 
-        let mut matrix = Array2D::<f64>::new((ndofs, ndofs));
+        let mut matrix = Array2D::<f64>::new([ndofs, ndofs]);
         laplace_hypersingular_assemble(&mut matrix, &space, &space);
 
         for i in 0..ndofs {
@@ -764,7 +770,7 @@ mod test {
 
         let ndofs = space.dofmap().global_size();
 
-        let mut matrix = Array2D::<f64>::new((ndofs, ndofs));
+        let mut matrix = Array2D::<f64>::new([ndofs, ndofs]);
 
         laplace_hypersingular_assemble(&mut matrix, &space, &space);
 
@@ -798,7 +804,7 @@ mod test {
 
         let ndofs = space.dofmap().global_size();
 
-        let mut matrix = Array2D::<f64>::new((ndofs, ndofs));
+        let mut matrix = Array2D::<f64>::new([ndofs, ndofs]);
         assemble(
             &mut matrix,
             &green::HelmholtzGreenKernel { k: 3.0 },
@@ -831,7 +837,7 @@ mod test {
 
         let ndofs = space.dofmap().global_size();
 
-        let mut matrix = Array2D::<Complex<f64>>::new((ndofs, ndofs));
+        let mut matrix = Array2D::<Complex<f64>>::new([ndofs, ndofs]);
         assemble(
             &mut matrix,
             &green::HelmholtzGreenKernel { k: 3.0 },
@@ -865,7 +871,7 @@ mod test {
 
         let ndofs = space.dofmap().global_size();
 
-        let mut matrix = Array2D::<Complex<f64>>::new((ndofs, ndofs));
+        let mut matrix = Array2D::<Complex<f64>>::new([ndofs, ndofs]);
         assemble(
             &mut matrix,
             &green::HelmholtzGreenDyKernel { k: 3.0 },
@@ -900,7 +906,7 @@ mod test {
 
         let ndofs = space.dofmap().global_size();
 
-        let mut matrix = Array2D::<Complex<f64>>::new((ndofs, ndofs));
+        let mut matrix = Array2D::<Complex<f64>>::new([ndofs, ndofs]);
         assemble(
             &mut matrix,
             &green::HelmholtzGreenDxKernel { k: 3.0 },
@@ -935,7 +941,7 @@ mod test {
 
         let ndofs = space.dofmap().global_size();
 
-        let mut matrix = Array2D::<Complex<f64>>::new((ndofs, ndofs));
+        let mut matrix = Array2D::<Complex<f64>>::new([ndofs, ndofs]);
 
         helmholtz_hypersingular_assemble(&mut matrix, &space, &space, 3.0);
 
diff --git a/bem/src/assembly/dense.rs b/bem/src/assembly/dense.rs
deleted file mode 100644
index d2e294e9..00000000
--- a/bem/src/assembly/dense.rs
+++ /dev/null
@@ -1,1495 +0,0 @@
-use crate::green::{
-    HelmholtzGreenHypersingularTermKernel, HelmholtzGreenKernel, LaplaceGreenKernel, Scalar,
-    SingularKernel,
-};
-use bempp_quadrature::duffy::quadrilateral::quadrilateral_duffy;
-use bempp_quadrature::duffy::triangle::triangle_duffy;
-use bempp_quadrature::simplex_rules::{available_rules, simplex_rule};
-use bempp_quadrature::types::{CellToCellConnectivity, TestTrialNumericalQuadratureDefinition};
-use bempp_tools::arrays::{Array2D, Array4D};
-use bempp_traits::arrays::{AdjacencyListAccess, Array2DAccess, Array4DAccess};
-use bempp_traits::bem::{DofMap, FunctionSpace};
-use bempp_traits::cell::ReferenceCellType;
-use bempp_traits::element::FiniteElement;
-use bempp_traits::grid::{Geometry, Grid, Topology};
-
-fn get_quadrature_rule(
-    test_celltype: ReferenceCellType,
-    trial_celltype: ReferenceCellType,
-    pairs: Vec<(usize, usize)>,
-    npoints: usize,
-) -> TestTrialNumericalQuadratureDefinition {
-    if pairs.is_empty() {
-        // Standard rules
-        let mut npoints_test = 10 * npoints * npoints;
-        for p in available_rules(test_celltype) {
-            if p >= npoints * npoints && p < npoints_test {
-                npoints_test = p;
-            }
-        }
-        let mut npoints_trial = 10 * npoints * npoints;
-        for p in available_rules(trial_celltype) {
-            if p >= npoints * npoints && p < npoints_trial {
-                npoints_trial = p;
-            }
-        }
-        let test_rule = simplex_rule(test_celltype, npoints_test).unwrap();
-        let trial_rule = simplex_rule(trial_celltype, npoints_trial).unwrap();
-        if test_rule.dim != trial_rule.dim {
-            unimplemented!("Quadrature with different dimension cells not supported");
-        }
-        if test_rule.order != trial_rule.order {
-            unimplemented!("Quadrature with different trial and test orders not supported");
-        }
-        let dim = test_rule.dim;
-        let npts = test_rule.npoints * trial_rule.npoints;
-        let mut test_points = Vec::<f64>::with_capacity(dim * npts);
-        let mut trial_points = Vec::<f64>::with_capacity(dim * npts);
-        let mut weights = Vec::<f64>::with_capacity(npts);
-
-        for test_i in 0..test_rule.npoints {
-            for trial_i in 0..trial_rule.npoints {
-                for d in 0..dim {
-                    test_points.push(test_rule.points[dim * test_i + d]);
-                    trial_points.push(trial_rule.points[dim * trial_i + d]);
-                }
-                weights.push(test_rule.weights[test_i] * trial_rule.weights[trial_i]);
-            }
-        }
-
-        TestTrialNumericalQuadratureDefinition {
-            dim,
-            order: test_rule.order,
-            npoints: npts,
-            weights,
-            test_points,
-            trial_points,
-        }
-    } else {
-        // Singular rules
-        if test_celltype == ReferenceCellType::Triangle {
-            if trial_celltype != ReferenceCellType::Triangle {
-                unimplemented!("Mixed meshes not yet supported");
-            }
-            triangle_duffy(
-                &CellToCellConnectivity {
-                    connectivity_dimension: if pairs.len() == 1 {
-                        0
-                    } else if pairs.len() == 2 {
-                        1
-                    } else {
-                        2
-                    },
-                    local_indices: pairs,
-                },
-                npoints,
-            )
-            .unwrap()
-        } else {
-            if test_celltype != ReferenceCellType::Quadrilateral {
-                unimplemented!("Only triangles and quadrilaterals are currently supported");
-            }
-            if trial_celltype != ReferenceCellType::Quadrilateral {
-                unimplemented!("Mixed meshes not yet supported");
-            }
-            quadrilateral_duffy(
-                &CellToCellConnectivity {
-                    connectivity_dimension: if pairs.len() == 1 {
-                        0
-                    } else if pairs.len() == 2 {
-                        1
-                    } else {
-                        2
-                    },
-                    local_indices: pairs,
-                },
-                npoints,
-            )
-            .unwrap()
-        }
-    }
-}
-
-pub fn assemble<'a, T: Scalar>(
-    output: &mut Array2D<T>,
-    kernel: &impl SingularKernel,
-    needs_trial_normal: bool,
-    needs_test_normal: bool,
-    trial_space: &impl FunctionSpace<'a>,
-    test_space: &impl FunctionSpace<'a>,
-) {
-    // Note: currently assumes that the two grids are the same
-    // TODO: implement == and != for grids, then add:
-    // if *trial_space.grid() != *test_space.grid() {
-    //    unimplemented!("Assembling operators with spaces on different grids not yet supported");
-    // }
-    if !trial_space.is_serial() || !test_space.is_serial() {
-        panic!("Dense assemble can only be used for function spaces stored in serial");
-    }
-    if output.shape().0 != test_space.dofmap().global_size()
-        || output.shape().1 != trial_space.dofmap().global_size()
-    {
-        panic!("Matrix has wrong shape");
-    }
-
-    // TODO: allow user to configure this
-    let npoints = 4;
-
-    let grid = trial_space.grid();
-    let c20 = grid.topology().connectivity(2, 0);
-
-    for test_cell in 0..grid.geometry().cell_count() {
-        let test_cell_tindex = grid.topology().index_map()[test_cell];
-        let test_cell_gindex = grid.geometry().index_map()[test_cell];
-        let test_vertices = c20.row(test_cell_tindex).unwrap();
-
-        let mut npoints_test_cell = 10 * npoints * npoints;
-        for p in available_rules(grid.topology().cell_type(test_cell_tindex).unwrap()) {
-            if p >= npoints * npoints && p < npoints_test_cell {
-                npoints_test_cell = p;
-            }
-        }
-        for trial_cell in 0..grid.geometry().cell_count() {
-            let trial_cell_tindex = grid.topology().index_map()[trial_cell];
-            let trial_cell_gindex = grid.geometry().index_map()[trial_cell];
-            let trial_vertices = c20.row(trial_cell_tindex).unwrap();
-
-            let mut npoints_trial_cell = 10 * npoints * npoints;
-            for p in available_rules(grid.topology().cell_type(trial_cell_tindex).unwrap()) {
-                if p >= npoints * npoints && p < npoints_trial_cell {
-                    npoints_trial_cell = p;
-                }
-            }
-
-            let mut pairs = vec![];
-            for (test_i, test_v) in test_vertices.iter().enumerate() {
-                for (trial_i, trial_v) in trial_vertices.iter().enumerate() {
-                    if test_v == trial_v {
-                        pairs.push((test_i, trial_i));
-                    }
-                }
-            }
-            let rule = get_quadrature_rule(
-                grid.topology().cell_type(test_cell_tindex).unwrap(),
-                grid.topology().cell_type(trial_cell_tindex).unwrap(),
-                pairs,
-                npoints,
-            );
-
-            let test_points = Array2D::from_data(rule.test_points, (rule.npoints, 2));
-            let trial_points = Array2D::from_data(rule.trial_points, (rule.npoints, 2));
-            let mut test_table =
-                Array4D::<f64>::new(test_space.element().tabulate_array_shape(0, rule.npoints));
-            let mut trial_table =
-                Array4D::<f64>::new(trial_space.element().tabulate_array_shape(0, rule.npoints));
-
-            test_space
-                .element()
-                .tabulate(&test_points, 0, &mut test_table);
-            trial_space
-                .element()
-                .tabulate(&trial_points, 0, &mut trial_table);
-
-            let mut test_jdet = vec![0.0; rule.npoints];
-            let mut trial_jdet = vec![0.0; rule.npoints];
-
-            grid.geometry().compute_jacobian_determinants(
-                &test_points,
-                test_cell_gindex,
-                &mut test_jdet,
-            );
-            grid.geometry().compute_jacobian_determinants(
-                &trial_points,
-                trial_cell_gindex,
-                &mut trial_jdet,
-            );
-
-            let mut test_mapped_pts = Array2D::<f64>::new((rule.npoints, 3));
-            let mut trial_mapped_pts = Array2D::<f64>::new((rule.npoints, 3));
-            let mut test_normals = Array2D::<f64>::new((rule.npoints, 3));
-            let mut trial_normals = Array2D::<f64>::new((rule.npoints, 3));
-
-            grid.geometry()
-                .compute_points(&test_points, test_cell_gindex, &mut test_mapped_pts);
-            grid.geometry()
-                .compute_points(&trial_points, trial_cell_gindex, &mut trial_mapped_pts);
-            if needs_test_normal {
-                grid.geometry()
-                    .compute_normals(&test_points, test_cell_gindex, &mut test_normals);
-            }
-            if needs_trial_normal {
-                grid.geometry().compute_normals(
-                    &trial_points,
-                    trial_cell_gindex,
-                    &mut trial_normals,
-                );
-            }
-
-            for (test_i, test_dof) in test_space
-                .dofmap()
-                .cell_dofs(test_cell_tindex)
-                .unwrap()
-                .iter()
-                .enumerate()
-            {
-                for (trial_i, trial_dof) in trial_space
-                    .dofmap()
-                    .cell_dofs(trial_cell_tindex)
-                    .unwrap()
-                    .iter()
-                    .enumerate()
-                {
-                    let mut sum = T::zero();
-
-                    for index in 0..rule.npoints {
-                        sum += kernel.eval::<T>(
-                            unsafe { test_mapped_pts.row_unchecked(index) },
-                            unsafe { trial_mapped_pts.row_unchecked(index) },
-                            unsafe { test_normals.row_unchecked(index) },
-                            unsafe { trial_normals.row_unchecked(index) },
-                        ) * T::from_f64(
-                            rule.weights[index]
-                                * unsafe { test_table.get_unchecked(0, index, test_i, 0) }
-                                * test_jdet[index]
-                                * unsafe { trial_table.get_unchecked(0, index, trial_i, 0) }
-                                * trial_jdet[index],
-                        );
-                    }
-                    *output.get_mut(*test_dof, *trial_dof).unwrap() += sum;
-                }
-            }
-        }
-    }
-}
-
-pub fn curl_curl_assemble<'a, T: Scalar>(
-    output: &mut Array2D<T>,
-    kernel: &impl SingularKernel,
-    trial_space: &impl FunctionSpace<'a>,
-    test_space: &impl FunctionSpace<'a>,
-) {
-    // Note: currently assumes that the two grids are the same
-    // TODO: implement == and != for grids, then add:
-    // if *trial_space.grid() != *test_space.grid() {
-    //    unimplemented!("Assembling operators with spaces on different grids not yet supported");
-    // }
-    if !trial_space.is_serial() || !test_space.is_serial() {
-        panic!("Dense assemble can only be used for function spaces stored in serial");
-    }
-    if output.shape().0 != test_space.dofmap().global_size()
-        || output.shape().1 != trial_space.dofmap().global_size()
-    {
-        panic!("Matrix has wrong shape");
-    }
-
-    let npoints = 4;
-
-    let grid = trial_space.grid();
-    let c20 = grid.topology().connectivity(2, 0);
-
-    for test_cell in 0..grid.geometry().cell_count() {
-        let test_cell_tindex = grid.topology().index_map()[test_cell];
-        let test_cell_gindex = grid.geometry().index_map()[test_cell];
-        let test_vertices = c20.row(test_cell_tindex).unwrap();
-
-        let mut npoints_test_cell = 10 * npoints * npoints;
-        for p in available_rules(grid.topology().cell_type(test_cell_tindex).unwrap()) {
-            if p >= npoints * npoints && p < npoints_test_cell {
-                npoints_test_cell = p;
-            }
-        }
-        for trial_cell in 0..grid.geometry().cell_count() {
-            let trial_cell_tindex = grid.topology().index_map()[trial_cell];
-            let trial_cell_gindex = grid.geometry().index_map()[trial_cell];
-            let trial_vertices = c20.row(trial_cell_tindex).unwrap();
-
-            let mut npoints_trial_cell = 10 * npoints * npoints;
-            for p in available_rules(grid.topology().cell_type(trial_cell_tindex).unwrap()) {
-                if p >= npoints * npoints && p < npoints_trial_cell {
-                    npoints_trial_cell = p;
-                }
-            }
-
-            let mut pairs = vec![];
-            for (test_i, test_v) in test_vertices.iter().enumerate() {
-                for (trial_i, trial_v) in trial_vertices.iter().enumerate() {
-                    if test_v == trial_v {
-                        pairs.push((test_i, trial_i));
-                    }
-                }
-            }
-            let rule = get_quadrature_rule(
-                grid.topology().cell_type(test_cell_tindex).unwrap(),
-                grid.topology().cell_type(trial_cell_tindex).unwrap(),
-                pairs,
-                npoints,
-            );
-            let test_points = Array2D::from_data(rule.test_points, (rule.npoints, 2));
-            let trial_points = Array2D::from_data(rule.trial_points, (rule.npoints, 2));
-            let mut test_table =
-                Array4D::<f64>::new(test_space.element().tabulate_array_shape(1, rule.npoints));
-            let mut trial_table =
-                Array4D::<f64>::new(trial_space.element().tabulate_array_shape(1, rule.npoints));
-
-            test_space
-                .element()
-                .tabulate(&test_points, 1, &mut test_table);
-            trial_space
-                .element()
-                .tabulate(&trial_points, 1, &mut trial_table);
-
-            let mut test_jdet = vec![0.0; rule.npoints];
-            let mut trial_jdet = vec![0.0; rule.npoints];
-            let mut test_jinv = Array2D::<f64>::new((rule.npoints, 6));
-            let mut trial_jinv = Array2D::<f64>::new((rule.npoints, 6));
-            let mut test_mapped_pts = Array2D::<f64>::new((rule.npoints, 3));
-            let mut trial_mapped_pts = Array2D::<f64>::new((rule.npoints, 3));
-            let mut test_normals = Array2D::<f64>::new((rule.npoints, 3));
-            let mut trial_normals = Array2D::<f64>::new((rule.npoints, 3));
-
-            grid.geometry().compute_jacobian_determinants(
-                &test_points,
-                test_cell_gindex,
-                &mut test_jdet,
-            );
-            grid.geometry().compute_jacobian_determinants(
-                &trial_points,
-                trial_cell_gindex,
-                &mut trial_jdet,
-            );
-            grid.geometry().compute_jacobian_inverses(
-                &test_points,
-                test_cell_gindex,
-                &mut test_jinv,
-            );
-            grid.geometry().compute_jacobian_inverses(
-                &trial_points,
-                trial_cell_gindex,
-                &mut trial_jinv,
-            );
-            grid.geometry()
-                .compute_points(&test_points, test_cell_gindex, &mut test_mapped_pts);
-            grid.geometry()
-                .compute_points(&trial_points, trial_cell_gindex, &mut trial_mapped_pts);
-            grid.geometry()
-                .compute_normals(&test_points, test_cell_gindex, &mut test_normals);
-            grid.geometry()
-                .compute_normals(&trial_points, trial_cell_gindex, &mut trial_normals);
-
-            for (test_i, test_dof) in test_space
-                .dofmap()
-                .cell_dofs(test_cell_tindex)
-                .unwrap()
-                .iter()
-                .enumerate()
-            {
-                for (trial_i, trial_dof) in trial_space
-                    .dofmap()
-                    .cell_dofs(trial_cell_tindex)
-                    .unwrap()
-                    .iter()
-                    .enumerate()
-                {
-                    let mut sum = T::zero();
-
-                    for index in 0..rule.npoints {
-                        let g0 = (
-                            unsafe {
-                                *trial_jinv.get_unchecked(index, 0)
-                                    * *trial_table.get_unchecked(1, index, trial_i, 0)
-                                    + *trial_jinv.get_unchecked(index, 3)
-                                        * *trial_table.get_unchecked(2, index, trial_i, 0)
-                            },
-                            unsafe {
-                                *trial_jinv.get_unchecked(index, 1)
-                                    * *trial_table.get_unchecked(1, index, trial_i, 0)
-                                    + *trial_jinv.get_unchecked(index, 4)
-                                        * *trial_table.get_unchecked(2, index, trial_i, 0)
-                            },
-                            unsafe {
-                                *trial_jinv.get_unchecked(index, 2)
-                                    * *trial_table.get_unchecked(1, index, trial_i, 0)
-                                    + *trial_jinv.get_unchecked(index, 5)
-                                        * *trial_table.get_unchecked(2, index, trial_i, 0)
-                            },
-                        );
-                        let g1 = (
-                            unsafe {
-                                *test_jinv.get_unchecked(index, 0)
-                                    * *test_table.get_unchecked(1, index, test_i, 0)
-                                    + *test_jinv.get_unchecked(index, 3)
-                                        * *test_table.get_unchecked(2, index, test_i, 0)
-                            },
-                            unsafe {
-                                *test_jinv.get_unchecked(index, 1)
-                                    * *test_table.get_unchecked(1, index, test_i, 0)
-                                    + *test_jinv.get_unchecked(index, 4)
-                                        * *test_table.get_unchecked(2, index, test_i, 0)
-                            },
-                            unsafe {
-                                *test_jinv.get_unchecked(index, 2)
-                                    * *test_table.get_unchecked(1, index, test_i, 0)
-                                    + *test_jinv.get_unchecked(index, 5)
-                                        * *test_table.get_unchecked(2, index, test_i, 0)
-                            },
-                        );
-                        let n0 = (
-                            unsafe { *trial_normals.get_unchecked(index, 0) },
-                            unsafe { *trial_normals.get_unchecked(index, 1) },
-                            unsafe { *trial_normals.get_unchecked(index, 2) },
-                        );
-                        let n1 = (
-                            unsafe { *test_normals.get_unchecked(index, 0) },
-                            unsafe { *test_normals.get_unchecked(index, 1) },
-                            unsafe { *test_normals.get_unchecked(index, 2) },
-                        );
-
-                        let dot_curls = (g0.0 * g1.0 + g0.1 * g1.1 + g0.2 * g1.2)
-                            * (n0.0 * n1.0 + n0.1 * n1.1 + n0.2 * n1.2)
-                            - (g0.0 * n1.0 + g0.1 * n1.1 + g0.2 * n1.2)
-                                * (n0.0 * g1.0 + n0.1 * g1.1 + n0.2 * g1.2);
-
-                        sum += kernel.eval::<T>(
-                            unsafe { test_mapped_pts.row_unchecked(index) },
-                            unsafe { trial_mapped_pts.row_unchecked(index) },
-                            unsafe { test_normals.row_unchecked(index) },
-                            unsafe { trial_normals.row_unchecked(index) },
-                        ) * T::from_f64(
-                            rule.weights[index] * dot_curls * test_jdet[index] * trial_jdet[index],
-                        );
-                    }
-                    *output.get_mut(*test_dof, *trial_dof).unwrap() += sum;
-                }
-            }
-        }
-    }
-}
-
-pub fn laplace_hypersingular_assemble<'a, T: Scalar>(
-    output: &mut Array2D<T>,
-    trial_space: &impl FunctionSpace<'a>,
-    test_space: &impl FunctionSpace<'a>,
-) {
-    curl_curl_assemble(output, &LaplaceGreenKernel {}, trial_space, test_space);
-}
-
-pub fn helmholtz_hypersingular_assemble<'a, T: Scalar>(
-    output: &mut Array2D<T>,
-    trial_space: &impl FunctionSpace<'a>,
-    test_space: &impl FunctionSpace<'a>,
-    k: f64,
-) {
-    curl_curl_assemble(output, &HelmholtzGreenKernel { k }, trial_space, test_space);
-    assemble(
-        output,
-        &HelmholtzGreenHypersingularTermKernel { k },
-        true,
-        true,
-        trial_space,
-        test_space,
-    );
-}
-
-#[cfg(test)]
-mod test {
-    use crate::assembly::dense::*;
-    use crate::function_space::SerialFunctionSpace;
-    use crate::green;
-    use approx::*;
-    use bempp_element::element::create_element;
-    use bempp_grid::shapes::regular_sphere;
-    use bempp_traits::cell::ReferenceCellType;
-    use bempp_traits::element::ElementFamily;
-    use num::complex::Complex;
-
-    #[test]
-    fn test_laplace_single_layer_dp0_dp0() {
-        let grid = regular_sphere(0);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            0,
-            true,
-        );
-        let space = SerialFunctionSpace::new(&grid, &element);
-
-        let ndofs = space.dofmap().global_size();
-
-        let mut matrix = Array2D::<f64>::new((ndofs, ndofs));
-        assemble(
-            &mut matrix,
-            &green::LaplaceGreenKernel {},
-            false,
-            false,
-            &space,
-            &space,
-        );
-
-        // Compare to result from bempp-cl
-        let from_cl = vec![
-            vec![
-                0.1854538822982487,
-                0.08755414595678074,
-                0.05963897421514472,
-                0.08755414595678074,
-                0.08755414595678074,
-                0.05963897421514473,
-                0.04670742127454548,
-                0.05963897421514472,
-            ],
-            vec![
-                0.08755414595678074,
-                0.1854538822982487,
-                0.08755414595678074,
-                0.05963897421514472,
-                0.05963897421514472,
-                0.08755414595678074,
-                0.05963897421514473,
-                0.04670742127454548,
-            ],
-            vec![
-                0.05963897421514472,
-                0.08755414595678074,
-                0.1854538822982487,
-                0.08755414595678074,
-                0.04670742127454548,
-                0.05963897421514472,
-                0.08755414595678074,
-                0.05963897421514473,
-            ],
-            vec![
-                0.08755414595678074,
-                0.05963897421514472,
-                0.08755414595678074,
-                0.1854538822982487,
-                0.05963897421514473,
-                0.04670742127454548,
-                0.05963897421514472,
-                0.08755414595678074,
-            ],
-            vec![
-                0.08755414595678074,
-                0.05963897421514472,
-                0.046707421274545476,
-                0.05963897421514473,
-                0.1854538822982487,
-                0.08755414595678074,
-                0.05963897421514472,
-                0.08755414595678074,
-            ],
-            vec![
-                0.05963897421514473,
-                0.08755414595678074,
-                0.05963897421514472,
-                0.046707421274545476,
-                0.08755414595678074,
-                0.1854538822982487,
-                0.08755414595678074,
-                0.05963897421514472,
-            ],
-            vec![
-                0.046707421274545476,
-                0.05963897421514473,
-                0.08755414595678074,
-                0.05963897421514472,
-                0.05963897421514472,
-                0.08755414595678074,
-                0.1854538822982487,
-                0.08755414595678074,
-            ],
-            vec![
-                0.05963897421514472,
-                0.046707421274545476,
-                0.05963897421514473,
-                0.08755414595678074,
-                0.08755414595678074,
-                0.05963897421514472,
-                0.08755414595678074,
-                0.1854538822982487,
-            ],
-        ];
-
-        for (i, row) in from_cl.iter().enumerate() {
-            for (j, entry) in row.iter().enumerate() {
-                assert_relative_eq!(*matrix.get(i, j).unwrap(), entry, epsilon = 1e-4);
-            }
-        }
-    }
-
-    #[test]
-    fn test_laplace_double_layer_dp0_dp0() {
-        let grid = regular_sphere(0);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            0,
-            true,
-        );
-        let space = SerialFunctionSpace::new(&grid, &element);
-
-        let ndofs = space.dofmap().global_size();
-
-        let mut matrix = Array2D::<f64>::new((ndofs, ndofs));
-        assemble(
-            &mut matrix,
-            &green::LaplaceGreenDyKernel {},
-            true,
-            false,
-            &space,
-            &space,
-        );
-
-        // Compare to result from bempp-cl
-        let from_cl = vec![
-            vec![
-                -1.9658941517361406e-33,
-                -0.08477786720045567,
-                -0.048343860959178774,
-                -0.08477786720045567,
-                -0.08477786720045566,
-                -0.048343860959178774,
-                -0.033625570841778946,
-                -0.04834386095917877,
-            ],
-            vec![
-                -0.08477786720045567,
-                -1.9658941517361406e-33,
-                -0.08477786720045567,
-                -0.048343860959178774,
-                -0.04834386095917877,
-                -0.08477786720045566,
-                -0.048343860959178774,
-                -0.033625570841778946,
-            ],
-            vec![
-                -0.048343860959178774,
-                -0.08477786720045567,
-                -1.9658941517361406e-33,
-                -0.08477786720045567,
-                -0.033625570841778946,
-                -0.04834386095917877,
-                -0.08477786720045566,
-                -0.048343860959178774,
-            ],
-            vec![
-                -0.08477786720045567,
-                -0.048343860959178774,
-                -0.08477786720045567,
-                -1.9658941517361406e-33,
-                -0.048343860959178774,
-                -0.033625570841778946,
-                -0.04834386095917877,
-                -0.08477786720045566,
-            ],
-            vec![
-                -0.08477786720045566,
-                -0.04834386095917877,
-                -0.033625570841778946,
-                -0.04834386095917877,
-                4.910045345075783e-33,
-                -0.08477786720045566,
-                -0.048343860959178774,
-                -0.08477786720045566,
-            ],
-            vec![
-                -0.04834386095917877,
-                -0.08477786720045566,
-                -0.04834386095917877,
-                -0.033625570841778946,
-                -0.08477786720045566,
-                4.910045345075783e-33,
-                -0.08477786720045566,
-                -0.048343860959178774,
-            ],
-            vec![
-                -0.033625570841778946,
-                -0.04834386095917877,
-                -0.08477786720045566,
-                -0.04834386095917877,
-                -0.048343860959178774,
-                -0.08477786720045566,
-                4.910045345075783e-33,
-                -0.08477786720045566,
-            ],
-            vec![
-                -0.04834386095917877,
-                -0.033625570841778946,
-                -0.04834386095917877,
-                -0.08477786720045566,
-                -0.08477786720045566,
-                -0.048343860959178774,
-                -0.08477786720045566,
-                4.910045345075783e-33,
-            ],
-        ];
-
-        for (i, row) in from_cl.iter().enumerate() {
-            for (j, entry) in row.iter().enumerate() {
-                assert_relative_eq!(*matrix.get(i, j).unwrap(), entry, epsilon = 1e-4);
-            }
-        }
-    }
-
-    #[test]
-    fn test_laplace_adjoint_double_layer_dp0_dp0() {
-        let grid = regular_sphere(0);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            0,
-            true,
-        );
-        let space = SerialFunctionSpace::new(&grid, &element);
-
-        let ndofs = space.dofmap().global_size();
-
-        let mut matrix = Array2D::<f64>::new((ndofs, ndofs));
-        assemble(
-            &mut matrix,
-            &green::LaplaceGreenDxKernel {},
-            false,
-            true,
-            &space,
-            &space,
-        );
-
-        // Compare to result from bempp-cl
-        let from_cl = vec![
-            vec![
-                1.9658941517361406e-33,
-                -0.08478435261011981,
-                -0.048343860959178774,
-                -0.0847843526101198,
-                -0.08478435261011981,
-                -0.04834386095917877,
-                -0.033625570841778946,
-                -0.048343860959178774,
-            ],
-            vec![
-                -0.0847843526101198,
-                1.9658941517361406e-33,
-                -0.08478435261011981,
-                -0.048343860959178774,
-                -0.048343860959178774,
-                -0.08478435261011981,
-                -0.04834386095917877,
-                -0.033625570841778946,
-            ],
-            vec![
-                -0.048343860959178774,
-                -0.0847843526101198,
-                1.9658941517361406e-33,
-                -0.08478435261011981,
-                -0.033625570841778946,
-                -0.048343860959178774,
-                -0.08478435261011981,
-                -0.04834386095917877,
-            ],
-            vec![
-                -0.08478435261011981,
-                -0.048343860959178774,
-                -0.0847843526101198,
-                1.9658941517361406e-33,
-                -0.04834386095917877,
-                -0.033625570841778946,
-                -0.048343860959178774,
-                -0.08478435261011981,
-            ],
-            vec![
-                -0.0847843526101198,
-                -0.04834386095917877,
-                -0.033625570841778946,
-                -0.04834386095917877,
-                -4.910045345075783e-33,
-                -0.0847843526101198,
-                -0.048343860959178774,
-                -0.08478435261011981,
-            ],
-            vec![
-                -0.04834386095917877,
-                -0.0847843526101198,
-                -0.04834386095917877,
-                -0.033625570841778946,
-                -0.08478435261011981,
-                -4.910045345075783e-33,
-                -0.0847843526101198,
-                -0.048343860959178774,
-            ],
-            vec![
-                -0.033625570841778946,
-                -0.04834386095917877,
-                -0.0847843526101198,
-                -0.04834386095917877,
-                -0.048343860959178774,
-                -0.08478435261011981,
-                -4.910045345075783e-33,
-                -0.0847843526101198,
-            ],
-            vec![
-                -0.04834386095917877,
-                -0.033625570841778946,
-                -0.04834386095917877,
-                -0.0847843526101198,
-                -0.0847843526101198,
-                -0.048343860959178774,
-                -0.08478435261011981,
-                -4.910045345075783e-33,
-            ],
-        ];
-
-        for (i, row) in from_cl.iter().enumerate() {
-            for (j, entry) in row.iter().enumerate() {
-                assert_relative_eq!(*matrix.get(i, j).unwrap(), entry, epsilon = 1e-4);
-            }
-        }
-    }
-
-    #[test]
-    fn test_laplace_hypersingular_dp0_dp0() {
-        let grid = regular_sphere(0);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            0,
-            true,
-        );
-        let space = SerialFunctionSpace::new(&grid, &element);
-
-        let ndofs = space.dofmap().global_size();
-
-        let mut matrix = Array2D::<f64>::new((ndofs, ndofs));
-        laplace_hypersingular_assemble(&mut matrix, &space, &space);
-
-        for i in 0..ndofs {
-            for j in 0..ndofs {
-                assert_relative_eq!(*matrix.get(i, j).unwrap(), 0.0, epsilon = 1e-4);
-            }
-        }
-    }
-
-    #[test]
-    fn test_laplace_hypersingular_p1_p1() {
-        let grid = regular_sphere(0);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            1,
-            false,
-        );
-        let space = SerialFunctionSpace::new(&grid, &element);
-
-        let ndofs = space.dofmap().global_size();
-
-        let mut matrix = Array2D::<f64>::new((ndofs, ndofs));
-
-        laplace_hypersingular_assemble(&mut matrix, &space, &space);
-
-        // Compare to result from bempp-cl
-        let from_cl = vec![
-            vec![
-                0.33550642155494004,
-                -0.10892459915262698,
-                -0.05664545560057827,
-                -0.05664545560057828,
-                -0.0566454556005783,
-                -0.05664545560057828,
-            ],
-            vec![
-                -0.10892459915262698,
-                0.33550642155494004,
-                -0.05664545560057828,
-                -0.05664545560057827,
-                -0.05664545560057828,
-                -0.05664545560057829,
-            ],
-            vec![
-                -0.05664545560057828,
-                -0.05664545560057827,
-                0.33550642155494004,
-                -0.10892459915262698,
-                -0.056645455600578286,
-                -0.05664545560057829,
-            ],
-            vec![
-                -0.05664545560057827,
-                -0.05664545560057828,
-                -0.10892459915262698,
-                0.33550642155494004,
-                -0.05664545560057828,
-                -0.056645455600578286,
-            ],
-            vec![
-                -0.05664545560057829,
-                -0.0566454556005783,
-                -0.05664545560057829,
-                -0.05664545560057829,
-                0.33550642155494004,
-                -0.10892459915262698,
-            ],
-            vec![
-                -0.05664545560057829,
-                -0.05664545560057831,
-                -0.05664545560057829,
-                -0.05664545560057829,
-                -0.10892459915262698,
-                0.33550642155494004,
-            ],
-        ];
-
-        let perm = [0, 5, 2, 4, 3, 1];
-
-        for (i, pi) in perm.iter().enumerate() {
-            for (j, pj) in perm.iter().enumerate() {
-                assert_relative_eq!(
-                    *matrix.get(i, j).unwrap(),
-                    from_cl[*pi][*pj],
-                    epsilon = 1e-4
-                );
-            }
-        }
-    }
-
-    #[test]
-    fn test_helmholtz_single_layer_real_dp0_dp0() {
-        let grid = regular_sphere(0);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            0,
-            true,
-        );
-        let space = SerialFunctionSpace::new(&grid, &element);
-
-        let ndofs = space.dofmap().global_size();
-
-        let mut matrix = Array2D::<f64>::new((ndofs, ndofs));
-        assemble(
-            &mut matrix,
-            &green::HelmholtzGreenKernel { k: 3.0 },
-            false,
-            false,
-            &space,
-            &space,
-        );
-
-        // Compare to result from bempp-cl
-        let from_cl = vec![
-            vec![
-                0.08742460357596939,
-                -0.02332791148192136,
-                -0.04211947809894265,
-                -0.02332791148192136,
-                -0.023327911481921364,
-                -0.042119478098942634,
-                -0.03447046598405515,
-                -0.04211947809894265,
-            ],
-            vec![
-                -0.023327911481921364,
-                0.08742460357596939,
-                -0.02332791148192136,
-                -0.04211947809894265,
-                -0.04211947809894265,
-                -0.02332791148192136,
-                -0.042119478098942634,
-                -0.03447046598405515,
-            ],
-            vec![
-                -0.04211947809894265,
-                -0.02332791148192136,
-                0.08742460357596939,
-                -0.02332791148192136,
-                -0.03447046598405515,
-                -0.04211947809894265,
-                -0.023327911481921364,
-                -0.042119478098942634,
-            ],
-            vec![
-                -0.02332791148192136,
-                -0.04211947809894265,
-                -0.023327911481921364,
-                0.08742460357596939,
-                -0.042119478098942634,
-                -0.03447046598405515,
-                -0.04211947809894265,
-                -0.02332791148192136,
-            ],
-            vec![
-                -0.023327911481921364,
-                -0.04211947809894265,
-                -0.03447046598405515,
-                -0.042119478098942634,
-                0.08742460357596939,
-                -0.02332791148192136,
-                -0.04211947809894265,
-                -0.023327911481921364,
-            ],
-            vec![
-                -0.042119478098942634,
-                -0.02332791148192136,
-                -0.04211947809894265,
-                -0.034470465984055156,
-                -0.02332791148192136,
-                0.08742460357596939,
-                -0.023327911481921364,
-                -0.04211947809894265,
-            ],
-            vec![
-                -0.03447046598405515,
-                -0.042119478098942634,
-                -0.023327911481921364,
-                -0.04211947809894265,
-                -0.04211947809894265,
-                -0.023327911481921364,
-                0.08742460357596939,
-                -0.02332791148192136,
-            ],
-            vec![
-                -0.04211947809894265,
-                -0.034470465984055156,
-                -0.042119478098942634,
-                -0.02332791148192136,
-                -0.023327911481921364,
-                -0.04211947809894265,
-                -0.02332791148192136,
-                0.08742460357596939,
-            ],
-        ];
-
-        for (i, row) in from_cl.iter().enumerate() {
-            for (j, entry) in row.iter().enumerate() {
-                assert_relative_eq!(*matrix.get(i, j).unwrap(), entry, epsilon = 1e-4);
-            }
-        }
-    }
-    #[test]
-    fn test_helmholtz_single_layer_complex_dp0_dp0() {
-        let grid = regular_sphere(0);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            0,
-            true,
-        );
-        let space = SerialFunctionSpace::new(&grid, &element);
-
-        let ndofs = space.dofmap().global_size();
-
-        let mut matrix = Array2D::<Complex<f64>>::new((ndofs, ndofs));
-        assemble(
-            &mut matrix,
-            &green::HelmholtzGreenKernel { k: 3.0 },
-            false,
-            false,
-            &space,
-            &space,
-        );
-
-        // Compare to result from bempp-cl
-        let from_cl = vec![
-            vec![
-                Complex::new(0.08742460357596939, 0.11004203436820102),
-                Complex::new(-0.02332791148192136, 0.04919102584271124),
-                Complex::new(-0.04211947809894265, 0.003720159902487029),
-                Complex::new(-0.02332791148192136, 0.04919102584271125),
-                Complex::new(-0.023327911481921364, 0.04919102584271124),
-                Complex::new(-0.042119478098942634, 0.003720159902487025),
-                Complex::new(-0.03447046598405515, -0.02816544680626108),
-                Complex::new(-0.04211947809894265, 0.0037201599024870254),
-            ],
-            vec![
-                Complex::new(-0.023327911481921364, 0.04919102584271125),
-                Complex::new(0.08742460357596939, 0.11004203436820104),
-                Complex::new(-0.02332791148192136, 0.04919102584271124),
-                Complex::new(-0.04211947809894265, 0.0037201599024870263),
-                Complex::new(-0.04211947809894265, 0.0037201599024870254),
-                Complex::new(-0.02332791148192136, 0.04919102584271125),
-                Complex::new(-0.042119478098942634, 0.003720159902487025),
-                Complex::new(-0.03447046598405515, -0.028165446806261072),
-            ],
-            vec![
-                Complex::new(-0.04211947809894265, 0.003720159902487029),
-                Complex::new(-0.02332791148192136, 0.04919102584271125),
-                Complex::new(0.08742460357596939, 0.11004203436820102),
-                Complex::new(-0.02332791148192136, 0.04919102584271124),
-                Complex::new(-0.03447046598405515, -0.02816544680626108),
-                Complex::new(-0.04211947809894265, 0.0037201599024870254),
-                Complex::new(-0.023327911481921364, 0.04919102584271124),
-                Complex::new(-0.042119478098942634, 0.003720159902487025),
-            ],
-            vec![
-                Complex::new(-0.02332791148192136, 0.04919102584271124),
-                Complex::new(-0.04211947809894265, 0.0037201599024870263),
-                Complex::new(-0.023327911481921364, 0.04919102584271125),
-                Complex::new(0.08742460357596939, 0.11004203436820104),
-                Complex::new(-0.042119478098942634, 0.003720159902487025),
-                Complex::new(-0.03447046598405515, -0.028165446806261072),
-                Complex::new(-0.04211947809894265, 0.0037201599024870254),
-                Complex::new(-0.02332791148192136, 0.04919102584271125),
-            ],
-            vec![
-                Complex::new(-0.023327911481921364, 0.04919102584271125),
-                Complex::new(-0.04211947809894265, 0.0037201599024870263),
-                Complex::new(-0.03447046598405515, -0.02816544680626108),
-                Complex::new(-0.042119478098942634, 0.003720159902487025),
-                Complex::new(0.08742460357596939, 0.11004203436820104),
-                Complex::new(-0.02332791148192136, 0.04919102584271124),
-                Complex::new(-0.04211947809894265, 0.0037201599024870267),
-                Complex::new(-0.023327911481921364, 0.04919102584271125),
-            ],
-            vec![
-                Complex::new(-0.042119478098942634, 0.003720159902487025),
-                Complex::new(-0.02332791148192136, 0.04919102584271125),
-                Complex::new(-0.04211947809894265, 0.0037201599024870263),
-                Complex::new(-0.034470465984055156, -0.028165446806261075),
-                Complex::new(-0.02332791148192136, 0.04919102584271124),
-                Complex::new(0.08742460357596939, 0.11004203436820104),
-                Complex::new(-0.023327911481921364, 0.04919102584271125),
-                Complex::new(-0.04211947809894265, 0.0037201599024870237),
-            ],
-            vec![
-                Complex::new(-0.03447046598405515, -0.02816544680626108),
-                Complex::new(-0.042119478098942634, 0.003720159902487025),
-                Complex::new(-0.023327911481921364, 0.04919102584271125),
-                Complex::new(-0.04211947809894265, 0.0037201599024870263),
-                Complex::new(-0.04211947809894265, 0.0037201599024870267),
-                Complex::new(-0.023327911481921364, 0.04919102584271125),
-                Complex::new(0.08742460357596939, 0.11004203436820104),
-                Complex::new(-0.02332791148192136, 0.04919102584271124),
-            ],
-            vec![
-                Complex::new(-0.04211947809894265, 0.0037201599024870263),
-                Complex::new(-0.034470465984055156, -0.028165446806261075),
-                Complex::new(-0.042119478098942634, 0.003720159902487025),
-                Complex::new(-0.02332791148192136, 0.04919102584271125),
-                Complex::new(-0.023327911481921364, 0.04919102584271125),
-                Complex::new(-0.04211947809894265, 0.0037201599024870237),
-                Complex::new(-0.02332791148192136, 0.04919102584271124),
-                Complex::new(0.08742460357596939, 0.11004203436820104),
-            ],
-        ];
-        for (i, row) in from_cl.iter().enumerate() {
-            for (j, entry) in row.iter().enumerate() {
-                assert_relative_eq!(matrix.get(i, j).unwrap().re, entry.re, epsilon = 1e-4);
-                assert_relative_eq!(matrix.get(i, j).unwrap().im, entry.im, epsilon = 1e-4);
-            }
-        }
-    }
-
-    #[test]
-    fn test_helmholtz_double_layer_dp0_dp0() {
-        let grid = regular_sphere(0);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            0,
-            true,
-        );
-        let space = SerialFunctionSpace::new(&grid, &element);
-
-        let ndofs = space.dofmap().global_size();
-
-        let mut matrix = Array2D::<Complex<f64>>::new((ndofs, ndofs));
-        assemble(
-            &mut matrix,
-            &green::HelmholtzGreenDyKernel { k: 3.0 },
-            true,
-            false,
-            &space,
-            &space,
-        );
-
-        // Compare to result from bempp-cl
-        let from_cl = vec![
-            vec![
-                Complex::new(-1.025266688854119e-33, -7.550086433767158e-36),
-                Complex::new(-0.07902626473768169, -0.08184681047051735),
-                Complex::new(0.01906923918000321, -0.10276858786959298),
-                Complex::new(-0.07902626473768172, -0.08184681047051737),
-                Complex::new(-0.07902626473768169, -0.08184681047051737),
-                Complex::new(0.01906923918000323, -0.10276858786959302),
-                Complex::new(0.10089706509966115, -0.07681163409722505),
-                Complex::new(0.019069239180003215, -0.10276858786959299),
-            ],
-            vec![
-                Complex::new(-0.07902626473768172, -0.08184681047051737),
-                Complex::new(-1.025266688854119e-33, 1.0291684702482414e-35),
-                Complex::new(-0.0790262647376817, -0.08184681047051737),
-                Complex::new(0.019069239180003212, -0.10276858786959299),
-                Complex::new(0.019069239180003212, -0.10276858786959298),
-                Complex::new(-0.07902626473768168, -0.08184681047051737),
-                Complex::new(0.01906923918000323, -0.10276858786959299),
-                Complex::new(0.10089706509966115, -0.07681163409722506),
-            ],
-            vec![
-                Complex::new(0.01906923918000321, -0.10276858786959298),
-                Complex::new(-0.07902626473768172, -0.08184681047051737),
-                Complex::new(-1.025266688854119e-33, -7.550086433767158e-36),
-                Complex::new(-0.07902626473768169, -0.08184681047051735),
-                Complex::new(0.10089706509966115, -0.07681163409722505),
-                Complex::new(0.019069239180003215, -0.10276858786959299),
-                Complex::new(-0.07902626473768169, -0.08184681047051737),
-                Complex::new(0.01906923918000323, -0.10276858786959302),
-            ],
-            vec![
-                Complex::new(-0.0790262647376817, -0.08184681047051737),
-                Complex::new(0.019069239180003212, -0.10276858786959299),
-                Complex::new(-0.07902626473768172, -0.08184681047051737),
-                Complex::new(-1.025266688854119e-33, 1.0291684702482414e-35),
-                Complex::new(0.01906923918000323, -0.10276858786959299),
-                Complex::new(0.10089706509966115, -0.07681163409722506),
-                Complex::new(0.019069239180003212, -0.10276858786959298),
-                Complex::new(-0.07902626473768168, -0.08184681047051737),
-            ],
-            vec![
-                Complex::new(-0.07902626473768172, -0.08184681047051737),
-                Complex::new(0.019069239180003215, -0.10276858786959298),
-                Complex::new(0.10089706509966115, -0.07681163409722505),
-                Complex::new(0.01906923918000323, -0.10276858786959299),
-                Complex::new(5.00373588753262e-33, -1.8116810507789718e-36),
-                Complex::new(-0.07902626473768169, -0.08184681047051735),
-                Complex::new(0.019069239180003212, -0.10276858786959299),
-                Complex::new(-0.07902626473768169, -0.08184681047051737),
-            ],
-            vec![
-                Complex::new(0.019069239180003222, -0.10276858786959299),
-                Complex::new(-0.07902626473768173, -0.08184681047051737),
-                Complex::new(0.01906923918000322, -0.10276858786959299),
-                Complex::new(0.10089706509966115, -0.07681163409722506),
-                Complex::new(-0.07902626473768169, -0.08184681047051735),
-                Complex::new(7.314851820797302e-33, -1.088140415641433e-35),
-                Complex::new(-0.07902626473768169, -0.08184681047051737),
-                Complex::new(0.01906923918000322, -0.10276858786959299),
-            ],
-            vec![
-                Complex::new(0.10089706509966115, -0.07681163409722505),
-                Complex::new(0.01906923918000323, -0.10276858786959299),
-                Complex::new(-0.07902626473768172, -0.08184681047051737),
-                Complex::new(0.019069239180003215, -0.10276858786959298),
-                Complex::new(0.019069239180003212, -0.10276858786959299),
-                Complex::new(-0.07902626473768169, -0.08184681047051737),
-                Complex::new(5.00373588753262e-33, -1.8116810507789718e-36),
-                Complex::new(-0.07902626473768169, -0.08184681047051735),
-            ],
-            vec![
-                Complex::new(0.01906923918000322, -0.10276858786959299),
-                Complex::new(0.10089706509966115, -0.07681163409722506),
-                Complex::new(0.019069239180003222, -0.10276858786959299),
-                Complex::new(-0.07902626473768173, -0.08184681047051737),
-                Complex::new(-0.07902626473768169, -0.08184681047051737),
-                Complex::new(0.01906923918000322, -0.10276858786959299),
-                Complex::new(-0.07902626473768169, -0.08184681047051735),
-                Complex::new(7.314851820797302e-33, -1.088140415641433e-35),
-            ],
-        ];
-
-        for (i, row) in from_cl.iter().enumerate() {
-            for (j, entry) in row.iter().enumerate() {
-                assert_relative_eq!(matrix.get(i, j).unwrap().re, entry.re, epsilon = 1e-4);
-                assert_relative_eq!(matrix.get(i, j).unwrap().im, entry.im, epsilon = 1e-4);
-            }
-        }
-    }
-
-    #[test]
-    fn test_helmholtz_adjoint_double_layer_dp0_dp0() {
-        let grid = regular_sphere(0);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            0,
-            true,
-        );
-        let space = SerialFunctionSpace::new(&grid, &element);
-
-        let ndofs = space.dofmap().global_size();
-
-        let mut matrix = Array2D::<Complex<f64>>::new((ndofs, ndofs));
-        assemble(
-            &mut matrix,
-            &green::HelmholtzGreenDxKernel { k: 3.0 },
-            false,
-            true,
-            &space,
-            &space,
-        );
-
-        // Compare to result from bempp-cl
-        let from_cl = vec![
-            vec![
-                Complex::new(1.025266688854119e-33, 7.550086433767158e-36),
-                Complex::new(-0.079034545070751, -0.08184700030244885),
-                Complex::new(0.019069239180003205, -0.10276858786959298),
-                Complex::new(-0.07903454507075097, -0.08184700030244886),
-                Complex::new(-0.07903454507075099, -0.08184700030244887),
-                Complex::new(0.01906923918000323, -0.10276858786959299),
-                Complex::new(0.10089706509966115, -0.07681163409722505),
-                Complex::new(0.019069239180003212, -0.10276858786959298),
-            ],
-            vec![
-                Complex::new(-0.07903454507075097, -0.08184700030244885),
-                Complex::new(1.025266688854119e-33, -1.0291684702482414e-35),
-                Complex::new(-0.079034545070751, -0.08184700030244887),
-                Complex::new(0.01906923918000321, -0.10276858786959298),
-                Complex::new(0.01906923918000321, -0.10276858786959298),
-                Complex::new(-0.07903454507075099, -0.08184700030244887),
-                Complex::new(0.019069239180003233, -0.10276858786959299),
-                Complex::new(0.10089706509966115, -0.07681163409722506),
-            ],
-            vec![
-                Complex::new(0.019069239180003205, -0.10276858786959298),
-                Complex::new(-0.07903454507075097, -0.08184700030244886),
-                Complex::new(1.025266688854119e-33, 7.550086433767158e-36),
-                Complex::new(-0.079034545070751, -0.08184700030244885),
-                Complex::new(0.10089706509966115, -0.07681163409722505),
-                Complex::new(0.019069239180003212, -0.10276858786959298),
-                Complex::new(-0.07903454507075099, -0.08184700030244887),
-                Complex::new(0.01906923918000323, -0.10276858786959299),
-            ],
-            vec![
-                Complex::new(-0.079034545070751, -0.08184700030244887),
-                Complex::new(0.01906923918000321, -0.10276858786959298),
-                Complex::new(-0.07903454507075097, -0.08184700030244885),
-                Complex::new(1.025266688854119e-33, -1.0291684702482414e-35),
-                Complex::new(0.019069239180003233, -0.10276858786959299),
-                Complex::new(0.10089706509966115, -0.07681163409722506),
-                Complex::new(0.01906923918000321, -0.10276858786959298),
-                Complex::new(-0.07903454507075099, -0.08184700030244887),
-            ],
-            vec![
-                Complex::new(-0.07903454507075099, -0.08184700030244887),
-                Complex::new(0.01906923918000321, -0.10276858786959298),
-                Complex::new(0.10089706509966115, -0.07681163409722505),
-                Complex::new(0.01906923918000323, -0.10276858786959302),
-                Complex::new(-5.00373588753262e-33, 1.8116810507789718e-36),
-                Complex::new(-0.07903454507075099, -0.08184700030244885),
-                Complex::new(0.01906923918000321, -0.10276858786959298),
-                Complex::new(-0.07903454507075099, -0.08184700030244886),
-            ],
-            vec![
-                Complex::new(0.019069239180003233, -0.10276858786959302),
-                Complex::new(-0.07903454507075099, -0.08184700030244886),
-                Complex::new(0.019069239180003212, -0.10276858786959298),
-                Complex::new(0.10089706509966115, -0.07681163409722506),
-                Complex::new(-0.07903454507075099, -0.08184700030244885),
-                Complex::new(-7.314851820797302e-33, 1.088140415641433e-35),
-                Complex::new(-0.07903454507075099, -0.08184700030244886),
-                Complex::new(0.019069239180003215, -0.10276858786959298),
-            ],
-            vec![
-                Complex::new(0.10089706509966115, -0.07681163409722505),
-                Complex::new(0.01906923918000323, -0.10276858786959302),
-                Complex::new(-0.07903454507075099, -0.08184700030244887),
-                Complex::new(0.01906923918000321, -0.10276858786959298),
-                Complex::new(0.01906923918000321, -0.10276858786959298),
-                Complex::new(-0.07903454507075099, -0.08184700030244886),
-                Complex::new(-5.00373588753262e-33, 1.8116810507789718e-36),
-                Complex::new(-0.07903454507075099, -0.08184700030244885),
-            ],
-            vec![
-                Complex::new(0.019069239180003212, -0.10276858786959298),
-                Complex::new(0.10089706509966115, -0.07681163409722506),
-                Complex::new(0.019069239180003233, -0.10276858786959302),
-                Complex::new(-0.07903454507075099, -0.08184700030244886),
-                Complex::new(-0.07903454507075099, -0.08184700030244886),
-                Complex::new(0.019069239180003215, -0.10276858786959298),
-                Complex::new(-0.07903454507075099, -0.08184700030244885),
-                Complex::new(-7.314851820797302e-33, 1.088140415641433e-35),
-            ],
-        ];
-
-        for (i, row) in from_cl.iter().enumerate() {
-            for (j, entry) in row.iter().enumerate() {
-                assert_relative_eq!(matrix.get(i, j).unwrap().re, entry.re, epsilon = 1e-4);
-                assert_relative_eq!(matrix.get(i, j).unwrap().im, entry.im, epsilon = 1e-4);
-            }
-        }
-    }
-
-    #[test]
-    fn test_helmholtz_hypersingular_p1_p1() {
-        let grid = regular_sphere(0);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            1,
-            false,
-        );
-        let space = SerialFunctionSpace::new(&grid, &element);
-
-        let ndofs = space.dofmap().global_size();
-
-        let mut matrix = Array2D::<Complex<f64>>::new((ndofs, ndofs));
-
-        helmholtz_hypersingular_assemble(&mut matrix, &space, &space, 3.0);
-
-        // Compare to result from bempp-cl
-        let from_cl = vec![
-            vec![
-                Complex::new(-0.24054975187128322, -0.37234907871793793),
-                Complex::new(-0.2018803657726846, -0.3708486980714607),
-                Complex::new(-0.31151549914430937, -0.36517694339435425),
-                Complex::new(-0.31146604913280734, -0.3652407688678574),
-                Complex::new(-0.3114620814217625, -0.36524076431695807),
-                Complex::new(-0.311434147468966, -0.36530056813389983),
-            ],
-            vec![
-                Complex::new(-0.2018803657726846, -0.3708486980714607),
-                Complex::new(-0.24054975187128322, -0.3723490787179379),
-                Complex::new(-0.31146604913280734, -0.3652407688678574),
-                Complex::new(-0.31151549914430937, -0.36517694339435425),
-                Complex::new(-0.3114620814217625, -0.36524076431695807),
-                Complex::new(-0.311434147468966, -0.36530056813389983),
-            ],
-            vec![
-                Complex::new(-0.31146604913280734, -0.3652407688678574),
-                Complex::new(-0.31151549914430937, -0.36517694339435425),
-                Complex::new(-0.24054975187128322, -0.3723490787179379),
-                Complex::new(-0.2018803657726846, -0.3708486980714607),
-                Complex::new(-0.31146208142176246, -0.36524076431695807),
-                Complex::new(-0.31143414746896597, -0.36530056813389983),
-            ],
-            vec![
-                Complex::new(-0.31151549914430937, -0.36517694339435425),
-                Complex::new(-0.31146604913280734, -0.3652407688678574),
-                Complex::new(-0.2018803657726846, -0.3708486980714607),
-                Complex::new(-0.24054975187128322, -0.3723490787179379),
-                Complex::new(-0.3114620814217625, -0.36524076431695807),
-                Complex::new(-0.311434147468966, -0.36530056813389983),
-            ],
-            vec![
-                Complex::new(-0.31146208142176257, -0.36524076431695807),
-                Complex::new(-0.3114620814217625, -0.3652407643169581),
-                Complex::new(-0.3114620814217625, -0.3652407643169581),
-                Complex::new(-0.3114620814217625, -0.3652407643169581),
-                Complex::new(-0.24056452443903534, -0.37231826606213236),
-                Complex::new(-0.20188036577268464, -0.37084869807146076),
-            ],
-            vec![
-                Complex::new(-0.3114335658086867, -0.36530052927274986),
-                Complex::new(-0.31143356580868675, -0.36530052927274986),
-                Complex::new(-0.3114335658086867, -0.36530052927274986),
-                Complex::new(-0.3114335658086867, -0.36530052927274986),
-                Complex::new(-0.2018803657726846, -0.37084869807146076),
-                Complex::new(-0.2402983805938184, -0.37203286968364935),
-            ],
-        ];
-
-        let perm = [0, 5, 2, 4, 3, 1];
-
-        for (i, pi) in perm.iter().enumerate() {
-            for (j, pj) in perm.iter().enumerate() {
-                assert_relative_eq!(
-                    matrix.get(i, j).unwrap().re,
-                    from_cl[*pi][*pj].re,
-                    epsilon = 1e-3
-                );
-                assert_relative_eq!(
-                    matrix.get(i, j).unwrap().im,
-                    from_cl[*pi][*pj].im,
-                    epsilon = 1e-3
-                );
-            }
-        }
-    }
-}
diff --git a/element/Cargo.toml b/element/Cargo.toml
index 04598fd4..6571ffc3 100644
--- a/element/Cargo.toml
+++ b/element/Cargo.toml
@@ -26,8 +26,7 @@ bempp-quadrature = { path = "../quadrature" }
 paste = "1.*"
 libc = "0.2"
 approx = "0.5"
-rlst = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
-rlst-blis-src = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
-rlst-dense = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy" }
-rlst-algorithms = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy" }
-
+rlst = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-blis-src = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-dense = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-common = { git = "https://github.com/linalg-rs/rlst.git" }
diff --git a/element/src/element.rs b/element/src/element.rs
index 32269d33..6c83f5c5 100644
--- a/element/src/element.rs
+++ b/element/src/element.rs
@@ -3,12 +3,12 @@
 use crate::cell::create_cell;
 use crate::polynomials::{legendre_shape, polynomial_count, tabulate_legendre_polynomials};
 use bempp_tools::arrays::{AdjacencyList, Array3D, Mat};
-use bempp_traits::arrays::{AdjacencyListAccess, Array3DAccess, Array4DAccess};
+use bempp_traits::arrays::AdjacencyListAccess;
 use bempp_traits::cell::ReferenceCellType;
 use bempp_traits::element::{Continuity, ElementFamily, FiniteElement, MapType};
-use rlst_algorithms::linalg::LinAlg;
-use rlst_algorithms::traits::inverse::Inverse;
-use rlst_dense::{rlst_dynamic_mat, RandomAccessByRef, RandomAccessMut, Shape};
+use rlst_dense::linalg::inverse::MatrixInverse;
+use rlst_dense::traits::{RandomAccessByRef, RandomAccessMut, Shape, UnsafeRandomAccessMut};
+use rlst_dense::{rlst_dynamic_array2, rlst_dynamic_array3};
 pub mod lagrange;
 pub mod raviart_thomas;
 
@@ -48,8 +48,8 @@ impl CiarletElement {
 
         for emats in &interpolation_weights {
             for mat in emats {
-                dim += mat.shape().0;
-                npts += mat.shape().2;
+                dim += mat.shape()[0];
+                npts += mat.shape()[2];
             }
         }
         let cell = create_cell(cell_type);
@@ -62,7 +62,7 @@ impl CiarletElement {
 
         for matrices in &interpolation_weights {
             for mat in matrices {
-                if mat.shape().1 != value_size {
+                if mat.shape()[1] != value_size {
                     panic!("Incompatible value size");
                 }
             }
@@ -71,20 +71,20 @@ impl CiarletElement {
         let new_pts = if continuity == Continuity::Discontinuous {
             let mut new_pts: [Vec<Mat<f64>>; 4] = [vec![], vec![], vec![], vec![]];
             let mut pn = 0;
-            let mut all_pts = rlst_dynamic_mat![f64, (npts, tdim)];
+            let mut all_pts = rlst_dynamic_array2![f64, [npts, tdim]];
             for (i, pts_i) in interpolation_points.iter().take(tdim).enumerate() {
                 for _pts in pts_i {
-                    new_pts[i].push(rlst_dynamic_mat![f64, (0, tdim)]);
+                    new_pts[i].push(rlst_dynamic_array2![f64, [0, tdim]]);
                 }
             }
             for pts_i in interpolation_points.iter() {
                 for pts in pts_i {
-                    for j in 0..pts.shape().0 {
+                    for j in 0..pts.shape()[0] {
                         for k in 0..tdim {
-                            *all_pts.get_mut(pn + j, k).unwrap() = *pts.get(j, k).unwrap();
+                            *all_pts.get_mut([pn + j, k]).unwrap() = *pts.get([j, k]).unwrap();
                         }
                     }
-                    pn += pts.shape().0;
+                    pn += pts.shape()[0];
                 }
             }
             new_pts[tdim].push(all_pts);
@@ -96,24 +96,24 @@ impl CiarletElement {
             let mut new_wts = [vec![], vec![], vec![], vec![]];
             let mut pn = 0;
             let mut dn = 0;
-            let mut all_mat = Array3D::<f64>::new((dim, value_size, npts));
+            let mut all_mat = rlst_dynamic_array3!(f64, [dim, value_size, npts]);
             for (i, mi) in interpolation_weights.iter().take(tdim).enumerate() {
                 for _mat in mi {
-                    new_wts[i].push(Array3D::<f64>::new((0, value_size, 0)));
+                    new_wts[i].push(rlst_dynamic_array3!(f64, [0, value_size, 0]));
                 }
             }
             for mi in interpolation_weights.iter() {
                 for mat in mi {
-                    for j in 0..mat.shape().0 {
+                    for j in 0..mat.shape()[0] {
                         for k in 0..value_size {
-                            for l in 0..mat.shape().2 {
-                                *all_mat.get_mut(dn + j, k, pn + l).unwrap() =
-                                    *mat.get(j, k, l).unwrap();
+                            for l in 0..mat.shape()[2] {
+                                *all_mat.get_mut([dn + j, k, pn + l]).unwrap() =
+                                    *mat.get([j, k, l]).unwrap();
                             }
                         }
                     }
-                    dn += mat.shape().0;
-                    pn += mat.shape().2;
+                    dn += mat.shape()[0];
+                    pn += mat.shape()[2];
                 }
             }
             new_wts[tdim].push(all_mat);
@@ -124,56 +124,62 @@ impl CiarletElement {
 
         // Compute the dual matrix
         let pdim = polynomial_count(cell_type, highest_degree);
-        let mut d_matrix = Array3D::<f64>::new((value_size, pdim, dim));
+        let mut d_matrix = rlst_dynamic_array3!(f64, [value_size, pdim, dim]);
 
         let mut dof = 0;
         for d in 0..4 {
             for (e, pts) in new_pts[d].iter().enumerate() {
-                if pts.shape().0 > 0 {
-                    let mut table = Array3D::<f64>::new((1, pdim, pts.shape().0));
+                if pts.shape()[0] > 0 {
+                    let mut table = rlst_dynamic_array3!(f64, [1, pdim, pts.shape()[0]]);
                     tabulate_legendre_polynomials(cell_type, pts, highest_degree, 0, &mut table);
                     let mat = &new_wts[d][e];
-                    for i in 0..mat.shape().0 {
+                    for i in 0..mat.shape()[0] {
                         for j in 0..value_size {
                             for l in 0..pdim {
-                                let value = d_matrix.get_mut(j, l, dof + i).unwrap();
+                                let value = d_matrix.get_mut([j, l, dof + i]).unwrap();
                                 *value = 0.0;
-                                for k in 0..pts.shape().0 {
-                                    *value +=
-                                        *mat.get(i, j, k).unwrap() * *table.get(0, l, k).unwrap();
+                                for k in 0..pts.shape()[0] {
+                                    *value += *mat.get([i, j, k]).unwrap()
+                                        * *table.get([0, l, k]).unwrap();
                                 }
                             }
                         }
                     }
-                    dof += mat.shape().0;
+                    dof += mat.shape()[0];
                 }
             }
         }
 
-        let mut dual_matrix = rlst_dense::rlst_dynamic_mat!(f64, (dim, dim));
+        let mut inverse = rlst_dense::rlst_dynamic_array2!(f64, [dim, dim]);
 
         for i in 0..dim {
             for j in 0..dim {
-                let entry = dual_matrix.get_mut(i, j).unwrap();
+                let entry = inverse.get_mut([i, j]).unwrap();
                 *entry = 0.0;
                 for k in 0..value_size {
                     for l in 0..pdim {
-                        *entry += *polynomial_coeffs.get(i, k, l).unwrap()
-                            * *d_matrix.get(k, l, j).unwrap();
+                        *entry += *polynomial_coeffs.get([i, k, l]).unwrap()
+                            * *d_matrix.get([k, l, j]).unwrap();
                     }
                 }
             }
         }
 
-        let inverse = dual_matrix.linalg().inverse().unwrap();
+        let mut ident = rlst_dense::rlst_dynamic_array2!(f64, [dim, dim]);
+        for i in 0..dim {
+            unsafe {
+                *ident.get_unchecked_mut([i, i]) = 1.0;
+            }
+        }
+        inverse.view_mut().into_inverse_alloc().unwrap();
 
-        let mut coefficients = Array3D::<f64>::new((dim, value_size, pdim));
+        let mut coefficients = rlst_dynamic_array3!(f64, [dim, value_size, pdim]);
         for i in 0..dim {
             for l in 0..pdim {
                 for j in 0..value_size {
                     for k in 0..pdim {
-                        *coefficients.get_mut(i, j, k).unwrap() +=
-                            *inverse.get(i, l).unwrap() * *polynomial_coeffs.get(l, j, k).unwrap()
+                        *coefficients.get_mut([i, j, k]).unwrap() += *inverse.get([i, l]).unwrap()
+                            * *polynomial_coeffs.get([l, j, k]).unwrap()
                     }
                 }
             }
@@ -188,9 +194,9 @@ impl CiarletElement {
         let mut dof = 0;
         for i in 0..4 {
             for pts in &new_pts[i] {
-                let dofs: Vec<usize> = (dof..dof + pts.shape().0).collect();
+                let dofs: Vec<usize> = (dof..dof + pts.shape()[0]).collect();
                 entity_dofs[i].add_row(&dofs);
-                dof += pts.shape().0;
+                dof += pts.shape()[0];
             }
         }
         CiarletElement {
@@ -240,18 +246,19 @@ impl FiniteElement for CiarletElement {
     fn dim(&self) -> usize {
         self.dim
     }
-    fn tabulate<T: RandomAccessByRef<Item = f64> + Shape>(
+    fn tabulate<
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        T4Mut: RandomAccessMut<4, Item = f64>,
+    >(
         &self,
         points: &T,
         nderivs: usize,
-        data: &mut impl Array4DAccess<f64>,
+        data: &mut T4Mut,
     ) {
-        let mut table = Array3D::<f64>::new(legendre_shape(
-            self.cell_type,
-            points,
-            self.highest_degree,
-            nderivs,
-        ));
+        let mut table = rlst_dynamic_array3!(
+            f64,
+            legendre_shape(self.cell_type, points, self.highest_degree, nderivs,)
+        );
         tabulate_legendre_polynomials(
             self.cell_type,
             points,
@@ -260,15 +267,15 @@ impl FiniteElement for CiarletElement {
             &mut table,
         );
 
-        for d in 0..table.shape().0 {
-            for p in 0..points.shape().0 {
+        for d in 0..table.shape()[0] {
+            for p in 0..points.shape()[0] {
                 for j in 0..self.value_size {
                     for b in 0..self.dim {
-                        let value = data.get_mut(d, p, b, j).unwrap();
+                        let value = data.get_mut([d, p, b, j]).unwrap();
                         *value = 0.0;
-                        for i in 0..table.shape().1 {
-                            *value += *self.coefficients.get(b, j, i).unwrap()
-                                * *table.get_mut(d, i, p).unwrap();
+                        for i in 0..table.shape()[1] {
+                            *value += *self.coefficients.get([b, j, i]).unwrap()
+                                * *table.get_mut([d, i, p]).unwrap();
                         }
                     }
                 }
diff --git a/element/src/element/lagrange.rs b/element/src/element/lagrange.rs
index 1ecf923d..3e272307 100644
--- a/element/src/element/lagrange.rs
+++ b/element/src/element/lagrange.rs
@@ -2,11 +2,11 @@
 
 use crate::element::{create_cell, CiarletElement};
 use crate::polynomials::polynomial_count;
-use bempp_tools::arrays::{to_matrix, zero_matrix, Array3D};
-use bempp_traits::arrays::Array3DAccess;
+use bempp_tools::arrays::{to_matrix, zero_matrix};
 use bempp_traits::cell::ReferenceCellType;
 use bempp_traits::element::{Continuity, ElementFamily, MapType};
-use rlst_dense::RandomAccessMut;
+use rlst_dense::rlst_dynamic_array3;
+use rlst_dense::traits::RandomAccessMut;
 
 /// Create a Lagrange element
 pub fn create(
@@ -17,9 +17,9 @@ pub fn create(
     let cell = create_cell(cell_type);
     let dim = polynomial_count(cell_type, degree);
     let tdim = cell.dim();
-    let mut wcoeffs = Array3D::<f64>::new((dim, 1, dim));
+    let mut wcoeffs = rlst_dynamic_array3!(f64, [dim, 1, dim]);
     for i in 0..dim {
-        *wcoeffs.get_mut(i, 0, i).unwrap() = 1.0;
+        *wcoeffs.get_mut([i, 0, i]).unwrap() = 1.0;
     }
 
     let mut x = [vec![], vec![], vec![], vec![]];
@@ -30,41 +30,43 @@ pub fn create(
         }
         for d in 0..tdim {
             for _e in 0..cell.entity_count(d) {
-                x[d].push(zero_matrix((0, tdim)));
-                m[d].push(Array3D::<f64>::new((0, 1, 0)));
+                x[d].push(zero_matrix([0, tdim]));
+                m[d].push(rlst_dynamic_array3!(f64, [0, 1, 0]));
             }
         }
-        x[tdim].push(to_matrix(&cell.midpoint(), (1, tdim)));
-        m[tdim].push(Array3D::<f64>::from_data(vec![1.0], (1, 1, 1)));
+        x[tdim].push(to_matrix(&cell.midpoint(), [1, tdim]));
+        let mut mentry = rlst_dynamic_array3!(f64, [1, 1, 1]);
+        *mentry.get_mut([0, 0, 0]).unwrap() = 1.0;
+        m[tdim].push(mentry);
     } else {
         // TODO: GLL points
         for e in 0..cell.entity_count(0) {
-            let mut pts = zero_matrix((1, tdim));
+            let mut pts = zero_matrix([1, tdim]);
             for i in 0..tdim {
-                *pts.get_mut(0, i).unwrap() = cell.vertices()[e * tdim + i];
+                *pts.get_mut([0, i]).unwrap() = cell.vertices()[e * tdim + i];
             }
             x[0].push(pts);
-            m[0].push(Array3D::<f64>::from_data(vec![1.0], (1, 1, 1)));
+            let mut mentry = rlst_dynamic_array3!(f64, [1, 1, 1]);
+            *mentry.get_mut([0, 0, 0]).unwrap() = 1.0;
+            m[0].push(mentry);
         }
         for e in 0..cell.entity_count(1) {
-            let mut pts = zero_matrix((degree - 1, tdim));
-            let mut ident = vec![0.0; (degree - 1).pow(2)];
+            let mut pts = zero_matrix([degree - 1, tdim]);
             let vn0 = cell.edges()[2 * e];
             let vn1 = cell.edges()[2 * e + 1];
             let v0 = &cell.vertices()[vn0 * tdim..(vn0 + 1) * tdim];
             let v1 = &cell.vertices()[vn1 * tdim..(vn1 + 1) * tdim];
+            let mut ident = rlst_dynamic_array3!(f64, [degree - 1, 1, degree - 1]);
+
             for i in 1..degree {
-                ident[(i - 1) * degree] = 1.0;
+                *ident.get_mut([i - 1, 0, i - 1]).unwrap() = 1.0;
                 for j in 0..tdim {
-                    *pts.get_mut(i - 1, j).unwrap() =
+                    *pts.get_mut([i - 1, j]).unwrap() =
                         v0[j] + i as f64 / degree as f64 * (v1[j] - v0[j]);
                 }
             }
             x[1].push(pts);
-            m[1].push(Array3D::<f64>::from_data(
-                ident,
-                (degree - 1, 1, degree - 1),
-            ));
+            m[1].push(ident);
         }
         let mut start = 0;
         for e in 0..cell.entity_count(2) {
@@ -80,8 +82,7 @@ pub fn create(
             } else {
                 panic!("Unsupported face type");
             };
-            let mut pts = zero_matrix((npts, tdim));
-            let mut ident = vec![0.0; npts.pow(2)];
+            let mut pts = zero_matrix([npts, tdim]);
 
             let vn0 = cell.faces()[start];
             let vn1 = cell.faces()[start + 1];
@@ -96,7 +97,7 @@ pub fn create(
                 for i0 in 1..degree {
                     for i1 in 1..degree - i0 {
                         for j in 0..tdim {
-                            *pts.get_mut(n, j).unwrap() = v0[j]
+                            *pts.get_mut([n, j]).unwrap() = v0[j]
                                 + i0 as f64 / degree as f64 * (v1[j] - v0[j])
                                 + i1 as f64 / degree as f64 * (v2[j] - v0[j]);
                         }
@@ -109,7 +110,7 @@ pub fn create(
                 for i0 in 1..degree {
                     for i1 in 1..degree {
                         for j in 0..tdim {
-                            *pts.get_mut(n, j).unwrap() = v0[j]
+                            *pts.get_mut([n, j]).unwrap() = v0[j]
                                 + i0 as f64 / degree as f64 * (v1[j] - v0[j])
                                 + i1 as f64 / degree as f64 * (v2[j] - v0[j]);
                         }
@@ -120,11 +121,12 @@ pub fn create(
                 panic!("Unsupported face type.");
             }
 
+            let mut ident = rlst_dynamic_array3!(f64, [npts, 1, npts]);
             for i in 0..npts {
-                ident[i * npts + i] = 1.0;
+                *ident.get_mut([i, 0, i]).unwrap() = 1.0;
             }
             x[2].push(pts);
-            m[2].push(Array3D::<f64>::from_data(ident, (npts, 1, npts)));
+            m[2].push(ident);
             start += nvertices;
         }
     }
@@ -147,10 +149,9 @@ mod test {
     use crate::cell::*;
     use crate::element::lagrange::*;
     use approx::*;
-    use bempp_tools::arrays::Array4D;
-    use bempp_traits::arrays::Array4DAccess;
     use bempp_traits::element::FiniteElement;
-    use rlst_dense::RandomAccessByRef;
+    use rlst_dense::rlst_dynamic_array4;
+    use rlst_dense::traits::RandomAccessByRef;
 
     fn check_dofs(e: impl FiniteElement) {
         let cell_dim = match e.cell_type() {
@@ -186,12 +187,12 @@ mod test {
     fn test_lagrange_0_interval() {
         let e = create(ReferenceCellType::Interval, 0, Continuity::Discontinuous);
         assert_eq!(e.value_size(), 1);
-        let mut data = Array4D::<f64>::new(e.tabulate_array_shape(0, 4));
-        let points = to_matrix(&[0.0, 0.2, 0.4, 1.0], (4, 1));
+        let mut data = rlst_dynamic_array4!(f64, e.tabulate_array_shape(0, 4));
+        let points = to_matrix(&[0.0, 0.2, 0.4, 1.0], [4, 1]);
         e.tabulate(&points, 0, &mut data);
 
         for pt in 0..4 {
-            assert_relative_eq!(*data.get(0, pt, 0, 0).unwrap(), 1.0);
+            assert_relative_eq!(*data.get([0, pt, 0, 0]).unwrap(), 1.0);
         }
         check_dofs(e);
     }
@@ -200,16 +201,19 @@ mod test {
     fn test_lagrange_1_interval() {
         let e = create(ReferenceCellType::Interval, 1, Continuity::Continuous);
         assert_eq!(e.value_size(), 1);
-        let mut data = Array4D::<f64>::new(e.tabulate_array_shape(0, 4));
-        let points = to_matrix(&[0.0, 0.2, 0.4, 1.0], (4, 1));
+        let mut data = rlst_dynamic_array4!(f64, e.tabulate_array_shape(0, 4));
+        let points = to_matrix(&[0.0, 0.2, 0.4, 1.0], [4, 1]);
         e.tabulate(&points, 0, &mut data);
 
         for pt in 0..4 {
             assert_relative_eq!(
-                *data.get(0, pt, 0, 0).unwrap(),
-                1.0 - *points.get(pt, 0).unwrap()
+                *data.get([0, pt, 0, 0]).unwrap(),
+                1.0 - *points.get([pt, 0]).unwrap()
+            );
+            assert_relative_eq!(
+                *data.get([0, pt, 1, 0]).unwrap(),
+                *points.get([pt, 0]).unwrap()
             );
-            assert_relative_eq!(*data.get(0, pt, 1, 0).unwrap(), *points.get(pt, 0).unwrap());
         }
         check_dofs(e);
     }
@@ -218,15 +222,15 @@ mod test {
     fn test_lagrange_0_triangle() {
         let e = create(ReferenceCellType::Triangle, 0, Continuity::Discontinuous);
         assert_eq!(e.value_size(), 1);
-        let mut data = Array4D::<f64>::new(e.tabulate_array_shape(0, 6));
+        let mut data = rlst_dynamic_array4!(f64, e.tabulate_array_shape(0, 6));
         let points = to_matrix(
             &[0.0, 1.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 1.0, 0.0, 0.5, 0.5],
-            (6, 2),
+            [6, 2],
         );
         e.tabulate(&points, 0, &mut data);
 
         for pt in 0..6 {
-            assert_relative_eq!(*data.get(0, pt, 0, 0).unwrap(), 1.0);
+            assert_relative_eq!(*data.get([0, pt, 0, 0]).unwrap(), 1.0);
         }
         check_dofs(e);
     }
@@ -235,20 +239,26 @@ mod test {
     fn test_lagrange_1_triangle() {
         let e = create(ReferenceCellType::Triangle, 1, Continuity::Continuous);
         assert_eq!(e.value_size(), 1);
-        let mut data = Array4D::<f64>::new(e.tabulate_array_shape(0, 6));
+        let mut data = rlst_dynamic_array4!(f64, e.tabulate_array_shape(0, 6));
         let points = to_matrix(
             &[0.0, 1.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 1.0, 0.0, 0.5, 0.5],
-            (6, 2),
+            [6, 2],
         );
         e.tabulate(&points, 0, &mut data);
 
         for pt in 0..6 {
             assert_relative_eq!(
-                *data.get(0, pt, 0, 0).unwrap(),
-                1.0 - *points.get(pt, 0).unwrap() - *points.get(pt, 1).unwrap()
+                *data.get([0, pt, 0, 0]).unwrap(),
+                1.0 - *points.get([pt, 0]).unwrap() - *points.get([pt, 1]).unwrap()
+            );
+            assert_relative_eq!(
+                *data.get([0, pt, 1, 0]).unwrap(),
+                *points.get([pt, 0]).unwrap()
+            );
+            assert_relative_eq!(
+                *data.get([0, pt, 2, 0]).unwrap(),
+                *points.get([pt, 1]).unwrap()
             );
-            assert_relative_eq!(*data.get(0, pt, 1, 0).unwrap(), *points.get(pt, 0).unwrap());
-            assert_relative_eq!(*data.get(0, pt, 2, 0).unwrap(), *points.get(pt, 1).unwrap());
         }
         check_dofs(e);
     }
@@ -316,15 +326,15 @@ mod test {
             Continuity::Discontinuous,
         );
         assert_eq!(e.value_size(), 1);
-        let mut data = Array4D::<f64>::new(e.tabulate_array_shape(0, 6));
+        let mut data = rlst_dynamic_array4!(f64, e.tabulate_array_shape(0, 6));
         let points = to_matrix(
             &[0.0, 1.0, 0.0, 1.0, 0.25, 0.3, 0.0, 0.0, 1.0, 1.0, 0.5, 0.2],
-            (6, 2),
+            [6, 2],
         );
         e.tabulate(&points, 0, &mut data);
 
         for pt in 0..6 {
-            assert_relative_eq!(*data.get(0, pt, 0, 0).unwrap(), 1.0);
+            assert_relative_eq!(*data.get([0, pt, 0, 0]).unwrap(), 1.0);
         }
         check_dofs(e);
     }
@@ -333,29 +343,29 @@ mod test {
     fn test_lagrange_1_quadrilateral() {
         let e = create(ReferenceCellType::Quadrilateral, 1, Continuity::Continuous);
         assert_eq!(e.value_size(), 1);
-        let mut data = Array4D::<f64>::new(e.tabulate_array_shape(0, 6));
+        let mut data = rlst_dynamic_array4!(f64, e.tabulate_array_shape(0, 6));
         let points = to_matrix(
             &[0.0, 1.0, 0.0, 1.0, 0.25, 0.3, 0.0, 0.0, 1.0, 1.0, 0.5, 0.2],
-            (6, 2),
+            [6, 2],
         );
         e.tabulate(&points, 0, &mut data);
 
         for pt in 0..6 {
             assert_relative_eq!(
-                *data.get(0, pt, 0, 0).unwrap(),
-                (1.0 - *points.get(pt, 0).unwrap()) * (1.0 - *points.get(pt, 1).unwrap())
+                *data.get([0, pt, 0, 0]).unwrap(),
+                (1.0 - *points.get([pt, 0]).unwrap()) * (1.0 - *points.get([pt, 1]).unwrap())
             );
             assert_relative_eq!(
-                *data.get(0, pt, 1, 0).unwrap(),
-                *points.get(pt, 0).unwrap() * (1.0 - *points.get(pt, 1).unwrap())
+                *data.get([0, pt, 1, 0]).unwrap(),
+                *points.get([pt, 0]).unwrap() * (1.0 - *points.get([pt, 1]).unwrap())
             );
             assert_relative_eq!(
-                *data.get(0, pt, 2, 0).unwrap(),
-                (1.0 - *points.get(pt, 0).unwrap()) * *points.get(pt, 1).unwrap()
+                *data.get([0, pt, 2, 0]).unwrap(),
+                (1.0 - *points.get([pt, 0]).unwrap()) * *points.get([pt, 1]).unwrap()
             );
             assert_relative_eq!(
-                *data.get(0, pt, 3, 0).unwrap(),
-                *points.get(pt, 0).unwrap() * *points.get(pt, 1).unwrap()
+                *data.get([0, pt, 3, 0]).unwrap(),
+                *points.get([pt, 0]).unwrap() * *points.get([pt, 1]).unwrap()
             );
         }
         check_dofs(e);
@@ -365,51 +375,60 @@ mod test {
     fn test_lagrange_2_quadrilateral() {
         let e = create(ReferenceCellType::Quadrilateral, 2, Continuity::Continuous);
         assert_eq!(e.value_size(), 1);
-        let mut data = Array4D::<f64>::new(e.tabulate_array_shape(0, 6));
+        let mut data = rlst_dynamic_array4!(f64, e.tabulate_array_shape(0, 6));
         let points = to_matrix(
             &[0.0, 1.0, 0.0, 1.0, 0.25, 0.3, 0.0, 0.0, 1.0, 1.0, 0.5, 0.2],
-            (6, 2),
+            [6, 2],
         );
         e.tabulate(&points, 0, &mut data);
 
         for pt in 0..6 {
-            let x = *points.get(pt, 0).unwrap();
-            let y = *points.get(pt, 1).unwrap();
+            let x = *points.get([pt, 0]).unwrap();
+            let y = *points.get([pt, 1]).unwrap();
             assert_relative_eq!(
-                *data.get(0, pt, 0, 0).unwrap(),
-                (1.0 - x) * (1.0 - 2.0 * x) * (1.0 - y) * (1.0 - 2.0 * y)
+                *data.get([0, pt, 0, 0]).unwrap(),
+                (1.0 - x) * (1.0 - 2.0 * x) * (1.0 - y) * (1.0 - 2.0 * y),
+                epsilon = 1e-14
             );
             assert_relative_eq!(
-                *data.get(0, pt, 1, 0).unwrap(),
-                x * (2.0 * x - 1.0) * (1.0 - y) * (1.0 - 2.0 * y)
+                *data.get([0, pt, 1, 0]).unwrap(),
+                x * (2.0 * x - 1.0) * (1.0 - y) * (1.0 - 2.0 * y),
+                epsilon = 1e-14
             );
             assert_relative_eq!(
-                *data.get(0, pt, 2, 0).unwrap(),
-                (1.0 - x) * (1.0 - 2.0 * x) * y * (2.0 * y - 1.0)
+                *data.get([0, pt, 2, 0]).unwrap(),
+                (1.0 - x) * (1.0 - 2.0 * x) * y * (2.0 * y - 1.0),
+                epsilon = 1e-14
             );
             assert_relative_eq!(
-                *data.get(0, pt, 3, 0).unwrap(),
-                x * (2.0 * x - 1.0) * y * (2.0 * y - 1.0)
+                *data.get([0, pt, 3, 0]).unwrap(),
+                x * (2.0 * x - 1.0) * y * (2.0 * y - 1.0),
+                epsilon = 1e-14
             );
             assert_relative_eq!(
-                *data.get(0, pt, 4, 0).unwrap(),
-                4.0 * x * (1.0 - x) * (1.0 - y) * (1.0 - 2.0 * y)
+                *data.get([0, pt, 4, 0]).unwrap(),
+                4.0 * x * (1.0 - x) * (1.0 - y) * (1.0 - 2.0 * y),
+                epsilon = 1e-14
             );
             assert_relative_eq!(
-                *data.get(0, pt, 5, 0).unwrap(),
-                (1.0 - x) * (1.0 - 2.0 * x) * 4.0 * y * (1.0 - y)
+                *data.get([0, pt, 5, 0]).unwrap(),
+                (1.0 - x) * (1.0 - 2.0 * x) * 4.0 * y * (1.0 - y),
+                epsilon = 1e-14
             );
             assert_relative_eq!(
-                *data.get(0, pt, 6, 0).unwrap(),
-                x * (2.0 * x - 1.0) * 4.0 * y * (1.0 - y)
+                *data.get([0, pt, 6, 0]).unwrap(),
+                x * (2.0 * x - 1.0) * 4.0 * y * (1.0 - y),
+                epsilon = 1e-14
             );
             assert_relative_eq!(
-                *data.get(0, pt, 7, 0).unwrap(),
-                4.0 * x * (1.0 - x) * y * (2.0 * y - 1.0)
+                *data.get([0, pt, 7, 0]).unwrap(),
+                4.0 * x * (1.0 - x) * y * (2.0 * y - 1.0),
+                epsilon = 1e-14
             );
             assert_relative_eq!(
-                *data.get(0, pt, 8, 0).unwrap(),
-                4.0 * x * (1.0 - x) * 4.0 * y * (1.0 - y)
+                *data.get([0, pt, 8, 0]).unwrap(),
+                4.0 * x * (1.0 - x) * 4.0 * y * (1.0 - y),
+                epsilon = 1e-14
             );
         }
         check_dofs(e);
diff --git a/element/src/element/raviart_thomas.rs b/element/src/element/raviart_thomas.rs
index 7bb60551..5b7c035e 100644
--- a/element/src/element/raviart_thomas.rs
+++ b/element/src/element/raviart_thomas.rs
@@ -2,11 +2,11 @@
 
 use crate::element::{create_cell, CiarletElement};
 use crate::polynomials::polynomial_count;
-use bempp_tools::arrays::{zero_matrix, Array3D};
-use bempp_traits::arrays::Array3DAccess;
+use bempp_tools::arrays::zero_matrix;
 use bempp_traits::cell::ReferenceCellType;
 use bempp_traits::element::{Continuity, ElementFamily, MapType};
-use rlst_dense::RandomAccessMut;
+use rlst_dense::rlst_dynamic_array3;
+use rlst_dense::traits::RandomAccessMut;
 
 /// Create a Raviart-Thomas element
 pub fn create(
@@ -30,45 +30,45 @@ pub fn create(
     let tdim = cell.dim();
     let edim = tdim * polynomial_count(cell_type, degree - 1) + degree;
 
-    let mut wcoeffs = Array3D::<f64>::new((edim, tdim, pdim));
+    let mut wcoeffs = rlst_dynamic_array3!(f64, [edim, tdim, pdim]);
 
     // [sqrt(2), 6*y - 2, 4*sqrt(3)*(x + y/2 - 1/2)]
 
     // norm(x**2 + y**2)
     // sqrt(70)/30
 
-    *wcoeffs.get_mut(0, 0, 0).unwrap() = 1.0;
-    *wcoeffs.get_mut(1, 1, 0).unwrap() = 1.0;
-    *wcoeffs.get_mut(2, 0, 1).unwrap() = -0.5 / f64::sqrt(2.0);
-    *wcoeffs.get_mut(2, 0, 2).unwrap() = 0.5 * f64::sqrt(1.5);
-    *wcoeffs.get_mut(2, 1, 1).unwrap() = 1.0 / f64::sqrt(2.0);
+    *wcoeffs.get_mut([0, 0, 0]).unwrap() = 1.0;
+    *wcoeffs.get_mut([1, 1, 0]).unwrap() = 1.0;
+    *wcoeffs.get_mut([2, 0, 1]).unwrap() = -0.5 / f64::sqrt(2.0);
+    *wcoeffs.get_mut([2, 0, 2]).unwrap() = 0.5 * f64::sqrt(1.5);
+    *wcoeffs.get_mut([2, 1, 1]).unwrap() = 1.0 / f64::sqrt(2.0);
 
     let mut x = [vec![], vec![], vec![], vec![]];
     let mut m = [vec![], vec![], vec![], vec![]];
     for _e in 0..cell.entity_count(0) {
-        x[0].push(zero_matrix((0, tdim)));
-        m[0].push(Array3D::<f64>::new((0, 2, 0)));
+        x[0].push(zero_matrix([0, tdim]));
+        m[0].push(rlst_dynamic_array3!(f64, [0, 2, 0]));
     }
 
     for e in 0..cell.entity_count(1) {
-        let mut pts = zero_matrix((1, tdim));
-        let mut mat = vec![0.0; 2];
+        let mut pts = zero_matrix([1, tdim]);
+        let mut mat = rlst_dynamic_array3!(f64, [1, 2, 1]);
         let vn0 = cell.edges()[2 * e];
         let vn1 = cell.edges()[2 * e + 1];
         let v0 = &cell.vertices()[vn0 * tdim..(vn0 + 1) * tdim];
         let v1 = &cell.vertices()[vn1 * tdim..(vn1 + 1) * tdim];
         for i in 0..tdim {
-            *pts.get_mut(0, i).unwrap() = (v0[i] + v1[i]) / 2.0;
+            *pts.get_mut([0, i]).unwrap() = (v0[i] + v1[i]) / 2.0;
         }
-        mat[0] = v0[1] - v1[1];
-        mat[1] = v1[0] - v0[0];
+        *mat.get_mut([0, 0, 0]).unwrap() = v0[1] - v1[1];
+        *mat.get_mut([0, 1, 0]).unwrap() = v1[0] - v0[0];
         x[1].push(pts);
-        m[1].push(Array3D::<f64>::from_data(mat, (1, 2, 1)));
+        m[1].push(mat);
     }
 
     for _e in 0..cell.entity_count(2) {
-        x[2].push(zero_matrix((0, tdim)));
-        m[2].push(Array3D::<f64>::new((0, 2, 0)));
+        x[2].push(zero_matrix([0, tdim]));
+        m[2].push(rlst_dynamic_array3!(f64, [0, 2, 0]))
     }
 
     CiarletElement::create(
@@ -90,10 +90,10 @@ mod test {
     use crate::cell::*;
     use crate::element::raviart_thomas::*;
     use approx::*;
-    use bempp_tools::arrays::{to_matrix, Array4D};
-    use bempp_traits::arrays::Array4DAccess;
+    use bempp_tools::arrays::to_matrix;
     use bempp_traits::element::FiniteElement;
-    use rlst_dense::RandomAccessByRef;
+    use rlst_dense::rlst_dynamic_array4;
+    use rlst_dense::traits::RandomAccessByRef;
 
     fn check_dofs(e: impl FiniteElement) {
         let cell_dim = match e.cell_type() {
@@ -129,34 +129,37 @@ mod test {
     fn test_raviart_thomas_1_triangle() {
         let e = create(ReferenceCellType::Triangle, 1, Continuity::Continuous);
         assert_eq!(e.value_size(), 2);
-        let mut data = Array4D::<f64>::new(e.tabulate_array_shape(0, 6));
+        let mut data = rlst_dynamic_array4!(f64, e.tabulate_array_shape(0, 6));
         let points = to_matrix(
             &[0.0, 1.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 1.0, 0.0, 0.5, 0.5],
-            (6, 2),
+            [6, 2],
         );
         e.tabulate(&points, 0, &mut data);
 
         for pt in 0..6 {
             assert_relative_eq!(
-                *data.get(0, pt, 0, 0).unwrap(),
-                -*points.get(pt, 0).unwrap()
+                *data.get([0, pt, 0, 0]).unwrap(),
+                -*points.get([pt, 0]).unwrap()
             );
             assert_relative_eq!(
-                *data.get(0, pt, 0, 1).unwrap(),
-                -*points.get(pt, 1).unwrap()
+                *data.get([0, pt, 0, 1]).unwrap(),
+                -*points.get([pt, 1]).unwrap()
             );
             assert_relative_eq!(
-                *data.get(0, pt, 1, 0).unwrap(),
-                *points.get(pt, 0).unwrap() - 1.0
+                *data.get([0, pt, 1, 0]).unwrap(),
+                *points.get([pt, 0]).unwrap() - 1.0
             );
-            assert_relative_eq!(*data.get(0, pt, 1, 1).unwrap(), *points.get(pt, 1).unwrap());
             assert_relative_eq!(
-                *data.get(0, pt, 2, 0).unwrap(),
-                -*points.get(pt, 0).unwrap()
+                *data.get([0, pt, 1, 1]).unwrap(),
+                *points.get([pt, 1]).unwrap()
             );
             assert_relative_eq!(
-                *data.get(0, pt, 2, 1).unwrap(),
-                1.0 - *points.get(pt, 1).unwrap()
+                *data.get([0, pt, 2, 0]).unwrap(),
+                -*points.get([pt, 0]).unwrap()
+            );
+            assert_relative_eq!(
+                *data.get([0, pt, 2, 1]).unwrap(),
+                1.0 - *points.get([pt, 1]).unwrap()
             );
         }
         check_dofs(e);
diff --git a/element/src/polynomials.rs b/element/src/polynomials.rs
index 677c46d1..cfce8498 100644
--- a/element/src/polynomials.rs
+++ b/element/src/polynomials.rs
@@ -1,27 +1,29 @@
 //! Orthonormal polynomials
 
-use bempp_traits::arrays::Array3DAccess;
 use bempp_traits::cell::ReferenceCellType;
-use rlst_dense::{RandomAccessByRef, Shape};
+use rlst_dense::traits::{RandomAccessByRef, RandomAccessMut, Shape};
 
 /// Tabulate orthonormal polynomials on a interval
-fn tabulate_legendre_polynomials_interval<T: RandomAccessByRef<Item = f64> + Shape>(
+fn tabulate_legendre_polynomials_interval<
+    T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+    T3Mut: RandomAccessMut<3, Item = f64> + RandomAccessByRef<3, Item = f64> + Shape<3>,
+>(
     points: &T,
     degree: usize,
     derivatives: usize,
-    data: &mut impl Array3DAccess<f64>,
+    data: &mut T3Mut,
 ) {
-    assert_eq!(data.shape().0, derivatives + 1);
-    assert_eq!(data.shape().1, degree + 1);
-    assert_eq!(data.shape().2, points.shape().0);
-    assert_eq!(points.shape().1, 1);
+    assert_eq!(data.shape()[0], derivatives + 1);
+    assert_eq!(data.shape()[1], degree + 1);
+    assert_eq!(data.shape()[2], points.shape()[0]);
+    assert_eq!(points.shape()[1], 1);
 
-    for i in 0..data.shape().2 {
-        *data.get_mut(0, 0, i).unwrap() = 1.0;
+    for i in 0..data.shape()[2] {
+        *data.get_mut([0, 0, i]).unwrap() = 1.0;
     }
-    for k in 1..data.shape().0 {
-        for i in 0..data.shape().2 {
-            *data.get_mut(k, 0, i).unwrap() = 0.0;
+    for k in 1..data.shape()[0] {
+        for i in 0..data.shape()[2] {
+            *data.get_mut([k, 0, i]).unwrap() = 0.0;
         }
     }
 
@@ -29,20 +31,21 @@ fn tabulate_legendre_polynomials_interval<T: RandomAccessByRef<Item = f64> + Sha
         for p in 1..degree + 1 {
             let a = 1.0 - 1.0 / p as f64;
             let b = (a + 1.0) * ((2.0 * p as f64 + 1.0) / (2.0 * p as f64 - 1.0)).sqrt();
-            for i in 0..data.shape().2 {
-                *data.get_mut(k, p, i).unwrap() =
-                    (points.get(i, 0).unwrap() * 2.0 - 1.0) * data.get(k, p - 1, i).unwrap() * b;
+            for i in 0..data.shape()[2] {
+                *data.get_mut([k, p, i]).unwrap() = (points.get([i, 0]).unwrap() * 2.0 - 1.0)
+                    * data.get([k, p - 1, i]).unwrap()
+                    * b;
             }
             if p > 1 {
                 let c = a * ((2.0 * p as f64 + 1.0) / (2.0 * p as f64 - 3.0)).sqrt();
-                for i in 0..data.shape().2 {
-                    *data.get_mut(k, p, i).unwrap() -= data.get(k, p - 2, i).unwrap() * c;
+                for i in 0..data.shape()[2] {
+                    *data.get_mut([k, p, i]).unwrap() -= data.get([k, p - 2, i]).unwrap() * c;
                 }
             }
             if k > 0 {
-                for i in 0..data.shape().2 {
-                    *data.get_mut(k, p, i).unwrap() +=
-                        2.0 * k as f64 * data.get(k - 1, p - 1, i).unwrap() * b;
+                for i in 0..data.shape()[2] {
+                    *data.get_mut([k, p, i]).unwrap() +=
+                        2.0 * k as f64 * data.get([k - 1, p - 1, i]).unwrap() * b;
                 }
             }
         }
@@ -58,28 +61,31 @@ fn quad_index(i: usize, j: usize, n: usize) -> usize {
 }
 
 /// Tabulate orthonormal polynomials on a quadrilateral
-fn tabulate_legendre_polynomials_quadrilateral<T: RandomAccessByRef<Item = f64> + Shape>(
+fn tabulate_legendre_polynomials_quadrilateral<
+    T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+    T3Mut: RandomAccessMut<3, Item = f64> + RandomAccessByRef<3, Item = f64> + Shape<3>,
+>(
     points: &T,
     degree: usize,
     derivatives: usize,
-    data: &mut impl Array3DAccess<f64>,
+    data: &mut T3Mut,
 ) {
-    assert_eq!(data.shape().0, (derivatives + 1) * (derivatives + 2) / 2);
-    assert_eq!(data.shape().1, (degree + 1) * (degree + 1));
-    assert_eq!(data.shape().2, points.shape().0);
-    assert_eq!(points.shape().1, 2);
+    assert_eq!(data.shape()[0], (derivatives + 1) * (derivatives + 2) / 2);
+    assert_eq!(data.shape()[1], (degree + 1) * (degree + 1));
+    assert_eq!(data.shape()[2], points.shape()[0]);
+    assert_eq!(points.shape()[1], 2);
 
-    for i in 0..data.shape().2 {
+    for i in 0..data.shape()[2] {
         *data
-            .get_mut(tri_index(0, 0), quad_index(0, 0, degree), i)
+            .get_mut([tri_index(0, 0), quad_index(0, 0, degree), i])
             .unwrap() = 1.0;
     }
 
     // Tabulate polynomials in x
     for k in 1..derivatives + 1 {
-        for i in 0..data.shape().2 {
+        for i in 0..data.shape()[2] {
             *data
-                .get_mut(tri_index(k, 0), quad_index(0, 0, degree), i)
+                .get_mut([tri_index(k, 0), quad_index(0, 0, degree), i])
                 .unwrap() = 0.0;
         }
     }
@@ -88,34 +94,34 @@ fn tabulate_legendre_polynomials_quadrilateral<T: RandomAccessByRef<Item = f64>
         for p in 1..degree + 1 {
             let a = 1.0 - 1.0 / p as f64;
             let b = (a + 1.0) * ((2.0 * p as f64 + 1.0) / (2.0 * p as f64 - 1.0)).sqrt();
-            for i in 0..data.shape().2 {
+            for i in 0..data.shape()[2] {
                 *data
-                    .get_mut(tri_index(k, 0), quad_index(p, 0, degree), i)
-                    .unwrap() = (points.get(i, 0).unwrap() * 2.0 - 1.0)
+                    .get_mut([tri_index(k, 0), quad_index(p, 0, degree), i])
+                    .unwrap() = (points.get([i, 0]).unwrap() * 2.0 - 1.0)
                     * data
-                        .get(tri_index(k, 0), quad_index(p - 1, 0, degree), i)
+                        .get([tri_index(k, 0), quad_index(p - 1, 0, degree), i])
                         .unwrap()
                     * b;
             }
             if p > 1 {
                 let c = a * ((2.0 * p as f64 + 1.0) / (2.0 * p as f64 - 3.0)).sqrt();
-                for i in 0..data.shape().2 {
+                for i in 0..data.shape()[2] {
                     *data
-                        .get_mut(tri_index(k, 0), quad_index(p, 0, degree), i)
+                        .get_mut([tri_index(k, 0), quad_index(p, 0, degree), i])
                         .unwrap() -= data
-                        .get(tri_index(k, 0), quad_index(p - 2, 0, degree), i)
+                        .get([tri_index(k, 0), quad_index(p - 2, 0, degree), i])
                         .unwrap()
                         * c;
                 }
             }
             if k > 0 {
-                for i in 0..data.shape().2 {
+                for i in 0..data.shape()[2] {
                     *data
-                        .get_mut(tri_index(k, 0), quad_index(p, 0, degree), i)
+                        .get_mut([tri_index(k, 0), quad_index(p, 0, degree), i])
                         .unwrap() += 2.0
                         * k as f64
                         * data
-                            .get(tri_index(k - 1, 0), quad_index(p - 1, 0, degree), i)
+                            .get([tri_index(k - 1, 0), quad_index(p - 1, 0, degree), i])
                             .unwrap()
                         * b;
                 }
@@ -125,9 +131,9 @@ fn tabulate_legendre_polynomials_quadrilateral<T: RandomAccessByRef<Item = f64>
 
     // Tabulate polynomials in y
     for k in 1..derivatives + 1 {
-        for i in 0..data.shape().2 {
+        for i in 0..data.shape()[2] {
             *data
-                .get_mut(tri_index(0, k), quad_index(0, 0, degree), i)
+                .get_mut([tri_index(0, k), quad_index(0, 0, degree), i])
                 .unwrap() = 0.0;
         }
     }
@@ -136,34 +142,34 @@ fn tabulate_legendre_polynomials_quadrilateral<T: RandomAccessByRef<Item = f64>
         for p in 1..degree + 1 {
             let a = 1.0 - 1.0 / p as f64;
             let b = (a + 1.0) * ((2.0 * p as f64 + 1.0) / (2.0 * p as f64 - 1.0)).sqrt();
-            for i in 0..data.shape().2 {
+            for i in 0..data.shape()[2] {
                 *data
-                    .get_mut(tri_index(0, k), quad_index(0, p, degree), i)
-                    .unwrap() = (points.get(i, 1).unwrap() * 2.0 - 1.0)
+                    .get_mut([tri_index(0, k), quad_index(0, p, degree), i])
+                    .unwrap() = (points.get([i, 1]).unwrap() * 2.0 - 1.0)
                     * data
-                        .get(tri_index(0, k), quad_index(0, p - 1, degree), i)
+                        .get([tri_index(0, k), quad_index(0, p - 1, degree), i])
                         .unwrap()
                     * b;
             }
             if p > 1 {
                 let c = a * ((2.0 * p as f64 + 1.0) / (2.0 * p as f64 - 3.0)).sqrt();
-                for i in 0..data.shape().2 {
+                for i in 0..data.shape()[2] {
                     *data
-                        .get_mut(tri_index(0, k), quad_index(0, p, degree), i)
+                        .get_mut([tri_index(0, k), quad_index(0, p, degree), i])
                         .unwrap() -= data
-                        .get(tri_index(0, k), quad_index(0, p - 2, degree), i)
+                        .get([tri_index(0, k), quad_index(0, p - 2, degree), i])
                         .unwrap()
                         * c;
                 }
             }
             if k > 0 {
-                for i in 0..data.shape().2 {
+                for i in 0..data.shape()[2] {
                     *data
-                        .get_mut(tri_index(0, k), quad_index(0, p, degree), i)
+                        .get_mut([tri_index(0, k), quad_index(0, p, degree), i])
                         .unwrap() += 2.0
                         * k as f64
                         * data
-                            .get(tri_index(0, k - 1), quad_index(0, p - 1, degree), i)
+                            .get([tri_index(0, k - 1), quad_index(0, p - 1, degree), i])
                             .unwrap()
                         * b;
                 }
@@ -176,14 +182,14 @@ fn tabulate_legendre_polynomials_quadrilateral<T: RandomAccessByRef<Item = f64>
         for ky in 0..derivatives + 1 - kx {
             for px in 1..degree + 1 {
                 for py in 1..degree + 1 {
-                    for i in 0..data.shape().2 {
+                    for i in 0..data.shape()[2] {
                         *data
-                            .get_mut(tri_index(kx, ky), quad_index(px, py, degree), i)
+                            .get_mut([tri_index(kx, ky), quad_index(px, py, degree), i])
                             .unwrap() = *data
-                            .get_mut(tri_index(kx, 0), quad_index(px, 0, degree), i)
+                            .get_mut([tri_index(kx, 0), quad_index(px, 0, degree), i])
                             .unwrap()
                             * *data
-                                .get_mut(tri_index(0, ky), quad_index(0, py, degree), i)
+                                .get_mut([tri_index(0, ky), quad_index(0, py, degree), i])
                                 .unwrap();
                     }
                 }
@@ -192,24 +198,27 @@ fn tabulate_legendre_polynomials_quadrilateral<T: RandomAccessByRef<Item = f64>
     }
 }
 /// Tabulate orthonormal polynomials on a triangle
-fn tabulate_legendre_polynomials_triangle<T: RandomAccessByRef<Item = f64> + Shape>(
+fn tabulate_legendre_polynomials_triangle<
+    T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+    T3Mut: RandomAccessMut<3, Item = f64> + RandomAccessByRef<3, Item = f64> + Shape<3>,
+>(
     points: &T,
     degree: usize,
     derivatives: usize,
-    data: &mut impl Array3DAccess<f64>,
+    data: &mut T3Mut,
 ) {
-    assert_eq!(data.shape().0, (derivatives + 1) * (derivatives + 2) / 2);
-    assert_eq!(data.shape().1, (degree + 1) * (degree + 2) / 2);
-    assert_eq!(data.shape().2, points.shape().0);
-    assert_eq!(points.shape().1, 2);
+    assert_eq!(data.shape()[0], (derivatives + 1) * (derivatives + 2) / 2);
+    assert_eq!(data.shape()[1], (degree + 1) * (degree + 2) / 2);
+    assert_eq!(data.shape()[2], points.shape()[0]);
+    assert_eq!(points.shape()[1], 2);
 
-    for i in 0..data.shape().2 {
-        *data.get_mut(tri_index(0, 0), tri_index(0, 0), i).unwrap() = f64::sqrt(2.0);
+    for i in 0..data.shape()[2] {
+        *data.get_mut([tri_index(0, 0), tri_index(0, 0), i]).unwrap() = f64::sqrt(2.0);
     }
 
-    for k in 1..data.shape().0 {
-        for i in 0..data.shape().2 {
-            *data.get_mut(k, tri_index(0, 0), i).unwrap() = 0.0;
+    for k in 1..data.shape()[0] {
+        for i in 0..data.shape()[2] {
+            *data.get_mut([k, tri_index(0, 0), i]).unwrap() = 0.0;
         }
     }
 
@@ -219,30 +228,38 @@ fn tabulate_legendre_polynomials_triangle<T: RandomAccessByRef<Item = f64> + Sha
                 let a = 2.0 - 1.0 / p as f64;
                 let scale1 =
                     f64::sqrt((p as f64 + 0.5) * (p as f64 + 1.0) / ((p as f64 - 0.5) * p as f64));
-                for i in 0..data.shape().2 {
-                    *data.get_mut(tri_index(kx, ky), tri_index(0, p), i).unwrap() =
-                        (*points.get(i, 0).unwrap() * 2.0 + *points.get(i, 1).unwrap() - 1.0)
-                            * *data.get(tri_index(kx, ky), tri_index(0, p - 1), i).unwrap()
+                for i in 0..data.shape()[2] {
+                    *data
+                        .get_mut([tri_index(kx, ky), tri_index(0, p), i])
+                        .unwrap() =
+                        (*points.get([i, 0]).unwrap() * 2.0 + *points.get([i, 1]).unwrap() - 1.0)
+                            * *data
+                                .get([tri_index(kx, ky), tri_index(0, p - 1), i])
+                                .unwrap()
                             * a
                             * scale1;
                 }
                 if kx > 0 {
-                    for i in 0..data.shape().2 {
-                        *data.get_mut(tri_index(kx, ky), tri_index(0, p), i).unwrap() += 2.0
+                    for i in 0..data.shape()[2] {
+                        *data
+                            .get_mut([tri_index(kx, ky), tri_index(0, p), i])
+                            .unwrap() += 2.0
                             * kx as f64
                             * a
                             * *data
-                                .get(tri_index(kx - 1, ky), tri_index(0, p - 1), i)
+                                .get([tri_index(kx - 1, ky), tri_index(0, p - 1), i])
                                 .unwrap()
                             * scale1;
                     }
                 }
                 if ky > 0 {
-                    for i in 0..data.shape().2 {
-                        *data.get_mut(tri_index(kx, ky), tri_index(0, p), i).unwrap() += ky as f64
+                    for i in 0..data.shape()[2] {
+                        *data
+                            .get_mut([tri_index(kx, ky), tri_index(0, p), i])
+                            .unwrap() += ky as f64
                             * a
                             * *data
-                                .get(tri_index(kx, ky - 1), tri_index(0, p - 1), i)
+                                .get([tri_index(kx, ky - 1), tri_index(0, p - 1), i])
                                 .unwrap()
                             * scale1;
                     }
@@ -251,33 +268,40 @@ fn tabulate_legendre_polynomials_triangle<T: RandomAccessByRef<Item = f64> + Sha
                     let scale2 = f64::sqrt((p as f64 + 0.5) * (p as f64 + 1.0))
                         / f64::sqrt((p as f64 - 1.5) * (p as f64 - 1.0));
 
-                    for i in 0..data.shape().2 {
-                        let b = 1.0 - *points.get(i, 1).unwrap();
-                        *data.get_mut(tri_index(kx, ky), tri_index(0, p), i).unwrap() -= b
+                    for i in 0..data.shape()[2] {
+                        let b = 1.0 - *points.get([i, 1]).unwrap();
+                        *data
+                            .get_mut([tri_index(kx, ky), tri_index(0, p), i])
+                            .unwrap() -= b
                             * b
-                            * *data.get(tri_index(kx, ky), tri_index(0, p - 2), i).unwrap()
+                            * *data
+                                .get([tri_index(kx, ky), tri_index(0, p - 2), i])
+                                .unwrap()
                             * (a - 1.0)
                             * scale2;
                     }
                     if ky > 0 {
-                        for i in 0..data.shape().2 {
-                            *data.get_mut(tri_index(kx, ky), tri_index(0, p), i).unwrap() -= 2.0
+                        for i in 0..data.shape()[2] {
+                            *data
+                                .get_mut([tri_index(kx, ky), tri_index(0, p), i])
+                                .unwrap() -= 2.0
                                 * ky as f64
-                                * (*points.get(i, 1).unwrap() - 1.0)
+                                * (*points.get([i, 1]).unwrap() - 1.0)
                                 * *data
-                                    .get(tri_index(kx, ky - 1), tri_index(0, p - 2), i)
+                                    .get([tri_index(kx, ky - 1), tri_index(0, p - 2), i])
                                     .unwrap()
                                 * scale2
                                 * (a - 1.0);
                         }
                     }
                     if ky > 1 {
-                        for i in 0..data.shape().2 {
-                            *data.get_mut(tri_index(kx, ky), tri_index(0, p), i).unwrap() -= ky
-                                as f64
+                        for i in 0..data.shape()[2] {
+                            *data
+                                .get_mut([tri_index(kx, ky), tri_index(0, p), i])
+                                .unwrap() -= ky as f64
                                 * (ky as f64 - 1.0)
                                 * *data
-                                    .get(tri_index(kx, ky - 2), tri_index(0, p - 2), i)
+                                    .get([tri_index(kx, ky - 2), tri_index(0, p - 2), i])
                                     .unwrap()
                                 * scale2
                                 * (a - 1.0);
@@ -287,20 +311,25 @@ fn tabulate_legendre_polynomials_triangle<T: RandomAccessByRef<Item = f64> + Sha
             }
             for p in 0..degree {
                 let scale3 = f64::sqrt((p as f64 + 2.0) / (p as f64 + 1.0));
-                for i in 0..data.shape().2 {
-                    *data.get_mut(tri_index(kx, ky), tri_index(1, p), i).unwrap() =
-                        *data.get(tri_index(kx, ky), tri_index(0, p), i).unwrap()
-                            * scale3
-                            * ((*points.get(i, 1).unwrap() * 2.0 - 1.0) * (1.5 + p as f64)
-                                + 0.5
-                                + p as f64);
+                for i in 0..data.shape()[2] {
+                    *data
+                        .get_mut([tri_index(kx, ky), tri_index(1, p), i])
+                        .unwrap() = *data.get([tri_index(kx, ky), tri_index(0, p), i]).unwrap()
+                        * scale3
+                        * ((*points.get([i, 1]).unwrap() * 2.0 - 1.0) * (1.5 + p as f64)
+                            + 0.5
+                            + p as f64);
                 }
                 if ky > 0 {
-                    for i in 0..data.shape().2 {
-                        *data.get_mut(tri_index(kx, ky), tri_index(1, p), i).unwrap() += 2.0
+                    for i in 0..data.shape()[2] {
+                        *data
+                            .get_mut([tri_index(kx, ky), tri_index(1, p), i])
+                            .unwrap() += 2.0
                             * ky as f64
                             * (1.5 + p as f64)
-                            * *data.get(tri_index(kx, ky - 1), tri_index(0, p), i).unwrap()
+                            * *data
+                                .get([tri_index(kx, ky - 1), tri_index(0, p), i])
+                                .unwrap()
                             * scale3;
                     }
                 }
@@ -315,28 +344,29 @@ fn tabulate_legendre_polynomials_triangle<T: RandomAccessByRef<Item = f64> + Sha
                     let a3 = (q * (2 * p + q + 1) * (2 * p + 2 * q + 3)) as f64
                         / ((q + 1) * (2 * p + q + 2) * (2 * p + 2 * q + 1)) as f64;
 
-                    for i in 0..data.shape().2 {
+                    for i in 0..data.shape()[2] {
                         *data
-                            .get_mut(tri_index(kx, ky), tri_index(q + 1, p), i)
-                            .unwrap() =
-                            *data.get_mut(tri_index(kx, ky), tri_index(q, p), i).unwrap()
-                                * scale4
-                                * ((*points.get(i, 1).unwrap() * 2.0 - 1.0) * a1 + a2)
-                                - *data
-                                    .get_mut(tri_index(kx, ky), tri_index(q - 1, p), i)
-                                    .unwrap()
-                                    * scale5
-                                    * a3;
+                            .get_mut([tri_index(kx, ky), tri_index(q + 1, p), i])
+                            .unwrap() = *data
+                            .get_mut([tri_index(kx, ky), tri_index(q, p), i])
+                            .unwrap()
+                            * scale4
+                            * ((*points.get([i, 1]).unwrap() * 2.0 - 1.0) * a1 + a2)
+                            - *data
+                                .get_mut([tri_index(kx, ky), tri_index(q - 1, p), i])
+                                .unwrap()
+                                * scale5
+                                * a3;
                     }
                     if ky > 0 {
-                        for i in 0..data.shape().2 {
+                        for i in 0..data.shape()[2] {
                             *data
-                                .get_mut(tri_index(kx, ky), tri_index(q + 1, p), i)
+                                .get_mut([tri_index(kx, ky), tri_index(q + 1, p), i])
                                 .unwrap() += 2.0
                                 * ky as f64
                                 * a1
                                 * *data
-                                    .get_mut(tri_index(kx, ky - 1), tri_index(q, p), i)
+                                    .get_mut([tri_index(kx, ky - 1), tri_index(q, p), i])
                                     .unwrap()
                                 * scale4;
                         }
@@ -369,26 +399,29 @@ pub fn derivative_count(cell_type: ReferenceCellType, derivatives: usize) -> usi
     }
 }
 
-pub fn legendre_shape<T: RandomAccessByRef<Item = f64> + Shape>(
+pub fn legendre_shape<T: RandomAccessByRef<2, Item = f64> + Shape<2>>(
     cell_type: ReferenceCellType,
     points: &T,
     degree: usize,
     derivatives: usize,
-) -> (usize, usize, usize) {
-    (
+) -> [usize; 3] {
+    [
         derivative_count(cell_type, derivatives),
         polynomial_count(cell_type, degree),
-        points.shape().0,
-    )
+        points.shape()[0],
+    ]
 }
 
 /// Tabulate orthonormal polynomials
-pub fn tabulate_legendre_polynomials<T: RandomAccessByRef<Item = f64> + Shape>(
+pub fn tabulate_legendre_polynomials<
+    T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+    T3Mut: RandomAccessMut<3, Item = f64> + RandomAccessByRef<3, Item = f64> + Shape<3>,
+>(
     cell_type: ReferenceCellType,
     points: &T,
     degree: usize,
     derivatives: usize,
-    data: &mut impl Array3DAccess<f64>,
+    data: &mut T3Mut,
 ) {
     match cell_type {
         ReferenceCellType::Interval => {
@@ -411,30 +444,30 @@ mod test {
     use crate::polynomials::*;
     use approx::*;
     use bempp_quadrature::simplex_rules::simplex_rule;
-    use bempp_tools::arrays::{transpose_to_matrix, zero_matrix, Array3D};
-    use rlst_dense::RandomAccessMut;
+    use bempp_tools::arrays::{transpose_to_matrix, zero_matrix};
+    use rlst_dense::rlst_dynamic_array3;
+    use rlst_dense::traits::RandomAccessMut;
 
     #[test]
     fn test_legendre_interval() {
         let degree = 6;
 
         let rule = simplex_rule(ReferenceCellType::Interval, degree + 1).unwrap();
-        let points = transpose_to_matrix(&rule.points, (rule.npoints, 1));
+        let points = transpose_to_matrix(&rule.points, [rule.npoints, 1]);
 
-        let mut data = Array3D::<f64>::new(legendre_shape(
-            ReferenceCellType::Interval,
-            &points,
-            degree,
-            0,
-        ));
+        let mut data = rlst_dynamic_array3!(
+            f64,
+            legendre_shape(ReferenceCellType::Interval, &points, degree, 0,)
+        );
         tabulate_legendre_polynomials(ReferenceCellType::Interval, &points, degree, 0, &mut data);
 
         for i in 0..degree + 1 {
             for j in 0..degree + 1 {
                 let mut product = 0.0;
                 for k in 0..rule.npoints {
-                    product +=
-                        data.get(0, i, k).unwrap() * data.get(0, j, k).unwrap() * rule.weights[k];
+                    product += data.get([0, i, k]).unwrap()
+                        * data.get([0, j, k]).unwrap()
+                        * rule.weights[k];
                 }
                 if i == j {
                     assert_relative_eq!(product, 1.0, epsilon = 1e-12);
@@ -450,22 +483,21 @@ mod test {
         let degree = 5;
 
         let rule = simplex_rule(ReferenceCellType::Triangle, 79).unwrap();
-        let points = transpose_to_matrix(&rule.points, (rule.npoints, 2));
+        let points = transpose_to_matrix(&rule.points, [rule.npoints, 2]);
 
-        let mut data = Array3D::<f64>::new(legendre_shape(
-            ReferenceCellType::Triangle,
-            &points,
-            degree,
-            0,
-        ));
+        let mut data = rlst_dynamic_array3!(
+            f64,
+            legendre_shape(ReferenceCellType::Triangle, &points, degree, 0,)
+        );
         tabulate_legendre_polynomials(ReferenceCellType::Triangle, &points, degree, 0, &mut data);
 
-        for i in 0..data.shape().1 {
-            for j in 0..data.shape().1 {
+        for i in 0..data.shape()[1] {
+            for j in 0..data.shape()[1] {
                 let mut product = 0.0;
                 for k in 0..rule.npoints {
-                    product +=
-                        data.get(0, i, k).unwrap() * data.get(0, j, k).unwrap() * rule.weights[k];
+                    product += data.get([0, i, k]).unwrap()
+                        * data.get([0, j, k]).unwrap()
+                        * rule.weights[k];
                 }
                 if i == j {
                     assert_relative_eq!(product, 1.0, epsilon = 1e-12);
@@ -481,14 +513,12 @@ mod test {
         let degree = 5;
 
         let rule = simplex_rule(ReferenceCellType::Quadrilateral, 85).unwrap();
-        let points = transpose_to_matrix(&rule.points, (rule.npoints, 2));
+        let points = transpose_to_matrix(&rule.points, [rule.npoints, 2]);
 
-        let mut data = Array3D::<f64>::new(legendre_shape(
-            ReferenceCellType::Quadrilateral,
-            &points,
-            degree,
-            0,
-        ));
+        let mut data = rlst_dynamic_array3!(
+            f64,
+            legendre_shape(ReferenceCellType::Quadrilateral, &points, degree, 0,)
+        );
         tabulate_legendre_polynomials(
             ReferenceCellType::Quadrilateral,
             &points,
@@ -497,12 +527,13 @@ mod test {
             &mut data,
         );
 
-        for i in 0..data.shape().1 {
-            for j in 0..data.shape().1 {
+        for i in 0..data.shape()[1] {
+            for j in 0..data.shape()[1] {
                 let mut product = 0.0;
                 for k in 0..rule.npoints {
-                    product +=
-                        data.get(0, i, k).unwrap() * data.get(0, j, k).unwrap() * rule.weights[k];
+                    product += data.get([0, i, k]).unwrap()
+                        * data.get([0, j, k]).unwrap()
+                        * rule.weights[k];
                 }
                 if i == j {
                     assert_relative_eq!(product, 1.0, epsilon = 1e-12);
@@ -518,25 +549,24 @@ mod test {
         let degree = 6;
 
         let epsilon = 1e-10;
-        let mut points = zero_matrix((20, 1));
+        let mut points = zero_matrix([20, 1]);
         for i in 0..10 {
-            *points.get_mut(2 * i, 0).unwrap() = i as f64 / 10.0;
-            *points.get_mut(2 * i + 1, 0).unwrap() = points.get(2 * i, 0).unwrap() + epsilon;
+            *points.get_mut([2 * i, 0]).unwrap() = i as f64 / 10.0;
+            *points.get_mut([2 * i + 1, 0]).unwrap() = points.get([2 * i, 0]).unwrap() + epsilon;
         }
 
-        let mut data = Array3D::<f64>::new(legendre_shape(
-            ReferenceCellType::Interval,
-            &points,
-            degree,
-            1,
-        ));
+        let mut data = rlst_dynamic_array3!(
+            f64,
+            legendre_shape(ReferenceCellType::Interval, &points, degree, 1,)
+        );
         tabulate_legendre_polynomials(ReferenceCellType::Interval, &points, degree, 1, &mut data);
 
         for i in 0..degree + 1 {
-            for k in 0..points.shape().0 / 2 {
+            for k in 0..points.shape()[0] / 2 {
                 assert_relative_eq!(
-                    *data.get(1, i, 2 * k).unwrap(),
-                    (data.get(0, i, 2 * k + 1).unwrap() - data.get(0, i, 2 * k).unwrap()) / epsilon,
+                    *data.get([1, i, 2 * k]).unwrap(),
+                    (data.get([0, i, 2 * k + 1]).unwrap() - data.get([0, i, 2 * k]).unwrap())
+                        / epsilon,
                     epsilon = 1e-4
                 );
             }
@@ -548,40 +578,40 @@ mod test {
         let degree = 6;
 
         let epsilon = 1e-10;
-        let mut points = zero_matrix((165, 2));
+        let mut points = zero_matrix([165, 2]);
         let mut index = 0;
         for i in 0..10 {
             for j in 0..10 - i {
-                *points.get_mut(3 * index, 0).unwrap() = i as f64 / 10.0;
-                *points.get_mut(3 * index, 1).unwrap() = j as f64 / 10.0;
-                *points.get_mut(3 * index + 1, 0).unwrap() =
-                    *points.get(3 * index, 0).unwrap() + epsilon;
-                *points.get_mut(3 * index + 1, 1).unwrap() = *points.get(3 * index, 1).unwrap();
-                *points.get_mut(3 * index + 2, 0).unwrap() = *points.get(3 * index, 0).unwrap();
-                *points.get_mut(3 * index + 2, 1).unwrap() =
-                    *points.get(3 * index, 1).unwrap() + epsilon;
+                *points.get_mut([3 * index, 0]).unwrap() = i as f64 / 10.0;
+                *points.get_mut([3 * index, 1]).unwrap() = j as f64 / 10.0;
+                *points.get_mut([3 * index + 1, 0]).unwrap() =
+                    *points.get([3 * index, 0]).unwrap() + epsilon;
+                *points.get_mut([3 * index + 1, 1]).unwrap() = *points.get([3 * index, 1]).unwrap();
+                *points.get_mut([3 * index + 2, 0]).unwrap() = *points.get([3 * index, 0]).unwrap();
+                *points.get_mut([3 * index + 2, 1]).unwrap() =
+                    *points.get([3 * index, 1]).unwrap() + epsilon;
                 index += 1;
             }
         }
 
-        let mut data = Array3D::<f64>::new(legendre_shape(
-            ReferenceCellType::Triangle,
-            &points,
-            degree,
-            1,
-        ));
+        let mut data = rlst_dynamic_array3!(
+            f64,
+            legendre_shape(ReferenceCellType::Triangle, &points, degree, 1,)
+        );
         tabulate_legendre_polynomials(ReferenceCellType::Triangle, &points, degree, 1, &mut data);
 
         for i in 0..degree + 1 {
-            for k in 0..points.shape().0 / 3 {
+            for k in 0..points.shape()[0] / 3 {
                 assert_relative_eq!(
-                    *data.get(1, i, 3 * k).unwrap(),
-                    (data.get(0, i, 3 * k + 1).unwrap() - data.get(0, i, 3 * k).unwrap()) / epsilon,
+                    *data.get([1, i, 3 * k]).unwrap(),
+                    (data.get([0, i, 3 * k + 1]).unwrap() - data.get([0, i, 3 * k]).unwrap())
+                        / epsilon,
                     epsilon = 1e-4
                 );
                 assert_relative_eq!(
-                    *data.get(2, i, 3 * k).unwrap(),
-                    (data.get(0, i, 3 * k + 2).unwrap() - data.get(0, i, 3 * k).unwrap()) / epsilon,
+                    *data.get([2, i, 3 * k]).unwrap(),
+                    (data.get([0, i, 3 * k + 2]).unwrap() - data.get([0, i, 3 * k]).unwrap())
+                        / epsilon,
                     epsilon = 1e-4
                 );
             }
@@ -593,27 +623,25 @@ mod test {
         let degree = 6;
 
         let epsilon = 1e-10;
-        let mut points = zero_matrix((300, 2));
+        let mut points = zero_matrix([300, 2]);
         for i in 0..10 {
             for j in 0..10 {
                 let index = 10 * i + j;
-                *points.get_mut(3 * index, 0).unwrap() = i as f64 / 10.0;
-                *points.get_mut(3 * index, 1).unwrap() = j as f64 / 10.0;
-                *points.get_mut(3 * index + 1, 0).unwrap() =
-                    *points.get(3 * index, 0).unwrap() + epsilon;
-                *points.get_mut(3 * index + 1, 1).unwrap() = *points.get(3 * index, 1).unwrap();
-                *points.get_mut(3 * index + 2, 0).unwrap() = *points.get(3 * index, 0).unwrap();
-                *points.get_mut(3 * index + 2, 1).unwrap() =
-                    *points.get(3 * index, 1).unwrap() + epsilon;
+                *points.get_mut([3 * index, 0]).unwrap() = i as f64 / 10.0;
+                *points.get_mut([3 * index, 1]).unwrap() = j as f64 / 10.0;
+                *points.get_mut([3 * index + 1, 0]).unwrap() =
+                    *points.get([3 * index, 0]).unwrap() + epsilon;
+                *points.get_mut([3 * index + 1, 1]).unwrap() = *points.get([3 * index, 1]).unwrap();
+                *points.get_mut([3 * index + 2, 0]).unwrap() = *points.get([3 * index, 0]).unwrap();
+                *points.get_mut([3 * index + 2, 1]).unwrap() =
+                    *points.get([3 * index, 1]).unwrap() + epsilon;
             }
         }
 
-        let mut data = Array3D::<f64>::new(legendre_shape(
-            ReferenceCellType::Quadrilateral,
-            &points,
-            degree,
-            1,
-        ));
+        let mut data = rlst_dynamic_array3!(
+            f64,
+            legendre_shape(ReferenceCellType::Quadrilateral, &points, degree, 1,)
+        );
         tabulate_legendre_polynomials(
             ReferenceCellType::Quadrilateral,
             &points,
@@ -623,15 +651,17 @@ mod test {
         );
 
         for i in 0..degree + 1 {
-            for k in 0..points.shape().0 / 3 {
+            for k in 0..points.shape()[0] / 3 {
                 assert_relative_eq!(
-                    *data.get(1, i, 3 * k).unwrap(),
-                    (data.get(0, i, 3 * k + 1).unwrap() - data.get(0, i, 3 * k).unwrap()) / epsilon,
+                    *data.get([1, i, 3 * k]).unwrap(),
+                    (data.get([0, i, 3 * k + 1]).unwrap() - data.get([0, i, 3 * k]).unwrap())
+                        / epsilon,
                     epsilon = 1e-4
                 );
                 assert_relative_eq!(
-                    *data.get(2, i, 3 * k).unwrap(),
-                    (data.get(0, i, 3 * k + 2).unwrap() - data.get(0, i, 3 * k).unwrap()) / epsilon,
+                    *data.get([2, i, 3 * k]).unwrap(),
+                    (data.get([0, i, 3 * k + 2]).unwrap() - data.get([0, i, 3 * k]).unwrap())
+                        / epsilon,
                     epsilon = 1e-4
                 );
             }
@@ -642,78 +672,76 @@ mod test {
     fn test_legendre_interval_against_known_polynomials() {
         let degree = 3;
 
-        let mut points = zero_matrix((11, 1));
+        let mut points = zero_matrix([11, 1]);
         for i in 0..11 {
-            *points.get_mut(i, 0).unwrap() = i as f64 / 10.0;
+            *points.get_mut([i, 0]).unwrap() = i as f64 / 10.0;
         }
 
-        let mut data = Array3D::<f64>::new(legendre_shape(
-            ReferenceCellType::Interval,
-            &points,
-            degree,
-            3,
-        ));
+        let mut data = rlst_dynamic_array3!(
+            f64,
+            legendre_shape(ReferenceCellType::Interval, &points, degree, 3,)
+        );
         tabulate_legendre_polynomials(ReferenceCellType::Interval, &points, degree, 3, &mut data);
 
-        for k in 0..points.shape().0 {
-            let x = *points.get(k, 0).unwrap();
+        for k in 0..points.shape()[0] {
+            let x = *points.get([k, 0]).unwrap();
 
             // 0 => 1
-            assert_relative_eq!(*data.get(0, 0, k).unwrap(), 1.0, epsilon = 1e-12);
-            assert_relative_eq!(*data.get(1, 0, k).unwrap(), 0.0, epsilon = 1e-12);
-            assert_relative_eq!(*data.get(2, 0, k).unwrap(), 0.0, epsilon = 1e-12);
-            assert_relative_eq!(*data.get(3, 0, k).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([0, 0, k]).unwrap(), 1.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([1, 0, k]).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([2, 0, k]).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([3, 0, k]).unwrap(), 0.0, epsilon = 1e-12);
 
             // 1 => sqrt(3)*(2x - 1)
             assert_relative_eq!(
-                *data.get(0, 1, k).unwrap(),
+                *data.get([0, 1, k]).unwrap(),
                 f64::sqrt(3.0) * (2.0 * x - 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(1, 1, k).unwrap(),
+                *data.get([1, 1, k]).unwrap(),
                 2.0 * f64::sqrt(3.0),
                 epsilon = 1e-12
             );
-            assert_relative_eq!(*data.get(2, 1, k).unwrap(), 0.0, epsilon = 1e-12);
-            assert_relative_eq!(*data.get(3, 1, k).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([2, 1, k]).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([3, 1, k]).unwrap(), 0.0, epsilon = 1e-12);
 
             // 2 => sqrt(5)*(6x^2 - 6x + 1)
             assert_relative_eq!(
-                *data.get(0, 2, k).unwrap(),
+                *data.get([0, 2, k]).unwrap(),
                 f64::sqrt(5.0) * (6.0 * x * x - 6.0 * x + 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(1, 2, k).unwrap(),
+                *data.get([1, 2, k]).unwrap(),
                 f64::sqrt(5.0) * (12.0 * x - 6.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(2, 2, k).unwrap(),
+                *data.get([2, 2, k]).unwrap(),
                 f64::sqrt(5.0) * 12.0,
                 epsilon = 1e-12
             );
-            assert_relative_eq!(*data.get(3, 2, k).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([3, 2, k]).unwrap(), 0.0, epsilon = 1e-12);
 
             // 3 => sqrt(7)*(20x^3 - 30x^2 + 12x - 1)
             assert_relative_eq!(
-                *data.get(0, 3, k).unwrap(),
+                *data.get([0, 3, k]).unwrap(),
                 f64::sqrt(7.0) * (20.0 * x * x * x - 30.0 * x * x + 12.0 * x - 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(1, 3, k).unwrap(),
+                *data.get([1, 3, k]).unwrap(),
                 f64::sqrt(7.0) * (60.0 * x * x - 60.0 * x + 12.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(2, 3, k).unwrap(),
+                *data.get([2, 3, k]).unwrap(),
                 f64::sqrt(7.0) * (120.0 * x - 60.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(3, 3, k).unwrap(),
+                *data.get([3, 3, k]).unwrap(),
                 f64::sqrt(7.0) * 120.0,
                 epsilon = 1e-12
             );
@@ -724,20 +752,18 @@ mod test {
     fn test_legendre_quadrilateral_against_known_polynomials() {
         let degree = 2;
 
-        let mut points = zero_matrix((121, 2));
+        let mut points = zero_matrix([121, 2]);
         for i in 0..11 {
             for j in 0..11 {
-                *points.get_mut(11 * i + j, 0).unwrap() = i as f64 / 10.0;
-                *points.get_mut(11 * i + j, 1).unwrap() = j as f64 / 10.0;
+                *points.get_mut([11 * i + j, 0]).unwrap() = i as f64 / 10.0;
+                *points.get_mut([11 * i + j, 1]).unwrap() = j as f64 / 10.0;
             }
         }
 
-        let mut data = Array3D::<f64>::new(legendre_shape(
-            ReferenceCellType::Quadrilateral,
-            &points,
-            degree,
-            1,
-        ));
+        let mut data = rlst_dynamic_array3!(
+            f64,
+            legendre_shape(ReferenceCellType::Quadrilateral, &points, degree, 1,)
+        );
         tabulate_legendre_polynomials(
             ReferenceCellType::Quadrilateral,
             &points,
@@ -746,132 +772,132 @@ mod test {
             &mut data,
         );
 
-        for k in 0..points.shape().0 {
-            let x = *points.get(k, 0).unwrap();
-            let y = *points.get(k, 1).unwrap();
+        for k in 0..points.shape()[0] {
+            let x = *points.get([k, 0]).unwrap();
+            let y = *points.get([k, 1]).unwrap();
 
             // 0 => 1
-            assert_relative_eq!(*data.get(0, 0, k).unwrap(), 1.0, epsilon = 1e-12);
-            assert_relative_eq!(*data.get(1, 0, k).unwrap(), 0.0, epsilon = 1e-12);
-            assert_relative_eq!(*data.get(2, 0, k).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([0, 0, k]).unwrap(), 1.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([1, 0, k]).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([2, 0, k]).unwrap(), 0.0, epsilon = 1e-12);
 
             // 1 => sqrt(3)*(2x - 1)
             assert_relative_eq!(
-                *data.get(0, 1, k).unwrap(),
+                *data.get([0, 1, k]).unwrap(),
                 f64::sqrt(3.0) * (2.0 * x - 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(1, 1, k).unwrap(),
+                *data.get([1, 1, k]).unwrap(),
                 2.0 * f64::sqrt(3.0),
                 epsilon = 1e-12
             );
-            assert_relative_eq!(*data.get(2, 1, k).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([2, 1, k]).unwrap(), 0.0, epsilon = 1e-12);
 
             // 2 => sqrt(5)*(6x^2 - 6x + 1)
             assert_relative_eq!(
-                *data.get(0, 2, k).unwrap(),
+                *data.get([0, 2, k]).unwrap(),
                 f64::sqrt(5.0) * (6.0 * x * x - 6.0 * x + 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(1, 2, k).unwrap(),
+                *data.get([1, 2, k]).unwrap(),
                 f64::sqrt(5.0) * (12.0 * x - 6.0),
                 epsilon = 1e-12
             );
-            assert_relative_eq!(*data.get(2, 2, k).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([2, 2, k]).unwrap(), 0.0, epsilon = 1e-12);
 
             // 3 => sqrt(3)*(2y - 1)
             assert_relative_eq!(
-                *data.get(0, 3, k).unwrap(),
+                *data.get([0, 3, k]).unwrap(),
                 f64::sqrt(3.0) * (2.0 * y - 1.0),
                 epsilon = 1e-12
             );
 
-            assert_relative_eq!(*data.get(1, 3, k).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([1, 3, k]).unwrap(), 0.0, epsilon = 1e-12);
             assert_relative_eq!(
-                *data.get(2, 3, k).unwrap(),
+                *data.get([2, 3, k]).unwrap(),
                 2.0 * f64::sqrt(3.0),
                 epsilon = 1e-12
             );
 
             // 4 => 3*(2x - 1)*(2y - 1)
             assert_relative_eq!(
-                *data.get(0, 4, k).unwrap(),
+                *data.get([0, 4, k]).unwrap(),
                 3.0 * (2.0 * x - 1.0) * (2.0 * y - 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(1, 4, k).unwrap(),
+                *data.get([1, 4, k]).unwrap(),
                 6.0 * (2.0 * y - 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(2, 4, k).unwrap(),
+                *data.get([2, 4, k]).unwrap(),
                 6.0 * (2.0 * x - 1.0),
                 epsilon = 1e-12
             );
 
             // 5 => sqrt(15)*(6x^2 - 6x + 1)*(2y - 1)
             assert_relative_eq!(
-                *data.get(0, 5, k).unwrap(),
+                *data.get([0, 5, k]).unwrap(),
                 f64::sqrt(15.0) * (6.0 * x * x - 6.0 * x + 1.0) * (2.0 * y - 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(1, 5, k).unwrap(),
+                *data.get([1, 5, k]).unwrap(),
                 f64::sqrt(15.0) * (12.0 * x - 6.0) * (2.0 * y - 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(2, 5, k).unwrap(),
+                *data.get([2, 5, k]).unwrap(),
                 2.0 * f64::sqrt(15.0) * (6.0 * x * x - 6.0 * x + 1.0),
                 epsilon = 1e-12
             );
 
             // 6 => sqrt(5)*(6y^2 - 6y + 1)
             assert_relative_eq!(
-                *data.get(0, 6, k).unwrap(),
+                *data.get([0, 6, k]).unwrap(),
                 f64::sqrt(5.0) * (6.0 * y * y - 6.0 * y + 1.0),
                 epsilon = 1e-12
             );
-            assert_relative_eq!(*data.get(1, 6, k).unwrap(), 0.0, epsilon = 1e-12);
+            assert_relative_eq!(*data.get([1, 6, k]).unwrap(), 0.0, epsilon = 1e-12);
             assert_relative_eq!(
-                *data.get(2, 6, k).unwrap(),
+                *data.get([2, 6, k]).unwrap(),
                 f64::sqrt(5.0) * (12.0 * y - 6.0),
                 epsilon = 1e-12
             );
 
             // 7 => sqrt(15)*(2x - 1)*(6y^2 - 6y + 1)
             assert_relative_eq!(
-                *data.get(0, 7, k).unwrap(),
+                *data.get([0, 7, k]).unwrap(),
                 f64::sqrt(15.0) * (2.0 * x - 1.0) * (6.0 * y * y - 6.0 * y + 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(1, 7, k).unwrap(),
+                *data.get([1, 7, k]).unwrap(),
                 2.0 * f64::sqrt(15.0) * (6.0 * y * y - 6.0 * y + 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(2, 7, k).unwrap(),
+                *data.get([2, 7, k]).unwrap(),
                 f64::sqrt(15.0) * (2.0 * x - 1.0) * (12.0 * y - 6.0),
                 epsilon = 1e-12
             );
 
             // 8 => 5*(6x^2 - 6x + 1)*(6y^2 - 6y + 1)
             assert_relative_eq!(
-                *data.get(0, 8, k).unwrap(),
+                *data.get([0, 8, k]).unwrap(),
                 5.0 * (6.0 * x * x - 6.0 * x + 1.0) * (6.0 * y * y - 6.0 * y + 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(1, 8, k).unwrap(),
+                *data.get([1, 8, k]).unwrap(),
                 5.0 * (12.0 * x - 6.0) * (6.0 * y * y - 6.0 * y + 1.0),
                 epsilon = 1e-12
             );
             assert_relative_eq!(
-                *data.get(2, 8, k).unwrap(),
+                *data.get([2, 8, k]).unwrap(),
                 5.0 * (12.0 * y - 6.0) * (6.0 * x * x - 6.0 * x + 1.0),
                 epsilon = 1e-12
             );
diff --git a/field/Cargo.toml b/field/Cargo.toml
index a443186b..0eb1d80b 100644
--- a/field/Cargo.toml
+++ b/field/Cargo.toml
@@ -28,11 +28,15 @@ bempp-kernel = { path = "../kernel" }
 bempp-tools = { path = "../tools" }
 itertools = "0.10"
 num = "0.4"
-rlst = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
+rlst = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-dense = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-blis = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-common = { git = "https://github.com/linalg-rs/rlst.git" }
 fftw = {git = "https://github.com/skailasa/fftw.git" }
 cauchy = "0.4.*"
 dashmap = {version = "5.5.0", features=["rayon"]}
+approx = "0.5"
 rayon = "1.7"
 
 [dev-dependencies]
-approx_eq = "0.1.8"
\ No newline at end of file
+approx_eq = "0.1.8"
diff --git a/field/src/array.rs b/field/src/array.rs
index d442cb4c..00528be4 100644
--- a/field/src/array.rs
+++ b/field/src/array.rs
@@ -5,7 +5,11 @@ use itertools::Itertools;
 use num::traits::Num;
 
 use bempp_tools::Array3D;
-use bempp_traits::arrays::Array3DAccess;
+use rlst_common::types::Scalar;
+use rlst_dense::{
+    rlst_dynamic_array3,
+    traits::{RandomAccessByRef, RandomAccessMut, Shape},
+};
 
 /// Return indices that sort a vec.
 ///
@@ -25,7 +29,7 @@ pub fn argsort<T: Ord>(arr: &[T]) -> Vec<usize> {
 /// * `arr` - An array to be padded.
 /// * `pad_size` - The amount of padding to be added along each axis.
 /// * `pad_index` - The position in the array to start the padding from.
-pub fn pad3<T>(
+pub fn pad3<T: Scalar>(
     arr: &Array3D<T>,
     pad_size: (usize, usize, usize),
     pad_index: (usize, usize, usize),
@@ -33,7 +37,7 @@ pub fn pad3<T>(
 where
     T: Clone + Copy + Num,
 {
-    let &(m, n, o) = arr.shape();
+    let [m, n, o] = arr.shape();
 
     let (x, y, z) = pad_index;
     let (p, q, r) = pad_size;
@@ -41,12 +45,12 @@ where
     // Check that there is enough space for pad
     assert!(x + p <= m + p && y + q <= n + q && z + r <= o + r);
 
-    let mut padded = Array3D::new((p + m, q + n, r + o));
+    let mut padded = rlst_dynamic_array3!(T, [p + m, q + n, r + o]);
 
     for i in 0..m {
         for j in 0..n {
             for k in 0..o {
-                *padded.get_mut(x + i, y + j, z + k).unwrap() = *arr.get(i, j, k).unwrap();
+                *padded.get_mut([x + i, y + j, z + k]).unwrap() = *arr.get([i, j, k]).unwrap();
             }
         }
     }
@@ -58,19 +62,19 @@ where
 ///
 /// # Arguments
 /// * `arr` - An array to be flipped.
-pub fn flip3<T>(arr: &Array3D<T>) -> Array3D<T>
+pub fn flip3<T: Scalar>(arr: &Array3D<T>) -> Array3D<T>
 where
     T: Clone + Copy + Num,
 {
-    let mut flipped = Array3D::new(*arr.shape());
+    let mut flipped = rlst_dynamic_array3!(T, arr.shape());
 
-    let &(m, n, o) = arr.shape();
+    let [m, n, o] = arr.shape();
 
     for i in 0..m {
         for j in 0..n {
             for k in 0..o {
-                *flipped.get_mut(i, j, k).unwrap() =
-                    *arr.get(m - i - 1, n - j - 1, o - k - 1).unwrap();
+                *flipped.get_mut([i, j, k]).unwrap() =
+                    *arr.get([m - i - 1, n - j - 1, o - k - 1]).unwrap();
             }
         }
     }
@@ -82,6 +86,7 @@ where
 mod test {
 
     use super::*;
+    use approx::*;
 
     #[test]
     fn test_argsort() {
@@ -103,28 +108,34 @@ mod test {
     #[test]
     fn test_flip3() {
         let n = 2;
-        let mut arr: Array3D<usize> = Array3D::new((n, n, n));
+        let mut arr = rlst_dynamic_array3!(f64, [n, n, n]);
         for i in 0..n {
             for j in 0..n {
                 for k in 0..n {
-                    *arr.get_mut(i, j, k).unwrap() = i + j * n + k * n * n;
+                    *arr.get_mut([i, j, k]).unwrap() = (i + j * n + k * n * n) as f64;
                 }
             }
         }
-        let expected = vec![7, 3, 5, 1, 6, 2, 4, 0];
-        let result = flip3(&arr).get_data().to_vec();
-        assert_eq!(result, expected);
+        let result = flip3(&arr);
+        assert_relative_eq!(*result.get([0, 0, 0]).unwrap(), 7.0);
+        assert_relative_eq!(*result.get([0, 0, 1]).unwrap(), 3.0);
+        assert_relative_eq!(*result.get([0, 1, 0]).unwrap(), 5.0);
+        assert_relative_eq!(*result.get([0, 1, 1]).unwrap(), 1.0);
+        assert_relative_eq!(*result.get([1, 0, 0]).unwrap(), 6.0);
+        assert_relative_eq!(*result.get([1, 0, 1]).unwrap(), 2.0);
+        assert_relative_eq!(*result.get([1, 1, 0]).unwrap(), 4.0);
+        assert_relative_eq!(*result.get([1, 1, 1]).unwrap(), 0.0);
     }
 
     #[test]
     fn test_pad3() {
         let dim = 3;
         // Initialise input data
-        let mut input = Array3D::new((dim, dim, dim));
+        let mut input = rlst_dynamic_array3!(f64, [dim, dim, dim]);
         for i in 0..dim {
             for j in 0..dim {
                 for k in 0..dim {
-                    *input.get_mut(i, j, k).unwrap() = (i + j * dim + k * dim * dim + 1) as f64
+                    *input.get_mut([i, j, k]).unwrap() = (i + j * dim + k * dim * dim + 1) as f64
                 }
             }
         }
@@ -134,7 +145,7 @@ mod test {
         let pad_index = (0, 0, 0);
         let padded = pad3(&input, pad_size, pad_index);
 
-        let &(m, n, o) = padded.shape();
+        let [m, n, o] = padded.shape();
 
         // Check dimension
         assert_eq!(m, dim + pad_size.0);
@@ -145,7 +156,7 @@ mod test {
         for i in dim..m {
             for j in dim..n {
                 for k in dim..o {
-                    assert_eq!(*padded.get(i, j, k).unwrap(), 0f64)
+                    assert_eq!(*padded.get([i, j, k]).unwrap(), 0f64)
                 }
             }
         }
@@ -153,7 +164,10 @@ mod test {
         for i in 0..dim {
             for j in 0..dim {
                 for k in 0..dim {
-                    assert_eq!(*padded.get(i, j, k).unwrap(), *input.get(i, j, k).unwrap())
+                    assert_eq!(
+                        *padded.get([i, j, k]).unwrap(),
+                        *input.get([i, j, k]).unwrap()
+                    )
                 }
             }
         }
@@ -167,7 +181,7 @@ mod test {
         for i in 0..pad_index.0 {
             for j in 0..pad_index.1 {
                 for k in 0..pad_index.2 {
-                    assert_eq!(*padded.get(i, j, k).unwrap(), 0f64)
+                    assert_eq!(*padded.get([i, j, k]).unwrap(), 0f64)
                 }
             }
         }
@@ -177,9 +191,9 @@ mod test {
                 for k in 0..dim {
                     assert_eq!(
                         *padded
-                            .get(i + pad_index.0, j + pad_index.1, k + pad_index.2)
+                            .get([i + pad_index.0, j + pad_index.1, k + pad_index.2])
                             .unwrap(),
-                        *input.get(i, j, k).unwrap()
+                        *input.get([i, j, k]).unwrap()
                     );
                 }
             }
diff --git a/field/src/field.rs b/field/src/field.rs
index 97f79343..a9bf7a0a 100644
--- a/field/src/field.rs
+++ b/field/src/field.rs
@@ -1,26 +1,24 @@
 //! Implementation of traits for field translations via the FFT and SVD.
-use cauchy::Scalar;
 use itertools::Itertools;
 use num::Zero;
 use num::{Complex, Float};
-use rlst::dense::LayoutType;
-use rlst::{
-    algorithms::{
-        linalg::{DenseMatrixLinAlgBuilder, LinAlg},
-        traits::svd::{Mode, Svd},
-    },
-    common::traits::{Eval, Transpose},
-    dense::{
-        rlst_dynamic_mat, rlst_pointer_mat, Dot, Dynamic, MultiplyAdd, RawAccess, RawAccessMut,
-        Shape, VectorContainer,
+use rlst_blis::interface::gemm::Gemm;
+use rlst_common::types::Scalar;
+use rlst_dense::{
+    array::{empty_array, Array},
+    base_array::BaseArray,
+    data_container::VectorContainer,
+    linalg::svd::SvdMode,
+    rlst_dynamic_array2, rlst_dynamic_array3,
+    traits::{
+        MatrixSvd, MultIntoResize, RawAccess, RawAccessMut, Shape, UnsafeRandomAccessByRef,
+        UnsafeRandomAccessMut,
     },
 };
 use std::collections::HashSet;
 
-use bempp_tools::Array3D;
-use bempp_traits::{
-    arrays::Array3DAccess, field::FieldTranslationData, kernel::Kernel, types::EvalType,
-};
+use bempp_tools::arrays::Array3D;
+use bempp_traits::{field::FieldTranslationData, kernel::Kernel, types::EvalType};
 use bempp_tree::{
     implementations::helpers::find_corners, types::domain::Domain, types::morton::MortonKey,
 };
@@ -37,20 +35,10 @@ use crate::{
 
 impl<T, U> FieldTranslationData<U> for SvdFieldTranslationKiFmm<T, U>
 where
-    T: Float
-        + Default
-        + MultiplyAdd<
-            T,
-            VectorContainer<T>,
-            VectorContainer<T>,
-            VectorContainer<T>,
-            Dynamic,
-            Dynamic,
-            Dynamic,
-        >,
-    DenseMatrixLinAlgBuilder<T>: Svd,
-    T: Scalar<Real = T>,
+    T: Float + Default,
+    T: Scalar<Real = T> + Gemm,
     U: Kernel<T = T> + Default,
+    Array<T, BaseArray<T, VectorContainer<T>, 2>, 2>: MatrixSvd<Item = T>,
 {
     type TransferVector = Vec<TransferVector>;
     type M2LOperators = SvdM2lOperatorData<T>;
@@ -68,8 +56,8 @@ where
         let ncols = self.ncoeffs(order);
 
         let ntransfer_vectors = self.transfer_vectors.len();
-        let mut se2tc_fat = rlst_dynamic_mat![T, (nrows, ncols * ntransfer_vectors)];
-        let mut se2tc_thin = rlst_dynamic_mat![T, (nrows * ntransfer_vectors, ncols)];
+        let mut se2tc_fat = rlst_dynamic_array2!(T, [nrows, ncols * ntransfer_vectors]);
+        let mut se2tc_thin = rlst_dynamic_array2!(T, [nrows * ntransfer_vectors, ncols]);
 
         for (i, t) in self.transfer_vectors.iter().enumerate() {
             let source_equivalent_surface = t.source.compute_surface(&domain, order, self.alpha);
@@ -78,86 +66,100 @@ where
             let target_check_surface = t.target.compute_surface(&domain, order, self.alpha);
             let ntargets = target_check_surface.len() / self.kernel.space_dimension();
 
-            let mut tmp_gram = rlst_dynamic_mat![T, (ntargets, nsources)];
+            let mut tmp_gram_t = rlst_dynamic_array2!(T, [ntargets, nsources]);
 
             self.kernel.assemble_st(
                 EvalType::Value,
                 &source_equivalent_surface[..],
                 &target_check_surface[..],
-                tmp_gram.data_mut(),
+                tmp_gram_t.data_mut(),
             );
 
             // Need to transpose so that rows correspond to targets, and columns to sources
-            let mut tmp_gram = tmp_gram.transpose().eval();
-
-            let block_size = nrows * ncols;
-            let start_idx = i * block_size;
-            let end_idx = start_idx + block_size;
-            let block = se2tc_fat.get_slice_mut(start_idx, end_idx);
-            block.copy_from_slice(tmp_gram.data_mut());
-
-            for j in 0..ncols {
-                let start_idx = j * ntransfer_vectors * nrows + i * nrows;
-                let end_idx = start_idx + nrows;
-                let block_column = se2tc_thin.get_slice_mut(start_idx, end_idx);
-                let gram_column = tmp_gram.get_slice_mut(j * ncols, j * ncols + ncols);
-                block_column.copy_from_slice(gram_column);
-            }
+            let mut tmp_gram = rlst_dynamic_array2!(T, [nsources, ntargets]);
+            tmp_gram.fill_from(tmp_gram_t.transpose());
+
+            let mut block = se2tc_fat
+                .view_mut()
+                .into_subview([0, i * ncols], [nrows, ncols]);
+            block.fill_from(tmp_gram.view());
+
+            let mut block_column = se2tc_thin
+                .view_mut()
+                .into_subview([i * nrows, 0], [nrows, ncols]);
+            block_column.fill_from(tmp_gram.view());
         }
 
-        let (sigma, u, vt) = se2tc_fat.linalg().svd(Mode::All, Mode::Slim).unwrap();
+        let mu = se2tc_fat.shape()[0];
+        let nvt = se2tc_fat.shape()[1];
+        let k = std::cmp::min(mu, nvt);
+
+        let mut u_big = rlst_dynamic_array2!(T, [mu, k]);
+        let mut sigma = vec![T::zero(); k];
+        let mut vt_big = rlst_dynamic_array2!(T, [k, nvt]);
+
+        se2tc_fat
+            .into_svd_alloc(
+                u_big.view_mut(),
+                vt_big.view_mut(),
+                &mut sigma[..],
+                SvdMode::Reduced,
+            )
+            .unwrap();
 
-        let u = u.unwrap();
-        let vt = vt.unwrap();
+        let mut u = rlst_dynamic_array2!(T, [mu, self.k]);
+        let mut sigma_mat = rlst_dynamic_array2!(T, [self.k, self.k]);
+        let mut vt = rlst_dynamic_array2!(T, [self.k, nvt]);
 
-        // Keep 'k' singular values
-        let mut sigma_mat = rlst_dynamic_mat![T, (self.k, self.k)];
-        for i in 0..self.k {
-            sigma_mat[[i, i]] = T::from(sigma[i]).unwrap()
+        u.fill_from(u_big.into_subview([0, 0], [mu, self.k]));
+        vt.fill_from(vt_big.into_subview([0, 0], [self.k, nvt]));
+        for (j, s) in sigma.iter().enumerate().take(self.k) {
+            unsafe {
+                *sigma_mat.get_unchecked_mut([j, j]) = T::from(*s).unwrap();
+            }
         }
 
-        let (mu, _) = u.shape();
-        let u = u.block((0, 0), (mu, self.k)).eval();
-        let u = u.data().iter().map(|&x| T::from(x).unwrap()).collect_vec();
-        let u = unsafe { rlst_pointer_mat!['static, T, u.as_ptr(), (mu, self.k), (1, mu)] }.eval();
-
-        let (_, nvt) = vt.shape();
-        let vt = vt.block((0, 0), (self.k, nvt)).eval();
-        let vt: Vec<T> = vt.data().iter().map(|&x| T::from(x).unwrap()).collect();
-        let vt = unsafe { rlst_pointer_mat!['static, T, vt.as_ptr(), (self.k, nvt), (1, self.k)] };
-
         // Store compressed M2L operators
-        let (_gamma, _r, st) = se2tc_thin.linalg().svd(Mode::Slim, Mode::All).unwrap();
-        let st = st.unwrap();
-        let (_, nst) = st.shape();
-        let st_block = st.block((0, 0), (self.k, nst));
-        let s_block = st_block.transpose().eval();
-        let s_block: Vec<T> = s_block
-            .data()
-            .iter()
-            .map(|&x| T::from(x).unwrap())
-            .collect();
-        let s_block =
-            unsafe { rlst_pointer_mat!['static, T, s_block.as_ptr(), (nst, self.k), (1, nst)] };
+        let thin_nrows = se2tc_thin.shape()[0];
+        let nst = se2tc_thin.shape()[1];
+        let k = std::cmp::min(thin_nrows, nst);
+        let mut _gamma = rlst_dynamic_array2!(T, [thin_nrows, k]);
+        let mut _r = vec![T::zero(); k];
+        let mut st = rlst_dynamic_array2!(T, [k, nst]);
+
+        se2tc_thin
+            .into_svd_alloc(
+                _gamma.view_mut(),
+                st.view_mut(),
+                &mut _r[..],
+                SvdMode::Reduced,
+            )
+            .unwrap();
 
-        let mut c = rlst_dynamic_mat![T, (self.k, self.k * ntransfer_vectors)];
+        let mut s_block = rlst_dynamic_array2!(T, [nst, self.k]);
+        for j in 0..self.k {
+            for i in 0..nst {
+                unsafe { *s_block.get_unchecked_mut([i, j]) = *st.get_unchecked([j, i]) }
+            }
+        }
 
-        for i in 0..self.transfer_vectors.len() {
-            let top_left = (0, i * ncols);
-            let dim = (self.k, ncols);
-            let vt_block = vt.block(top_left, dim);
+        let mut c = rlst_dynamic_array2!(T, [self.k, self.k * ntransfer_vectors]);
 
-            let tmp = sigma_mat.dot(&vt_block.dot(&s_block));
+        for i in 0..self.transfer_vectors.len() {
+            let vt_block = vt.view().into_subview([0, i * ncols], [self.k, ncols]);
 
-            let top_left = (0, i * self.k);
-            let dim = (self.k, self.k);
+            let tmp = empty_array::<T, 2>().simple_mult_into_resize(
+                sigma_mat.view(),
+                empty_array::<T, 2>().simple_mult_into_resize(vt_block.view(), s_block.view()),
+            );
 
-            c.block_mut(top_left, dim)
-                .data_mut()
-                .copy_from_slice(tmp.data());
+            c.view_mut()
+                .into_subview([0, i * self.k], [self.k, self.k])
+                .fill_from(tmp);
         }
 
-        let st_block = s_block.transpose().eval();
+        let mut st_block = rlst_dynamic_array2!(T, [self.k, nst]);
+        st_block.fill_from(s_block.transpose());
 
         SvdM2lOperatorData { u, st_block, c }
     }
@@ -165,20 +167,10 @@ where
 
 impl<T, U> SvdFieldTranslationKiFmm<T, U>
 where
-    T: Float
-        + Default
-        + MultiplyAdd<
-            T,
-            VectorContainer<T>,
-            VectorContainer<T>,
-            VectorContainer<T>,
-            Dynamic,
-            Dynamic,
-            Dynamic,
-        >,
-    DenseMatrixLinAlgBuilder<T>: Svd,
-    T: Scalar<Real = T>,
+    T: Float + Default,
+    T: Scalar<Real = T> + rlst_blis::interface::gemm::Gemm,
     U: Kernel<T = T> + Default,
+    Array<T, BaseArray<T, VectorContainer<T>, 2>, 2>: MatrixSvd<Item = T>,
 {
     /// Constructor for SVD field translation struct for the kernel independent FMM (KiFMM).
     ///
@@ -194,9 +186,8 @@ where
             k: 0,
             kernel,
             operator_data: SvdM2lOperatorData::default(),
-            transfer_vectors: Vec::new(),
+            transfer_vectors: vec![],
         };
-
         let ncoeffs = result.ncoeffs(order);
         if let Some(k) = k {
             // Compression rank <= number of coefficients
@@ -208,7 +199,6 @@ where
         } else {
             result.k = 50;
         }
-
         result.transfer_vectors = compute_transfer_vectors();
         result.operator_data = result.compute_m2l_operators(order, domain);
 
@@ -255,23 +245,27 @@ where
         let halo_children = halo.iter().map(|h| h.children()).collect_vec();
 
         // The child boxes in the halo of the sibling set
-        let mut sources = vec![Vec::new(); halo_children.len()];
-
+        let mut sources = vec![];
         // The sibling set
-        let mut targets = vec![Vec::new(); halo_children.len()];
-
+        let mut targets = vec![];
         // The transfer vectors corresponding to source->target translations
-        let mut transfer_vectors = vec![Vec::new(); halo_children.len()];
-
+        let mut transfer_vectors = vec![];
         // Green's function evaluations for each source, target pair interaction
-        let mut kernel_data_vec = vec![Vec::new(); halo_children.len()];
+        let mut kernel_data_vec = vec![];
+
+        for _ in &halo_children {
+            sources.push(vec![]);
+            targets.push(vec![]);
+            transfer_vectors.push(vec![]);
+            kernel_data_vec.push(vec![]);
+        }
 
         // Each set of 64 M2L operators will correspond to a point in the halo
         // Computing transfer of potential from sibling set to halo
         for (i, halo_child_set) in halo_children.iter().enumerate() {
-            let mut tmp_transfer_vectors = Vec::new();
-            let mut tmp_targets = Vec::new();
-            let mut tmp_sources = Vec::new();
+            let mut tmp_transfer_vectors = vec![];
+            let mut tmp_targets = vec![];
+            let mut tmp_sources = vec![];
 
             // Consider all halo children for a given sibling at a time
             for sibling in siblings.iter() {
@@ -343,16 +337,17 @@ where
                     let mut kernel = flip3(&kernel);
 
                     // Compute FFT of padded kernel
-                    let mut kernel_hat = Array3D::<Complex<T>>::new((p, p, p / 2 + 1));
+                    let mut kernel_hat = rlst_dynamic_array3!(Complex<T>, [p, p, p / 2 + 1]);
 
-                    T::rfft3_fftw(kernel.get_data_mut(), kernel_hat.get_data_mut(), &[p, p, p]);
+                    // TODO: is kernel_hat the transpose of what it used to be?
+                    T::rfft3_fftw(kernel.data_mut(), kernel_hat.data_mut(), &[p, p, p]);
 
                     kernel_data_vec[i].push(kernel_hat);
                 } else {
                     // Fill with zeros when interaction doesn't exist
                     let n = 2 * order - 1;
                     let p = n + 1;
-                    let kernel_hat_zeros = Array3D::<Complex<T>>::new((p, p, p / 2 + 1));
+                    let kernel_hat_zeros = rlst_dynamic_array3!(Complex<T>, [p, p, p / 2 + 1]);
                     kernel_data_vec[i].push(kernel_hat_zeros);
                 }
             }
@@ -368,13 +363,16 @@ where
             for j in 0..nconvolutions {
                 let offset = j * size_real;
                 kernel_data[i][offset..offset + size_real]
-                    .copy_from_slice(kernel_data_vec[i][j].get_data())
+                    .copy_from_slice(kernel_data_vec[i][j].data())
             }
         }
 
         // We want to use this data by frequency in the implementation of FFT M2L
         // Rearrangement: Grouping by frequency, then halo child, then sibling
-        let mut kernel_data_f = vec![Vec::new(); halo_children.len()];
+        let mut kernel_data_f = vec![];
+        for _ in &halo_children {
+            kernel_data_f.push(vec![]);
+        }
         for i in 0..halo_children.len() {
             let current_vector = &kernel_data[i];
             for l in 0..size_real {
@@ -510,7 +508,7 @@ where
         let n = 2 * order - 1;
         let npad = n + 1;
 
-        let mut result = Array3D::<T>::new((npad, npad, npad));
+        let mut result = rlst_dynamic_array3!(T, [npad, npad, npad]);
 
         let nconv = n.pow(3);
         let mut kernel_evals = vec![T::zero(); nconv];
@@ -526,7 +524,7 @@ where
                 for i in 0..n {
                     let conv_idx = i + j * n + k * n * n;
                     let save_idx = i + j * npad + k * npad * npad;
-                    result.get_data_mut()[save_idx..(save_idx + 1)]
+                    result.data_mut()[save_idx..(save_idx + 1)]
                         .copy_from_slice(&kernel_evals[(conv_idx)..(conv_idx + 1)]);
                 }
             }
@@ -544,10 +542,10 @@ where
         let n = 2 * order - 1;
         let npad = n + 1;
 
-        let mut result = Array3D::new((npad, npad, npad));
+        let mut result = rlst_dynamic_array3!(T, [npad, npad, npad]);
 
         for (i, &j) in self.surf_to_conv_map.iter().enumerate() {
-            result.get_data_mut()[j] = charges[i];
+            result.data_mut()[j] = charges[i];
         }
 
         result
@@ -561,7 +559,7 @@ mod test {
     use bempp_kernel::laplace_3d::Laplace3dKernel;
     use cauchy::{c32, c64};
     use num::complex::Complex;
-    use rlst::dense::RandomAccessMut;
+    use rlst_dense::traits::{RandomAccessByRef, RandomAccessMut};
 
     #[test]
     pub fn test_svd_operator_data() {
@@ -579,9 +577,9 @@ mod test {
         let m2l = svd.compute_m2l_operators(order, domain);
 
         // Test that the rank cutoff has been taken correctly (k < ncoeffs)
-        assert_eq!(m2l.st_block.shape(), (k, svd.ncoeffs(order)));
-        assert_eq!(m2l.c.shape(), (k, k * ntransfer_vectors));
-        assert_eq!(m2l.u.shape(), (svd.ncoeffs(order), k));
+        assert_eq!(m2l.st_block.shape(), [k, svd.ncoeffs(order)]);
+        assert_eq!(m2l.c.shape(), [k, k * ntransfer_vectors]);
+        assert_eq!(m2l.u.shape(), [svd.ncoeffs(order), k]);
 
         // Test that the rank cutoff has been taken correctly (k > ncoeffs)
         let k = 100;
@@ -589,22 +587,22 @@ mod test {
         let m2l = svd.compute_m2l_operators(order, domain);
         assert_eq!(
             m2l.st_block.shape(),
-            (svd.ncoeffs(order), svd.ncoeffs(order))
+            [svd.ncoeffs(order), svd.ncoeffs(order)]
         );
         assert_eq!(
             m2l.c.shape(),
-            (svd.ncoeffs(order), svd.ncoeffs(order) * ntransfer_vectors)
+            [svd.ncoeffs(order), svd.ncoeffs(order) * ntransfer_vectors]
         );
-        assert_eq!(m2l.u.shape(), (svd.ncoeffs(order), svd.ncoeffs(order)));
+        assert_eq!(m2l.u.shape(), [svd.ncoeffs(order), svd.ncoeffs(order)]);
 
         // Test that the rank cutoff has been taken correctly (k unspecified)
         let k = None;
         let default_k = 50;
         let svd = SvdFieldTranslationKiFmm::new(kernel, k, order, domain, alpha);
         let m2l = svd.compute_m2l_operators(order, domain);
-        assert_eq!(m2l.st_block.shape(), (default_k, svd.ncoeffs(order)));
-        assert_eq!(m2l.c.shape(), (default_k, default_k * ntransfer_vectors));
-        assert_eq!(m2l.u.shape(), (svd.ncoeffs(order), default_k));
+        assert_eq!(m2l.st_block.shape(), [default_k, svd.ncoeffs(order)]);
+        assert_eq!(m2l.c.shape(), [default_k, default_k * ntransfer_vectors]);
+        assert_eq!(m2l.u.shape(), [svd.ncoeffs(order), default_k]);
     }
 
     #[test]
@@ -648,10 +646,10 @@ mod test {
 
         // Some expansion data
         let ncoeffs = 6 * (order - 1).pow(2) + 2;
-        let mut multipole = rlst_dynamic_mat![f64, (ncoeffs, 1)];
+        let mut multipole = rlst_dynamic_array2!(f64, [ncoeffs, 1]);
 
         for i in 0..ncoeffs {
-            *multipole.get_mut(i, 0).unwrap() = i as f64;
+            *multipole.get_mut([i, 0]).unwrap() = i as f64;
         }
 
         // Create field translation object
@@ -670,18 +668,23 @@ mod test {
             .position(|x| x.hash == transfer_vector.hash)
             .unwrap();
 
-        let (nrows, _) = svd.operator_data.c.shape();
-        let top_left = (0, c_idx * svd.k);
-        let dim = (nrows, svd.k);
+        let [nrows, _] = svd.operator_data.c.shape();
+        let c_sub = svd
+            .operator_data
+            .c
+            .into_subview([0, c_idx * svd.k], [nrows, svd.k]);
 
-        let c_sub = svd.operator_data.c.block(top_left, dim);
+        let compressed_multipole = empty_array::<f64, 2>()
+            .simple_mult_into_resize(svd.operator_data.st_block.view(), multipole.view());
 
-        let compressed_multipole = svd.operator_data.st_block.dot(&multipole).eval();
-
-        let compressed_check_potential = c_sub.dot(&compressed_multipole);
+        let compressed_check_potential = empty_array::<f64, 2>()
+            .simple_mult_into_resize(c_sub.view(), compressed_multipole.view());
 
         // Post process to find check potential
-        let check_potential = svd.operator_data.u.dot(&compressed_check_potential).eval();
+        let check_potential = empty_array::<f64, 2>().simple_mult_into_resize(
+            svd.operator_data.u.view(),
+            compressed_check_potential.view(),
+        );
 
         let sources = transfer_vector
             .source
@@ -733,10 +736,10 @@ mod test {
 
-        // Some expansion data998
+        // Some expansion data
         let ncoeffs = 6 * (order - 1).pow(2) + 2;
-        let mut multipole = rlst_dynamic_mat![f64, (ncoeffs, 1)];
+        let mut multipole = rlst_dynamic_array2!(f64, [ncoeffs, 1]);
 
         for i in 0..ncoeffs {
-            *multipole.get_mut(i, 0).unwrap() = i as f64;
+            *multipole.get_mut([i, 0]).unwrap() = i as f64;
         }
 
         let level = 2;
@@ -748,7 +751,10 @@ mod test {
         let key = MortonKey::from_point(&[0.5, 0.5, 0.5], &domain, level);
 
         let parent_neighbours = key.parent().neighbors();
-        let mut v_list_structured = vec![Vec::new(); 26];
+        let mut v_list_structured = vec![];
+        for _ in 0..26 {
+            v_list_structured.push(vec![]);
+        }
         for (i, pn) in parent_neighbours.iter().enumerate() {
             for child in pn.children() {
                 if !key.is_adjacent(&child) {
@@ -807,19 +813,19 @@ mod test {
 
         // Compute kernel from source/target pair
         let test_kernel = fft.compute_kernel(order, &conv_grid, kernel_point);
-        let &(m, n, o) = test_kernel.shape();
+        let [m, n, o] = test_kernel.shape();
 
         let mut test_kernel = flip3(&test_kernel);
 
         // Compute FFT of padded kernel
-        let mut test_kernel_hat = Array3D::<c64>::new((m, n, o / 2 + 1));
+        let mut test_kernel_hat = rlst_dynamic_array3!(c64, [m, n, o / 2 + 1]);
         f64::rfft3_fftw(
-            test_kernel.get_data_mut(),
-            test_kernel_hat.get_data_mut(),
+            test_kernel.data_mut(),
+            test_kernel_hat.data_mut(),
             &[m, n, o],
         );
 
-        for (p, t) in test_kernel_hat.get_data().iter().zip(kernel_hat.iter()) {
+        for (p, t) in test_kernel_hat.data().iter().zip(kernel_hat.iter()) {
             assert!((p - t).norm() < 1e-6)
         }
     }
@@ -830,7 +836,10 @@ mod test {
         // here each '1000' corresponds to a sibling index
         // each '100' to a child in a given halo element
         // and each '1' to a frequency
-        let mut kernel_data_mat = vec![Vec::new(); 26];
+        let mut kernel_data_mat = vec![];
+        for _ in 0..26 {
+            kernel_data_mat.push(vec![]);
+        }
         let size_real = 10;
 
         for elem in kernel_data_mat.iter_mut().take(26) {
@@ -848,7 +857,10 @@ mod test {
 
         // We want to use this data by frequency in the implementation of FFT M2L
         // Rearrangement: Grouping by frequency, then halo child, then sibling
-        let mut rearranged = vec![Vec::new(); 26];
+        let mut rearranged = vec![];
+        for _ in 0..26 {
+            rearranged.push(vec![]);
+        }
         for i in 0..26 {
             let current_vector = &kernel_data_mat[i];
             for l in 0..size_real {
@@ -893,10 +905,10 @@ mod test {
 
         // Some expansion data
         let ncoeffs = 6 * (order - 1).pow(2) + 2;
-        let mut multipole = rlst_dynamic_mat![f64, (ncoeffs, 1)];
+        let mut multipole = rlst_dynamic_array2!(f64, [ncoeffs, 1]);
 
         for i in 0..ncoeffs {
-            *multipole.get_mut(i, 0).unwrap() = i as f64;
+            *multipole.get_mut([i, 0]).unwrap() = i as f64;
         }
 
         // Create field translation object
@@ -912,10 +924,10 @@ mod test {
 
         // Compute FFT of the representative signal
         let mut signal = fft.compute_signal(order, multipole.data());
-        let &(m, n, o) = signal.shape();
-        let mut signal_hat = Array3D::<c64>::new((m, n, o / 2 + 1));
+        let [m, n, o] = signal.shape();
+        let mut signal_hat = rlst_dynamic_array3!(c64, [m, n, o / 2 + 1]);
 
-        f64::rfft3_fftw(signal.get_data_mut(), signal_hat.get_data_mut(), &[m, n, o]);
+        f64::rfft3_fftw(signal.data_mut(), signal_hat.data_mut(), &[m, n, o]);
 
         let source_equivalent_surface = transfer_vector
             .source
@@ -951,35 +963,35 @@ mod test {
 
         // Compute kernel
         let kernel = fft.compute_kernel(order, &conv_grid, kernel_point);
-        let &(m, n, o) = kernel.shape();
+        let [m, n, o] = kernel.shape();
 
         let mut kernel = flip3(&kernel);
 
         // Compute FFT of padded kernel
-        let mut kernel_hat = Array3D::<c64>::new((m, n, o / 2 + 1));
-        f64::rfft3_fftw(kernel.get_data_mut(), kernel_hat.get_data_mut(), &[m, n, o]);
-
-        // Compute convolution
-        let hadamard_product = signal_hat
-            .get_data()
-            .iter()
-            .zip(kernel_hat.get_data().iter())
-            .map(|(a, b)| a * b)
-            .collect_vec();
+        let mut kernel_hat = rlst_dynamic_array3!(c64, [m, n, o / 2 + 1]);
+        f64::rfft3_fftw(kernel.data_mut(), kernel_hat.data_mut(), &[m, n, o]);
 
-        let mut hadamard_product = Array3D::from_data(hadamard_product, (m, n, o / 2 + 1));
+        let mut hadamard_product = rlst_dynamic_array3!(c64, [m, n, o / 2 + 1]);
+        for k in 0..o / 2 + 1 {
+            for j in 0..n {
+                for i in 0..m {
+                    *hadamard_product.get_mut([i, j, k]).unwrap() =
+                        kernel_hat.get([i, j, k]).unwrap() * signal_hat.get([i, j, k]).unwrap();
+                }
+            }
+        }
 
-        let mut potentials = Array3D::new((m, n, o));
+        let mut potentials = rlst_dynamic_array3!(f64, [m, n, o]);
 
         f64::irfft3_fftw(
-            hadamard_product.get_data_mut(),
-            potentials.get_data_mut(),
+            hadamard_product.data_mut(),
+            potentials.data_mut(),
             &[m, n, o],
         );
 
         let mut result = vec![0f64; ntargets];
         for (i, &idx) in fft.conv_to_surf_map.iter().enumerate() {
-            result[i] = potentials.get_data()[idx];
+            result[i] = potentials.data()[idx];
         }
 
         // Get direct evaluations for testing
diff --git a/field/src/types.rs b/field/src/types.rs
index 61177fa7..7b80a8b9 100644
--- a/field/src/types.rs
+++ b/field/src/types.rs
@@ -1,11 +1,7 @@
 //! Types for storing field translation data.
 use num::{Complex, Float};
-use rlst::{
-    common::traits::{Eval, NewLikeSelf},
-    dense::{
-        base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix, rlst_dynamic_mat,
-        Dynamic,
-    },
+use rlst_dense::{
+    array::Array, base_array::BaseArray, data_container::VectorContainer, rlst_dynamic_array2,
 };
 
 use bempp_traits::kernel::Kernel;
@@ -13,7 +9,7 @@ use bempp_traits::types::Scalar;
 use bempp_tree::types::morton::MortonKey;
 
-/// Simple type alias for a 2D `Matrix<f64>`
+/// Simple type alias for a 2D `Array` of `T` stored in a `VectorContainer`
-pub type SvdM2lEntry<T> = Matrix<T, BaseMatrix<T, VectorContainer<T>, Dynamic>, Dynamic>;
+pub type SvdM2lEntry<T> = Array<T, BaseArray<T, VectorContainer<T>, 2>, 2>;
 
 /// Simple type alias for pre-computed FFT of green's function evaluations computed for each transfer vector in a box's halo
 /// Each index corresponds to a halo position, and contains 64 convolutions, one for each of a box's siblings with each child
@@ -48,7 +44,7 @@ where
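-/// A type to store the M2L field translation meta-data  and datafor an SVD based sparsification in the kernel independent FMM.
+/// A type to store the M2L field translation meta-data and data for an SVD based sparsification in the kernel independent FMM.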
 pub struct SvdFieldTranslationKiFmm<T, U>
 where
-    T: Scalar<Real = T> + Float + Default,
+    T: Scalar<Real = T> + Float + Default + rlst_blis::interface::gemm::Gemm,
     U: Kernel<T = T> + Default,
 {
     /// Amount to dilate inner check surface by when computing operator.
@@ -116,12 +112,10 @@ where
     T: Scalar,
 {
     fn default() -> Self {
-        let tmp = rlst_dynamic_mat![T, (1, 1)];
+        let u = rlst_dynamic_array2!(T, [1, 1]);
+        let st_block = rlst_dynamic_array2!(T, [1, 1]);
+        let c = rlst_dynamic_array2!(T, [1, 1]);
 
-        SvdM2lOperatorData {
-            u: tmp.new_like_self().eval(),
-            st_block: tmp.new_like_self().eval(),
-            c: tmp.new_like_self().eval(),
-        }
+        SvdM2lOperatorData { u, st_block, c }
     }
 }
diff --git a/find_examples.py b/find_examples.py
index c5f78f31..ddb6154a 100644
--- a/find_examples.py
+++ b/find_examples.py
@@ -29,6 +29,8 @@
 
 files = []
 for folder in os.listdir(root_dir):
+    if folder in ["fmm"]:
+        continue
     if not folder.startswith("."):
         sub_dir = os.path.join(root_dir, folder)
         example_dir = os.path.join(sub_dir, "examples")
diff --git a/fmm/Cargo.toml b/fmm/Cargo.toml
index ce2a0577..a8042a54 100644
--- a/fmm/Cargo.toml
+++ b/fmm/Cargo.toml
@@ -35,6 +35,8 @@ rand = "0.8.*"
 float-cmp = "0.9.0"
 num_cpus = "1"
 num = "0.4"
-rlst = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
-fftw = {git = "https://github.com/skailasa/fftw.git" }
+rlst-common = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-dense = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-blis = { git = "https://github.com/linalg-rs/rlst.git" }
+fftw = { git = "https://github.com/skailasa/fftw.git" }
 rayon = "1.7"
diff --git a/fmm/src/charge.rs b/fmm/src/charge.rs
index 9e239a3f..c0e5fa1f 100644
--- a/fmm/src/charge.rs
+++ b/fmm/src/charge.rs
@@ -1,8 +1,8 @@
 //! Helper functions to handle charge data.
 use std::collections::HashMap;
 
-use cauchy::Scalar;
 use num::Float;
+use rlst_common::types::Scalar;
 
 use crate::types::{Charge, ChargeDict, GlobalIdx};
 
diff --git a/fmm/src/field_translation/source.rs b/fmm/src/field_translation/source.rs
index 6400a95e..32c541b1 100644
--- a/fmm/src/field_translation/source.rs
+++ b/fmm/src/field_translation/source.rs
@@ -21,25 +21,23 @@ use crate::{
         FmmDataAdaptive, FmmDataUniform, FmmDataUniformMatrix, KiFmmLinear, KiFmmLinearMatrix,
     },
 };
-use rlst::{
-    common::traits::*,
-    dense::{rlst_col_vec, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer},
+use bempp_traits::types::Scalar;
+use rlst_dense::{
+    array::empty_array,
+    rlst_array_from_slice2, rlst_dynamic_array2,
+    traits::{MultIntoResize, RawAccess, RawAccessMut},
 };
 
 impl<T, U, V> SourceTranslation for FmmDataUniform<KiFmmLinear<SingleNodeTree<V>, T, U, V>, V>
 where
     T: Kernel<T = V> + ScaleInvariantKernel<T = V> + std::marker::Send + std::marker::Sync,
     U: FieldTranslationData<T> + std::marker::Sync + std::marker::Send,
-    V: Scalar<Real = V> + Float + Default + std::marker::Sync + std::marker::Send,
-    V: MultiplyAdd<
-        V,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        Dynamic,
-        Dynamic,
-        Dynamic,
-    >,
+    V: Scalar<Real = V>
+        + Float
+        + Default
+        + std::marker::Sync
+        + std::marker::Send
+        + rlst_blis::interface::gemm::Gemm,
 {
     /// Point to multipole evaluations, multithreaded over each leaf box.
     fn p2m<'a>(&self) {
@@ -52,7 +50,7 @@ where
 
         let surface_size = ncoeffs * self.fmm.kernel.space_dimension();
 
-        let mut check_potentials = rlst_col_vec![V, nleaves * ncoeffs];
+        let mut check_potentials = rlst_dynamic_array2!(V, [nleaves * ncoeffs, 1]);
         let coordinates = self.fmm.tree().get_all_coordinates().unwrap();
         let dim = self.fmm.kernel.space_dimension();
 
@@ -65,19 +63,24 @@ where
             .for_each(
                 |((check_potential, upward_check_surface), charge_index_pointer)| {
                     let charges = &self.charges[charge_index_pointer.0..charge_index_pointer.1];
-                    let coordinates = &coordinates
-                        [charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
+                    let coordinates_row_major =
+                        &coordinates[charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
 
-                    let nsources = coordinates.len() / dim;
+                    let nsources = coordinates_row_major.len() / dim;
 
                     if nsources > 0 {
-                        let coordinates = unsafe {
-                            rlst_pointer_mat!['a, V, coordinates.as_ptr(), (nsources, dim), (dim, 1)]
-                        }.eval();
+                        let coordinates_row_major = rlst_array_from_slice2!(
+                            V,
+                            coordinates_row_major,
+                            [nsources, dim],
+                            [dim, 1]
+                        );
+                        let mut coordinates_col_major = rlst_dynamic_array2!(V, [nsources, dim]);
+                        coordinates_col_major.fill_from(coordinates_row_major.view());
 
                         self.fmm.kernel.evaluate_st(
                             EvalType::Value,
-                            coordinates.data(),
+                            coordinates_col_major.data(),
                             upward_check_surface,
                             charges,
                             check_potential,
@@ -91,19 +94,30 @@ where
 
         check_potentials
             .data()
-            .par_chunks_exact(ncoeffs*chunk_size)
+            .par_chunks_exact(ncoeffs * chunk_size)
             .zip(self.leaf_multipoles.par_chunks_exact(chunk_size))
-            .zip(self.scales.par_chunks_exact(ncoeffs*chunk_size))
+            .zip(self.scales.par_chunks_exact(ncoeffs * chunk_size))
             .for_each(|((check_potential, multipole_ptrs), scale)| {
+                let check_potential =
+                    rlst_array_from_slice2!(V, check_potential, [ncoeffs, chunk_size]);
+                let scale = rlst_array_from_slice2!(V, scale, [ncoeffs, chunk_size]);
 
-                let check_potential = unsafe { rlst_pointer_mat!['a, V, check_potential.as_ptr(), (ncoeffs, chunk_size), (1, ncoeffs)] };
-                let scale = unsafe {rlst_pointer_mat!['a, V, scale.as_ptr(), (ncoeffs, chunk_size), (1, ncoeffs)]}.eval();
+                let mut cmp_prod = rlst_dynamic_array2!(V, [ncoeffs, chunk_size]);
+                cmp_prod.fill_from(check_potential * scale);
 
-                let tmp = (self.fmm.uc2e_inv_1.dot(&self.fmm.uc2e_inv_2.dot(&check_potential.cmp_wise_product(&scale)))).eval();
+                let tmp = empty_array::<V, 2>().simple_mult_into_resize(
+                    self.fmm.uc2e_inv_1.view(),
+                    empty_array::<V, 2>()
+                        .simple_mult_into_resize(self.fmm.uc2e_inv_2.view(), cmp_prod),
+                );
 
                 for (i, multipole_ptr) in multipole_ptrs.iter().enumerate().take(chunk_size) {
-                    let multipole = unsafe { std::slice::from_raw_parts_mut(multipole_ptr.raw, ncoeffs) };
-                    multipole.iter_mut().zip(&tmp.data()[i*ncoeffs..(i+1)*ncoeffs]).for_each(|(m, t)| *m += *t);
+                    let multipole =
+                        unsafe { std::slice::from_raw_parts_mut(multipole_ptr.raw, ncoeffs) };
+                    multipole
+                        .iter_mut()
+                        .zip(&tmp.data()[i * ncoeffs..(i + 1) * ncoeffs])
+                        .for_each(|(m, t)| *m += *t);
                 }
             })
     }
@@ -150,17 +164,37 @@ where
 
         // 3. Compute M2M kernel over sets of siblings
         child_multipoles
-            .par_chunks_exact(nsiblings * ncoeffs*chunk_size)
+            .par_chunks_exact(nsiblings * ncoeffs * chunk_size)
             .zip(parent_multipoles.par_chunks_exact(chunk_size))
-            .for_each(|(child_multipoles_chunk, parent_multipole_pointers_chunk)| {
-                let child_multipoles_chunk = unsafe { rlst_pointer_mat!['a, V, child_multipoles_chunk.as_ptr(), (ncoeffs*nsiblings, chunk_size), (1, ncoeffs*nsiblings)] };
-                let parent_multipoles_chunk = self.fmm.m2m.dot(&child_multipoles_chunk).eval();
-
-                for (chunk_idx, parent_multipole_pointer) in parent_multipole_pointers_chunk.iter().enumerate().take(chunk_size) {
-                    let parent_multipole = unsafe { std::slice::from_raw_parts_mut(parent_multipole_pointer.raw, ncoeffs) };
-                    parent_multipole.iter_mut().zip(&parent_multipoles_chunk.data()[chunk_idx*ncoeffs..(chunk_idx+1)*ncoeffs]).for_each(|(p, t)| *p += *t);
-                }
-            })
+            .for_each(
+                |(child_multipoles_chunk, parent_multipole_pointers_chunk)| {
+                    let child_multipoles_chunk_mat = rlst_array_from_slice2!(
+                        V,
+                        child_multipoles_chunk,
+                        [ncoeffs * nsiblings, chunk_size]
+                    );
+
+                    let parent_multipoles_chunk = empty_array::<V, 2>()
+                        .simple_mult_into_resize(self.fmm.m2m.view(), child_multipoles_chunk_mat);
+
+                    for (chunk_idx, parent_multipole_pointer) in parent_multipole_pointers_chunk
+                        .iter()
+                        .enumerate()
+                        .take(chunk_size)
+                    {
+                        let parent_multipole = unsafe {
+                            std::slice::from_raw_parts_mut(parent_multipole_pointer.raw, ncoeffs)
+                        };
+                        parent_multipole
+                            .iter_mut()
+                            .zip(
+                                &parent_multipoles_chunk.data()
+                                    [chunk_idx * ncoeffs..(chunk_idx + 1) * ncoeffs],
+                            )
+                            .for_each(|(p, t)| *p += *t);
+                    }
+                },
+            )
     }
 }
 
@@ -168,16 +202,12 @@ impl<T, U, V> SourceTranslation for FmmDataAdaptive<KiFmmLinear<SingleNodeTree<V
 where
     T: Kernel<T = V> + ScaleInvariantKernel<T = V> + std::marker::Send + std::marker::Sync,
     U: FieldTranslationData<T> + std::marker::Sync + std::marker::Send,
-    V: Scalar<Real = V> + Float + Default + std::marker::Sync + std::marker::Send,
-    V: MultiplyAdd<
-        V,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        Dynamic,
-        Dynamic,
-        Dynamic,
-    >,
+    V: Scalar<Real = V>
+        + Float
+        + Default
+        + std::marker::Sync
+        + std::marker::Send
+        + rlst_blis::interface::gemm::Gemm,
 {
     /// Point to multipole evaluations, multithreaded over each leaf box.
     fn p2m<'a>(&self) {
@@ -188,61 +218,76 @@ where
         let nleaves = leaves.len();
         let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order);
 
-        let surface_size = ncoeffs * self.fmm.kernel.space_dimension();
-
-        let mut check_potentials = rlst_col_vec![V, nleaves * ncoeffs];
+        let mut check_potentials = rlst_dynamic_array2!(V, [nleaves * ncoeffs, 1]);
         let coordinates = self.fmm.tree().get_all_coordinates().unwrap();
         let dim = self.fmm.kernel.space_dimension();
+        let surface_size = ncoeffs * self.fmm.kernel.space_dimension();
 
         // 1. Compute the check potential for each box
         check_potentials
-                .data_mut()
-                .par_chunks_exact_mut(ncoeffs)
-                .zip(self.leaf_upward_surfaces.par_chunks_exact(surface_size))
-                .zip(&self.charge_index_pointer)
-                .for_each(
-                    |((check_potential, upward_check_surface), charge_index_pointer)| {
-                        let charges = &self.charges[charge_index_pointer.0..charge_index_pointer.1];
-                        let coordinates = &coordinates
-                            [charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
-
-                        let nsources = coordinates.len() / dim;
-
-                        if nsources > 0 {
-                            let coordinates = unsafe {
-                                rlst_pointer_mat!['a, V, coordinates.as_ptr(), (nsources, dim), (dim, 1)]
-                            }.eval();
+            .data_mut()
+            .par_chunks_exact_mut(ncoeffs)
+            .zip(self.leaf_upward_surfaces.par_chunks_exact(surface_size))
+            .zip(&self.charge_index_pointer)
+            .for_each(
+                |((check_potential, upward_check_surface), charge_index_pointer)| {
+                    let charges = &self.charges[charge_index_pointer.0..charge_index_pointer.1];
+                    let coordinates_row_major =
+                        &coordinates[charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
 
-                            self.fmm.kernel.evaluate_st(
-                                EvalType::Value,
-                                coordinates.data(),
-                                upward_check_surface,
-                                charges,
-                                check_potential,
-                            );
-                        }
-                    },
-                );
+                    let nsources = coordinates_row_major.len() / dim;
+
+                    if nsources > 0 {
+                        let coordinates_mat = rlst_array_from_slice2!(
+                            V,
+                            coordinates_row_major,
+                            [nsources, dim],
+                            [dim, 1]
+                        );
+                        let mut coordinates_col_major = rlst_dynamic_array2!(V, [nsources, dim]);
+                        coordinates_col_major.fill_from(coordinates_mat.view());
+
+                        self.fmm.kernel.evaluate_st(
+                            EvalType::Value,
+                            coordinates_col_major.data(),
+                            upward_check_surface,
+                            charges,
+                            check_potential,
+                        );
+                    }
+                },
+            );
 
         // 2. Compute the multipole expansions, with each of chunk_size boxes at a time.
         let chunk_size = find_chunk_size(nleaves, P2M_MAX_CHUNK_SIZE);
 
         check_potentials
-                .data()
-                .par_chunks_exact(ncoeffs*chunk_size)
-                .zip(self.leaf_multipoles.par_chunks_exact(chunk_size))
-                .zip(self.scales.par_chunks_exact(ncoeffs*chunk_size))
-                .for_each(|((check_potential, multipole_ptrs), scale)| {
-
-                    let check_potential = unsafe { rlst_pointer_mat!['a, V, check_potential.as_ptr(), (ncoeffs, chunk_size), (1, ncoeffs)] };
-                    let scale = unsafe {rlst_pointer_mat!['a, V, scale.as_ptr(), (ncoeffs, chunk_size), (1, ncoeffs)]}.eval();
-
-                    let tmp = (self.fmm.uc2e_inv_1.dot(&self.fmm.uc2e_inv_2.dot(&check_potential.cmp_wise_product(&scale)))).eval();
-                    for (i, multipole_ptr) in multipole_ptrs.iter().enumerate().take(chunk_size) {
-                        let multipole = unsafe { std::slice::from_raw_parts_mut(multipole_ptr.raw, ncoeffs) };
-                        multipole.iter_mut().zip(&tmp.data()[i*ncoeffs..(i+1)*ncoeffs]).for_each(|(m, t)| *m += *t);
-                    }
-                })
+            .data()
+            .par_chunks_exact(ncoeffs * chunk_size)
+            .zip(self.leaf_multipoles.par_chunks_exact(chunk_size))
+            .zip(self.scales.par_chunks_exact(ncoeffs * chunk_size))
+            .for_each(|((check_potential, multipole_ptrs), scale)| {
+                let check_potential =
+                    rlst_array_from_slice2!(V, check_potential, [ncoeffs, chunk_size]);
+                let scale = rlst_array_from_slice2!(V, scale, [ncoeffs, chunk_size]);
+
+                let mut cmp_prod = rlst_dynamic_array2!(V, [ncoeffs, chunk_size]);
+                cmp_prod.fill_from(check_potential * scale);
+
+                let tmp = empty_array::<V, 2>().simple_mult_into_resize(
+                    self.fmm.uc2e_inv_1.view(),
+                    empty_array::<V, 2>()
+                        .simple_mult_into_resize(self.fmm.uc2e_inv_2.view(), cmp_prod),
+                );
+                for (i, multipole_ptr) in multipole_ptrs.iter().enumerate().take(chunk_size) {
+                    let multipole =
+                        unsafe { std::slice::from_raw_parts_mut(multipole_ptr.raw, ncoeffs) };
+                    multipole
+                        .iter_mut()
+                        .zip(&tmp.data()[i * ncoeffs..(i + 1) * ncoeffs])
+                        .for_each(|(m, t)| *m += *t);
+                }
+            })
     }
 
     /// Multipole to multipole translations, multithreaded over all boxes at a given level.
@@ -287,17 +332,36 @@ where
 
         // 2. Compute M2M kernel over sets of siblings
         child_multipoles
-            .par_chunks_exact(nsiblings * ncoeffs*chunk_size)
+            .par_chunks_exact(nsiblings * ncoeffs * chunk_size)
             .zip(parent_multipoles.par_chunks_exact(chunk_size))
-            .for_each(|(child_multipoles_chunk, parent_multipole_pointers_chunk)| {
-                let child_multipoles_chunk = unsafe { rlst_pointer_mat!['a, V, child_multipoles_chunk.as_ptr(), (ncoeffs*nsiblings, chunk_size), (1, ncoeffs*nsiblings)] };
-                let parent_multipoles_chunk = self.fmm.m2m.dot(&child_multipoles_chunk).eval();
-
-                for (chunk_idx, parent_multipole_pointer) in parent_multipole_pointers_chunk.iter().enumerate().take(chunk_size) {
-                    let parent_multipole = unsafe { std::slice::from_raw_parts_mut(parent_multipole_pointer.raw, ncoeffs) };
-                    parent_multipole.iter_mut().zip(&parent_multipoles_chunk.data()[chunk_idx*ncoeffs..(chunk_idx+1)*ncoeffs]).for_each(|(p, t)| *p += *t);
-                }
-            })
+            .for_each(
+                |(child_multipoles_chunk, parent_multipole_pointers_chunk)| {
+                    let child_multipoles_chunk_mat = rlst_array_from_slice2!(
+                        V,
+                        child_multipoles_chunk,
+                        [ncoeffs * nsiblings, chunk_size]
+                    );
+                    let parent_multipoles_chunk = empty_array::<V, 2>()
+                        .simple_mult_into_resize(self.fmm.m2m.view(), child_multipoles_chunk_mat);
+
+                    for (chunk_idx, parent_multipole_pointer) in parent_multipole_pointers_chunk
+                        .iter()
+                        .enumerate()
+                        .take(chunk_size)
+                    {
+                        let parent_multipole = unsafe {
+                            std::slice::from_raw_parts_mut(parent_multipole_pointer.raw, ncoeffs)
+                        };
+                        parent_multipole
+                            .iter_mut()
+                            .zip(
+                                &parent_multipoles_chunk.data()
+                                    [chunk_idx * ncoeffs..(chunk_idx + 1) * ncoeffs],
+                            )
+                            .for_each(|(p, t)| *p += *t);
+                    }
+                },
+            )
     }
 }
 
@@ -306,16 +370,12 @@ impl<T, U, V> SourceTranslation
 where
     T: Kernel<T = V> + ScaleInvariantKernel<T = V> + std::marker::Send + std::marker::Sync,
     U: FieldTranslationData<T> + std::marker::Sync + std::marker::Send,
-    V: Scalar<Real = V> + Float + Default + std::marker::Sync + std::marker::Send,
-    V: MultiplyAdd<
-        V,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        Dynamic,
-        Dynamic,
-        Dynamic,
-    >,
+    V: Scalar<Real = V>
+        + Float
+        + Default
+        + std::marker::Sync
+        + std::marker::Send
+        + rlst_blis::interface::gemm::Gemm,
 {
     /// Point to multipole evaluations, multithreaded over each leaf box.
     fn p2m<'a>(&self) {
@@ -329,7 +389,7 @@ where
         let ncoordinates = coordinates.len() / dim;
 
         let mut check_potentials =
-            rlst_col_vec![V, self.nleaves * self.ncoeffs * self.ncharge_vectors];
+            rlst_dynamic_array2!(V, [self.nleaves * self.ncoeffs * self.ncharge_vectors, 1]);
 
         // 1. Compute the check potential for each box for each charge vector
         check_potentials
@@ -339,23 +399,32 @@ where
             .zip(&self.charge_index_pointer)
             .for_each(
                 |((check_potential, upward_check_surface), charge_index_pointer)| {
-                    let coordinates = &coordinates
-                        [charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
-                    let nsources = coordinates.len() / dim;
+                    let coordinates_row_major =
+                        &coordinates[charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
+                    let nsources = coordinates_row_major.len() / dim;
 
                     if nsources > 0 {
-                        let source_coordinates = unsafe {
-                            rlst_pointer_mat!['a, V, coordinates.as_ptr(), (nsources, dim), (dim, 1)]
-                        }.eval();
-
                         for i in 0..self.ncharge_vectors {
                             let charge_vec_displacement = i * ncoordinates;
-                            let charges_i = &self.charges[charge_vec_displacement + charge_index_pointer.0..charge_vec_displacement + charge_index_pointer.1];
-                            let check_potential_i = &mut check_potential[i*self.ncoeffs..(i+1)*self.ncoeffs];
+                            let charges_i = &self.charges[charge_vec_displacement
+                                + charge_index_pointer.0
+                                ..charge_vec_displacement + charge_index_pointer.1];
+                            let check_potential_i =
+                                &mut check_potential[i * self.ncoeffs..(i + 1) * self.ncoeffs];
+
+                            let coordinates_mat = rlst_array_from_slice2!(
+                                V,
+                                coordinates_row_major,
+                                [nsources, dim],
+                                [dim, 1]
+                            );
+                            let mut coordinates_col_major =
+                                rlst_dynamic_array2!(V, [nsources, dim]);
+                            coordinates_col_major.fill_from(coordinates_mat.view());
 
                             self.fmm.kernel.evaluate_st(
                                 EvalType::Value,
-                                source_coordinates.data(),
+                                coordinates_col_major.data(),
                                 upward_check_surface,
                                 charges_i,
                                 check_potential_i,
@@ -372,16 +441,34 @@ where
             .zip(self.leaf_multipoles.into_par_iter())
             .zip(self.scales.par_chunks_exact(self.ncoeffs))
             .for_each(|((check_potential, multipole_ptrs), scale)| {
+                let check_potential = rlst_array_from_slice2!(
+                    V,
+                    check_potential,
+                    [self.ncoeffs, self.ncharge_vectors]
+                );
 
-                let mut check_potential = unsafe { rlst_pointer_mat!['a, V, check_potential.as_ptr(), (self.ncoeffs, self.ncharge_vectors), (1, self.ncoeffs)] }.eval();
-                let scale = scale[0];
-                check_potential.data_mut().iter_mut().for_each(|cp| *cp *= scale );
-
-                let tmp = (self.fmm.uc2e_inv_1.dot(&self.fmm.uc2e_inv_2.dot(&check_potential))).eval();
+                let mut scaled_check_potential =
+                    rlst_dynamic_array2!(V, [self.ncoeffs, self.ncharge_vectors]);
+                scaled_check_potential.fill_from(check_potential);
+                scaled_check_potential.scale_in_place(scale[0]);
+
+                let tmp = empty_array::<V, 2>().simple_mult_into_resize(
+                    self.fmm.uc2e_inv_1.view(),
+                    empty_array::<V, 2>().simple_mult_into_resize(
+                        self.fmm.uc2e_inv_2.view(),
+                        scaled_check_potential.view(),
+                    ),
+                );
 
-                for (i, multipole_ptr) in multipole_ptrs.iter().enumerate().take(self.ncharge_vectors) {
-                    let multipole = unsafe { std::slice::from_raw_parts_mut(multipole_ptr.raw, self.ncoeffs) };
-                    multipole.iter_mut().zip(&tmp.data()[i*self.ncoeffs..(i+1)*self.ncoeffs]).for_each(|(m, t)| *m += *t);
+                for (i, multipole_ptr) in
+                    multipole_ptrs.iter().enumerate().take(self.ncharge_vectors)
+                {
+                    let multipole =
+                        unsafe { std::slice::from_raw_parts_mut(multipole_ptr.raw, self.ncoeffs) };
+                    multipole
+                        .iter_mut()
+                        .zip(&tmp.data()[i * self.ncoeffs..(i + 1) * self.ncoeffs])
+                        .for_each(|(m, t)| *m += *t);
                 }
             })
     }
@@ -428,18 +515,32 @@ where
             .par_chunks_exact(self.ncharge_vectors * self.ncoeffs * nsiblings)
             .zip(parent_multipoles.into_par_iter())
             .for_each(|(child_multipoles, parent_multipole_pointers)| {
-
                 for i in 0..nsiblings {
                     let sibling_displacement = i * self.ncoeffs * self.ncharge_vectors;
-                    let ptr = unsafe { child_multipoles.as_ptr().add(sibling_displacement) };
-                    let child_multipoles_i = unsafe { rlst_pointer_mat!['a, V, ptr, (self.ncoeffs, self.ncharge_vectors), (1, self.ncoeffs)] };
-                    let result_i = self.fmm.m2m[i].dot(&child_multipoles_i).eval();
 
-                    for (j, send_ptr) in parent_multipole_pointers.iter().enumerate().take(self.ncharge_vectors) {
+                    let child_multipoles_i = rlst_array_from_slice2!(
+                        V,
+                        &child_multipoles[sibling_displacement
+                            ..sibling_displacement + self.ncoeffs * self.ncharge_vectors],
+                        [self.ncoeffs, self.ncharge_vectors]
+                    );
+
+                    let result_i = empty_array::<V, 2>()
+                        .simple_mult_into_resize(self.fmm.m2m[i].view(), child_multipoles_i);
+
+                    for (j, send_ptr) in parent_multipole_pointers
+                        .iter()
+                        .enumerate()
+                        .take(self.ncharge_vectors)
+                    {
                         let raw = send_ptr.raw;
-                        let parent_multipole_j = unsafe { std::slice::from_raw_parts_mut(raw, self.ncoeffs) };
-                        let result_ij = &result_i.data()[j*self.ncoeffs..(j+1)*self.ncoeffs];
-                        parent_multipole_j.iter_mut().zip(result_ij.iter()).for_each(|(p, r)| *p += *r);
+                        let parent_multipole_j =
+                            unsafe { std::slice::from_raw_parts_mut(raw, self.ncoeffs) };
+                        let result_ij = &result_i.data()[j * self.ncoeffs..(j + 1) * self.ncoeffs];
+                        parent_multipole_j
+                            .iter_mut()
+                            .zip(result_ij.iter())
+                            .for_each(|(p, r)| *p += *r);
                     }
                 }
             });
@@ -448,7 +549,6 @@ where
 
 #[cfg(test)]
 mod test {
-
     use super::*;
 
     use float_cmp::assert_approx_eq;
@@ -533,6 +633,8 @@ mod test {
 
         let abs_error = num::Float::abs(expected[0] - found[0]);
         let rel_error = abs_error / expected[0];
+
+        println!("{}", rel_error);
         assert!(rel_error <= 1e-5);
     }
 
@@ -631,7 +733,6 @@ mod test {
                 None,
             );
         }
-
         // Uniformly refined sphere surface
         {
             let points = points_fixture_sphere::<f64>(npoints);
@@ -647,7 +748,6 @@ mod test {
                 None,
             );
         }
-
         // Adaptively refined point cloud
         {
             let points = points_fixture::<f64>(npoints, None, None);
@@ -682,8 +782,8 @@ mod test {
 
         // Uniformly refined, matrix input point cloud
         {
-            let npoints = 1000000;
-            let ncharge_vecs = 10;
+            let npoints = 10000;
+            let ncharge_vecs = 3;
             let points = points_fixture::<f64>(npoints, None, None);
             let global_idxs = (0..npoints).collect_vec();
             let mut charge_mat = vec![vec![0.0; npoints]; ncharge_vecs];
@@ -754,19 +854,13 @@ mod test {
         let (l, r) = datatree.charge_index_pointer[leaf_idx];
         let leaf_coordinates = &coordinates[l * 3..r * 3];
 
-        let nsources = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension();
-
-        let leaf_coordinates = unsafe {
-                rlst_pointer_mat!['static, f64, leaf_coordinates.as_ptr(), (nsources, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)]
-            }.eval();
-
         let charges = &datatree.charges[l..r];
 
         let kernel = Laplace3dKernel::<f64>::default();
 
         kernel.evaluate_st(
             EvalType::Value,
-            leaf_coordinates.data(),
+            leaf_coordinates,
             &test_point,
             charges,
             &mut expected,
@@ -843,19 +937,13 @@ mod test {
         let (l, r) = datatree.charge_index_pointer[leaf_idx];
         let leaf_coordinates = &coordinates[l * 3..r * 3];
 
-        let nsources = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension();
-
-        let leaf_coordinates = unsafe {
-            rlst_pointer_mat!['static, f64, leaf_coordinates.as_ptr(), (nsources, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)]
-        }.eval();
-
         let charges = &datatree.charges[l..r];
 
         let kernel = Laplace3dKernel::<f64>::default();
 
         kernel.evaluate_st(
             EvalType::Value,
-            leaf_coordinates.data(),
+            leaf_coordinates,
             &test_point,
             charges,
             &mut expected,
@@ -933,10 +1021,6 @@ mod test {
         let ncoordinates = coordinates.len() / datatree.fmm.kernel.space_dimension();
         let (l, r) = datatree.charge_index_pointer[leaf_idx];
         let leaf_coordinates = &coordinates[l * 3..r * 3];
-        let nsources = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension();
-        let leaf_coordinates = unsafe {
-              rlst_pointer_mat!['static, f64, leaf_coordinates.as_ptr(), (nsources, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)]
-          }.eval();
 
         for i in 0..ncharge_vecs {
             let charge_vec_displacement = i * ncoordinates;
@@ -945,7 +1029,7 @@ mod test {
 
             datatree.fmm.kernel.evaluate_st(
                 EvalType::Value,
-                leaf_coordinates.data(),
+                leaf_coordinates,
                 &test_point,
                 charges,
                 &mut expected[i..i + 1],
diff --git a/fmm/src/field_translation/source_to_target.rs b/fmm/src/field_translation/source_to_target.rs
index 1b3d969d..e960740f 100644
--- a/fmm/src/field_translation/source_to_target.rs
+++ b/fmm/src/field_translation/source_to_target.rs
@@ -14,23 +14,29 @@ use bempp_traits::{
     fmm::{Fmm, InteractionLists},
     kernel::{Kernel, ScaleInvariantKernel},
     tree::Tree,
-    types::EvalType,
+    types::{EvalType, Scalar},
 };
 use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree};
 
 use crate::helpers::find_chunk_size;
 use crate::types::{FmmDataAdaptive, FmmDataUniform, KiFmmLinear, SendPtrMut};
 
-use rlst::{
-    algorithms::{linalg::DenseMatrixLinAlgBuilder, traits::svd::Svd},
-    common::traits::*,
-    dense::{rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer},
+use rlst_dense::{
+    array::{empty_array, Array},
+    base_array::BaseArray,
+    data_container::VectorContainer,
+    rlst_dynamic_array2,
+    traits::{MatrixSvd, MultIntoResize, RawAccess, RawAccessMut, Shape},
 };
 
+use rlst_dense::traits::RandomAccessMut;
+
 use super::hadamard::matmul8x8;
 
 /// Field translations defined on uniformly refined trees.
 pub mod uniform {
+    use rlst_dense::rlst_array_from_slice2;
+
     use super::*;
 
     impl<T, U> FmmDataUniform<KiFmmLinear<SingleNodeTree<U>, T, FftFieldTranslationKiFmm<U, T>, U>, U>
@@ -42,15 +48,7 @@ pub mod uniform {
             + Default,
         U: Scalar<Real = U> + Float + Default + std::marker::Send + std::marker::Sync + Fft,
         Complex<U>: Scalar,
-        U: MultiplyAdd<
-            U,
-            VectorContainer<U>,
-            VectorContainer<U>,
-            VectorContainer<U>,
-            Dynamic,
-            Dynamic,
-            Dynamic,
-        >,
+        Array<U, BaseArray<U, VectorContainer<U>, 2>, 2>: MatrixSvd<Item = U>,
     {
         fn displacements(&self, level: u64) -> Vec<Vec<usize>> {
             let nneighbors = 26;
@@ -102,17 +100,15 @@ pub mod uniform {
             + std::marker::Send
             + std::marker::Sync
             + Default,
-        U: Scalar<Real = U> + Float + Default + std::marker::Send + std::marker::Sync + Fft,
+        U: Scalar<Real = U>
+            + Float
+            + Default
+            + std::marker::Send
+            + std::marker::Sync
+            + Fft
+            + rlst_blis::interface::gemm::Gemm,
         Complex<U>: Scalar,
-        U: MultiplyAdd<
-            U,
-            VectorContainer<U>,
-            VectorContainer<U>,
-            VectorContainer<U>,
-            Dynamic,
-            Dynamic,
-            Dynamic,
-        >,
+        Array<U, BaseArray<U, VectorContainer<U>, 2>, 2>: MatrixSvd<Item = U>,
     {
         fn p2l(&self, _level: u64) {}
 
@@ -335,33 +331,31 @@ pub mod uniform {
                 .zip(self.level_locals[level as usize].par_chunks_exact(nsiblings))
                 .for_each(|(check_potential_chunk, local_ptrs)| {
                     // Map to surface grid
-                    let mut potential_buffer = vec![U::zero(); ncoeffs * nsiblings];
+                    let mut potential_chunk = rlst_dynamic_array2!(U, [ncoeffs, nsiblings]);
+
                     for i in 0..nsiblings {
-                        let tmp = &mut potential_buffer[i * ncoeffs..(i + 1) * ncoeffs];
-                        let check_potential = &check_potential_chunk[i * size..(i + 1) * size];
-                        for (surf_idx, &conv_idx) in self.fmm.m2l.conv_to_surf_map.iter().enumerate() {
-                            tmp[surf_idx] = check_potential[conv_idx];
+                        for (surf_idx, &conv_idx) in
+                            self.fmm.m2l.conv_to_surf_map.iter().enumerate()
+                        {
+                            *potential_chunk.get_mut([surf_idx, i]).unwrap() =
+                                check_potential_chunk[i * size + conv_idx];
                         }
                     }
 
                     // Can now find local expansion coefficients
-                    let potential_chunk = unsafe {
-                        rlst_pointer_mat!['a, U, potential_buffer.as_ptr(), (ncoeffs, nsiblings), (1, ncoeffs)]
-                    };
-
-                    let local_chunk = self
-                        .fmm
-                        .dc2e_inv_1
-                        .dot(&self.fmm.dc2e_inv_2.dot(&potential_chunk))
-                        .eval();
-
+                    let local_chunk = empty_array::<U, 2>().simple_mult_into_resize(
+                        self.fmm.dc2e_inv_1.view(),
+                        empty_array::<U, 2>()
+                            .simple_mult_into_resize(self.fmm.dc2e_inv_2.view(), potential_chunk),
+                    );
 
                     local_chunk
                         .data()
                         .chunks_exact(ncoeffs)
                         .zip(local_ptrs)
                         .for_each(|(result, local)| {
-                            let local = unsafe { std::slice::from_raw_parts_mut(local.raw, ncoeffs) };
+                            let local =
+                                unsafe { std::slice::from_raw_parts_mut(local.raw, ncoeffs) };
                             local.iter_mut().zip(result).for_each(|(l, r)| *l += *r);
                         });
                 });
@@ -390,20 +384,10 @@ pub mod uniform {
             + std::marker::Send
             + std::marker::Sync
             + Default,
-        DenseMatrixLinAlgBuilder<U>: Svd,
-        U: Scalar<Real = U>,
-        U: Float
-            + Default
-            + MultiplyAdd<
-                U,
-                VectorContainer<U>,
-                VectorContainer<U>,
-                VectorContainer<U>,
-                Dynamic,
-                Dynamic,
-                Dynamic,
-            >,
+        U: Scalar<Real = U> + rlst_blis::interface::gemm::Gemm,
+        U: Float + Default,
         U: std::marker::Send + std::marker::Sync + Default,
+        Array<U, BaseArray<U, VectorContainer<U>, 2>, 2>: MatrixSvd<Item = U>,
     {
         fn p2l(&self, _level: u64) {}
 
@@ -453,32 +437,50 @@ pub mod uniform {
 
             // Interpret multipoles as a matrix
             let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order);
-            let multipoles = unsafe {
-                rlst_pointer_mat!['a, U, self.level_multipoles[level as usize][0].raw, (ncoeffs, nsources), (1, ncoeffs)]
-            };
 
-            let (nrows, _) = self.fmm.m2l.operator_data.c.shape();
-            let c_dim = (nrows, self.fmm.m2l.k);
+            let multipoles = rlst_array_from_slice2!(
+                U,
+                unsafe {
+                    std::slice::from_raw_parts(
+                        self.level_multipoles[level as usize][0].raw,
+                        ncoeffs * nsources,
+                    )
+                },
+                [ncoeffs, nsources]
+            );
 
-            let mut compressed_multipoles = self.fmm.m2l.operator_data.st_block.dot(&multipoles);
+            let [nrows, _] = self.fmm.m2l.operator_data.c.shape();
+            let c_dim = [nrows, self.fmm.m2l.k];
+
+            let mut compressed_multipoles = empty_array::<U, 2>()
+                .simple_mult_into_resize(self.fmm.m2l.operator_data.st_block.view(), multipoles);
 
             compressed_multipoles
                 .data_mut()
                 .iter_mut()
                 .for_each(|d| *d *= self.fmm.kernel.scale(level) * self.m2l_scale(level));
 
-            (0..316).into_par_iter().for_each(|c_idx| {
-                let top_left = (0, c_idx * self.fmm.m2l.k);
-                let c_sub = self.fmm.m2l.operator_data.c.block(top_left, c_dim);
-
-                let locals = self.fmm.dc2e_inv_1.dot(
-                    &self.fmm.dc2e_inv_2.dot(
-                        &self
-                            .fmm
-                            .m2l
-                            .operator_data
-                            .u
-                            .dot(&c_sub.dot(&compressed_multipoles)),
+            (0..316).for_each(|c_idx| {
+                let top_left = [0, c_idx * self.fmm.m2l.k];
+                let c_sub = self
+                    .fmm
+                    .m2l
+                    .operator_data
+                    .c
+                    .view()
+                    .into_subview(top_left, c_dim);
+
+                let locals = empty_array::<U, 2>().simple_mult_into_resize(
+                    self.fmm.dc2e_inv_1.view(),
+                    empty_array::<U, 2>().simple_mult_into_resize(
+                        self.fmm.dc2e_inv_2.view(),
+                        empty_array::<U, 2>().simple_mult_into_resize(
+                            self.fmm.m2l.operator_data.u.view(),
+                            empty_array::<U, 2>().simple_mult_into_resize(
+                                c_sub.view(),
+                                compressed_multipoles.view(),
+                            ),
+                        ),
                     ),
                 );
 
@@ -529,20 +531,10 @@ pub mod uniform {
                 + std::marker::Send
                 + std::marker::Sync
                 + Default,
-            DenseMatrixLinAlgBuilder<U>: Svd,
-            U: Scalar<Real = U>,
-            U: Float
-                + Default
-                + MultiplyAdd<
-                    U,
-                    VectorContainer<U>,
-                    VectorContainer<U>,
-                    VectorContainer<U>,
-                    Dynamic,
-                    Dynamic,
-                    Dynamic,
-                >,
+            U: Scalar<Real = U> + rlst_blis::interface::gemm::Gemm,
+            U: Float + Default,
             U: std::marker::Send + std::marker::Sync + Default,
+            Array<U, BaseArray<U, VectorContainer<U>, 2>, 2>: MatrixSvd<Item = U>,
         {
             fn p2l(&self, _level: u64) {}
 
@@ -592,33 +584,52 @@ pub mod uniform {
                 }
 
                 // Interpret multipoles as a matrix
-                let multipoles = unsafe {
-                    rlst_pointer_mat!['a, U, self.level_multipoles[level as usize][0][0].raw, (self.ncoeffs, nsources * self.ncharge_vectors), (1, self.ncoeffs)]
-                };
 
-                let (nrows, _) = self.fmm.m2l.operator_data.c.shape();
-                let c_dim = (nrows, self.fmm.m2l.k);
+                let multipoles = rlst_array_from_slice2!(
+                    U,
+                    unsafe {
+                        std::slice::from_raw_parts(
+                            self.level_multipoles[level as usize][0][0].raw,
+                            self.ncoeffs * nsources * self.ncharge_vectors,
+                        )
+                    },
+                    [self.ncoeffs, nsources * self.ncharge_vectors]
+                );
+
+                let [nrows, _] = self.fmm.m2l.operator_data.c.shape();
+                let c_dim = [nrows, self.fmm.m2l.k];
 
-                let mut compressed_multipoles =
-                    self.fmm.m2l.operator_data.st_block.dot(&multipoles);
+                let mut compressed_multipoles = empty_array::<U, 2>().simple_mult_into_resize(
+                    self.fmm.m2l.operator_data.st_block.view(),
+                    multipoles,
+                );
 
                 compressed_multipoles
                     .data_mut()
                     .iter_mut()
                     .for_each(|d| *d *= self.fmm.kernel.scale(level) * self.m2l_scale(level));
 
-                (0..316).into_par_iter().for_each(|c_idx| {
-                    let top_left = (0, c_idx * self.fmm.m2l.k);
-                    let c_sub = self.fmm.m2l.operator_data.c.block(top_left, c_dim);
-
-                    let locals = self.fmm.dc2e_inv_1.dot(
-                        &self.fmm.dc2e_inv_2.dot(
-                            &self
-                                .fmm
-                                .m2l
-                                .operator_data
-                                .u
-                                .dot(&c_sub.dot(&compressed_multipoles)),
+                (0..316).for_each(|c_idx| {
+                    let top_left = [0, c_idx * self.fmm.m2l.k];
+                    let c_sub = self
+                        .fmm
+                        .m2l
+                        .operator_data
+                        .c
+                        .view()
+                        .into_subview(top_left, c_dim);
+
+                    let locals = empty_array::<U, 2>().simple_mult_into_resize(
+                        self.fmm.dc2e_inv_1.view(),
+                        empty_array::<U, 2>().simple_mult_into_resize(
+                            self.fmm.dc2e_inv_2.view(),
+                            empty_array::<U, 2>().simple_mult_into_resize(
+                                self.fmm.m2l.operator_data.u.view(),
+                                empty_array::<U, 2>().simple_mult_into_resize(
+                                    c_sub.view(),
+                                    compressed_multipoles.view(),
+                                ),
+                            ),
                         ),
                     );
 
@@ -666,6 +677,8 @@ pub mod uniform {
 
 /// Field translations defined on adaptively refined
 pub mod adaptive {
+    use rlst_dense::rlst_array_from_slice2;
+
     use super::*;
 
     impl<T, U> FmmDataAdaptive<KiFmmLinear<SingleNodeTree<U>, T, FftFieldTranslationKiFmm<U, T>, U>, U>
@@ -675,17 +688,15 @@ pub mod adaptive {
             + std::marker::Send
             + std::marker::Sync
             + Default,
-        U: Scalar<Real = U> + Float + Default + std::marker::Send + std::marker::Sync + Fft,
+        U: Scalar<Real = U>
+            + Float
+            + Default
+            + std::marker::Send
+            + std::marker::Sync
+            + Fft
+            + rlst_blis::interface::gemm::Gemm,
         Complex<U>: Scalar,
-        U: MultiplyAdd<
-            U,
-            VectorContainer<U>,
-            VectorContainer<U>,
-            VectorContainer<U>,
-            Dynamic,
-            Dynamic,
-            Dynamic,
-        >,
+        Array<U, BaseArray<U, VectorContainer<U>, 2>, 2>: MatrixSvd<Item = U>,
     {
         fn displacements(&self, level: u64) -> Vec<Vec<usize>> {
             let nneighbors = 26;
@@ -737,17 +748,15 @@ pub mod adaptive {
             + std::marker::Send
             + std::marker::Sync
             + Default,
-        U: Scalar<Real = U> + Float + Default + std::marker::Send + std::marker::Sync + Fft,
+        U: Scalar<Real = U>
+            + Float
+            + Default
+            + std::marker::Send
+            + std::marker::Sync
+            + Fft
+            + rlst_blis::interface::gemm::Gemm,
         Complex<U>: Scalar,
-        U: MultiplyAdd<
-            U,
-            VectorContainer<U>,
-            VectorContainer<U>,
-            VectorContainer<U>,
-            Dynamic,
-            Dynamic,
-            Dynamic,
-        >,
+        Array<U, BaseArray<U, VectorContainer<U>, 2>, 2>: MatrixSvd<Item = U>,
     {
         fn p2l<'a>(&self, level: u64) {
             let Some(targets) = self.fmm.tree().get_keys(level) else {
@@ -802,15 +811,9 @@ pub mod adaptive {
                             let nsources = sources.len() / dim;
 
                             if nsources > 0 {
-
-                                let sources = unsafe {
-                                    rlst_pointer_mat!['a, U, sources.as_ptr(), (nsources, dim), (dim, 1)]
-                                }
-                                .eval();
-
                                 self.fmm.kernel.evaluate_st(
                                     EvalType::Value,
-                                    sources.data(),
+                                    sources,
                                     downward_surface,
                                     charges,
                                     check_potential,
@@ -827,15 +830,18 @@ pub mod adaptive {
                 .for_each(|(local_ptr, check_potential)| {
                     let target_local =
                         unsafe { std::slice::from_raw_parts_mut(local_ptr.raw, ncoeffs) };
-                    let check_potential = unsafe {
-                        rlst_pointer_mat!['a, U, check_potential.as_ptr(), (ncoeffs, 1), (1, ncoeffs)]
-                    };
+
+                    let check_potential_mat =
+                        rlst_array_from_slice2!(U, check_potential, [ncoeffs, 1]);
 
                     let scale = self.fmm.kernel().scale(level);
-                    let mut tmp = self
-                        .fmm
-                        .dc2e_inv_1
-                        .dot(&self.fmm.dc2e_inv_2.dot(&check_potential));
+                    let mut tmp = empty_array::<U, 2>().simple_mult_into_resize(
+                        self.fmm.dc2e_inv_1.view(),
+                        empty_array::<U, 2>().simple_mult_into_resize(
+                            self.fmm.dc2e_inv_2.view(),
+                            check_potential_mat,
+                        ),
+                    );
                     tmp.data_mut().iter_mut().for_each(|val| *val *= scale);
 
                     target_local
@@ -1066,32 +1072,29 @@ pub mod adaptive {
                 .zip(self.level_locals[level as usize].par_chunks_exact(nsiblings))
                 .for_each(|(check_potential_chunk, local_ptrs)| {
                     // Map to surface grid
-                    let mut potential_buffer = vec![U::zero(); ncoeffs * nsiblings];
+                    let mut potential_chunk = rlst_dynamic_array2!(U, [ncoeffs, nsiblings]);
                     for i in 0..nsiblings {
-                        let tmp = &mut potential_buffer[i * ncoeffs..(i + 1) * ncoeffs];
-                        let check_potential = &check_potential_chunk[i * size..(i + 1) * size];
-                        for (surf_idx, &conv_idx) in self.fmm.m2l.conv_to_surf_map.iter().enumerate() {
-                            tmp[surf_idx] = check_potential[conv_idx];
+                        for (surf_idx, &conv_idx) in
+                            self.fmm.m2l.conv_to_surf_map.iter().enumerate()
+                        {
+                            *potential_chunk.get_mut([surf_idx, i]).unwrap() =
+                                check_potential_chunk[i * size + conv_idx];
                         }
                     }
 
-                    // Can now find local expansion coefficients
-                    let potential_chunk = unsafe {
-                        rlst_pointer_mat!['a, U, potential_buffer.as_ptr(), (ncoeffs, nsiblings), (1, ncoeffs)]
-                    };
-
-                    let local_chunk = self
-                        .fmm
-                        .dc2e_inv_1
-                        .dot(&self.fmm.dc2e_inv_2.dot(&potential_chunk))
-                        .eval();
+                    let local_chunk = empty_array::<U, 2>().simple_mult_into_resize(
+                        self.fmm.dc2e_inv_1.view(),
+                        empty_array::<U, 2>()
+                            .simple_mult_into_resize(self.fmm.dc2e_inv_2.view(), potential_chunk),
+                    );
 
                     local_chunk
                         .data()
                         .chunks_exact(ncoeffs)
                         .zip(local_ptrs)
                         .for_each(|(result, local)| {
-                            let local = unsafe { std::slice::from_raw_parts_mut(local.raw, ncoeffs) };
+                            let local =
+                                unsafe { std::slice::from_raw_parts_mut(local.raw, ncoeffs) };
                             local.iter_mut().zip(result).for_each(|(l, r)| *l += *r);
                         });
                 });
@@ -1120,20 +1123,10 @@ pub mod adaptive {
             + std::marker::Send
             + std::marker::Sync
             + Default,
-        DenseMatrixLinAlgBuilder<U>: Svd,
-        U: Scalar<Real = U>,
-        U: Float
-            + Default
-            + MultiplyAdd<
-                U,
-                VectorContainer<U>,
-                VectorContainer<U>,
-                VectorContainer<U>,
-                Dynamic,
-                Dynamic,
-                Dynamic,
-            >,
+        U: Scalar<Real = U> + rlst_blis::interface::gemm::Gemm,
+        U: Float + Default,
         U: std::marker::Send + std::marker::Sync + Default,
+        Array<U, BaseArray<U, VectorContainer<U>, 2>, 2>: MatrixSvd<Item = U>,
     {
         fn p2l<'a>(&self, level: u64) {
             let Some(targets) = self.fmm.tree().get_keys(level) else {
@@ -1188,15 +1181,9 @@ pub mod adaptive {
                             let nsources = sources.len() / dim;
 
                             if nsources > 0 {
-
-                                let sources = unsafe {
-                                    rlst_pointer_mat!['a, U, sources.as_ptr(), (nsources, dim), (dim, 1)]
-                                }
-                                .eval();
-
                                 self.fmm.kernel.evaluate_st(
                                     EvalType::Value,
-                                    sources.data(),
+                                    sources,
                                     downward_surface,
                                     charges,
                                     check_potential,
@@ -1213,15 +1200,18 @@ pub mod adaptive {
                 .for_each(|(local_ptr, check_potential)| {
                     let target_local =
                         unsafe { std::slice::from_raw_parts_mut(local_ptr.raw, ncoeffs) };
-                    let check_potential = unsafe {
-                        rlst_pointer_mat!['a, U, check_potential.as_ptr(), (ncoeffs, 1), (1, ncoeffs)]
-                    };
+
+                    let check_potential_mat =
+                        rlst_array_from_slice2!(U, check_potential, [ncoeffs, 1]);
 
                     let scale = self.fmm.kernel().scale(level);
-                    let mut tmp = self
-                        .fmm
-                        .dc2e_inv_1
-                        .dot(&self.fmm.dc2e_inv_2.dot(&check_potential));
+                    let mut tmp = empty_array::<U, 2>().simple_mult_into_resize(
+                        self.fmm.dc2e_inv_1.view(),
+                        empty_array::<U, 2>().simple_mult_into_resize(
+                            self.fmm.dc2e_inv_2.view(),
+                            check_potential_mat,
+                        ),
+                    );
                     tmp.data_mut().iter_mut().for_each(|val| *val *= scale);
 
                     target_local
@@ -1281,32 +1271,51 @@ pub mod adaptive {
 
             // Interpret multipoles as a matrix
             let ncoeffs = self.fmm.m2l.ncoeffs(self.fmm.order);
-            let multipoles = unsafe {
-                rlst_pointer_mat!['a, U, self.level_multipoles[level as usize][0].raw, (ncoeffs, nsources), (1, ncoeffs)]
-            };
+            let multipoles = rlst_array_from_slice2!(
+                U,
+                unsafe {
+                    std::slice::from_raw_parts(
+                        self.level_multipoles[level as usize][0].raw,
+                        ncoeffs * nsources,
+                    )
+                },
+                [ncoeffs, nsources]
+            );
 
-            let (nrows, _) = self.fmm.m2l.operator_data.c.shape();
-            let dim = (nrows, self.fmm.m2l.k);
+            let [nrows, _] = self.fmm.m2l.operator_data.c.shape();
+            let dim = [nrows, self.fmm.m2l.k];
 
-            let mut compressed_multipoles = self.fmm.m2l.operator_data.st_block.dot(&multipoles);
+            let mut compressed_multipoles = empty_array::<U, 2>().simple_mult_into_resize(
+                self.fmm.m2l.operator_data.st_block.view(),
+                multipoles.view(),
+            );
 
             compressed_multipoles
                 .data_mut()
                 .iter_mut()
                 .for_each(|d| *d *= self.fmm.kernel.scale(level) * self.m2l_scale(level));
 
-            (0..316).into_par_iter().for_each(|c_idx| {
-                let top_left = (0, c_idx * self.fmm.m2l.k);
-                let c_sub = self.fmm.m2l.operator_data.c.block(top_left, dim);
-
-                let locals = self.fmm.dc2e_inv_1.dot(
-                    &self.fmm.dc2e_inv_2.dot(
-                        &self
-                            .fmm
-                            .m2l
-                            .operator_data
-                            .u
-                            .dot(&c_sub.dot(&compressed_multipoles)),
+            (0..316).for_each(|c_idx| {
+                let top_left = [0, c_idx * self.fmm.m2l.k];
+                let c_sub = self
+                    .fmm
+                    .m2l
+                    .operator_data
+                    .c
+                    .view()
+                    .into_subview(top_left, dim);
+
+                let locals = empty_array::<U, 2>().simple_mult_into_resize(
+                    self.fmm.dc2e_inv_1.view(),
+                    empty_array::<U, 2>().simple_mult_into_resize(
+                        self.fmm.dc2e_inv_2.view(),
+                        empty_array::<U, 2>().simple_mult_into_resize(
+                            self.fmm.m2l.operator_data.u.view(),
+                            empty_array::<U, 2>().simple_mult_into_resize(
+                                c_sub.view(),
+                                compressed_multipoles.view(),
+                            ),
+                        ),
                     ),
                 );
 
diff --git a/fmm/src/field_translation/target.rs b/fmm/src/field_translation/target.rs
index 28264c5f..e56a3692 100644
--- a/fmm/src/field_translation/target.rs
+++ b/fmm/src/field_translation/target.rs
@@ -10,7 +10,7 @@ use bempp_traits::{
     fmm::{Fmm, InteractionLists, TargetTranslation},
     kernel::{Kernel, ScaleInvariantKernel},
     tree::Tree,
-    types::EvalType,
+    types::{EvalType, Scalar},
 };
 use bempp_tree::types::{morton::MortonKey, single_node::SingleNodeTree};
 
@@ -22,25 +22,22 @@ use crate::{
     },
 };
 
-use rlst::{
-    common::traits::*,
-    dense::{rlst_dynamic_mat, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer},
+use rlst_dense::{
+    array::empty_array,
+    rlst_array_from_slice2, rlst_dynamic_array2,
+    traits::{MultIntoResize, RawAccess, RawAccessMut},
 };
 
 impl<T, U, V> TargetTranslation for FmmDataUniform<KiFmmLinear<SingleNodeTree<V>, T, U, V>, V>
 where
     T: Kernel<T = V> + ScaleInvariantKernel<T = V> + std::marker::Send + std::marker::Sync,
     U: FieldTranslationData<T> + std::marker::Sync + std::marker::Send,
-    V: Scalar<Real = V> + Float + Default + std::marker::Sync + std::marker::Send,
-    V: MultiplyAdd<
-        V,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        Dynamic,
-        Dynamic,
-        Dynamic,
-    >,
+    V: Scalar<Real = V>
+        + Float
+        + Default
+        + std::marker::Sync
+        + std::marker::Send
+        + rlst_blis::interface::gemm::Gemm,
 {
     fn l2l<'a>(&self, level: u64) {
         let Some(child_targets) = self.fmm.tree().get_keys(level) else {
@@ -74,23 +71,37 @@ where
 
         parent_locals
             .par_chunks_exact(chunk_size)
-            .zip(child_locals.par_chunks_exact(nsiblings*chunk_size))
+            .zip(child_locals.par_chunks_exact(nsiblings * chunk_size))
             .for_each(|(parent_local_pointer_chunk, child_local_pointers_chunk)| {
-
-                let mut parent_locals = rlst_dynamic_mat![V, (ncoeffs, chunk_size)];
-                for (chunk_idx, parent_local_pointer) in parent_local_pointer_chunk.iter().enumerate().take(chunk_size) {
-                    let tmp = unsafe { rlst_pointer_mat!['a, V, parent_local_pointer.raw, (ncoeffs, 1), (1, ncoeffs)] };
-                    parent_locals.data_mut()[chunk_idx*ncoeffs..(chunk_idx+1)*ncoeffs].copy_from_slice(tmp.data());
+                let mut parent_locals = rlst_dynamic_array2!(V, [ncoeffs, chunk_size]);
+                for (chunk_idx, parent_local_pointer) in parent_local_pointer_chunk
+                    .iter()
+                    .enumerate()
+                    .take(chunk_size)
+                {
+                    parent_locals.data_mut()[chunk_idx * ncoeffs..(chunk_idx + 1) * ncoeffs]
+                        .copy_from_slice(unsafe { // source is only read: take a shared slice
+                            std::slice::from_raw_parts(parent_local_pointer.raw, ncoeffs)
+                        });
                 }
 
                 for i in 0..nsiblings {
-                    let tmp = self.fmm.l2l[i].dot(&parent_locals).eval();
+                    let tmp = empty_array::<V, 2>()
+                        .simple_mult_into_resize(self.fmm.l2l[i].view(), parent_locals.view());
 
                     for j in 0..chunk_size {
-                        let chunk_displacement = j*nsiblings;
+                        let chunk_displacement = j * nsiblings;
                         let child_displacement = chunk_displacement + i;
-                        let child_local = unsafe { std::slice::from_raw_parts_mut(child_local_pointers_chunk[child_displacement].raw, ncoeffs)};
-                        child_local.iter_mut().zip(&tmp.data()[j*ncoeffs..(j+1)*ncoeffs]).for_each(|(l, t)| *l += *t);
+                        let child_local = unsafe {
+                            std::slice::from_raw_parts_mut(
+                                child_local_pointers_chunk[child_displacement].raw,
+                                ncoeffs,
+                            )
+                        };
+                        child_local
+                            .iter_mut()
+                            .zip(&tmp.data()[j * ncoeffs..(j + 1) * ncoeffs])
+                            .for_each(|(l, t)| *l += *t);
                     }
                 }
             });
@@ -119,30 +130,33 @@ where
                     ((leaf_downward_equivalent_surface, local_ptr), charge_index_pointer),
                     potential_send_ptr,
                 )| {
-                    let target_coordinates = &coordinates
-                        [charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
-                    let ntargets = target_coordinates.len() / dim;
+                    let target_coordinates_row_major =
+                        &coordinates[charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
+                    let ntargets = target_coordinates_row_major.len() / dim;
 
                     // Compute direct
                     if ntargets > 0 {
+                        let target_coordinates_row_major = rlst_array_from_slice2!(
+                            V,
+                            target_coordinates_row_major,
+                            [ntargets, dim],
+                            [dim, 1]
+                        );
+                        let mut target_coordinates_col_major =
+                            rlst_dynamic_array2!(V, [ntargets, dim]);
+                        target_coordinates_col_major.fill_from(target_coordinates_row_major.view());
 
-                        let target_coordinates = unsafe {
-                            rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, dim), (dim, 1)]
-                        }.eval();
-
-                        let local_expansion =
-                            unsafe { rlst_pointer_mat!['a, V, local_ptr.raw, (ncoeffs, 1), (1, ncoeffs) ]};
-
-                        let result = unsafe { std::slice::from_raw_parts_mut(potential_send_ptr.raw, ntargets)};
+                        let result = unsafe {
+                            std::slice::from_raw_parts_mut(potential_send_ptr.raw, ntargets)
+                        };
 
                         self.fmm.kernel.evaluate_st(
                             EvalType::Value,
                             leaf_downward_equivalent_surface,
-                            target_coordinates.data(),
-                            local_expansion.data(),
+                            target_coordinates_col_major.data(),
+                            unsafe { std::slice::from_raw_parts(local_ptr.raw, ncoeffs) },
                             result,
                         );
-
                     }
                 },
             );
@@ -162,18 +176,21 @@ where
             .zip(&self.charge_index_pointer)
             .zip(&self.potentials_send_pointers)
             .for_each(|((leaf, charge_index_pointer), potential_send_pointer)| {
-                let targets =
+                let target_coordinates_row_major =
                     &coordinates[charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
-                let ntargets = targets.len() / dim;
+                let ntargets = target_coordinates_row_major.len() / dim;
 
                 if ntargets > 0 {
-
-                    let targets = unsafe {
-                        rlst_pointer_mat!['a, V, targets.as_ptr(), (ntargets, dim), (dim, 1)]
-                    }.eval();
+                    let target_coordinates_row_major = rlst_array_from_slice2!(
+                        V,
+                        target_coordinates_row_major,
+                        [ntargets, dim],
+                        [dim, 1]
+                    );
+                    let mut target_coordinates_col_major = rlst_dynamic_array2!(V, [ntargets, dim]);
+                    target_coordinates_col_major.fill_from(target_coordinates_row_major.view());
 
                     if let Some(u_list) = self.fmm.get_u_list(leaf) {
-
                         let u_list_indices = u_list
                             .iter()
                             .filter_map(|k| self.fmm.tree().get_leaf_index(k));
@@ -194,20 +211,33 @@ where
                             })
                             .collect_vec();
 
-                        for (&charges, sources) in charges.iter().zip(sources_coordinates) {
-                            let nsources = sources.len() / dim;
+                        for (&charges, source_coordinates_row_major) in
+                            charges.iter().zip(sources_coordinates)
+                        {
+                            let nsources = source_coordinates_row_major.len() / dim;
 
                             if nsources > 0 {
-
-                                let sources = unsafe {
-                                    rlst_pointer_mat!['a, V, sources.as_ptr(), (nsources, dim), (dim, 1)]
-                                }.eval();
-
-                                let result = unsafe { std::slice::from_raw_parts_mut(potential_send_pointer.raw, ntargets)};
+                                let source_coordinates_row_major = rlst_array_from_slice2!(
+                                    V,
+                                    source_coordinates_row_major,
+                                    [nsources, dim],
+                                    [dim, 1]
+                                );
+                                let mut source_coordinates_col_major =
+                                    rlst_dynamic_array2!(V, [nsources, dim]);
+                                source_coordinates_col_major
+                                    .fill_from(source_coordinates_row_major.view());
+
+                                let result = unsafe {
+                                    std::slice::from_raw_parts_mut(
+                                        potential_send_pointer.raw,
+                                        ntargets,
+                                    )
+                                };
                                 self.fmm.kernel.evaluate_st(
                                     EvalType::Value,
-                                    sources.data(),
-                                    targets.data(),
+                                    source_coordinates_col_major.data(),
+                                    target_coordinates_col_major.data(),
                                     charges,
                                     result,
                                 );
@@ -223,16 +253,12 @@ impl<T, U, V> TargetTranslation for FmmDataAdaptive<KiFmmLinear<SingleNodeTree<V
 where
     T: Kernel<T = V> + ScaleInvariantKernel<T = V> + std::marker::Send + std::marker::Sync,
     U: FieldTranslationData<T> + std::marker::Sync + std::marker::Send,
-    V: Scalar<Real = V> + Float + Default + std::marker::Sync + std::marker::Send,
-    V: MultiplyAdd<
-        V,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        Dynamic,
-        Dynamic,
-        Dynamic,
-    >,
+    V: Scalar<Real = V>
+        + Float
+        + Default
+        + std::marker::Sync
+        + std::marker::Send
+        + rlst_blis::interface::gemm::Gemm,
 {
     fn l2l<'a>(&self, level: u64) {
         let Some(child_targets) = self.fmm.tree().get_keys(level) else {
@@ -270,12 +296,15 @@ where
             .into_par_iter()
             .zip(child_locals.par_chunks_exact(nsiblings))
             .for_each(|(parent_local_pointer, child_local_pointers)| {
-                let parent_local = unsafe {
-                    rlst_pointer_mat!['a, V, parent_local_pointer.raw, (ncoeffs, 1), (1, ncoeffs)]
-                };
+                let parent_local = rlst_array_from_slice2!(
+                    V,
+                    unsafe { std::slice::from_raw_parts(parent_local_pointer.raw, ncoeffs) },
+                    [ncoeffs, 1]
+                );
 
                 for (i, child_local_pointer) in child_local_pointers.iter().enumerate().take(8) {
-                    let tmp = self.fmm.l2l[i].dot(&parent_local).eval();
+                    let tmp = empty_array::<V, 2>()
+                        .simple_mult_into_resize(self.fmm.l2l[i].view(), parent_local.view());
                     let child_local =
                         unsafe { std::slice::from_raw_parts_mut(child_local_pointer.raw, ncoeffs) };
                     child_local
@@ -301,15 +330,19 @@ where
             .zip(&self.charge_index_pointer)
             .zip(&self.potentials_send_pointers)
             .for_each(|((leaf, charge_index_pointer), potential_send_pointer)| {
-                let targets =
+                let target_coordinates_row_major =
                     &coordinates[charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
-                let ntargets = targets.len() / dim;
+                let ntargets = target_coordinates_row_major.len() / dim;
 
                 if ntargets > 0 {
-                    let targets = unsafe {
-                        rlst_pointer_mat!['a, V, targets.as_ptr(), (ntargets, dim), (dim, 1)]
-                    }
-                    .eval();
+                    let target_coordinates_row_major = rlst_array_from_slice2!(
+                        V,
+                        target_coordinates_row_major,
+                        [ntargets, dim],
+                        [dim, 1]
+                    );
+                    let mut target_coordinates_col_major = rlst_dynamic_array2!(V, [ntargets, dim]);
+                    target_coordinates_col_major.fill_from(target_coordinates_row_major.view());
 
                     if let Some(w_list) = self.fmm.get_w_list(leaf) {
                         let result = unsafe {
@@ -332,7 +365,7 @@ where
                             self.fmm.kernel.evaluate_st(
                                 EvalType::Value,
                                 surface,
-                                targets.data(),
+                                target_coordinates_col_major.data(),
                                 multipole,
                                 result,
                             )
@@ -363,30 +396,33 @@ where
                     ((leaf_downward_equivalent_surface, local_ptr), charge_index_pointer),
                     potential_send_ptr,
                 )| {
-                    let target_coordinates = &coordinates
-                        [charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
-                    let ntargets = target_coordinates.len() / dim;
-
-                    let local_expansion =
-                        unsafe { rlst_pointer_mat!['a, V, local_ptr.raw, (ncoeffs, 1), (1, ncoeffs) ]};
+                    let target_coordinates_row_major =
+                        &coordinates[charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
+                    let ntargets = target_coordinates_row_major.len() / dim;
 
                     // Compute direct
                     if ntargets > 0 {
+                        let target_coordinates_row_major = rlst_array_from_slice2!(
+                            V,
+                            target_coordinates_row_major,
+                            [ntargets, dim],
+                            [dim, 1]
+                        );
+                        let mut target_coordinates_col_major =
+                            rlst_dynamic_array2!(V, [ntargets, dim]);
+                        target_coordinates_col_major.fill_from(target_coordinates_row_major.view());
 
-                        let targets = unsafe {
-                            rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, dim), (dim, 1)]
-                        }.eval();
-
-                        let result = unsafe { std::slice::from_raw_parts_mut(potential_send_ptr.raw, ntargets)};
+                        let result = unsafe {
+                            std::slice::from_raw_parts_mut(potential_send_ptr.raw, ntargets)
+                        };
 
                         self.fmm.kernel.evaluate_st(
                             EvalType::Value,
                             leaf_downward_equivalent_surface,
-                            targets.data(),
-                            local_expansion.data(),
+                            target_coordinates_col_major.data(),
+                            unsafe { std::slice::from_raw_parts(local_ptr.raw, ncoeffs) },
                             result,
                         );
-
                     }
                 },
             );
@@ -404,19 +440,21 @@ where
             .zip(&self.charge_index_pointer)
             .zip(&self.potentials_send_pointers)
             .for_each(|((leaf, charge_index_pointer), potential_send_pointer)| {
-                let targets =
+                let target_coordinates_row_major =
                     &coordinates[charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
-                let ntargets = targets.len() / dim;
+                let ntargets = target_coordinates_row_major.len() / dim;
 
                 if ntargets > 0 {
-
-                    let targets = unsafe {
-                        rlst_pointer_mat!['a, V, targets.as_ptr(), (ntargets, dim), (dim, 1)]
-                    }.eval();
+                    let target_coordinates_row_major = rlst_array_from_slice2!(
+                        V,
+                        target_coordinates_row_major,
+                        [ntargets, dim],
+                        [dim, 1]
+                    );
+                    let mut target_coordinates_col_major = rlst_dynamic_array2!(V, [ntargets, dim]);
+                    target_coordinates_col_major.fill_from(target_coordinates_row_major.view());
 
                     if let Some(u_list) = self.fmm.get_u_list(leaf) {
-
-
                         let u_list_indices = u_list
                             .iter()
                             .filter_map(|k| self.fmm.tree().get_leaf_index(k));
@@ -437,20 +475,33 @@ where
                             })
                             .collect_vec();
 
-                        for (&charges, sources) in charges.iter().zip(sources_coordinates) {
-                            let nsources = sources.len() / dim;
+                        for (&charges, source_coordinates_row_major) in
+                            charges.iter().zip(sources_coordinates)
+                        {
+                            let nsources = source_coordinates_row_major.len() / dim;
 
                             if nsources > 0 {
-
-                                let sources = unsafe {
-                                    rlst_pointer_mat!['a, V, sources.as_ptr(), (nsources, dim), (dim, 1)]
-                                }.eval();
-
-                                let result = unsafe { std::slice::from_raw_parts_mut(potential_send_pointer.raw, ntargets)};
+                                let source_coordinates_row_major = rlst_array_from_slice2!(
+                                    V,
+                                    source_coordinates_row_major,
+                                    [nsources, dim],
+                                    [dim, 1]
+                                );
+                                let mut source_coordinates_col_major =
+                                    rlst_dynamic_array2!(V, [nsources, dim]);
+                                source_coordinates_col_major
+                                    .fill_from(source_coordinates_row_major.view());
+
+                                let result = unsafe {
+                                    std::slice::from_raw_parts_mut(
+                                        potential_send_pointer.raw,
+                                        ntargets,
+                                    )
+                                };
                                 self.fmm.kernel.evaluate_st(
                                     EvalType::Value,
-                                    sources.data(),
-                                    targets.data(),
+                                    source_coordinates_col_major.data(),
+                                    target_coordinates_col_major.data(),
                                     charges,
                                     result,
                                 );
@@ -467,16 +518,12 @@ impl<T, U, V> TargetTranslation
 where
     T: Kernel<T = V> + ScaleInvariantKernel<T = V> + std::marker::Send + std::marker::Sync,
     U: FieldTranslationData<T> + std::marker::Sync + std::marker::Send,
-    V: Scalar<Real = V> + Float + Default + std::marker::Sync + std::marker::Send,
-    V: MultiplyAdd<
-        V,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        Dynamic,
-        Dynamic,
-        Dynamic,
-    >,
+    V: Scalar<Real = V>
+        + Float
+        + Default
+        + std::marker::Sync
+        + std::marker::Send
+        + rlst_blis::interface::gemm::Gemm,
 {
     fn l2l<'a>(&self, level: u64) {
         let Some(child_targets) = self.fmm.tree().get_keys(level) else {
@@ -508,7 +555,8 @@ where
             .into_par_iter()
             .zip(child_locals.par_chunks_exact(nsiblings))
             .for_each(|(parent_local_pointers, child_locals_pointers)| {
-                let mut parent_locals = rlst_dynamic_mat![V, (self.ncoeffs, self.ncharge_vectors)];
+                let mut parent_locals =
+                    rlst_dynamic_array2!(V, [self.ncoeffs, self.ncharge_vectors]);
 
                 for (charge_vec_idx, parent_local_pointer) in parent_local_pointers
                     .iter()
@@ -525,7 +573,8 @@ where
 
                 for (i, child_locals_i) in child_locals_pointers.iter().enumerate().take(nsiblings)
                 {
-                    let result_i = self.fmm.l2l[i].dot(&parent_locals).eval();
+                    let result_i = empty_array::<V, 2>()
+                        .simple_mult_into_resize(self.fmm.l2l[i].view(), parent_locals.view());
 
                     for (j, child_locals_ij) in
                         child_locals_i.iter().enumerate().take(self.ncharge_vectors)
@@ -556,35 +605,49 @@ where
 
         for i in 0..self.ncharge_vectors {
             self.leaf_upward_surfaces
-            .par_chunks_exact(surface_size)
-            .zip(&self.leaf_locals)
-            .zip(&self.charge_index_pointer)
-            .zip(&self.potentials_send_pointers[i*self.nleaves..(i+1)*self.nleaves])
-            .for_each(|(((leaf_downward_equivalent_surface, leaf_locals), charge_index_pointer), potential_send_pointer)| {
-
-                let target_coordinates = &coordinates
-                    [charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
-                let ntargets = target_coordinates.len() / dim;
-
-                if ntargets > 0 {
-                    let target_coordinates = unsafe {
-                        rlst_pointer_mat!['a, V, target_coordinates.as_ptr(), (ntargets, dim), (dim, 1)]
-                    }.eval();
-
-                        let local_expansion_ptr = leaf_locals[i].raw;
-                        let local_expansion = unsafe { std::slice::from_raw_parts(local_expansion_ptr, self.ncoeffs) };
-                        let result = unsafe { std::slice::from_raw_parts_mut(potential_send_pointer.raw, ntargets)};
+                .par_chunks_exact(surface_size)
+                .zip(&self.leaf_locals)
+                .zip(&self.charge_index_pointer)
+                .zip(&self.potentials_send_pointers[i * self.nleaves..(i + 1) * self.nleaves])
+                .for_each(
+                    |(
+                        ((leaf_downward_equivalent_surface, leaf_locals), charge_index_pointer),
+                        potential_send_pointer,
+                    )| {
+                        let target_coordinates_row_major = &coordinates
+                            [charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
+                        let ntargets = target_coordinates_row_major.len() / dim;
+
+                        if ntargets > 0 {
+                            let target_coordinates_row_major = rlst_array_from_slice2!(
+                                V,
+                                target_coordinates_row_major,
+                                [ntargets, dim],
+                                [dim, 1]
+                            );
+                            let mut target_coordinates_col_major =
+                                rlst_dynamic_array2!(V, [ntargets, dim]);
+                            target_coordinates_col_major
+                                .fill_from(target_coordinates_row_major.view());
+
+                            let local_expansion_ptr = leaf_locals[i].raw;
+                            let local_expansion = unsafe {
+                                std::slice::from_raw_parts(local_expansion_ptr, self.ncoeffs)
+                            };
+                            let result = unsafe {
+                                std::slice::from_raw_parts_mut(potential_send_pointer.raw, ntargets)
+                            };
 
-                        self.fmm.kernel().evaluate_st(
+                            self.fmm.kernel().evaluate_st(
                                 EvalType::Value,
                                 leaf_downward_equivalent_surface,
-                                target_coordinates.data(),
-                        local_expansion,
-                                result
+                                target_coordinates_col_major.data(),
+                                local_expansion,
+                                result,
                             );
-
-                }
-            })
+                        }
+                    },
+                )
         }
     }
 
@@ -601,20 +664,24 @@ where
             leaves
                 .par_iter()
                 .zip(&self.charge_index_pointer)
-                .zip(&self.potentials_send_pointers[i*self.nleaves..(i+1)*self.nleaves])
+                .zip(&self.potentials_send_pointers[i * self.nleaves..(i + 1) * self.nleaves])
                 .for_each(|((leaf, charge_index_pointer), potential_send_pointer)| {
-                    let targets =
+                    let target_coordinates_row_major =
                         &coordinates[charge_index_pointer.0 * dim..charge_index_pointer.1 * dim];
-                    let ntargets = targets.len() / dim;
+                    let ntargets = target_coordinates_row_major.len() / dim;
 
                     if ntargets > 0 {
-
-                        let targets = unsafe {
-                            rlst_pointer_mat!['a, V, targets.as_ptr(), (ntargets, dim), (dim, 1)]
-                        }.eval();
+                        let target_coordinates_row_major = rlst_array_from_slice2!(
+                            V,
+                            target_coordinates_row_major,
+                            [ntargets, dim],
+                            [dim, 1]
+                        );
+                        let mut target_coordinates_col_major =
+                            rlst_dynamic_array2!(V, [ntargets, dim]);
+                        target_coordinates_col_major.fill_from(target_coordinates_row_major.view());
 
                         if let Some(u_list) = self.fmm.get_u_list(leaf) {
-
                             let u_list_indices = u_list
                                 .iter()
                                 .filter_map(|k| self.fmm.tree().get_leaf_index(k));
@@ -624,7 +691,8 @@ where
                                 .clone()
                                 .map(|&idx| {
                                     let index_pointer = &self.charge_index_pointer[idx];
-                                    &self.charges[charge_vec_displacement + index_pointer.0..charge_vec_displacement + index_pointer.1]
+                                    &self.charges[charge_vec_displacement + index_pointer.0
+                                        ..charge_vec_displacement + index_pointer.1]
                                 })
                                 .collect_vec();
 
@@ -636,20 +704,36 @@ where
                                 })
                                 .collect_vec();
 
-                            for (&charges, sources) in charges.iter().zip(sources_coordinates) {
-                                let nsources = sources.len() / dim;
+                            for (&charges, source_coordinates_row_major) in
+                                charges.iter().zip(sources_coordinates)
+                            {
+                                let nsources = source_coordinates_row_major.len() / dim;
 
                                 if nsources > 0 {
-
-                                    let sources = unsafe {
-                                        rlst_pointer_mat!['a, V, sources.as_ptr(), (nsources, dim), (dim, 1)]
-                                    }.eval();
-
-                                    let result = unsafe { std::slice::from_raw_parts_mut(potential_send_pointer.raw, ntargets)};
+                                    // Convert only non-empty source boxes (as with the targets):
+                                    // copy the `[dim, 1]`-strided view of the coordinates into a
+                                    // newly allocated array whose data is passed to the kernel.
+                                    let source_coordinates_row_major = rlst_array_from_slice2!(
+                                        V,
+                                        source_coordinates_row_major,
+                                        [nsources, dim],
+                                        [dim, 1]
+                                    );
+                                    let mut source_coordinates_col_major =
+                                        rlst_dynamic_array2!(V, [nsources, dim]);
+                                    source_coordinates_col_major
+                                        .fill_from(source_coordinates_row_major.view());
+
+                                    let result = unsafe {
+                                        std::slice::from_raw_parts_mut(
+                                            potential_send_pointer.raw,
+                                            ntargets,
+                                        )
+                                    };
                                     self.fmm.kernel.evaluate_st(
                                         EvalType::Value,
-                                        sources.data(),
-                                        targets.data(),
+                                        source_coordinates_col_major.data(),
+                                        target_coordinates_col_major.data(),
                                         charges,
                                         result,
                                     );
diff --git a/fmm/src/fmm.rs b/fmm/src/fmm.rs
index 11f8a019..8b16df4c 100644
--- a/fmm/src/fmm.rs
+++ b/fmm/src/fmm.rs
@@ -1,13 +1,15 @@
 //! Implementation of FmmData and Fmm traits.
-use cauchy::Scalar;
 use itertools::Itertools;
-use num::{Float, ToPrimitive};
+use num::Float;
+use rlst_common::types::Scalar;
 use std::time::Instant;
 
-use rlst::{
-    algorithms::{linalg::DenseMatrixLinAlgBuilder, traits::svd::Svd},
-    common::traits::{Eval, Transpose},
-    dense::{rlst_dynamic_mat, rlst_pointer_mat, traits::*, Dot, MultiplyAdd, VectorContainer},
+use rlst_dense::{
+    array::{empty_array, Array},
+    base_array::BaseArray,
+    data_container::VectorContainer,
+    rlst_dynamic_array2,
+    traits::{MatrixSvd, MultIntoResize, RawAccess, RawAccessMut, Shape},
 };
 
 use bempp_traits::{
@@ -17,41 +19,19 @@ use bempp_traits::{
     tree::Tree,
     types::EvalType,
 };
+
 use bempp_tree::{constants::ROOT, types::single_node::SingleNodeTree};
 
 use crate::types::{FmmDataAdaptive, FmmDataUniform, FmmDataUniformMatrix, KiFmmLinearMatrix};
-use crate::{
-    pinv::{pinv, SvdScalar},
-    types::KiFmmLinear,
-};
+use crate::{pinv::pinv, types::KiFmmLinear};
 
 /// Implementation of constructor for single node KiFMM
-impl<'a, T, U, V> KiFmmLinear<SingleNodeTree<V>, T, U, V>
+impl<T, U, V> KiFmmLinear<SingleNodeTree<V>, T, U, V>
 where
     T: Kernel<T = V> + ScaleInvariantKernel<T = V>,
     U: FieldTranslationData<T>,
-    V: Scalar<Real = V> + Default + Float,
-    SvdScalar<V>: PartialOrd,
-    SvdScalar<V>: Scalar + Float + ToPrimitive,
-    DenseMatrixLinAlgBuilder<V>: Svd,
-    V: MultiplyAdd<
-        V,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        Dynamic,
-        Dynamic,
-        Dynamic,
-    >,
-    SvdScalar<V>: MultiplyAdd<
-        SvdScalar<V>,
-        VectorContainer<SvdScalar<V>>,
-        VectorContainer<SvdScalar<V>>,
-        VectorContainer<SvdScalar<V>>,
-        Dynamic,
-        Dynamic,
-        Dynamic,
-    >,
+    V: Scalar<Real = V> + Default + Float + rlst_blis::interface::gemm::Gemm,
+    Array<V, BaseArray<V, VectorContainer<V>, 2>, 2>: MatrixSvd<Item = V>,
 {
     /// Constructor for single node kernel independent FMM (KiFMM). This object contains all the precomputed operator matrices and metadata, as well as references to
     /// the associated single node octree, and the associated kernel function.
@@ -80,110 +60,54 @@ where
         let nequiv_surface = upward_equivalent_surface.len() / kernel.space_dimension();
         let ncheck_surface = upward_check_surface.len() / kernel.space_dimension();
 
-        // Store in RLST matrices
-        let upward_equivalent_surface = unsafe {
-            rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)]
-        };
-        let upward_check_surface = unsafe {
-            rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, upward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)]
-        };
-        let downward_equivalent_surface = unsafe {
-            rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, downward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)]
-        };
-        let downward_check_surface = unsafe {
-            rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)]
-        };
-
         // Compute upward check to equivalent, and downward check to equivalent Gram matrices
         // as well as their inverses using DGESVD.
-        let mut uc2e = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)];
+        let mut uc2e_t = rlst_dynamic_array2!(V, [ncheck_surface, nequiv_surface]);
         kernel.assemble_st(
             EvalType::Value,
-            upward_equivalent_surface.data(),
-            upward_check_surface.data(),
-            uc2e.data_mut(),
+            &upward_equivalent_surface[..],
+            &upward_check_surface[..],
+            uc2e_t.data_mut(),
         );
 
         // Need to tranapose so that rows correspond to targets and columns to sources
-        let uc2e = uc2e.transpose().eval();
+        let mut uc2e = rlst_dynamic_array2!(V, [nequiv_surface, ncheck_surface]);
+        uc2e.fill_from(uc2e_t.transpose());
 
-        let mut dc2e = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)];
+        let mut dc2e_t = rlst_dynamic_array2!(V, [ncheck_surface, nequiv_surface]);
         kernel.assemble_st(
             EvalType::Value,
-            downward_equivalent_surface.data(),
-            downward_check_surface.data(),
-            dc2e.data_mut(),
+            &downward_equivalent_surface[..],
+            &downward_check_surface[..],
+            dc2e_t.data_mut(),
         );
 
         // Need to tranapose so that rows correspond to targets and columns to sources
-        let dc2e = dc2e.transpose().eval();
+        let mut dc2e = rlst_dynamic_array2!(V, [nequiv_surface, ncheck_surface]);
+        dc2e.fill_from(dc2e_t.transpose());
 
         let (s, ut, v) = pinv::<V>(&uc2e, None, None).unwrap();
 
-        let mut mat_s = rlst_dynamic_mat![SvdScalar<V>, (s.len(), s.len())];
+        let mut mat_s = rlst_dynamic_array2!(V, [s.len(), s.len()]);
         for i in 0..s.len() {
-            mat_s[[i, i]] = SvdScalar::<V>::from_real(s[i]);
+            mat_s[[i, i]] = V::from_real(s[i]);
         }
-        let uc2e_inv_1 = v.dot(&mat_s);
+        let uc2e_inv_1 = empty_array::<V, 2>().simple_mult_into_resize(v.view(), mat_s.view());
         let uc2e_inv_2 = ut;
 
-        let uc2e_inv_1_shape = uc2e_inv_1.shape();
-        let uc2e_inv_2_shape = uc2e_inv_2.shape();
-
-        let uc2e_inv_1 = uc2e_inv_1
-            .data()
-            .iter()
-            .map(|x| V::from(*x).unwrap())
-            .collect_vec();
-        let uc2e_inv_1 = unsafe {
-            rlst_pointer_mat!['a, V, uc2e_inv_1.as_ptr(), uc2e_inv_1_shape, (1, uc2e_inv_1_shape.0)]
-        }
-        .eval();
-        let uc2e_inv_2 = uc2e_inv_2
-            .data()
-            .iter()
-            .map(|x| V::from(*x).unwrap())
-            .collect_vec();
-        let uc2e_inv_2 = unsafe {
-            rlst_pointer_mat!['a, V, uc2e_inv_2.as_ptr(), uc2e_inv_2_shape, (1, uc2e_inv_2_shape.0)]
-        }
-        .eval();
-
         let (s, ut, v) = pinv::<V>(&dc2e, None, None).unwrap();
 
-        let mut mat_s = rlst_dynamic_mat![SvdScalar<V>, (s.len(), s.len())];
+        let mut mat_s = rlst_dynamic_array2!(V, [s.len(), s.len()]);
         for i in 0..s.len() {
-            mat_s[[i, i]] = SvdScalar::<V>::from_real(s[i]);
+            mat_s[[i, i]] = V::from_real(s[i]);
         }
 
-        let dc2e_inv_1 = v.dot(&mat_s);
+        let dc2e_inv_1 = empty_array::<V, 2>().simple_mult_into_resize(v.view(), mat_s.view());
         let dc2e_inv_2 = ut;
 
-        let dc2e_inv_1_shape = dc2e_inv_1.shape();
-        let dc2e_inv_2_shape = dc2e_inv_2.shape();
-
-        let dc2e_inv_1 = dc2e_inv_1
-            .data()
-            .iter()
-            .map(|x| V::from(*x).unwrap())
-            .collect_vec();
-        let dc2e_inv_1 = unsafe {
-            rlst_pointer_mat!['a, V, dc2e_inv_1.as_ptr(), dc2e_inv_1_shape, (1, dc2e_inv_1_shape.0)]
-        }
-        .eval();
-        let dc2e_inv_2 = dc2e_inv_2
-            .data()
-            .iter()
-            .map(|x| V::from(*x).unwrap())
-            .collect_vec();
-        let dc2e_inv_2 = unsafe {
-            rlst_pointer_mat!['a, V, dc2e_inv_2.as_ptr(), dc2e_inv_2_shape, (1, dc2e_inv_2_shape.0)]
-        }
-        .eval();
-
         // Calculate M2M/L2L matrices
         let children = ROOT.children();
-        let mut m2m = rlst_dynamic_mat![V, (nequiv_surface, 8 * nequiv_surface)];
+        let mut m2m = rlst_dynamic_array2!(V, [nequiv_surface, 8 * nequiv_surface]);
         let mut l2l = Vec::new();
 
         for (i, child) in children.iter().enumerate() {
@@ -191,43 +115,45 @@ where
                 child.compute_surface(tree.get_domain(), order, alpha_inner);
             let child_downward_check_surface =
                 child.compute_surface(tree.get_domain(), order, alpha_inner);
-            let child_upward_equivalent_surface = unsafe {
-                rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, child_upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)]
-            };
-            let child_downward_check_surface = unsafe {
-                rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, child_downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)]
-            };
 
-            let mut pc2ce = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)];
+            let mut pc2ce_t = rlst_dynamic_array2!(V, [ncheck_surface, nequiv_surface]);
 
             kernel.assemble_st(
                 EvalType::Value,
-                child_upward_equivalent_surface.data(),
-                upward_check_surface.data(),
-                pc2ce.data_mut(),
+                &child_upward_equivalent_surface,
+                &upward_check_surface,
+                pc2ce_t.data_mut(),
             );
 
             // Need to transpose so that rows correspond to targets, and columns to sources
-            let pc2ce = pc2ce.transpose().eval();
+            let mut pc2ce = rlst_dynamic_array2!(V, [nequiv_surface, ncheck_surface]);
+            pc2ce.fill_from(pc2ce_t.transpose());
 
-            let tmp = uc2e_inv_1.dot(&uc2e_inv_2.dot(&pc2ce)).eval();
+            let tmp = empty_array::<V, 2>().simple_mult_into_resize(
+                uc2e_inv_1.view(),
+                empty_array::<V, 2>().simple_mult_into_resize(uc2e_inv_2.view(), pc2ce.view()),
+            );
             let l = i * nequiv_surface * nequiv_surface;
             let r = l + nequiv_surface * nequiv_surface;
 
             m2m.data_mut()[l..r].copy_from_slice(tmp.data());
 
-            let mut cc2pe = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)];
+            let mut cc2pe_t = rlst_dynamic_array2!(V, [ncheck_surface, nequiv_surface]);
 
             kernel.assemble_st(
                 EvalType::Value,
-                downward_equivalent_surface.data(),
-                child_downward_check_surface.data(),
-                cc2pe.data_mut(),
+                &downward_equivalent_surface,
+                &child_downward_check_surface,
+                cc2pe_t.data_mut(),
             );
 
             // Need to transpose so that rows correspond to targets, and columns to sources
-            let cc2pe = cc2pe.transpose().eval();
-            let mut tmp = dc2e_inv_1.dot(&dc2e_inv_2.dot(&cc2pe)).eval();
+            let mut cc2pe = rlst_dynamic_array2!(V, [nequiv_surface, ncheck_surface]);
+            cc2pe.fill_from(cc2pe_t.transpose());
+            let mut tmp = empty_array::<V, 2>().simple_mult_into_resize(
+                dc2e_inv_1.view(),
+                empty_array::<V, 2>().simple_mult_into_resize(dc2e_inv_2.view(), cc2pe.view()),
+            );
             tmp.data_mut()
                 .iter_mut()
                 .for_each(|d| *d *= kernel.scale(child.level()));
@@ -292,32 +218,12 @@ where
 }
 
 /// Implementation of constructor for single node KiFMM
-impl<'a, T, U, V> KiFmmLinearMatrix<SingleNodeTree<V>, T, U, V>
+impl<T, U, V> KiFmmLinearMatrix<SingleNodeTree<V>, T, U, V>
 where
     T: Kernel<T = V> + ScaleInvariantKernel<T = V>,
     U: FieldTranslationData<T>,
-    V: Scalar<Real = V> + Default + Float,
-    SvdScalar<V>: PartialOrd,
-    SvdScalar<V>: Scalar + Float + ToPrimitive,
-    DenseMatrixLinAlgBuilder<V>: Svd,
-    V: MultiplyAdd<
-        V,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        VectorContainer<V>,
-        Dynamic,
-        Dynamic,
-        Dynamic,
-    >,
-    SvdScalar<V>: MultiplyAdd<
-        SvdScalar<V>,
-        VectorContainer<SvdScalar<V>>,
-        VectorContainer<SvdScalar<V>>,
-        VectorContainer<SvdScalar<V>>,
-        Dynamic,
-        Dynamic,
-        Dynamic,
-    >,
+    V: Scalar<Real = V> + Default + Float + rlst_blis::interface::gemm::Gemm,
+    Array<V, BaseArray<V, VectorContainer<V>, 2>, 2>: MatrixSvd<Item = V>,
 {
     /// Constructor for single node kernel independent FMM (KiFMM). This object contains all the precomputed operator matrices and metadata, as well as references to
     /// the associated single node octree, and the associated kernel function.
@@ -346,83 +252,69 @@ where
         let nequiv_surface = upward_equivalent_surface.len() / kernel.space_dimension();
         let ncheck_surface = upward_check_surface.len() / kernel.space_dimension();
 
-        // Store in RLST matrices
-        let upward_equivalent_surface = unsafe {
-            rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)]
-        };
-        let upward_check_surface = unsafe {
-            rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, upward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)]
-        };
-        let downward_equivalent_surface = unsafe {
-            rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, downward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)]
-        };
-        let downward_check_surface = unsafe {
-            rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)]
-        };
-
         // Compute upward check to equivalent, and downward check to equivalent Gram matrices
         // as well as their inverses using DGESVD.
-        let mut uc2e = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)];
+        let mut uc2e_t = rlst_dynamic_array2!(V, [ncheck_surface, nequiv_surface]);
         kernel.assemble_st(
             EvalType::Value,
-            upward_equivalent_surface.data(),
-            upward_check_surface.data(),
-            uc2e.data_mut(),
+            &upward_equivalent_surface,
+            &upward_check_surface,
+            uc2e_t.data_mut(),
         );
 
         // Need to tranapose so that rows correspond to targets and columns to sources
-        let uc2e = uc2e.transpose().eval();
+        let mut uc2e = rlst_dynamic_array2!(V, [nequiv_surface, ncheck_surface]);
+        uc2e.fill_from(uc2e_t.transpose());
 
-        let mut dc2e = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)];
+        let mut dc2e_t = rlst_dynamic_array2!(V, [ncheck_surface, nequiv_surface]);
         kernel.assemble_st(
             EvalType::Value,
-            downward_equivalent_surface.data(),
-            downward_check_surface.data(),
-            dc2e.data_mut(),
+            &downward_equivalent_surface,
+            &downward_check_surface,
+            dc2e_t.data_mut(),
         );
 
         // Need to tranapose so that rows correspond to targets and columns to sources
-        let dc2e = dc2e.transpose().eval();
+        let mut dc2e = rlst_dynamic_array2!(V, [nequiv_surface, ncheck_surface]);
+        dc2e.fill_from(dc2e_t.transpose());
 
         let (s, ut, v) = pinv::<V>(&uc2e, None, None).unwrap();
 
-        let mut mat_s = rlst_dynamic_mat![SvdScalar<V>, (s.len(), s.len())];
+        let mut mat_s = rlst_dynamic_array2!(V, [s.len(), s.len()]);
         for i in 0..s.len() {
-            mat_s[[i, i]] = SvdScalar::<V>::from_real(s[i]);
+            mat_s[[i, i]] = V::from_real(s[i]);
         }
-        let uc2e_inv_1 = v.dot(&mat_s);
+        let uc2e_inv_1 = empty_array::<V, 2>().simple_mult_into_resize(v.view(), mat_s.view());
         let uc2e_inv_2 = ut;
 
         let uc2e_inv_1_shape = uc2e_inv_1.shape();
         let uc2e_inv_2_shape = uc2e_inv_2.shape();
 
+        // TODO: both factors appear to already hold `V`; drop the casts and extra copies?
         let uc2e_inv_1 = uc2e_inv_1
             .data()
             .iter()
             .map(|x| V::from(*x).unwrap())
             .collect_vec();
-        let uc2e_inv_1 = unsafe {
-            rlst_pointer_mat!['a, V, uc2e_inv_1.as_ptr(), uc2e_inv_1_shape, (1, uc2e_inv_1_shape.0)]
-        }
-        .eval();
+        let mut uc2e_inv_1_mat = rlst_dynamic_array2!(V, uc2e_inv_1_shape);
+        uc2e_inv_1_mat.data_mut().copy_from_slice(&uc2e_inv_1);
+
         let uc2e_inv_2 = uc2e_inv_2
             .data()
             .iter()
             .map(|x| V::from(*x).unwrap())
             .collect_vec();
-        let uc2e_inv_2 = unsafe {
-            rlst_pointer_mat!['a, V, uc2e_inv_2.as_ptr(), uc2e_inv_2_shape, (1, uc2e_inv_2_shape.0)]
-        }
-        .eval();
+        let mut uc2e_inv_2_mat = rlst_dynamic_array2!(V, uc2e_inv_2_shape);
+        uc2e_inv_2_mat.data_mut().copy_from_slice(&uc2e_inv_2);
 
         let (s, ut, v) = pinv::<V>(&dc2e, None, None).unwrap();
 
-        let mut mat_s = rlst_dynamic_mat![SvdScalar<V>, (s.len(), s.len())];
+        let mut mat_s = rlst_dynamic_array2!(V, [s.len(), s.len()]);
         for i in 0..s.len() {
-            mat_s[[i, i]] = SvdScalar::<V>::from_real(s[i]);
+            mat_s[[i, i]] = V::from_real(s[i]);
         }
 
-        let dc2e_inv_1 = v.dot(&mat_s);
+        let dc2e_inv_1 = empty_array::<V, 2>().simple_mult_into_resize(v.view(), mat_s.view());
         let dc2e_inv_2 = ut;
 
         let dc2e_inv_1_shape = dc2e_inv_1.shape();
@@ -433,19 +325,16 @@ where
             .iter()
             .map(|x| V::from(*x).unwrap())
             .collect_vec();
-        let dc2e_inv_1 = unsafe {
-            rlst_pointer_mat!['a, V, dc2e_inv_1.as_ptr(), dc2e_inv_1_shape, (1, dc2e_inv_1_shape.0)]
-        }
-        .eval();
+        let mut dc2e_inv_1_mat = rlst_dynamic_array2!(V, dc2e_inv_1_shape);
+        dc2e_inv_1_mat.data_mut().copy_from_slice(&dc2e_inv_1);
+
         let dc2e_inv_2 = dc2e_inv_2
             .data()
             .iter()
             .map(|x| V::from(*x).unwrap())
             .collect_vec();
-        let dc2e_inv_2 = unsafe {
-            rlst_pointer_mat!['a, V, dc2e_inv_2.as_ptr(), dc2e_inv_2_shape, (1, dc2e_inv_2_shape.0)]
-        }
-        .eval();
+        let mut dc2e_inv_2_mat = rlst_dynamic_array2!(V, dc2e_inv_2_shape);
+        dc2e_inv_2_mat.data_mut().copy_from_slice(&dc2e_inv_2);
 
         // Calculate M2M/L2L matrices
         let children = ROOT.children();
@@ -457,40 +346,42 @@ where
                 child.compute_surface(tree.get_domain(), order, alpha_inner);
             let child_downward_check_surface =
                 child.compute_surface(tree.get_domain(), order, alpha_inner);
-            let child_upward_equivalent_surface = unsafe {
-                rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, child_upward_equivalent_surface.as_ptr(), (nequiv_surface, kernel.space_dimension()), (1, nequiv_surface)]
-            };
-            let child_downward_check_surface = unsafe {
-                rlst_pointer_mat!['a, <V as cauchy::Scalar>::Real, child_downward_check_surface.as_ptr(), (ncheck_surface, kernel.space_dimension()), (1, ncheck_surface)]
-            };
 
-            let mut pc2ce = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)];
+            let mut pc2ce_t = rlst_dynamic_array2!(V, [ncheck_surface, nequiv_surface]);
 
             kernel.assemble_st(
                 EvalType::Value,
-                child_upward_equivalent_surface.data(),
-                upward_check_surface.data(),
-                pc2ce.data_mut(),
+                &child_upward_equivalent_surface,
+                &upward_check_surface,
+                pc2ce_t.data_mut(),
             );
 
             // Need to transpose so that rows correspond to targets, and columns to sources
-            let pc2ce = pc2ce.transpose().eval();
+            let mut pc2ce = rlst_dynamic_array2!(V, [nequiv_surface, ncheck_surface]);
+            pc2ce.fill_from(pc2ce_t.transpose());
 
-            let tmp = uc2e_inv_1.dot(&uc2e_inv_2.dot(&pc2ce)).eval();
+            let tmp = empty_array::<V, 2>().simple_mult_into_resize(
+                uc2e_inv_1_mat.view(),
+                empty_array::<V, 2>().simple_mult_into_resize(uc2e_inv_2_mat.view(), pc2ce.view()),
+            );
             m2m.push(tmp);
 
-            let mut cc2pe = rlst_dynamic_mat![V, (ncheck_surface, nequiv_surface)];
+            let mut cc2pe_t = rlst_dynamic_array2!(V, [ncheck_surface, nequiv_surface]);
 
             kernel.assemble_st(
                 EvalType::Value,
-                downward_equivalent_surface.data(),
-                child_downward_check_surface.data(),
-                cc2pe.data_mut(),
+                &downward_equivalent_surface,
+                &child_downward_check_surface,
+                cc2pe_t.data_mut(),
             );
 
             // Need to transpose so that rows correspond to targets, and columns to sources
-            let cc2pe = cc2pe.transpose().eval();
-            let mut tmp = dc2e_inv_1.dot(&dc2e_inv_2.dot(&cc2pe)).eval();
+            let mut cc2pe = rlst_dynamic_array2!(V, [nequiv_surface, ncheck_surface]);
+            cc2pe.fill_from(cc2pe_t.transpose());
+            let mut tmp = empty_array::<V, 2>().simple_mult_into_resize(
+                dc2e_inv_1_mat.view(),
+                empty_array::<V, 2>().simple_mult_into_resize(dc2e_inv_2_mat.view(), cc2pe.view()),
+            );
             tmp.data_mut()
                 .iter_mut()
                 .for_each(|d| *d *= kernel.scale(child.level()));
@@ -500,10 +391,10 @@ where
 
         Self {
             order,
-            uc2e_inv_1,
-            uc2e_inv_2,
-            dc2e_inv_1,
-            dc2e_inv_2,
+            uc2e_inv_1: uc2e_inv_1_mat,
+            uc2e_inv_2: uc2e_inv_2_mat,
+            dc2e_inv_1: dc2e_inv_1_mat,
+            dc2e_inv_2: dc2e_inv_2_mat,
             alpha_inner,
             alpha_outer,
             m2m,
@@ -884,17 +775,17 @@ where
 mod test {
 
     use super::*;
+    use rlst_dense::rlst_array_from_slice2;
 
     use bempp_field::types::{FftFieldTranslationKiFmm, SvdFieldTranslationKiFmm};
     use bempp_kernel::laplace_3d::Laplace3dKernel;
     use bempp_tree::implementations::helpers::{points_fixture, points_fixture_sphere};
-    use rlst::dense::{base_matrix::BaseMatrix, Matrix};
 
     use crate::charge::build_charge_dict;
 
     #[allow(clippy::too_many_arguments)]
-    fn test_uniform_f64(
-        points: &Matrix<f64, BaseMatrix<f64, VectorContainer<f64>, Dynamic>, Dynamic>,
+    fn test_uniform_f64_fft(
+        points: &Array<f64, BaseArray<f64, VectorContainer<f64>, 2>, 2>,
         charges: &[f64],
         global_idxs: &[usize],
         order: usize,
@@ -904,149 +795,167 @@ mod test {
         depth: u64,
     ) {
         // Test with FFT based field translation
-        {
-            let tree =
-                SingleNodeTree::new(points.data(), false, None, Some(depth), global_idxs, sparse);
+        let tree =
+            SingleNodeTree::new(points.data(), false, None, Some(depth), global_idxs, sparse);
 
-            let kernel = Laplace3dKernel::default();
-            let m2l_data: FftFieldTranslationKiFmm<f64, Laplace3dKernel<f64>> =
-                FftFieldTranslationKiFmm::new(
-                    kernel.clone(),
-                    order,
-                    *tree.get_domain(),
-                    alpha_inner,
-                );
+        let kernel = Laplace3dKernel::default();
+        let m2l_data: FftFieldTranslationKiFmm<f64, Laplace3dKernel<f64>> =
+            FftFieldTranslationKiFmm::new(kernel.clone(), order, *tree.get_domain(), alpha_inner);
 
-            let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
+        let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
 
-            // Form charge dict, matching charges with their associated global indices
-            let charge_dict = build_charge_dict(global_idxs, charges);
+        // Form charge dict, matching charges with their associated global indices
+        let charge_dict = build_charge_dict(global_idxs, charges);
 
-            let datatree = FmmDataUniform::new(fmm, &charge_dict).unwrap();
+        let datatree = FmmDataUniform::new(fmm, &charge_dict).unwrap();
 
-            datatree.run(false);
+        datatree.run(false);
 
-            // Test that direct computation is close to the FMM.
-            let mut test_idx_vec = Vec::new();
-            for (idx, index_pointer) in datatree.charge_index_pointer.iter().enumerate() {
-                if index_pointer.1 - index_pointer.0 > 0 {
-                    test_idx_vec.push(idx);
-                }
+        // Test that direct computation is close to the FMM.
+        let mut test_idx_vec = Vec::new();
+        for (idx, index_pointer) in datatree.charge_index_pointer.iter().enumerate() {
+            if index_pointer.1 - index_pointer.0 > 0 {
+                test_idx_vec.push(idx);
             }
-            let leaf = &datatree.fmm.tree().get_all_leaves().unwrap()[test_idx_vec[3]];
+        }
+        let leaf = &datatree.fmm.tree().get_all_leaves().unwrap()[test_idx_vec[3]];
 
-            let leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
+        let leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
 
-            let (l, r) = datatree.charge_index_pointer[*leaf_idx];
+        let (l, r) = datatree.charge_index_pointer[*leaf_idx];
 
-            let potentials = &datatree.potentials[l..r];
+        let potentials = &datatree.potentials[l..r];
 
-            let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
-            let (l, r) = datatree.charge_index_pointer[*leaf_idx];
-            let leaf_coordinates = &coordinates[l * 3..r * 3];
+        let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
+        let (l, r) = datatree.charge_index_pointer[*leaf_idx];
+        let leaf_coordinates_row_major = &coordinates[l * 3..r * 3];
 
-            let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension();
+        let dim = datatree.fmm.kernel.space_dimension();
+        let ntargets = leaf_coordinates_row_major.len() / dim;
 
-            let leaf_coordinates = unsafe {
-                rlst_pointer_mat!['static, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)]
-            }.eval();
+        let leaf_coordinates_row_major =
+            rlst_array_from_slice2!(f64, leaf_coordinates_row_major, [ntargets, dim], [dim, 1]);
+        let mut leaf_coordinates_col_major = rlst_dynamic_array2!(f64, [ntargets, dim]);
+        leaf_coordinates_col_major.fill_from(leaf_coordinates_row_major.view());
 
-            let mut direct = vec![0f64; ntargets];
+        let mut direct = vec![0f64; ntargets];
 
-            let all_charges = charge_dict.into_values().collect_vec();
+        let all_charges = charge_dict.into_values().collect_vec();
 
-            let kernel = Laplace3dKernel::default();
+        let kernel = Laplace3dKernel::default();
 
-            kernel.evaluate_st(
-                EvalType::Value,
-                points.data(),
-                leaf_coordinates.data(),
-                &all_charges[..],
-                &mut direct[..],
-            );
+        kernel.evaluate_st(
+            EvalType::Value,
+            points.data(),
+            leaf_coordinates_col_major.data(),
+            &all_charges,
+            &mut direct,
+        );
 
-            let abs_error: f64 = potentials
-                .iter()
-                .zip(direct.iter())
-                .map(|(a, b)| (a - b).abs())
-                .sum();
-            let rel_error: f64 = abs_error / (direct.iter().sum::<f64>());
-            assert!(rel_error <= 1e-5);
-        }
+        let abs_error: f64 = potentials
+            .iter()
+            .zip(direct.iter())
+            .map(|(a, b)| (a - b).abs())
+            .sum();
+        let rel_error: f64 = abs_error / (direct.iter().sum::<f64>());
+        // Attach the error breakdown to the assertion so it is only shown on failure.
+        assert!(
+            rel_error <= 1e-5,
+            "rel_error = {rel_error} = {abs_error} / {}",
+            direct.iter().sum::<f64>()
+        );
+    }
 
+    #[allow(clippy::too_many_arguments)] // eight parameters, above clippy's default limit of seven
+    fn test_uniform_f64_svd(
+        points: &Array<f64, BaseArray<f64, VectorContainer<f64>, 2>, 2>,
+        charges: &[f64],
+        global_idxs: &[usize],
+        order: usize,
+        alpha_inner: f64,
+        alpha_outer: f64,
+        sparse: bool, // forwarded unchanged to SingleNodeTree::new
+        depth: u64, // handed to SingleNodeTree::new as Some(depth)
+    ) {
         // Test with SVD field translation
-        {
-            let tree =
-                SingleNodeTree::new(points.data(), false, None, Some(depth), global_idxs, sparse);
+        let tree =
+            SingleNodeTree::new(points.data(), false, None, Some(depth), global_idxs, sparse);
 
-            let kernel = Laplace3dKernel::default();
+        let kernel = Laplace3dKernel::default();
 
-            let m2l_data = SvdFieldTranslationKiFmm::new(
-                kernel.clone(),
-                Some(1000),
-                order,
-                *tree.get_domain(),
-                alpha_inner,
-            );
+        let m2l_data = SvdFieldTranslationKiFmm::new(
+            kernel.clone(),
+            Some(1000),
+            order,
+            *tree.get_domain(),
+            alpha_inner,
+        );
 
-            let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
+        let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
 
-            // Form charge dict, matching charges with their associated global indices
-            let charge_dict = build_charge_dict(global_idxs, charges);
+        // Form charge dict, matching charges with their associated global indices
+        let charge_dict = build_charge_dict(global_idxs, charges);
 
-            let datatree = FmmDataUniform::new(fmm, &charge_dict).unwrap();
+        let datatree = FmmDataUniform::new(fmm, &charge_dict).unwrap();
 
-            datatree.run(false);
+        datatree.run(false);
 
-            // Test that direct computation is close to the FMM.
-            let mut test_idx_vec = Vec::new();
-            for (idx, index_pointer) in datatree.charge_index_pointer.iter().enumerate() {
-                if index_pointer.1 - index_pointer.0 > 0 {
-                    test_idx_vec.push(idx);
-                }
+        // Test that direct computation is close to the FMM: first collect every index whose charge range is non-empty.
+        let mut test_idx_vec = Vec::new();
+        for (idx, index_pointer) in datatree.charge_index_pointer.iter().enumerate() {
+            if index_pointer.1 - index_pointer.0 > 0 {
+                test_idx_vec.push(idx);
             }
-            let leaf = &datatree.fmm.tree().get_all_leaves().unwrap()[test_idx_vec[3]];
+        }
+        let leaf = &datatree.fmm.tree().get_all_leaves().unwrap()[test_idx_vec[3]]; // fourth collected index; panics if fewer than four exist
 
-            let leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
+        let leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
 
-            let (l, r) = datatree.charge_index_pointer[*leaf_idx];
+        let (l, r) = datatree.charge_index_pointer[*leaf_idx];
 
-            let potentials = &datatree.potentials[l..r];
+        let potentials = &datatree.potentials[l..r];
 
-            let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
-            let (l, r) = datatree.charge_index_pointer[*leaf_idx];
-            let leaf_coordinates = &coordinates[l * 3..r * 3];
+        let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
+        let (l, r) = datatree.charge_index_pointer[*leaf_idx]; // repeats the lookup made a few lines up
+        let leaf_coordinates_row_major = &coordinates[l * 3..r * 3]; // three values per point; the 3 is hard-coded here rather than taken from `dim`
 
-            let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension();
+        let dim = datatree.fmm.kernel.space_dimension();
+        let ntargets = leaf_coordinates_row_major.len() / dim;
 
-            let leaf_coordinates = unsafe {
-                rlst_pointer_mat!['static, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)]
-            }.eval();
+        let leaf_coordinates_row_major =
+            rlst_array_from_slice2!(f64, leaf_coordinates_row_major, [ntargets, dim], [dim, 1]);
+        let mut leaf_coordinates_col_major = rlst_dynamic_array2!(f64, [ntargets, dim]);
+        leaf_coordinates_col_major.fill_from(leaf_coordinates_row_major.view()); // copy into the new array; its data() is what the kernel receives below
 
-            let mut direct = vec![0f64; ntargets];
+        let mut direct = vec![0f64; ntargets];
 
-            let all_charges = charge_dict.into_values().collect_vec();
+        let all_charges = charge_dict.into_values().collect_vec(); // NOTE(review): values arrive in the dict's iteration order, presumably not tied to the order of `points` — confirm
 
-            datatree.fmm.kernel().evaluate_st(
-                EvalType::Value,
-                points.data(),
-                leaf_coordinates.data(),
-                &all_charges[..],
-                &mut direct[..],
-            );
+        datatree.fmm.kernel().evaluate_st(
+            EvalType::Value,
+            points.data(),
+            leaf_coordinates_col_major.data(),
+            &all_charges,
+            &mut direct,
+        );
 
-            let abs_error: f64 = potentials
-                .iter()
-                .zip(direct.iter())
-                .map(|(a, b)| (a - b).abs())
-                .sum();
-            let rel_error: f64 = abs_error / (direct.iter().sum::<f64>());
-            assert!(rel_error <= 1e-5);
-        }
+        let abs_error: f64 = potentials
+            .iter()
+            .zip(direct.iter())
+            .map(|(a, b)| (a - b).abs())
+            .sum();
+        let rel_error: f64 = abs_error / (direct.iter().sum::<f64>()); // denominator is the plain (signed) sum of the direct values
+        // TODO: remove this debug print of the relative error and its two terms
+        println!(
+            "rel_error = {rel_error} = {abs_error} / {}",
+            direct.iter().sum::<f64>()
+        );
+        assert!(rel_error <= 1e-3); // NOTE(review): the check this replaces used 1e-5; confirm the looser bound is intended
     }
 
-    fn test_adaptive_f64(
-        points: Matrix<f64, BaseMatrix<f64, VectorContainer<f64>, Dynamic>, Dynamic>,
+    #[allow(clippy::too_many_arguments)]
+    fn test_adaptive_f64_fft(
+        points: Array<f64, BaseArray<f64, VectorContainer<f64>, 2>, 2>,
         charges: &[f64],
         global_idxs: &[usize],
         ncrit: u64,
@@ -1055,151 +964,166 @@ mod test {
         alpha_outer: f64,
     ) {
         // Test with FFT based field translation
-        {
-            let tree =
-                SingleNodeTree::new(points.data(), true, Some(ncrit), None, global_idxs, false);
 
-            let kernel = Laplace3dKernel::default();
-            let m2l_data: FftFieldTranslationKiFmm<f64, Laplace3dKernel<f64>> =
-                FftFieldTranslationKiFmm::new(
-                    kernel.clone(),
-                    order,
-                    *tree.get_domain(),
-                    alpha_inner,
-                );
+        let tree = SingleNodeTree::new(points.data(), true, Some(ncrit), None, global_idxs, false);
 
-            let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
+        let kernel = Laplace3dKernel::default();
+        let m2l_data: FftFieldTranslationKiFmm<f64, Laplace3dKernel<f64>> =
+            FftFieldTranslationKiFmm::new(kernel.clone(), order, *tree.get_domain(), alpha_inner);
 
-            // Form charge dict, matching charges with their associated global indices
-            let charge_dict = build_charge_dict(global_idxs, charges);
+        let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
 
-            let datatree = FmmDataAdaptive::new(fmm, &charge_dict).unwrap();
+        // Form charge dict, matching charges with their associated global indices
+        let charge_dict = build_charge_dict(global_idxs, charges);
 
-            datatree.run(false);
+        let datatree = FmmDataAdaptive::new(fmm, &charge_dict).unwrap();
 
-            // Test that direct computation is close to the FMM.
-            let mut test_idx_vec = Vec::new();
-            for (idx, index_pointer) in datatree.charge_index_pointer.iter().enumerate() {
-                if index_pointer.1 - index_pointer.0 > 0 {
-                    test_idx_vec.push(idx);
-                }
+        datatree.run(false);
+
+        // Test that direct computation is close to the FMM.
+        let mut test_idx_vec = Vec::new();
+        for (idx, index_pointer) in datatree.charge_index_pointer.iter().enumerate() {
+            if index_pointer.1 - index_pointer.0 > 0 {
+                test_idx_vec.push(idx);
             }
+        }
 
-            let leaf = &datatree.fmm.tree().get_all_leaves().unwrap()[test_idx_vec[3]];
+        let leaf = &datatree.fmm.tree().get_all_leaves().unwrap()[test_idx_vec[3]];
 
-            let leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
+        let leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
 
-            let (l, r) = datatree.charge_index_pointer[*leaf_idx];
+        let (l, r) = datatree.charge_index_pointer[*leaf_idx];
 
-            let potentials = &datatree.potentials[l..r];
+        let potentials = &datatree.potentials[l..r];
 
-            let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
-            let (l, r) = datatree.charge_index_pointer[*leaf_idx];
-            let leaf_coordinates = &coordinates[l * 3..r * 3];
+        let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
+        let (l, r) = datatree.charge_index_pointer[*leaf_idx];
+        let leaf_coordinates_row_major = &coordinates[l * 3..r * 3];
 
-            let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension();
+        let dim = datatree.fmm.kernel.space_dimension();
+        let ntargets = leaf_coordinates_row_major.len() / dim;
 
-            let leaf_coordinates = unsafe {
-                rlst_pointer_mat!['static, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)]
-            }.eval();
+        let leaf_coordinates_row_major =
+            rlst_array_from_slice2!(f64, leaf_coordinates_row_major, [ntargets, dim], [dim, 1]);
+        let mut leaf_coordinates_col_major = rlst_dynamic_array2!(f64, [ntargets, dim]);
+        leaf_coordinates_col_major.fill_from(leaf_coordinates_row_major.view());
 
-            let mut direct = vec![0f64; ntargets];
+        let mut direct = vec![0f64; ntargets];
 
-            let all_charges = charge_dict.into_values().collect_vec();
+        let all_charges = charge_dict.into_values().collect_vec();
 
-            let kernel = Laplace3dKernel::default();
+        let kernel = Laplace3dKernel::default();
 
-            kernel.evaluate_st(
-                EvalType::Value,
-                points.data(),
-                leaf_coordinates.data(),
-                &all_charges[..],
-                &mut direct[..],
-            );
+        kernel.evaluate_st(
+            EvalType::Value,
+            points.data(),
+            leaf_coordinates_col_major.data(),
+            &all_charges,
+            &mut direct,
+        );
 
-            let abs_error: f64 = potentials
-                .iter()
-                .zip(direct.iter())
-                .map(|(a, b)| (a - b).abs())
-                .sum();
-            let rel_error: f64 = abs_error / (direct.iter().sum::<f64>());
-            assert!(rel_error <= 1e-5);
-        }
+        let abs_error: f64 = potentials
+            .iter()
+            .zip(direct.iter())
+            .map(|(a, b)| (a - b).abs())
+            .sum();
+        let rel_error: f64 = abs_error / (direct.iter().sum::<f64>());
+        // TODO: remove this print
+        println!(
+            "rel_error = {rel_error} = {abs_error} / {}",
+            direct.iter().sum::<f64>()
+        );
+        assert!(rel_error <= 1e-5);
+    }
 
+    #[allow(clippy::too_many_arguments)] // NOTE(review): seven parameters; clippy's default limit is seven, so this allow may be unnecessary — confirm
+    fn test_adaptive_f64_svd(
+        points: Array<f64, BaseArray<f64, VectorContainer<f64>, 2>, 2>,
+        charges: &[f64],
+        global_idxs: &[usize],
+        ncrit: u64, // handed to SingleNodeTree::new as Some(ncrit)
+        order: usize,
+        alpha_inner: f64,
+        alpha_outer: f64,
+    ) {
         // Test with SVD field translation
-        {
-            let tree =
-                SingleNodeTree::new(points.data(), true, Some(ncrit), None, global_idxs, false);
-            let kernel = Laplace3dKernel::default();
-
-            let m2l_data = SvdFieldTranslationKiFmm::new(
-                kernel.clone(),
-                Some(1000),
-                order,
-                *tree.get_domain(),
-                alpha_inner,
-            );
+        let tree = SingleNodeTree::new(points.data(), true, Some(ncrit), None, global_idxs, false);
+        let kernel = Laplace3dKernel::default();
 
-            let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
+        let m2l_data = SvdFieldTranslationKiFmm::new(
+            kernel.clone(),
+            Some(1000),
+            order,
+            *tree.get_domain(),
+            alpha_inner,
+        );
 
-            // Form charge dict, matching charges with their associated global indices
-            let charge_dict = build_charge_dict(global_idxs, charges);
+        let fmm = KiFmmLinear::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
 
-            let datatree = FmmDataAdaptive::new(fmm, &charge_dict).unwrap();
+        // Form charge dict, matching charges with their associated global indices
+        let charge_dict = build_charge_dict(global_idxs, charges);
 
-            datatree.run(false);
+        let datatree = FmmDataAdaptive::new(fmm, &charge_dict).unwrap();
 
-            // Test that direct computation is close to the FMM.
-            let mut test_idx_vec = Vec::new();
-            for (idx, index_pointer) in datatree.charge_index_pointer.iter().enumerate() {
-                if index_pointer.1 - index_pointer.0 > 0 {
-                    test_idx_vec.push(idx);
-                }
+        datatree.run(false);
+
+        // Test that direct computation is close to the FMM: first collect every index whose charge range is non-empty.
+        let mut test_idx_vec = Vec::new();
+        for (idx, index_pointer) in datatree.charge_index_pointer.iter().enumerate() {
+            if index_pointer.1 - index_pointer.0 > 0 {
+                test_idx_vec.push(idx);
             }
-            let leaf = &datatree.fmm.tree().get_all_leaves().unwrap()[test_idx_vec[3]];
+        }
+        let leaf = &datatree.fmm.tree().get_all_leaves().unwrap()[test_idx_vec[3]]; // fourth collected index; panics if fewer than four exist
 
-            let leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
+        let leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
 
-            let (l, r) = datatree.charge_index_pointer[*leaf_idx];
+        let (l, r) = datatree.charge_index_pointer[*leaf_idx];
 
-            let potentials = &datatree.potentials[l..r];
+        let potentials = &datatree.potentials[l..r];
 
-            let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
-            let (l, r) = datatree.charge_index_pointer[*leaf_idx];
-            let leaf_coordinates = &coordinates[l * 3..r * 3];
+        let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
+        let (l, r) = datatree.charge_index_pointer[*leaf_idx]; // repeats the lookup made a few lines up
+        let leaf_coordinates_row_major = &coordinates[l * 3..r * 3]; // three values per point; the 3 is hard-coded here rather than taken from `dim`
 
-            let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension();
+        let dim = datatree.fmm.kernel.space_dimension();
+        let ntargets = leaf_coordinates_row_major.len() / dim;
 
-            let leaf_coordinates = unsafe {
-                rlst_pointer_mat!['static, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)]
-            }.eval();
+        let leaf_coordinates_row_major =
+            rlst_array_from_slice2!(f64, leaf_coordinates_row_major, [ntargets, dim], [dim, 1]);
+        let mut leaf_coordinates_col_major = rlst_dynamic_array2!(f64, [ntargets, dim]);
+        leaf_coordinates_col_major.fill_from(leaf_coordinates_row_major.view()); // copy into the new array; its data() is what the kernel receives below
 
-            let mut direct = vec![0f64; ntargets];
+        let mut direct = vec![0f64; ntargets];
 
-            let all_charges = charge_dict.into_values().collect_vec();
+        let all_charges = charge_dict.into_values().collect_vec(); // NOTE(review): values arrive in the dict's iteration order, presumably not tied to the order of `points` — confirm
 
-            let kernel = Laplace3dKernel::default();
+        let kernel = Laplace3dKernel::default(); // separate kernel instance used only for the direct evaluation
 
-            kernel.evaluate_st(
-                EvalType::Value,
-                points.data(),
-                leaf_coordinates.data(),
-                &all_charges[..],
-                &mut direct[..],
-            );
+        kernel.evaluate_st(
+            EvalType::Value,
+            points.data(),
+            leaf_coordinates_col_major.data(),
+            &all_charges,
+            &mut direct,
+        );
 
-            let abs_error: f64 = potentials
-                .iter()
-                .zip(direct.iter())
-                .map(|(a, b)| (a - b).abs())
-                .sum();
-            let rel_error: f64 = abs_error / (direct.iter().sum::<f64>());
-            assert!(rel_error <= 1e-5);
-        }
+        let abs_error: f64 = potentials
+            .iter()
+            .zip(direct.iter())
+            .map(|(a, b)| (a - b).abs())
+            .sum();
+        let rel_error: f64 = abs_error / (direct.iter().sum::<f64>()); // denominator is the plain (signed) sum of the direct values
+        // TODO: remove this debug print of the relative error and its two terms
+        println!(
+            "rel_error = {rel_error} = {abs_error} / {}",
+            direct.iter().sum::<f64>()
+        );
+        assert!(rel_error <= 1e-5);
     }
 
     #[allow(clippy::too_many_arguments)]
-    fn test_uniform_matrix_f64(
+    fn test_uniform_matrix_f64_svd(
         order: usize,
         alpha_inner: f64,
         alpha_outer: f64,
@@ -1210,91 +1134,129 @@ mod test {
         charge_mat: &Vec<Vec<f64>>,
     ) {
         // SVD based field translations
-        {
-            let ncharge_vecs = charge_mat.len();
+        let ncharge_vecs = charge_mat.len();
 
-            let kernel = Laplace3dKernel::default();
+        let kernel = Laplace3dKernel::default();
 
-            // Create a tree
-            let tree = SingleNodeTree::new(points, false, None, Some(depth), global_idxs, sparse);
+        // Create a tree
+        let tree = SingleNodeTree::new(points, false, None, Some(depth), global_idxs, sparse);
 
-            // Precompute the M2L data
-            let m2l_data = SvdFieldTranslationKiFmm::new(
-                kernel.clone(),
-                Some(1000),
-                order,
-                *tree.get_domain(),
-                alpha_inner,
-            );
+        // Precompute the M2L data
+        let m2l_data = SvdFieldTranslationKiFmm::new(
+            kernel.clone(),
+            Some(1000),
+            order,
+            *tree.get_domain(),
+            alpha_inner,
+        );
 
-            let fmm =
-                KiFmmLinearMatrix::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
+        let fmm = KiFmmLinearMatrix::new(order, alpha_inner, alpha_outer, kernel, tree, m2l_data);
 
-            // Form charge dict, matching charges with their associated global indices
-            let charge_dicts: Vec<_> = (0..ncharge_vecs)
-                .map(|i| build_charge_dict(global_idxs, &charge_mat[i]))
-                .collect();
+        // Form charge dict, matching charges with their associated global indices
+        let charge_dicts: Vec<_> = (0..ncharge_vecs)
+            .map(|i| build_charge_dict(global_idxs, &charge_mat[i]))
+            .collect();
 
-            // Associate data with the FMM
-            let datatree = FmmDataUniformMatrix::new(fmm, &charge_dicts).unwrap();
+        // Associate data with the FMM
+        let datatree = FmmDataUniformMatrix::new(fmm, &charge_dicts).unwrap();
 
-            datatree.run(false);
+        datatree.run(false);
 
-            // Test that direct computation is close to the FMM.
-            let mut test_idx_vec = Vec::new();
-            for (idx, index_pointer) in datatree.charge_index_pointer.iter().enumerate() {
-                if index_pointer.1 - index_pointer.0 > 0 {
-                    test_idx_vec.push(idx);
-                }
+        // Test that direct computation is close to the FMM.
+        let mut test_idx_vec = Vec::new();
+        for (idx, index_pointer) in datatree.charge_index_pointer.iter().enumerate() {
+            if index_pointer.1 - index_pointer.0 > 0 {
+                test_idx_vec.push(idx);
             }
-            let leaf = &datatree.fmm.tree().get_all_leaves().unwrap()[test_idx_vec[3]];
+        }
+        let leaf = &datatree.fmm.tree().get_all_leaves().unwrap()[test_idx_vec[3]];
+
+        let &leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
+        let (l, r) = datatree.charge_index_pointer[leaf_idx];
 
-            let &leaf_idx = datatree.fmm.tree().get_leaf_index(leaf).unwrap();
-            let (l, r) = datatree.charge_index_pointer[leaf_idx];
+        let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
+        let leaf_coordinates_row_major = &coordinates[l * 3..r * 3];
+
+        let dim = datatree.fmm.kernel.space_dimension();
+        let ntargets = leaf_coordinates_row_major.len() / dim;
+
+        let leaf_coordinates_row_major =
+            rlst_array_from_slice2!(f64, leaf_coordinates_row_major, [ntargets, dim], [dim, 1]);
+        let mut leaf_coordinates_col_major = rlst_dynamic_array2!(f64, [ntargets, dim]);
+        leaf_coordinates_col_major.fill_from(leaf_coordinates_row_major.view());
+
+        for (i, charge_dict) in charge_dicts
+            .iter()
+            .enumerate()
+            .take(datatree.ncharge_vectors)
+        {
+            let potentials_ptr =
+                datatree.potentials_send_pointers[i * datatree.nleaves + leaf_idx].raw;
+            let potentials = unsafe { std::slice::from_raw_parts(potentials_ptr, ntargets) };
 
-            let coordinates = datatree.fmm.tree().get_all_coordinates().unwrap();
-            let leaf_coordinates = &coordinates[l * 3..r * 3];
+            let all_charges = &charge_dict.values().cloned().collect_vec();
 
-            let ntargets = leaf_coordinates.len() / datatree.fmm.kernel.space_dimension();
+            let mut direct = vec![0f64; ntargets];
 
-            let leaf_coordinates = unsafe {
-                rlst_pointer_mat!['static, f64, leaf_coordinates.as_ptr(), (ntargets, datatree.fmm.kernel.space_dimension()), (datatree.fmm.kernel.space_dimension(), 1)]
-            }.eval();
+            datatree.fmm.kernel().evaluate_st(
+                EvalType::Value,
+                points,
+                leaf_coordinates_col_major.data(),
+                all_charges,
+                &mut direct,
+            );
 
-            for (i, charge_dict) in charge_dicts
+            let abs_error: f64 = potentials
                 .iter()
-                .enumerate()
-                .take(datatree.ncharge_vectors)
-            {
-                let potentials_ptr =
-                    datatree.potentials_send_pointers[i * datatree.nleaves + leaf_idx].raw;
-                let potentials = unsafe { std::slice::from_raw_parts(potentials_ptr, ntargets) };
-
-                let all_charges = &charge_dict.values().cloned().collect_vec();
-
-                let mut direct = vec![0f64; ntargets];
-
-                datatree.fmm.kernel().evaluate_st(
-                    EvalType::Value,
-                    points,
-                    leaf_coordinates.data(),
-                    all_charges,
-                    &mut direct,
-                );
-
-                let abs_error: f64 = potentials
-                    .iter()
-                    .zip(direct.iter())
-                    .map(|(a, b)| (a - b).abs())
-                    .sum();
-                let rel_error: f64 = abs_error / (direct.iter().sum::<f64>());
-                assert!(rel_error <= 1e-5);
-            }
+                .zip(direct.iter())
+                .map(|(a, b)| (a - b).abs())
+                .sum();
+            let rel_error: f64 = abs_error / (direct.iter().sum::<f64>());
+            // TODO: remove this print
+            println!(
+                "rel_error = {rel_error} = {abs_error} / {}",
+                direct.iter().sum::<f64>()
+            );
+            assert!(rel_error <= 1e-5);
         }
     }
 
     #[test]
-    fn test_uniform() {
+    fn test_uniform_sphere_fft() {
+        let npoints = 10000;
+
+        let global_idxs = (0..npoints).collect_vec(); // 0, 1, ..., npoints - 1
+        let charges = vec![1.0; npoints]; // the same charge, 1.0, for every point
+
+        let order = 6;
+        let alpha_inner = 1.05;
+        let alpha_outer = 2.95;
+
+        // Points distributed on the surface of a sphere; the helper runs twice, differing only in the boolean argument
+        let points_sphere = points_fixture_sphere::<f64>(npoints);
+        test_uniform_f64_fft(
+            &points_sphere,
+            &charges,
+            &global_idxs,
+            order,
+            alpha_inner,
+            alpha_outer,
+            true, // presumably the `sparse` flag, as in test_uniform_f64_svd — confirm
+            3, // presumably the tree depth, as in test_uniform_f64_svd — confirm
+        );
+        test_uniform_f64_fft(
+            &points_sphere,
+            &charges,
+            &global_idxs,
+            order,
+            alpha_inner,
+            alpha_outer,
+            false,
+            3,
+        );
+    }
+    #[test]
+    fn test_uniform_sphere_svd() {
         let npoints = 10000;
 
         let global_idxs = (0..npoints).collect_vec();
@@ -1306,7 +1268,7 @@ mod test {
 
         // Test case where points are distributed on surface of a sphere
         let points_sphere = points_fixture_sphere::<f64>(npoints);
-        test_uniform_f64(
+        test_uniform_f64_svd(
             &points_sphere,
             &charges,
             &global_idxs,
@@ -1316,7 +1278,7 @@ mod test {
             true,
             3,
         );
-        test_uniform_f64(
+        test_uniform_f64_svd(
             &points_sphere,
             &charges,
             &global_idxs,
@@ -1326,10 +1288,21 @@ mod test {
             false,
             3,
         );
+    }
+    #[test]
+    fn test_uniform_box_fft() {
+        let npoints = 10000;
+
+        let global_idxs = (0..npoints).collect_vec();
+        let charges = vec![1.0; npoints];
+
+        let order = 6;
+        let alpha_inner = 1.05;
+        let alpha_outer = 2.95;
 
         // Test case where points are distributed randomly in a box
         let points_cloud = points_fixture::<f64>(npoints, None, None);
-        test_uniform_f64(
+        test_uniform_f64_fft(
             &points_cloud,
             &charges,
             &global_idxs,
@@ -1339,7 +1312,7 @@ mod test {
             true,
             3,
         );
-        test_uniform_f64(
+        test_uniform_f64_fft(
             &points_cloud,
             &charges,
             &global_idxs,
@@ -1349,6 +1322,50 @@ mod test {
             false,
             3,
         );
+    }
+    #[test] // runs test_uniform_f64_svd twice on one point cloud, with `sparse` true and then false
+    fn test_uniform_box_svd() {
+        let npoints = 10000;
+
+        let global_idxs = (0..npoints).collect_vec(); // 0, 1, ..., npoints - 1
+        let charges = vec![1.0; npoints]; // the same charge, 1.0, for every point
+
+        let order = 6;
+        let alpha_inner = 1.05;
+        let alpha_outer = 2.95;
+
+        // Test case where points are distributed randomly in a box
+        let points_cloud = points_fixture::<f64>(npoints, None, None);
+        test_uniform_f64_svd(
+            &points_cloud,
+            &charges,
+            &global_idxs,
+            order,
+            alpha_inner,
+            alpha_outer,
+            true, // sparse
+            3, // depth
+        );
+        test_uniform_f64_svd(
+            &points_cloud,
+            &charges,
+            &global_idxs,
+            order,
+            alpha_inner,
+            alpha_outer,
+            false, // sparse
+            3, // depth
+        );
+    }
+    #[test]
+    fn test_uniform_box_matrix_svd() {
+        let npoints = 10000;
+
+        let global_idxs = (0..npoints).collect_vec();
+
+        let order = 6;
+        let alpha_inner = 1.05;
+        let alpha_outer = 2.95;
 
         // Test matrix input
         let points = points_fixture::<f64>(npoints, None, None);
@@ -1360,7 +1377,7 @@ mod test {
             .enumerate()
             .for_each(|(i, charge_mat_i)| *charge_mat_i = vec![i as f64 + 1.0; npoints]);
 
-        test_uniform_matrix_f64(
+        test_uniform_matrix_f64_svd(
             order,
             alpha_inner,
             alpha_outer,
@@ -1373,7 +1390,7 @@ mod test {
     }
 
     #[test]
-    fn test_adaptive() {
+    fn test_adaptive_sphere_fft() {
         let npoints = 10000;
 
         let global_idxs = (0..npoints).collect_vec();
@@ -1386,7 +1403,7 @@ mod test {
 
         // Test case where points are distributed on surface of a sphere
         let points_sphere = points_fixture_sphere::<f64>(npoints);
-        test_adaptive_f64(
+        test_adaptive_f64_fft(
             points_sphere,
             &charges,
             &global_idxs,
@@ -1395,10 +1412,70 @@ mod test {
             alpha_inner,
             alpha_outer,
         );
+    }
+    #[test] // runs test_adaptive_f64_svd once on the sphere fixture
+    fn test_adaptive_sphere_svd() {
+        let npoints = 10000;
+
+        let global_idxs = (0..npoints).collect_vec(); // 0, 1, ..., npoints - 1
+        let charges = vec![1.0; npoints]; // the same charge, 1.0, for every point
+
+        let order = 6;
+        let alpha_inner = 1.05;
+        let alpha_outer = 2.95;
+        let ncrit = 100; // the helper hands this to SingleNodeTree::new as Some(ncrit)
+
+        // Test case where points are distributed on the surface of a sphere
+        let points_sphere = points_fixture_sphere::<f64>(npoints);
+        test_adaptive_f64_svd(
+            points_sphere,
+            &charges,
+            &global_idxs,
+            ncrit,
+            order,
+            alpha_inner,
+            alpha_outer,
+        );
+    }
+    #[test] // runs test_adaptive_f64_fft once on the box fixture
+    fn test_adaptive_box_fft() {
+        let npoints = 10000;
+
+        let global_idxs = (0..npoints).collect_vec(); // 0, 1, ..., npoints - 1
+        let charges = vec![1.0; npoints]; // the same charge, 1.0, for every point
+
+        let order = 6;
+        let alpha_inner = 1.05;
+        let alpha_outer = 2.95;
+        let ncrit = 100; // the helper hands this to SingleNodeTree::new as Some(ncrit)
+
+        // Test case where points are distributed randomly in a box
+        let points_cloud = points_fixture::<f64>(npoints, None, None);
+        test_adaptive_f64_fft(
+            points_cloud,
+            &charges,
+            &global_idxs,
+            ncrit,
+            order,
+            alpha_inner,
+            alpha_outer,
+        );
+    }
+    #[test]
+    fn test_adaptive_box_svd() {
+        let npoints = 10000;
+
+        let global_idxs = (0..npoints).collect_vec();
+        let charges = vec![1.0; npoints];
+
+        let order = 6;
+        let alpha_inner = 1.05;
+        let alpha_outer = 2.95;
+        let ncrit = 100;
 
         // Test case where points are distributed randomly in a box
         let points_cloud = points_fixture::<f64>(npoints, None, None);
-        test_adaptive_f64(
+        test_adaptive_f64_svd(
             points_cloud,
             &charges,
             &global_idxs,
diff --git a/fmm/src/interaction_lists.rs b/fmm/src/interaction_lists.rs
index a8112fe3..2eb8bbe7 100644
--- a/fmm/src/interaction_lists.rs
+++ b/fmm/src/interaction_lists.rs
@@ -1,6 +1,6 @@
 //! Implementation of interaction lists for FMMs (single and multi node)
-use cauchy::Scalar;
 use itertools::Itertools;
+use rlst_common::types::Scalar;
 
 use bempp_traits::{
     field::FieldTranslationData, fmm::InteractionLists, kernel::Kernel, tree::Tree,
diff --git a/fmm/src/pinv.rs b/fmm/src/pinv.rs
index e22f0e07..74f69188 100644
--- a/fmm/src/pinv.rs
+++ b/fmm/src/pinv.rs
@@ -1,20 +1,20 @@
 //! Implementation of Moore-Penrose PseudoInverse
-use num::{Float, Zero};
-use rlst::algorithms::linalg::{DenseMatrixLinAlgBuilder, LinAlg};
-use rlst::algorithms::traits::svd::{Mode, Svd};
-use rlst::dense::{
-    base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix, Dynamic, Shape,
+use num::Float;
+use rlst_common::types::{RlstError, RlstResult, Scalar};
+use rlst_dense::{
+    array::Array,
+    base_array::BaseArray,
+    data_container::VectorContainer,
+    linalg::svd::SvdMode,
+    rlst_dynamic_array2,
+    traits::{MatrixSvd, Shape},
 };
-// use rlst_common::traits::*;
-use rlst::common::traits::{Eval, Transpose};
-use rlst::common::types::{RlstError, RlstResult, Scalar};
-use rlst::dense::MatrixD;
 
-pub type PinvMatrix<T> = Matrix<T, BaseMatrix<T, VectorContainer<T>, Dynamic>, Dynamic>;
+pub type PinvMatrix<T> = Array<T, BaseArray<T, VectorContainer<T>, 2>, 2>;
 
-type PinvReturnType<T> = RlstResult<(Vec<<T as Scalar>::Real>, MatrixD<T>, MatrixD<T>)>;
+type PinvReturnType<T> = RlstResult<(Vec<<T as Scalar>::Real>, PinvMatrix<T>, PinvMatrix<T>)>;
 
-pub type SvdScalar<T> = <DenseMatrixLinAlgBuilder<T> as Svd>::T;
+//pub type SvdScalar<T> = <DenseMatrixLinAlgBuilder<T> as Svd>::T;
 
 /// Compute the (Moore-Penrose) pseudo-inverse of a matrix.
 ///
@@ -30,52 +30,61 @@ pub fn pinv<T>(
     mat: &PinvMatrix<T>,
     atol: Option<T::Real>,
     rtol: Option<T::Real>,
-) -> PinvReturnType<SvdScalar<T>>
+) -> PinvReturnType<T>
 where
-    DenseMatrixLinAlgBuilder<T>: Svd,
-    SvdScalar<T>: PartialOrd,
-    SvdScalar<T>: Scalar + Float,
-    T: Scalar + Float,
+    //DenseMatrixLinAlgBuilder<T>: Svd,
+    T: Scalar<Real = T> + Float,
+    Array<T, BaseArray<T, VectorContainer<T>, 2>, 2>: MatrixSvd<Item = T>,
 {
     let shape = mat.shape();
 
-    if shape.0 == 0 || shape.1 == 0 {
-        return Err(RlstError::MatrixIsEmpty(shape));
+    if shape[0] == 0 || shape[1] == 0 {
+        return Err(RlstError::MatrixIsEmpty((shape[0], shape[1])));
     }
 
     // If we have a vector return error
-    if shape.0 == 1 || shape.1 == 1 {
+    if shape[0] == 1 || shape[1] == 1 {
         Err(RlstError::SingleDimensionError {
             expected: 2,
             actual: 1,
         })
     } else {
         // For matrices compute the full SVD
-        let (mut s, u, vt) = mat.linalg().svd(Mode::All, Mode::All)?;
-        let u = u.unwrap();
-        let vt = vt.unwrap();
+        let k = std::cmp::min(shape[0], shape[1]);
+        let mut u = rlst_dynamic_array2!(T, [shape[0], k]);
+        let mut s = vec![T::zero(); k];
+        let mut vt = rlst_dynamic_array2!(T, [k, shape[1]]);
+
+        // TODO: work out why it fails without this copy and remove this copy
+        let mut mat_copy = rlst_dynamic_array2!(T, shape);
+        mat_copy.fill_from(mat.view());
+        // Propagate SVD failures through the `RlstResult` return instead of panicking.
+        mat_copy
+            .into_svd_alloc(u.view_mut(), vt.view_mut(), &mut s[..], SvdMode::Reduced)?;
 
         let eps = T::real(T::epsilon());
-        let max_dim = T::real(std::cmp::max(shape.0, shape.1));
+        let max_dim = T::real(std::cmp::max(shape[0], shape[1]));
 
         let atol = atol.unwrap_or(T::Real::zero());
         let rtol = rtol.unwrap_or(max_dim * eps);
 
         let max_s = s[0];
-        let threshold = SvdScalar::<T>::real(atol + rtol) * SvdScalar::<T>::real(max_s);
+        let threshold = T::real(atol + rtol) * T::real(max_s);
 
         // Filter singular values below this threshold
         for s in s.iter_mut() {
             if *s > threshold {
-                *s = SvdScalar::<T>::real(1.0) / SvdScalar::<T>::real(*s);
+                *s = T::real(1.0) / T::real(*s);
             } else {
-                *s = SvdScalar::<T>::real(0.)
+                *s = T::real(0.)
             }
         }
 
         // Return pseudo-inverse in component form
-        let v = vt.transpose().eval();
-        let ut = u.transpose().eval();
+        let mut v = rlst_dynamic_array2!(T, [vt.shape()[1], vt.shape()[0]]);
+        let mut ut = rlst_dynamic_array2!(T, [u.shape()[1], u.shape()[0]]);
+        v.fill_from(vt.transpose());
+        ut.fill_from(u.transpose());
 
         Ok((s, ut, v))
     }
@@ -86,34 +95,46 @@ mod test {
 
     use super::*;
     use approx::assert_relative_eq;
-    use rlst::common::traits::ColumnMajorIterator;
-    use rlst::common::traits::NewLikeSelf;
-    use rlst::dense::{rlst_dynamic_mat, rlst_rand_mat, Dot};
+    use rlst_dense::{
+        array::empty_array,
+        rlst_dynamic_array2,
+        traits::{MultIntoResize, RandomAccessByRef},
+    };
 
     #[test]
     fn test_pinv() {
         let dim: usize = 5;
-        let mat = rlst_rand_mat![f64, (dim, dim)];
+        let mut mat = rlst_dynamic_array2!(f64, [dim, dim]);
+        mat.fill_from_seed_equally_distributed(0);
 
         let (s, ut, v) = pinv::<f64>(&mat, None, None).unwrap();
 
-        let mut mat_s = rlst_dynamic_mat![f64, (s.len(), s.len())];
+        let mut mat_s = rlst_dynamic_array2!(f64, [s.len(), s.len()]);
         for i in 0..s.len() {
             mat_s[[i, i]] = s[i];
         }
 
-        let inv = v.dot(&mat_s).dot(&ut);
+        let inv = empty_array::<f64, 2>().simple_mult_into_resize(
+            v.view(),
+            empty_array::<f64, 2>().simple_mult_into_resize(mat_s.view(), ut.view()),
+        );
 
-        let actual = inv.dot(&mat);
+        let actual = empty_array::<f64, 2>().simple_mult_into_resize(inv.view(), mat.view());
 
         // Expect the identity matrix
-        let mut expected = actual.new_like_self();
+        let mut expected = rlst_dynamic_array2!(f64, actual.shape());
         for i in 0..dim {
             expected[[i, i]] = 1.0
         }
 
-        for (a, e) in actual.iter_col_major().zip(expected.iter_col_major()) {
-            assert_relative_eq!(a, e, epsilon = 1E-13);
+        for i in 0..actual.shape()[0] {
+            for j in 0..actual.shape()[1] {
+                assert_relative_eq!(
+                    *actual.get([i, j]).unwrap(),
+                    *expected.get([i, j]).unwrap(),
+                    epsilon = 1E-13
+                );
+            }
         }
     }
 }
diff --git a/fmm/src/types.rs b/fmm/src/types.rs
index 02e5f9f6..afbcfa8f 100644
--- a/fmm/src/types.rs
+++ b/fmm/src/types.rs
@@ -6,10 +6,9 @@ use bempp_traits::kernel::ScaleInvariantKernel;
 use bempp_traits::{field::FieldTranslationData, fmm::Fmm, kernel::Kernel, tree::Tree};
 use bempp_tree::types::morton::MortonKey;
 use bempp_tree::types::single_node::SingleNodeTree;
-use cauchy::Scalar;
 use num::{Complex, Float};
-use rlst::dense::traits::*;
-use rlst::dense::{base_matrix::BaseMatrix, data_container::VectorContainer, matrix::Matrix};
+use rlst_common::types::Scalar;
+use rlst_dense::{array::Array, base_array::BaseArray, data_container::VectorContainer};
 
 /// Type alias for charge data
 pub type Charge<T> = T;
@@ -21,7 +20,7 @@ pub type GlobalIdx = usize;
 pub type ChargeDict<T> = HashMap<GlobalIdx, Charge<T>>;
 
 /// Type alias for approximation of FMM operator matrices.
-pub type C2EType<T> = Matrix<T, BaseMatrix<T, VectorContainer<T>, Dynamic>, Dynamic>;
+pub type C2EType<T> = Array<T, BaseArray<T, VectorContainer<T>, 2>, 2>;
 
 pub struct FmmDataUniform<T, U>
 where
@@ -977,7 +976,7 @@ mod test {
         implementations::helpers::points_fixture, types::single_node::SingleNodeTree,
     };
     use itertools::Itertools;
-    use rlst::dense::RawAccess;
+    use rlst_dense::traits::RawAccess;
 
     #[test]
     fn test_fmm_data_uniform_matrix() {
diff --git a/grid/Cargo.toml b/grid/Cargo.toml
index f57d81ea..f540f853 100644
--- a/grid/Cargo.toml
+++ b/grid/Cargo.toml
@@ -26,6 +26,6 @@ bempp-element = { path = "../element"}
 approx = "0.5"
 itertools = "0.10"
 mpi = { version = "0.6.*", optional = true }
-rlst-common = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
-rlst-proc-macro = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
-rlst-dense = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
+rlst-common = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-proc-macro = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-dense = { git = "https://github.com/linalg-rs/rlst.git" }
diff --git a/grid/examples/curved_cells.rs b/grid/examples/curved_cells.rs
index 85e2df25..7d7b4e9a 100644
--- a/grid/examples/curved_cells.rs
+++ b/grid/examples/curved_cells.rs
@@ -14,7 +14,7 @@ fn main() {
                 0.25, 0.5, 0.5, 0.5, 0.5, 0.75, 1.0, 1.0, 1.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5,
                 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
             ],
-            (13, 3),
+            [13, 3],
         ),
         AdjacencyList::from_data(
             vec![2, 7, 12, 0, 2, 9, 11, 1, 4, 6, 10, 5, 2, 7, 11, 8, 6, 3],
diff --git a/grid/examples/parallel_grid.rs b/grid/examples/parallel_grid.rs
index 184a9622..06d7456b 100644
--- a/grid/examples/parallel_grid.rs
+++ b/grid/examples/parallel_grid.rs
@@ -15,7 +15,7 @@ use bempp_traits::grid::{Geometry, Grid, Ownership, Topology};
 #[cfg(feature = "mpi")]
 use mpi::{environment::Universe, request::WaitGuard, topology::Communicator, traits::*};
 #[cfg(feature = "mpi")]
-use rlst_dense::RandomAccessMut;
+use rlst_dense::traits::RandomAccessMut;
 
 #[cfg(feature = "mpi")]
 fn test_parallel_grid() {
@@ -29,13 +29,13 @@ fn test_parallel_grid() {
     let n = 10;
 
     let grid = if rank == 0 {
-        let mut pts = zero_matrix((n * n, 3));
+        let mut pts = zero_matrix([n * n, 3]);
         let mut i = 0;
         for y in 0..n {
             for x in 0..n {
-                *pts.get_mut(i, 0).unwrap() = x as f64 / (n - 1) as f64;
-                *pts.get_mut(i, 1).unwrap() = y as f64 / (n - 1) as f64;
-                *pts.get_mut(i, 2).unwrap() = 0.0;
+                *pts.get_mut([i, 0]).unwrap() = x as f64 / (n - 1) as f64;
+                *pts.get_mut([i, 1]).unwrap() = y as f64 / (n - 1) as f64;
+                *pts.get_mut([i, 2]).unwrap() = 0.0;
                 i += 1;
             }
         }
diff --git a/grid/src/grid.rs b/grid/src/grid.rs
index 69bdcde6..0a676334 100644
--- a/grid/src/grid.rs
+++ b/grid/src/grid.rs
@@ -2,16 +2,16 @@
 use bempp_element::cell;
 use bempp_element::element::{create_element, CiarletElement};
 use bempp_tools::arrays::{zero_matrix, AdjacencyList, Array4D, Mat};
-use bempp_traits::arrays::{AdjacencyListAccess, Array4DAccess};
+use bempp_traits::arrays::AdjacencyListAccess;
 use bempp_traits::cell::{ReferenceCell, ReferenceCellType};
 use bempp_traits::element::{Continuity, ElementFamily, FiniteElement};
 use bempp_traits::grid::{Geometry, GeometryEvaluator, Grid, Ownership, Topology};
 use itertools::izip;
-use rlst_dense::{
-    rlst_static_mat, RandomAccessByRef, RandomAccessMut, Shape, SizeIdentifier,
-    UnsafeRandomAccessByRef, UnsafeRandomAccessMut,
+use rlst_dense::rlst_dynamic_array4;
+use rlst_dense::traits::{
+    RandomAccessByRef, RandomAccessMut, Shape, UnsafeRandomAccessByRef, UnsafeRandomAccessMut,
 };
-use rlst_proc_macro::rlst_static_size;
+use rlst_proc_macro::rlst_static_array;
 use std::cell::RefCell;
 use std::ptr;
 
@@ -21,7 +21,6 @@ pub struct EvaluatorTdim2Gdim3<'a> {
     table: Array4D<f64>,
     npts: usize,
     axes: RefCell<Mat<f64>>,
-    js: RefCell<Mat<f64>>,
 }
 
 impl<'a> EvaluatorTdim2Gdim3<'a> {
@@ -30,13 +29,12 @@ impl<'a> EvaluatorTdim2Gdim3<'a> {
         element: &impl FiniteElement,
         points: &'a Mat<f64>,
     ) -> Self {
-        let npts = points.shape().0;
-        assert_eq!(points.shape().1, 2);
+        let npts = points.shape()[0];
+        assert_eq!(points.shape()[1], 2);
         assert_eq!(geometry.dim(), 3);
-        let mut table = Array4D::<f64>::new(element.tabulate_array_shape(1, npts));
+        let mut table = rlst_dynamic_array4!(f64, element.tabulate_array_shape(1, npts));
         element.tabulate(points, 1, &mut table);
-        let axes = RefCell::new(zero_matrix((2, 3)));
-        let js = RefCell::new(zero_matrix((npts, 6)));
+        let axes = RefCell::new(zero_matrix([2, 3]));
 
         Self {
             geometry,
@@ -44,7 +42,6 @@ impl<'a> EvaluatorTdim2Gdim3<'a> {
             table,
             npts,
             axes,
-            js,
         }
     }
 }
@@ -58,112 +55,72 @@ impl<'a> GeometryEvaluator<Mat<f64>, Mat<f64>> for EvaluatorTdim2Gdim3<'a> {
         for i in 0..3 {
             for p in 0..self.npts {
                 unsafe {
-                    *points.get_unchecked_mut(p, i) = 0.0;
+                    *points.get_unchecked_mut([p, i]) = 0.0;
                 }
             }
         }
-        for i in 0..self.table.shape().2 {
+        for i in 0..self.table.shape()[2] {
             let v = unsafe { *self.geometry.cells.get_unchecked(cell_index, i) };
             for j in 0..3 {
                 for p in 0..self.npts {
                     unsafe {
-                        *points.get_unchecked_mut(p, j) +=
+                        *points.get_unchecked_mut([p, j]) +=
                             *self.geometry.coordinate_unchecked(v, j)
-                                * *self.table.get(0, p, i, 0).unwrap();
+                                * *self.table.get([0, p, i, 0]).unwrap();
                     }
                 }
             }
         }
     }
 
-    fn compute_normals(&self, cell_index: usize, normals: &mut Mat<f64>) {
+    fn compute_normals_and_jacobian_determinants(
+        &self,
+        cell_index: usize,
+        normals: &mut Mat<f64>,
+        jdets: &mut [f64],
+    ) {
         let mut axes = self.axes.borrow_mut();
-        for p in 0..self.npts {
+        for (p, jdet) in jdets.iter_mut().enumerate() {
             for i in 0..2 {
                 for j in 0..3 {
                     unsafe {
-                        *axes.get_unchecked_mut(i, j) = 0.0;
+                        *axes.get_unchecked_mut([i, j]) = 0.0;
                     }
                 }
             }
-            for i in 0..self.table.shape().2 {
+            for i in 0..self.table.shape()[2] {
                 let v = unsafe { *self.geometry.cells.get_unchecked(cell_index, i) };
                 for j in 0..3 {
                     unsafe {
-                        *axes.get_unchecked_mut(0, j) += *self.geometry.coordinate_unchecked(v, j)
-                            * self.table.get(1, p, i, 0).unwrap();
-                        *axes.get_unchecked_mut(1, j) += *self.geometry.coordinate_unchecked(v, j)
-                            * self.table.get(2, p, i, 0).unwrap();
-                    }
-                }
-            }
-            unsafe {
-                *normals.get_unchecked_mut(p, 0) = *axes.get_unchecked(0, 1)
-                    * *axes.get_unchecked(1, 2)
-                    - *axes.get_unchecked(0, 2) * *axes.get_unchecked(1, 1);
-                *normals.get_unchecked_mut(p, 1) = *axes.get_unchecked(0, 2)
-                    * *axes.get_unchecked(1, 0)
-                    - *axes.get_unchecked(0, 0) * *axes.get_unchecked(1, 2);
-                *normals.get_unchecked_mut(p, 2) = *axes.get_unchecked(0, 0)
-                    * *axes.get_unchecked(1, 1)
-                    - *axes.get_unchecked(0, 1) * *axes.get_unchecked(1, 0);
-                let size = (*normals.get_unchecked(p, 0) * *normals.get_unchecked(p, 0)
-                    + *normals.get_unchecked(p, 1) * *normals.get_unchecked(p, 1)
-                    + *normals.get_unchecked(p, 2) * *normals.get_unchecked(p, 2))
-                .sqrt();
-                *normals.get_unchecked_mut(p, 0) /= size;
-                *normals.get_unchecked_mut(p, 1) /= size;
-                *normals.get_unchecked_mut(p, 2) /= size;
-            }
-        }
-    }
-
-    fn compute_jacobians(&self, cell_index: usize, jacobians: &mut Mat<f64>) {
-        for i in 0..6 {
-            for p in 0..self.npts {
-                unsafe {
-                    *jacobians.get_unchecked_mut(p, i) = 0.0;
-                }
-            }
-        }
-        for i in 0..self.table.shape().2 {
-            let v = unsafe { *self.geometry.cells.get_unchecked(cell_index, i) };
-            for j in 0..3 {
-                for k in 0..2 {
-                    for p in 0..self.npts {
-                        unsafe {
-                            *jacobians.get_unchecked_mut(p, k + 2 * j) +=
-                                *self.geometry.coordinate_unchecked(v, j)
-                                    * self.table.get(k + 1, p, i, 0).unwrap();
-                        }
+                        *axes.get_unchecked_mut([0, j]) +=
+                            *self.geometry.coordinate_unchecked(v, j)
+                                * self.table.get([1, p, i, 0]).unwrap();
+                        *axes.get_unchecked_mut([1, j]) +=
+                            *self.geometry.coordinate_unchecked(v, j)
+                                * self.table.get([2, p, i, 0]).unwrap();
                     }
                 }
             }
-        }
-    }
-
-    fn compute_jacobian_determinants(&self, cell_index: usize, jdets: &mut [f64]) {
-        let mut js = self.js.borrow_mut();
-        self.compute_jacobians(cell_index, &mut js);
-        for (p, jdet) in jdets.iter_mut().enumerate() {
             unsafe {
-                *jdet = ((js.get_unchecked(p, 0).powi(2)
-                    + js.get_unchecked(p, 2).powi(2)
-                    + js.get_unchecked(p, 4).powi(2))
-                    * (js.get_unchecked(p, 1).powi(2)
-                        + js.get_unchecked(p, 3).powi(2)
-                        + js.get_unchecked(p, 5).powi(2))
-                    - (js.get_unchecked(p, 0) * js.get_unchecked(p, 1)
-                        + js.get_unchecked(p, 2) * js.get_unchecked(p, 3)
-                        + js.get_unchecked(p, 4) * js.get_unchecked(p, 5))
-                    .powi(2))
+                *normals.get_unchecked_mut([p, 0]) = *axes.get_unchecked([0, 1])
+                    * *axes.get_unchecked([1, 2])
+                    - *axes.get_unchecked([0, 2]) * *axes.get_unchecked([1, 1]);
+                *normals.get_unchecked_mut([p, 1]) = *axes.get_unchecked([0, 2])
+                    * *axes.get_unchecked([1, 0])
+                    - *axes.get_unchecked([0, 0]) * *axes.get_unchecked([1, 2]);
+                *normals.get_unchecked_mut([p, 2]) = *axes.get_unchecked([0, 0])
+                    * *axes.get_unchecked([1, 1])
+                    - *axes.get_unchecked([0, 1]) * *axes.get_unchecked([1, 0]);
+                *jdet = (*normals.get_unchecked([p, 0]) * *normals.get_unchecked([p, 0])
+                    + *normals.get_unchecked([p, 1]) * *normals.get_unchecked([p, 1])
+                    + *normals.get_unchecked([p, 2]) * *normals.get_unchecked([p, 2]))
                 .sqrt();
+                *normals.get_unchecked_mut([p, 0]) /= *jdet;
+                *normals.get_unchecked_mut([p, 1]) /= *jdet;
+                *normals.get_unchecked_mut([p, 2]) /= *jdet;
             }
         }
     }
-    fn compute_jacobian_inverses(&self, _cell_index: usize, _jinvs: &mut Mat<f64>) {
-        panic!("Not implemented yet");
-    }
 }
 
 pub struct LinearSimplexEvaluatorTdim2Gdim3<'a> {
@@ -171,8 +128,7 @@ pub struct LinearSimplexEvaluatorTdim2Gdim3<'a> {
     points: &'a Mat<f64>,
     table: Array4D<f64>,
     npts: usize,
-    axes: RefCell<Mat<f64>>,
-    js: RefCell<Vec<f64>>,
+    js: RefCell<[f64; 6]>,
 }
 
 impl<'a> LinearSimplexEvaluatorTdim2Gdim3<'a> {
@@ -181,20 +137,18 @@ impl<'a> LinearSimplexEvaluatorTdim2Gdim3<'a> {
         element: &impl FiniteElement,
         points: &'a Mat<f64>,
     ) -> Self {
-        let npts = points.shape().0;
-        assert_eq!(points.shape().1, 2);
+        let npts = points.shape()[0];
+        assert_eq!(points.shape()[1], 2);
         assert_eq!(geometry.dim(), 3);
-        let mut table = Array4D::<f64>::new(element.tabulate_array_shape(1, npts));
+        let mut table = rlst_dynamic_array4!(f64, element.tabulate_array_shape(1, npts));
         element.tabulate(points, 1, &mut table);
-        let axes = RefCell::new(zero_matrix((2, 3)));
-        let js = RefCell::new(vec![0.0; 6]);
+        let js = RefCell::new([0.0; 6]);
 
         Self {
             geometry,
             points,
             table,
             npts,
-            axes,
             js,
         }
     }
@@ -203,20 +157,15 @@ impl<'a> LinearSimplexEvaluatorTdim2Gdim3<'a> {
 impl<'a> LinearSimplexEvaluatorTdim2Gdim3<'a> {
     fn single_jacobian(&self, cell_index: usize) {
         let mut js = self.js.borrow_mut();
-        for i in 0..6 {
-            unsafe {
-                *js.get_unchecked_mut(i) = 0.0;
-            }
-        }
-        for i in 0..self.table.shape().2 {
-            let v = unsafe { *self.geometry.cells.get_unchecked(cell_index, i) };
-            for j in 0..3 {
-                for k in 0..2 {
-                    unsafe {
-                        *js.get_unchecked_mut(k + 2 * j) +=
-                            *self.geometry.coordinate_unchecked(v, j)
-                                * self.table.get(k + 1, 0, i, 0).unwrap();
-                    }
+
+        let vs = self.geometry.cell_vertices(cell_index).unwrap();
+
+        for j in 0..3 {
+            for i in 0..2 {
+                unsafe {
+                    *js.get_unchecked_mut(i + 2 * j) =
+                        self.geometry.coordinate_unchecked(vs[i + 1], j)
+                            - self.geometry.coordinate_unchecked(vs[0], j);
                 }
             }
         }
@@ -229,110 +178,57 @@ impl<'a> GeometryEvaluator<Mat<f64>, Mat<f64>> for LinearSimplexEvaluatorTdim2Gd
     }
 
     fn compute_points(&self, cell_index: usize, points: &mut Mat<f64>) {
-        for i in 0..3 {
+        for j in 0..3 {
             for p in 0..self.npts {
-                unsafe {
-                    *points.get_unchecked_mut(p, i) = 0.0;
-                }
-            }
-        }
-        for i in 0..self.table.shape().2 {
-            let v = unsafe { *self.geometry.cells.get_unchecked(cell_index, i) };
-            for j in 0..3 {
-                for p in 0..self.npts {
+                let mut sum = 0.0;
+                for i in 0..self.table.shape()[2] {
+                    let v = unsafe { *self.geometry.cells.get_unchecked(cell_index, i) };
                     unsafe {
-                        *points.get_unchecked_mut(p, j) +=
-                            *self.geometry.coordinate_unchecked(v, j)
-                                * *self.table.get(0, p, i, 0).unwrap();
+                        sum += *self.geometry.coordinate_unchecked(v, j)
+                            * *self.table.get([0, p, i, 0]).unwrap();
                     }
                 }
-            }
-        }
-    }
-
-    fn compute_normals(&self, cell_index: usize, normals: &mut Mat<f64>) {
-        let mut axes = self.axes.borrow_mut();
-        for j in 0..3 {
-            for i in 0..2 {
                 unsafe {
-                    *axes.get_unchecked_mut(i, j) = 0.0;
-                }
-            }
-        }
-        for i in 0..self.table.shape().2 {
-            let v = unsafe { *self.geometry.cells.get_unchecked(cell_index, i) };
-            for j in 0..3 {
-                for k in 0..2 {
-                    unsafe {
-                        *axes.get_unchecked_mut(k, j) += *self.geometry.coordinate_unchecked(v, j)
-                            * self.table.get(k + 1, 0, i, 0).unwrap();
-                    }
+                    *points.get_unchecked_mut([p, j]) = sum;
                 }
             }
         }
+    }
+
+    fn compute_normals_and_jacobian_determinants(
+        &self,
+        cell_index: usize,
+        normals: &mut Mat<f64>,
+        jdets: &mut [f64],
+    ) {
+        self.single_jacobian(cell_index);
+        let js = self.js.borrow();
         unsafe {
-            *normals.get_unchecked_mut(0, 0) = *axes.get_unchecked(0, 1)
-                * *axes.get_unchecked(1, 2)
-                - *axes.get_unchecked(0, 2) * *axes.get_unchecked(1, 1);
-            *normals.get_unchecked_mut(0, 1) = *axes.get_unchecked(0, 2)
-                * *axes.get_unchecked(1, 0)
-                - *axes.get_unchecked(0, 0) * *axes.get_unchecked(1, 2);
-            *normals.get_unchecked_mut(0, 2) = *axes.get_unchecked(0, 0)
-                * *axes.get_unchecked(1, 1)
-                - *axes.get_unchecked(0, 1) * *axes.get_unchecked(1, 0);
-            let size = ((*normals.get_unchecked(0, 0)).powi(2)
-                + (*normals.get_unchecked(0, 1)).powi(2)
-                + (*normals.get_unchecked(0, 2)).powi(2))
+            *normals.get_unchecked_mut([0, 0]) = *js.get_unchecked(2) * *js.get_unchecked(5)
+                - *js.get_unchecked(4) * *js.get_unchecked(3);
+            *normals.get_unchecked_mut([0, 1]) = *js.get_unchecked(4) * *js.get_unchecked(1)
+                - *js.get_unchecked(0) * *js.get_unchecked(5);
+            *normals.get_unchecked_mut([0, 2]) = *js.get_unchecked(0) * *js.get_unchecked(3)
+                - *js.get_unchecked(2) * *js.get_unchecked(1);
+            *jdets.get_unchecked_mut(0) = ((*normals.get_unchecked([0, 0])).powi(2)
+                + (*normals.get_unchecked([0, 1])).powi(2)
+                + (*normals.get_unchecked([0, 2])).powi(2))
             .sqrt();
-            *normals.get_unchecked_mut(0, 0) /= size;
-            *normals.get_unchecked_mut(0, 1) /= size;
-            *normals.get_unchecked_mut(0, 2) /= size;
+            for i in 0..3 {
+                *normals.get_unchecked_mut([0, i]) /= *jdets.get_unchecked(0);
+            }
+        }
+        for p in 1..self.npts {
+            jdets[p] = jdets[0];
         }
         for i in 0..3 {
             for p in 1..self.npts {
                 unsafe {
-                    *normals.get_unchecked_mut(p, i) = *normals.get_unchecked(0, i);
-                }
-            }
-        }
-    }
-
-    fn compute_jacobians(&self, cell_index: usize, jacobians: &mut Mat<f64>) {
-        self.single_jacobian(cell_index);
-        let js = self.js.borrow();
-        for i in 0..6 {
-            for p in 0..self.npts {
-                unsafe {
-                    *jacobians.get_unchecked_mut(p, i) = *js.get_unchecked(i);
+                    *normals.get_unchecked_mut([p, i]) = *normals.get_unchecked([0, i]);
                 }
             }
         }
     }
-
-    fn compute_jacobian_determinants(&self, cell_index: usize, jdets: &mut [f64]) {
-        self.single_jacobian(cell_index);
-        let js = self.js.borrow();
-        let d = unsafe {
-            ((js.get_unchecked(0).powi(2)
-                + js.get_unchecked(2).powi(2)
-                + js.get_unchecked(4).powi(2))
-                * (js.get_unchecked(1).powi(2)
-                    + js.get_unchecked(3).powi(2)
-                    + js.get_unchecked(5).powi(2))
-                - (js.get_unchecked(0) * js.get_unchecked(1)
-                    + js.get_unchecked(2) * js.get_unchecked(3)
-                    + js.get_unchecked(4) * js.get_unchecked(5))
-                .powi(2))
-            .sqrt()
-        };
-        for jdet in jdets.iter_mut() {
-            *jdet = d;
-        }
-    }
-
-    fn compute_jacobian_inverses(&self, _cell_index: usize, _jinvs: &mut Mat<f64>) {
-        panic!("Not implemented yet");
-    }
 }
 
 /// Geometry of a serial grid
@@ -344,9 +240,6 @@ pub struct SerialGeometry {
     index_map: Vec<usize>,
 }
 
-#[rlst_static_size(2, 3)]
-struct TwoByThree;
-
 fn element_from_npts(cell_type: ReferenceCellType, npts: usize) -> CiarletElement {
     create_element(
         ElementFamily::Lagrange,
@@ -420,14 +313,14 @@ impl SerialGeometry {
 
 impl SerialGeometry {
     unsafe fn coordinate_unchecked(&self, point_index: usize, coord_index: usize) -> &f64 {
-        self.coordinates.get_unchecked(point_index, coord_index)
+        self.coordinates.get_unchecked([point_index, coord_index])
     }
 }
 impl Geometry for SerialGeometry {
     type T = Mat<f64>;
     type TMut = Mat<f64>;
     fn dim(&self) -> usize {
-        self.coordinates.shape().1
+        self.coordinates.shape()[1]
     }
 
     fn coordinate(&self, point_index: usize, coord_index: usize) -> Option<&f64> {
@@ -439,7 +332,7 @@ impl Geometry for SerialGeometry {
     }
 
     fn point_count(&self) -> usize {
-        self.coordinates.shape().0
+        self.coordinates.shape()[0]
     }
 
     fn cell_vertices(&self, index: usize) -> Option<&[usize]> {
@@ -465,159 +358,161 @@ impl Geometry for SerialGeometry {
     }
 
     fn compute_points<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
         cell: usize,
         physical_points: &mut TMut,
     ) {
-        let npts = points.shape().0;
+        let npts = points.shape()[0];
         let gdim = self.dim();
-        if physical_points.shape().0 != npts {
+        if physical_points.shape()[0] != npts {
             panic!("physical_points has wrong number of rows.");
         }
-        if physical_points.shape().1 != gdim {
+        if physical_points.shape()[1] != gdim {
             panic!("physical_points has wrong number of columns.");
         }
         let element = self.element(cell);
-        let mut data = Array4D::<f64>::new(element.tabulate_array_shape(0, npts));
+        let mut data = rlst_dynamic_array4!(f64, element.tabulate_array_shape(0, npts));
         element.tabulate(points, 0, &mut data);
         for p in 0..npts {
             for i in 0..gdim {
-                *physical_points.get_mut(p, i).unwrap() = 0.0;
+                *physical_points.get_mut([p, i]).unwrap() = 0.0;
             }
         }
-        for i in 0..data.shape().2 {
+        for i in 0..data.shape()[2] {
             let v = *self.cells.get(cell, i).unwrap();
             for j in 0..gdim {
                 for p in 0..npts {
-                    *physical_points.get_mut(p, j).unwrap() +=
-                        *self.coordinate(v, j).unwrap() * data.get(0, p, i, 0).unwrap();
+                    *physical_points.get_mut([p, j]).unwrap() +=
+                        *self.coordinate(v, j).unwrap() * data.get([0, p, i, 0]).unwrap();
                 }
             }
         }
     }
     fn compute_normals<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
         cell: usize,
         normals: &mut TMut,
     ) {
-        let npts = points.shape().0;
-        let tdim = points.shape().1;
+        let npts = points.shape()[0];
+        let tdim = points.shape()[1];
         let gdim = self.dim();
         if gdim != 3 {
             unimplemented!("normals currently only implemented for 2D cells embedded in 3D.");
         }
-        if normals.shape().0 != npts {
+        if normals.shape()[0] != npts {
             panic!("normals has wrong number of columns.");
         }
-        if normals.shape().1 != gdim {
+        if normals.shape()[1] != gdim {
             panic!("normals has wrong number of rows.");
         }
         let element = self.element(cell);
-        let mut data = Array4D::<f64>::new(element.tabulate_array_shape(1, npts));
-        let mut axes = rlst_static_mat![f64, TwoByThree];
+        let mut data = rlst_dynamic_array4!(f64, element.tabulate_array_shape(1, npts));
+        let mut axes = rlst_static_array![f64, 2, 3];
         element.tabulate(points, 1, &mut data);
         for p in 0..npts {
             for i in 0..tdim {
                 for j in 0..gdim {
-                    *axes.get_mut(i, j).unwrap() = 0.0;
+                    *axes.get_mut([i, j]).unwrap() = 0.0;
                 }
             }
-            for i in 0..data.shape().2 {
+            for i in 0..data.shape()[2] {
                 let v = *self.cells.get(cell, i).unwrap();
                 for j in 0..gdim {
-                    *axes.get_mut(0, j).unwrap() +=
-                        *self.coordinate(v, j).unwrap() * data.get(1, p, i, 0).unwrap();
-                    *axes.get_mut(1, j).unwrap() +=
-                        *self.coordinate(v, j).unwrap() * data.get(2, p, i, 0).unwrap();
+                    *axes.get_mut([0, j]).unwrap() +=
+                        *self.coordinate(v, j).unwrap() * data.get([1, p, i, 0]).unwrap();
+                    *axes.get_mut([1, j]).unwrap() +=
+                        *self.coordinate(v, j).unwrap() * data.get([2, p, i, 0]).unwrap();
                 }
             }
-            *normals.get_mut(p, 0).unwrap() = *axes.get(0, 1).unwrap() * *axes.get(1, 2).unwrap()
-                - *axes.get(0, 2).unwrap() * *axes.get(1, 1).unwrap();
-            *normals.get_mut(p, 1).unwrap() = *axes.get(0, 2).unwrap() * *axes.get(1, 0).unwrap()
-                - *axes.get(0, 0).unwrap() * *axes.get(1, 2).unwrap();
-            *normals.get_mut(p, 2).unwrap() = *axes.get(0, 0).unwrap() * *axes.get(1, 1).unwrap()
-                - *axes.get(0, 1).unwrap() * *axes.get(1, 0).unwrap();
-            let size = (*normals.get(p, 0).unwrap() * *normals.get(p, 0).unwrap()
-                + *normals.get(p, 1).unwrap() * *normals.get(p, 1).unwrap()
-                + *normals.get(p, 2).unwrap() * *normals.get(p, 2).unwrap())
+            *normals.get_mut([p, 0]).unwrap() = *axes.get([0, 1]).unwrap()
+                * *axes.get([1, 2]).unwrap()
+                - *axes.get([0, 2]).unwrap() * *axes.get([1, 1]).unwrap();
+            *normals.get_mut([p, 1]).unwrap() = *axes.get([0, 2]).unwrap()
+                * *axes.get([1, 0]).unwrap()
+                - *axes.get([0, 0]).unwrap() * *axes.get([1, 2]).unwrap();
+            *normals.get_mut([p, 2]).unwrap() = *axes.get([0, 0]).unwrap()
+                * *axes.get([1, 1]).unwrap()
+                - *axes.get([0, 1]).unwrap() * *axes.get([1, 0]).unwrap();
+            let size = (*normals.get([p, 0]).unwrap() * *normals.get([p, 0]).unwrap()
+                + *normals.get([p, 1]).unwrap() * *normals.get([p, 1]).unwrap()
+                + *normals.get([p, 2]).unwrap() * *normals.get([p, 2]).unwrap())
             .sqrt();
-            *normals.get_mut(p, 0).unwrap() /= size;
-            *normals.get_mut(p, 1).unwrap() /= size;
-            *normals.get_mut(p, 2).unwrap() /= size;
+            *normals.get_mut([p, 0]).unwrap() /= size;
+            *normals.get_mut([p, 1]).unwrap() /= size;
+            *normals.get_mut([p, 2]).unwrap() /= size;
         }
     }
     fn compute_jacobians<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
         cell: usize,
         jacobians: &mut TMut,
     ) {
-        let npts = points.shape().0;
-        let tdim = points.shape().1;
+        let npts = points.shape()[0];
+        let tdim = points.shape()[1];
         let gdim = self.dim();
-        if jacobians.shape().0 != npts {
+        if jacobians.shape()[0] != npts {
             panic!("jacobians has wrong number of rows.");
         }
-        if jacobians.shape().1 != gdim * tdim {
+        if jacobians.shape()[1] != gdim * tdim {
             panic!("jacobians has wrong number of columns.");
         }
         let element = self.element(cell);
-        let mut data = Array4D::<f64>::new(element.tabulate_array_shape(1, npts));
-        let tdim = data.shape().0 - 1;
+        let mut data = rlst_dynamic_array4!(f64, element.tabulate_array_shape(1, npts));
+        let tdim = data.shape()[0] - 1;
         element.tabulate(points, 1, &mut data);
         for p in 0..npts {
-            for i in 0..jacobians.shape().1 {
-                *jacobians.get_mut(p, i).unwrap() = 0.0;
+            for i in 0..jacobians.shape()[1] {
+                *jacobians.get_mut([p, i]).unwrap() = 0.0;
             }
         }
-        for i in 0..data.shape().2 {
+        for i in 0..data.shape()[2] {
             let v = *self.cells.get(cell, i).unwrap();
             for p in 0..npts {
                 for j in 0..gdim {
                     for k in 0..tdim {
-                        *jacobians.get_mut(p, k + tdim * j).unwrap() +=
-                            *self.coordinate(v, j).unwrap() * data.get(k + 1, p, i, 0).unwrap();
+                        *jacobians.get_mut([p, k + tdim * j]).unwrap() +=
+                            *self.coordinate(v, j).unwrap() * data.get([k + 1, p, i, 0]).unwrap();
                     }
                 }
             }
         }
     }
-    fn compute_jacobian_determinants<T: RandomAccessByRef<Item = f64> + Shape>(
+    fn compute_jacobian_determinants<T: RandomAccessByRef<2, Item = f64> + Shape<2>>(
         &self,
         points: &T,
         cell: usize,
         jacobian_determinants: &mut [f64],
     ) {
-        let npts = points.shape().0;
-        let tdim = points.shape().1;
+        let npts = points.shape()[0];
+        let tdim = points.shape()[1];
         let gdim = self.dim();
-        if points.shape().0 != jacobian_determinants.len() {
+        if points.shape()[0] != jacobian_determinants.len() {
             panic!("jacobian_determinants has wrong length.");
         }
-        let mut js = zero_matrix((npts, gdim * tdim));
+        let mut js = zero_matrix([npts, gdim * tdim]);
         self.compute_jacobians(points, cell, &mut js);
 
         for (p, jdet) in jacobian_determinants.iter_mut().enumerate() {
             *jdet = match tdim {
                 1 => match gdim {
-                    1 => *js.get(p, 0).unwrap(),
-                    2 => {
-                        ((*js.get(p, 0).unwrap()).powi(2) + (*js.get(p, 1).unwrap()).powi(2)).sqrt()
-                    }
-                    3 => ((*js.get(p, 0).unwrap()).powi(2)
-                        + (*js.get(p, 1).unwrap()).powi(2)
-                        + (*js.get(p, 2).unwrap()).powi(2))
+                    1 => *js.get([p, 0]).unwrap(),
+                    2 => ((*js.get([p, 0]).unwrap()).powi(2) + (*js.get([p, 1]).unwrap()).powi(2))
+                        .sqrt(),
+                    3 => ((*js.get([p, 0]).unwrap()).powi(2)
+                        + (*js.get([p, 1]).unwrap()).powi(2)
+                        + (*js.get([p, 2]).unwrap()).powi(2))
                     .sqrt(),
                     _ => {
                         panic!("Unsupported dimensions.");
@@ -625,18 +520,18 @@ impl Geometry for SerialGeometry {
                 },
                 2 => match gdim {
                     2 => {
-                        *js.get(p, 0).unwrap() * *js.get(p, 3).unwrap()
-                            - *js.get(p, 1).unwrap() * *js.get(p, 2).unwrap()
+                        *js.get([p, 0]).unwrap() * *js.get([p, 3]).unwrap()
+                            - *js.get([p, 1]).unwrap() * *js.get([p, 2]).unwrap()
                     }
-                    3 => (((*js.get(p, 0).unwrap()).powi(2)
-                        + (*js.get(p, 2).unwrap()).powi(2)
-                        + (*js.get(p, 4).unwrap()).powi(2))
-                        * ((*js.get(p, 1).unwrap()).powi(2)
-                            + (*js.get(p, 3).unwrap()).powi(2)
-                            + (*js.get(p, 5).unwrap()).powi(2))
-                        - (*js.get(p, 0).unwrap() * *js.get(p, 1).unwrap()
-                            + *js.get(p, 2).unwrap() * *js.get(p, 3).unwrap()
-                            + *js.get(p, 4).unwrap() * *js.get(p, 5).unwrap())
+                    3 => (((*js.get([p, 0]).unwrap()).powi(2)
+                        + (*js.get([p, 2]).unwrap()).powi(2)
+                        + (*js.get([p, 4]).unwrap()).powi(2))
+                        * ((*js.get([p, 1]).unwrap()).powi(2)
+                            + (*js.get([p, 3]).unwrap()).powi(2)
+                            + (*js.get([p, 5]).unwrap()).powi(2))
+                        - (*js.get([p, 0]).unwrap() * *js.get([p, 1]).unwrap()
+                            + *js.get([p, 2]).unwrap() * *js.get([p, 3]).unwrap()
+                            + *js.get([p, 4]).unwrap() * *js.get([p, 5]).unwrap())
                         .powi(2))
                     .sqrt(),
                     _ => {
@@ -645,15 +540,15 @@ impl Geometry for SerialGeometry {
                 },
                 3 => match gdim {
                     3 => {
-                        *js.get(p, 0).unwrap()
-                            * (*js.get(p, 4).unwrap() * *js.get(p, 8).unwrap()
-                                - *js.get(p, 5).unwrap() * *js.get(p, 7).unwrap())
-                            - *js.get(p, 1).unwrap()
-                                * (*js.get(p, 3).unwrap() * *js.get(p, 8).unwrap()
-                                    - *js.get(p, 5).unwrap() * *js.get(p, 6).unwrap())
-                            + *js.get(p, 2).unwrap()
-                                * (*js.get(p, 3).unwrap() * *js.get(p, 7).unwrap()
-                                    - *js.get(p, 4).unwrap() * *js.get(p, 6).unwrap())
+                        *js.get([p, 0]).unwrap()
+                            * (*js.get([p, 4]).unwrap() * *js.get([p, 8]).unwrap()
+                                - *js.get([p, 5]).unwrap() * *js.get([p, 7]).unwrap())
+                            - *js.get([p, 1]).unwrap()
+                                * (*js.get([p, 3]).unwrap() * *js.get([p, 8]).unwrap()
+                                    - *js.get([p, 5]).unwrap() * *js.get([p, 6]).unwrap())
+                            + *js.get([p, 2]).unwrap()
+                                * (*js.get([p, 3]).unwrap() * *js.get([p, 7]).unwrap()
+                                    - *js.get([p, 4]).unwrap() * *js.get([p, 6]).unwrap())
                     }
                     _ => {
                         panic!("Unsupported dimensions.");
@@ -666,21 +561,21 @@ impl Geometry for SerialGeometry {
         }
     }
     fn compute_jacobian_inverses<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
         cell: usize,
         jacobian_inverses: &mut TMut,
     ) {
-        let npts = points.shape().0;
+        let npts = points.shape()[0];
         let gdim = self.dim();
-        let tdim = points.shape().1;
-        if jacobian_inverses.shape().0 != npts {
+        let tdim = points.shape()[1];
+        if jacobian_inverses.shape()[0] != npts {
             panic!("jacobian_inverses has wrong number of rows.");
         }
-        if jacobian_inverses.shape().1 != gdim * tdim {
+        if jacobian_inverses.shape()[1] != gdim * tdim {
             panic!("jacobian_inverses has wrong number of columns.");
         }
         let element = self.element(cell);
@@ -689,13 +584,14 @@ impl Geometry for SerialGeometry {
             && element.degree() == 1
         {
             // Map is affine
-            let mut js = zero_matrix((npts, gdim * tdim));
+            let mut js = zero_matrix([npts, gdim * tdim]);
             self.compute_jacobians(points, cell, &mut js);
 
             for p in 0..npts {
                 if tdim == 1 {
                     if gdim == 1 {
-                        *jacobian_inverses.get_mut(p, 0).unwrap() = 1.0 / *js.get(p, 0).unwrap();
+                        *jacobian_inverses.get_mut([p, 0]).unwrap() =
+                            1.0 / *js.get([p, 0]).unwrap();
                     } else if gdim == 2 {
                         unimplemented!("Inverse jacobian for this dimension not implemented yet.");
                     } else if gdim == 3 {
@@ -705,63 +601,65 @@ impl Geometry for SerialGeometry {
                     }
                 } else if tdim == 2 {
                     if gdim == 2 {
-                        let det = *js.get(p, 0).unwrap() * *js.get(p, 3).unwrap()
-                            - *js.get(p, 1).unwrap() * *js.get(p, 2).unwrap();
-                        *jacobian_inverses.get_mut(p, 0).unwrap() = js.get(p, 3).unwrap() / det;
-                        *jacobian_inverses.get_mut(p, 1).unwrap() = -js.get(p, 1).unwrap() / det;
-                        *jacobian_inverses.get_mut(p, 2).unwrap() = -js.get(p, 2).unwrap() / det;
-                        *jacobian_inverses.get_mut(p, 3).unwrap() = js.get(p, 0).unwrap() / det;
+                        let det = *js.get([p, 0]).unwrap() * *js.get([p, 3]).unwrap()
+                            - *js.get([p, 1]).unwrap() * *js.get([p, 2]).unwrap();
+                        *jacobian_inverses.get_mut([p, 0]).unwrap() = js.get([p, 3]).unwrap() / det;
+                        *jacobian_inverses.get_mut([p, 1]).unwrap() =
+                            -js.get([p, 1]).unwrap() / det;
+                        *jacobian_inverses.get_mut([p, 2]).unwrap() =
+                            -js.get([p, 2]).unwrap() / det;
+                        *jacobian_inverses.get_mut([p, 3]).unwrap() = js.get([p, 0]).unwrap() / det;
                     } else if gdim == 3 {
-                        let c = (*js.get(p, 3).unwrap() * *js.get(p, 4).unwrap()
-                            - *js.get(p, 2).unwrap() * *js.get(p, 5).unwrap())
+                        let c = (*js.get([p, 3]).unwrap() * *js.get([p, 4]).unwrap()
+                            - *js.get([p, 2]).unwrap() * *js.get([p, 5]).unwrap())
                         .powi(2)
-                            + (*js.get(p, 5).unwrap() * *js.get(p, 0).unwrap()
-                                - *js.get(p, 4).unwrap() * *js.get(p, 1).unwrap())
+                            + (*js.get([p, 5]).unwrap() * *js.get([p, 0]).unwrap()
+                                - *js.get([p, 4]).unwrap() * *js.get([p, 1]).unwrap())
                             .powi(2)
-                            + (*js.get(p, 1).unwrap() * *js.get(p, 2).unwrap()
-                                - *js.get(p, 0).unwrap() * *js.get(p, 3).unwrap())
+                            + (*js.get([p, 1]).unwrap() * *js.get([p, 2]).unwrap()
+                                - *js.get([p, 0]).unwrap() * *js.get([p, 3]).unwrap())
                             .powi(2);
-                        *jacobian_inverses.get_mut(p, 0).unwrap() = (*js.get(p, 0).unwrap()
-                            * ((*js.get(p, 5).unwrap()).powi(2)
-                                + (*js.get(p, 3).unwrap()).powi(2))
-                            - *js.get(p, 1).unwrap()
-                                * (*js.get(p, 2).unwrap() * *js.get(p, 3).unwrap()
-                                    + *js.get(p, 4).unwrap() * *js.get(p, 5).unwrap()))
+                        *jacobian_inverses.get_mut([p, 0]).unwrap() = (*js.get([p, 0]).unwrap()
+                            * ((*js.get([p, 5]).unwrap()).powi(2)
+                                + (*js.get([p, 3]).unwrap()).powi(2))
+                            - *js.get([p, 1]).unwrap()
+                                * (*js.get([p, 2]).unwrap() * *js.get([p, 3]).unwrap()
+                                    + *js.get([p, 4]).unwrap() * *js.get([p, 5]).unwrap()))
                             / c;
-                        *jacobian_inverses.get_mut(p, 1).unwrap() = (*js.get(p, 2).unwrap()
-                            * ((*js.get(p, 1).unwrap()).powi(2)
-                                + (*js.get(p, 5).unwrap()).powi(2))
-                            - *js.get(p, 3).unwrap()
-                                * (*js.get(p, 4).unwrap() * *js.get(p, 5).unwrap()
-                                    + *js.get(p, 0).unwrap() * *js.get(p, 1).unwrap()))
+                        *jacobian_inverses.get_mut([p, 1]).unwrap() = (*js.get([p, 2]).unwrap()
+                            * ((*js.get([p, 1]).unwrap()).powi(2)
+                                + (*js.get([p, 5]).unwrap()).powi(2))
+                            - *js.get([p, 3]).unwrap()
+                                * (*js.get([p, 4]).unwrap() * *js.get([p, 5]).unwrap()
+                                    + *js.get([p, 0]).unwrap() * *js.get([p, 1]).unwrap()))
                             / c;
-                        *jacobian_inverses.get_mut(p, 2).unwrap() = (*js.get(p, 4).unwrap()
-                            * ((*js.get(p, 3).unwrap()).powi(2)
-                                + (*js.get(p, 1).unwrap()).powi(2))
-                            - *js.get(p, 5).unwrap()
-                                * (*js.get(p, 0).unwrap() * *js.get(p, 1).unwrap()
-                                    + *js.get(p, 2).unwrap() * *js.get(p, 3).unwrap()))
+                        *jacobian_inverses.get_mut([p, 2]).unwrap() = (*js.get([p, 4]).unwrap()
+                            * ((*js.get([p, 3]).unwrap()).powi(2)
+                                + (*js.get([p, 1]).unwrap()).powi(2))
+                            - *js.get([p, 5]).unwrap()
+                                * (*js.get([p, 0]).unwrap() * *js.get([p, 1]).unwrap()
+                                    + *js.get([p, 2]).unwrap() * *js.get([p, 3]).unwrap()))
                             / c;
-                        *jacobian_inverses.get_mut(p, 3).unwrap() = (*js.get(p, 1).unwrap()
-                            * ((*js.get(p, 4).unwrap()).powi(2)
-                                + (*js.get(p, 2).unwrap()).powi(2))
-                            - *js.get(p, 0).unwrap()
-                                * (*js.get(p, 2).unwrap() * *js.get(p, 3).unwrap()
-                                    + *js.get(p, 4).unwrap() * *js.get(p, 5).unwrap()))
+                        *jacobian_inverses.get_mut([p, 3]).unwrap() = (*js.get([p, 1]).unwrap()
+                            * ((*js.get([p, 4]).unwrap()).powi(2)
+                                + (*js.get([p, 2]).unwrap()).powi(2))
+                            - *js.get([p, 0]).unwrap()
+                                * (*js.get([p, 2]).unwrap() * *js.get([p, 3]).unwrap()
+                                    + *js.get([p, 4]).unwrap() * *js.get([p, 5]).unwrap()))
                             / c;
-                        *jacobian_inverses.get_mut(p, 4).unwrap() = (*js.get(p, 3).unwrap()
-                            * ((*js.get(p, 0).unwrap()).powi(2)
-                                + (*js.get(p, 4).unwrap()).powi(2))
-                            - *js.get(p, 2).unwrap()
-                                * (*js.get(p, 4).unwrap() * *js.get(p, 5).unwrap()
-                                    + *js.get(p, 0).unwrap() * *js.get(p, 1).unwrap()))
+                        *jacobian_inverses.get_mut([p, 4]).unwrap() = (*js.get([p, 3]).unwrap()
+                            * ((*js.get([p, 0]).unwrap()).powi(2)
+                                + (*js.get([p, 4]).unwrap()).powi(2))
+                            - *js.get([p, 2]).unwrap()
+                                * (*js.get([p, 4]).unwrap() * *js.get([p, 5]).unwrap()
+                                    + *js.get([p, 0]).unwrap() * *js.get([p, 1]).unwrap()))
                             / c;
-                        *jacobian_inverses.get_mut(p, 5).unwrap() = (*js.get(p, 5).unwrap()
-                            * ((*js.get(p, 2).unwrap()).powi(2)
-                                + (*js.get(p, 0).unwrap()).powi(2))
-                            - *js.get(p, 4).unwrap()
-                                * (*js.get(p, 0).unwrap() * *js.get(p, 1).unwrap()
-                                    + *js.get(p, 2).unwrap() * *js.get(p, 3).unwrap()))
+                        *jacobian_inverses.get_mut([p, 5]).unwrap() = (*js.get([p, 5]).unwrap()
+                            * ((*js.get([p, 2]).unwrap()).powi(2)
+                                + (*js.get([p, 0]).unwrap()).powi(2))
+                            - *js.get([p, 4]).unwrap()
+                                * (*js.get([p, 0]).unwrap() * *js.get([p, 1]).unwrap()
+                                    + *js.get([p, 2]).unwrap() * *js.get([p, 3]).unwrap()))
                             / c;
                     } else {
                         panic!("Unsupported dimensions.");
@@ -1136,7 +1034,7 @@ mod test {
     #[test]
     fn test_connectivity() {
         let g = SerialGrid::new(
-            to_matrix(&[0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], (4, 2)),
+            to_matrix(&[0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0], [4, 2]),
             AdjacencyList::from_data(vec![0, 1, 2, 2, 1, 3], vec![0, 3, 6]),
             vec![ReferenceCellType::Triangle; 2],
         );
@@ -1242,7 +1140,7 @@ mod test {
                     0.0, 1.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, -1.0, 0.0, 1.0, 0.0, 0.0,
                     0.0, 0.0, -1.0,
                 ],
-                (6, 3),
+                [6, 3],
             ),
             AdjacencyList::from_data(
                 vec![
@@ -1268,7 +1166,7 @@ mod test {
                     0.0, 0.5, 1.0, 0.0, 0.5, 1.0, 0.0, 0.5, 1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0,
                     1.0, 1.0,
                 ],
-                (9, 2),
+                [9, 2],
             ),
             AdjacencyList::from_data(
                 vec![
@@ -1294,7 +1192,7 @@ mod test {
                     0.0, 0.5, 1.0, 0.0, 0.5, 1.0, 0.0, 0.5, 1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0,
                     1.0, 1.0,
                 ],
-                (9, 2),
+                [9, 2],
             ),
             AdjacencyList::from_data(
                 vec![0, 1, 4, 0, 4, 3, 1, 2, 4, 5, 3, 4, 7, 3, 7, 6, 4, 5, 7, 8],
@@ -1325,7 +1223,7 @@ mod test {
                 &[
                     0.0, 1.0, s, 0.0, -s, -1.0, -s, 0.0, s, 0.0, 0.0, s, 1.0, s, 0.0, -s, -1.0, -s,
                 ],
-                (9, 2),
+                [9, 2],
             ),
             AdjacencyList::from_data(vec![4, 8, 2, 1, 3, 0, 4, 6, 8, 7, 0, 5], vec![0, 6, 12]),
             vec![ReferenceCellType::Triangle, ReferenceCellType::Triangle],
@@ -1347,7 +1245,7 @@ mod test {
                     0.25, 0.5, 0.5, 0.5, 0.5, 0.75, 1.0, 1.0, 1.0, -0.5, 0.0, 0.0, 0.0, 0.0, 0.5,
                     0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                 ],
-                (13, 3),
+                [13, 3],
             ),
             AdjacencyList::from_data(
                 vec![2, 7, 12, 0, 2, 9, 11, 1, 4, 6, 10, 5, 2, 7, 11, 8, 6, 3],
@@ -1375,7 +1273,7 @@ mod test {
                     2.0, 4.0, 5.0, 0.0, -1.0, 0.0, 2.0, 2.0, 3.0, 0.0, 0.0, -1.0, 0.0, 0.0, 1.0,
                     1.0, 1.0, 1.0,
                 ],
-                (6, 3),
+                [6, 3],
             ),
             AdjacencyList::from_data(vec![0, 1, 2, 3, 4, 5], vec![0, 3, 6]),
             vec![ReferenceCellType::Triangle, ReferenceCellType::Triangle],
@@ -1385,159 +1283,159 @@ mod test {
 
         let points = to_matrix(
             &[0.2, 0.5, 1.0 / 3.0, 0.15, 0.0, 0.5, 1.0 / 3.0, 0.3],
-            (4, 2),
+            [4, 2],
         );
 
         // Test compute_points
-        let mut physical_points = zero_matrix((points.shape().0, 3));
+        let mut physical_points = zero_matrix([points.shape()[0], 3]);
         g.geometry()
             .compute_points(&points, 0, &mut physical_points);
         assert_relative_eq!(
-            *physical_points.get(0, 0).unwrap(),
+            *physical_points.get([0, 0]).unwrap(),
             2.4,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(0, 1).unwrap(),
+            *physical_points.get([0, 1]).unwrap(),
             2.0,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(0, 2).unwrap(),
+            *physical_points.get([0, 2]).unwrap(),
             0.0,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(1, 0).unwrap(),
+            *physical_points.get([1, 0]).unwrap(),
             4.5,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(1, 1).unwrap(),
+            *physical_points.get([1, 1]).unwrap(),
             2.5,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(1, 2).unwrap(),
+            *physical_points.get([1, 2]).unwrap(),
             0.5,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(2, 0).unwrap(),
+            *physical_points.get([2, 0]).unwrap(),
             11.0 / 3.0,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(2, 1).unwrap(),
+            *physical_points.get([2, 1]).unwrap(),
             7.0 / 3.0,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(2, 2).unwrap(),
+            *physical_points.get([2, 2]).unwrap(),
             1.0 / 3.0,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(3, 0).unwrap(),
+            *physical_points.get([3, 0]).unwrap(),
             3.2,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(3, 1).unwrap(),
+            *physical_points.get([3, 1]).unwrap(),
             2.3,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(3, 2).unwrap(),
+            *physical_points.get([3, 2]).unwrap(),
             0.3,
             max_relative = 1e-14
         );
         g.geometry()
             .compute_points(&points, 1, &mut physical_points);
         assert_relative_eq!(
-            *physical_points.get(0, 0).unwrap(),
+            *physical_points.get([0, 0]).unwrap(),
             -0.2,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(0, 1).unwrap(),
+            *physical_points.get([0, 1]).unwrap(),
             0.0,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(0, 2).unwrap(),
+            *physical_points.get([0, 2]).unwrap(),
             1.0,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(1, 0).unwrap(),
+            *physical_points.get([1, 0]).unwrap(),
             -0.5,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(1, 1).unwrap(),
+            *physical_points.get([1, 1]).unwrap(),
             -0.5,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(1, 2).unwrap(),
+            *physical_points.get([1, 2]).unwrap(),
             1.0,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(2, 0).unwrap(),
+            *physical_points.get([2, 0]).unwrap(),
             -1.0 / 3.0,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(2, 1).unwrap(),
+            *physical_points.get([2, 1]).unwrap(),
             -1.0 / 3.0,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(2, 2).unwrap(),
+            *physical_points.get([2, 2]).unwrap(),
             1.0,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(3, 0).unwrap(),
+            *physical_points.get([3, 0]).unwrap(),
             -0.15,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(3, 1).unwrap(),
+            *physical_points.get([3, 1]).unwrap(),
             -0.3,
             max_relative = 1e-14
         );
         assert_relative_eq!(
-            *physical_points.get(3, 2).unwrap(),
+            *physical_points.get([3, 2]).unwrap(),
             1.0,
             max_relative = 1e-14
         );
 
         // Test compute_jacobians
-        let mut jacobians = zero_matrix((points.shape().0, 6));
+        let mut jacobians = zero_matrix([points.shape()[0], 6]);
         g.geometry().compute_jacobians(&points, 0, &mut jacobians);
         for i in 0..3 {
-            assert_relative_eq!(*jacobians.get(i, 0).unwrap(), 2.0, max_relative = 1e-14);
-            assert_relative_eq!(*jacobians.get(i, 1).unwrap(), 3.0, max_relative = 1e-14);
-            // assert_relative_eq!(*jacobians.get(i, 2).unwrap(), 0.0, max_relative = 1e-14);
-            assert_relative_eq!(*jacobians.get(i, 3).unwrap(), 1.0, max_relative = 1e-14);
-            // assert_relative_eq!(*jacobians.get(i, 4).unwrap(), 0.0, max_relative = 1e-14);
-            assert_relative_eq!(*jacobians.get(i, 5).unwrap(), 1.0, max_relative = 1e-14);
+            assert_relative_eq!(*jacobians.get([i, 0]).unwrap(), 2.0, max_relative = 1e-14);
+            assert_relative_eq!(*jacobians.get([i, 1]).unwrap(), 3.0, max_relative = 1e-14);
+            // assert_relative_eq!(*jacobians.get([i, 2]).unwrap(), 0.0, max_relative = 1e-14);
+            assert_relative_eq!(*jacobians.get([i, 3]).unwrap(), 1.0, max_relative = 1e-14);
+            // assert_relative_eq!(*jacobians.get([i, 4]).unwrap(), 0.0, max_relative = 1e-14);
+            assert_relative_eq!(*jacobians.get([i, 5]).unwrap(), 1.0, max_relative = 1e-14);
         }
         g.geometry().compute_jacobians(&points, 1, &mut jacobians);
         for i in 0..3 {
-            assert_relative_eq!(*jacobians.get(i, 0).unwrap(), -1.0, max_relative = 1e-14);
-            assert_relative_eq!(*jacobians.get(i, 1).unwrap(), 0.0, max_relative = 1e-14);
-            assert_relative_eq!(*jacobians.get(i, 2).unwrap(), 0.0, max_relative = 1e-14);
-            assert_relative_eq!(*jacobians.get(i, 3).unwrap(), -1.0, max_relative = 1e-14);
-            assert_relative_eq!(*jacobians.get(i, 4).unwrap(), 0.0, max_relative = 1e-14);
-            assert_relative_eq!(*jacobians.get(i, 5).unwrap(), 0.0, max_relative = 1e-14);
+            assert_relative_eq!(*jacobians.get([i, 0]).unwrap(), -1.0, max_relative = 1e-14);
+            assert_relative_eq!(*jacobians.get([i, 1]).unwrap(), 0.0, max_relative = 1e-14);
+            assert_relative_eq!(*jacobians.get([i, 2]).unwrap(), 0.0, max_relative = 1e-14);
+            assert_relative_eq!(*jacobians.get([i, 3]).unwrap(), -1.0, max_relative = 1e-14);
+            assert_relative_eq!(*jacobians.get([i, 4]).unwrap(), 0.0, max_relative = 1e-14);
+            assert_relative_eq!(*jacobians.get([i, 5]).unwrap(), 0.0, max_relative = 1e-14);
         }
 
         // test compute_jacobian_determinants
-        let mut dets = vec![0.0; points.shape().0];
+        let mut dets = vec![0.0; points.shape()[0]];
         g.geometry()
             .compute_jacobian_determinants(&points, 0, &mut dets);
         for d in &dets {
@@ -1550,26 +1448,26 @@ mod test {
         }
 
         // Test compute_jacobian_inverses
-        let mut jinvs = zero_matrix((points.shape().0, 6));
+        let mut jinvs = zero_matrix([points.shape()[0], 6]);
         g.geometry()
             .compute_jacobian_inverses(&points, 0, &mut jinvs);
         for i in 0..3 {
-            assert_relative_eq!(*jinvs.get(i, 0).unwrap(), 0.5, max_relative = 1e-14);
-            assert_relative_eq!(*jinvs.get(i, 1).unwrap(), -0.75, max_relative = 1e-14);
-            assert_relative_eq!(*jinvs.get(i, 2).unwrap(), -0.75, max_relative = 1e-14);
-            assert_relative_eq!(*jinvs.get(i, 3).unwrap(), 0.0, max_relative = 1e-14);
-            assert_relative_eq!(*jinvs.get(i, 4).unwrap(), 0.5, max_relative = 1e-14);
-            assert_relative_eq!(*jinvs.get(i, 5).unwrap(), 0.5, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 0]).unwrap(), 0.5, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 1]).unwrap(), -0.75, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 2]).unwrap(), -0.75, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 3]).unwrap(), 0.0, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 4]).unwrap(), 0.5, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 5]).unwrap(), 0.5, max_relative = 1e-14);
         }
         g.geometry()
             .compute_jacobian_inverses(&points, 1, &mut jinvs);
         for i in 0..3 {
-            assert_relative_eq!(*jinvs.get(i, 0).unwrap(), -1.0, max_relative = 1e-14);
-            assert_relative_eq!(*jinvs.get(i, 1).unwrap(), 0.0, max_relative = 1e-14);
-            assert_relative_eq!(*jinvs.get(i, 2).unwrap(), 0.0, max_relative = 1e-14);
-            assert_relative_eq!(*jinvs.get(i, 3).unwrap(), 0.0, max_relative = 1e-14);
-            assert_relative_eq!(*jinvs.get(i, 4).unwrap(), -1.0, max_relative = 1e-14);
-            assert_relative_eq!(*jinvs.get(i, 5).unwrap(), 0.0, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 0]).unwrap(), -1.0, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 1]).unwrap(), 0.0, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 2]).unwrap(), 0.0, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 3]).unwrap(), 0.0, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 4]).unwrap(), -1.0, max_relative = 1e-14);
+            assert_relative_eq!(*jinvs.get([i, 5]).unwrap(), 0.0, max_relative = 1e-14);
         }
     }
 
@@ -1580,7 +1478,7 @@ mod test {
                 &[
                     0.0, 1.0, 0.0, -1.0, -1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0,
                 ],
-                (5, 3),
+                [5, 3],
             ),
             AdjacencyList::from_data(vec![0, 1, 2, 1, 3, 2, 2, 3, 4], vec![0, 3, 6, 9]),
             vec![
@@ -1592,25 +1490,25 @@ mod test {
             ],
         );
 
-        let pt = to_matrix(&[1.0 / 3.0, 1.0 / 3.0], (1, 2));
+        let pt = to_matrix(&[1.0 / 3.0, 1.0 / 3.0], [1, 2]);
 
-        let mut normal = zero_matrix((1, 3));
+        let mut normal = zero_matrix([1, 3]);
 
         g.geometry().compute_normals(&pt, 0, &mut normal);
-        assert_relative_eq!(*normal.get(0, 0).unwrap(), 0.0);
-        assert_relative_eq!(*normal.get(0, 1).unwrap(), -1.0);
-        assert_relative_eq!(*normal.get(0, 2).unwrap(), 0.0);
+        assert_relative_eq!(*normal.get([0, 0]).unwrap(), 0.0);
+        assert_relative_eq!(*normal.get([0, 1]).unwrap(), -1.0);
+        assert_relative_eq!(*normal.get([0, 2]).unwrap(), 0.0);
 
         g.geometry().compute_normals(&pt, 1, &mut normal);
         let a = f64::sqrt(1.0 / 3.0);
-        assert_relative_eq!(*normal.get(0, 0).unwrap(), a);
-        assert_relative_eq!(*normal.get(0, 1).unwrap(), a);
-        assert_relative_eq!(*normal.get(0, 2).unwrap(), a);
+        assert_relative_eq!(*normal.get([0, 0]).unwrap(), a);
+        assert_relative_eq!(*normal.get([0, 1]).unwrap(), a);
+        assert_relative_eq!(*normal.get([0, 2]).unwrap(), a);
 
         g.geometry().compute_normals(&pt, 2, &mut normal);
-        assert_relative_eq!(*normal.get(0, 0).unwrap(), 0.0);
-        assert_relative_eq!(*normal.get(0, 1).unwrap(), 0.0);
-        assert_relative_eq!(*normal.get(0, 2).unwrap(), 1.0);
+        assert_relative_eq!(*normal.get([0, 0]).unwrap(), 0.0);
+        assert_relative_eq!(*normal.get([0, 1]).unwrap(), 0.0);
+        assert_relative_eq!(*normal.get([0, 2]).unwrap(), 1.0);
 
         // Test a curved quadrilateral cell
         let curved_g = SerialGrid::new(
@@ -1619,67 +1517,67 @@ mod test {
                     -1.0, 1.0, -1.0, 1.0, 0.0, -1.0, 1.0, 0.0, 0.0, -1.0, -1.0, 1.0, 1.0, -1.0,
                     0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0,
                 ],
-                (9, 3),
+                [9, 3],
             ),
             AdjacencyList::from_data(vec![0, 1, 2, 3, 4, 5, 6, 7, 8], vec![0, 9]),
             vec![ReferenceCellType::Quadrilateral],
         );
 
-        let points = to_matrix(&[0.0, 0.2, 0.5, 0.7, 1.0, 0.0, 0.3, 0.9, 1.0, 0.3], (5, 2));
-        let mut normals = zero_matrix((5, 3));
+        let points = to_matrix(&[0.0, 0.2, 0.5, 0.7, 1.0, 0.0, 0.3, 0.9, 1.0, 0.3], [5, 2]);
+        let mut normals = zero_matrix([5, 3]);
 
         curved_g
             .geometry()
             .compute_normals(&points, 0, &mut normals);
 
         assert_relative_eq!(
-            *normals.get(0, 0).unwrap(),
+            *normals.get([0, 0]).unwrap(),
             2.0 * f64::sqrt(1.0 / 5.0),
             epsilon = 1e-12
         );
-        assert_relative_eq!(*normals.get(0, 1).unwrap(), 0.0, epsilon = 1e-12);
+        assert_relative_eq!(*normals.get([0, 1]).unwrap(), 0.0, epsilon = 1e-12);
         assert_relative_eq!(
-            *normals.get(0, 2).unwrap(),
+            *normals.get([0, 2]).unwrap(),
             f64::sqrt(1.0 / 5.0),
             epsilon = 1e-12
         );
 
         assert_relative_eq!(
-            *normals.get(1, 0).unwrap(),
+            *normals.get([1, 0]).unwrap(),
             1.2 * f64::sqrt(1.0 / 2.44),
             epsilon = 1e-12
         );
-        assert_relative_eq!(*normals.get(1, 1).unwrap(), 0.0, epsilon = 1e-12);
+        assert_relative_eq!(*normals.get([1, 1]).unwrap(), 0.0, epsilon = 1e-12);
         assert_relative_eq!(
-            *normals.get(1, 2).unwrap(),
+            *normals.get([1, 2]).unwrap(),
             f64::sqrt(1.0 / 2.44),
             epsilon = 1e-12
         );
 
-        assert_relative_eq!(*normals.get(2, 0).unwrap(), 0.0, epsilon = 1e-12);
-        assert_relative_eq!(*normals.get(2, 1).unwrap(), 0.0, epsilon = 1e-12);
-        assert_relative_eq!(*normals.get(2, 2).unwrap(), 1.0, epsilon = 1e-12);
+        assert_relative_eq!(*normals.get([2, 0]).unwrap(), 0.0, epsilon = 1e-12);
+        assert_relative_eq!(*normals.get([2, 1]).unwrap(), 0.0, epsilon = 1e-12);
+        assert_relative_eq!(*normals.get([2, 2]).unwrap(), 1.0, epsilon = 1e-12);
 
         assert_relative_eq!(
-            *normals.get(3, 0).unwrap(),
+            *normals.get([3, 0]).unwrap(),
             -0.8 * f64::sqrt(1.0 / 1.64),
             epsilon = 1e-12
         );
-        assert_relative_eq!(*normals.get(3, 1).unwrap(), 0.0, epsilon = 1e-12);
+        assert_relative_eq!(*normals.get([3, 1]).unwrap(), 0.0, epsilon = 1e-12);
         assert_relative_eq!(
-            *normals.get(3, 2).unwrap(),
+            *normals.get([3, 2]).unwrap(),
             f64::sqrt(1.0 / 1.64),
             epsilon = 1e-12
         );
 
         assert_relative_eq!(
-            *normals.get(4, 0).unwrap(),
+            *normals.get([4, 0]).unwrap(),
             -2.0 * f64::sqrt(1.0 / 5.0),
             epsilon = 1e-12
         );
-        assert_relative_eq!(*normals.get(4, 1).unwrap(), 0.0, epsilon = 1e-12);
+        assert_relative_eq!(*normals.get([4, 1]).unwrap(), 0.0, epsilon = 1e-12);
         assert_relative_eq!(
-            *normals.get(4, 2).unwrap(),
+            *normals.get([4, 2]).unwrap(),
             f64::sqrt(1.0 / 5.0),
             epsilon = 1e-12
         );
@@ -1694,19 +1592,19 @@ mod test {
             1,
             Continuity::Continuous,
         );
-        let pts = to_matrix(&[0.1, 0.2, 0.6, 0.1, 0.4, 0.2], (3, 2));
+        let pts = to_matrix(&[0.1, 0.2, 0.6, 0.1, 0.4, 0.2], [3, 2]);
         let e = grid.geometry().get_evaluator(&element, &pts);
 
-        let mut points0 = zero_matrix((3, 3));
-        let mut points1 = zero_matrix((3, 3));
+        let mut points0 = zero_matrix([3, 3]);
+        let mut points1 = zero_matrix([3, 3]);
         for c in 0..grid.geometry().cell_count() {
             grid.geometry().compute_points(&pts, c, &mut points0);
             e.compute_points(c, &mut points1);
             for i in 0..3 {
                 for j in 0..3 {
                     assert_relative_eq!(
-                        *points0.get(i, j).unwrap(),
-                        *points1.get(i, j).unwrap(),
+                        *points0.get([i, j]).unwrap(),
+                        *points1.get([i, j]).unwrap(),
                         epsilon = 1e-12
                     );
                 }
@@ -1715,7 +1613,7 @@ mod test {
     }
 
     #[test]
-    fn test_compute_normals_evaluator() {
+    fn test_compute_normals_and_jdets_evaluator() {
         let grid = regular_sphere(2);
         let element = create_element(
             ElementFamily::Lagrange,
@@ -1723,74 +1621,31 @@ mod test {
             1,
             Continuity::Continuous,
         );
-        let pts = to_matrix(&[0.1, 0.2, 0.6, 0.1, 0.4, 0.2], (3, 2));
+        let pts = to_matrix(&[0.1, 0.2, 0.6, 0.1, 0.4, 0.2], [3, 2]);
         let e = grid.geometry().get_evaluator(&element, &pts);
 
-        let mut normals0 = zero_matrix((3, 3));
-        let mut normals1 = zero_matrix((3, 3));
+        let mut normals0 = zero_matrix([3, 3]);
+        let mut normals1 = zero_matrix([3, 3]);
+        let mut jacobian_determinants0 = vec![0.0; 3];
+        let mut jacobian_determinants1 = vec![0.0; 3];
+
         for c in 0..grid.geometry().cell_count() {
             grid.geometry().compute_normals(&pts, c, &mut normals0);
-            e.compute_normals(c, &mut normals1);
+            grid.geometry()
+                .compute_jacobian_determinants(&pts, c, &mut jacobian_determinants0);
+            e.compute_normals_and_jacobian_determinants(
+                c,
+                &mut normals1,
+                &mut jacobian_determinants1,
+            );
             for i in 0..3 {
                 for j in 0..3 {
                     assert_relative_eq!(
-                        *normals0.get(i, j).unwrap(),
-                        *normals1.get(i, j).unwrap(),
-                        epsilon = 1e-12
-                    );
-                }
-            }
-        }
-    }
-
-    #[test]
-    fn test_compute_jacobians_evaluator() {
-        let grid = regular_sphere(2);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            1,
-            Continuity::Continuous,
-        );
-        let pts = to_matrix(&[0.1, 0.1, 0.2, 0.4, 0.6, 0.2], (3, 2));
-        let e = grid.geometry().get_evaluator(&element, &pts);
-
-        let mut jacobians0 = zero_matrix((3, 6));
-        let mut jacobians1 = zero_matrix((3, 6));
-        for c in 0..grid.geometry().cell_count() {
-            grid.geometry().compute_jacobians(&pts, c, &mut jacobians0);
-            e.compute_jacobians(c, &mut jacobians1);
-            for i in 0..3 {
-                for j in 0..6 {
-                    assert_relative_eq!(
-                        *jacobians0.get(i, j).unwrap(),
-                        *jacobians1.get(i, j).unwrap(),
+                        *normals0.get([i, j]).unwrap(),
+                        *normals1.get([i, j]).unwrap(),
                         epsilon = 1e-12
                     );
                 }
-            }
-        }
-    }
-
-    #[test]
-    fn test_compute_jacobian_determinants_evaluator() {
-        let grid = regular_sphere(2);
-        let element = create_element(
-            ElementFamily::Lagrange,
-            ReferenceCellType::Triangle,
-            1,
-            Continuity::Continuous,
-        );
-        let pts = to_matrix(&[0.1, 0.1, 0.2, 0.4, 0.6, 0.2], (3, 2));
-        let e = grid.geometry().get_evaluator(&element, &pts);
-
-        let mut jacobian_determinants0 = vec![0.0; 3];
-        let mut jacobian_determinants1 = vec![0.0; 3];
-        for c in 0..grid.geometry().cell_count() {
-            grid.geometry()
-                .compute_jacobian_determinants(&pts, c, &mut jacobian_determinants0);
-            e.compute_jacobian_determinants(c, &mut jacobian_determinants1);
-            for i in 0..3 {
                 assert_relative_eq!(
                     jacobian_determinants0[i],
                     jacobian_determinants1[i],
diff --git a/grid/src/io.rs b/grid/src/io.rs
index 7014e12e..ab4e3c02 100644
--- a/grid/src/io.rs
+++ b/grid/src/io.rs
@@ -139,7 +139,7 @@ mod test {
                     0.0, 0.5, 1.0, 0.0, 0.5, 1.0, 0.0, 0.5, 1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0,
                     1.0, 1.0,
                 ],
-                (9, 2),
+                [9, 2],
             ),
             AdjacencyList::from_data(
                 vec![0, 1, 3, 4, 3, 4, 6, 7, 1, 2, 4, 5, 4, 5, 7, 8],
@@ -158,7 +158,7 @@ mod test {
                     0.0, 0.5, 1.0, 0.0, 0.5, 1.0, 0.0, 0.5, 1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0,
                     1.0, 1.0,
                 ],
-                (9, 2),
+                [9, 2],
             ),
             AdjacencyList::from_data(
                 vec![0, 1, 4, 0, 4, 3, 1, 2, 4, 5, 3, 4, 7, 3, 7, 6, 4, 5, 7, 8],
diff --git a/grid/src/parallel_grid.rs b/grid/src/parallel_grid.rs
index 586538c8..6c0e4205 100644
--- a/grid/src/parallel_grid.rs
+++ b/grid/src/parallel_grid.rs
@@ -7,7 +7,7 @@ use bempp_traits::cell::ReferenceCellType;
 use bempp_traits::element::FiniteElement;
 use bempp_traits::grid::{Geometry, GeometryEvaluator, Grid, Ownership, Topology};
 use mpi::{request::WaitGuard, topology::Communicator, traits::*};
-use rlst_dense::{RandomAccessByRef, RandomAccessMut, Shape};
+use rlst_dense::traits::{RandomAccessByRef, RandomAccessMut, Shape};
 
 /// Geometry of a parallel grid
 pub struct ParallelGeometry<'a, C: Communicator> {
@@ -74,8 +74,8 @@ impl<'a, C: Communicator> Geometry for ParallelGeometry<'a, C> {
         self.serial_geometry.get_evaluator(element, points)
     }
     fn compute_points<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
@@ -86,8 +86,8 @@ impl<'a, C: Communicator> Geometry for ParallelGeometry<'a, C> {
             .compute_points(points, cell, physical_points)
     }
     fn compute_normals<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
@@ -97,8 +97,8 @@ impl<'a, C: Communicator> Geometry for ParallelGeometry<'a, C> {
         self.serial_geometry.compute_points(points, cell, normals)
     }
     fn compute_jacobians<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
@@ -108,7 +108,7 @@ impl<'a, C: Communicator> Geometry for ParallelGeometry<'a, C> {
         self.serial_geometry
             .compute_jacobians(points, cell, jacobians)
     }
-    fn compute_jacobian_determinants<T: RandomAccessByRef<Item = f64> + Shape>(
+    fn compute_jacobian_determinants<T: RandomAccessByRef<2, Item = f64> + Shape<2>>(
         &self,
         points: &T,
         cell: usize,
@@ -118,8 +118,8 @@ impl<'a, C: Communicator> Geometry for ParallelGeometry<'a, C> {
             .compute_jacobian_determinants(points, cell, jacobian_determinants)
     }
     fn compute_jacobian_inverses<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
@@ -212,7 +212,7 @@ impl<'a, C: Communicator> ParallelGrid<'a, C> {
         let size = comm.size() as usize;
 
         // data used in computation
-        let mut vertex_owners = vec![(-1, 0); coordinates.shape().0];
+        let mut vertex_owners = vec![(-1, 0); coordinates.shape()[0]];
         let mut vertex_counts = vec![0; size];
         let mut cell_indices_per_proc = vec![vec![]; size];
 
@@ -238,8 +238,8 @@ impl<'a, C: Communicator> ParallelGrid<'a, C> {
                     vertex_indices_per_proc[p].push(*v);
                     vertex_owners_per_proc[p].push(vertex_owners[*v].0 as usize);
                     vertex_local_indices_per_proc[p].push(vertex_owners[*v].1);
-                    for i in 0..coordinates.shape().1 {
-                        coordinates_per_proc[p].push(*coordinates.get(*v, i).unwrap())
+                    for i in 0..coordinates.shape()[1] {
+                        coordinates_per_proc[p].push(*coordinates.get([*v, i]).unwrap())
                     }
                 }
             }
@@ -261,8 +261,8 @@ impl<'a, C: Communicator> ParallelGrid<'a, C> {
                         vertex_indices_per_proc[p].push(*v);
                         vertex_owners_per_proc[p].push(vertex_owners[*v].0 as usize);
                         vertex_local_indices_per_proc[p].push(vertex_owners[*v].1);
-                        for i in 0..coordinates.shape().1 {
-                            coordinates_per_proc[p].push(*coordinates.get(*v, i).unwrap())
+                        for i in 0..coordinates.shape()[1] {
+                            coordinates_per_proc[p].push(*coordinates.get([*v, i]).unwrap())
                         }
                     }
                     cells_per_proc[p].push(
@@ -403,10 +403,10 @@ impl<'a, C: Communicator> ParallelGrid<'a, C> {
                 panic!("Unsupported cell type");
             });
         }
-        let mut coordinates = zero_matrix((vertex_indices.len(), gdim));
+        let mut coordinates = zero_matrix([vertex_indices.len(), gdim]);
         for i in 0..vertex_indices.len() {
             for j in 0..gdim {
-                *coordinates.get_mut(i, j).unwrap() = flat_coordinates[i * gdim + j];
+                *coordinates.get_mut([i, j]).unwrap() = flat_coordinates[i * gdim + j];
             }
         }
 
diff --git a/grid/src/shapes.rs b/grid/src/shapes.rs
index 3905b2f5..a98d178b 100644
--- a/grid/src/shapes.rs
+++ b/grid/src/shapes.rs
@@ -2,11 +2,11 @@
 
 use crate::grid::SerialGrid;
 use bempp_element::cell::Triangle;
-use bempp_tools::arrays::{to_matrix, AdjacencyList};
+use bempp_tools::arrays::{to_matrix, zero_matrix, AdjacencyList};
 use bempp_traits::arrays::AdjacencyListAccess;
 use bempp_traits::cell::{ReferenceCell, ReferenceCellType};
 use bempp_traits::grid::{Geometry, Grid, Topology};
-use rlst_dense::{RandomAccessByRef, RandomAccessMut};
+use rlst_dense::traits::{RandomAccessByRef, RandomAccessMut};
 
 /// Create a regular sphere
 ///
@@ -20,7 +20,7 @@ pub fn regular_sphere(refinement_level: usize) -> SerialGrid {
                 0.0, 1.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0,
                 0.0, -1.0,
             ],
-            (6, 3),
+            [6, 3],
         ),
         AdjacencyList::from_data(
             vec![
@@ -35,7 +35,7 @@ pub fn regular_sphere(refinement_level: usize) -> SerialGrid {
         let nvertices_old = g.topology().entity_count(0);
         let ncells_old = g.topology().entity_count(2);
         let nvertices = g.topology().entity_count(0) + g.topology().entity_count(1);
-        let mut coordinates = to_matrix(&vec![0.0; nvertices * 3], (nvertices, 3));
+        let mut coordinates = zero_matrix([nvertices, 3]);
         let mut cells = AdjacencyList::<usize>::new();
 
         for i in 0..ncells_old {
@@ -48,7 +48,7 @@ pub fn regular_sphere(refinement_level: usize) -> SerialGrid {
             let gv = g.geometry().cell_vertices(gi).unwrap();
             for (j, gv_j) in gv.iter().enumerate() {
                 for k in 0..g.geometry().dim() {
-                    *coordinates.get_mut(tv[j], k).unwrap() =
+                    *coordinates.get_mut([tv[j], k]).unwrap() =
                         *g.geometry().coordinate(*gv_j, k).unwrap();
                 }
             }
@@ -57,15 +57,15 @@ pub fn regular_sphere(refinement_level: usize) -> SerialGrid {
                 let vs = ref_e.connectivity(1, j, 0).unwrap();
                 let pt = (0..3)
                     .map(|k| {
-                        (*coordinates.get(tv[vs[0]], k).unwrap()
-                            + *coordinates.get(tv[vs[1]], k).unwrap())
+                        (*coordinates.get([tv[vs[0]], k]).unwrap()
+                            + *coordinates.get([tv[vs[1]], k]).unwrap())
                             / 2.0
                     })
                     .collect::<Vec<f64>>();
 
                 let norm = (pt[0].powi(2) + pt[1].powi(2) + pt[2].powi(2)).sqrt();
                 for (k, pt_k) in pt.iter().enumerate() {
-                    *coordinates.get_mut(nvertices_old + tedges_j, k).unwrap() = *pt_k / norm;
+                    *coordinates.get_mut([nvertices_old + tedges_j, k]).unwrap() = *pt_k / norm;
                 }
             }
 
@@ -92,7 +92,6 @@ pub fn regular_sphere(refinement_level: usize) -> SerialGrid {
 #[cfg(test)]
 mod test {
     use crate::shapes::*;
-    use bempp_tools::arrays::zero_matrix;
 
     #[test]
     fn test_regular_sphere_0() {
@@ -110,17 +109,17 @@ mod test {
     fn test_normal_is_outward() {
         for i in 0..3 {
             let g = regular_sphere(i);
-            let points = to_matrix(&[1.0 / 3.0, 1.0 / 3.0], (1, 2));
+            let points = to_matrix(&[1.0 / 3.0, 1.0 / 3.0], [1, 2]);
 
-            let mut mapped_pt = zero_matrix((1, 3));
-            let mut normal = zero_matrix((1, 3));
+            let mut mapped_pt = zero_matrix([1, 3]);
+            let mut normal = zero_matrix([1, 3]);
 
             for i in 0..g.geometry().cell_count() {
                 g.geometry().compute_points(&points, i, &mut mapped_pt);
                 g.geometry().compute_normals(&points, i, &mut normal);
-                let dot = *mapped_pt.get(0, 0).unwrap() * *normal.get(0, 0).unwrap()
-                    + *mapped_pt.get(0, 1).unwrap() * *normal.get(0, 1).unwrap()
-                    + *mapped_pt.get(0, 2).unwrap() * *normal.get(0, 2).unwrap();
+                let dot = *mapped_pt.get([0, 0]).unwrap() * *normal.get([0, 0]).unwrap()
+                    + *mapped_pt.get([0, 1]).unwrap() * *normal.get([0, 1]).unwrap()
+                    + *mapped_pt.get([0, 2]).unwrap() * *normal.get([0, 2]).unwrap();
                 assert!(dot > 0.0);
             }
         }
diff --git a/hyksort/Cargo.toml b/hyksort/Cargo.toml
index 20ecd361..e0d4d6d5 100644
--- a/hyksort/Cargo.toml
+++ b/hyksort/Cargo.toml
@@ -11,6 +11,5 @@ edition = "2021"
 
 [dependencies]
 mpi = "0.6.*"
-
-rand = "0.8.*"
+rand = "0.8.5"
 superslice = "1"
diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml
index c83fa2c3..d5021da8 100644
--- a/kernel/Cargo.toml
+++ b/kernel/Cargo.toml
@@ -28,7 +28,9 @@ approx = "0.5"
 rayon = "1.7"
 num = "0.4"
 num_cpus = "1"
-rlst-dense = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
+rlst-dense = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-common = { git = "https://github.com/linalg-rs/rlst.git" }
+rand = "0.8.5"
 
 [dev-dependencies]
-rlst = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
\ No newline at end of file
+rlst = { git = "https://github.com/linalg-rs/rlst.git" }
diff --git a/kernel/src/laplace_3d.rs b/kernel/src/laplace_3d.rs
index 7d4e7dd9..2e1835f0 100644
--- a/kernel/src/laplace_3d.rs
+++ b/kernel/src/laplace_3d.rs
@@ -365,11 +365,41 @@ mod test {
 
     use super::*;
     use approx::assert_relative_eq;
+    use bempp_tools::arrays::Mat;
     use bempp_traits::types::Scalar;
-    use rlst;
-    use rlst::common::traits::{Copy, Eval, Transpose};
-    use rlst::dense::traits::*;
-    use rlst_dense;
+    use rand::prelude::*;
+    use rlst_dense::rlst_dynamic_array2;
+    use rlst_dense::traits::{RandomAccessByRef, RandomAccessMut, RawAccess, RawAccessMut, Shape};
+
+    /// Create a new matrix with the same shape and entries as `m_in`.
+    fn copy(m_in: &Mat<f64>) -> Mat<f64> {
+        let mut m = rlst_dynamic_array2!(f64, m_in.shape());
+        for i in 0..m_in.shape()[0] {
+            for j in 0..m_in.shape()[1] {
+                *m.get_mut([i, j]).unwrap() = *m_in.get([i, j]).unwrap();
+            }
+        }
+        m
+    }
+
+    /// Create a matrix of the given shape with every entry drawn from
+    /// `rand::thread_rng()`.
+    fn rand_mat(shape: [usize; 2]) -> Mat<f64> {
+        let mut m = rlst_dynamic_array2!(f64, shape);
+        let mut rng = rand::thread_rng();
+        for i in 0..shape[0] {
+            for j in 0..shape[1] {
+                *m.get_mut([i, j]).unwrap() = rng.gen();
+            }
+        }
+        m
+    }
+
+    /// Create a random column vector, stored as a `size` x 1 matrix, by
+    /// delegating to `rand_mat`.
+    fn rand_vec(size: usize) -> Mat<f64> {
+        rand_mat([size, 1])
+    }
 
     #[test]
     fn test_laplace_3d() {
@@ -378,10 +408,10 @@ mod test {
         let nsources = 5;
         let ntargets = 3;
 
-        let sources = rlst::dense::rlst_rand_mat![f64, (nsources, 3)];
-        let targets = rlst::dense::rlst_rand_mat![f64, (ntargets, 3)];
-        let charges = rlst::dense::rlst_rand_col_vec![f64, nsources];
-        let mut green_value = rlst::dense::rlst_col_vec![f64, ntargets];
+        let sources = rand_mat([nsources, 3]);
+        let targets = rand_mat([ntargets, 3]);
+        let charges = rand_vec(nsources);
+        let mut green_value = rlst_dynamic_array2!(f64, [ntargets, 1]);
 
         Laplace3dKernel::<f64>::default().evaluate_st(
             EvalType::Value,
@@ -405,9 +435,9 @@ mod test {
             assert_relative_eq!(green_value[[target_index, 0]], expected, epsilon = 1E-12);
         }
 
-        let mut targets_x_eps = targets.copy();
-        let mut targets_y_eps = targets.copy();
-        let mut targets_z_eps = targets.copy();
+        let mut targets_x_eps = copy(&targets);
+        let mut targets_y_eps = copy(&targets);
+        let mut targets_z_eps = copy(&targets);
 
         for index in 0..ntargets {
             targets_x_eps[[index, 0]] += eps;
@@ -415,7 +445,7 @@ mod test {
             targets_z_eps[[index, 2]] += eps;
         }
 
-        let mut expected = rlst_dense::rlst_dynamic_mat!(f64, (4, ntargets));
+        let mut expected = rlst_dynamic_array2!(f64, [4, ntargets]);
 
         Laplace3dKernel::<f64>::default().evaluate_st(
             EvalType::ValueDeriv,
@@ -425,7 +455,7 @@ mod test {
             expected.data_mut(),
         );
 
-        let mut green_value_x_eps = rlst::dense::rlst_col_vec![f64, ntargets];
+        let mut green_value_x_eps = rlst_dynamic_array2![f64, [ntargets, 1]];
 
         Laplace3dKernel::<f64>::default().evaluate_st(
             EvalType::Value,
@@ -435,7 +465,7 @@ mod test {
             green_value_x_eps.data_mut(),
         );
 
-        let mut green_value_y_eps = rlst::dense::rlst_col_vec![f64, ntargets];
+        let mut green_value_y_eps = rlst_dynamic_array2![f64, [ntargets, 1]];
 
         Laplace3dKernel::<f64>::default().evaluate_st(
             EvalType::Value,
@@ -444,7 +474,7 @@ mod test {
             charges.data(),
             green_value_y_eps.data_mut(),
         );
-        let mut green_value_z_eps = rlst::dense::rlst_col_vec![f64, ntargets];
+        let mut green_value_z_eps = rlst_dynamic_array2![f64, [ntargets, 1]];
 
         Laplace3dKernel::<f64>::default().evaluate_st(
             EvalType::Value,
@@ -454,9 +484,16 @@ mod test {
             green_value_z_eps.data_mut(),
         );
 
-        let x_deriv = ((green_value_x_eps - &green_value) * (1.0 / eps)).eval();
-        let y_deriv = ((green_value_y_eps - &green_value) * (1.0 / eps)).eval();
-        let z_deriv = ((green_value_z_eps - &green_value) * (1.0 / eps)).eval();
+        let gv0 = copy(&green_value);
+        let gv1 = copy(&green_value);
+        let gv2 = copy(&green_value);
+
+        let mut x_deriv = rlst_dynamic_array2![f64, [ntargets, 1]];
+        let mut y_deriv = rlst_dynamic_array2![f64, [ntargets, 1]];
+        let mut z_deriv = rlst_dynamic_array2![f64, [ntargets, 1]];
+        x_deriv.fill_from((green_value_x_eps - gv0) * (1.0 / eps));
+        y_deriv.fill_from((green_value_y_eps - gv1) * (1.0 / eps));
+        z_deriv.fill_from((green_value_z_eps - gv2) * (1.0 / eps));
 
         for target_index in 0..ntargets {
             assert_relative_eq!(
@@ -489,25 +526,26 @@ mod test {
         let nsources = 3;
         let ntargets = 5;
 
-        let sources = rlst::dense::rlst_rand_mat![f64, (nsources, 3)];
-        let targets = rlst::dense::rlst_rand_mat![f64, (ntargets, 3)];
-        let mut green_value = rlst_dense::rlst_dynamic_mat!(f64, (nsources, ntargets));
+        let sources = rand_mat([nsources, 3]);
+        let targets = rand_mat([ntargets, 3]);
+        let mut green_value_t = rlst_dynamic_array2!(f64, [nsources, ntargets]);
 
         Laplace3dKernel::<f64>::default().assemble_st(
             EvalType::Value,
             sources.data(),
             targets.data(),
-            green_value.data_mut(),
+            green_value_t.data_mut(),
         );
 
         // The matrix needs to be transposed so that the first row corresponds to the first target,
         // second row to the second target and so on.
 
-        let green_value = green_value.transpose().eval();
+        let mut green_value = rlst_dynamic_array2!(f64, [ntargets, nsources]);
+        green_value.fill_from(green_value_t.transpose());
 
         for charge_index in 0..nsources {
-            let mut charges = rlst::dense::rlst_col_vec![f64, nsources];
-            let mut expected = rlst::dense::rlst_col_vec![f64, ntargets];
+            let mut charges = rlst_dynamic_array2![f64, [nsources, 1]];
+            let mut expected = rlst_dynamic_array2![f64, [ntargets, 1]];
             charges[[charge_index, 0]] = 1.0;
 
             Laplace3dKernel::<f64>::default().evaluate_st(
@@ -527,22 +565,23 @@ mod test {
             }
         }
 
-        let mut green_value_deriv = rlst_dense::rlst_dynamic_mat!(f64, (nsources, 4 * ntargets));
+        let mut green_value_deriv_t = rlst_dynamic_array2!(f64, [nsources, 4 * ntargets]);
 
         Laplace3dKernel::<f64>::default().assemble_st(
             EvalType::ValueDeriv,
             sources.data(),
             targets.data(),
-            green_value_deriv.data_mut(),
+            green_value_deriv_t.data_mut(),
         );
 
         // The matrix needs to be transposed so that the first row corresponds to the first target, etc.
 
-        let green_value_deriv = green_value_deriv.transpose().eval();
+        let mut green_value_deriv = rlst_dynamic_array2!(f64, [4 * ntargets, nsources]);
+        green_value_deriv.fill_from(green_value_deriv_t.transpose());
 
         for charge_index in 0..nsources {
-            let mut charges = rlst::dense::rlst_col_vec![f64, nsources];
-            let mut expected = rlst_dense::rlst_dynamic_mat!(f64, (4, ntargets));
+            let mut charges = rlst_dynamic_array2![f64, [nsources, 1]];
+            let mut expected = rlst_dynamic_array2!(f64, [4, ntargets]);
 
             charges[[charge_index, 0]] = 1.0;
 
@@ -571,9 +610,9 @@ mod test {
         let nsources = 3;
         let ntargets = 5;
 
-        let sources = rlst::dense::rlst_rand_mat![f64, (nsources, 3)];
-        let targets = rlst::dense::rlst_rand_mat![f64, (ntargets, 3)];
-        let mut green_value_deriv = rlst_dense::rlst_dynamic_mat!(f64, (nsources, 4 * ntargets));
+        let sources = rand_mat([nsources, 3]);
+        let targets = rand_mat([ntargets, 3]);
+        let mut green_value_deriv = rlst_dynamic_array2!(f64, [nsources, 4 * ntargets]);
 
         Laplace3dKernel::<f64>::default().assemble_st(
             EvalType::ValueDeriv,
diff --git a/python/test/test_dependencies.py b/python/test/test_dependencies.py
index ae95b9ee..afdef6f1 100644
--- a/python/test/test_dependencies.py
+++ b/python/test/test_dependencies.py
@@ -27,6 +27,11 @@ def test_dependencies():
     deps = {}
     errors = []
     for c in cargos:
+
+        # TODO: stop skipping the field, fmm and tree crates
+        if "field" in c or "fmm" in c or "tree" in c:
+            continue
+
         with open(c, "rb") as f:
             data = tomllib.load(f)
             if "dependencies" in data:
diff --git a/tools/Cargo.toml b/tools/Cargo.toml
index c9fd9ccf..c2c1c0af 100644
--- a/tools/Cargo.toml
+++ b/tools/Cargo.toml
@@ -24,4 +24,5 @@ libc = "0.2"
 num = "0.4"
 bempp-traits = { path = "../traits"}
 rayon = "1.7"
-rlst-dense = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
+rlst-common = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-dense = { git = "https://github.com/linalg-rs/rlst.git" }
diff --git a/tools/src/arrays.rs b/tools/src/arrays.rs
index 9fdd6789..b7917fd2 100644
--- a/tools/src/arrays.rs
+++ b/tools/src/arrays.rs
@@ -1,176 +1,59 @@
 //! Containers to store multi-dimensional data
-use bempp_traits::arrays::{AdjacencyListAccess, Array3DAccess, Array4DAccess};
+use bempp_traits::arrays::AdjacencyListAccess;
 use num::Num;
-use rlst_dense::{operations::transpose::Scalar, rlst_dynamic_mat, UnsafeRandomAccessMut};
-use std::clone::Clone;
-
-pub type Mat<T> = rlst_dense::Matrix<
-    T,
-    rlst_dense::base_matrix::BaseMatrix<T, rlst_dense::VectorContainer<T>, rlst_dense::Dynamic>,
-    rlst_dense::Dynamic,
->;
-
-pub fn to_matrix<T: Scalar>(data: &[T], shape: (usize, usize)) -> Mat<T> {
-    let mut mat = rlst_dynamic_mat![T, shape];
+use rlst_common::types::Scalar;
+use rlst_dense::{
+    array::Array, base_array::BaseArray, data_container::VectorContainer, rlst_dynamic_array2,
+    traits::UnsafeRandomAccessMut,
+};
+
+/// A two-dimensional rlst array backed by a `VectorContainer`
+pub type Mat<T> = Array<T, BaseArray<T, VectorContainer<T>, 2>, 2>;
+/// A three-dimensional rlst array backed by a `VectorContainer`
+pub type Array3D<T> = Array<T, BaseArray<T, VectorContainer<T>, 3>, 3>;
+/// A four-dimensional rlst array backed by a `VectorContainer`
+pub type Array4D<T> = Array<T, BaseArray<T, VectorContainer<T>, 4>, 4>;
+
+/// Create a matrix of the given shape from entries stored in column-major order
+///
+/// Entry `i` of `data` is written to row `i % shape[0]`, column `i / shape[0]`.
+///
+/// # Panics
+/// Panics if `data` has more than `shape[0] * shape[1]` entries.
+pub fn to_matrix<T: Scalar>(data: &[T], shape: [usize; 2]) -> Mat<T> {
+    assert!(data.len() <= shape[0] * shape[1], "data has more entries than the matrix");
+    let mut mat = rlst_dynamic_array2![T, shape];
     for (i, d) in data.iter().enumerate() {
         unsafe {
-            *mat.get_unchecked_mut(i % shape.0, i / shape.0) = *d;
+            // SAFETY: the assert above gives i < shape[0] * shape[1], so the row
+            // i % shape[0] and the column i / shape[0] both lie within `shape`.
+            *mat.get_unchecked_mut([i % shape[0], i / shape[0]]) = *d;
         }
     }
     mat
 }
 
-pub fn transpose_to_matrix<T: Scalar>(data: &[T], shape: (usize, usize)) -> Mat<T> {
-    let mut mat = rlst_dynamic_mat![T, shape];
+/// Create a matrix of the given shape from entries stored in row-major order
+///
+/// Entry `i` of `data` is written to row `i / shape[1]`, column `i % shape[1]`.
+///
+/// # Panics
+/// Panics if `data` has more than `shape[0] * shape[1]` entries.
+pub fn transpose_to_matrix<T: Scalar>(data: &[T], shape: [usize; 2]) -> Mat<T> {
+    assert!(data.len() <= shape[0] * shape[1], "data has more entries than the matrix");
+    let mut mat = rlst_dynamic_array2![T, shape];
     for (i, d) in data.iter().enumerate() {
         unsafe {
-            *mat.get_unchecked_mut(i / shape.1, i % shape.1) = *d;
+            // SAFETY: the assert above gives i < shape[0] * shape[1], so the row
+            // i / shape[1] and the column i % shape[1] both lie within `shape`.
+            *mat.get_unchecked_mut([i / shape[1], i % shape[1]]) = *d;
         }
     }
     mat
 }
 
-pub fn zero_matrix<T: Scalar>(shape: (usize, usize)) -> Mat<T> {
-    rlst_dynamic_mat![T, shape]
-}
-
-/// A three-dimensional rectangular array
-#[derive(Clone)]
-pub struct Array3D<T: Num> {
-    /// The data in the array, in row-major order
-    data: Vec<T>,
-    /// The shape of the array
-    shape: (usize, usize, usize),
-}
-
-impl<T: Num + Clone> Array3D<T> {
-    /// Create an array from a data vector
-    pub fn new(shape: (usize, usize, usize)) -> Self {
-        Self {
-            data: vec![T::zero(); shape.0 * shape.1 * shape.2],
-            shape,
-        }
-    }
-    /// Create an array from a data vector
-    pub fn from_data(data: Vec<T>, shape: (usize, usize, usize)) -> Self {
-        assert_eq!(data.len(), shape.0 * shape.1 * shape.2);
-        Self { data, shape }
-    }
-}
-
-impl<T: Num> Array3DAccess<T> for Array3D<T> {
-    fn get(&self, index0: usize, index1: usize, index2: usize) -> Option<&T> {
-        if index0 >= self.shape.0 || index1 >= self.shape.1 || index2 >= self.shape.2 {
-            None
-        } else {
-            unsafe { Some(self.get_unchecked(index0, index1, index2)) }
-        }
-    }
-    fn get_mut(&mut self, index0: usize, index1: usize, index2: usize) -> Option<&mut T> {
-        if index0 >= self.shape.0 || index1 >= self.shape.1 || index2 >= self.shape.2 {
-            None
-        } else {
-            unsafe { Some(self.get_unchecked_mut(index0, index1, index2)) }
-        }
-    }
-    unsafe fn get_unchecked(&self, index0: usize, index1: usize, index2: usize) -> &T {
-        self.data
-            .get_unchecked((index0 * self.shape.1 + index1) * self.shape.2 + index2)
-    }
-    unsafe fn get_unchecked_mut(&mut self, index0: usize, index1: usize, index2: usize) -> &mut T {
-        self.data
-            .get_unchecked_mut((index0 * self.shape.1 + index1) * self.shape.2 + index2)
-    }
-    fn shape(&self) -> &(usize, usize, usize) {
-        &self.shape
-    }
-
-    fn get_data(&self) -> &[T] {
-        &self.data
-    }
-
-    fn get_data_mut(&mut self) -> &mut [T] {
-        &mut self.data
-    }
-}
-
-/// A four-dimensional rectangular array
-pub struct Array4D<T: Num> {
-    /// The data in the array, in row-major order
-    data: Vec<T>,
-    /// The shape of the array
-    shape: (usize, usize, usize, usize),
-}
-
-impl<T: Num + Clone> Array4D<T> {
-    /// Create an array from a data vector
-    pub fn new(shape: (usize, usize, usize, usize)) -> Self {
-        Self {
-            data: vec![T::zero(); shape.0 * shape.1 * shape.2 * shape.3],
-            shape,
-        }
-    }
-    /// Create an array from a data vector
-    pub fn from_data(data: Vec<T>, shape: (usize, usize, usize, usize)) -> Self {
-        assert_eq!(data.len(), shape.0 * shape.1 * shape.2 * shape.3);
-        Self { data, shape }
-    }
-}
-
-impl<T: Num> Array4DAccess<T> for Array4D<T> {
-    fn get(&self, index0: usize, index1: usize, index2: usize, index3: usize) -> Option<&T> {
-        if index0 >= self.shape.0
-            || index1 >= self.shape.1
-            || index2 >= self.shape.2
-            || index3 >= self.shape.3
-        {
-            None
-        } else {
-            unsafe { Some(self.get_unchecked(index0, index1, index2, index3)) }
-        }
-    }
-    fn get_mut(
-        &mut self,
-        index0: usize,
-        index1: usize,
-        index2: usize,
-        index3: usize,
-    ) -> Option<&mut T> {
-        if index0 >= self.shape.0
-            || index1 >= self.shape.1
-            || index2 >= self.shape.2
-            || index3 >= self.shape.3
-        {
-            None
-        } else {
-            unsafe { Some(self.get_unchecked_mut(index0, index1, index2, index3)) }
-        }
-    }
-    unsafe fn get_unchecked(
-        &self,
-        index0: usize,
-        index1: usize,
-        index2: usize,
-        index3: usize,
-    ) -> &T {
-        self.data.get_unchecked(
-            ((index0 * self.shape.1 + index1) * self.shape.2 + index2) * self.shape.3 + index3,
-        )
-    }
-    unsafe fn get_unchecked_mut(
-        &mut self,
-        index0: usize,
-        index1: usize,
-        index2: usize,
-        index3: usize,
-    ) -> &mut T {
-        self.data.get_unchecked_mut(
-            ((index0 * self.shape.1 + index1) * self.shape.2 + index2) * self.shape.3 + index3,
-        )
-    }
-    fn shape(&self) -> &(usize, usize, usize, usize) {
-        &self.shape
-    }
+pub fn zero_matrix<T: Scalar>(shape: [usize; 2]) -> Mat<T> {
+    rlst_dynamic_array2![T, shape]
 }
 
 /// An adjacency list
diff --git a/traits/Cargo.toml b/traits/Cargo.toml
index c1b209e7..d9af6c03 100644
--- a/traits/Cargo.toml
+++ b/traits/Cargo.toml
@@ -22,5 +22,5 @@ crate-type = ["lib", "cdylib"]
 cauchy="0.4.*"
 thiserror="1.*"
 num = "0.4"
-rlst-common = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
-rlst-dense = { git = "https://github.com/linalg-rs/rlst.git", branch = "legacy"}
+rlst-common = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-dense = { git = "https://github.com/linalg-rs/rlst.git" }
diff --git a/traits/src/arrays.rs b/traits/src/arrays.rs
index f3be65c5..bbc0dda7 100644
--- a/traits/src/arrays.rs
+++ b/traits/src/arrays.rs
@@ -1,76 +1,6 @@
 //! Containers to store multi-dimensional data
 use num::Num;
 
-pub trait Array3DAccess<T: Num> {
-    /// Get an item from the array
-    fn get(&self, index0: usize, index1: usize, index2: usize) -> Option<&T>;
-
-    /// Get a mutable item from the array
-    fn get_mut(&mut self, index0: usize, index1: usize, index2: usize) -> Option<&mut T>;
-
-    /// Get an item from the array without checking bounds
-    ///
-    /// # Safety
-    /// This function does not perform bound checks
-    unsafe fn get_unchecked(&self, index0: usize, index1: usize, index2: usize) -> &T;
-
-    /// Get a mutable item from the array without checking bounds
-    ///
-    /// # Safety
-    /// This function does not perform bound checks
-    unsafe fn get_unchecked_mut(&mut self, index0: usize, index1: usize, index2: usize) -> &mut T;
-
-    /// Get the shape of the array
-    fn shape(&self) -> &(usize, usize, usize);
-
-    /// Get a pointer to the raw data in the array
-    fn get_data(&self) -> &[T];
-
-    /// Get a mut pointer to the raw data in the array
-    fn get_data_mut(&mut self) -> &mut [T];
-}
-
-pub trait Array4DAccess<T: Num> {
-    /// Get an item from the array
-    fn get(&self, index0: usize, index1: usize, index2: usize, index3: usize) -> Option<&T>;
-
-    /// Get a mutable item from the array
-    fn get_mut(
-        &mut self,
-        index0: usize,
-        index1: usize,
-        index2: usize,
-        index3: usize,
-    ) -> Option<&mut T>;
-
-    /// Get an item from the array without checking bounds
-    ///
-    /// # Safety
-    /// This function does not perform bound checks
-    unsafe fn get_unchecked(
-        &self,
-        index0: usize,
-        index1: usize,
-        index2: usize,
-        index3: usize,
-    ) -> &T;
-
-    /// Get a mutable item from the array without checking bounds
-    ///
-    /// # Safety
-    /// This function does not perform bound checks
-    unsafe fn get_unchecked_mut(
-        &mut self,
-        index0: usize,
-        index1: usize,
-        index2: usize,
-        index3: usize,
-    ) -> &mut T;
-
-    /// Get the shape of the array
-    fn shape(&self) -> &(usize, usize, usize, usize);
-}
-
 pub trait AdjacencyListAccess<'a, T: Num> {
     type I: Iterator;
 
diff --git a/traits/src/element.rs b/traits/src/element.rs
index 91bd4600..3dd9b55d 100644
--- a/traits/src/element.rs
+++ b/traits/src/element.rs
@@ -1,8 +1,7 @@
 //! Finite element definitions
 
-use crate::arrays::Array4DAccess;
 use crate::cell::ReferenceCellType;
-use rlst_common::traits::{RandomAccessByRef, Shape};
+use rlst_dense::traits::{RandomAccessByRef, RandomAccessMut, Shape};
 
 /// The family of an element
 #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
@@ -71,11 +70,14 @@ pub trait FiniteElement {
     fn value_size(&self) -> usize;
 
     /// Tabulate the values of the basis functions and their derivatives at a set of points
-    fn tabulate<T: RandomAccessByRef<Item = f64> + Shape>(
+    fn tabulate<
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        T4Mut: RandomAccessMut<4, Item = f64>,
+    >(
         &self,
         points: &T,
         nderivs: usize,
-        data: &mut impl Array4DAccess<f64>,
+        data: &mut T4Mut,
     );
 
     /// The DOFs that are associated with a subentity of the reference cell
@@ -85,11 +87,11 @@ pub trait FiniteElement {
     fn map_type(&self) -> MapType;
 
     /// Get the required shape for a tabulation array
-    fn tabulate_array_shape(&self, nderivs: usize, npoints: usize) -> (usize, usize, usize, usize) {
+    fn tabulate_array_shape(&self, nderivs: usize, npoints: usize) -> [usize; 4] {
         let deriv_count = compute_derivative_count(nderivs, self.cell_type());
         let point_count = npoints;
         let basis_count = self.dim();
         let value_size = self.value_size();
-        (deriv_count, point_count, basis_count, value_size)
+        [deriv_count, point_count, basis_count, value_size]
     }
 }
diff --git a/traits/src/grid.rs b/traits/src/grid.rs
index 22893077..4808850f 100644
--- a/traits/src/grid.rs
+++ b/traits/src/grid.rs
@@ -3,7 +3,7 @@
 use crate::arrays::AdjacencyListAccess;
 use crate::cell::ReferenceCellType;
 use crate::element::FiniteElement;
-use rlst_common::traits::{RandomAccessByRef, RandomAccessMut, Shape};
+use rlst_dense::traits::{RandomAccessByRef, RandomAccessMut, Shape};
 
 /// The ownership of a mesh entity
 #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
@@ -13,8 +13,8 @@ pub enum Ownership {
 }
 
 pub trait GeometryEvaluator<
-    T: RandomAccessByRef<Item = f64> + Shape,
-    TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+    T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+    TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
 >
 {
     /// The points on the reference cell that this evaluator computes information at
@@ -23,25 +23,21 @@ pub trait GeometryEvaluator<
     /// Compute the points in a physical cell
     fn compute_points(&self, cell_index: usize, points: &mut TMut);
 
-    /// Compute the normals at this evaluator's points
-    fn compute_normals(&self, cell_index: usize, normals: &mut TMut);
-
-    /// Compute the jacobians at this evaluator's points
-    fn compute_jacobians(&self, cell_index: usize, jacobians: &mut TMut);
-
-    /// Compute the jacobians at this evaluator's points
-    fn compute_jacobian_determinants(&self, cell_index: usize, jdets: &mut [f64]);
-
-    /// Compute the jacobians at this evaluator's points
-    fn compute_jacobian_inverses(&self, cell_index: usize, jinvs: &mut TMut);
+    /// Compute the normals and jacobian determinants at this evaluator's points
+    fn compute_normals_and_jacobian_determinants(
+        &self,
+        cell_index: usize,
+        normals: &mut TMut,
+        jdets: &mut [f64],
+    );
 }
 
 pub trait Geometry {
     //! Grid geometry
     //!
     //! Grid geometry provides information about the physical locations of mesh points in space
-    type T: RandomAccessByRef<Item = f64> + Shape;
-    type TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape;
+    type T: RandomAccessByRef<2, Item = f64> + Shape<2>;
+    type TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>;
 
     /// The geometric dimension
     fn dim(&self) -> usize;
@@ -70,8 +66,8 @@ pub trait Geometry {
 
     /// Compute the physical coordinates of a set of points in a given cell
     fn compute_points<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
@@ -81,8 +77,8 @@ pub trait Geometry {
 
     /// Compute the normals to a set of points in a given cell
     fn compute_normals<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
@@ -94,8 +90,8 @@ pub trait Geometry {
     ///
     /// The input points should be given using coordinates on the reference element
     fn compute_jacobians<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
@@ -106,7 +102,7 @@ pub trait Geometry {
     /// Evaluate the determinand of the jacobian at a set of points in a given cell
     ///
     /// The input points should be given using coordinates on the reference element
-    fn compute_jacobian_determinants<T: RandomAccessByRef<Item = f64> + Shape>(
+    fn compute_jacobian_determinants<T: RandomAccessByRef<2, Item = f64> + Shape<2>>(
         &self,
         points: &T,
         cell: usize,
@@ -117,8 +113,8 @@ pub trait Geometry {
     ///
     /// The input points should be given using coordinates on the reference element
     fn compute_jacobian_inverses<
-        T: RandomAccessByRef<Item = f64> + Shape,
-        TMut: RandomAccessByRef<Item = f64> + RandomAccessMut<Item = f64> + Shape,
+        T: RandomAccessByRef<2, Item = f64> + Shape<2>,
+        TMut: RandomAccessByRef<2, Item = f64> + RandomAccessMut<2, Item = f64> + Shape<2>,
     >(
         &self,
         points: &T,
diff --git a/tree/Cargo.toml b/tree/Cargo.toml
index ea9084c9..d0cfc991 100644
--- a/tree/Cargo.toml
+++ b/tree/Cargo.toml
@@ -21,7 +21,8 @@ memoffset = "0.6"
 rand = "0.8.*"
 hyksort = { path = "../hyksort", optional = true }
 bempp-traits = { path = "../traits" }
-rlst = { git = "https://github.com/linalg-rs/rlst.git" , branch = "legacy"}
+rlst = { git = "https://github.com/linalg-rs/rlst.git" }
+rlst-dense = { git = "https://github.com/linalg-rs/rlst.git" }
 num = "0.4"
 
 [features]
diff --git a/tree/src/implementations/helpers.rs b/tree/src/implementations/helpers.rs
index 1277af51..a0ca99fa 100644
--- a/tree/src/implementations/helpers.rs
+++ b/tree/src/implementations/helpers.rs
@@ -6,11 +6,12 @@ use num::Float;
 use rand::prelude::*;
 use rand::SeedableRng;
 
-use rlst::dense::rlst_col_vec;
-use rlst::dense::{base_matrix::BaseMatrix, rlst_dynamic_mat, Dynamic, Matrix, VectorContainer};
+use rlst_dense::{
+    array::Array, base_array::BaseArray, data_container::VectorContainer, rlst_dynamic_array2,
+};
 
 /// Alias for an rlst container for point data.
-pub type PointsMat<T> = Matrix<T, BaseMatrix<T, VectorContainer<T>, Dynamic>, Dynamic>;
+pub type PointsMat<T> = Array<T, BaseArray<T, VectorContainer<T>, 2>, 2>;
 
 /// Points fixture for testing, uniformly samples in each axis from min to max.
 ///
@@ -33,7 +34,7 @@ pub fn points_fixture<T: Float + Scalar + rand::distributions::uniform::SampleUn
         between = rand::distributions::Uniform::from(T::zero()..T::one());
     }
 
-    let mut points = rlst_dynamic_mat![T, (npoints, 3)];
+    let mut points = rlst_dynamic_array2!(T, [npoints, 3]);
 
     for i in 0..npoints {
         points[[i, 0]] = between.sample(&mut range);
@@ -61,9 +62,9 @@ pub fn points_fixture_sphere<T: Scalar + rand::distributions::uniform::SampleUni
 
     let between = rand::distributions::Uniform::from(T::zero()..T::one());
 
-    let mut points = rlst_dynamic_mat![T, (npoints, 3)];
-    let mut phi = rlst_col_vec![T, npoints];
-    let mut theta = rlst_col_vec![T, npoints];
+    let mut points = rlst_dynamic_array2!(T, [npoints, 3]);
+    let mut phi = rlst_dynamic_array2!(T, [npoints, 1]);
+    let mut theta = rlst_dynamic_array2!(T, [npoints, 1]);
 
     for i in 0..npoints {
         phi[[i, 0]] = between.sample(&mut range) * two * pi;
@@ -93,7 +94,7 @@ pub fn points_fixture_col<T: Float + Scalar + rand::distributions::uniform::Samp
     let between1 = rand::distributions::Uniform::from(T::zero()..T::from(0.1).unwrap());
     let between2 = rand::distributions::Uniform::from(T::zero()..T::from(500).unwrap());
 
-    let mut points = rlst_dynamic_mat![T, (npoints, 3)];
+    let mut points = rlst_dynamic_array2!(T, [npoints, 3]);
 
     for i in 0..npoints {
         // One axis has a different sampling
diff --git a/tree/src/implementations/impl_domain.rs b/tree/src/implementations/impl_domain.rs
index 98d999bf..6610960d 100644
--- a/tree/src/implementations/impl_domain.rs
+++ b/tree/src/implementations/impl_domain.rs
@@ -64,7 +64,7 @@ impl<T: Float + Default> Domain<T> {
 #[cfg(test)]
 mod test {
     use bempp_traits::types::Scalar;
-    use rlst::dense::{RawAccess, Shape};
+    use rlst_dense::traits::{RawAccess, Shape};
 
     use crate::implementations::helpers::{points_fixture, points_fixture_col, PointsMat};
 
@@ -80,7 +80,7 @@ mod test {
         assert!(domain.diameter.iter().all(|&x| x == domain.diameter[0]));
 
         // Test that all local points are contained within the local domain
-        let npoints = points.shape().0;
+        let npoints = points.shape()[0];
         for i in 0..npoints {
             let point = [points[[i, 0]], points[[i, 1]], points[[i, 2]]];
 
diff --git a/tree/src/implementations/impl_morton.rs b/tree/src/implementations/impl_morton.rs
index 0e0dcaf5..f2944eae 100644
--- a/tree/src/implementations/impl_morton.rs
+++ b/tree/src/implementations/impl_morton.rs
@@ -1007,7 +1007,7 @@ impl MortonKeyInterface for MortonKey {
 #[cfg(test)]
 mod test {
     use itertools::Itertools;
-    use rlst::dense::{RawAccess, Shape};
+    use rlst_dense::traits::{RawAccess, Shape};
     use std::vec;
 
     use crate::implementations::helpers::points_fixture;
@@ -1189,7 +1189,7 @@ mod test {
 
         let mut keys: Vec<MortonKey> = Vec::new();
 
-        for i in 0..points.shape().0 {
+        for i in 0..points.shape()[0] {
             let point = [points[[i, 0]], points[[i, 1]], points[[i, 2]]];
 
             keys.push(MortonKey::from_point(&point, &domain, DEEPEST_LEVEL));
@@ -1473,7 +1473,7 @@ mod test {
 
         let mut keys = Vec::new();
 
-        for i in 0..points.shape().0 {
+        for i in 0..points.shape()[0] {
             let point = [points[[i, 0]], points[[i, 1]], points[[i, 2]]];
             keys.push(MortonKey::from_point(&point, &domain, DEEPEST_LEVEL))
         }
diff --git a/tree/src/implementations/impl_single_node.rs b/tree/src/implementations/impl_single_node.rs
index 646d5903..d266f54a 100644
--- a/tree/src/implementations/impl_single_node.rs
+++ b/tree/src/implementations/impl_single_node.rs
@@ -814,7 +814,7 @@ where
 #[cfg(test)]
 mod test {
 
-    use rlst::dense::RawAccess;
+    use rlst_dense::traits::RawAccess;
 
     use crate::implementations::helpers::{
         points_fixture, points_fixture_col, points_fixture_sphere,